1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
38 #include "opt_inet6.h"
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/types.h>
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
59 #include <sys/module.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
71 #include <net/ethernet.h>
73 #include <net/if_arp.h>
74 #include <net/if_dl.h>
75 #include <net/if_media.h>
77 #include <net/if_types.h>
78 #include <net/if_vlan_var.h>
80 #include <netinet/in_systm.h>
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip6.h>
85 #include <netinet/tcp.h>
86 #include <netinet/udp.h>
88 #include <machine/in_cksum.h>
89 #include <dev/led/led.h>
90 #include <dev/pci/pcivar.h>
91 #include <dev/pci/pcireg.h>
93 #include "e1000_api.h"
94 #include "e1000_82571.h"
/*********************************************************************
 *  Driver version:
 *********************************************************************/
100 char em_driver_version[] = "7.6.1-k";
102 /*********************************************************************
103 * PCI Device ID Table
105 * Used by probe to select devices to load on
106 * Last field stores an index into e1000_strings
107 * Last entry must be all 0s
109 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110 *********************************************************************/
112 static em_vendor_info_t em_vendor_info_array[] =
114 /* Intel(R) PRO/1000 Network Connection */
115 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
116 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
119 PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
123 PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
125 PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
127 PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
129 PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
140 PCI_ANY_ID, PCI_ANY_ID, 0},
141 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
142 PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
144 PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
181 PCI_ANY_ID, PCI_ANY_ID, 0},
182 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
183 PCI_ANY_ID, PCI_ANY_ID, 0},
184 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
187 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
191 PCI_ANY_ID, PCI_ANY_ID, 0},
192 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
194 PCI_ANY_ID, PCI_ANY_ID, 0},
195 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
196 PCI_ANY_ID, PCI_ANY_ID, 0},
197 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
198 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
199 PCI_ANY_ID, PCI_ANY_ID, 0},
200 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
201 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
202 PCI_ANY_ID, PCI_ANY_ID, 0},
203 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
204 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
205 PCI_ANY_ID, PCI_ANY_ID, 0},
206 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
207 /* required last entry */
211 /*********************************************************************
212 * Table of branding strings for all supported NICs.
213 *********************************************************************/
/* Branding strings; indexed by the last field of em_vendor_info_array
 * entries (all currently 0).  Terminating "};" restored — it was missing
 * from this span. */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
219 /*********************************************************************
220 * Function prototypes
221 *********************************************************************/
222 static int em_probe(device_t);
223 static int em_attach(device_t);
224 static int em_detach(device_t);
225 static int em_shutdown(device_t);
226 static int em_suspend(device_t);
227 static int em_resume(device_t);
229 static int em_mq_start(struct ifnet *, struct mbuf *);
230 static int em_mq_start_locked(struct ifnet *,
232 static void em_qflush(struct ifnet *);
234 static void em_start(struct ifnet *);
235 static void em_start_locked(struct ifnet *, struct tx_ring *);
237 static int em_ioctl(struct ifnet *, u_long, caddr_t);
238 static void em_init(void *);
239 static void em_init_locked(struct adapter *);
240 static void em_stop(void *);
241 static void em_media_status(struct ifnet *, struct ifmediareq *);
242 static int em_media_change(struct ifnet *);
243 static void em_identify_hardware(struct adapter *);
244 static int em_allocate_pci_resources(struct adapter *);
245 static int em_allocate_legacy(struct adapter *);
246 static int em_allocate_msix(struct adapter *);
247 static int em_allocate_queues(struct adapter *);
248 static int em_setup_msix(struct adapter *);
249 static void em_free_pci_resources(struct adapter *);
250 static void em_local_timer(void *);
251 static void em_reset(struct adapter *);
252 static int em_setup_interface(device_t, struct adapter *);
253 static void em_flush_desc_rings(struct adapter *);
255 static void em_setup_transmit_structures(struct adapter *);
256 static void em_initialize_transmit_unit(struct adapter *);
257 static int em_allocate_transmit_buffers(struct tx_ring *);
258 static void em_free_transmit_structures(struct adapter *);
259 static void em_free_transmit_buffers(struct tx_ring *);
261 static int em_setup_receive_structures(struct adapter *);
262 static int em_allocate_receive_buffers(struct rx_ring *);
263 static void em_initialize_receive_unit(struct adapter *);
264 static void em_free_receive_structures(struct adapter *);
265 static void em_free_receive_buffers(struct rx_ring *);
267 static void em_enable_intr(struct adapter *);
268 static void em_disable_intr(struct adapter *);
269 static void em_update_stats_counters(struct adapter *);
270 static void em_add_hw_stats(struct adapter *adapter);
271 static void em_txeof(struct tx_ring *);
272 static bool em_rxeof(struct rx_ring *, int, int *);
273 #ifndef __NO_STRICT_ALIGNMENT
274 static int em_fixup_rx(struct rx_ring *);
276 static void em_setup_rxdesc(union e1000_rx_desc_extended *,
277 const struct em_rxbuffer *rxbuf);
278 static void em_receive_checksum(uint32_t status, struct mbuf *);
279 static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
280 struct ip *, u32 *, u32 *);
281 static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
282 struct tcphdr *, u32 *, u32 *);
283 static void em_set_promisc(struct adapter *);
284 static void em_disable_promisc(struct adapter *);
285 static void em_set_multi(struct adapter *);
286 static void em_update_link_status(struct adapter *);
287 static void em_refresh_mbufs(struct rx_ring *, int);
288 static void em_register_vlan(void *, struct ifnet *, u16);
289 static void em_unregister_vlan(void *, struct ifnet *, u16);
290 static void em_setup_vlan_hw_support(struct adapter *);
291 static int em_xmit(struct tx_ring *, struct mbuf **);
292 static int em_dma_malloc(struct adapter *, bus_size_t,
293 struct em_dma_alloc *, int);
294 static void em_dma_free(struct adapter *, struct em_dma_alloc *);
295 static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
296 static void em_print_nvm_info(struct adapter *);
297 static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
298 static void em_print_debug_info(struct adapter *);
299 static int em_is_valid_ether_addr(u8 *);
300 static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
301 static void em_add_int_delay_sysctl(struct adapter *, const char *,
302 const char *, struct em_int_delay_info *, int, int);
303 /* Management and WOL Support */
304 static void em_init_manageability(struct adapter *);
305 static void em_release_manageability(struct adapter *);
306 static void em_get_hw_control(struct adapter *);
307 static void em_release_hw_control(struct adapter *);
308 static void em_get_wakeup(device_t);
309 static void em_enable_wakeup(device_t);
310 static int em_enable_phy_wakeup(struct adapter *);
311 static void em_led_func(void *, int);
312 static void em_disable_aspm(struct adapter *);
314 static int em_irq_fast(void *);
317 static void em_msix_tx(void *);
318 static void em_msix_rx(void *);
319 static void em_msix_link(void *);
320 static void em_handle_tx(void *context, int pending);
321 static void em_handle_rx(void *context, int pending);
322 static void em_handle_link(void *context, int pending);
325 static void em_enable_vectors_82574(struct adapter *);
328 static void em_set_sysctl_value(struct adapter *, const char *,
329 const char *, int *, int);
330 static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
331 static int em_sysctl_eee(SYSCTL_HANDLER_ARGS);
333 static __inline void em_rx_discard(struct rx_ring *, int);
335 #ifdef DEVICE_POLLING
336 static poll_handler_t em_poll;
339 /*********************************************************************
340 * FreeBSD Device Interface Entry Points
341 *********************************************************************/
343 static device_method_t em_methods[] = {
344 /* Device interface */
345 DEVMETHOD(device_probe, em_probe),
346 DEVMETHOD(device_attach, em_attach),
347 DEVMETHOD(device_detach, em_detach),
348 DEVMETHOD(device_shutdown, em_shutdown),
349 DEVMETHOD(device_suspend, em_suspend),
350 DEVMETHOD(device_resume, em_resume),
354 static driver_t em_driver = {
355 "em", em_methods, sizeof(struct adapter),
358 devclass_t em_devclass;
359 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
360 MODULE_DEPEND(em, pci, 1, 1, 1);
361 MODULE_DEPEND(em, ether, 1, 1, 1);
363 /*********************************************************************
364 * Tunable default values.
365 *********************************************************************/
/* Convert between the delay registers' ~1.024 us tick unit and
 * microseconds, rounding to nearest. */
#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)

/* Cap on interrupts per second; DEFAULT_ITR is the corresponding ITR
 * register value (ITR counts in 256 ns increments). */
#define MAX_INTS_PER_SEC	8000
#define DEFAULT_ITR	(1000000000/(MAX_INTS_PER_SEC * 256))

/* Slack descriptors reserved for the TSO workaround -- TODO confirm
 * exact use site (not visible in this chunk) */
#define TSO_WORKAROUND	4

/* Root of the hw.em sysctl tree; all tunables below hang off it */
static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");

/* Nonzero keeps the Ethernet FCS on received frames */
static int em_disable_crc_stripping = 0;
SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
    &em_disable_crc_stripping, 0, "Disable CRC Stripping");

/* Interrupt moderation delays in usecs, defaulted from the hardware
 * tick values EM_TIDV/EM_RDTR */
static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
    0, "Default transmit interrupt delay in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
    0, "Default receive interrupt delay in usecs");

/* Absolute (upper-bound) interrupt delays, from EM_TADV/EM_RADV */
static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
    &em_tx_abs_int_delay_dflt, 0,
    "Default transmit interrupt delay limit in usecs");
SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
    &em_rx_abs_int_delay_dflt, 0,
    "Default receive interrupt delay limit in usecs");

/* Ring sizes; em_attach() falls back to the defaults if these are not a
 * multiple of the descriptor alignment or out of hardware range */
static int em_rxd = EM_DEFAULT_RXD;
static int em_txd = EM_DEFAULT_TXD;
TUNABLE_INT("hw.em.rxd", &em_rxd);
TUNABLE_INT("hw.em.txd", &em_txd);
SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
    "Number of transmit descriptors per queue");

static int em_smart_pwr_down = FALSE;
TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
    0, "Set to true to leave smart power down enabled on newer adapters");

/* Controls whether promiscuous also shows bad packets */
static int em_debug_sbp = FALSE;
TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
    "Show bad packets in promiscuous mode");

static int em_enable_msix = TRUE;
TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
    "Enable MSI-X interrupts");

/* 82574-only: queue count (see sysctl description below) */
static int em_num_queues = 1;
TUNABLE_INT("hw.em.num_queues", &em_num_queues);
SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
    "82574 only: Number of queues to configure, 0 indicates autoconfigure");

/*
 * Global variable to store last used CPU when binding queues
 * to CPUs in em_allocate_msix (comment previously said igb_allocate_msix,
 * a leftover from the igb driver).  Starts at CPU_FIRST and increments
 * when a queue is bound to a cpu.
 */
static int em_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int em_rx_process_limit = 100;
TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &em_rx_process_limit, 0,
    "Maximum number of received packets to process "
    "at a time, -1 means unlimited");

/* Energy efficient ethernet - default to OFF.
 * NOTE(review): this value is copied into hw->dev_spec.ich8lan.eee_disable
 * at attach time, so 1 here means EEE is DISABLED by default despite the
 * sysctl description reading "Enable ...". */
static int eee_setting = 1;
TUNABLE_INT("hw.em.eee_setting", &eee_setting);
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
    "Enable Energy Efficient Ethernet");

/* Global used in WOL setup with multiport cards */
static int global_quad_port_a = 0;
458 #ifdef DEV_NETMAP /* see ixgbe.c for details */
459 #include <dev/netmap/if_em_netmap.h>
460 #endif /* DEV_NETMAP */
462 /*********************************************************************
463 * Device identification routine
465 * em_probe determines if the driver should be loaded on
466 * adapter based on PCI vendor/device id of the adapter.
468 * return BUS_PROBE_DEFAULT on success, positive on failure
469 *********************************************************************/
472 em_probe(device_t dev)
474 char adapter_name[60];
475 uint16_t pci_vendor_id = 0;
476 uint16_t pci_device_id = 0;
477 uint16_t pci_subvendor_id = 0;
478 uint16_t pci_subdevice_id = 0;
479 em_vendor_info_t *ent;
481 INIT_DEBUGOUT("em_probe: begin");
483 pci_vendor_id = pci_get_vendor(dev);
484 if (pci_vendor_id != EM_VENDOR_ID)
487 pci_device_id = pci_get_device(dev);
488 pci_subvendor_id = pci_get_subvendor(dev);
489 pci_subdevice_id = pci_get_subdevice(dev);
491 ent = em_vendor_info_array;
492 while (ent->vendor_id != 0) {
493 if ((pci_vendor_id == ent->vendor_id) &&
494 (pci_device_id == ent->device_id) &&
496 ((pci_subvendor_id == ent->subvendor_id) ||
497 (ent->subvendor_id == PCI_ANY_ID)) &&
499 ((pci_subdevice_id == ent->subdevice_id) ||
500 (ent->subdevice_id == PCI_ANY_ID))) {
501 sprintf(adapter_name, "%s %s",
502 em_strings[ent->index],
504 device_set_desc_copy(dev, adapter_name);
505 return (BUS_PROBE_DEFAULT);
513 /*********************************************************************
514 * Device initialization routine
516 * The attach entry point is called when the driver is being loaded.
517 * This routine identifies the type of hardware, allocates all resources
518 * and initializes the hardware.
520 * return 0 on success, positive on failure
521 *********************************************************************/
524 em_attach(device_t dev)
526 struct adapter *adapter;
530 INIT_DEBUGOUT("em_attach: begin");
532 if (resource_disabled("em", device_get_unit(dev))) {
533 device_printf(dev, "Disabled by device hint\n");
537 adapter = device_get_softc(dev);
538 adapter->dev = adapter->osdep.dev = dev;
540 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
543 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
546 em_sysctl_nvm_info, "I", "NVM Information");
548 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
549 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
550 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
551 em_sysctl_debug_info, "I", "Debug Information");
553 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
554 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
555 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
556 em_set_flowcntl, "I", "Flow Control");
558 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
560 /* Determine hardware and mac info */
561 em_identify_hardware(adapter);
563 /* Setup PCI resources */
564 if (em_allocate_pci_resources(adapter)) {
565 device_printf(dev, "Allocation of PCI resources failed\n");
571 ** For ICH8 and family we need to
572 ** map the flash memory, and this
573 ** must happen after the MAC is
576 if ((hw->mac.type == e1000_ich8lan) ||
577 (hw->mac.type == e1000_ich9lan) ||
578 (hw->mac.type == e1000_ich10lan) ||
579 (hw->mac.type == e1000_pchlan) ||
580 (hw->mac.type == e1000_pch2lan) ||
581 (hw->mac.type == e1000_pch_lpt)) {
582 int rid = EM_BAR_TYPE_FLASH;
583 adapter->flash = bus_alloc_resource_any(dev,
584 SYS_RES_MEMORY, &rid, RF_ACTIVE);
585 if (adapter->flash == NULL) {
586 device_printf(dev, "Mapping of Flash failed\n");
590 /* This is used in the shared code */
591 hw->flash_address = (u8 *)adapter->flash;
592 adapter->osdep.flash_bus_space_tag =
593 rman_get_bustag(adapter->flash);
594 adapter->osdep.flash_bus_space_handle =
595 rman_get_bushandle(adapter->flash);
598 ** In the new SPT device flash is not a
599 ** seperate BAR, rather it is also in BAR0,
600 ** so use the same tag and an offset handle for the
601 ** FLASH read/write macros in the shared code.
603 else if (hw->mac.type == e1000_pch_spt) {
604 adapter->osdep.flash_bus_space_tag =
605 adapter->osdep.mem_bus_space_tag;
606 adapter->osdep.flash_bus_space_handle =
607 adapter->osdep.mem_bus_space_handle
608 + E1000_FLASH_BASE_ADDR;
611 /* Do Shared Code initialization */
612 error = e1000_setup_init_funcs(hw, TRUE);
614 device_printf(dev, "Setup of Shared code failed, error %d\n",
621 * Setup MSI/X or MSI if PCI Express
623 adapter->msix = em_setup_msix(adapter);
625 e1000_get_bus_info(hw);
627 /* Set up some sysctls for the tunable interrupt delays */
628 em_add_int_delay_sysctl(adapter, "rx_int_delay",
629 "receive interrupt delay in usecs", &adapter->rx_int_delay,
630 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
631 em_add_int_delay_sysctl(adapter, "tx_int_delay",
632 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
633 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
634 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
635 "receive interrupt delay limit in usecs",
636 &adapter->rx_abs_int_delay,
637 E1000_REGISTER(hw, E1000_RADV),
638 em_rx_abs_int_delay_dflt);
639 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
640 "transmit interrupt delay limit in usecs",
641 &adapter->tx_abs_int_delay,
642 E1000_REGISTER(hw, E1000_TADV),
643 em_tx_abs_int_delay_dflt);
644 em_add_int_delay_sysctl(adapter, "itr",
645 "interrupt delay limit in usecs/4",
647 E1000_REGISTER(hw, E1000_ITR),
650 /* Sysctl for limiting the amount of work done in the taskqueue */
651 em_set_sysctl_value(adapter, "rx_processing_limit",
652 "max number of rx packets to process", &adapter->rx_process_limit,
653 em_rx_process_limit);
656 * Validate number of transmit and receive descriptors. It
657 * must not exceed hardware maximum, and must be multiple
658 * of E1000_DBA_ALIGN.
660 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
661 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
662 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
663 EM_DEFAULT_TXD, em_txd);
664 adapter->num_tx_desc = EM_DEFAULT_TXD;
666 adapter->num_tx_desc = em_txd;
668 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
669 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
670 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
671 EM_DEFAULT_RXD, em_rxd);
672 adapter->num_rx_desc = EM_DEFAULT_RXD;
674 adapter->num_rx_desc = em_rxd;
676 hw->mac.autoneg = DO_AUTO_NEG;
677 hw->phy.autoneg_wait_to_complete = FALSE;
678 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
681 if (hw->phy.media_type == e1000_media_type_copper) {
682 hw->phy.mdix = AUTO_ALL_MODES;
683 hw->phy.disable_polarity_correction = FALSE;
684 hw->phy.ms_type = EM_MASTER_SLAVE;
688 * Set the frame limits assuming
689 * standard ethernet sized frames.
691 adapter->hw.mac.max_frame_size =
692 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
695 * This controls when hardware reports transmit completion
698 hw->mac.report_tx_early = 1;
701 ** Get queue/ring memory
703 if (em_allocate_queues(adapter)) {
708 /* Allocate multicast array memory. */
709 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
710 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
711 if (adapter->mta == NULL) {
712 device_printf(dev, "Can not allocate multicast setup array\n");
717 /* Check SOL/IDER usage */
718 if (e1000_check_reset_block(hw))
719 device_printf(dev, "PHY reset is blocked"
720 " due to SOL/IDER session.\n");
722 /* Sysctl for setting Energy Efficient Ethernet */
723 hw->dev_spec.ich8lan.eee_disable = eee_setting;
724 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
725 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
726 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
727 adapter, 0, em_sysctl_eee, "I",
728 "Disable Energy Efficient Ethernet");
731 ** Start from a known state, this is
732 ** important in reading the nvm and
738 /* Make sure we have a good EEPROM before we read from it */
739 if (e1000_validate_nvm_checksum(hw) < 0) {
741 ** Some PCI-E parts fail the first check due to
742 ** the link being in sleep state, call it again,
743 ** if it fails a second time its a real issue.
745 if (e1000_validate_nvm_checksum(hw) < 0) {
747 "The EEPROM Checksum Is Not Valid\n");
753 /* Copy the permanent MAC address out of the EEPROM */
754 if (e1000_read_mac_addr(hw) < 0) {
755 device_printf(dev, "EEPROM read error while reading MAC"
761 if (!em_is_valid_ether_addr(hw->mac.addr)) {
762 device_printf(dev, "Invalid MAC address\n");
767 /* Disable ULP support */
768 e1000_disable_ulp_lpt_lp(hw, TRUE);
771 ** Do interrupt configuration
773 if (adapter->msix > 1) /* Do MSIX */
774 error = em_allocate_msix(adapter);
775 else /* MSI or Legacy */
776 error = em_allocate_legacy(adapter);
781 * Get Wake-on-Lan and Management info for later use
785 /* Setup OS specific network interface */
786 if (em_setup_interface(dev, adapter) != 0)
791 /* Initialize statistics */
792 em_update_stats_counters(adapter);
794 hw->mac.get_link_status = 1;
795 em_update_link_status(adapter);
797 /* Register for VLAN events */
798 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
799 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
800 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
801 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
803 em_add_hw_stats(adapter);
805 /* Non-AMT based hardware can now take control from firmware */
806 if (adapter->has_manage && !adapter->has_amt)
807 em_get_hw_control(adapter);
809 /* Tell the stack that the interface is not active */
810 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
811 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
813 adapter->led_dev = led_create(em_led_func, adapter,
814 device_get_nameunit(dev));
816 em_netmap_attach(adapter);
817 #endif /* DEV_NETMAP */
819 INIT_DEBUGOUT("em_attach: end");
824 em_free_transmit_structures(adapter);
825 em_free_receive_structures(adapter);
826 em_release_hw_control(adapter);
827 if (adapter->ifp != NULL)
828 if_free(adapter->ifp);
830 em_free_pci_resources(adapter);
831 free(adapter->mta, M_DEVBUF);
832 EM_CORE_LOCK_DESTROY(adapter);
837 /*********************************************************************
838 * Device removal routine
840 * The detach entry point is called when the driver is being removed.
841 * This routine stops the adapter and deallocates all the resources
842 * that were allocated for driver operation.
844 * return 0 on success, positive on failure
845 *********************************************************************/
/* NOTE(review): this chunk appears to have lines elided from em_detach
 * (function storage-class/return-type line, opening brace, the em_stop()
 * call, early returns and closing braces).  Code below is kept exactly
 * as found; only comments were added. */
em_detach(device_t dev)
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		/* NOTE(review): an early "return (EBUSY);" is expected here
		 * -- presumably elided from this view; confirm */
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	/* Flag detach so timers/intr paths stop rescheduling work */
	EM_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Hand management/firmware control back before teardown */
	em_release_manageability(adapter);
	em_release_hw_control(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	/* Detach from the network stack before freeing resources */
	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);

#endif /* DEV_NETMAP */

	em_free_pci_resources(adapter);
	bus_generic_detach(dev);

	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);

	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);
906 /*********************************************************************
908 * Shutdown entry point
910 **********************************************************************/
913 em_shutdown(device_t dev)
915 return em_suspend(dev);
/* Suspend/resume device methods. */
922 em_suspend(device_t dev)
924 struct adapter *adapter = device_get_softc(dev);
926 EM_CORE_LOCK(adapter);
928 em_release_manageability(adapter);
929 em_release_hw_control(adapter);
930 em_enable_wakeup(dev);
932 EM_CORE_UNLOCK(adapter);
934 return bus_generic_suspend(dev);
/* Bring the adapter back up after suspend and restart any pending transmits. */
938 em_resume(device_t dev)
940 struct adapter *adapter = device_get_softc(dev);
941 struct tx_ring *txr = adapter->tx_rings;
942 struct ifnet *ifp = adapter->ifp;
944 EM_CORE_LOCK(adapter);
/* PCH2 (pch2lan) parts need extra workarounds applied on resume. */
945 if (adapter->hw.mac.type == e1000_pch2lan)
946 e1000_resume_workarounds_pchlan(&adapter->hw);
947 em_init_locked(adapter);
948 em_init_manageability(adapter);
/* If the interface was up, running and linked, kick every TX queue. */
950 if ((ifp->if_flags & IFF_UP) &&
951 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
952 for (int i = 0; i < adapter->num_queues; i++, txr++) {
/* NOTE(review): elided lines presumably take/drop the per-ring TX lock and
 * select the multiqueue vs. legacy start path via #ifdef EM_MULTIQUEUE. */
955 if (!drbr_empty(ifp, txr->br))
956 em_mq_start_locked(ifp, txr);
958 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
959 em_start_locked(ifp, txr);
964 EM_CORE_UNLOCK(adapter);
966 return bus_generic_resume(dev);
970 #ifndef EM_MULTIQUEUE
/* Legacy single-queue transmit loop; caller must hold the TX ring lock. */
972 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
974 struct adapter *adapter = ifp->if_softc;
977 EM_TX_LOCK_ASSERT(txr);
/* Bail unless RUNNING is set without OACTIVE, and the link is up. */
979 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
983 if (!adapter->link_active)
/* Drain if_snd until empty or descriptors run too low to map a frame. */
986 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
987 /* Call cleanup if number of TX descriptors low */
988 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
990 if (txr->tx_avail < EM_MAX_SCATTER) {
991 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
994 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
998 * Encapsulation can modify our pointer, and or make it
999 * NULL on failure. In that event, we can't requeue.
1001 if (em_xmit(txr, &m_head)) {
1004 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1008 /* Mark the queue as having work */
1009 if (txr->busy == EM_TX_IDLE)
1010 txr->busy = EM_TX_BUSY;
1012 /* Send a copy of the frame to the BPF listener */
1013 ETHER_BPF_MTAP(ifp, m_head);
/* ifnet if_start entry point: runs the legacy send loop on the single TX ring.
 * NOTE(review): elided lines presumably take/drop the TX lock around the call. */
1021 em_start(struct ifnet *ifp)
1023 struct adapter *adapter = ifp->if_softc;
1024 struct tx_ring *txr = adapter->tx_rings;
1026 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1028 em_start_locked(ifp, txr);
1033 #else /* EM_MULTIQUEUE */
1034 /*********************************************************************
1035 * Multiqueue Transmit routines
1037 * em_mq_start is called by the stack to initiate a transmit.
1038 * however, if busy the driver can queue the request rather
1039 * than do an immediate send. It is this that is an advantage
1040 * in this driver, rather than also having multiple tx queues.
1041 **********************************************************************/
1043 ** Multiqueue capable stack interface
1046 em_mq_start(struct ifnet *ifp, struct mbuf *m)
1048 struct adapter *adapter = ifp->if_softc;
1049 struct tx_ring *txr = adapter->tx_rings;
1050 unsigned int i, error;
/* Ring selection: use the mbuf's flow hash when present, else the current CPU. */
1052 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1053 i = m->m_pkthdr.flowid % adapter->num_queues;
1055 i = curcpu % adapter->num_queues;
1057 txr = &adapter->tx_rings[i];
1059 error = drbr_enqueue(ifp, txr->br, m);
/* Service the ring immediately if the TX lock is free; otherwise defer the
 * work to the ring's taskqueue so the caller never blocks on the lock. */
1063 if (EM_TX_TRYLOCK(txr)) {
1064 em_mq_start_locked(ifp, txr);
1067 taskqueue_enqueue(txr->tq, &txr->tx_task);
/* Dequeue and transmit frames from a ring's buf_ring; TX lock must be held. */
1073 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1075 struct adapter *adapter = txr->adapter;
1077 int err = 0, enq = 0;
1079 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1080 IFF_DRV_RUNNING || adapter->link_active == 0) {
1084 /* Process the queue */
1085 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1086 if ((err = em_xmit(txr, &next)) != 0) {
1088 /* It was freed, move forward */
1089 drbr_advance(ifp, txr->br);
1092 * Still have one left, it may not be
1093 * the same since the transmit function
1094 * may have changed it.
1096 drbr_putback(ifp, txr->br, next);
/* Successful send: commit the dequeue and account the frame. */
1100 drbr_advance(ifp, txr->br);
1102 ifp->if_obytes += next->m_pkthdr.len;
1103 if (next->m_flags & M_MCAST)
1105 ETHER_BPF_MTAP(ifp, next);
1106 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1110 /* Mark the queue as having work */
1111 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1112 txr->busy = EM_TX_BUSY;
/* NOTE(review): tx_avail is tested twice; the elided line between them
 * presumably runs em_txeof() to reclaim descriptors — confirm in full source. */
1114 if (txr->tx_avail < EM_MAX_SCATTER)
1116 if (txr->tx_avail < EM_MAX_SCATTER) {
1117 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1123 ** Flush all ring buffers
/* Drop every queued mbuf from each TX ring's buf_ring (ifnet if_qflush hook). */
1126 em_qflush(struct ifnet *ifp)
1128 struct adapter *adapter = ifp->if_softc;
1129 struct tx_ring *txr = adapter->tx_rings;
1132 for (int i = 0; i < adapter->num_queues; i++, txr++) {
/* NOTE(review): elided lines presumably m_freem() each dequeued mbuf. */
1134 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1140 #endif /* EM_MULTIQUEUE */
1142 /*********************************************************************
1145 * em_ioctl is called when the user wants to configure the
1148 * return 0 on success, positive on failure
1149 **********************************************************************/
1152 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1154 struct adapter *adapter = ifp->if_softc;
1155 struct ifreq *ifr = (struct ifreq *)data;
1156 #if defined(INET) || defined(INET6)
1157 struct ifaddr *ifa = (struct ifaddr *)data;
1159 bool avoid_reset = FALSE;
/* Refuse new work once detach has started. */
1162 if (adapter->in_detach)
/* SIOCSIFADDR handling (elided case labels): only reinit when required. */
1168 if (ifa->ifa_addr->sa_family == AF_INET)
1172 if (ifa->ifa_addr->sa_family == AF_INET6)
1176 ** Calling init results in link renegotiation,
1177 ** so we avoid doing it when possible.
1180 ifp->if_flags |= IFF_UP;
1181 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1184 if (!(ifp->if_flags & IFF_NOARP))
1185 arp_ifinit(ifp, ifa);
1188 error = ether_ioctl(ifp, command, data);
1194 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
/* Per-MAC jumbo-frame limits determine the maximum accepted MTU. */
1196 EM_CORE_LOCK(adapter);
1197 switch (adapter->hw.mac.type) {
1201 case e1000_ich10lan:
1207 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1208 max_frame_size = 9234;
1211 max_frame_size = 4096;
1213 /* Adapters that do not support jumbo frames */
1215 max_frame_size = ETHER_MAX_LEN;
1218 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1220 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1222 EM_CORE_UNLOCK(adapter);
1227 ifp->if_mtu = ifr->ifr_mtu;
1228 adapter->hw.mac.max_frame_size =
1229 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1230 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1231 em_init_locked(adapter);
1232 EM_CORE_UNLOCK(adapter);
1236 IOCTL_DEBUGOUT("ioctl rcv'd:\
1237 SIOCSIFFLAGS (Set Interface Flags)");
1238 EM_CORE_LOCK(adapter);
1239 if (ifp->if_flags & IFF_UP) {
1240 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
/* Only promisc/allmulti changes need a filter update, not a full reinit. */
1241 if ((ifp->if_flags ^ adapter->if_flags) &
1242 (IFF_PROMISC | IFF_ALLMULTI)) {
1243 em_disable_promisc(adapter);
1244 em_set_promisc(adapter);
1247 em_init_locked(adapter);
1249 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1251 adapter->if_flags = ifp->if_flags;
1252 EM_CORE_UNLOCK(adapter);
1256 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
/* Multicast list changed: reprogram the filter with interrupts masked. */
1257 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1258 EM_CORE_LOCK(adapter);
1259 em_disable_intr(adapter);
1260 em_set_multi(adapter);
1261 #ifdef DEVICE_POLLING
1262 if (!(ifp->if_capenable & IFCAP_POLLING))
1264 em_enable_intr(adapter);
1265 EM_CORE_UNLOCK(adapter);
1269 /* Check SOL/IDER usage */
1270 EM_CORE_LOCK(adapter);
1271 if (e1000_check_reset_block(&adapter->hw)) {
1272 EM_CORE_UNLOCK(adapter);
1273 device_printf(adapter->dev, "Media change is"
1274 " blocked due to SOL/IDER session.\n");
1277 EM_CORE_UNLOCK(adapter);
1280 IOCTL_DEBUGOUT("ioctl rcv'd: \
1281 SIOCxIFMEDIA (Get/Set Interface Media)");
1282 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1288 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
/* Toggle only the capability bits that actually changed (XOR mask). */
1290 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1291 #ifdef DEVICE_POLLING
1292 if (mask & IFCAP_POLLING) {
1293 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1294 error = ether_poll_register(em_poll, ifp);
1297 EM_CORE_LOCK(adapter);
1298 em_disable_intr(adapter);
1299 ifp->if_capenable |= IFCAP_POLLING;
1300 EM_CORE_UNLOCK(adapter);
1302 error = ether_poll_deregister(ifp);
1303 /* Enable interrupt even in error case */
1304 EM_CORE_LOCK(adapter);
1305 em_enable_intr(adapter);
1306 ifp->if_capenable &= ~IFCAP_POLLING;
1307 EM_CORE_UNLOCK(adapter);
1311 if (mask & IFCAP_HWCSUM) {
1312 ifp->if_capenable ^= IFCAP_HWCSUM;
1315 if (mask & IFCAP_TSO4) {
1316 ifp->if_capenable ^= IFCAP_TSO4;
1319 if (mask & IFCAP_VLAN_HWTAGGING) {
1320 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1323 if (mask & IFCAP_VLAN_HWFILTER) {
1324 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1327 if (mask & IFCAP_VLAN_HWTSO) {
1328 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1331 if ((mask & IFCAP_WOL) &&
1332 (ifp->if_capabilities & IFCAP_WOL) != 0) {
1333 if (mask & IFCAP_WOL_MCAST)
1334 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1335 if (mask & IFCAP_WOL_MAGIC)
1336 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1338 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1340 VLAN_CAPABILITIES(ifp);
/* Unknown commands fall through to the generic ethernet handler. */
1345 error = ether_ioctl(ifp, command, data);
1353 /*********************************************************************
1356 * This routine is used in two ways. It is used by the stack as
1357 * init entry point in network interface structure. It is also used
1358 * by the driver as a hw/sw initialization routine to get to a
1361 * return 0 on success, positive on failure
1362 **********************************************************************/
/* Full (re)initialization of the adapter; core lock must be held. */
1365 em_init_locked(struct adapter *adapter)
1367 struct ifnet *ifp = adapter->ifp;
1368 device_t dev = adapter->dev;
1370 INIT_DEBUGOUT("em_init: begin");
1372 EM_CORE_LOCK_ASSERT(adapter);
/* Quiesce: mask interrupts and stop the watchdog/stats timer. */
1374 em_disable_intr(adapter);
1375 callout_stop(&adapter->timer);
1377 /* Get the latest mac address, User can use a LAA */
1378 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1381 /* Put the address into the Receive Address Array */
1382 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1385 * With the 82571 adapter, RAR[0] may be overwritten
1386 * when the other port is reset, we make a duplicate
1387 * in RAR[14] for that eventuality, this assures
1388 * the interface continues to function.
1390 if (adapter->hw.mac.type == e1000_82571) {
1391 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1392 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1393 E1000_RAR_ENTRIES - 1);
1396 /* Initialize the hardware */
1398 em_update_link_status(adapter);
1400 /* Setup VLAN support, basic and offload if available */
1401 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1403 /* Set hardware offload abilities */
1404 if (ifp->if_capenable & IFCAP_TXCSUM)
1405 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1407 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1409 /* Configure for OS presence */
1410 em_init_manageability(adapter);
1412 /* Prepare transmit descriptors and buffers */
1413 em_setup_transmit_structures(adapter);
1414 em_initialize_transmit_unit(adapter);
1416 /* Setup Multicast table */
1417 em_set_multi(adapter);
1420 ** Figure out the desired mbuf
1421 ** pool for doing jumbos
1423 if (adapter->hw.mac.max_frame_size <= 2048)
1424 adapter->rx_mbuf_sz = MCLBYTES;
1425 else if (adapter->hw.mac.max_frame_size <= 4096)
1426 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1428 adapter->rx_mbuf_sz = MJUM9BYTES;
1430 /* Prepare receive descriptors and buffers */
1431 if (em_setup_receive_structures(adapter)) {
1432 device_printf(dev, "Could not setup receive structures\n");
1436 em_initialize_receive_unit(adapter);
1438 /* Use real VLAN Filter support? */
1439 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1440 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1441 /* Use real VLAN Filter support */
1442 em_setup_vlan_hw_support(adapter);
/* Otherwise just enable VLAN tag stripping via CTRL.VME. */
1445 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1446 ctrl |= E1000_CTRL_VME;
1447 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1451 /* Don't lose promiscuous settings */
1452 em_set_promisc(adapter);
1454 /* Set the interface as ACTIVE */
1455 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1456 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1458 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1459 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1461 /* MSI/X configuration for 82574 */
1462 if (adapter->hw.mac.type == e1000_82574) {
1464 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1465 tmp |= E1000_CTRL_EXT_PBA_CLR;
1466 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1467 /* Set the IVAR - interrupt vector routing. */
1468 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1471 #ifdef DEVICE_POLLING
1473 * Only enable interrupts if we are not polling, make sure
1474 * they are off otherwise.
1476 if (ifp->if_capenable & IFCAP_POLLING)
1477 em_disable_intr(adapter);
1479 #endif /* DEVICE_POLLING */
1480 em_enable_intr(adapter);
1482 /* AMT based hardware can now take control from firmware */
1483 if (adapter->has_manage && adapter->has_amt)
1484 em_get_hw_control(adapter);
/* NOTE(review): the function header is elided here — this is the if_init
 * entry point, which simply wraps em_init_locked() with the core lock. */
1490 struct adapter *adapter = arg;
1492 EM_CORE_LOCK(adapter);
1493 em_init_locked(adapter);
1494 EM_CORE_UNLOCK(adapter);
1498 #ifdef DEVICE_POLLING
1499 /*********************************************************************
1501 * Legacy polling routine: note this only works with single queue
1503 *********************************************************************/
1505 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1507 struct adapter *adapter = ifp->if_softc;
1508 struct tx_ring *txr = adapter->tx_rings;
1509 struct rx_ring *rxr = adapter->rx_rings;
1513 EM_CORE_LOCK(adapter);
1514 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1515 EM_CORE_UNLOCK(adapter);
/* On POLL_AND_CHECK_STATUS, read ICR and refresh link state if it changed. */
1519 if (cmd == POLL_AND_CHECK_STATUS) {
1520 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1521 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1522 callout_stop(&adapter->timer);
1523 adapter->hw.mac.get_link_status = 1;
1524 em_update_link_status(adapter);
1525 callout_reset(&adapter->timer, hz,
1526 em_local_timer, adapter);
1529 EM_CORE_UNLOCK(adapter);
/* Service RX up to the poll budget, then drain pending TX work. */
1531 em_rxeof(rxr, count, &rx_done);
1535 #ifdef EM_MULTIQUEUE
1536 if (!drbr_empty(ifp, txr->br))
1537 em_mq_start_locked(ifp, txr);
1539 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1540 em_start_locked(ifp, txr);
1546 #endif /* DEVICE_POLLING */
1549 /*********************************************************************
1551 * Fast Legacy/MSI Combined Interrupt Service routine
1553 *********************************************************************/
1555 em_irq_fast(void *arg)
1557 struct adapter *adapter = arg;
1563 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* All-ones read means the device is gone (e.g. hot-unplugged). */
1566 if (reg_icr == 0xffffffff)
1567 return FILTER_STRAY;
1569 /* Definitely not our interrupt. */
1571 return FILTER_STRAY;
1574 * Starting with the 82571 chip, bit 31 should be used to
1575 * determine whether the interrupt belongs to us.
1577 if (adapter->hw.mac.type >= e1000_82571 &&
1578 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1579 return FILTER_STRAY;
/* Mask interrupts and defer the RX/TX work to the que taskqueue. */
1581 em_disable_intr(adapter);
1582 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1584 /* Link status change */
1585 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1586 adapter->hw.mac.get_link_status = 1;
1587 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1590 if (reg_icr & E1000_ICR_RXO)
1591 adapter->rx_overruns++;
1592 return FILTER_HANDLED;
1595 /* Combined RX/TX handler, used by Legacy and MSI */
1597 em_handle_que(void *context, int pending)
1599 struct adapter *adapter = context;
1600 struct ifnet *ifp = adapter->ifp;
1601 struct tx_ring *txr = adapter->tx_rings;
1602 struct rx_ring *rxr = adapter->rx_rings;
1604 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/* Service RX up to the process limit; `more` => reschedule ourselves. */
1605 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1609 #ifdef EM_MULTIQUEUE
1610 if (!drbr_empty(ifp, txr->br))
1611 em_mq_start_locked(ifp, txr);
1613 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1614 em_start_locked(ifp, txr);
/* More work pending: requeue the task instead of re-enabling interrupts. */
1618 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1623 em_enable_intr(adapter);
1628 /*********************************************************************
1630 * MSIX Interrupt Service Routines
1632 **********************************************************************/
/* Per-ring MSI-X TX interrupt: reclaim descriptors and restart transmission. */
1634 em_msix_tx(void *arg)
1636 struct tx_ring *txr = arg;
1637 struct adapter *adapter = txr->adapter;
1638 struct ifnet *ifp = adapter->ifp;
1643 #ifdef EM_MULTIQUEUE
1644 if (!drbr_empty(ifp, txr->br))
1645 em_mq_start_locked(ifp, txr);
1647 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1648 em_start_locked(ifp, txr);
1651 /* Reenable this interrupt */
1652 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1657 /*********************************************************************
1659 * MSIX RX Interrupt Service routine
1661 **********************************************************************/
1664 em_msix_rx(void *arg)
1666 struct rx_ring *rxr = arg;
1667 struct adapter *adapter = rxr->adapter;
1671 if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
/* If em_rxeof left work pending, continue in task context instead of
 * re-enabling the interrupt immediately. */
1673 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1675 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1677 /* Reenable this interrupt */
1678 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1683 /*********************************************************************
1685 * MSIX Link Fast Interrupt Service routine
1687 **********************************************************************/
1689 em_msix_link(void *arg)
1691 struct adapter *adapter = arg;
1694 ++adapter->link_irq;
1695 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1697 if (reg_icr & E1000_ICR_RXO)
1698 adapter->rx_overruns++;
1700 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1701 adapter->hw.mac.get_link_status = 1;
1702 em_handle_link(adapter, 0);
/* Re-arm the link vector in the interrupt mask set register. */
1704 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1705 EM_MSIX_LINK | E1000_IMS_LSC);
1707 ** Because we must read the ICR for this interrupt
1708 ** it may clear other causes using autoclear, for
1709 ** this reason we simply create a soft interrupt
1710 ** for all these vectors.
1713 E1000_WRITE_REG(&adapter->hw,
1714 E1000_ICS, adapter->ims);
/* Deferred RX task: keep draining the ring until em_rxeof reports no more. */
1720 em_handle_rx(void *context, int pending)
1722 struct rx_ring *rxr = context;
1723 struct adapter *adapter = rxr->adapter;
1726 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1728 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1730 /* Reenable this interrupt */
1731 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
/* Deferred TX task: restart transmission on the ring, then re-arm its vector. */
1736 em_handle_tx(void *context, int pending)
1738 struct tx_ring *txr = context;
1739 struct adapter *adapter = txr->adapter;
1740 struct ifnet *ifp = adapter->ifp;
1744 #ifdef EM_MULTIQUEUE
1745 if (!drbr_empty(ifp, txr->br))
1746 em_mq_start_locked(ifp, txr);
1748 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1749 em_start_locked(ifp, txr);
1751 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
/* Deferred link task: refresh link state and, if link came up, restart TX. */
1756 em_handle_link(void *context, int pending)
1758 struct adapter *adapter = context;
1759 struct tx_ring *txr = adapter->tx_rings;
1760 struct ifnet *ifp = adapter->ifp;
1762 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1765 EM_CORE_LOCK(adapter);
/* Pause the timer while the link state is being re-evaluated. */
1766 callout_stop(&adapter->timer);
1767 em_update_link_status(adapter);
1768 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1769 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1770 EM_MSIX_LINK | E1000_IMS_LSC);
1771 if (adapter->link_active) {
1772 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1774 #ifdef EM_MULTIQUEUE
1775 if (!drbr_empty(ifp, txr->br))
1776 em_mq_start_locked(ifp, txr);
1778 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1779 em_start_locked(ifp, txr);
1784 EM_CORE_UNLOCK(adapter);
1788 /*********************************************************************
1790 * Media Ioctl callback
1792 * This routine is called whenever the user queries the status of
1793 * the interface using ifconfig.
1795 **********************************************************************/
1797 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1799 struct adapter *adapter = ifp->if_softc;
1800 u_char fiber_type = IFM_1000_SX;
1802 INIT_DEBUGOUT("em_media_status: begin");
1804 EM_CORE_LOCK(adapter);
1805 em_update_link_status(adapter);
1807 ifmr->ifm_status = IFM_AVALID;
1808 ifmr->ifm_active = IFM_ETHER;
/* No link: report validity only, leave ACTIVE clear. */
1810 if (!adapter->link_active) {
1811 EM_CORE_UNLOCK(adapter);
1815 ifmr->ifm_status |= IFM_ACTIVE;
/* Fiber/SerDes media are always reported as 1000SX full duplex. */
1817 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1818 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1819 ifmr->ifm_active |= fiber_type | IFM_FDX;
1821 switch (adapter->link_speed) {
1823 ifmr->ifm_active |= IFM_10_T;
1826 ifmr->ifm_active |= IFM_100_TX;
1829 ifmr->ifm_active |= IFM_1000_T;
1832 if (adapter->link_duplex == FULL_DUPLEX)
1833 ifmr->ifm_active |= IFM_FDX;
1835 ifmr->ifm_active |= IFM_HDX;
1837 EM_CORE_UNLOCK(adapter);
1840 /*********************************************************************
1842 * Media Ioctl callback
1844 * This routine is called when the user changes speed/duplex using
1845 * media/mediopt option with ifconfig.
1847 **********************************************************************/
1849 em_media_change(struct ifnet *ifp)
1851 struct adapter *adapter = ifp->if_softc;
1852 struct ifmedia *ifm = &adapter->media;
1854 INIT_DEBUGOUT("em_media_change: begin");
1856 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1859 EM_CORE_LOCK(adapter);
/* Translate the requested ifmedia subtype into MAC autoneg/forced settings;
 * the adapter is then reinitialized below to apply them. */
1860 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1862 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1863 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1868 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1869 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1872 adapter->hw.mac.autoneg = FALSE;
1873 adapter->hw.phy.autoneg_advertised = 0;
1874 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1875 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1877 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1880 adapter->hw.mac.autoneg = FALSE;
1881 adapter->hw.phy.autoneg_advertised = 0;
1882 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1883 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1885 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1888 device_printf(adapter->dev, "Unsupported media type\n");
1891 em_init_locked(adapter);
1892 EM_CORE_UNLOCK(adapter);
1897 /*********************************************************************
1899 * This routine maps the mbufs to tx descriptors.
1901 * return 0 on success, positive on failure
1902 **********************************************************************/
1905 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1907 struct adapter *adapter = txr->adapter;
1908 bus_dma_segment_t segs[EM_MAX_SCATTER];
1910 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1911 struct e1000_tx_desc *ctxd = NULL;
1912 struct mbuf *m_head;
1913 struct ether_header *eh;
1914 struct ip *ip = NULL;
1915 struct tcphdr *tp = NULL;
1916 u32 txd_upper = 0, txd_lower = 0;
1918 int nsegs, i, j, first, last = 0;
1920 bool do_tso, tso_desc, remap = TRUE;
1923 do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1928 * Intel recommends entire IP/TCP header length reside in a single
1929 * buffer. If multiple descriptors are used to describe the IP and
1930 * TCP header, each descriptor should describe one or more
1931 * complete headers; descriptors referencing only parts of headers
1932 * are not supported. If all layer headers are not coalesced into
1933 * a single buffer, each buffer should not cross a 4KB boundary,
1934 * or be larger than the maximum read request size.
1935 * Controller also requires modifing IP/TCP header to make TSO work
1936 * so we firstly get a writable mbuf chain then coalesce ethernet/
1937 * IP/TCP header into a single buffer to meet the requirement of
1938 * controller. This also simplifies IP/TCP/UDP checksum offloading
1939 * which also has similiar restrictions.
1941 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1942 if (do_tso || (m_head->m_next != NULL &&
1943 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
/* Chain is shared: work on a private writable copy before editing headers. */
1944 if (M_WRITABLE(*m_headp) == 0) {
1945 m_head = m_dup(*m_headp, M_NOWAIT);
1947 if (m_head == NULL) {
1956 * Assume IPv4, we don't have TSO/checksum offload support
1959 ip_off = sizeof(struct ether_header);
/* m_pullup each layer header so ether/IP/TCP headers are contiguous. */
1960 if (m_head->m_len < ip_off) {
1961 m_head = m_pullup(m_head, ip_off);
1962 if (m_head == NULL) {
1967 eh = mtod(m_head, struct ether_header *);
1968 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1969 ip_off = sizeof(struct ether_vlan_header);
1970 if (m_head->m_len < ip_off) {
1971 m_head = m_pullup(m_head, ip_off);
1972 if (m_head == NULL) {
1978 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1979 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1980 if (m_head == NULL) {
1985 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1986 poff = ip_off + (ip->ip_hl << 2);
1988 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
1989 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
1990 m_head = m_pullup(m_head, poff +
1991 sizeof(struct tcphdr));
1992 if (m_head == NULL) {
1997 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2000 * pull 4 more bytes of data into it.
2002 if (m_head->m_len < poff + (tp->th_off << 2)) {
2003 m_head = m_pullup(m_head, poff +
2006 if (m_head == NULL) {
/* Re-derive header pointers: m_pullup may have replaced the mbuf data. */
2011 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2012 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2014 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2019 * The pseudo TCP checksum does not include TCP
2020 * payload length so driver should recompute
2021 * the checksum here what hardware expect to
2022 * see. This is adherence of Microsoft's Large
2023 * Send specification.
2025 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2026 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2028 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2029 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2030 m_head = m_pullup(m_head, poff +
2031 sizeof(struct udphdr));
2032 if (m_head == NULL) {
2037 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2043 * Map the packet for DMA
2045 * Capture the first descriptor index,
2046 * this descriptor will have the index
2047 * of the EOP which is the only one that
2048 * now gets a DONE bit writeback.
2050 first = txr->next_avail_desc;
2051 tx_buffer = &txr->tx_buffers[first];
2052 tx_buffer_mapped = tx_buffer;
2053 map = tx_buffer->map;
2056 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2057 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2060 * There are two types of errors we can (try) to handle:
2061 * - EFBIG means the mbuf chain was too long and bus_dma ran
2062 * out of segments. Defragment the mbuf chain and try again.
2063 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2064 * at this point in time. Defer sending and try again later.
2065 * All other errors, in particular EINVAL, are fatal and prevent the
2066 * mbuf chain from ever going through. Drop it and report error.
2068 if (error == EFBIG && remap) {
2071 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2073 adapter->mbuf_defrag_failed++;
2080 /* Try it again, but only once */
2083 } else if (error != 0) {
2084 adapter->no_tx_dma_setup++;
2091 * TSO Hardware workaround, if this packet is not
2092 * TSO, and is only a single descriptor long, and
2093 * it follows a TSO burst, then we need to add a
2094 * sentinel descriptor to prevent premature writeback.
2096 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2099 txr->tx_tso = FALSE;
/* Reserve headroom: TSO may need extra descriptors beyond nsegs. */
2102 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2103 txr->no_desc_avail++;
2104 bus_dmamap_unload(txr->txtag, map);
2109 /* Do hardware assists */
2110 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2111 em_tso_setup(txr, m_head, ip_off, ip, tp,
2112 &txd_upper, &txd_lower);
2113 /* we need to make a final sentinel transmit desc */
2115 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2116 em_transmit_checksum_setup(txr, m_head,
2117 ip_off, ip, &txd_upper, &txd_lower);
2119 if (m_head->m_flags & M_VLANTAG) {
2120 /* Set the vlan id. */
2122 (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2123 /* Tell hardware to add tag */
2124 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2127 i = txr->next_avail_desc;
2129 /* Set up our transmit descriptors */
2130 for (j = 0; j < nsegs; j++) {
2132 bus_addr_t seg_addr;
2134 tx_buffer = &txr->tx_buffers[i];
2135 ctxd = &txr->tx_base[i];
2136 seg_addr = segs[j].ds_addr;
2137 seg_len = segs[j].ds_len;
2140 ** If this is the last descriptor, we want to
2141 ** split it so we have a small final sentinel
2143 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2144 seg_len -= TSO_WORKAROUND;
2145 ctxd->buffer_addr = htole64(seg_addr);
2146 ctxd->lower.data = htole32(
2147 adapter->txd_cmd | txd_lower | seg_len);
2148 ctxd->upper.data = htole32(txd_upper);
2149 if (++i == adapter->num_tx_desc)
2152 /* Now make the sentinel */
2154 ctxd = &txr->tx_base[i];
2155 tx_buffer = &txr->tx_buffers[i];
2157 htole64(seg_addr + seg_len);
2158 ctxd->lower.data = htole32(
2159 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2163 if (++i == adapter->num_tx_desc)
2166 ctxd->buffer_addr = htole64(seg_addr);
2167 ctxd->lower.data = htole32(
2168 adapter->txd_cmd | txd_lower | seg_len);
2169 ctxd->upper.data = htole32(txd_upper);
2171 if (++i == adapter->num_tx_desc)
2174 tx_buffer->m_head = NULL;
2175 tx_buffer->next_eop = -1;
2178 txr->next_avail_desc = i;
2179 txr->tx_avail -= nsegs;
2181 tx_buffer->m_head = m_head;
2183 ** Here we swap the map so the last descriptor,
2184 ** which gets the completion interrupt has the
2185 ** real map, and the first descriptor gets the
2186 ** unused map from this descriptor.
2188 tx_buffer_mapped->map = tx_buffer->map;
2189 tx_buffer->map = map;
2190 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2193 * Last Descriptor of Packet
2194 * needs End Of Packet (EOP)
2195 * and Report Status (RS)
2198 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2200 * Keep track in the first buffer which
2201 * descriptor will be written back
2203 tx_buffer = &txr->tx_buffers[first];
2204 tx_buffer->next_eop = last;
2207 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2208 * that this frame is available to transmit.
2210 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2211 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2212 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
/* Program RCTL promiscuous/allmulti bits to match the interface flags. */
2218 em_set_promisc(struct adapter *adapter)
2220 struct ifnet *ifp = adapter->ifp;
2223 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2225 if (ifp->if_flags & IFF_PROMISC) {
2226 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2227 /* Turn this on if you want to see bad packets */
2229 reg_rctl |= E1000_RCTL_SBP;
2230 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/* ALLMULTI: accept all multicast but not all unicast. */
2231 } else if (ifp->if_flags & IFF_ALLMULTI) {
2232 reg_rctl |= E1000_RCTL_MPE;
2233 reg_rctl &= ~E1000_RCTL_UPE;
2234 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/* Clear promiscuous bits from RCTL, but keep MPE while the multicast
 * address list is at (or beyond) the hardware filter capacity. */
2239 em_disable_promisc(struct adapter *adapter)
2241 struct ifnet *ifp = adapter->ifp;
2245 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2246 reg_rctl &= (~E1000_RCTL_UPE);
2247 if (ifp->if_flags & IFF_ALLMULTI)
2248 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
/* Otherwise count AF_LINK multicast memberships, capped at the HW limit. */
2250 struct ifmultiaddr *ifma;
2251 #if __FreeBSD_version < 800000
2254 if_maddr_rlock(ifp);
2256 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2257 if (ifma->ifma_addr->sa_family != AF_LINK)
2259 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2263 #if __FreeBSD_version < 800000
2264 IF_ADDR_UNLOCK(ifp);
2266 if_maddr_runlock(ifp);
2269 /* Don't disable if in MAX groups */
2270 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2271 reg_rctl &= (~E1000_RCTL_MPE);
2272 reg_rctl &= (~E1000_RCTL_SBP);
2273 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2277 /*********************************************************************
2280 * This routine is called whenever multicast address list is updated.
2282 **********************************************************************/
2285 em_set_multi(struct adapter *adapter)
2287 struct ifnet *ifp = adapter->ifp;
2288 struct ifmultiaddr *ifma;
2290 u8 *mta; /* Multicast array memory */
2293 IOCTL_DEBUGOUT("em_set_multi: begin");
2296 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
/* 82542 rev2 errata: the receiver must be held in reset (RCTL_RST) while
 * the multicast table is rewritten, with MWI disabled. */
2298 if (adapter->hw.mac.type == e1000_82542 &&
2299 adapter->hw.revision_id == E1000_REVISION_2) {
2300 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2301 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2302 e1000_pci_clear_mwi(&adapter->hw);
2303 reg_rctl |= E1000_RCTL_RST;
2304 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2308 #if __FreeBSD_version < 800000
2311 if_maddr_rlock(ifp);
/* Gather up to MAX_NUM_MULTICAST_ADDRESSES link-layer multicast addrs. */
2313 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2314 if (ifma->ifma_addr->sa_family != AF_LINK)
2317 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2320 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2321 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2324 #if __FreeBSD_version < 800000
2325 IF_ADDR_UNLOCK(ifp);
2327 if_maddr_runlock(ifp);
/* Table overflow: fall back to accepting all multicast (MPE). */
2329 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2330 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2331 reg_rctl |= E1000_RCTL_MPE;
2332 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2334 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
/* Take the 82542 rev2 receiver back out of reset and restore MWI. */
2336 if (adapter->hw.mac.type == e1000_82542 &&
2337 adapter->hw.revision_id == E1000_REVISION_2) {
2338 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2339 reg_rctl &= ~E1000_RCTL_RST;
2340 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2342 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2343 e1000_pci_set_mwi(&adapter->hw);
2348 /*********************************************************************
2351 * This routine checks for link status and updates statistics.
2353 **********************************************************************/
/*
 * em_local_timer - periodic (1 Hz) housekeeping callout.
 * Refreshes link state and statistics, re-arms the LAA on 82571,
 * builds the interrupt-trigger mask, runs TX hang detection per
 * queue, and re-schedules itself.  Must be entered with the core
 * lock held (asserted below).
 * NOTE(review): lines elided by extraction (braces, the hung-path
 * goto, a return); code kept byte-identical.
 */
2356 em_local_timer(void *arg)
2358 struct adapter *adapter = arg;
2359 struct ifnet *ifp = adapter->ifp;
2360 struct tx_ring *txr = adapter->tx_rings;
2361 struct rx_ring *rxr = adapter->rx_rings;
2364 EM_CORE_LOCK_ASSERT(adapter);
2366 em_update_link_status(adapter);
2367 em_update_stats_counters(adapter);
2369 /* Reset LAA into RAR[0] on 82571 */
2370 if ((adapter->hw.mac.type == e1000_82571) &&
2371 e1000_get_laa_state_82571(&adapter->hw))
2372 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2374 /* Mask to use in the irq trigger */
2375 if (adapter->msix_mem) {
/* MSIX: OR together each RX queue's IMS bit, then rewind rxr */
2376 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2377 trigger |= rxr->ims;
2378 rxr = adapter->rx_rings;
2380 trigger = E1000_ICS_RXDMT0;
2383 ** Check on the state of the TX queue(s), this
2384 ** can be done without the lock because its RO
2385 ** and the HUNG state will be static if set.
2387 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2388 if (txr->busy == EM_TX_HUNG)
/* busy counts consecutive stalled intervals; EM_TX_MAXTRIES => hung */
2390 if (txr->busy >= EM_TX_MAXTRIES)
2391 txr->busy = EM_TX_HUNG;
2392 /* Schedule a TX tasklet if needed */
2393 if (txr->tx_avail <= EM_MAX_SCATTER)
2394 taskqueue_enqueue(txr->tq, &txr->tx_task);
2397 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2398 #ifndef DEVICE_POLLING
2399 /* Trigger an RX interrupt to guarantee mbuf refresh */
2400 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
/* Hung path: log, mark interface down, count the event, reinit */
2404 /* Looks like we're hung */
2405 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2407 em_print_debug_info(adapter);
2408 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2409 adapter->watchdog_events++;
2410 em_init_locked(adapter);
/*
 * em_update_link_status - query PHY/MAC for current link and notify
 * the stack on transitions (up or down).  On link-up it reads
 * speed/duplex, toggles TSO assist (only safe at gigabit), applies
 * the 82571/82572 PCI-E SPEED_MODE workaround, and reports baudrate;
 * on link-down it clears state and disarms TX hang detection.
 * NOTE(review): elided lines include declarations, case labels and
 * breaks; code kept byte-identical.
 */
2415 em_update_link_status(struct adapter *adapter)
2417 struct e1000_hw *hw = &adapter->hw;
2418 struct ifnet *ifp = adapter->ifp;
2419 device_t dev = adapter->dev;
2420 struct tx_ring *txr = adapter->tx_rings;
2423 /* Get the cached link value or read phy for real */
2424 switch (hw->phy.media_type) {
2425 case e1000_media_type_copper:
2426 if (hw->mac.get_link_status) {
2427 if (hw->mac.type == e1000_pch_spt)
2429 /* Do the work to read phy */
2430 e1000_check_for_link(hw);
2431 link_check = !hw->mac.get_link_status;
2432 if (link_check) /* ESB2 fix */
2433 e1000_cfg_on_link_up(hw);
2437 case e1000_media_type_fiber:
2438 e1000_check_for_link(hw);
2439 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2442 case e1000_media_type_internal_serdes:
2443 e1000_check_for_link(hw);
2444 link_check = adapter->hw.mac.serdes_has_link;
2447 case e1000_media_type_unknown:
2451 /* Now check for a transition */
2452 if (link_check && (adapter->link_active == 0)) {
2453 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2454 &adapter->link_duplex);
2457 ** There have proven to be problems with TSO when not at full
2458 ** gigabit speed, so disable the assist automatically when at
2459 ** lower speeds. -jfv
2461 if (ifp->if_capenable & IFCAP_TSO4) {
2462 if (adapter->link_speed == SPEED_1000)
2463 ifp->if_hwassist |= CSUM_IP_TSO;
2465 ifp->if_hwassist &= ~CSUM_IP_TSO;
2468 /* Check if we must disable SPEED_MODE bit on PCI-E */
2469 if ((adapter->link_speed != SPEED_1000) &&
2470 ((hw->mac.type == e1000_82571) ||
2471 (hw->mac.type == e1000_82572))) {
2473 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2474 tarc0 &= ~TARC_SPEED_MODE_BIT;
2475 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2478 device_printf(dev, "Link is up %d Mbps %s\n",
2479 adapter->link_speed,
2480 ((adapter->link_duplex == FULL_DUPLEX) ?
2481 "Full Duplex" : "Half Duplex"));
2482 adapter->link_active = 1;
2483 adapter->smartspeed = 0;
2484 ifp->if_baudrate = adapter->link_speed * 1000000;
2485 if_link_state_change(ifp, LINK_STATE_UP);
2486 } else if (!link_check && (adapter->link_active == 1)) {
2487 ifp->if_baudrate = adapter->link_speed = 0;
2488 adapter->link_duplex = 0;
2490 device_printf(dev, "Link is Down\n");
2491 adapter->link_active = 0;
2492 /* Link down, disable hang detection */
2493 for (int i = 0; i < adapter->num_queues; i++, txr++)
2494 txr->busy = EM_TX_IDLE;
2495 if_link_state_change(ifp, LINK_STATE_DOWN);
2499 /*********************************************************************
2501 * This routine disables all traffic on the adapter by issuing a
2502 * global reset on the MAC and deallocates TX/RX buffers.
2504 * This routine should always be called with BOTH the CORE
2506 **********************************************************************/
/*
 * em_stop - quiesce the adapter: mask interrupts, stop the timer
 * callout, mark the interface inactive, disarm per-queue hang
 * detection, flush I219 descriptor rings if needed, and issue a
 * global MAC reset.  Core lock must be held (asserted below).
 * NOTE(review): function signature line elided by extraction.
 */
2511 struct adapter *adapter = arg;
2512 struct ifnet *ifp = adapter->ifp;
2513 struct tx_ring *txr = adapter->tx_rings;
2515 EM_CORE_LOCK_ASSERT(adapter);
2517 INIT_DEBUGOUT("em_stop: begin");
2519 em_disable_intr(adapter);
2520 callout_stop(&adapter->timer);
2522 /* Tell the stack that the interface is no longer active */
2523 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2524 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2526 /* Disarm Hang Detection. */
2527 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2529 txr->busy = EM_TX_IDLE;
2533 /* I219 needs some special flushing to avoid hangs */
2534 if (adapter->hw.mac.type == e1000_pch_spt)
2535 em_flush_desc_rings(adapter);
2537 e1000_reset_hw(&adapter->hw);
/* Clear Wake-Up Control so no spurious wake events remain armed */
2538 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2540 e1000_led_off(&adapter->hw);
2541 e1000_cleanup_led(&adapter->hw);
2545 /*********************************************************************
2547 * Determine hardware revision.
2549 **********************************************************************/
/*
 * em_identify_hardware - enable PCI bus mastering and capture the
 * device's PCI identity (vendor/device/revision/subsystem IDs) into
 * the shared-code hw structure, then let the shared code derive the
 * MAC type.  Prints an error if the device ID is unrecognized.
 */
2551 em_identify_hardware(struct adapter *adapter)
2553 device_t dev = adapter->dev;
2555 /* Make sure our PCI config space has the necessary stuff set */
2556 pci_enable_busmaster(dev);
2557 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2559 /* Save off the information about this board */
2560 adapter->hw.vendor_id = pci_get_vendor(dev);
2561 adapter->hw.device_id = pci_get_device(dev);
2562 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2563 adapter->hw.subsystem_vendor_id =
2564 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2565 adapter->hw.subsystem_device_id =
2566 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2568 /* Do Shared Code Init and Setup */
2569 if (e1000_set_mac_type(&adapter->hw)) {
2570 device_printf(dev, "Setup init failure\n");
/*
 * em_allocate_pci_resources - map the device's register BAR and wire
 * the bus-space tag/handle into the osdep structure so the shared
 * code's register macros can reach the hardware.
 */
2576 em_allocate_pci_resources(struct adapter *adapter)
2578 device_t dev = adapter->dev;
2582 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2584 if (adapter->memory == NULL) {
2585 device_printf(dev, "Unable to allocate bus resource: memory\n");
2588 adapter->osdep.mem_bus_space_tag =
2589 rman_get_bustag(adapter->memory);
2590 adapter->osdep.mem_bus_space_handle =
2591 rman_get_bushandle(adapter->memory);
/* hw_addr doubles as the base the shared code offsets registers from */
2592 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2594 adapter->hw.back = &adapter->osdep;
2599 /*********************************************************************
2601 * Setup the Legacy or MSI Interrupt handler
2603 **********************************************************************/
/*
 * em_allocate_legacy - set up the legacy INTx or single-MSI
 * interrupt: allocate the IRQ resource, create the fast-interrupt
 * deferred-processing taskqueues (one generic "que" queue plus a
 * TX-only tasklet used by the local timer), and register the fast
 * handler.  Frees the taskqueue on registration failure.
 */
2605 em_allocate_legacy(struct adapter *adapter)
2607 device_t dev = adapter->dev;
2608 struct tx_ring *txr = adapter->tx_rings;
2611 /* Manually turn off all interrupts */
2612 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2614 if (adapter->msix == 1) /* using MSI */
2616 /* We allocate a single interrupt resource */
2617 adapter->res = bus_alloc_resource_any(dev,
2618 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2619 if (adapter->res == NULL) {
2620 device_printf(dev, "Unable to allocate bus resource: "
2626 * Allocate a fast interrupt and the associated
2627 * deferred processing contexts.
2629 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2630 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2631 taskqueue_thread_enqueue, &adapter->tq);
2632 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2633 device_get_nameunit(adapter->dev));
2634 /* Use a TX only tasklet for local timer */
2635 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2636 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2637 taskqueue_thread_enqueue, &txr->tq);
2638 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2639 device_get_nameunit(adapter->dev));
2640 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2641 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2642 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2643 device_printf(dev, "Failed to register fast interrupt "
2644 "handler: %d\n", error);
2645 taskqueue_free(adapter->tq);
2653 /*********************************************************************
2655 * Setup the MSIX Interrupt handlers
2656 * This is not really Multiqueue, rather
2657 * its just seperate interrupt vectors
2658 * for TX, RX, and Link.
2660 **********************************************************************/
/*
 * em_allocate_msix - allocate and wire up separate MSI-X vectors for
 * RX queue(s), TX queue(s), and link.  For each ring: allocate the
 * IRQ resource, install the handler, optionally bind it round-robin
 * to a CPU, create its taskqueue, and record its IMS bit and IVAR
 * nibble (82574 layout: RX0/RX1 are IMS bits 20/21, TX0/TX1 are
 * bits 22/23; the link vector goes in IVAR bits 16+ with the
 * "other-cause" enable at bit 31).
 * NOTE(review): rid assignments, braces and error returns were
 * elided by extraction; code kept byte-identical.
 */
2662 em_allocate_msix(struct adapter *adapter)
2664 device_t dev = adapter->dev;
2665 struct tx_ring *txr = adapter->tx_rings;
2666 struct rx_ring *rxr = adapter->rx_rings;
2667 int error, rid, vector = 0;
2671 /* Make sure all interrupts are disabled */
2672 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2674 /* First set up ring resources */
2675 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2680 rxr->res = bus_alloc_resource_any(dev,
2681 SYS_RES_IRQ, &rid, RF_ACTIVE);
2682 if (rxr->res == NULL) {
2684 "Unable to allocate bus resource: "
2685 "RX MSIX Interrupt %d\n", i);
2688 if ((error = bus_setup_intr(dev, rxr->res,
2689 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2690 rxr, &rxr->tag)) != 0) {
2691 device_printf(dev, "Failed to register RX handler");
2694 #if __FreeBSD_version >= 800504
2695 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
/* Round-robin CPU binding across queues; state kept in em_last_bind_cpu */
2699 if (em_last_bind_cpu < 0)
2700 em_last_bind_cpu = CPU_FIRST();
2701 cpu_id = em_last_bind_cpu;
2702 bus_bind_intr(dev, rxr->res, cpu_id);
2704 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2705 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2706 taskqueue_thread_enqueue, &rxr->tq);
2707 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2708 device_get_nameunit(adapter->dev), cpu_id);
2710 ** Set the bit to enable interrupt
2711 ** in E1000_IMS -- bits 20 and 21
2712 ** are for RX0 and RX1, note this has
2713 ** NOTHING to do with the MSIX vector
2715 rxr->ims = 1 << (20 + i);
2716 adapter->ims |= rxr->ims;
2717 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2719 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
/* Same dance for the TX rings */
2722 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2725 txr->res = bus_alloc_resource_any(dev,
2726 SYS_RES_IRQ, &rid, RF_ACTIVE);
2727 if (txr->res == NULL) {
2729 "Unable to allocate bus resource: "
2730 "TX MSIX Interrupt %d\n", i);
2733 if ((error = bus_setup_intr(dev, txr->res,
2734 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2735 txr, &txr->tag)) != 0) {
2736 device_printf(dev, "Failed to register TX handler");
2739 #if __FreeBSD_version >= 800504
2740 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2744 if (em_last_bind_cpu < 0)
2745 em_last_bind_cpu = CPU_FIRST();
2746 cpu_id = em_last_bind_cpu;
2747 bus_bind_intr(dev, txr->res, cpu_id);
2749 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2750 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2751 taskqueue_thread_enqueue, &txr->tq);
2752 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2753 device_get_nameunit(adapter->dev), cpu_id);
2755 ** Set the bit to enable interrupt
2756 ** in E1000_IMS -- bits 22 and 23
2757 ** are for TX0 and TX1, note this has
2758 ** NOTHING to do with the MSIX vector
2760 txr->ims = 1 << (22 + i);
2761 adapter->ims |= txr->ims;
2762 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2764 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2767 /* Link interrupt */
2769 adapter->res = bus_alloc_resource_any(dev,
2770 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2771 if (!adapter->res) {
2772 device_printf(dev,"Unable to allocate "
2773 "bus resource: Link interrupt [%d]\n", rid);
2776 /* Set the link handler function */
2777 error = bus_setup_intr(dev, adapter->res,
2778 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2779 em_msix_link, adapter, &adapter->tag);
2781 adapter->res = NULL;
2782 device_printf(dev, "Failed to register LINK handler");
2785 #if __FreeBSD_version >= 800504
2786 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2788 adapter->linkvec = vector;
/* 0x80000000 = IVAR "enable other cause" bit on 82574 */
2789 adapter->ivars |= (8 | vector) << 16;
2790 adapter->ivars |= 0x80000000;
/*
 * em_free_pci_resources - release everything em_allocate_pci_resources,
 * em_setup_msix and the interrupt-allocation paths acquired: per-queue
 * IRQ handlers and resources, the link/legacy interrupt, MSI(-X)
 * vectors, the MSIX table BAR, the register BAR, and the flash BAR.
 * Tolerates partially-initialized state (early attach aborts).
 */
2797 em_free_pci_resources(struct adapter *adapter)
2799 device_t dev = adapter->dev;
2800 struct tx_ring *txr;
2801 struct rx_ring *rxr;
2806 ** Release all the queue interrupt resources:
2808 for (int i = 0; i < adapter->num_queues; i++) {
2809 txr = &adapter->tx_rings[i];
2810 /* an early abort? */
2814 if (txr->tag != NULL) {
2815 bus_teardown_intr(dev, txr->res, txr->tag);
2818 if (txr->res != NULL)
2819 bus_release_resource(dev, SYS_RES_IRQ,
2822 rxr = &adapter->rx_rings[i];
2823 /* an early abort? */
2827 if (rxr->tag != NULL) {
2828 bus_teardown_intr(dev, rxr->res, rxr->tag);
2831 if (rxr->res != NULL)
2832 bus_release_resource(dev, SYS_RES_IRQ,
/* The link/legacy vector's rid depends on the interrupt mode in use */
2836 if (adapter->linkvec) /* we are doing MSIX */
2837 rid = adapter->linkvec + 1;
2839 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2841 if (adapter->tag != NULL) {
2842 bus_teardown_intr(dev, adapter->res, adapter->tag);
2843 adapter->tag = NULL;
2846 if (adapter->res != NULL)
2847 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2851 pci_release_msi(dev);
2853 if (adapter->msix_mem != NULL)
2854 bus_release_resource(dev, SYS_RES_MEMORY,
2855 adapter->memrid, adapter->msix_mem);
2857 if (adapter->memory != NULL)
2858 bus_release_resource(dev, SYS_RES_MEMORY,
2859 PCIR_BAR(0), adapter->memory);
2861 if (adapter->flash != NULL)
2862 bus_release_resource(dev, SYS_RES_MEMORY,
2863 EM_FLASH, adapter->flash);
2867 * Setup MSI or MSI/X
/*
 * em_setup_msix - decide the interrupt strategy.  MSI-X is attempted
 * only on 82574 ("Hartwell") when em_enable_msix is set: map the
 * MSIX table BAR, verify enough vectors exist (5 are needed for the
 * two-queue EM_MULTIQUEUE case), and allocate them.  On any failure
 * fall back to MSI, and finally to a legacy IRQ.  Returns the number
 * of vectors in use (presumably via elided return statements).
 */
2870 em_setup_msix(struct adapter *adapter)
2872 device_t dev = adapter->dev;
2875 /* Nearly always going to use one queue */
2876 adapter->num_queues = 1;
2879 ** Try using MSI-X for Hartwell adapters
2881 if ((adapter->hw.mac.type == e1000_82574) &&
2882 (em_enable_msix == TRUE)) {
2883 #ifdef EM_MULTIQUEUE
2884 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2885 if (adapter->num_queues > 1)
2886 em_enable_vectors_82574(adapter);
2888 /* Map the MSIX BAR */
2889 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2890 adapter->msix_mem = bus_alloc_resource_any(dev,
2891 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2892 if (adapter->msix_mem == NULL) {
2893 /* May not be enabled */
2894 device_printf(adapter->dev,
2895 "Unable to map MSIX table \n");
2898 val = pci_msix_count(dev);
2900 #ifdef EM_MULTIQUEUE
2901 /* We need 5 vectors in the multiqueue case */
2902 if (adapter->num_queues > 1 ) {
2906 adapter->num_queues = 1;
2907 device_printf(adapter->dev,
2908 "Insufficient MSIX vectors for >1 queue, "
2909 "using single queue...\n");
2918 device_printf(adapter->dev,
2919 "Insufficient MSIX vectors, using MSI\n");
2922 #ifdef EM_MULTIQUEUE
2926 if ((pci_alloc_msix(dev, &val) == 0)) {
2927 device_printf(adapter->dev,
2928 "Using MSIX interrupts "
2929 "with %d vectors\n", val);
2934 ** If MSIX alloc failed or provided us with
2935 ** less than needed, free and fall through to MSI
2937 pci_release_msi(dev);
2940 if (adapter->msix_mem != NULL) {
2941 bus_release_resource(dev, SYS_RES_MEMORY,
2942 adapter->memrid, adapter->msix_mem);
2943 adapter->msix_mem = NULL;
2946 if (pci_alloc_msi(dev, &val) == 0) {
2947 device_printf(adapter->dev, "Using an MSI interrupt\n");
2950 /* Should only happen due to manual configuration */
2951 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2957 ** The 3 following flush routines are used as a workaround in the
2958 ** I219 client parts and only for them.
2960 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2962 ** We want to clear all pending descriptors from the TX ring.
2963 ** zeroing happens when the HW reads the regs. We assign the ring itself as
2964 ** the data of the next descriptor. We don't care about the data we are about
/*
 * em_flush_tx_ring - I219 workaround: force the hardware to consume
 * and drop all pending TX descriptors by queueing one dummy
 * descriptor (pointing at the ring's own DMA memory as scratch data)
 * and bumping the tail pointer with TX enabled.
 */
2968 em_flush_tx_ring(struct adapter *adapter)
2970 struct e1000_hw *hw = &adapter->hw;
2971 struct tx_ring *txr = adapter->tx_rings;
2972 struct e1000_tx_desc *txd;
2973 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
2976 tctl = E1000_READ_REG(hw, E1000_TCTL);
/* Make sure the transmitter is enabled so the dummy descriptor drains */
2977 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2979 txd = &txr->tx_base[txr->next_avail_desc++];
2980 if (txr->next_avail_desc == adapter->num_tx_desc)
2981 txr->next_avail_desc = 0;
2983 /* Just use the ring as a dummy buffer addr */
2984 txd->buffer_addr = txr->txdma.dma_paddr;
2985 txd->lower.data = htole32(txd_lower | size);
2986 txd->upper.data = 0;
2988 /* flush descriptors to memory before notifying the HW */
2991 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
2997 ** em_flush_rx_ring - remove all descriptors from the rx_ring
2999 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
/*
 * em_flush_rx_ring - I219 workaround: disable the receiver, rewrite
 * RXDCTL(0) thresholds (prefetch=31, host=1, descriptor granularity)
 * so all descriptors are marked consumed, then pulse RCTL_EN so the
 * change takes effect, and leave the receiver disabled.
 */
3002 em_flush_rx_ring(struct adapter *adapter)
3004 struct e1000_hw *hw = &adapter->hw;
3007 rctl = E1000_READ_REG(hw, E1000_RCTL);
3008 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3009 E1000_WRITE_FLUSH(hw);
3012 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3013 /* zero the lower 14 bits (prefetch and host thresholds) */
3014 rxdctl &= 0xffffc000;
3016 * update thresholds: prefetch threshold to 31, host threshold to 1
3017 * and make sure the granularity is "descriptors" and not "cache lines"
3019 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3020 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3022 /* momentarily enable the RX ring for the changes to take effect */
3023 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3024 E1000_WRITE_FLUSH(hw);
3026 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3030 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3032 ** In i219, the descriptor rings must be emptied before resetting the HW
3033 ** or before changing the device state to D3 during runtime (runtime PM).
3035 ** Failure to do this will cause the HW to enter a unit hang state which can
3036 ** only be released by PCI reset on the device
/*
 * em_flush_desc_rings - I219 workaround driver: disable the MULR fix
 * in FEXTNVM11, then, only if the PCI config "descriptor ring status"
 * word reports FLUSH_DESC_REQUIRED and the TX ring is non-empty,
 * flush TX and (re-checking the fault) RX rings.  Skipping this
 * before reset/D3 can leave the HW in a hang recoverable only by a
 * PCI reset (per the comment block above).
 */
3040 em_flush_desc_rings(struct adapter *adapter)
3042 struct e1000_hw *hw = &adapter->hw;
3043 device_t dev = adapter->dev;
3045 u32 fext_nvm11, tdlen;
3047 /* First, disable MULR fix in FEXTNVM11 */
3048 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3049 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3050 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3052 /* do nothing if we're not in faulty state, or if the queue is empty */
3053 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3054 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3055 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3057 em_flush_tx_ring(adapter);
3059 /* recheck, maybe the fault is caused by the rx ring */
3060 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3061 if (hang_state & FLUSH_DESC_REQUIRED)
3062 em_flush_rx_ring(adapter);
3066 /*********************************************************************
3068 * Initialize the hardware to a configuration
3069 * as specified by the adapter structure.
3071 **********************************************************************/
/*
 * em_reset - bring the MAC to a known-good configured state:
 * optionally disable PHY smart-power-down, size the packet buffer
 * (PBA) split between RX and TX per MAC type and frame size, compute
 * flow-control watermarks from the RX buffer size, apply per-MAC
 * flow-control overrides, flush I219 rings if needed, then issue the
 * global reset and run shared-code hardware init.
 * NOTE(review): many case labels, the reset call itself and braces
 * were elided by extraction; code kept byte-identical.
 */
3073 em_reset(struct adapter *adapter)
3075 device_t dev = adapter->dev;
3076 struct ifnet *ifp = adapter->ifp;
3077 struct e1000_hw *hw = &adapter->hw;
3081 INIT_DEBUGOUT("em_reset: begin");
3083 /* Set up smart power down as default off on newer adapters. */
3084 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3085 hw->mac.type == e1000_82572)) {
3088 /* Speed up time to link by disabling smart power down. */
3089 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3090 phy_tmp &= ~IGP02E1000_PM_SPD;
3091 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3095 * Packet Buffer Allocation (PBA)
3096 * Writing PBA sets the receive portion of the buffer
3097 * the remainder is used for the transmit buffer.
3099 switch (hw->mac.type) {
3100 /* Total Packet Buffer on these is 48K */
3103 case e1000_80003es2lan:
3104 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3106 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3107 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3111 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3117 case e1000_ich10lan:
3118 /* Boost Receive side for jumbo frames */
3119 if (adapter->hw.mac.max_frame_size > 4096)
3120 pba = E1000_PBA_14K;
3122 pba = E1000_PBA_10K;
3128 pba = E1000_PBA_26K;
/* default: split by frame size */
3131 if (adapter->hw.mac.max_frame_size > 8192)
3132 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3134 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3136 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3139 * These parameters control the automatic generation (Tx) and
3140 * response (Rx) to Ethernet PAUSE frames.
3141 * - High water mark should allow for at least two frames to be
3142 * received after sending an XOFF.
3143 * - Low water mark works best when it is very near the high water mark.
3144 * This allows the receiver to restart by sending XON when it has
3145 * drained a bit. Here we use an arbitary value of 1500 which will
3146 * restart after one full frame is pulled from the buffer. There
3147 * could be several smaller frames in the buffer and if so they will
3148 * not trigger the XON until their total number reduces the buffer
3150 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3152 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3153 hw->fc.high_water = rx_buffer_size -
3154 roundup2(adapter->hw.mac.max_frame_size, 1024);
3155 hw->fc.low_water = hw->fc.high_water - 1500;
3157 if (adapter->fc) /* locally set flow control value? */
3158 hw->fc.requested_mode = adapter->fc;
3160 hw->fc.requested_mode = e1000_fc_full;
3162 if (hw->mac.type == e1000_80003es2lan)
3163 hw->fc.pause_time = 0xFFFF;
3165 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3167 hw->fc.send_xon = TRUE;
3169 /* Device specific overrides/settings */
3170 switch (hw->mac.type) {
3172 /* Workaround: no TX flow ctrl for PCH */
3173 hw->fc.requested_mode = e1000_fc_rx_pause;
3174 hw->fc.pause_time = 0xFFFF; /* override */
3175 if (ifp->if_mtu > ETHERMTU) {
3176 hw->fc.high_water = 0x3500;
3177 hw->fc.low_water = 0x1500;
3179 hw->fc.high_water = 0x5000;
3180 hw->fc.low_water = 0x3000;
3182 hw->fc.refresh_time = 0x1000;
3187 hw->fc.high_water = 0x5C20;
3188 hw->fc.low_water = 0x5048;
3189 hw->fc.pause_time = 0x0650;
3190 hw->fc.refresh_time = 0x0400;
3191 /* Jumbos need adjusted PBA */
3192 if (ifp->if_mtu > ETHERMTU)
3193 E1000_WRITE_REG(hw, E1000_PBA, 12);
3195 E1000_WRITE_REG(hw, E1000_PBA, 26);
3198 case e1000_ich10lan:
3199 if (ifp->if_mtu > ETHERMTU) {
3200 hw->fc.high_water = 0x2800;
3201 hw->fc.low_water = hw->fc.high_water - 8;
3204 /* else fall thru */
3206 if (hw->mac.type == e1000_80003es2lan)
3207 hw->fc.pause_time = 0xFFFF;
3211 /* I219 needs some special flushing to avoid hangs */
3212 if (hw->mac.type == e1000_pch_spt)
3213 em_flush_desc_rings(adapter);
3215 /* Issue a global reset */
3217 E1000_WRITE_REG(hw, E1000_WUC, 0);
3218 em_disable_aspm(adapter);
3220 if (e1000_init_hw(hw) < 0) {
3221 device_printf(dev, "Hardware Initialization Failed\n");
/* Restore the VLAN ethertype and refresh PHY/link state post-init */
3225 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3226 e1000_get_phy_info(hw);
3227 e1000_check_for_link(hw);
3231 /*********************************************************************
3233 * Setup networking device structure and register an interface.
3235 **********************************************************************/
/*
 * em_setup_interface - allocate and populate the ifnet: entry points
 * (init/ioctl/start or transmit/qflush), TSO limits, capability
 * flags (checksum, VLAN, TSO, polling, WOL), then attach Ethernet
 * and register the supported media types with ifmedia.
 * NOTE(review): braces and #else/#endif lines elided by extraction;
 * code kept byte-identical.
 */
3237 em_setup_interface(device_t dev, struct adapter *adapter)
3241 INIT_DEBUGOUT("em_setup_interface: begin");
3243 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3245 device_printf(dev, "can not allocate ifnet structure\n");
3248 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3249 ifp->if_init = em_init;
3250 ifp->if_softc = adapter;
3251 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3252 ifp->if_ioctl = em_ioctl;
3254 /* TSO parameters */
3255 ifp->if_hw_tsomax = IP_MAXPACKET;
3256 /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3257 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3258 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3260 #ifdef EM_MULTIQUEUE
3261 /* Multiqueue stack interface */
3262 ifp->if_transmit = em_mq_start;
3263 ifp->if_qflush = em_qflush;
/* legacy if_start path: size the send queue to the TX descriptor count */
3265 ifp->if_start = em_start;
3266 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3267 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3268 IFQ_SET_READY(&ifp->if_snd);
3271 ether_ifattach(ifp, adapter->hw.mac.addr);
3273 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3274 ifp->if_capenable = ifp->if_capabilities;
3277 * Tell the upper layer(s) we
3278 * support full VLAN capability
3280 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3281 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3284 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3288 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3289 * - Although the silicon bug of TSO only working at gigabit speed is
3290 * worked around in em_update_link_status() by selectively setting
3291 * CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3292 * descriptors. Thus, such descriptors may still cause the MAC to
3293 * hang and, consequently, TSO is only safe to be used in setups
3294 * where the link isn't expected to switch from gigabit to lower
3296 * - Similarly, there's currently no way to trigger a reconfiguration
3297 * of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3298 * runtime. Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3299 * when link speed changes are not to be expected.
3300 * - Despite all the workarounds for TSO-related silicon bugs, at
3301 * least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3303 ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
3306 ** Don't turn this on by default, if vlans are
3307 ** created on another pseudo device (eg. lagg)
3308 ** then vlan events are not passed thru, breaking
3309 ** operation, but with HW FILTER off it works. If
3310 ** using vlans directly on the em driver you can
3311 ** enable this and get full hardware tag filtering.
3313 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3315 #ifdef DEVICE_POLLING
3316 ifp->if_capabilities |= IFCAP_POLLING;
3319 /* Enable only WOL MAGIC by default */
3321 ifp->if_capabilities |= IFCAP_WOL;
3322 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3326 * Specify the media types supported by this adapter and register
3327 * callbacks to update media and link information
3329 ifmedia_init(&adapter->media, IFM_IMASK,
3330 em_media_change, em_media_status);
3331 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3332 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3333 u_char fiber_type = IFM_1000_SX; /* default type */
3335 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3337 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
/* copper: advertise 10/100 full+half, and 1000 where the PHY supports it */
3339 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3340 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3342 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3344 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3346 if (adapter->hw.phy.type != e1000_phy_ife) {
3347 ifmedia_add(&adapter->media,
3348 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3349 ifmedia_add(&adapter->media,
3350 IFM_ETHER | IFM_1000_T, 0, NULL);
3353 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3354 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3360 * Manage DMA'able memory.
/*
 * em_dmamap_cb - bus_dmamap_load(9) callback: store the (single)
 * segment's bus address through the caller-supplied pointer.
 */
3363 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3367 *(bus_addr_t *) arg = segs[0].ds_addr;
/*
 * em_dma_malloc - allocate a coherent DMA region of `size` bytes:
 * create a tag, allocate the memory, and load the map to obtain
 * dma_paddr (via em_dmamap_cb).  On failure, unwinds whatever was
 * acquired (the goto targets below) and clears dma->dma_tag.
 * Caller frees with em_dma_free().
 */
3371 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3372 struct em_dma_alloc *dma, int mapflags)
3376 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3377 EM_DBA_ALIGN, 0, /* alignment, bounds */
3378 BUS_SPACE_MAXADDR, /* lowaddr */
3379 BUS_SPACE_MAXADDR, /* highaddr */
3380 NULL, NULL, /* filter, filterarg */
3383 size, /* maxsegsize */
3385 NULL, /* lockfunc */
3389 device_printf(adapter->dev,
3390 "%s: bus_dma_tag_create failed: %d\n",
3395 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3396 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3398 device_printf(adapter->dev,
3399 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3400 __func__, (uintmax_t)size, error);
3405 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3406 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3407 if (error || dma->dma_paddr == 0) {
3408 device_printf(adapter->dev,
3409 "%s: bus_dmamap_load failed: %d\n",
/* error-unwind path: undo in reverse order of acquisition */
3417 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3419 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3420 bus_dma_tag_destroy(dma->dma_tag);
3422 dma->dma_tag = NULL;
/*
 * em_dma_free - release a region obtained from em_dma_malloc():
 * sync and unload the map if loaded, free the memory if allocated,
 * destroy the tag, and clear the handles so a double-free is benign.
 */
3428 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3430 if (dma->dma_tag == NULL)
3432 if (dma->dma_paddr != 0) {
3433 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3434 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3435 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3438 if (dma->dma_vaddr != NULL) {
3439 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3440 dma->dma_vaddr = NULL;
3442 bus_dma_tag_destroy(dma->dma_tag);
3443 dma->dma_tag = NULL;
3447 /*********************************************************************
3449 * Allocate memory for the transmit and receive rings, and then
3450 * the descriptors associated with each, called only once at attach.
3452 **********************************************************************/
/*
 * em_allocate_queues - one-time (attach) allocation of the TX and RX
 * ring structures, their descriptor DMA areas, per-ring mutexes,
 * transmit/receive buffer arrays, and (FreeBSD >= 8) the TX buf_ring.
 * txconf/rxconf track how many rings were fully set up so the error
 * path can unwind exactly what was allocated.
 *
 * FIX(review): the RX mutex name was built with txr->me (copy-paste
 * from the TX loop), so RX locks were misnamed after the last TX
 * ring; use rxr->me instead (matches the upstream FreeBSD fix).
 */
3454 em_allocate_queues(struct adapter *adapter)
3456 device_t dev = adapter->dev;
3457 struct tx_ring *txr = NULL;
3458 struct rx_ring *rxr = NULL;
3459 int rsize, tsize, error = E1000_SUCCESS;
3460 int txconf = 0, rxconf = 0;
3463 /* Allocate the TX ring struct memory */
3464 if (!(adapter->tx_rings =
3465 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3466 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3467 device_printf(dev, "Unable to allocate TX ring memory\n");
3472 /* Now allocate the RX */
3473 if (!(adapter->rx_rings =
3474 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3475 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3476 device_printf(dev, "Unable to allocate RX ring memory\n");
3481 tsize = roundup2(adapter->num_tx_desc *
3482 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3484 * Now set up the TX queues, txconf is needed to handle the
3485 * possibility that things fail midcourse and we need to
3486 * undo memory gracefully
3488 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3489 /* Set up some basics */
3490 txr = &adapter->tx_rings[i];
3491 txr->adapter = adapter;
3494 /* Initialize the TX lock */
3495 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3496 device_get_nameunit(dev), txr->me);
3497 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3499 if (em_dma_malloc(adapter, tsize,
3500 &txr->txdma, BUS_DMA_NOWAIT)) {
3502 "Unable to allocate TX Descriptor memory\n");
3506 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3507 bzero((void *)txr->tx_base, tsize);
3509 if (em_allocate_transmit_buffers(txr)) {
3511 "Critical Failure setting up transmit buffers\n");
3515 #if __FreeBSD_version >= 800000
3516 /* Allocate a buf ring */
3517 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3518 M_WAITOK, &txr->tx_mtx);
3523 * Next the RX queues...
3525 rsize = roundup2(adapter->num_rx_desc *
3526 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3527 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3528 rxr = &adapter->rx_rings[i];
3529 rxr->adapter = adapter;
3532 /* Initialize the RX lock (named after this RX ring, not the TX one) */
3533 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3534 device_get_nameunit(dev), rxr->me);
3535 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3537 if (em_dma_malloc(adapter, rsize,
3538 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3540 "Unable to allocate RxDescriptor memory\n");
3544 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3545 bzero((void *)rxr->rx_base, rsize);
3547 /* Allocate receive buffers for the ring*/
3548 if (em_allocate_receive_buffers(rxr)) {
3550 "Critical Failure setting up receive buffers\n");
/* error unwind: free only the rings that were fully configured */
3559 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3560 em_dma_free(adapter, &rxr->rxdma);
3562 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3563 em_dma_free(adapter, &txr->txdma);
3564 free(adapter->rx_rings, M_DEVBUF);
3566 #if __FreeBSD_version >= 800000
3567 buf_ring_free(txr->br, M_DEVBUF);
3569 free(adapter->tx_rings, M_DEVBUF);
3575 /*********************************************************************
3577 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3578 * the information needed to transmit a packet on the wire. This is
3579 * called only once at attach, setup is done every reset.
3581 **********************************************************************/
3583 em_allocate_transmit_buffers(struct tx_ring *txr)
3585 struct adapter *adapter = txr->adapter;
3586 device_t dev = adapter->dev;
3587 struct em_txbuffer *txbuf;
3591 * Setup DMA descriptor areas.
/* Parent tag for all per-packet TX mappings: up to EM_TSO_SIZE bytes
 * across EM_MAX_SCATTER segments, each segment at most PAGE_SIZE. */
3593 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3594 1, 0, /* alignment, bounds */
3595 BUS_SPACE_MAXADDR, /* lowaddr */
3596 BUS_SPACE_MAXADDR, /* highaddr */
3597 NULL, NULL, /* filter, filterarg */
3598 EM_TSO_SIZE, /* maxsize */
3599 EM_MAX_SCATTER, /* nsegments */
3600 PAGE_SIZE, /* maxsegsize */
3602 NULL, /* lockfunc */
3603 NULL, /* lockfuncarg */
3605 device_printf(dev,"Unable to allocate TX DMA tag\n");
/* One em_txbuffer per TX descriptor, zeroed so m_head/map start NULL. */
3609 if (!(txr->tx_buffers =
3610 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3611 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3612 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3617 /* Create the descriptor buffer dma maps */
3618 txbuf = txr->tx_buffers;
3619 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3620 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3622 device_printf(dev, "Unable to create TX DMA map\n");
3629 /* We free all, it handles case where we are in the middle */
3630 em_free_transmit_structures(adapter);
3634 /*********************************************************************
3636 * Initialize a transmit ring.
/* Resets descriptor memory, frees any leftover mbufs, and (under
 * DEV_NETMAP) rewires buffers to netmap-owned memory. Called on
 * every reset, not just at attach. */
3638 **********************************************************************/
3640 em_setup_transmit_ring(struct tx_ring *txr)
3642 struct adapter *adapter = txr->adapter;
3643 struct em_txbuffer *txbuf;
3646 struct netmap_adapter *na = NA(adapter->ifp);
3647 struct netmap_slot *slot;
3648 #endif /* DEV_NETMAP */
3650 /* Clear the old descriptor contents */
/* slot != NULL means netmap owns the ring's buffers for this reset */
3653 slot = netmap_reset(na, NR_TX, txr->me, 0);
3654 #endif /* DEV_NETMAP */
3656 bzero((void *)txr->tx_base,
3657 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3659 txr->next_avail_desc = 0;
3660 txr->next_to_clean = 0;
3662 /* Free any existing tx buffers. */
3663 txbuf = txr->tx_buffers;
3664 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3665 if (txbuf->m_head != NULL) {
3666 bus_dmamap_sync(txr->txtag, txbuf->map,
3667 BUS_DMASYNC_POSTWRITE);
3668 bus_dmamap_unload(txr->txtag, txbuf->map);
3669 m_freem(txbuf->m_head);
3670 txbuf->m_head = NULL;
/* netmap: translate driver index to netmap slot index, then point
 * the descriptor at the netmap buffer's physical address. */
3674 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3678 addr = PNMB(na, slot + si, &paddr);
3679 txr->tx_base[i].buffer_addr = htole64(paddr);
3680 /* reload the map for netmap mode */
3681 netmap_load_map(na, txr->txtag, txbuf->map, addr);
3683 #endif /* DEV_NETMAP */
3685 /* clear the watch index */
3686 txbuf->next_eop = -1;
3689 /* Set number of descriptors available */
3690 txr->tx_avail = adapter->num_tx_desc;
3691 txr->busy = EM_TX_IDLE;
3693 /* Clear checksum offload context. */
3694 txr->last_hw_offload = 0;
3695 txr->last_hw_ipcss = 0;
3696 txr->last_hw_ipcso = 0;
3697 txr->last_hw_tucss = 0;
3698 txr->last_hw_tucso = 0;
/* Push the zeroed descriptor area out to the device. */
3700 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3701 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3705 /*********************************************************************
3707 * Initialize all transmit rings.
/* Thin iterator: runs em_setup_transmit_ring() over every TX queue. */
3709 **********************************************************************/
3711 em_setup_transmit_structures(struct adapter *adapter)
3713 struct tx_ring *txr = adapter->tx_rings;
3715 for (int i = 0; i < adapter->num_queues; i++, txr++)
3716 em_setup_transmit_ring(txr);
3721 /*********************************************************************
3723 * Enable transmit unit.
/* Programs per-ring base/length/head/tail registers, TXDCTL
 * thresholds, inter-packet gap, interrupt delays, per-MAC TARC
 * errata workarounds, and finally TCTL (which enables TX). */
3725 **********************************************************************/
3727 em_initialize_transmit_unit(struct adapter *adapter)
3729 struct tx_ring *txr = adapter->tx_rings;
3730 struct e1000_hw *hw = &adapter->hw;
3731 u32 tctl, txdctl = 0, tarc, tipg = 0;
3733 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3735 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3736 u64 bus_addr = txr->txdma.dma_paddr;
3737 /* Base and Len of TX Ring */
3738 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3739 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3740 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3741 (u32)(bus_addr >> 32));
3742 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3744 /* Init the HEAD/TAIL indices */
3745 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3746 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3748 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3749 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3750 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3752 txr->busy = EM_TX_IDLE;
3753 txdctl = 0; /* clear txdctl */
3754 txdctl |= 0x1f; /* PTHRESH */
3755 txdctl |= 1 << 8; /* HTHRESH */
3756 txdctl |= 1 << 16;/* WTHRESH */
3757 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3758 txdctl |= E1000_TXDCTL_GRAN;
3759 txdctl |= 1 << 25; /* LWTHRESH */
3761 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3764 /* Set the default values for the Tx Inter Packet Gap timer */
3765 switch (adapter->hw.mac.type) {
3766 case e1000_80003es2lan:
3767 tipg = DEFAULT_82543_TIPG_IPGR1;
3768 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3769 E1000_TIPG_IPGR2_SHIFT;
/* Default: IPGT differs for fiber/serdes vs copper media. */
3772 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3773 (adapter->hw.phy.media_type ==
3774 e1000_media_type_internal_serdes))
3775 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3777 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3778 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3779 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3782 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3783 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3785 if(adapter->hw.mac.type >= e1000_82540)
3786 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3787 adapter->tx_abs_int_delay.value);
/* Per-MAC TARC fixups (errata workarounds): */
3789 if ((adapter->hw.mac.type == e1000_82571) ||
3790 (adapter->hw.mac.type == e1000_82572)) {
3791 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3792 tarc |= TARC_SPEED_MODE_BIT;
3793 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3794 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3795 /* errata: program both queues to unweighted RR */
3796 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3798 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3799 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3801 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3802 } else if (adapter->hw.mac.type == e1000_82574) {
3803 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3804 tarc |= TARC_ERRATA_BIT;
3805 if ( adapter->num_queues > 1) {
3806 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3807 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3808 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3810 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3813 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3814 if (adapter->tx_int_delay.value > 0)
3815 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3817 /* Program the Transmit Control Register */
3818 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3819 tctl &= ~E1000_TCTL_CT;
3820 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3821 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3823 if (adapter->hw.mac.type >= e1000_82571)
3824 tctl |= E1000_TCTL_MULR;
3826 /* This write will effectively turn on the transmit unit. */
3827 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
/* pch_spt (Skylake PCH) needs extra IOSFPC/TARC0 bits set. */
3829 if (hw->mac.type == e1000_pch_spt) {
3831 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3832 reg |= E1000_RCTL_RDMTS_HEX;
3833 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3834 reg = E1000_READ_REG(hw, E1000_TARC(0));
3835 reg |= E1000_TARC0_CB_MULTIQ_3_REQ;
3836 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3841 /*********************************************************************
3843 * Free all transmit rings.
/* Per ring: free buffers + DMA area, destroy lock; then the array. */
3845 **********************************************************************/
3847 em_free_transmit_structures(struct adapter *adapter)
3849 struct tx_ring *txr = adapter->tx_rings;
3851 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3853 em_free_transmit_buffers(txr);
3854 em_dma_free(adapter, &txr->txdma);
3856 EM_TX_LOCK_DESTROY(txr);
3859 free(adapter->tx_rings, M_DEVBUF);
3862 /*********************************************************************
3864 * Free transmit ring related data structures.
/* Safe to call on a partially-initialized ring: every resource is
 * NULL-checked before release (used by the attach failure path). */
3866 **********************************************************************/
3868 em_free_transmit_buffers(struct tx_ring *txr)
3870 struct adapter *adapter = txr->adapter;
3871 struct em_txbuffer *txbuf;
3873 INIT_DEBUGOUT("free_transmit_ring: begin");
3875 if (txr->tx_buffers == NULL)
3878 for (int i = 0; i < adapter->num_tx_desc; i++) {
3879 txbuf = &txr->tx_buffers[i];
/* Loaded mbuf: sync, unload, free mbuf, then destroy the map. */
3880 if (txbuf->m_head != NULL) {
3881 bus_dmamap_sync(txr->txtag, txbuf->map,
3882 BUS_DMASYNC_POSTWRITE);
3883 bus_dmamap_unload(txr->txtag,
3885 m_freem(txbuf->m_head);
3886 txbuf->m_head = NULL;
3887 if (txbuf->map != NULL) {
3888 bus_dmamap_destroy(txr->txtag,
/* Map with no mbuf attached: unload defensively, then destroy. */
3892 } else if (txbuf->map != NULL) {
3893 bus_dmamap_unload(txr->txtag,
3895 bus_dmamap_destroy(txr->txtag,
3900 #if __FreeBSD_version >= 800000
3901 if (txr->br != NULL)
3902 buf_ring_free(txr->br, M_DEVBUF);
3904 if (txr->tx_buffers != NULL) {
3905 free(txr->tx_buffers, M_DEVBUF);
3906 txr->tx_buffers = NULL;
3908 if (txr->txtag != NULL) {
3909 bus_dma_tag_destroy(txr->txtag);
3916 /*********************************************************************
3917 * The offload context is protocol specific (TCP/UDP) and thus
3918 * only needs to be set when the protocol changes. The occasion
3919 * of a context change can be a performance detriment, and
3920 * might be better just disabled. The reason arises in the way
3921 * in which the controller supports pipelined requests from the
3922 * Tx data DMA. Up to four requests can be pipelined, and they may
3923 * belong to the same packet or to multiple packets. However all
3924 * requests for one packet are issued before a request is issued
3925 * for a subsequent packet and if a request for the next packet
3926 * requires a context change, that request will be stalled
3927 * until the previous request completes. This means setting up
3928 * a new context effectively disables pipelined Tx data DMA which
3929 * in turn greatly slow down performance to send small sized
3931 **********************************************************************/
/* Emits (at most) one context descriptor at txr->next_avail_desc
 * describing IP/TCP/UDP checksum offsets, and sets *txd_upper /
 * *txd_lower flags for the data descriptors that follow. */
3933 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3934 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3936 struct adapter *adapter = txr->adapter;
3937 struct e1000_context_desc *TXD = NULL;
3938 struct em_txbuffer *tx_buffer;
3942 u8 ipcso, ipcss, tucso, tucss;
3944 ipcss = ipcso = tucss = tucso = 0;
/* hdr_len = end of IP header (ip_hl is in 32-bit words). */
3945 hdr_len = ip_off + (ip->ip_hl << 2);
3946 cur = txr->next_avail_desc;
3948 /* Setup of IP header checksum. */
3949 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3950 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3953 ipcso = ip_off + offsetof(struct ip, ip_sum);
3955 * Start offset for header checksum calculation.
3956 * End offset for header checksum calculation.
3957 * Offset of place to put the checksum.
3959 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3960 TXD->lower_setup.ip_fields.ipcss = ipcss;
3961 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3962 TXD->lower_setup.ip_fields.ipcso = ipcso;
3963 cmd |= E1000_TXD_CMD_IP;
/* TCP branch; the UDP branch below mirrors this logic with the
 * UDP checksum field offset instead. */
3966 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3967 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3968 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3969 offload |= CSUM_TCP;
3971 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3973 * The 82574L can only remember the *last* context used
3974 * regardless of queue that it was use for. We cannot reuse
3975 * contexts on this hardware platform and must generate a new
3976 * context every time. 82574L hardware spec, section 7.2.6,
/* Context-reuse shortcut is only safe with a single queue, per
 * the 82574L errata described above. */
3979 if (adapter->num_queues < 2) {
3981 * Setting up new checksum offload context for every
3982 * frames takes a lot of processing time for hardware.
3983 * This also reduces performance a lot for small sized
3984 * frames so avoid it if driver can use previously
3985 * configured checksum offload context.
3987 if (txr->last_hw_offload == offload) {
3988 if (offload & CSUM_IP) {
3989 if (txr->last_hw_ipcss == ipcss &&
3990 txr->last_hw_ipcso == ipcso &&
3991 txr->last_hw_tucss == tucss &&
3992 txr->last_hw_tucso == tucso)
3995 if (txr->last_hw_tucss == tucss &&
3996 txr->last_hw_tucso == tucso)
/* Cache the offsets we are about to program for next time. */
4000 txr->last_hw_offload = offload;
4001 txr->last_hw_tucss = tucss;
4002 txr->last_hw_tucso = tucso;
4005 * Start offset for payload checksum calculation.
4006 * End offset for payload checksum calculation.
4007 * Offset of place to put the checksum.
4009 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4010 TXD->upper_setup.tcp_fields.tucss = hdr_len;
4011 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4012 TXD->upper_setup.tcp_fields.tucso = tucso;
4013 cmd |= E1000_TXD_CMD_TCP;
4014 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4015 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4016 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4018 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4020 * The 82574L can only remember the *last* context used
4021 * regardless of queue that it was use for. We cannot reuse
4022 * contexts on this hardware platform and must generate a new
4023 * context every time. 82574L hardware spec, section 7.2.6,
4026 if (adapter->num_queues < 2) {
4028 * Setting up new checksum offload context for every
4029 * frames takes a lot of processing time for hardware.
4030 * This also reduces performance a lot for small sized
4031 * frames so avoid it if driver can use previously
4032 * configured checksum offload context.
4034 if (txr->last_hw_offload == offload) {
4035 if (offload & CSUM_IP) {
4036 if (txr->last_hw_ipcss == ipcss &&
4037 txr->last_hw_ipcso == ipcso &&
4038 txr->last_hw_tucss == tucss &&
4039 txr->last_hw_tucso == tucso)
4042 if (txr->last_hw_tucss == tucss &&
4043 txr->last_hw_tucso == tucso)
4047 txr->last_hw_offload = offload;
4048 txr->last_hw_tucss = tucss;
4049 txr->last_hw_tucso = tucso;
4052 * Start offset for header checksum calculation.
4053 * End offset for header checksum calculation.
4054 * Offset of place to put the checksum.
4056 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4057 TXD->upper_setup.tcp_fields.tucss = tucss;
4058 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4059 TXD->upper_setup.tcp_fields.tucso = tucso;
4062 if (offload & CSUM_IP) {
4063 txr->last_hw_ipcss = ipcss;
4064 txr->last_hw_ipcso = ipcso;
/* Finalize the context descriptor and consume one ring slot. */
4067 TXD->tcp_seg_setup.data = htole32(0);
4068 TXD->cmd_and_length =
4069 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4070 tx_buffer = &txr->tx_buffers[cur];
4071 tx_buffer->m_head = NULL;
4072 tx_buffer->next_eop = -1;
4074 if (++cur == adapter->num_tx_desc)
4078 txr->next_avail_desc = cur;
4082 /**********************************************************************
4084 * Setup work for hardware segmentation offload (TSO)
/* Writes one TSO context descriptor (MSS, header length, IP/TCP
 * checksum offsets) and sets the flag words the subsequent data
 * descriptors must carry. Always rebuilds the context — see note. */
4086 **********************************************************************/
4088 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4089 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4091 struct adapter *adapter = txr->adapter;
4092 struct e1000_context_desc *TXD;
4093 struct em_txbuffer *tx_buffer;
4097 * In theory we can use the same TSO context if and only if
4098 * frame is the same type(IP/TCP) and the same MSS. However
4099 * checking whether a frame has the same IP/TCP structure is
4100 * hard thing so just ignore that and always restablish a
/* Total L2..L4 header length: IP offset + IP hdr + TCP hdr
 * (ip_hl/th_off are in 32-bit words). */
4103 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4104 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
4105 E1000_TXD_DTYP_D | /* Data descr type */
4106 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
4108 /* IP and/or TCP header checksum calculation and insertion. */
4109 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4111 cur = txr->next_avail_desc;
4112 tx_buffer = &txr->tx_buffers[cur];
4113 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4116 * Start offset for header checksum calculation.
4117 * End offset for header checksum calculation.
4118 * Offset of place put the checksum.
4120 TXD->lower_setup.ip_fields.ipcss = ip_off;
4121 TXD->lower_setup.ip_fields.ipcse =
4122 htole16(ip_off + (ip->ip_hl << 2) - 1);
4123 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4125 * Start offset for payload checksum calculation.
4126 * End offset for payload checksum calculation.
4127 * Offset of place to put the checksum.
4129 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4130 TXD->upper_setup.tcp_fields.tucse = 0;
4131 TXD->upper_setup.tcp_fields.tucso =
4132 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4134 * Payload size per packet w/o any headers.
4135 * Length of all headers up to payload.
4137 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4138 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4140 TXD->cmd_and_length = htole32(adapter->txd_cmd |
4141 E1000_TXD_CMD_DEXT | /* Extended descr */
4142 E1000_TXD_CMD_TSE | /* TSE context */
4143 E1000_TXD_CMD_IP | /* Do IP csum */
4144 E1000_TXD_CMD_TCP | /* Do TCP checksum */
4145 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
/* Context descriptor carries no mbuf; consume one ring slot. */
4147 tx_buffer->m_head = NULL;
4148 tx_buffer->next_eop = -1;
4150 if (++cur == adapter->num_tx_desc)
4154 txr->next_avail_desc = cur;
4159 /**********************************************************************
4161 * Examine each tx_buffer in the used queue. If the hardware is done
4162 * processing the packet then free associated resources. The
4163 * tx_buffer is put back on the free queue.
/* Must be called with the TX lock held (asserted below). Walks
 * completed packets via their stored EOP ("watch") indices and
 * updates hang-detection state. */
4165 **********************************************************************/
4167 em_txeof(struct tx_ring *txr)
4169 struct adapter *adapter = txr->adapter;
4170 int first, last, done, processed;
4171 struct em_txbuffer *tx_buffer;
4172 struct e1000_tx_desc *tx_desc, *eop_desc;
4173 struct ifnet *ifp = adapter->ifp;
4175 EM_TX_LOCK_ASSERT(txr);
/* In netmap mode the netmap stack reclaims the ring instead. */
4177 if (netmap_tx_irq(ifp, txr->me))
4179 #endif /* DEV_NETMAP */
4181 /* No work, make sure hang detection is disabled */
4182 if (txr->tx_avail == adapter->num_tx_desc) {
4183 txr->busy = EM_TX_IDLE;
/* first = oldest un-cleaned descriptor; last = its packet's EOP. */
4188 first = txr->next_to_clean;
4189 tx_desc = &txr->tx_base[first];
4190 tx_buffer = &txr->tx_buffers[first];
4191 last = tx_buffer->next_eop;
4192 eop_desc = &txr->tx_base[last];
4195 * What this does is get the index of the
4196 * first descriptor AFTER the EOP of the
4197 * first packet, that way we can do the
4198 * simple comparison on the inner while loop.
4200 if (++last == adapter->num_tx_desc)
4204 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4205 BUS_DMASYNC_POSTREAD);
/* Outer loop: one completed packet per iteration (DD bit set on
 * its EOP descriptor). Inner loop: scrub every descriptor of it. */
4207 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4208 /* We clean the range of the packet */
4209 while (first != done) {
4210 tx_desc->upper.data = 0;
4211 tx_desc->lower.data = 0;
4212 tx_desc->buffer_addr = 0;
4216 if (tx_buffer->m_head) {
4217 bus_dmamap_sync(txr->txtag,
4219 BUS_DMASYNC_POSTWRITE);
4220 bus_dmamap_unload(txr->txtag,
4222 m_freem(tx_buffer->m_head);
4223 tx_buffer->m_head = NULL;
4225 tx_buffer->next_eop = -1;
4227 if (++first == adapter->num_tx_desc)
4230 tx_buffer = &txr->tx_buffers[first];
4231 tx_desc = &txr->tx_base[first];
4234 /* See if we can continue to the next packet */
4235 last = tx_buffer->next_eop;
4237 eop_desc = &txr->tx_base[last];
4238 /* Get new done point */
4239 if (++last == adapter->num_tx_desc) last = 0;
4244 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4245 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4247 txr->next_to_clean = first;
4250 ** Hang detection: we know there's work outstanding
4251 ** or the entry return would have been taken, so no
4252 ** descriptor processed here indicates a potential hang.
4253 ** The local timer will examine this and do a reset if needed.
4255 if (processed == 0) {
4256 if (txr->busy != EM_TX_HUNG)
4258 } else /* At least one descriptor was cleaned */
4259 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4262 * If we have a minimum free, clear IFF_DRV_OACTIVE
4263 * to tell the stack that it is OK to send packets.
4264 * Notice that all writes of OACTIVE happen under the
4265 * TX lock which, with a single queue, guarantees
4268 if (txr->tx_avail >= EM_MAX_SCATTER) {
4269 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4272 /* Disable hang detection if all clean */
4273 if (txr->tx_avail == adapter->num_tx_desc)
4274 txr->busy = EM_TX_IDLE;
4277 /*********************************************************************
4279 * Refresh RX descriptor mbufs from system mbuf buffer pool.
/* Re-arms descriptors from next_to_refresh up to (but not including)
 * 'limit', then advances the hardware RDT tail as far as refreshed.
 * Allocation failures abort early; em_rxeof will retry later. */
4281 **********************************************************************/
4283 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4285 struct adapter *adapter = rxr->adapter;
4287 bus_dma_segment_t segs;
4288 struct em_rxbuffer *rxbuf;
4289 int i, j, error, nsegs;
4290 bool cleaned = FALSE;
/* i = slot being refreshed, j = one-ahead lookahead index. */
4292 i = j = rxr->next_to_refresh;
4294 ** Get one descriptor beyond
4295 ** our work mark to control
4298 if (++j == adapter->num_rx_desc)
4301 while (j != limit) {
4302 rxbuf = &rxr->rx_buffers[i];
/* Only slots whose mbuf was consumed (m_head == NULL) need a
 * fresh cluster; others are just re-armed below. */
4303 if (rxbuf->m_head == NULL) {
4304 m = m_getjcl(M_NOWAIT, MT_DATA,
4305 M_PKTHDR, adapter->rx_mbuf_sz);
4307 ** If we have a temporary resource shortage
4308 ** that causes a failure, just abort refresh
4309 ** for now, we will return to this point when
4310 ** reinvoked from em_rxeof.
4317 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4318 m->m_flags |= M_PKTHDR;
4319 m->m_data = m->m_ext.ext_buf;
4321 /* Use bus_dma machinery to setup the memory mapping */
4322 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4323 m, &segs, &nsegs, BUS_DMA_NOWAIT);
4325 printf("Refresh mbufs: hdr dmamap load"
4326 " failure - %d\n", error);
4328 rxbuf->m_head = NULL;
4332 rxbuf->paddr = segs.ds_addr;
4333 bus_dmamap_sync(rxr->rxtag,
4334 rxbuf->map, BUS_DMASYNC_PREREAD);
4335 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4338 i = j; /* Next is precalulated for us */
4339 rxr->next_to_refresh = i;
4340 /* Calculate next controlling index */
4341 if (++j == adapter->num_rx_desc)
4346 ** Update the tail pointer only if,
4347 ** and as far as we have refreshed.
4350 E1000_WRITE_REG(&adapter->hw,
4351 E1000_RDT(rxr->me), rxr->next_to_refresh);
4357 /*********************************************************************
4359 * Allocate memory for rx_buffer structures. Since we use one
4360 * rx_buffer per received packet, the maximum number of rx_buffer's
4361 * that we'll need is equal to the number of receive descriptors
4362 * that we've allocated.
4364 **********************************************************************/
4366 em_allocate_receive_buffers(struct rx_ring *rxr)
4368 struct adapter *adapter = rxr->adapter;
4369 device_t dev = adapter->dev;
4370 struct em_rxbuffer *rxbuf;
4373 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4374 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4375 if (rxr->rx_buffers == NULL) {
4376 device_printf(dev, "Unable to allocate rx_buffer memory\n");
/* Single-segment RX tag: each receive buffer is one contiguous
 * cluster of up to MJUM9BYTES (9k jumbo). */
4380 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4381 1, 0, /* alignment, bounds */
4382 BUS_SPACE_MAXADDR, /* lowaddr */
4383 BUS_SPACE_MAXADDR, /* highaddr */
4384 NULL, NULL, /* filter, filterarg */
4385 MJUM9BYTES, /* maxsize */
4387 MJUM9BYTES, /* maxsegsize */
4389 NULL, /* lockfunc */
4393 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
/* NOTE(review): rxbuf advances in the loop header AND is reassigned
 * from the index each iteration — redundant but harmless. */
4398 rxbuf = rxr->rx_buffers;
4399 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4400 rxbuf = &rxr->rx_buffers[i];
4401 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4403 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
/* Failure path frees everything allocated so far. */
4412 em_free_receive_structures(adapter);
4417 /*********************************************************************
4419 * Initialize a receive ring and its buffers.
/* Clears the descriptor area, frees stale mbufs, then replenishes
 * every slot — from netmap buffers when netmap owns the ring,
 * otherwise from fresh jumbo clusters. */
4421 **********************************************************************/
4423 em_setup_receive_ring(struct rx_ring *rxr)
4425 struct adapter *adapter = rxr->adapter;
4426 struct em_rxbuffer *rxbuf;
4427 bus_dma_segment_t seg[1];
4428 int rsize, nsegs, error = 0;
4430 struct netmap_adapter *na = NA(adapter->ifp);
4431 struct netmap_slot *slot;
4435 /* Clear the ring contents */
4437 rsize = roundup2(adapter->num_rx_desc *
4438 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4439 bzero((void *)rxr->rx_base, rsize);
/* NOTE(review): ring id is hardcoded to 0 here, while the TX path
 * passes txr->me to netmap_reset() — for multi-queue this likely
 * should be rxr->me; confirm against upstream if_em.c. */
4441 slot = netmap_reset(na, NR_RX, 0, 0);
4445 ** Free current RX buffer structs and their mbufs
4447 for (int i = 0; i < adapter->num_rx_desc; i++) {
4448 rxbuf = &rxr->rx_buffers[i];
4449 if (rxbuf->m_head != NULL) {
4450 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4451 BUS_DMASYNC_POSTREAD);
4452 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4453 m_freem(rxbuf->m_head);
4454 rxbuf->m_head = NULL; /* mark as freed */
4458 /* Now replenish the mbufs */
4459 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4460 rxbuf = &rxr->rx_buffers[j];
/* netmap mode: point the descriptor at the netmap buffer and skip
 * mbuf allocation for this slot. */
4463 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4467 addr = PNMB(na, slot + si, &paddr);
4468 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4469 rxbuf->paddr = paddr;
4470 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4473 #endif /* DEV_NETMAP */
4474 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4475 M_PKTHDR, adapter->rx_mbuf_sz);
4476 if (rxbuf->m_head == NULL) {
4480 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4481 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4482 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4484 /* Get the memory mapping */
4485 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4486 rxbuf->map, rxbuf->m_head, seg,
4487 &nsegs, BUS_DMA_NOWAIT);
4489 m_freem(rxbuf->m_head);
4490 rxbuf->m_head = NULL;
4493 bus_dmamap_sync(rxr->rxtag,
4494 rxbuf->map, BUS_DMASYNC_PREREAD);
4496 rxbuf->paddr = seg[0].ds_addr;
4497 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
/* Reset software indices and push descriptors to the device. */
4499 rxr->next_to_check = 0;
4500 rxr->next_to_refresh = 0;
4501 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4502 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4509 /*********************************************************************
4511 * Initialize all receive rings.
/* On per-ring failure, frees the mbufs of the rings that had
 * already completed ('q' is the first ring that failed). */
4513 **********************************************************************/
4515 em_setup_receive_structures(struct adapter *adapter)
4517 struct rx_ring *rxr = adapter->rx_rings;
4520 for (q = 0; q < adapter->num_queues; q++, rxr++)
4521 if (em_setup_receive_ring(rxr))
4527 * Free RX buffers allocated so far, we will only handle
4528 * the rings that completed, the failing case will have
4529 * cleaned up for itself. 'q' failed, so its the terminus.
4531 for (int i = 0; i < q; ++i) {
4532 rxr = &adapter->rx_rings[i];
4533 for (int n = 0; n < adapter->num_rx_desc; n++) {
4534 struct em_rxbuffer *rxbuf;
4535 rxbuf = &rxr->rx_buffers[n];
4536 if (rxbuf->m_head != NULL) {
4537 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4538 BUS_DMASYNC_POSTREAD);
4539 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4540 m_freem(rxbuf->m_head);
4541 rxbuf->m_head = NULL;
4544 rxr->next_to_check = 0;
4545 rxr->next_to_refresh = 0;
4551 /*********************************************************************
4553 * Free all receive rings.
/* Per ring: buffers, then descriptor DMA area, then the lock;
 * finally the ring array itself. */
4555 **********************************************************************/
4557 em_free_receive_structures(struct adapter *adapter)
4559 struct rx_ring *rxr = adapter->rx_rings;
4561 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4562 em_free_receive_buffers(rxr);
4563 /* Free the ring memory as well */
4564 em_dma_free(adapter, &rxr->rxdma);
4565 EM_RX_LOCK_DESTROY(rxr);
4568 free(adapter->rx_rings, M_DEVBUF);
4572 /*********************************************************************
4574 * Free receive ring data structures
/* NULL-checks every resource so it is safe on a partially
 * initialized ring (used by the allocation failure path). */
4576 **********************************************************************/
4578 em_free_receive_buffers(struct rx_ring *rxr)
4580 struct adapter *adapter = rxr->adapter;
4581 struct em_rxbuffer *rxbuf = NULL;
4583 INIT_DEBUGOUT("free_receive_buffers: begin");
4585 if (rxr->rx_buffers != NULL) {
4586 for (int i = 0; i < adapter->num_rx_desc; i++) {
4587 rxbuf = &rxr->rx_buffers[i];
/* sync + unload + destroy the map before dropping the mbuf */
4588 if (rxbuf->map != NULL) {
4589 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4590 BUS_DMASYNC_POSTREAD);
4591 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4592 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4594 if (rxbuf->m_head != NULL) {
4595 m_freem(rxbuf->m_head);
4596 rxbuf->m_head = NULL;
4599 free(rxr->rx_buffers, M_DEVBUF);
4600 rxr->rx_buffers = NULL;
4601 rxr->next_to_check = 0;
4602 rxr->next_to_refresh = 0;
4605 if (rxr->rxtag != NULL) {
4606 bus_dma_tag_destroy(rxr->rxtag);
4614 /*********************************************************************
4616 * Enable receive unit.
4618 **********************************************************************/
4621 em_initialize_receive_unit(struct adapter *adapter)
4623 struct rx_ring *rxr = adapter->rx_rings;
4624 struct ifnet *ifp = adapter->ifp;
4625 struct e1000_hw *hw = &adapter->hw;
4626 u32 rctl, rxcsum, rfctl;
4628 INIT_DEBUGOUT("em_initialize_receive_units: begin");
4631 * Make sure receives are disabled while setting
4632 * up the descriptor ring
4634 rctl = E1000_READ_REG(hw, E1000_RCTL);
4635 /* Do not disable if ever enabled on this hardware */
4636 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4637 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4639 /* Setup the Receive Control Register */
4640 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4641 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4642 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4643 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4645 /* Do not store bad packets */
4646 rctl &= ~E1000_RCTL_SBP;
4648 /* Enable Long Packet receive */
4649 if (ifp->if_mtu > ETHERMTU)
4650 rctl |= E1000_RCTL_LPE;
4652 rctl &= ~E1000_RCTL_LPE;
4655 if (!em_disable_crc_stripping)
4656 rctl |= E1000_RCTL_SECRC;
4658 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4659 adapter->rx_abs_int_delay.value);
4661 E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4662 adapter->rx_int_delay.value);
4664 * Set the interrupt throttling rate. Value is calculated
4665 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4667 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4669 /* Use extended rx descriptor formats */
4670 rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4671 rfctl |= E1000_RFCTL_EXTEN;
4673 ** When using MSIX interrupts we need to throttle
4674 ** using the EITR register (82574 only)
4676 if (hw->mac.type == e1000_82574) {
4677 for (int i = 0; i < 4; i++)
4678 E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4680 /* Disable accelerated acknowledge */
4681 rfctl |= E1000_RFCTL_ACK_DIS;
4683 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4685 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4686 if (ifp->if_capenable & IFCAP_RXCSUM) {
4687 #ifdef EM_MULTIQUEUE
4688 rxcsum |= E1000_RXCSUM_TUOFL |
4689 E1000_RXCSUM_IPOFL |
4692 rxcsum |= E1000_RXCSUM_TUOFL;
4695 rxcsum &= ~E1000_RXCSUM_TUOFL;
4697 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4699 #ifdef EM_MULTIQUEUE
4700 #define RSSKEYLEN 10
4701 if (adapter->num_queues > 1) {
4702 uint8_t rss_key[4 * RSSKEYLEN];
4709 arc4rand(rss_key, sizeof(rss_key), 0);
4710 for (i = 0; i < RSSKEYLEN; ++i) {
4713 rssrk = EM_RSSRK_VAL(rss_key, i);
4714 E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4718 * Configure RSS redirect table in following fashion:
4719 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4721 for (i = 0; i < sizeof(reta); ++i) {
4724 q = (i % adapter->num_queues) << 7;
4725 reta |= q << (8 * i);
4728 for (i = 0; i < 32; ++i) {
4729 E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4732 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4733 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4734 E1000_MRQC_RSS_FIELD_IPV4 |
4735 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4736 E1000_MRQC_RSS_FIELD_IPV6_EX |
4737 E1000_MRQC_RSS_FIELD_IPV6);
4741 ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4742 ** long latencies are observed, like Lenovo X60. This
4743 ** change eliminates the problem, but since having positive
4744 ** values in RDTR is a known source of problems on other
4745 ** platforms another solution is being sought.
4747 if (hw->mac.type == e1000_82573)
4748 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4750 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4751 /* Setup the Base and Length of the Rx Descriptor Ring */
4752 u64 bus_addr = rxr->rxdma.dma_paddr;
4753 u32 rdt = adapter->num_rx_desc - 1; /* default */
4755 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4756 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4757 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4758 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4759 /* Setup the Head and Tail Descriptor Pointers */
4760 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4763 * an init() while a netmap client is active must
4764 * preserve the rx buffers passed to userspace.
4766 if (ifp->if_capenable & IFCAP_NETMAP)
4767 rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4768 #endif /* DEV_NETMAP */
4769 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4773 * Set PTHRESH for improved jumbo performance
4774 * According to 10.2.5.11 of Intel 82574 Datasheet,
4775 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4776 * Only write to RXDCTL(1) if there is a need for different
4779 if (((adapter->hw.mac.type == e1000_ich9lan) ||
4780 (adapter->hw.mac.type == e1000_pch2lan) ||
4781 (adapter->hw.mac.type == e1000_ich10lan)) &&
4782 (ifp->if_mtu > ETHERMTU)) {
4783 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4784 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4785 } else if (adapter->hw.mac.type == e1000_82574) {
4786 for (int i = 0; i < adapter->num_queues; i++) {
4787 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4789 rxdctl |= 0x20; /* PTHRESH */
4790 rxdctl |= 4 << 8; /* HTHRESH */
4791 rxdctl |= 4 << 16;/* WTHRESH */
4792 rxdctl |= 1 << 24; /* Switch to granularity */
4793 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4797 if (adapter->hw.mac.type >= e1000_pch2lan) {
4798 if (ifp->if_mtu > ETHERMTU)
4799 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4801 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4804 /* Make sure VLAN Filters are off */
4805 rctl &= ~E1000_RCTL_VFE;
4807 if (adapter->rx_mbuf_sz == MCLBYTES)
4808 rctl |= E1000_RCTL_SZ_2048;
4809 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4810 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4811 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4812 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4814 /* ensure we clear use DTYPE of 00 here */
4815 rctl &= ~0x00000C00;
4816 /* Write out the settings */
4817 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4823 /*********************************************************************
4825 * This routine executes in interrupt context. It replenishes
4826 * the mbufs in the descriptor and sends data which has been
4827 * dma'ed into host memory to upper layer.
4829 * We loop at most count times if count is > 0, or until done if
4832 * For polling we also now return the number of cleaned packets
4833 *********************************************************************/
/*
 * em_rxeof - RX completion processing (interrupt or polling context).
 *
 * Walks the RX descriptor ring from next_to_check, assembling
 * multi-descriptor frames into an mbuf chain (fmp/lmp), attaching
 * checksum and VLAN metadata, and passing completed packets up via
 * if_input. Refreshes consumed buffers in batches of 8. If 'count'
 * is positive it bounds the number of descriptors examined; '*done'
 * (when non-NULL — presumably, the store is outside this extract)
 * reports packets cleaned for DEVICE_POLLING.
 * Returns TRUE if another descriptor is ready (more work pending).
 */
4835 em_rxeof(struct rx_ring *rxr, int count, int *done)
4837 struct adapter *adapter = rxr->adapter;
4838 struct ifnet *ifp = adapter->ifp;
4839 struct mbuf *mp, *sendmp;
4842 int i, processed, rxdone = 0;
4844 union e1000_rx_desc_extended *cur;
/* Pick up device-written descriptor status before reading the ring */
4849 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4850 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/* Netmap gets the ring first if a client has claimed it */
4854 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4858 #endif /* DEV_NETMAP */
4860 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4861 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4864 cur = &rxr->rx_base[i];
4865 status = le32toh(cur->wb.upper.status_error);
/* DD clear means the hardware has not written this slot yet */
4868 if ((status & E1000_RXD_STAT_DD) == 0)
4871 len = le16toh(cur->wb.upper.length);
4872 eop = (status & E1000_RXD_STAT_EOP) != 0;
/* Error frame, or trailing segments of an already-dropped frame */
4874 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4875 (rxr->discard == TRUE)) {
4876 adapter->dropped_pkts++;
4877 ++rxr->rx_discarded;
4878 if (!eop) /* Catch subsequent segs */
4879 rxr->discard = TRUE;
4881 rxr->discard = FALSE;
4882 em_rx_discard(rxr, i);
4885 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4887 /* Assign correct length to the current fragment */
4888 mp = rxr->rx_buffers[i].m_head;
4891 /* Trigger for refresh */
4892 rxr->rx_buffers[i].m_head = NULL;
4894 /* First segment? */
4895 if (rxr->fmp == NULL) {
4896 mp->m_pkthdr.len = len;
4897 rxr->fmp = rxr->lmp = mp;
4899 /* Chain mbuf's together */
4900 mp->m_flags &= ~M_PKTHDR;
4901 rxr->lmp->m_next = mp;
4903 rxr->fmp->m_pkthdr.len += len;
/* Frame complete: attach metadata before handing it to the stack */
4909 sendmp->m_pkthdr.rcvif = ifp;
4911 em_receive_checksum(status, sendmp);
4912 #ifndef __NO_STRICT_ALIGNMENT
4913 if (adapter->hw.mac.max_frame_size >
4914 (MCLBYTES - ETHER_ALIGN) &&
4915 em_fixup_rx(rxr) != 0)
4918 if (status & E1000_RXD_STAT_VP) {
4919 sendmp->m_pkthdr.ether_vtag =
4920 le16toh(cur->wb.upper.vlan);
4921 sendmp->m_flags |= M_VLANTAG;
4923 #ifndef __NO_STRICT_ALIGNMENT
4926 rxr->fmp = rxr->lmp = NULL;
4930 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4931 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4933 /* Zero out the receive descriptors status. */
4934 cur->wb.upper.status_error &= htole32(~0xFF);
4935 ++rxdone; /* cumulative for POLL */
4938 /* Advance our pointers to the next descriptor. */
4939 if (++i == adapter->num_rx_desc)
4942 /* Send to the stack */
4943 if (sendmp != NULL) {
4944 rxr->next_to_check = i;
/* if_input may recurse into the driver; re-load i afterwards */
4946 (*ifp->if_input)(ifp, sendmp);
4948 i = rxr->next_to_check;
4951 /* Only refresh mbufs every 8 descriptors */
4952 if (processed == 8) {
4953 em_refresh_mbufs(rxr, i);
4958 /* Catch any remaining refresh work */
4959 if (e1000_rx_unrefreshed(rxr))
4960 em_refresh_mbufs(rxr, i);
4962 rxr->next_to_check = i;
4967 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
/*
 * em_rx_discard - drop the RX buffer at ring slot 'i'.
 *
 * Unloads the DMA map, releases any partially-assembled frame chain
 * (fmp), and frees the slot's mbuf, leaving m_head NULL so that
 * em_refresh_mbufs() will recharge the slot.
 */
4970 static __inline void
4971 em_rx_discard(struct rx_ring *rxr, int i)
4973 struct em_rxbuffer *rbuf;
4975 rbuf = &rxr->rx_buffers[i];
4976 bus_dmamap_unload(rxr->rxtag, rbuf->map);
4978 /* Free any previous pieces */
4979 if (rxr->fmp != NULL) {
/* Restore M_PKTHDR (cleared when chaining) so m_freem-style release is valid */
4980 rxr->fmp->m_flags |= M_PKTHDR;
4986 ** Free buffer and allow em_refresh_mbufs()
4987 ** to clean up and recharge buffer.
4990 m_free(rbuf->m_head);
4991 rbuf->m_head = NULL;
4996 #ifndef __NO_STRICT_ALIGNMENT
4998 * When jumbo frames are enabled we should realign the entire payload on
4999 * architectures with strict alignment. This is a serious design mistake of the 8254x
5000 * as it nullifies DMA operations. The 8254x only allows the RX buffer size to be
5001 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
5002 * payload. On architectures without strict alignment restrictions the 8254x still
5003 * performs unaligned memory access which would reduce the performance too.
5004 * To avoid copying over an entire frame to align, we allocate a new mbuf and
5005 * copy the ethernet header to the new mbuf. The new mbuf is prepended onto the
5006 * existing mbuf chain.
5008 * Be aware, the best performance of the 8254x is achieved only when jumbo frames are
5009 * not used at all on architectures with strict alignment.
/*
 * em_fixup_rx - realign a received frame on strict-alignment archs.
 *
 * Shifts (or, when there is no headroom, prepends a new mbuf holding)
 * the Ethernet header so the payload ends up properly aligned.
 * Returns 0 on success; on mbuf allocation failure the packet is
 * counted as dropped (presumably a non-zero error is returned —
 * the failure path is outside this extract).
 */
5012 em_fixup_rx(struct rx_ring *rxr)
5014 struct adapter *adapter = rxr->adapter;
/* Room in the cluster: slide the whole frame up by ETHER_HDR_LEN */
5020 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5021 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5022 m->m_data += ETHER_HDR_LEN;
/* No room: allocate a header mbuf and move only the Ethernet header */
5024 MGETHDR(n, M_NOWAIT, MT_DATA);
5026 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5027 m->m_data += ETHER_HDR_LEN;
5028 m->m_len -= ETHER_HDR_LEN;
5029 n->m_len = ETHER_HDR_LEN;
5030 M_MOVE_PKTHDR(n, m);
/* MGETHDR failed: count the drop */
5034 adapter->dropped_pkts++;
/*
 * em_setup_rxdesc - reinitialize one extended RX descriptor to point
 * at the given buffer's DMA address, clearing the written-back status
 * word so the DD bit does not falsely indicate a completed receive.
 */
5046 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5048 rxd->read.buffer_addr = htole64(rxbuf->paddr);
5049 /* DD bits must be cleared */
5050 rxd->wb.upper.status_error= 0;
5053 /*********************************************************************
5055 * Verify that the hardware indicated that the checksum is valid.
5056 * Inform the stack about the status of checksum so that stack
5057 * doesn't spend time verifying the checksum.
5059 *********************************************************************/
/*
 * em_receive_checksum - translate descriptor status bits into mbuf
 * checksum flags so the stack can skip software verification.
 * 'status' is the 32-bit status_error field of an extended RX
 * descriptor; 'mp' is the completed packet.
 */
5061 em_receive_checksum(uint32_t status, struct mbuf *mp)
5063 mp->m_pkthdr.csum_flags = 0;
5065 /* Ignore Checksum bit is set */
5066 if (status & E1000_RXD_STAT_IXSM)
5069 /* If the IP checksum exists and there is no IP Checksum error */
5070 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5071 E1000_RXD_STAT_IPCS) {
5072 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5075 /* TCP or UDP checksum */
5076 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5077 E1000_RXD_STAT_TCPCS) {
5078 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5079 mp->m_pkthdr.csum_data = htons(0xffff);
5081 if (status & E1000_RXD_STAT_UDPCS) {
5082 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5083 mp->m_pkthdr.csum_data = htons(0xffff);
5088 * This routine is run via a vlan
/*
 * em_register_vlan - VLAN-config event callback: record 'vtag' in the
 * software shadow of the VLAN Filter Table Array, then re-init the
 * adapter (if HW filtering is enabled) so the hardware VFTA is
 * reprogrammed from the shadow copy.
 */
5092 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5094 struct adapter *adapter = ifp->if_softc;
5097 if (ifp->if_softc != arg) /* Not our event */
5100 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
5103 EM_CORE_LOCK(adapter);
/* VFTA is 128 x 32-bit words: word index from the high bits of vtag */
5104 index = (vtag >> 5) & 0x7F;
5106 adapter->shadow_vfta[index] |= (1 << bit);
5107 ++adapter->num_vlans;
5108 /* Re-init to load the changes */
5109 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5110 em_init_locked(adapter);
5111 EM_CORE_UNLOCK(adapter);
5115 * This routine is run via a vlan
/*
 * em_unregister_vlan - VLAN-unconfig event callback: clear 'vtag'
 * from the shadow VFTA and re-init (if HW filtering is enabled) to
 * push the change to hardware. Mirrors em_register_vlan().
 */
5119 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5121 struct adapter *adapter = ifp->if_softc;
5124 if (ifp->if_softc != arg)
5127 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5130 EM_CORE_LOCK(adapter);
5131 index = (vtag >> 5) & 0x7F;
5133 adapter->shadow_vfta[index] &= ~(1 << bit);
5134 --adapter->num_vlans;
5135 /* Re-init to load the changes */
5136 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5137 em_init_locked(adapter);
5138 EM_CORE_UNLOCK(adapter);
/*
 * em_setup_vlan_hw_support - after a soft reset, repopulate the
 * hardware VFTA from the driver's shadow copy and enable VLAN tag
 * stripping (CTRL.VME) and filtering (RCTL.VFE). No-op when no
 * VLANs are registered.
 */
5142 em_setup_vlan_hw_support(struct adapter *adapter)
5144 struct e1000_hw *hw = &adapter->hw;
5148 ** We get here thru init_locked, meaning
5149 ** a soft reset, this has already cleared
5150 ** the VFTA and other state, so if there
5151 ** have been no vlan's registered do nothing.
5153 if (adapter->num_vlans == 0)
5157 ** A soft reset zero's out the VFTA, so
5158 ** we need to repopulate it now.
5160 for (int i = 0; i < EM_VFTA_SIZE; i++)
5161 if (adapter->shadow_vfta[i] != 0)
5162 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5163 i, adapter->shadow_vfta[i]);
/* Enable hardware VLAN tag stripping */
5165 reg = E1000_READ_REG(hw, E1000_CTRL);
5166 reg |= E1000_CTRL_VME;
5167 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5169 /* Enable the Filter Table */
5170 reg = E1000_READ_REG(hw, E1000_RCTL);
5171 reg &= ~E1000_RCTL_CFIEN;
5172 reg |= E1000_RCTL_VFE;
5173 E1000_WRITE_REG(hw, E1000_RCTL, reg);
/*
 * em_enable_intr - unmask adapter interrupts via IMS. On the 82574
 * the MSI-X auto-clear register (EIAC) is also programmed and the
 * per-queue MSI-X bits are added to the mask.
 */
5177 em_enable_intr(struct adapter *adapter)
5179 struct e1000_hw *hw = &adapter->hw;
5180 u32 ims_mask = IMS_ENABLE_MASK;
5182 if (hw->mac.type == e1000_82574) {
5183 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5184 ims_mask |= EM_MSIX_MASK;
5186 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
/*
 * em_disable_intr - mask all adapter interrupts by clearing IMC;
 * on 82574 the MSI-X auto-clear register is zeroed first.
 */
5190 em_disable_intr(struct adapter *adapter)
5192 struct e1000_hw *hw = &adapter->hw;
5194 if (hw->mac.type == e1000_82574)
5195 E1000_WRITE_REG(hw, EM_EIAC, 0);
5196 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5200 * Bit of a misnomer, what this really means is
5201 * to enable OS management of the system... aka
5202 * to disable special hardware management features
/*
 * em_init_manageability - hand manageability traffic to the OS:
 * stop the firmware from intercepting ARP and route management
 * packets (ports 623/664) to the host instead. No-op unless the
 * adapter reported a management engine (has_manage).
 */
5205 em_init_manageability(struct adapter *adapter)
5207 /* A shared code workaround */
5208 #define E1000_82542_MANC2H E1000_MANC2H
5209 if (adapter->has_manage) {
5210 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5211 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5213 /* disable hardware interception of ARP */
5214 manc &= ~(E1000_MANC_ARP_EN);
5216 /* enable receiving management packets to the host */
5217 manc |= E1000_MANC_EN_MNG2HOST;
5218 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5219 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5220 manc2h |= E1000_MNG2HOST_PORT_623;
5221 manc2h |= E1000_MNG2HOST_PORT_664;
5222 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5223 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5228 * Give control back to hardware management
5229 * controller if there is one.
/*
 * em_release_manageability - inverse of em_init_manageability():
 * re-enable firmware ARP interception and stop routing management
 * packets to the host. Called when the driver relinquishes control.
 */
5232 em_release_manageability(struct adapter *adapter)
5234 if (adapter->has_manage) {
5235 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5237 /* re-enable hardware interception of ARP */
5238 manc |= E1000_MANC_ARP_EN;
5239 manc &= ~E1000_MANC_EN_MNG2HOST;
5241 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5246 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5247 * For ASF and Pass Through versions of f/w this means
5248 * that the driver is loaded. For AMT version type f/w
5249 * this means that the network i/f is open.
/*
 * em_get_hw_control - set the DRV_LOAD bit (SWSM on 82573, CTRL_EXT
 * elsewhere) to tell firmware the driver has taken ownership of the
 * device. See the block comment above for ASF/AMT semantics.
 */
5252 em_get_hw_control(struct adapter *adapter)
5256 if (adapter->hw.mac.type == e1000_82573) {
5257 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5258 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5259 swsm | E1000_SWSM_DRV_LOAD);
/* All other MAC types use CTRL_EXT for the DRV_LOAD handshake */
5263 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5264 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5265 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5270 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5271 * For ASF and Pass Through versions of f/w this means that
5272 * the driver is no longer loaded. For AMT versions of the
5273 * f/w this means that the network i/f is closed.
/*
 * em_release_hw_control - clear the DRV_LOAD bit, returning device
 * ownership to the firmware. Only meaningful when a management
 * engine is present (has_manage).
 */
5276 em_release_hw_control(struct adapter *adapter)
5280 if (!adapter->has_manage)
5283 if (adapter->hw.mac.type == e1000_82573) {
5284 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5285 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5286 swsm & ~E1000_SWSM_DRV_LOAD);
/* All other MAC types use CTRL_EXT for the DRV_LOAD handshake */
5290 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5291 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5292 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
/*
 * em_is_valid_ether_addr - reject multicast (low bit of first octet
 * set) and all-zero MAC addresses; the success/failure return values
 * fall outside this extract.
 */
5297 em_is_valid_ether_addr(u8 *addr)
5299 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5301 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5309 ** Parse the interface capabilities with regard
5310 ** to both system management and wake-on-lan for
/*
 * em_get_wakeup - probe management/AMT capability and read the
 * wake-on-LAN enablement from NVM (or WUC on newer ICH/PCH parts),
 * then apply per-device quirks where the EEPROM is known to be
 * wrong (dual-fiber and quad-port 82571 boards).
 */
5314 em_get_wakeup(device_t dev)
5316 struct adapter *adapter = device_get_softc(dev);
5317 u16 eeprom_data = 0, device_id, apme_mask;
5319 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5320 apme_mask = EM_EEPROM_APME;
5322 switch (adapter->hw.mac.type) {
5325 adapter->has_amt = TRUE;
5329 case e1000_80003es2lan:
/* Second port reads its APME bit from the port-B init word */
5330 if (adapter->hw.bus.func == 1) {
5331 e1000_read_nvm(&adapter->hw,
5332 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5335 e1000_read_nvm(&adapter->hw,
5336 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5340 case e1000_ich10lan:
/* ICH/PCH family: APME lives in the WUC register, not NVM */
5345 apme_mask = E1000_WUC_APME;
5346 adapter->has_amt = TRUE;
5347 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5350 e1000_read_nvm(&adapter->hw,
5351 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
/* Default WoL events when APME is enabled: magic packet + multicast */
5354 if (eeprom_data & apme_mask)
5355 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5357 * We have the eeprom settings, now apply the special cases
5358 * where the eeprom may be wrong or the board won't support
5359 * wake on lan on a particular port
5361 device_id = pci_get_device(dev);
5362 switch (device_id) {
5363 case E1000_DEV_ID_82571EB_FIBER:
5364 /* Wake events only supported on port A for dual fiber
5365 * regardless of eeprom setting */
5366 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5367 E1000_STATUS_FUNC_1)
5370 case E1000_DEV_ID_82571EB_QUAD_COPPER:
5371 case E1000_DEV_ID_82571EB_QUAD_FIBER:
5372 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5373 /* if quad port adapter, disable WoL on all but port A */
5374 if (global_quad_port_a != 0)
5376 /* Reset for multiple quad port adapters */
5377 if (++global_quad_port_a == 4)
5378 global_quad_port_a = 0;
5386 * Enable PCI Wake On Lan capability
/*
 * em_enable_wakeup - arm the adapter for wake-on-LAN at suspend/stop.
 *
 * Trims adapter->wol to the events the interface has enabled, then
 * programs WUC/WUFC (via the PHY on PCH-class parts), applies ICH/IGP
 * suspend workarounds, and finally sets PME enable in PCI power
 * management config space when WoL is in effect.
 */
5389 em_enable_wakeup(device_t dev)
5391 struct adapter *adapter = device_get_softc(dev);
5392 struct ifnet *ifp = adapter->ifp;
5394 u32 pmc, ctrl, ctrl_ext, rctl;
/* No PCI power-management capability: nothing to arm */
5397 if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5401 ** Determine type of Wakeup: note that wol
5402 ** is set with all bits on by default.
5404 if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5405 adapter->wol &= ~E1000_WUFC_MAG;
5407 if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5408 adapter->wol &= ~E1000_WUFC_MC;
/* Multicast wake requires promiscuous multicast receive */
5410 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5411 rctl |= E1000_RCTL_MPE;
5412 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
/* Nothing left enabled: skip the arming sequence entirely */
5415 if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5418 /* Advertise the wakeup capability */
5419 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5420 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5421 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5423 /* Keep the laser running on Fiber adapters */
5424 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5425 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5426 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5427 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5428 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5431 if ((adapter->hw.mac.type == e1000_ich8lan) ||
5432 (adapter->hw.mac.type == e1000_pchlan) ||
5433 (adapter->hw.mac.type == e1000_ich9lan) ||
5434 (adapter->hw.mac.type == e1000_ich10lan))
5435 e1000_suspend_workarounds_ich8lan(&adapter->hw);
/* PCH-class parts wake through the PHY rather than the MAC */
5437 if ((adapter->hw.mac.type == e1000_pchlan) ||
5438 (adapter->hw.mac.type == e1000_pch2lan) ||
5439 (adapter->hw.mac.type == e1000_pch_lpt) ||
5440 (adapter->hw.mac.type == e1000_pch_spt)) {
5441 error = em_enable_phy_wakeup(adapter);
5445 /* Enable wakeup by the MAC */
5446 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5447 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5450 if (adapter->hw.phy.type == e1000_phy_igp_3)
5451 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
/* Reflect the final WoL state in PCI PM config space (PME enable) */
5454 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5455 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5456 if (!error && (ifp->if_capenable & IFCAP_WOL))
5457 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5458 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5464 ** WOL in the newer chipset interfaces (pchlan)
5465 ** requires settings to be copied into the phy
/*
 * em_enable_phy_wakeup - mirror the MAC's wake configuration into the
 * PHY for pchlan-class devices, where the PHY (not the MAC) handles
 * wake events: copies RAR/MTA filters and RCTL settings to the BM PHY
 * registers, programs WUC/WUFC on both sides, and sets the host-wakeup
 * enable bits on PHY page 769 under the PHY semaphore.
 */
5468 em_enable_phy_wakeup(struct adapter *adapter)
5470 struct e1000_hw *hw = &adapter->hw;
5474 /* copy MAC RARs to PHY RARs */
5475 e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5477 /* copy MAC MTA to PHY MTA */
5478 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5479 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
/* Each 32-bit MTA word is split across two 16-bit PHY registers */
5480 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5481 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5482 (u16)((mreg >> 16) & 0xFFFF));
5485 /* configure PHY Rx Control register */
5486 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5487 mreg = E1000_READ_REG(hw, E1000_RCTL);
5488 if (mreg & E1000_RCTL_UPE)
5489 preg |= BM_RCTL_UPE;
5490 if (mreg & E1000_RCTL_MPE)
5491 preg |= BM_RCTL_MPE;
5492 preg &= ~(BM_RCTL_MO_MASK);
5493 if (mreg & E1000_RCTL_MO_3)
5494 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5495 << BM_RCTL_MO_SHIFT);
5496 if (mreg & E1000_RCTL_BAM)
5497 preg |= BM_RCTL_BAM;
5498 if (mreg & E1000_RCTL_PMCF)
5499 preg |= BM_RCTL_PMCF;
5500 mreg = E1000_READ_REG(hw, E1000_CTRL);
5501 if (mreg & E1000_CTRL_RFCE)
5502 preg |= BM_RCTL_RFCE;
5503 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5505 /* enable PHY wakeup in MAC register */
5506 E1000_WRITE_REG(hw, E1000_WUC,
5507 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5508 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5510 /* configure and enable PHY wakeup in PHY registers */
5511 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5512 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5514 /* activate PHY wakeup */
5515 ret = hw->phy.ops.acquire(hw);
5517 printf("Could not acquire PHY\n");
/* Select wakeup-control page 769, then set the enable bits via MDIC */
5520 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5521 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5522 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5524 printf("Could not read PHY page 769\n");
5527 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5528 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5530 printf("Could not set PHY Host Wakeup bit\n");
5532 hw->phy.ops.release(hw);
/*
 * em_led_func - led(4) callback: turn the identify LED on or off
 * ('onoff' non-zero = on) under the core lock, using the shared-code
 * setup/cleanup helpers around the state change.
 */
5538 em_led_func(void *arg, int onoff)
5540 struct adapter *adapter = arg;
5542 EM_CORE_LOCK(adapter);
5544 e1000_setup_led(&adapter->hw);
5545 e1000_led_on(&adapter->hw);
5547 e1000_led_off(&adapter->hw);
5548 e1000_cleanup_led(&adapter->hw);
5550 EM_CORE_UNLOCK(adapter);
5554 ** Disable the L0S and L1 LINK states
/*
 * em_disable_aspm - clear the ASPM (L0s/L1) enable bits in the PCIe
 * Link Control register for MAC types known to misbehave with ASPM.
 * No-op if the device lacks a PCIe capability or does not advertise
 * ASPM support in Link Capabilities.
 */
5557 em_disable_aspm(struct adapter *adapter)
5560 u16 link_cap,link_ctrl;
5561 device_t dev = adapter->dev;
5563 switch (adapter->hw.mac.type) {
5571 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5573 reg = base + PCIER_LINK_CAP;
5574 link_cap = pci_read_config(dev, reg, 2);
5575 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5577 reg = base + PCIER_LINK_CTL;
5578 link_ctrl = pci_read_config(dev, reg, 2);
/* Clear both L0s and L1 enable bits */
5579 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5580 pci_write_config(dev, reg, link_ctrl, 2);
5584 /**********************************************************************
5586 * Update the board statistics counters.
5588 **********************************************************************/
/*
 * em_update_stats_counters - accumulate the hardware statistics
 * registers (which clear on read) into the driver's soft counters,
 * then derive the ifnet collision/ierror/oerror totals from them.
 */
5590 em_update_stats_counters(struct adapter *adapter)
/* Symbol/sequence errors are only meaningful with copper or link up */
5594 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5595 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5596 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5597 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5599 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5600 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5601 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5602 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5604 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5605 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5606 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5607 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5608 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5609 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5610 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5611 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5612 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5613 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5614 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5615 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5616 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5617 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5618 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5619 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5620 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5621 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5622 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5623 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5625 /* For the 64-bit byte counters the low dword must be read first. */
5626 /* Both registers clear on the read of the high dword */
5628 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5629 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5630 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5631 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5633 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5634 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5635 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5636 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5637 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5639 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5640 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5642 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5643 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5644 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5645 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5646 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5647 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5648 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5649 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5650 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5651 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5653 /* Interrupt Counts */
5655 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5656 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5657 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5658 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5659 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5660 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5661 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5662 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5663 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
/* These counters only exist on 82543 and newer MACs */
5665 if (adapter->hw.mac.type >= e1000_82543) {
5666 adapter->stats.algnerrc +=
5667 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5668 adapter->stats.rxerrc +=
5669 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5670 adapter->stats.tncrs +=
5671 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5672 adapter->stats.cexterr +=
5673 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5674 adapter->stats.tsctc +=
5675 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5676 adapter->stats.tsctfc +=
5677 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
/* Derive the aggregate ifnet counters from the hardware totals */
5681 ifp->if_collisions = adapter->stats.colc;
5684 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5685 adapter->stats.crcerrs + adapter->stats.algnerrc +
5686 adapter->stats.ruc + adapter->stats.roc +
5687 adapter->stats.mpc + adapter->stats.cexterr;
5690 ifp->if_oerrors = adapter->stats.ecol +
5691 adapter->stats.latecol + adapter->watchdog_events;
5694 /* Export a single 32-bit register via a read-only sysctl. */
/*
 * em_sysctl_reg_handler - sysctl handler exposing one 32-bit device
 * register: oid_arg1 is the adapter, oid_arg2 the register offset;
 * the current value is read fresh on every request.
 */
5696 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5698 struct adapter *adapter;
5701 adapter = oidp->oid_arg1;
5702 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5703 return (sysctl_handle_int(oidp, &val, 0, req));
5707 * Add sysctl variables, one per statistic, to the system.
5710 em_add_hw_stats(struct adapter *adapter)
5712 device_t dev = adapter->dev;
5714 struct tx_ring *txr = adapter->tx_rings;
5715 struct rx_ring *rxr = adapter->rx_rings;
5717 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5718 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5719 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5720 struct e1000_hw_stats *stats = &adapter->stats;
5722 struct sysctl_oid *stat_node, *queue_node, *int_node;
5723 struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5725 #define QUEUE_NAME_LEN 32
5726 char namebuf[QUEUE_NAME_LEN];
5728 /* Driver Statistics */
5729 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5730 CTLFLAG_RD, &adapter->dropped_pkts,
5731 "Driver dropped packets");
5732 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5733 CTLFLAG_RD, &adapter->link_irq,
5734 "Link MSIX IRQ Handled");
5735 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5736 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5737 "Defragmenting mbuf chain failed");
5738 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5739 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5740 "Driver tx dma failure in xmit");
5741 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5742 CTLFLAG_RD, &adapter->rx_overruns,
5744 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5745 CTLFLAG_RD, &adapter->watchdog_events,
5746 "Watchdog timeouts");
5748 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5749 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5750 em_sysctl_reg_handler, "IU",
5751 "Device Control Register");
5752 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5753 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5754 em_sysctl_reg_handler, "IU",
5755 "Receiver Control Register");
5756 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5757 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5758 "Flow Control High Watermark");
5759 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5760 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5761 "Flow Control Low Watermark");
5763 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5764 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5765 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5766 CTLFLAG_RD, NULL, "TX Queue Name");
5767 queue_list = SYSCTL_CHILDREN(queue_node);
5769 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5770 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5772 em_sysctl_reg_handler, "IU",
5773 "Transmit Descriptor Head");
5774 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5775 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5777 em_sysctl_reg_handler, "IU",
5778 "Transmit Descriptor Tail");
5779 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5780 CTLFLAG_RD, &txr->tx_irq,
5781 "Queue MSI-X Transmit Interrupts");
5782 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5783 CTLFLAG_RD, &txr->no_desc_avail,
5784 "Queue No Descriptor Available");
5786 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5787 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5788 CTLFLAG_RD, NULL, "RX Queue Name");
5789 queue_list = SYSCTL_CHILDREN(queue_node);
5791 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5792 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5794 em_sysctl_reg_handler, "IU",
5795 "Receive Descriptor Head");
5796 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5797 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5799 em_sysctl_reg_handler, "IU",
5800 "Receive Descriptor Tail");
5801 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5802 CTLFLAG_RD, &rxr->rx_irq,
5803 "Queue MSI-X Receive Interrupts");
5806 /* MAC stats get their own sub node */
5808 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5809 CTLFLAG_RD, NULL, "Statistics");
5810 stat_list = SYSCTL_CHILDREN(stat_node);
5812 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5813 CTLFLAG_RD, &stats->ecol,
5814 "Excessive collisions");
5815 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5816 CTLFLAG_RD, &stats->scc,
5817 "Single collisions");
5818 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5819 CTLFLAG_RD, &stats->mcc,
5820 "Multiple collisions");
5821 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5822 CTLFLAG_RD, &stats->latecol,
5824 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5825 CTLFLAG_RD, &stats->colc,
5827 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5828 CTLFLAG_RD, &adapter->stats.symerrs,
5830 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5831 CTLFLAG_RD, &adapter->stats.sec,
5833 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5834 CTLFLAG_RD, &adapter->stats.dc,
5836 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5837 CTLFLAG_RD, &adapter->stats.mpc,
5839 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5840 CTLFLAG_RD, &adapter->stats.rnbc,
5841 "Receive No Buffers");
5842 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5843 CTLFLAG_RD, &adapter->stats.ruc,
5844 "Receive Undersize");
5845 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5846 CTLFLAG_RD, &adapter->stats.rfc,
5847 "Fragmented Packets Received ");
5848 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5849 CTLFLAG_RD, &adapter->stats.roc,
5850 "Oversized Packets Received");
5851 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5852 CTLFLAG_RD, &adapter->stats.rjc,
5854 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5855 CTLFLAG_RD, &adapter->stats.rxerrc,
5857 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5858 CTLFLAG_RD, &adapter->stats.crcerrs,
5860 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5861 CTLFLAG_RD, &adapter->stats.algnerrc,
5862 "Alignment Errors");
5863 /* On 82575 these are collision counts */
5864 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5865 CTLFLAG_RD, &adapter->stats.cexterr,
5866 "Collision/Carrier extension errors");
5867 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5868 CTLFLAG_RD, &adapter->stats.xonrxc,
5870 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5871 CTLFLAG_RD, &adapter->stats.xontxc,
5873 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5874 CTLFLAG_RD, &adapter->stats.xoffrxc,
5876 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5877 CTLFLAG_RD, &adapter->stats.xofftxc,
5878 "XOFF Transmitted");
5880 /* Packet Reception Stats */
5881 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5882 CTLFLAG_RD, &adapter->stats.tpr,
5883 "Total Packets Received ");
5884 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5885 CTLFLAG_RD, &adapter->stats.gprc,
5886 "Good Packets Received");
5887 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5888 CTLFLAG_RD, &adapter->stats.bprc,
5889 "Broadcast Packets Received");
5890 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5891 CTLFLAG_RD, &adapter->stats.mprc,
5892 "Multicast Packets Received");
5893 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5894 CTLFLAG_RD, &adapter->stats.prc64,
5895 "64 byte frames received ");
5896 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5897 CTLFLAG_RD, &adapter->stats.prc127,
5898 "65-127 byte frames received");
5899 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5900 CTLFLAG_RD, &adapter->stats.prc255,
5901 "128-255 byte frames received");
5902 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5903 CTLFLAG_RD, &adapter->stats.prc511,
5904 "256-511 byte frames received");
5905 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5906 CTLFLAG_RD, &adapter->stats.prc1023,
5907 "512-1023 byte frames received");
5908 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5909 CTLFLAG_RD, &adapter->stats.prc1522,
5910 "1023-1522 byte frames received");
5911 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5912 CTLFLAG_RD, &adapter->stats.gorc,
5913 "Good Octets Received");
5915 /* Packet Transmission Stats */
5916 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5917 CTLFLAG_RD, &adapter->stats.gotc,
5918 "Good Octets Transmitted");
5919 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5920 CTLFLAG_RD, &adapter->stats.tpt,
5921 "Total Packets Transmitted");
5922 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5923 CTLFLAG_RD, &adapter->stats.gptc,
5924 "Good Packets Transmitted");
5925 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5926 CTLFLAG_RD, &adapter->stats.bptc,
5927 "Broadcast Packets Transmitted");
5928 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5929 CTLFLAG_RD, &adapter->stats.mptc,
5930 "Multicast Packets Transmitted");
5931 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5932 CTLFLAG_RD, &adapter->stats.ptc64,
5933 "64 byte frames transmitted ");
5934 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5935 CTLFLAG_RD, &adapter->stats.ptc127,
5936 "65-127 byte frames transmitted");
5937 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5938 CTLFLAG_RD, &adapter->stats.ptc255,
5939 "128-255 byte frames transmitted");
5940 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5941 CTLFLAG_RD, &adapter->stats.ptc511,
5942 "256-511 byte frames transmitted");
5943 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5944 CTLFLAG_RD, &adapter->stats.ptc1023,
5945 "512-1023 byte frames transmitted");
5946 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5947 CTLFLAG_RD, &adapter->stats.ptc1522,
5948 "1024-1522 byte frames transmitted");
5949 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5950 CTLFLAG_RD, &adapter->stats.tsctc,
5951 "TSO Contexts Transmitted");
5952 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5953 CTLFLAG_RD, &adapter->stats.tsctfc,
5954 "TSO Contexts Failed");
5957 /* Interrupt Stats */
5959 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5960 CTLFLAG_RD, NULL, "Interrupt Statistics");
5961 int_list = SYSCTL_CHILDREN(int_node);
5963 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5964 CTLFLAG_RD, &adapter->stats.iac,
5965 "Interrupt Assertion Count");
5967 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5968 CTLFLAG_RD, &adapter->stats.icrxptc,
5969 "Interrupt Cause Rx Pkt Timer Expire Count");
5971 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5972 CTLFLAG_RD, &adapter->stats.icrxatc,
5973 "Interrupt Cause Rx Abs Timer Expire Count");
5975 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5976 CTLFLAG_RD, &adapter->stats.ictxptc,
5977 "Interrupt Cause Tx Pkt Timer Expire Count");
5979 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5980 CTLFLAG_RD, &adapter->stats.ictxatc,
5981 "Interrupt Cause Tx Abs Timer Expire Count");
5983 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5984 CTLFLAG_RD, &adapter->stats.ictxqec,
5985 "Interrupt Cause Tx Queue Empty Count");
5987 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5988 CTLFLAG_RD, &adapter->stats.ictxqmtc,
5989 "Interrupt Cause Tx Queue Min Thresh Count");
5991 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5992 CTLFLAG_RD, &adapter->stats.icrxdmtc,
5993 "Interrupt Cause Rx Desc Min Thresh Count");
5995 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5996 CTLFLAG_RD, &adapter->stats.icrxoc,
5997 "Interrupt Cause Receiver Overrun Count");
6000 /**********************************************************************
6002 * This routine provides a way to dump out the adapter eeprom,
6003 * often a useful debug/service tool. This only dumps the first
6004 * 32 words, stuff that matters is in that extent.
6006 **********************************************************************/
/*
 * Sysctl handler: on a write of the trigger value, hex-dump the first
 * 32 16-bit words of the adapter EEPROM/NVM to the console via
 * em_print_nvm_info().  Reads report the current (dummy) value.
 * NOTE(review): the trigger-value comparison is not visible in this
 * view — confirm which written value fires the dump.
 */
6008 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6010 	struct adapter *adapter = (struct adapter *)arg1;
	/* Copy value to/from userland; a read-only access leaves newptr NULL. */
6015 	error = sysctl_handle_int(oidp, &result, 0, req);
6017 	if (error || !req->newptr)
6021 	 * This value will cause a hex dump of the
6022 	 * first 32 16-bit words of the EEPROM to
6026 	em_print_nvm_info(adapter);
/*
 * Dump the first 32 16-bit words of the adapter NVM (EEPROM) to the
 * console as a hex table, 8 words per row with a leading offset label.
 * Debug/service aid only; output goes to printf(), not device_printf().
 */
6032 em_print_nvm_info(struct adapter *adapter)
6037 	/* Its a bit crude, but it gets the job done */
6038 	printf("\nInterface EEPROM Dump:\n");
6039 	printf("Offset\n0x0000 ");
6040 	for (i = 0, j = 0; i < 32; i++, j++) {
6041 		if (j == 8) { /* Make the offset block */
6043 			printf("\n0x00%x0 ",row);
6045 		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6046 		printf("%04x ", eeprom_data);
/*
 * Sysctl handler for the interrupt-delay tunables (TIDV/TADV/RDTR/RADV,
 * and ITR).  The user value is in microseconds; it is converted to
 * device ticks and written into the low 16 bits of the register named
 * by info->offset, under the adapter core lock.
 */
6052 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6054 	struct em_int_delay_info *info;
6055 	struct adapter *adapter;
6057 	int error, usecs, ticks;
6059 	info = (struct em_int_delay_info *)arg1;
6060 	usecs = info->value;
6061 	error = sysctl_handle_int(oidp, &usecs, 0, req);
6062 	if (error != 0 || req->newptr == NULL)
	/* Reject values outside what a 16-bit tick register can hold. */
6064 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6066 	info->value = usecs;
6067 	ticks = EM_USECS_TO_TICKS(usecs);
6068 	if (info->offset == E1000_ITR) /* units are 256ns here */
6071 	adapter = info->adapter;
6073 	EM_CORE_LOCK(adapter);
	/* Read-modify-write: only the low 16 bits carry the delay value. */
6074 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6075 	regval = (regval & ~0xffff) | (ticks & 0xffff);
6076 	/* Handle a few special cases. */
6077 	switch (info->offset) {
	/* NOTE(review): case labels are elided in this view; the IDE bit
	 * manipulation below presumably belongs to the TIDV case — confirm. */
6082 		adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6083 		/* Don't write 0 into the TIDV register. */
6086 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6089 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6090 	EM_CORE_UNLOCK(adapter);
/*
 * Register a read/write sysctl for one interrupt-delay tunable.
 * Initializes the em_int_delay_info cookie (adapter back-pointer,
 * register offset, initial value) and attaches em_sysctl_int_delay
 * as the handler under the device's sysctl tree.
 */
6095 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6096 const char *description, struct em_int_delay_info *info,
6097 int offset, int value)
6099 	info->adapter = adapter;
6100 	info->offset = offset;
6101 	info->value = value;
6102 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6103 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6104 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6105 	    info, 0, em_sysctl_int_delay, "I", description);
/*
 * Convenience wrapper: expose an integer limit as a read/write sysctl
 * under the device's tree.  *limit is both the backing storage and the
 * live value the driver reads.
 */
6109 em_set_sysctl_value(struct adapter *adapter, const char *name,
6110 const char *description, int *limit, int value)
6113 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6114 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6115 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6120 ** Set flow control using sysctl:
6121 ** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full (default)
/*
 * Sysctl handler: set the flow-control mode.  Valid inputs map onto
 * enum e1000_fc_mode; the new mode is stored in both the hw struct and
 * adapter->fc, then forced onto the MAC via e1000_force_mac_fc().
 *
 * NOTE(review): `input` is function-static, so its cached value is
 * shared by every em(4) instance in the system — a read on unit 1 can
 * report the value last written on unit 0.  Confirm whether this is
 * intentional or should live in the per-adapter softc.
 */
6128 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6131 	static int input = 3; /* default is full */
6132 	struct adapter *adapter = (struct adapter *) arg1;
6134 	error = sysctl_handle_int(oidp, &input, 0, req);
6136 	if ((error) || (req->newptr == NULL))
6139 	if (input == adapter->fc) /* no change? */
	/* Accept only recognized e1000_fc_mode values. */
6143 	case e1000_fc_rx_pause:
6144 	case e1000_fc_tx_pause:
6147 		adapter->hw.fc.requested_mode = input;
6148 		adapter->fc = input;
	/* Apply immediately rather than waiting for the next link reset. */
6155 	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6156 	e1000_force_mac_fc(&adapter->hw);
6161 ** Manage Energy Efficient Ethernet:
6163 ** Exposed value mirrors hw.dev_spec.ich8lan.eee_disable:
**	0 - EEE enabled, nonzero - EEE disabled
/*
 * Sysctl handler: toggle Energy Efficient Ethernet.  The exported value
 * mirrors hw.dev_spec.ich8lan.eee_disable (so 1 means EEE is OFF); any
 * nonzero write disables EEE.  The adapter is re-initialized under the
 * core lock so the new setting takes effect immediately.
 */
6166 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6168 	struct adapter *adapter = (struct adapter *) arg1;
6171 	value = adapter->hw.dev_spec.ich8lan.eee_disable;
6172 	error = sysctl_handle_int(oidp, &value, 0, req);
6173 	if (error || req->newptr == NULL)
6175 	EM_CORE_LOCK(adapter);
6176 	adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6177 	em_init_locked(adapter);
6178 	EM_CORE_UNLOCK(adapter);
/*
 * Sysctl handler: on a write (the expected trigger value is elided in
 * this view — confirm), dump driver debug state to the console via
 * em_print_debug_info().
 */
6183 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6185 	struct adapter *adapter;
6190 	error = sysctl_handle_int(oidp, &result, 0, req);
6192 	if (error || !req->newptr)
6196 	adapter = (struct adapter *)arg1;
6197 	em_print_debug_info(adapter);
6204 ** This routine is meant to be fluid, add whatever is
6205 ** needed for debugging a problem. -jfv
/*
 * Print interface run state plus per-queue TX/RX debug info: the
 * hardware head/tail descriptor registers (read live from the chip),
 * queue status, descriptor-availability counters, and RX ring indices.
 * Intentionally fluid debug aid — extend as needed.
 */
6208 em_print_debug_info(struct adapter *adapter)
6210 	device_t dev = adapter->dev;
6211 	struct tx_ring *txr = adapter->tx_rings;
6212 	struct rx_ring *rxr = adapter->rx_rings;
	/* Interface run/active flags, reported on the console. */
6214 	if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6215 		printf("Interface is RUNNING ");
6217 		printf("Interface is NOT RUNNING\n");
6219 	if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6220 		printf("and INACTIVE\n");
6222 		printf("and ACTIVE\n");
	/* Walk every queue pair; txr/rxr advance in lock-step with i. */
6224 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6225 		device_printf(dev, "TX Queue %d ------\n", i);
6226 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6227 		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6228 		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6229 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6230 		device_printf(dev, "TX descriptors avail = %d\n",
6232 		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6233 		    txr->no_desc_avail);
6234 		device_printf(dev, "RX Queue %d ------\n", i);
6235 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6236 		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6237 		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6238 		device_printf(dev, "RX discarded packets = %ld\n",
6240 		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6241 		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6245 #ifdef EM_MULTIQUEUE
6248 * Write a new value to the EEPROM increasing the number of MSIX
6249 * vectors from 3 to 5, for proper multiqueue support.
/*
 * 82574 only: rewrite the NVM PCIe control word so the device reports
 * 5 MSI-X vectors instead of 3, which proper multiqueue operation
 * requires.  The encoded field value 4 corresponds to 5 vectors.  Only
 * writes (and recomputes the NVM checksum) when the field differs.
 */
6252 em_enable_vectors_82574(struct adapter *adapter)
6254 	struct e1000_hw *hw = &adapter->hw;
6255 	device_t dev = adapter->dev;
6258 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
	/* NOTE(review): bare printf() of the raw capability word looks like
	 * leftover debug output — consider device_printf or removal. */
6259 	printf("Current cap: %#06x\n", edata);
6260 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6261 		device_printf(dev, "Writing to eeprom: increasing "
6262 		    "reported MSIX vectors from 3 to 5...\n");
6263 		edata &= ~(EM_NVM_MSIX_N_MASK);
6264 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6265 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6266 		e1000_update_nvm_checksum(hw);
6267 		device_printf(dev, "Writing to eeprom: done\n");
/*
 * DDB command "em_reset_dev": walk every attached em(4) unit and
 * re-initialize it under the core lock.  Debugger-only helper.
 */
6273 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6278 	dc = devclass_find("em");
6279 	max_em = devclass_get_maxunit(dc);
	/* NOTE(review): devclass_get_maxunit() returns one past the highest
	 * unit, so "< (max_em - 1)" skips the last unit — confirm whether
	 * the bound should be "< max_em".  Also confirm that NULL entries
	 * from devclass_get_device() are tolerated downstream. */
6281 	for (int index = 0; index < (max_em - 1); index++) {
6283 		dev = devclass_get_device(dc, index);
6284 		if (device_get_driver(dev) == &em_driver) {
6285 			struct adapter *adapter = device_get_softc(dev);
6286 			EM_CORE_LOCK(adapter);
6287 			em_init_locked(adapter);
6288 			EM_CORE_UNLOCK(adapter);
6292 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6297 dc = devclass_find("em");
6298 max_em = devclass_get_maxunit(dc);
6300 for (int index = 0; index < (max_em - 1); index++) {
6302 dev = devclass_get_device(dc, index);
6303 if (device_get_driver(dev) == &em_driver)
6304 em_print_debug_info(device_get_softc(dev));