1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
38 #include "opt_inet6.h"
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/types.h>
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
59 #include <sys/module.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
71 #include <net/ethernet.h>
73 #include <net/if_arp.h>
74 #include <net/if_dl.h>
75 #include <net/if_media.h>
77 #include <net/if_types.h>
78 #include <net/if_vlan_var.h>
80 #include <netinet/in_systm.h>
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip6.h>
85 #include <netinet/tcp.h>
86 #include <netinet/udp.h>
88 #include <machine/in_cksum.h>
89 #include <dev/led/led.h>
90 #include <dev/pci/pcivar.h>
91 #include <dev/pci/pcireg.h>
93 #include "e1000_api.h"
94 #include "e1000_82571.h"
97 /*********************************************************************
99 *********************************************************************/
100 char em_driver_version[] = "7.6.1-k";
102 /*********************************************************************
103 * PCI Device ID Table
105 * Used by probe to select devices to load on
106 * Last field stores an index into e1000_strings
107 * Last entry must be all 0s
109 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110 *********************************************************************/
112 static em_vendor_info_t em_vendor_info_array[] =
114 /* Intel(R) PRO/1000 Network Connection */
115 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
116 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
119 PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
123 PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
125 PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
127 PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
129 PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
140 PCI_ANY_ID, PCI_ANY_ID, 0},
141 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
142 PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
144 PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
181 PCI_ANY_ID, PCI_ANY_ID, 0},
182 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
183 PCI_ANY_ID, PCI_ANY_ID, 0},
184 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
187 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
191 PCI_ANY_ID, PCI_ANY_ID, 0},
192 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
194 PCI_ANY_ID, PCI_ANY_ID, 0},
195 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
196 PCI_ANY_ID, PCI_ANY_ID, 0},
197 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
198 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
199 PCI_ANY_ID, PCI_ANY_ID, 0},
200 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
/*
 * NOTE(review): the four SPT I219 LM4/V4/LM5/V5 entries below duplicate
 * the entries immediately above (original lines 195-200).  Harmless at
 * runtime -- em_probe() stops at the first match -- but the duplicates
 * should be removed.
 */
201 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
202 PCI_ANY_ID, PCI_ANY_ID, 0},
203 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
204 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
205 PCI_ANY_ID, PCI_ANY_ID, 0},
206 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
207 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
208 PCI_ANY_ID, PCI_ANY_ID, 0},
209 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
210 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
211 PCI_ANY_ID, PCI_ANY_ID, 0},
212 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
213 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
214 PCI_ANY_ID, PCI_ANY_ID, 0},
215 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
216 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
217 PCI_ANY_ID, PCI_ANY_ID, 0},
218 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
219 /* required last entry */
223 /*********************************************************************
224 * Table of branding strings for all supported NICs.
225 *********************************************************************/
/*
 * Branding strings indexed by em_vendor_info_t.index; every table entry
 * above uses index 0, so this single string covers all supported parts.
 */
227 static char *em_strings[] = {
228 "Intel(R) PRO/1000 Network Connection"
231 /*********************************************************************
232 * Function prototypes
233 *********************************************************************/
234 static int em_probe(device_t);
235 static int em_attach(device_t);
236 static int em_detach(device_t);
237 static int em_shutdown(device_t);
238 static int em_suspend(device_t);
239 static int em_resume(device_t);
241 static int em_mq_start(struct ifnet *, struct mbuf *);
242 static int em_mq_start_locked(struct ifnet *,
244 static void em_qflush(struct ifnet *);
246 static void em_start(struct ifnet *);
247 static void em_start_locked(struct ifnet *, struct tx_ring *);
249 static int em_ioctl(struct ifnet *, u_long, caddr_t);
250 static void em_init(void *);
251 static void em_init_locked(struct adapter *);
252 static void em_stop(void *);
253 static void em_media_status(struct ifnet *, struct ifmediareq *);
254 static int em_media_change(struct ifnet *);
255 static void em_identify_hardware(struct adapter *);
256 static int em_allocate_pci_resources(struct adapter *);
257 static int em_allocate_legacy(struct adapter *);
258 static int em_allocate_msix(struct adapter *);
259 static int em_allocate_queues(struct adapter *);
260 static int em_setup_msix(struct adapter *);
261 static void em_free_pci_resources(struct adapter *);
262 static void em_local_timer(void *);
263 static void em_reset(struct adapter *);
264 static int em_setup_interface(device_t, struct adapter *);
265 static void em_flush_desc_rings(struct adapter *);
267 static void em_setup_transmit_structures(struct adapter *);
268 static void em_initialize_transmit_unit(struct adapter *);
269 static int em_allocate_transmit_buffers(struct tx_ring *);
270 static void em_free_transmit_structures(struct adapter *);
271 static void em_free_transmit_buffers(struct tx_ring *);
273 static int em_setup_receive_structures(struct adapter *);
274 static int em_allocate_receive_buffers(struct rx_ring *);
275 static void em_initialize_receive_unit(struct adapter *);
276 static void em_free_receive_structures(struct adapter *);
277 static void em_free_receive_buffers(struct rx_ring *);
279 static void em_enable_intr(struct adapter *);
280 static void em_disable_intr(struct adapter *);
281 static void em_update_stats_counters(struct adapter *);
282 static void em_add_hw_stats(struct adapter *adapter);
283 static void em_txeof(struct tx_ring *);
284 static bool em_rxeof(struct rx_ring *, int, int *);
285 #ifndef __NO_STRICT_ALIGNMENT
286 static int em_fixup_rx(struct rx_ring *);
288 static void em_setup_rxdesc(union e1000_rx_desc_extended *,
289 const struct em_rxbuffer *rxbuf);
290 static void em_receive_checksum(uint32_t status, struct mbuf *);
291 static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
292 struct ip *, u32 *, u32 *);
293 static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
294 struct tcphdr *, u32 *, u32 *);
295 static void em_set_promisc(struct adapter *);
296 static void em_disable_promisc(struct adapter *);
297 static void em_set_multi(struct adapter *);
298 static void em_update_link_status(struct adapter *);
299 static void em_refresh_mbufs(struct rx_ring *, int);
300 static void em_register_vlan(void *, struct ifnet *, u16);
301 static void em_unregister_vlan(void *, struct ifnet *, u16);
302 static void em_setup_vlan_hw_support(struct adapter *);
303 static int em_xmit(struct tx_ring *, struct mbuf **);
304 static int em_dma_malloc(struct adapter *, bus_size_t,
305 struct em_dma_alloc *, int);
306 static void em_dma_free(struct adapter *, struct em_dma_alloc *);
307 static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
308 static void em_print_nvm_info(struct adapter *);
309 static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
310 static void em_print_debug_info(struct adapter *);
311 static int em_is_valid_ether_addr(u8 *);
312 static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
313 static void em_add_int_delay_sysctl(struct adapter *, const char *,
314 const char *, struct em_int_delay_info *, int, int);
315 /* Management and WOL Support */
316 static void em_init_manageability(struct adapter *);
317 static void em_release_manageability(struct adapter *);
318 static void em_get_hw_control(struct adapter *);
319 static void em_release_hw_control(struct adapter *);
320 static void em_get_wakeup(device_t);
321 static void em_enable_wakeup(device_t);
322 static int em_enable_phy_wakeup(struct adapter *);
323 static void em_led_func(void *, int);
324 static void em_disable_aspm(struct adapter *);
326 static int em_irq_fast(void *);
329 static void em_msix_tx(void *);
330 static void em_msix_rx(void *);
331 static void em_msix_link(void *);
332 static void em_handle_tx(void *context, int pending);
333 static void em_handle_rx(void *context, int pending);
334 static void em_handle_link(void *context, int pending);
337 static void em_enable_vectors_82574(struct adapter *);
340 static void em_set_sysctl_value(struct adapter *, const char *,
341 const char *, int *, int);
342 static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
343 static int em_sysctl_eee(SYSCTL_HANDLER_ARGS);
345 static __inline void em_rx_discard(struct rx_ring *, int);
347 #ifdef DEVICE_POLLING
348 static poll_handler_t em_poll;
351 /*********************************************************************
352 * FreeBSD Device Interface Entry Points
353 *********************************************************************/
/* newbus device_if method table; the DEVMETHOD_END terminator is elided here. */
355 static device_method_t em_methods[] = {
356 /* Device interface */
357 DEVMETHOD(device_probe, em_probe),
358 DEVMETHOD(device_attach, em_attach),
359 DEVMETHOD(device_detach, em_detach),
360 DEVMETHOD(device_shutdown, em_shutdown),
361 DEVMETHOD(device_suspend, em_suspend),
362 DEVMETHOD(device_resume, em_resume),
/* Driver declaration: softc is the per-device struct adapter. */
366 static driver_t em_driver = {
367 "em", em_methods, sizeof(struct adapter),
370 devclass_t em_devclass;
/* Register "em" on the pci bus and declare its module dependencies. */
371 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
372 MODULE_DEPEND(em, pci, 1, 1, 1);
373 MODULE_DEPEND(em, ether, 1, 1, 1);
375 /*********************************************************************
376 * Tunable default values.
377 *********************************************************************/
379 #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
380 #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
383 #define MAX_INTS_PER_SEC 8000
384 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
386 #define TSO_WORKAROUND 4
388 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
390 static int em_disable_crc_stripping = 0;
391 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
392 &em_disable_crc_stripping, 0, "Disable CRC Stripping");
394 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
395 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
396 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
397 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
398 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
399 0, "Default transmit interrupt delay in usecs");
400 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
401 0, "Default receive interrupt delay in usecs");
403 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
404 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
405 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
406 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
407 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
408 &em_tx_abs_int_delay_dflt, 0,
409 "Default transmit interrupt delay limit in usecs");
410 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
411 &em_rx_abs_int_delay_dflt, 0,
412 "Default receive interrupt delay limit in usecs");
414 static int em_rxd = EM_DEFAULT_RXD;
415 static int em_txd = EM_DEFAULT_TXD;
416 TUNABLE_INT("hw.em.rxd", &em_rxd);
417 TUNABLE_INT("hw.em.txd", &em_txd);
418 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
419 "Number of receive descriptors per queue");
420 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
421 "Number of transmit descriptors per queue");
423 static int em_smart_pwr_down = FALSE;
424 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
425 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
426 0, "Set to true to leave smart power down enabled on newer adapters");
428 /* Controls whether promiscuous also shows bad packets */
429 static int em_debug_sbp = FALSE;
430 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
431 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
432 "Show bad packets in promiscuous mode");
434 static int em_enable_msix = TRUE;
435 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
436 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
437 "Enable MSI-X interrupts");
440 static int em_num_queues = 1;
441 TUNABLE_INT("hw.em.num_queues", &em_num_queues);
442 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
443 "82574 only: Number of queues to configure, 0 indicates autoconfigure");
447 ** Global variable to store last used CPU when binding queues
448 ** to CPUs in em_allocate_msix. Starts at CPU_FIRST and increments when a
449 ** queue is bound to a cpu.
451 static int em_last_bind_cpu = -1;
453 /* How many packets rxeof tries to clean at a time */
454 static int em_rx_process_limit = 100;
455 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
456 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
457 &em_rx_process_limit, 0,
458 "Maximum number of received packets to process "
459 "at a time, -1 means unlimited");
461 /* Energy efficient ethernet - default to OFF */
462 static int eee_setting = 1;
463 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
464 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
465 "Enable Energy Efficient Ethernet");
467 /* Global used in WOL setup with multiport cards */
468 static int global_quad_port_a = 0;
470 #ifdef DEV_NETMAP /* see ixgbe.c for details */
471 #include <dev/netmap/if_em_netmap.h>
472 #endif /* DEV_NETMAP */
474 /*********************************************************************
475 * Device identification routine
477 * em_probe determines if the driver should be loaded on
478 * adapter based on PCI vendor/device id of the adapter.
480 * return BUS_PROBE_DEFAULT on success, positive on failure
481 *********************************************************************/
/*
 * Match this PCI device against em_vendor_info_array.  Vendor and device
 * IDs must match exactly; subvendor/subdevice match when equal or when the
 * table entry holds the PCI_ANY_ID wildcard.  On a hit, set the device
 * description and return BUS_PROBE_DEFAULT to claim the device.
 */
484 em_probe(device_t dev)
486 char adapter_name[60];
487 uint16_t pci_vendor_id = 0;
488 uint16_t pci_device_id = 0;
489 uint16_t pci_subvendor_id = 0;
490 uint16_t pci_subdevice_id = 0;
491 em_vendor_info_t *ent;
493 INIT_DEBUGOUT("em_probe: begin");
/* Only Intel (EM_VENDOR_ID) parts are candidates; bail early otherwise. */
495 pci_vendor_id = pci_get_vendor(dev);
496 if (pci_vendor_id != EM_VENDOR_ID)
499 pci_device_id = pci_get_device(dev);
500 pci_subvendor_id = pci_get_subvendor(dev);
501 pci_subdevice_id = pci_get_subdevice(dev);
/* Walk the table until the required all-zero terminator entry. */
503 ent = em_vendor_info_array;
504 while (ent->vendor_id != 0) {
505 if ((pci_vendor_id == ent->vendor_id) &&
506 (pci_device_id == ent->device_id) &&
508 ((pci_subvendor_id == ent->subvendor_id) ||
509 (ent->subvendor_id == PCI_ANY_ID)) &&
511 ((pci_subdevice_id == ent->subdevice_id) ||
512 (ent->subdevice_id == PCI_ANY_ID))) {
/* NOTE(review): em_strings[] has one entry, so ent->index is always 0 here. */
513 sprintf(adapter_name, "%s %s",
514 em_strings[ent->index],
516 device_set_desc_copy(dev, adapter_name);
517 return (BUS_PROBE_DEFAULT);
525 /*********************************************************************
526 * Device initialization routine
528 * The attach entry point is called when the driver is being loaded.
529 * This routine identifies the type of hardware, allocates all resources
530 * and initializes the hardware.
532 * return 0 on success, positive on failure
533 *********************************************************************/
/*
 * Device attach: identify the MAC, map PCI/flash resources, initialize
 * the e1000 shared code, size the descriptor rings, allocate queues and
 * interrupts, create the ifnet, and register sysctls/event handlers.
 * Returns 0 on success; on failure the trailing error labels unwind in
 * reverse order of acquisition.
 */
536 em_attach(device_t dev)
538 struct adapter *adapter;
542 INIT_DEBUGOUT("em_attach: begin");
/* Honor a "hint.em.N.disabled" device hint and refuse to attach. */
544 if (resource_disabled("em", device_get_unit(dev))) {
545 device_printf(dev, "Disabled by device hint\n");
549 adapter = device_get_softc(dev);
550 adapter->dev = adapter->osdep.dev = dev;
552 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
/* Per-device sysctl handlers: NVM dump, debug dump, flow control. */
555 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
556 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
557 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
558 em_sysctl_nvm_info, "I", "NVM Information");
560 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
561 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
562 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
563 em_sysctl_debug_info, "I", "Debug Information");
565 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
566 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
567 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
568 em_set_flowcntl, "I", "Flow Control");
/* Watchdog/link timer runs under the core mutex. */
570 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
572 /* Determine hardware and mac info */
573 em_identify_hardware(adapter);
575 /* Setup PCI resources */
576 if (em_allocate_pci_resources(adapter)) {
577 device_printf(dev, "Allocation of PCI resources failed\n");
583 ** For ICH8 and family we need to
584 ** map the flash memory, and this
585 ** must happen after the MAC is
588 if ((hw->mac.type == e1000_ich8lan) ||
589 (hw->mac.type == e1000_ich9lan) ||
590 (hw->mac.type == e1000_ich10lan) ||
591 (hw->mac.type == e1000_pchlan) ||
592 (hw->mac.type == e1000_pch2lan) ||
593 (hw->mac.type == e1000_pch_lpt)) {
594 int rid = EM_BAR_TYPE_FLASH;
595 adapter->flash = bus_alloc_resource_any(dev,
596 SYS_RES_MEMORY, &rid, RF_ACTIVE);
597 if (adapter->flash == NULL) {
598 device_printf(dev, "Mapping of Flash failed\n");
602 /* This is used in the shared code */
603 hw->flash_address = (u8 *)adapter->flash;
604 adapter->osdep.flash_bus_space_tag =
605 rman_get_bustag(adapter->flash);
606 adapter->osdep.flash_bus_space_handle =
607 rman_get_bushandle(adapter->flash);
610 ** In the new SPT device flash is not a
611 ** separate BAR, rather it is also in BAR0,
612 ** so use the same tag and an offset handle for the
613 ** FLASH read/write macros in the shared code.
615 else if (hw->mac.type >= e1000_pch_spt) {
616 adapter->osdep.flash_bus_space_tag =
617 adapter->osdep.mem_bus_space_tag;
618 adapter->osdep.flash_bus_space_handle =
619 adapter->osdep.mem_bus_space_handle
620 + E1000_FLASH_BASE_ADDR;
623 /* Do Shared Code initialization */
624 error = e1000_setup_init_funcs(hw, TRUE);
626 device_printf(dev, "Setup of Shared code failed, error %d\n",
633 * Setup MSI/X or MSI if PCI Express
635 adapter->msix = em_setup_msix(adapter);
637 e1000_get_bus_info(hw);
639 /* Set up some sysctls for the tunable interrupt delays */
640 em_add_int_delay_sysctl(adapter, "rx_int_delay",
641 "receive interrupt delay in usecs", &adapter->rx_int_delay,
642 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
643 em_add_int_delay_sysctl(adapter, "tx_int_delay",
644 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
645 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
646 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
647 "receive interrupt delay limit in usecs",
648 &adapter->rx_abs_int_delay,
649 E1000_REGISTER(hw, E1000_RADV),
650 em_rx_abs_int_delay_dflt);
651 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
652 "transmit interrupt delay limit in usecs",
653 &adapter->tx_abs_int_delay,
654 E1000_REGISTER(hw, E1000_TADV),
655 em_tx_abs_int_delay_dflt);
656 em_add_int_delay_sysctl(adapter, "itr",
657 "interrupt delay limit in usecs/4",
659 E1000_REGISTER(hw, E1000_ITR),
662 /* Sysctl for limiting the amount of work done in the taskqueue */
663 em_set_sysctl_value(adapter, "rx_processing_limit",
664 "max number of rx packets to process", &adapter->rx_process_limit,
665 em_rx_process_limit);
668 * Validate number of transmit and receive descriptors. It
669 * must not exceed hardware maximum, and must be multiple
670 * of E1000_DBA_ALIGN.
672 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
673 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
674 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
675 EM_DEFAULT_TXD, em_txd);
676 adapter->num_tx_desc = EM_DEFAULT_TXD;
678 adapter->num_tx_desc = em_txd;
680 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
681 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
682 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
683 EM_DEFAULT_RXD, em_rxd);
684 adapter->num_rx_desc = EM_DEFAULT_RXD;
686 adapter->num_rx_desc = em_rxd;
/* Default PHY/link policy: autonegotiate without blocking attach. */
688 hw->mac.autoneg = DO_AUTO_NEG;
689 hw->phy.autoneg_wait_to_complete = FALSE;
690 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
693 if (hw->phy.media_type == e1000_media_type_copper) {
694 hw->phy.mdix = AUTO_ALL_MODES;
695 hw->phy.disable_polarity_correction = FALSE;
696 hw->phy.ms_type = EM_MASTER_SLAVE;
700 * Set the frame limits assuming
701 * standard ethernet sized frames.
703 adapter->hw.mac.max_frame_size =
704 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
707 * This controls when hardware reports transmit completion
710 hw->mac.report_tx_early = 1;
713 ** Get queue/ring memory
715 if (em_allocate_queues(adapter)) {
720 /* Allocate multicast array memory. */
721 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
722 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
723 if (adapter->mta == NULL) {
724 device_printf(dev, "Can not allocate multicast setup array\n");
729 /* Check SOL/IDER usage */
730 if (e1000_check_reset_block(hw))
731 device_printf(dev, "PHY reset is blocked"
732 " due to SOL/IDER session.\n");
734 /* Sysctl for setting Energy Efficient Ethernet */
/* eee_setting feeds eee_disable: nonzero means EEE is off (the default). */
735 hw->dev_spec.ich8lan.eee_disable = eee_setting;
736 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
737 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
738 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
739 adapter, 0, em_sysctl_eee, "I",
740 "Disable Energy Efficient Ethernet");
743 ** Start from a known state, this is
744 ** important in reading the nvm and
750 /* Make sure we have a good EEPROM before we read from it */
751 if (e1000_validate_nvm_checksum(hw) < 0) {
753 ** Some PCI-E parts fail the first check due to
754 ** the link being in sleep state, call it again,
755 ** if it fails a second time its a real issue.
757 if (e1000_validate_nvm_checksum(hw) < 0) {
759 "The EEPROM Checksum Is Not Valid\n");
765 /* Copy the permanent MAC address out of the EEPROM */
766 if (e1000_read_mac_addr(hw) < 0) {
767 device_printf(dev, "EEPROM read error while reading MAC"
773 if (!em_is_valid_ether_addr(hw->mac.addr)) {
774 device_printf(dev, "Invalid MAC address\n");
779 /* Disable ULP support */
780 e1000_disable_ulp_lpt_lp(hw, TRUE);
783 ** Do interrupt configuration
785 if (adapter->msix > 1) /* Do MSIX */
786 error = em_allocate_msix(adapter);
787 else /* MSI or Legacy */
788 error = em_allocate_legacy(adapter);
793 * Get Wake-on-Lan and Management info for later use
797 /* Setup OS specific network interface */
798 if (em_setup_interface(dev, adapter) != 0)
803 /* Initialize statistics */
804 em_update_stats_counters(adapter);
/* Force a link-status query on first interrupt/tick. */
806 hw->mac.get_link_status = 1;
807 em_update_link_status(adapter);
809 /* Register for VLAN events */
810 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
811 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
812 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
813 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
815 em_add_hw_stats(adapter);
817 /* Non-AMT based hardware can now take control from firmware */
818 if (adapter->has_manage && !adapter->has_amt)
819 em_get_hw_control(adapter);
821 /* Tell the stack that the interface is not active */
822 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
823 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
825 adapter->led_dev = led_create(em_led_func, adapter,
826 device_get_nameunit(dev));
828 em_netmap_attach(adapter);
829 #endif /* DEV_NETMAP */
831 INIT_DEBUGOUT("em_attach: end");
/* Error unwind: release in reverse order of acquisition (labels elided). */
836 em_free_transmit_structures(adapter);
837 em_free_receive_structures(adapter);
838 em_release_hw_control(adapter);
839 if (adapter->ifp != NULL)
840 if_free(adapter->ifp);
842 em_free_pci_resources(adapter);
843 free(adapter->mta, M_DEVBUF);
844 EM_CORE_LOCK_DESTROY(adapter);
849 /*********************************************************************
850 * Device removal routine
852 * The detach entry point is called when the driver is being removed.
853 * This routine stops the adapter and deallocates all the resources
854 * that were allocated for driver operation.
856 * return 0 on success, positive on failure
857 *********************************************************************/
/*
 * Device detach: refuse while VLANs are attached, stop the hardware,
 * unhook event handlers and the ifnet, then free all resources acquired
 * in em_attach (in roughly reverse order).
 */
860 em_detach(device_t dev)
862 struct adapter *adapter = device_get_softc(dev);
863 struct ifnet *ifp = adapter->ifp;
865 INIT_DEBUGOUT("em_detach: begin");
867 /* Make sure VLANS are not using driver */
868 if (adapter->ifp->if_vlantrunk != NULL) {
869 device_printf(dev,"Vlan in use, detach first\n");
873 #ifdef DEVICE_POLLING
874 if (ifp->if_capenable & IFCAP_POLLING)
875 ether_poll_deregister(ifp);
878 if (adapter->led_dev != NULL)
879 led_destroy(adapter->led_dev);
/* in_detach stops timer/intr paths from touching the ifnet during teardown. */
881 EM_CORE_LOCK(adapter);
882 adapter->in_detach = 1;
884 EM_CORE_UNLOCK(adapter);
885 EM_CORE_LOCK_DESTROY(adapter);
887 e1000_phy_hw_reset(&adapter->hw);
889 em_release_manageability(adapter);
890 em_release_hw_control(adapter);
892 /* Unregister VLAN events */
893 if (adapter->vlan_attach != NULL)
894 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
895 if (adapter->vlan_detach != NULL)
896 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
/* callout_drain waits for a running timer callback to finish. */
898 ether_ifdetach(adapter->ifp);
899 callout_drain(&adapter->timer);
903 #endif /* DEV_NETMAP */
905 em_free_pci_resources(adapter);
906 bus_generic_detach(dev);
909 em_free_transmit_structures(adapter);
910 em_free_receive_structures(adapter);
912 em_release_hw_control(adapter);
913 free(adapter->mta, M_DEVBUF);
918 /*********************************************************************
920 * Shutdown entry point
922 **********************************************************************/
/* System shutdown is handled identically to suspend: quiesce and arm wakeup. */
925 em_shutdown(device_t dev)
927 return em_suspend(dev);
931 * Suspend/resume device methods.
/*
 * Suspend: release manageability/hardware control back to firmware and
 * arm Wake-on-LAN before handing off to the generic bus suspend.
 */
934 em_suspend(device_t dev)
936 struct adapter *adapter = device_get_softc(dev);
938 EM_CORE_LOCK(adapter);
940 em_release_manageability(adapter);
941 em_release_hw_control(adapter);
942 em_enable_wakeup(dev);
944 EM_CORE_UNLOCK(adapter);
946 return bus_generic_suspend(dev);
/* Resume: re-run PCH workarounds where needed, reinit the adapter, and
 * kick any queued transmit work if the link is already up. */
950 em_resume(device_t dev)
952 struct adapter *adapter = device_get_softc(dev);
953 struct tx_ring *txr = adapter->tx_rings;
954 struct ifnet *ifp = adapter->ifp;
956 EM_CORE_LOCK(adapter);
957 if (adapter->hw.mac.type == e1000_pch2lan)
958 e1000_resume_workarounds_pchlan(&adapter->hw);
959 em_init_locked(adapter);
960 em_init_manageability(adapter);
962 if ((ifp->if_flags & IFF_UP) &&
963 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
964 for (int i = 0; i < adapter->num_queues; i++, txr++) {
/* Multiqueue vs. legacy start paths (the #ifdef lines are missing
 * from this extract — presumably EM_MULTIQUEUE selects between them). */
967 if (!drbr_empty(ifp, txr->br))
968 em_mq_start_locked(ifp, txr);
970 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
971 em_start_locked(ifp, txr);
976 EM_CORE_UNLOCK(adapter);
978 return bus_generic_resume(dev);
982 #ifndef EM_MULTIQUEUE
/* Legacy (single-queue) transmit start. Caller holds the TX ring lock.
 * Dequeues from if_snd, hands frames to em_xmit(), requeues on failure.
 * NOTE(review): lossy extract — braces/continue/break lines missing. */
984 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
986 struct adapter *adapter = ifp->if_softc;
989 EM_TX_LOCK_ASSERT(txr);
991 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
995 if (!adapter->link_active)
998 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
999 /* Call cleanup if number of TX descriptors low */
1000 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1002 if (txr->tx_avail < EM_MAX_SCATTER) {
1003 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1006 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1010 * Encapsulation can modify our pointer, and or make it
1011 * NULL on failure. In that event, we can't requeue.
1013 if (em_xmit(txr, &m_head)) {
1016 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1020 /* Mark the queue as having work */
1021 if (txr->busy == EM_TX_IDLE)
1022 txr->busy = EM_TX_BUSY;
1024 /* Send a copy of the frame to the BPF listener */
1025 ETHER_BPF_MTAP(ifp, m_head);
/* ifnet if_start entry: takes the TX lock (line missing in this
 * extract — TODO confirm) and calls em_start_locked when running. */
1033 em_start(struct ifnet *ifp)
1035 struct adapter *adapter = ifp->if_softc;
1036 struct tx_ring *txr = adapter->tx_rings;
1038 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1040 em_start_locked(ifp, txr);
1045 #else /* EM_MULTIQUEUE */
1046 /*********************************************************************
1047 * Multiqueue Transmit routines
1049 * em_mq_start is called by the stack to initiate a transmit.
1050 * however, if busy the driver can queue the request rather
1051 * than do an immediate send. It is this that is an advantage
1052 * in this driver, rather than also having multiple tx queues.
1053 **********************************************************************/
1055 ** Multiqueue capable stack interface
/* Select a TX ring by flowid hash (or current CPU when no hash),
 * enqueue into the ring's buf_ring, and either drain it now if the
 * TX lock is free or defer to the ring's taskqueue. */
1058 em_mq_start(struct ifnet *ifp, struct mbuf *m)
1060 struct adapter *adapter = ifp->if_softc;
1061 struct tx_ring *txr = adapter->tx_rings;
1062 unsigned int i, error;
1064 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1065 i = m->m_pkthdr.flowid % adapter->num_queues;
1067 i = curcpu % adapter->num_queues;
1069 txr = &adapter->tx_rings[i];
1071 error = drbr_enqueue(ifp, txr->br, m);
1075 if (EM_TX_TRYLOCK(txr)) {
1076 em_mq_start_locked(ifp, txr);
1079 taskqueue_enqueue(txr->tq, &txr->tx_task);
/* Drain the ring's buf_ring under the TX lock: peek, try em_xmit(),
 * putback on transient failure / advance on success, update stats,
 * then set OACTIVE when descriptors run low. */
1085 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1087 struct adapter *adapter = txr->adapter;
1089 int err = 0, enq = 0;
1091 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1092 IFF_DRV_RUNNING || adapter->link_active == 0) {
1096 /* Process the queue */
1097 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1098 if ((err = em_xmit(txr, &next)) != 0) {
1100 /* It was freed, move forward */
1101 drbr_advance(ifp, txr->br);
1104 * Still have one left, it may not be
1105 * the same since the transmit function
1106 * may have changed it.
1108 drbr_putback(ifp, txr->br, next);
1112 drbr_advance(ifp, txr->br);
1114 ifp->if_obytes += next->m_pkthdr.len;
1115 if (next->m_flags & M_MCAST)
1117 ETHER_BPF_MTAP(ifp, next);
1118 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1122 /* Mark the queue as having work */
1123 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1124 txr->busy = EM_TX_BUSY;
/* NOTE(review): the two identical tx_avail checks look odd — lines are
 * missing between them in this extract (likely an em_txeof() call). */
1126 if (txr->tx_avail < EM_MAX_SCATTER)
1128 if (txr->tx_avail < EM_MAX_SCATTER) {
1129 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1135 ** Flush all ring buffers
/* qflush: dequeue and free every mbuf held in each ring's buf_ring
 * (the m_freem() call is missing from this extract — TODO confirm). */
1138 em_qflush(struct ifnet *ifp)
1140 struct adapter *adapter = ifp->if_softc;
1141 struct tx_ring *txr = adapter->tx_rings;
1144 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1146 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1152 #endif /* EM_MULTIQUEUE */
1154 /*********************************************************************
1157 * em_ioctl is called when the user wants to configure the
1160 * return 0 on success, positive on failure
1161 **********************************************************************/
/* NOTE(review): lossy extract — the switch(command) line, most case
 * labels, break statements and closing braces are missing below. */
1164 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1166 struct adapter *adapter = ifp->if_softc;
1167 struct ifreq *ifr = (struct ifreq *)data;
1168 #if defined(INET) || defined(INET6)
1169 struct ifaddr *ifa = (struct ifaddr *)data;
1171 bool avoid_reset = FALSE;
1174 if (adapter->in_detach)
/* SIOCSIFADDR path: avoid a full reinit (link renegotiation) for
 * plain INET/INET6 address changes when possible. */
1180 if (ifa->ifa_addr->sa_family == AF_INET)
1184 if (ifa->ifa_addr->sa_family == AF_INET6)
1188 ** Calling init results in link renegotiation,
1189 ** so we avoid doing it when possible.
1192 ifp->if_flags |= IFF_UP;
1193 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1196 if (!(ifp->if_flags & IFF_NOARP))
1197 arp_ifinit(ifp, ifa);
1200 error = ether_ioctl(ifp, command, data);
/* SIOCSIFMTU: per-MAC maximum frame size, then reinit if running. */
1206 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1208 EM_CORE_LOCK(adapter);
1209 switch (adapter->hw.mac.type) {
1213 case e1000_ich10lan:
1220 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1221 max_frame_size = 9234;
1224 max_frame_size = 4096;
1226 /* Adapters that do not support jumbo frames */
1228 max_frame_size = ETHER_MAX_LEN;
1231 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1233 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1235 EM_CORE_UNLOCK(adapter);
1240 ifp->if_mtu = ifr->ifr_mtu;
1241 adapter->hw.mac.max_frame_size =
1242 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1243 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1244 em_init_locked(adapter);
1245 EM_CORE_UNLOCK(adapter);
/* SIOCSIFFLAGS: only touch promisc/allmulti when those bits changed;
 * otherwise reinit or stop as the UP flag dictates. */
1249 IOCTL_DEBUGOUT("ioctl rcv'd:\
1250 SIOCSIFFLAGS (Set Interface Flags)");
1251 EM_CORE_LOCK(adapter);
1252 if (ifp->if_flags & IFF_UP) {
1253 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1254 if ((ifp->if_flags ^ adapter->if_flags) &
1255 (IFF_PROMISC | IFF_ALLMULTI)) {
1256 em_disable_promisc(adapter);
1257 em_set_promisc(adapter);
1260 em_init_locked(adapter);
1262 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1264 adapter->if_flags = ifp->if_flags;
1265 EM_CORE_UNLOCK(adapter);
/* Multicast list changed: reload the MTA with interrupts masked. */
1269 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1270 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1271 EM_CORE_LOCK(adapter);
1272 em_disable_intr(adapter);
1273 em_set_multi(adapter);
1274 #ifdef DEVICE_POLLING
1275 if (!(ifp->if_capenable & IFCAP_POLLING))
1277 em_enable_intr(adapter);
1278 EM_CORE_UNLOCK(adapter);
/* Media ioctls are refused while an SOL/IDER session holds the PHY. */
1282 /* Check SOL/IDER usage */
1283 EM_CORE_LOCK(adapter);
1284 if (e1000_check_reset_block(&adapter->hw)) {
1285 EM_CORE_UNLOCK(adapter);
1286 device_printf(adapter->dev, "Media change is"
1287 " blocked due to SOL/IDER session.\n");
1290 EM_CORE_UNLOCK(adapter);
1293 IOCTL_DEBUGOUT("ioctl rcv'd: \
1294 SIOCxIFMEDIA (Get/Set Interface Media)");
1295 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
/* SIOCSIFCAP: toggle each capability bit that differs; most toggles
 * set a (missing-from-extract) reinit flag consumed at line 1351. */
1301 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1303 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1304 #ifdef DEVICE_POLLING
1305 if (mask & IFCAP_POLLING) {
1306 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1307 error = ether_poll_register(em_poll, ifp);
1310 EM_CORE_LOCK(adapter);
1311 em_disable_intr(adapter);
1312 ifp->if_capenable |= IFCAP_POLLING;
1313 EM_CORE_UNLOCK(adapter);
1315 error = ether_poll_deregister(ifp);
1316 /* Enable interrupt even in error case */
1317 EM_CORE_LOCK(adapter);
1318 em_enable_intr(adapter);
1319 ifp->if_capenable &= ~IFCAP_POLLING;
1320 EM_CORE_UNLOCK(adapter);
1324 if (mask & IFCAP_HWCSUM) {
1325 ifp->if_capenable ^= IFCAP_HWCSUM;
1328 if (mask & IFCAP_TSO4) {
1329 ifp->if_capenable ^= IFCAP_TSO4;
1332 if (mask & IFCAP_VLAN_HWTAGGING) {
1333 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1336 if (mask & IFCAP_VLAN_HWFILTER) {
1337 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1340 if (mask & IFCAP_VLAN_HWTSO) {
1341 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1344 if ((mask & IFCAP_WOL) &&
1345 (ifp->if_capabilities & IFCAP_WOL) != 0) {
1346 if (mask & IFCAP_WOL_MCAST)
1347 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1348 if (mask & IFCAP_WOL_MAGIC)
1349 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1351 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1353 VLAN_CAPABILITIES(ifp);
/* default: fall through to the generic Ethernet ioctl handler. */
1358 error = ether_ioctl(ifp, command, data);
1366 /*********************************************************************
1369 * This routine is used in two ways. It is used by the stack as
1370 * init entry point in network interface structure. It is also used
1371 * by the driver as a hw/sw initialization routine to get to a
1374 * return 0 on success, positive on failure
1375 **********************************************************************/
/* Full (re)initialization with the core lock held: program MAC address,
 * reset HW, set up TX/RX rings, multicast, VLAN offload, timers and
 * interrupts. NOTE(review): lossy extract — some lines are missing. */
1378 em_init_locked(struct adapter *adapter)
1380 struct ifnet *ifp = adapter->ifp;
1381 device_t dev = adapter->dev;
1383 INIT_DEBUGOUT("em_init: begin");
1385 EM_CORE_LOCK_ASSERT(adapter);
1387 em_disable_intr(adapter);
1388 callout_stop(&adapter->timer);
1390 /* Get the latest mac address, User can use a LAA */
1391 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1394 /* Put the address into the Receive Address Array */
1395 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1398 * With the 82571 adapter, RAR[0] may be overwritten
1399 * when the other port is reset, we make a duplicate
1400 * in RAR[14] for that eventuality, this assures
1401 * the interface continues to function.
1403 if (adapter->hw.mac.type == e1000_82571) {
1404 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1405 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1406 E1000_RAR_ENTRIES - 1);
1409 /* Initialize the hardware */
1411 em_update_link_status(adapter);
1413 /* Setup VLAN support, basic and offload if available */
1414 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1416 /* Set hardware offload abilities */
1417 if (ifp->if_capenable & IFCAP_TXCSUM)
1418 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1420 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1422 /* Configure for OS presence */
1423 em_init_manageability(adapter);
1425 /* Prepare transmit descriptors and buffers */
1426 em_setup_transmit_structures(adapter);
1427 em_initialize_transmit_unit(adapter);
1429 /* Setup Multicast table */
1430 em_set_multi(adapter);
1433 ** Figure out the desired mbuf
1434 ** pool for doing jumbos
1436 if (adapter->hw.mac.max_frame_size <= 2048)
1437 adapter->rx_mbuf_sz = MCLBYTES;
1438 #ifndef CONTIGMALLOC_WORKS
1440 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1442 else if (adapter->hw.mac.max_frame_size <= 4096)
1443 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1445 adapter->rx_mbuf_sz = MJUM9BYTES;
1448 /* Prepare receive descriptors and buffers */
1449 if (em_setup_receive_structures(adapter)) {
1450 device_printf(dev, "Could not setup receive structures\n");
1454 em_initialize_receive_unit(adapter);
1456 /* Use real VLAN Filter support? */
1457 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1458 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1459 /* Use real VLAN Filter support */
1460 em_setup_vlan_hw_support(adapter);
/* Otherwise just enable VLAN tag stripping via CTRL.VME. */
1463 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1464 ctrl |= E1000_CTRL_VME;
1465 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1469 /* Don't lose promiscuous settings */
1470 em_set_promisc(adapter);
1472 /* Set the interface as ACTIVE */
1473 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1474 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1476 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1477 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1479 /* MSI/X configuration for 82574 */
1480 if (adapter->hw.mac.type == e1000_82574) {
1482 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1483 tmp |= E1000_CTRL_EXT_PBA_CLR;
1484 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1485 /* Set the IVAR - interrupt vector routing. */
1486 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1489 #ifdef DEVICE_POLLING
1491 * Only enable interrupts if we are not polling, make sure
1492 * they are off otherwise.
1494 if (ifp->if_capenable & IFCAP_POLLING)
1495 em_disable_intr(adapter);
1497 #endif /* DEVICE_POLLING */
1498 em_enable_intr(adapter);
1500 /* AMT based hardware can now take control from firmware */
1501 if (adapter->has_manage && adapter->has_amt)
1502 em_get_hw_control(adapter);
/* NOTE(review): body of em_init() — its signature line was lost in this
 * extract. Presumably the ifnet if_init entry point: takes the core
 * lock and delegates to em_init_locked(). TODO confirm against the
 * full source. */
1508 struct adapter *adapter = arg;
1510 EM_CORE_LOCK(adapter);
1511 em_init_locked(adapter);
1512 EM_CORE_UNLOCK(adapter);
1516 #ifdef DEVICE_POLLING
1517 /*********************************************************************
1519 * Legacy polling routine: note this only works with single queue
1521 *********************************************************************/
/* DEVICE_POLLING handler: check link on POLL_AND_CHECK_STATUS, then
 * run RX cleanup and restart transmit on the (single) TX ring. */
1523 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1525 struct adapter *adapter = ifp->if_softc;
1526 struct tx_ring *txr = adapter->tx_rings;
1527 struct rx_ring *rxr = adapter->rx_rings;
1531 EM_CORE_LOCK(adapter);
1532 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1533 EM_CORE_UNLOCK(adapter);
1537 if (cmd == POLL_AND_CHECK_STATUS) {
1538 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1539 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1540 callout_stop(&adapter->timer);
1541 adapter->hw.mac.get_link_status = 1;
1542 em_update_link_status(adapter);
1543 callout_reset(&adapter->timer, hz,
1544 em_local_timer, adapter);
1547 EM_CORE_UNLOCK(adapter);
1549 em_rxeof(rxr, count, &rx_done);
1553 #ifdef EM_MULTIQUEUE
1554 if (!drbr_empty(ifp, txr->br))
1555 em_mq_start_locked(ifp, txr);
1557 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1558 em_start_locked(ifp, txr);
1564 #endif /* DEVICE_POLLING */
1567 /*********************************************************************
1569 * Fast Legacy/MSI Combined Interrupt Service routine
1571 *********************************************************************/
/* Filter-level ISR: claim or reject the interrupt from ICR, mask
 * interrupts, and defer RX/TX and link work to taskqueues. */
1573 em_irq_fast(void *arg)
1575 struct adapter *adapter = arg;
1581 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* All-ones read means the device is gone (hot-unplug). */
1584 if (reg_icr == 0xffffffff)
1585 return FILTER_STRAY;
1587 /* Definitely not our interrupt. */
1589 return FILTER_STRAY;
1592 * Starting with the 82571 chip, bit 31 should be used to
1593 * determine whether the interrupt belongs to us.
1595 if (adapter->hw.mac.type >= e1000_82571 &&
1596 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1597 return FILTER_STRAY;
1599 em_disable_intr(adapter);
1600 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1602 /* Link status change */
1603 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1604 adapter->hw.mac.get_link_status = 1;
1605 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1608 if (reg_icr & E1000_ICR_RXO)
1609 adapter->rx_overruns++;
1610 return FILTER_HANDLED;
1613 /* Combined RX/TX handler, used by Legacy and MSI */
/* Taskqueue context: clean RX, restart TX, and either requeue
 * ourselves when more work remains or re-enable interrupts. */
1615 em_handle_que(void *context, int pending)
1617 struct adapter *adapter = context;
1618 struct ifnet *ifp = adapter->ifp;
1619 struct tx_ring *txr = adapter->tx_rings;
1620 struct rx_ring *rxr = adapter->rx_rings;
1622 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1623 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1627 #ifdef EM_MULTIQUEUE
1628 if (!drbr_empty(ifp, txr->br))
1629 em_mq_start_locked(ifp, txr);
1631 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1632 em_start_locked(ifp, txr);
1636 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1641 em_enable_intr(adapter);
1646 /*********************************************************************
1648 * MSIX Interrupt Service Routines
1650 **********************************************************************/
/* Per-ring TX MSI-X handler: restart transmit on this ring, then
 * unmask the ring's interrupt cause via IMS. */
1652 em_msix_tx(void *arg)
1654 struct tx_ring *txr = arg;
1655 struct adapter *adapter = txr->adapter;
1656 struct ifnet *ifp = adapter->ifp;
1661 #ifdef EM_MULTIQUEUE
1662 if (!drbr_empty(ifp, txr->br))
1663 em_mq_start_locked(ifp, txr);
1665 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1666 em_start_locked(ifp, txr);
1669 /* Reenable this interrupt */
1670 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1675 /*********************************************************************
1677 * MSIX RX Interrupt Service routine
1679 **********************************************************************/
/* Per-ring RX MSI-X handler: clean the ring; if work remains defer to
 * the ring's taskqueue, otherwise unmask this vector via IMS. */
1682 em_msix_rx(void *arg)
1684 struct rx_ring *rxr = arg;
1685 struct adapter *adapter = rxr->adapter;
1689 if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1691 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1693 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1695 /* Reenable this interrupt */
1696 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1701 /*********************************************************************
1703 * MSIX Link Fast Interrupt Service routine
1705 **********************************************************************/
/* Link-vector MSI-X handler: count the IRQ, handle overrun and link
 * change causes, then re-arm the link vector. */
1707 em_msix_link(void *arg)
1709 struct adapter *adapter = arg;
1712 ++adapter->link_irq;
1713 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1715 if (reg_icr & E1000_ICR_RXO)
1716 adapter->rx_overruns++;
1718 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1719 adapter->hw.mac.get_link_status = 1;
1720 em_handle_link(adapter, 0);
1722 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1723 EM_MSIX_LINK | E1000_IMS_LSC);
1725 ** Because we must read the ICR for this interrupt
1726 ** it may clear other causes using autoclear, for
1727 ** this reason we simply create a soft interrupt
1728 ** for all these vectors.
1731 E1000_WRITE_REG(&adapter->hw,
1732 E1000_ICS, adapter->ims);
/* RX deferred task: clean the ring; requeue if more work, else
 * unmask the ring's interrupt via IMS. */
1738 em_handle_rx(void *context, int pending)
1740 struct rx_ring *rxr = context;
1741 struct adapter *adapter = rxr->adapter;
1744 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1746 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1748 /* Reenable this interrupt */
1749 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
/* TX deferred task: restart transmit on this ring (multiqueue or
 * legacy path), then unmask the ring's interrupt via IMS. */
1754 em_handle_tx(void *context, int pending)
1756 struct tx_ring *txr = context;
1757 struct adapter *adapter = txr->adapter;
1758 struct ifnet *ifp = adapter->ifp;
1762 #ifdef EM_MULTIQUEUE
1763 if (!drbr_empty(ifp, txr->br))
1764 em_mq_start_locked(ifp, txr);
1766 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1767 em_start_locked(ifp, txr);
1769 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
/* Link deferred task: refresh link state under the core lock, re-arm
 * the link vector, and kick transmit on every ring if link is up. */
1774 em_handle_link(void *context, int pending)
1776 struct adapter *adapter = context;
1777 struct tx_ring *txr = adapter->tx_rings;
1778 struct ifnet *ifp = adapter->ifp;
1780 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1783 EM_CORE_LOCK(adapter);
1784 callout_stop(&adapter->timer);
1785 em_update_link_status(adapter);
1786 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1787 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1788 EM_MSIX_LINK | E1000_IMS_LSC);
1789 if (adapter->link_active) {
1790 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1792 #ifdef EM_MULTIQUEUE
1793 if (!drbr_empty(ifp, txr->br))
1794 em_mq_start_locked(ifp, txr);
1796 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1797 em_start_locked(ifp, txr);
1802 EM_CORE_UNLOCK(adapter);
1806 /*********************************************************************
1808 * Media Ioctl callback
1810 * This routine is called whenever the user queries the status of
1811 * the interface using ifconfig.
1813 **********************************************************************/
/* Report current link/media state into ifmr. Fiber/serdes report a
 * fixed 1000SX full-duplex; copper reports the negotiated speed. */
1815 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1817 struct adapter *adapter = ifp->if_softc;
1818 u_char fiber_type = IFM_1000_SX;
1820 INIT_DEBUGOUT("em_media_status: begin");
1822 EM_CORE_LOCK(adapter);
1823 em_update_link_status(adapter);
1825 ifmr->ifm_status = IFM_AVALID;
1826 ifmr->ifm_active = IFM_ETHER;
1828 if (!adapter->link_active) {
1829 EM_CORE_UNLOCK(adapter);
1833 ifmr->ifm_status |= IFM_ACTIVE;
1835 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1836 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1837 ifmr->ifm_active |= fiber_type | IFM_FDX;
1839 switch (adapter->link_speed) {
1841 ifmr->ifm_active |= IFM_10_T;
1844 ifmr->ifm_active |= IFM_100_TX;
1847 ifmr->ifm_active |= IFM_1000_T;
1850 if (adapter->link_duplex == FULL_DUPLEX)
1851 ifmr->ifm_active |= IFM_FDX;
1853 ifmr->ifm_active |= IFM_HDX;
1855 EM_CORE_UNLOCK(adapter);
1858 /*********************************************************************
1860 * Media Ioctl callback
1862 * This routine is called when the user changes speed/duplex using
1863 * media/mediaopt option with ifconfig.
1865 **********************************************************************/
/* Program autoneg/forced speed-duplex from the requested media word,
 * then reinit. NOTE(review): case labels for subtypes are missing
 * from this extract. */
1867 em_media_change(struct ifnet *ifp)
1869 struct adapter *adapter = ifp->if_softc;
1870 struct ifmedia *ifm = &adapter->media;
1872 INIT_DEBUGOUT("em_media_change: begin");
1874 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1877 EM_CORE_LOCK(adapter);
1878 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1880 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1881 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1886 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1887 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1890 adapter->hw.mac.autoneg = FALSE;
1891 adapter->hw.phy.autoneg_advertised = 0;
1892 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1893 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1895 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1898 adapter->hw.mac.autoneg = FALSE;
1899 adapter->hw.phy.autoneg_advertised = 0;
1900 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1901 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1903 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1906 device_printf(adapter->dev, "Unsupported media type\n");
1909 em_init_locked(adapter);
1910 EM_CORE_UNLOCK(adapter);
1915 /*********************************************************************
1917 * This routine maps the mbufs to tx descriptors.
1919 * return 0 on success, positive on failure
1920 **********************************************************************/
/* Encapsulate one frame: coalesce headers for TSO/csum offload, DMA-map
 * the chain (with one EFBIG retry via m_collapse), fill descriptors
 * (with a TSO sentinel workaround), and bump the TDT tail register.
 * NOTE(review): lossy extract — interior lines are missing throughout. */
1923 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1925 struct adapter *adapter = txr->adapter;
1926 bus_dma_segment_t segs[EM_MAX_SCATTER];
1928 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1929 struct e1000_tx_desc *ctxd = NULL;
1930 struct mbuf *m_head;
1931 struct ether_header *eh;
1932 struct ip *ip = NULL;
1933 struct tcphdr *tp = NULL;
1934 u32 txd_upper = 0, txd_lower = 0;
1936 int nsegs, i, j, first, last = 0;
1938 bool do_tso, tso_desc, remap = TRUE;
1941 do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1946 * Intel recommends entire IP/TCP header length reside in a single
1947 * buffer. If multiple descriptors are used to describe the IP and
1948 * TCP header, each descriptor should describe one or more
1949 * complete headers; descriptors referencing only parts of headers
1950 * are not supported. If all layer headers are not coalesced into
1951 * a single buffer, each buffer should not cross a 4KB boundary,
1952 * or be larger than the maximum read request size.
1953 * Controller also requires modifying IP/TCP header to make TSO work
1954 * so we firstly get a writable mbuf chain then coalesce ethernet/
1955 * IP/TCP header into a single buffer to meet the requirement of
1956 * controller. This also simplifies IP/TCP/UDP checksum offloading
1957 * which also has similar restrictions.
1959 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1960 if (do_tso || (m_head->m_next != NULL &&
1961 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1962 if (M_WRITABLE(*m_headp) == 0) {
/* Duplicate a read-only chain so headers can be edited in place. */
1963 m_head = m_dup(*m_headp, M_NOWAIT);
1965 if (m_head == NULL) {
1974 * Assume IPv4, we don't have TSO/checksum offload support
1977 ip_off = sizeof(struct ether_header);
1978 if (m_head->m_len < ip_off) {
1979 m_head = m_pullup(m_head, ip_off);
1980 if (m_head == NULL) {
1985 eh = mtod(m_head, struct ether_header *);
1986 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1987 ip_off = sizeof(struct ether_vlan_header);
1988 if (m_head->m_len < ip_off) {
1989 m_head = m_pullup(m_head, ip_off);
1990 if (m_head == NULL) {
1996 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1997 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1998 if (m_head == NULL) {
2003 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2004 poff = ip_off + (ip->ip_hl << 2);
2006 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
2007 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2008 m_head = m_pullup(m_head, poff +
2009 sizeof(struct tcphdr));
2010 if (m_head == NULL) {
2015 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2018 * pull 4 more bytes of data into it.
2020 if (m_head->m_len < poff + (tp->th_off << 2)) {
2021 m_head = m_pullup(m_head, poff +
2024 if (m_head == NULL) {
/* Re-derive header pointers: m_pullup may have moved the data. */
2029 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2030 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2032 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2037 * The pseudo TCP checksum does not include TCP
2038 * payload length so driver should recompute
2039 * the checksum here what hardware expect to
2040 * see. This is adherence of Microsoft's Large
2041 * Send specification.
2043 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2044 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2046 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2047 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2048 m_head = m_pullup(m_head, poff +
2049 sizeof(struct udphdr));
2050 if (m_head == NULL) {
2055 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2061 * Map the packet for DMA
2063 * Capture the first descriptor index,
2064 * this descriptor will have the index
2065 * of the EOP which is the only one that
2066 * now gets a DONE bit writeback.
2068 first = txr->next_avail_desc;
2069 tx_buffer = &txr->tx_buffers[first];
2070 tx_buffer_mapped = tx_buffer;
2071 map = tx_buffer->map;
2074 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2075 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2078 * There are two types of errors we can (try) to handle:
2079 * - EFBIG means the mbuf chain was too long and bus_dma ran
2080 * out of segments. Defragment the mbuf chain and try again.
2081 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2082 * at this point in time. Defer sending and try again later.
2083 * All other errors, in particular EINVAL, are fatal and prevent the
2084 * mbuf chain from ever going through. Drop it and report error.
2086 if (error == EFBIG && remap) {
2089 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2091 adapter->mbuf_defrag_failed++;
2098 /* Try it again, but only once */
2101 } else if (error != 0) {
2102 adapter->no_tx_dma_setup++;
2109 * TSO Hardware workaround, if this packet is not
2110 * TSO, and is only a single descriptor long, and
2111 * it follows a TSO burst, then we need to add a
2112 * sentinel descriptor to prevent premature writeback.
2114 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2117 txr->tx_tso = FALSE;
2120 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2121 txr->no_desc_avail++;
2122 bus_dmamap_unload(txr->txtag, map);
2127 /* Do hardware assists */
2128 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2129 em_tso_setup(txr, m_head, ip_off, ip, tp,
2130 &txd_upper, &txd_lower);
2131 /* we need to make a final sentinel transmit desc */
2133 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2134 em_transmit_checksum_setup(txr, m_head,
2135 ip_off, ip, &txd_upper, &txd_lower);
2137 if (m_head->m_flags & M_VLANTAG) {
2138 /* Set the vlan id. */
2140 (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2141 /* Tell hardware to add tag */
2142 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2145 i = txr->next_avail_desc;
2147 /* Set up our transmit descriptors */
2148 for (j = 0; j < nsegs; j++) {
2150 bus_addr_t seg_addr;
2152 tx_buffer = &txr->tx_buffers[i];
2153 ctxd = &txr->tx_base[i];
2154 seg_addr = segs[j].ds_addr;
2155 seg_len = segs[j].ds_len;
2158 ** If this is the last descriptor, we want to
2159 ** split it so we have a small final sentinel
2161 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2162 seg_len -= TSO_WORKAROUND;
2163 ctxd->buffer_addr = htole64(seg_addr);
2164 ctxd->lower.data = htole32(
2165 adapter->txd_cmd | txd_lower | seg_len);
2166 ctxd->upper.data = htole32(txd_upper);
2167 if (++i == adapter->num_tx_desc)
2170 /* Now make the sentinel */
2172 ctxd = &txr->tx_base[i];
2173 tx_buffer = &txr->tx_buffers[i];
2175 htole64(seg_addr + seg_len);
2176 ctxd->lower.data = htole32(
2177 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2181 if (++i == adapter->num_tx_desc)
2184 ctxd->buffer_addr = htole64(seg_addr);
2185 ctxd->lower.data = htole32(
2186 adapter->txd_cmd | txd_lower | seg_len);
2187 ctxd->upper.data = htole32(txd_upper);
2189 if (++i == adapter->num_tx_desc)
2192 tx_buffer->m_head = NULL;
2193 tx_buffer->next_eop = -1;
2196 txr->next_avail_desc = i;
2197 txr->tx_avail -= nsegs;
2199 tx_buffer->m_head = m_head;
2201 ** Here we swap the map so the last descriptor,
2202 ** which gets the completion interrupt has the
2203 ** real map, and the first descriptor gets the
2204 ** unused map from this descriptor.
2206 tx_buffer_mapped->map = tx_buffer->map;
2207 tx_buffer->map = map;
2208 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2211 * Last Descriptor of Packet
2212 * needs End Of Packet (EOP)
2213 * and Report Status (RS)
2216 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2218 * Keep track in the first buffer which
2219 * descriptor will be written back
2221 tx_buffer = &txr->tx_buffers[first];
2222 tx_buffer->next_eop = last;
2225 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2226 * that this frame is available to transmit.
2228 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2229 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2230 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
/* Apply IFF_PROMISC / IFF_ALLMULTI to RCTL (UPE/MPE bits; SBP line
 * suggests bad-packet reception is gated by a missing conditional). */
2236 em_set_promisc(struct adapter *adapter)
2238 struct ifnet *ifp = adapter->ifp;
2241 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2243 if (ifp->if_flags & IFF_PROMISC) {
2244 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2245 /* Turn this on if you want to see bad packets */
2247 reg_rctl |= E1000_RCTL_SBP;
2248 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2249 } else if (ifp->if_flags & IFF_ALLMULTI) {
2250 reg_rctl |= E1000_RCTL_MPE;
2251 reg_rctl &= ~E1000_RCTL_UPE;
2252 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/* Clear promiscuous bits: always drop UPE/SBP; only drop MPE when the
 * multicast group count is below the hardware filter limit. */
2257 em_disable_promisc(struct adapter *adapter)
2259 struct ifnet *ifp = adapter->ifp;
2263 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2264 reg_rctl &= (~E1000_RCTL_UPE);
2265 if (ifp->if_flags & IFF_ALLMULTI)
2266 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2268 struct ifmultiaddr *ifma;
2269 #if __FreeBSD_version < 800000
2272 if_maddr_rlock(ifp);
2274 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2275 if (ifma->ifma_addr->sa_family != AF_LINK)
2277 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2281 #if __FreeBSD_version < 800000
2282 IF_ADDR_UNLOCK(ifp);
2284 if_maddr_runlock(ifp);
2287 /* Don't disable if in MAX groups */
2288 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2289 reg_rctl &= (~E1000_RCTL_MPE);
2290 reg_rctl &= (~E1000_RCTL_SBP);
2291 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2295 /*********************************************************************
2298 * This routine is called whenever multicast address list is updated.
2300 **********************************************************************/
/* Rebuild the multicast table array (MTA): walk if_multiaddrs into a
 * flat byte array and program it, falling back to MPE when the list
 * exceeds the hardware limit. 82542 rev2 needs an RCTL_RST/MWI dance
 * around the update. */
2303 em_set_multi(struct adapter *adapter)
2305 struct ifnet *ifp = adapter->ifp;
2306 struct ifmultiaddr *ifma;
2308 u8 *mta; /* Multicast array memory */
2311 IOCTL_DEBUGOUT("em_set_multi: begin");
2314 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2316 if (adapter->hw.mac.type == e1000_82542 &&
2317 adapter->hw.revision_id == E1000_REVISION_2) {
2318 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2319 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2320 e1000_pci_clear_mwi(&adapter->hw);
2321 reg_rctl |= E1000_RCTL_RST;
2322 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2326 #if __FreeBSD_version < 800000
2329 if_maddr_rlock(ifp);
2331 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2332 if (ifma->ifma_addr->sa_family != AF_LINK)
2335 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2338 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2339 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2342 #if __FreeBSD_version < 800000
2343 IF_ADDR_UNLOCK(ifp);
2345 if_maddr_runlock(ifp);
2347 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2348 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2349 reg_rctl |= E1000_RCTL_MPE;
2350 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2352 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2354 if (adapter->hw.mac.type == e1000_82542 &&
2355 adapter->hw.revision_id == E1000_REVISION_2) {
2356 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2357 reg_rctl &= ~E1000_RCTL_RST;
2358 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2360 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2361 e1000_pci_set_mwi(&adapter->hw);
2366 /*********************************************************************
2369 * This routine checks for link status and updates statistics.
2371 **********************************************************************/
/*
 * em_local_timer - once-per-second housekeeping callout.
 *
 * Runs with the core lock held (asserted below): refreshes link state and
 * statistics, re-programs the LAA into RAR[0] on 82571 (which can lose it),
 * performs per-queue TX hang detection, kicks TX taskqueues when the ring
 * is nearly full, re-arms itself, and (when not polling) raises an RX
 * interrupt to guarantee mbuf refresh.
 *
 * NOTE(review): lines are elided in this excerpt — the `trigger`
 * declaration, the goto to the hung/watchdog label, and the label itself
 * are among the missing text; the tail starting at "Looks like we're
 * hung" is the watchdog-reset path reached via that goto.
 */
2374 em_local_timer(void *arg)
2376 struct adapter *adapter = arg;
2377 struct ifnet *ifp = adapter->ifp;
2378 struct tx_ring *txr = adapter->tx_rings;
2379 struct rx_ring *rxr = adapter->rx_rings;
2382 EM_CORE_LOCK_ASSERT(adapter);
2384 em_update_link_status(adapter);
2385 em_update_stats_counters(adapter);
2387 /* Reset LAA into RAR[0] on 82571 */
2388 if ((adapter->hw.mac.type == e1000_82571) &&
2389 e1000_get_laa_state_82571(&adapter->hw))
2390 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2392 /* Mask to use in the irq trigger */
2393 if (adapter->msix_mem) {
2394 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2395 trigger |= rxr->ims;
2396 rxr = adapter->rx_rings;
2398 trigger = E1000_ICS_RXDMT0;
2401 ** Check on the state of the TX queue(s), this
2402 ** can be done without the lock because its RO
2403 ** and the HUNG state will be static if set.
/* busy counts consecutive ticks with pending work; MAXTRIES => HUNG. */
2405 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2406 if (txr->busy == EM_TX_HUNG)
2408 if (txr->busy >= EM_TX_MAXTRIES)
2409 txr->busy = EM_TX_HUNG;
2410 /* Schedule a TX tasklet if needed */
2411 if (txr->tx_avail <= EM_MAX_SCATTER)
2412 taskqueue_enqueue(txr->tq, &txr->tx_task);
2415 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2416 #ifndef DEVICE_POLLING
2417 /* Trigger an RX interrupt to guarantee mbuf refresh */
2418 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
/* Watchdog path (reached via elided goto): log, mark down, reinit. */
2422 /* Looks like we're hung */
2423 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2425 em_print_debug_info(adapter);
2426 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2427 adapter->watchdog_events++;
2428 em_init_locked(adapter);
/*
 * em_update_link_status - poll/refresh link state and notify the stack.
 *
 * Determines link per media type (copper reads the PHY via
 * e1000_check_for_link; fiber/serdes read STATUS/serdes_has_link), then
 * handles up/down transitions: on link-up it reads speed/duplex, toggles
 * CSUM_IP_TSO based on gigabit speed (TSO is unreliable below 1G on this
 * silicon), clears the 82571/82572 PCI-E SPEED_MODE bit at sub-gigabit,
 * and reports LINK_STATE_UP; on link-down it zeroes speed/duplex, disarms
 * per-queue hang detection, and reports LINK_STATE_DOWN.
 *
 * NOTE(review): lines are elided in this excerpt — the `link_check`/
 * `tarc0` declarations, several `break` statements, the pch_spt special
 * case body, and the bootverbose guards around the printfs are missing.
 */
2433 em_update_link_status(struct adapter *adapter)
2435 struct e1000_hw *hw = &adapter->hw;
2436 struct ifnet *ifp = adapter->ifp;
2437 device_t dev = adapter->dev;
2438 struct tx_ring *txr = adapter->tx_rings;
2441 /* Get the cached link value or read phy for real */
2442 switch (hw->phy.media_type) {
2443 case e1000_media_type_copper:
2444 if (hw->mac.get_link_status) {
2445 if (hw->mac.type == e1000_pch_spt)
2447 /* Do the work to read phy */
2448 e1000_check_for_link(hw);
2449 link_check = !hw->mac.get_link_status;
2450 if (link_check) /* ESB2 fix */
2451 e1000_cfg_on_link_up(hw);
2455 case e1000_media_type_fiber:
2456 e1000_check_for_link(hw);
2457 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2460 case e1000_media_type_internal_serdes:
2461 e1000_check_for_link(hw);
2462 link_check = adapter->hw.mac.serdes_has_link;
2465 case e1000_media_type_unknown:
2469 /* Now check for a transition */
2470 if (link_check && (adapter->link_active == 0)) {
2471 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2472 &adapter->link_duplex);
2475 ** There have proven to be problems with TSO when not at full
2476 ** gigabit speed, so disable the assist automatically when at
2477 ** lower speeds. -jfv
2479 if (ifp->if_capenable & IFCAP_TSO4) {
2480 if (adapter->link_speed == SPEED_1000)
2481 ifp->if_hwassist |= CSUM_IP_TSO;
2483 ifp->if_hwassist &= ~CSUM_IP_TSO;
2486 /* Check if we must disable SPEED_MODE bit on PCI-E */
2487 if ((adapter->link_speed != SPEED_1000) &&
2488 ((hw->mac.type == e1000_82571) ||
2489 (hw->mac.type == e1000_82572))) {
2491 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2492 tarc0 &= ~TARC_SPEED_MODE_BIT;
2493 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2496 device_printf(dev, "Link is up %d Mbps %s\n",
2497 adapter->link_speed,
2498 ((adapter->link_duplex == FULL_DUPLEX) ?
2499 "Full Duplex" : "Half Duplex"));
2500 adapter->link_active = 1;
2501 adapter->smartspeed = 0;
2502 ifp->if_baudrate = adapter->link_speed * 1000000;
2503 if_link_state_change(ifp, LINK_STATE_UP);
2504 } else if (!link_check && (adapter->link_active == 1)) {
2505 ifp->if_baudrate = adapter->link_speed = 0;
2506 adapter->link_duplex = 0;
2508 device_printf(dev, "Link is Down\n");
2509 adapter->link_active = 0;
2510 /* Link down, disable hang detection */
2511 for (int i = 0; i < adapter->num_queues; i++, txr++)
2512 txr->busy = EM_TX_IDLE;
2513 if_link_state_change(ifp, LINK_STATE_DOWN);
2517 /*********************************************************************
2519 * This routine disables all traffic on the adapter by issuing a
2520 * global reset on the MAC and deallocates TX/RX buffers.
2522 * This routine should always be called with BOTH the CORE
2524 **********************************************************************/
/*
 * em_stop - quiesce the adapter (body; signature line elided in excerpt).
 *
 * Called with the core lock held: disables interrupts, stops the timer
 * callout, marks the ifnet down, disarms per-queue TX hang detection,
 * flushes the I219 (pch_spt) descriptor rings to avoid a known hang,
 * then issues a global MAC reset and clears wake-up control.
 *
 * NOTE(review): the `em_stop(void *arg)` signature line and per-ring
 * lock/unlock lines appear elided in this excerpt.
 */
2529 struct adapter *adapter = arg;
2530 struct ifnet *ifp = adapter->ifp;
2531 struct tx_ring *txr = adapter->tx_rings;
2533 EM_CORE_LOCK_ASSERT(adapter);
2535 INIT_DEBUGOUT("em_stop: begin");
2537 em_disable_intr(adapter);
2538 callout_stop(&adapter->timer);
2540 /* Tell the stack that the interface is no longer active */
2541 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2542 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2544 /* Disarm Hang Detection. */
2545 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2547 txr->busy = EM_TX_IDLE;
2551 /* I219 needs some special flushing to avoid hangs */
2552 if (adapter->hw.mac.type == e1000_pch_spt)
2553 em_flush_desc_rings(adapter);
2555 e1000_reset_hw(&adapter->hw);
/* Clear Wake-Up Control so the device doesn't arm wake events. */
2556 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2558 e1000_led_off(&adapter->hw);
2559 e1000_cleanup_led(&adapter->hw);
2563 /*********************************************************************
2565 * Determine hardware revision.
2567 **********************************************************************/
/*
 * em_identify_hardware - read PCI IDs and resolve the MAC type.
 *
 * Enables bus mastering, snapshots the PCI command word, and copies the
 * vendor/device/revision/subsystem IDs out of config space into the
 * shared-code hw struct, then lets e1000_set_mac_type() map the device
 * ID to a mac.type.  Prints a diagnostic if the device is unrecognized.
 *
 * NOTE(review): the `static void` return-type line and the error path
 * after the printf appear elided in this excerpt.
 */
2569 em_identify_hardware(struct adapter *adapter)
2571 device_t dev = adapter->dev;
2573 /* Make sure our PCI config space has the necessary stuff set */
2574 pci_enable_busmaster(dev);
2575 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2577 /* Save off the information about this board */
2578 adapter->hw.vendor_id = pci_get_vendor(dev);
2579 adapter->hw.device_id = pci_get_device(dev);
2580 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2581 adapter->hw.subsystem_vendor_id =
2582 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2583 adapter->hw.subsystem_device_id =
2584 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2586 /* Do Shared Code Init and Setup */
2587 if (e1000_set_mac_type(&adapter->hw)) {
2588 device_printf(dev, "Setup init failure\n");
/*
 * em_allocate_pci_resources - map the device's register BAR.
 *
 * Allocates the SYS_RES_MEMORY resource for BAR 0 and stashes the bus
 * tag/handle in osdep so the E1000_READ/WRITE_REG macros can reach the
 * registers; hw.back links the shared code back to the osdep.
 *
 * NOTE(review): the `rid` setup and the return statements are elided in
 * this excerpt.
 */
2594 em_allocate_pci_resources(struct adapter *adapter)
2596 device_t dev = adapter->dev;
2600 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2602 if (adapter->memory == NULL) {
2603 device_printf(dev, "Unable to allocate bus resource: memory\n");
2606 adapter->osdep.mem_bus_space_tag =
2607 rman_get_bustag(adapter->memory);
2608 adapter->osdep.mem_bus_space_handle =
2609 rman_get_bushandle(adapter->memory);
/* hw_addr is never dereferenced directly; the REG macros go through
 * the bus-space tag/handle stored above. */
2610 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2612 adapter->hw.back = &adapter->osdep;
2617 /*********************************************************************
2619 * Setup the Legacy or MSI Interrupt handler
2621 **********************************************************************/
/*
 * em_allocate_legacy - set up the legacy INTx or single-MSI interrupt.
 *
 * Masks all interrupts, allocates one shareable IRQ resource, creates
 * a fast-interrupt taskqueue pair (one generic "que" task plus a TX-only
 * task used by the local timer), and registers em_irq_fast as the filter
 * handler.  On handler-registration failure the taskqueue is freed.
 *
 * NOTE(review): the `rid`/`error` declarations, the rid adjustment for
 * MSI, and the return statements are elided in this excerpt.
 */
2623 em_allocate_legacy(struct adapter *adapter)
2625 device_t dev = adapter->dev;
2626 struct tx_ring *txr = adapter->tx_rings;
2629 /* Manually turn off all interrupts */
2630 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2632 if (adapter->msix == 1) /* using MSI */
2634 /* We allocate a single interrupt resource */
2635 adapter->res = bus_alloc_resource_any(dev,
2636 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2637 if (adapter->res == NULL) {
2638 device_printf(dev, "Unable to allocate bus resource: "
2644 * Allocate a fast interrupt and the associated
2645 * deferred processing contexts.
2647 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2648 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2649 taskqueue_thread_enqueue, &adapter->tq);
2650 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2651 device_get_nameunit(adapter->dev));
2652 /* Use a TX only tasklet for local timer */
2653 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2654 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2655 taskqueue_thread_enqueue, &txr->tq);
2656 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2657 device_get_nameunit(adapter->dev));
2658 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2659 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2660 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2661 device_printf(dev, "Failed to register fast interrupt "
2662 "handler: %d\n", error);
2663 taskqueue_free(adapter->tq);
2671 /*********************************************************************
2673 * Setup the MSIX Interrupt handlers
2674 * This is not really Multiqueue, rather
2675 * its just seperate interrupt vectors
2676 * for TX, RX, and Link.
2678 **********************************************************************/
/*
 * em_allocate_msix - set up separate MSI-X vectors for RX, TX and link.
 *
 * For each queue: allocates an IRQ resource, installs the em_msix_rx/tx
 * ithread handler, binds the vector round-robin across CPUs
 * (em_last_bind_cpu), creates a per-ring taskqueue, and accumulates the
 * 82574 IMS bit (RX queues use bits 20+i, TX queues bits 22+i) and the
 * IVAR routing nibble for that vector.  A final shareable vector is set
 * up for link interrupts (IVAR bits 16..19 plus the 0x80000000 enable).
 *
 * NOTE(review): rid computations, error-path returns/gotos, `break`
 * statements and the trailing `return (0)` are elided in this excerpt;
 * the IMS/IVAR comments below are original text whose comment delimiters
 * were also elided.
 */
2680 em_allocate_msix(struct adapter *adapter)
2682 device_t dev = adapter->dev;
2683 struct tx_ring *txr = adapter->tx_rings;
2684 struct rx_ring *rxr = adapter->rx_rings;
2685 int error, rid, vector = 0;
2689 /* Make sure all interrupts are disabled */
2690 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2692 /* First set up ring resources */
2693 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2698 rxr->res = bus_alloc_resource_any(dev,
2699 SYS_RES_IRQ, &rid, RF_ACTIVE);
2700 if (rxr->res == NULL) {
2702 "Unable to allocate bus resource: "
2703 "RX MSIX Interrupt %d\n", i);
2706 if ((error = bus_setup_intr(dev, rxr->res,
2707 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2708 rxr, &rxr->tag)) != 0) {
2709 device_printf(dev, "Failed to register RX handler");
2712 #if __FreeBSD_version >= 800504
2713 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
/* Round-robin CPU binding for interrupt affinity. */
2717 if (em_last_bind_cpu < 0)
2718 em_last_bind_cpu = CPU_FIRST();
2719 cpu_id = em_last_bind_cpu;
2720 bus_bind_intr(dev, rxr->res, cpu_id);
2722 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2723 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2724 taskqueue_thread_enqueue, &rxr->tq);
2725 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2726 device_get_nameunit(adapter->dev), cpu_id);
2728 ** Set the bit to enable interrupt
2729 ** in E1000_IMS -- bits 20 and 21
2730 ** are for RX0 and RX1, note this has
2731 ** NOTHING to do with the MSIX vector
2733 rxr->ims = 1 << (20 + i);
2734 adapter->ims |= rxr->ims;
2735 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2737 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
/* TX queues get their own vectors, mirroring the RX setup above. */
2740 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2743 txr->res = bus_alloc_resource_any(dev,
2744 SYS_RES_IRQ, &rid, RF_ACTIVE);
2745 if (txr->res == NULL) {
2747 "Unable to allocate bus resource: "
2748 "TX MSIX Interrupt %d\n", i);
2751 if ((error = bus_setup_intr(dev, txr->res,
2752 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2753 txr, &txr->tag)) != 0) {
2754 device_printf(dev, "Failed to register TX handler");
2757 #if __FreeBSD_version >= 800504
2758 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2762 if (em_last_bind_cpu < 0)
2763 em_last_bind_cpu = CPU_FIRST();
2764 cpu_id = em_last_bind_cpu;
2765 bus_bind_intr(dev, txr->res, cpu_id);
2767 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2768 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2769 taskqueue_thread_enqueue, &txr->tq);
2770 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2771 device_get_nameunit(adapter->dev), cpu_id);
2773 ** Set the bit to enable interrupt
2774 ** in E1000_IMS -- bits 22 and 23
2775 ** are for TX0 and TX1, note this has
2776 ** NOTHING to do with the MSIX vector
2778 txr->ims = 1 << (22 + i);
2779 adapter->ims |= txr->ims;
2780 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2782 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2785 /* Link interrupt */
2787 adapter->res = bus_alloc_resource_any(dev,
2788 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2789 if (!adapter->res) {
2790 device_printf(dev,"Unable to allocate "
2791 "bus resource: Link interrupt [%d]\n", rid);
2794 /* Set the link handler function */
2795 error = bus_setup_intr(dev, adapter->res,
2796 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2797 em_msix_link, adapter, &adapter->tag);
2799 adapter->res = NULL;
2800 device_printf(dev, "Failed to register LINK handler");
2803 #if __FreeBSD_version >= 800504
2804 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2806 adapter->linkvec = vector;
/* Route the link cause to this vector and set the IVAR "other" enable. */
2807 adapter->ivars |= (8 | vector) << 16;
2808 adapter->ivars |= 0x80000000;
/*
 * em_free_pci_resources - tear down all IRQ and memory resources.
 *
 * Releases, in order: per-queue TX/RX interrupt handlers and IRQ
 * resources, the link/legacy interrupt (rid depends on whether MSI-X,
 * MSI, or INTx was in use), any allocated MSI vectors, the MSI-X BAR
 * mapping, the register BAR, and the ICH flash mapping.  Safe to call
 * on a partially-initialized adapter (NULL checks throughout).
 *
 * NOTE(review): rid bookkeeping lines, some NULL-reset assignments, and
 * early-abort `break`/`continue` statements are elided in this excerpt.
 */
2815 em_free_pci_resources(struct adapter *adapter)
2817 device_t dev = adapter->dev;
2818 struct tx_ring *txr;
2819 struct rx_ring *rxr;
2824 ** Release all the queue interrupt resources:
2826 for (int i = 0; i < adapter->num_queues; i++) {
2827 txr = &adapter->tx_rings[i];
2828 /* an early abort? */
2832 if (txr->tag != NULL) {
2833 bus_teardown_intr(dev, txr->res, txr->tag);
2836 if (txr->res != NULL)
2837 bus_release_resource(dev, SYS_RES_IRQ,
2840 rxr = &adapter->rx_rings[i];
2841 /* an early abort? */
2845 if (rxr->tag != NULL) {
2846 bus_teardown_intr(dev, rxr->res, rxr->tag);
2849 if (rxr->res != NULL)
2850 bus_release_resource(dev, SYS_RES_IRQ,
2854 if (adapter->linkvec) /* we are doing MSIX */
2855 rid = adapter->linkvec + 1;
2857 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2859 if (adapter->tag != NULL) {
2860 bus_teardown_intr(dev, adapter->res, adapter->tag);
2861 adapter->tag = NULL;
2864 if (adapter->res != NULL)
2865 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2869 pci_release_msi(dev);
2871 if (adapter->msix_mem != NULL)
2872 bus_release_resource(dev, SYS_RES_MEMORY,
2873 adapter->memrid, adapter->msix_mem);
2875 if (adapter->memory != NULL)
2876 bus_release_resource(dev, SYS_RES_MEMORY,
2877 PCIR_BAR(0), adapter->memory);
2879 if (adapter->flash != NULL)
2880 bus_release_resource(dev, SYS_RES_MEMORY,
2881 EM_FLASH, adapter->flash);
2885 * Setup MSI or MSI/X
/*
 * em_setup_msix - decide the interrupt mode (MSI-X, MSI, or legacy).
 *
 * MSI-X is attempted only on 82574 (and only when the em_enable_msix
 * tunable is set): the MSI-X BAR is mapped, the available vector count
 * checked (multiqueue needs 5 vectors: 2 RX + 2 TX + link), and vectors
 * allocated.  On any shortfall the code releases what it got and falls
 * back to a single MSI, then to a legacy IRQ.  Returns the number of
 * vectors in use (via elided return statements).
 *
 * NOTE(review): the `val` declaration, several goto/fallback labels,
 * closing braces and return statements are elided in this excerpt.
 */
2888 em_setup_msix(struct adapter *adapter)
2890 device_t dev = adapter->dev;
2893 /* Nearly always going to use one queue */
2894 adapter->num_queues = 1;
2897 ** Try using MSI-X for Hartwell adapters
2899 if ((adapter->hw.mac.type == e1000_82574) &&
2900 (em_enable_msix == TRUE)) {
2901 #ifdef EM_MULTIQUEUE
2902 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2903 if (adapter->num_queues > 1)
2904 em_enable_vectors_82574(adapter);
2906 /* Map the MSIX BAR */
2907 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2908 adapter->msix_mem = bus_alloc_resource_any(dev,
2909 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2910 if (adapter->msix_mem == NULL) {
2911 /* May not be enabled */
2912 device_printf(adapter->dev,
2913 "Unable to map MSIX table \n");
2916 val = pci_msix_count(dev);
2918 #ifdef EM_MULTIQUEUE
2919 /* We need 5 vectors in the multiqueue case */
2920 if (adapter->num_queues > 1 ) {
2924 adapter->num_queues = 1;
2925 device_printf(adapter->dev,
2926 "Insufficient MSIX vectors for >1 queue, "
2927 "using single queue...\n");
2936 device_printf(adapter->dev,
2937 "Insufficient MSIX vectors, using MSI\n");
2940 #ifdef EM_MULTIQUEUE
2944 if ((pci_alloc_msix(dev, &val) == 0)) {
2945 device_printf(adapter->dev,
2946 "Using MSIX interrupts "
2947 "with %d vectors\n", val);
2952 ** If MSIX alloc failed or provided us with
2953 ** less than needed, free and fall through to MSI
2955 pci_release_msi(dev);
/* MSI-X fallback path: drop the BAR mapping before trying MSI. */
2958 if (adapter->msix_mem != NULL) {
2959 bus_release_resource(dev, SYS_RES_MEMORY,
2960 adapter->memrid, adapter->msix_mem);
2961 adapter->msix_mem = NULL;
2964 if (pci_alloc_msi(dev, &val) == 0) {
2965 device_printf(adapter->dev, "Using an MSI interrupt\n");
2968 /* Should only happen due to manual configuration */
2969 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2975 ** The 3 following flush routines are used as a workaround in the
2976 ** I219 client parts and only for them.
2978 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2980 ** We want to clear all pending descriptors from the TX ring.
2981 ** zeroing happens when the HW reads the regs. We assign the ring itself as
2982 ** the data of the next descriptor. We don't care about the data we are about
/*
 * em_flush_tx_ring - push one dummy descriptor through the TX ring.
 *
 * Part of the I219 (pch_spt) hang workaround: enables the transmitter,
 * queues a single descriptor whose buffer address is the ring's own DMA
 * address (the payload is irrelevant), and bumps TDT so the HW consumes
 * all pending descriptors.
 *
 * NOTE(review): the `size` definition and the wmb()/flush line before
 * the TDT write are elided in this excerpt.
 */
2986 em_flush_tx_ring(struct adapter *adapter)
2988 struct e1000_hw *hw = &adapter->hw;
2989 struct tx_ring *txr = adapter->tx_rings;
2990 struct e1000_tx_desc *txd;
2991 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
2994 tctl = E1000_READ_REG(hw, E1000_TCTL);
2995 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2997 txd = &txr->tx_base[txr->next_avail_desc++];
2998 if (txr->next_avail_desc == adapter->num_tx_desc)
2999 txr->next_avail_desc = 0;
3001 /* Just use the ring as a dummy buffer addr */
3002 txd->buffer_addr = txr->txdma.dma_paddr;
3003 txd->lower.data = htole32(txd_lower | size);
3004 txd->upper.data = 0;
3006 /* flush descriptors to memory before notifying the HW */
3009 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
3015 ** em_flush_rx_ring - remove all descriptors from the rx_ring
3017 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
/*
 * em_flush_rx_ring - drain and disable RX ring 0 (I219 workaround).
 *
 * Disables the receiver, reprograms RXDCTL(0) thresholds (prefetch 31,
 * host 1, descriptor granularity), then briefly re-enables and disables
 * RX so the hardware latches the change and marks pending descriptors
 * consumed.
 *
 * NOTE(review): the `rctl`/`rxdctl` declarations and the usec_delay()
 * settle delays between register writes are elided in this excerpt.
 */
3020 em_flush_rx_ring(struct adapter *adapter)
3022 struct e1000_hw *hw = &adapter->hw;
3025 rctl = E1000_READ_REG(hw, E1000_RCTL);
3026 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3027 E1000_WRITE_FLUSH(hw);
3030 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3031 /* zero the lower 14 bits (prefetch and host thresholds) */
3032 rxdctl &= 0xffffc000;
3034 * update thresholds: prefetch threshold to 31, host threshold to 1
3035 * and make sure the granularity is "descriptors" and not "cache lines"
3037 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3038 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3040 /* momentarily enable the RX ring for the changes to take effect */
3041 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3042 E1000_WRITE_FLUSH(hw);
3044 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3048 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3050 ** In i219, the descriptor rings must be emptied before resetting the HW
3051 ** or before changing the device state to D3 during runtime (runtime PM).
3053 ** Failure to do this will cause the HW to enter a unit hang state which can
3054 ** only be released by PCI reset on the device
/*
 * em_flush_desc_rings - I219 pre-reset descriptor-ring flush.
 *
 * The I219 MAC can enter an unrecoverable unit-hang state if it is reset
 * (or put in D3) with descriptors still pending.  This disables the MULR
 * fix in FEXTNVM11, checks the PCI DESC_RING_STATUS config word for the
 * FLUSH_DESC_REQUIRED fault bit, and flushes the TX (and, if the fault
 * persists, RX) ring before the caller issues the reset.
 *
 * NOTE(review): the `hang_state` declaration and the early `return` when
 * no flush is required are elided in this excerpt.
 */
3058 em_flush_desc_rings(struct adapter *adapter)
3060 struct e1000_hw *hw = &adapter->hw;
3061 device_t dev = adapter->dev;
3063 u32 fext_nvm11, tdlen;
3065 /* First, disable MULR fix in FEXTNVM11 */
3066 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3067 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3068 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3070 /* do nothing if we're not in faulty state, or if the queue is empty */
3071 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3072 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3073 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3075 em_flush_tx_ring(adapter);
3077 /* recheck, maybe the fault is caused by the rx ring */
3078 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3079 if (hang_state & FLUSH_DESC_REQUIRED)
3080 em_flush_rx_ring(adapter);
3084 /*********************************************************************
3086 * Initialize the hardware to a configuration
3087 * as specified by the adapter structure.
3089 **********************************************************************/
/*
 * em_reset - bring the MAC to a known-good initialized state.
 *
 * Sequence: optionally disable PHY smart-power-down (82571/82572),
 * program the Packet Buffer Allocation (RX/TX split varies per MAC
 * generation and jumbo-frame use), derive flow-control high/low water
 * marks from the RX buffer size, apply per-family flow-control
 * overrides (PCH parts, ES2LAN), flush I219 rings, issue the global
 * reset, clear WUC, disable ASPM, and run e1000_init_hw().  Finishes by
 * restoring the VLAN ethertype and refreshing PHY/link state.
 *
 * NOTE(review): this excerpt elides the `static void` line, variable
 * declarations (pba, rx_buffer_size, phy_tmp), many `case` labels and
 * `break`s in both switches, and the e1000_reset_hw() call before the
 * WUC write — the embedded line numbers show the gaps.
 */
3091 em_reset(struct adapter *adapter)
3093 device_t dev = adapter->dev;
3094 struct ifnet *ifp = adapter->ifp;
3095 struct e1000_hw *hw = &adapter->hw;
3099 INIT_DEBUGOUT("em_reset: begin");
3101 /* Set up smart power down as default off on newer adapters. */
3102 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3103 hw->mac.type == e1000_82572)) {
3106 /* Speed up time to link by disabling smart power down. */
3107 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3108 phy_tmp &= ~IGP02E1000_PM_SPD;
3109 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3113 * Packet Buffer Allocation (PBA)
3114 * Writing PBA sets the receive portion of the buffer
3115 * the remainder is used for the transmit buffer.
3117 switch (hw->mac.type) {
3118 /* Total Packet Buffer on these is 48K */
3121 case e1000_80003es2lan:
3122 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3124 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3125 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3129 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3135 case e1000_ich10lan:
3136 /* Boost Receive side for jumbo frames */
3137 if (adapter->hw.mac.max_frame_size > 4096)
3138 pba = E1000_PBA_14K;
3140 pba = E1000_PBA_10K;
3147 pba = E1000_PBA_26K;
3150 if (adapter->hw.mac.max_frame_size > 8192)
3151 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3153 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3155 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3158 * These parameters control the automatic generation (Tx) and
3159 * response (Rx) to Ethernet PAUSE frames.
3160 * - High water mark should allow for at least two frames to be
3161 * received after sending an XOFF.
3162 * - Low water mark works best when it is very near the high water mark.
3163 * This allows the receiver to restart by sending XON when it has
3164 * drained a bit. Here we use an arbitary value of 1500 which will
3165 * restart after one full frame is pulled from the buffer. There
3166 * could be several smaller frames in the buffer and if so they will
3167 * not trigger the XON until their total number reduces the buffer
3169 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3171 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3172 hw->fc.high_water = rx_buffer_size -
3173 roundup2(adapter->hw.mac.max_frame_size, 1024);
3174 hw->fc.low_water = hw->fc.high_water - 1500;
3176 if (adapter->fc) /* locally set flow control value? */
3177 hw->fc.requested_mode = adapter->fc;
3179 hw->fc.requested_mode = e1000_fc_full;
3181 if (hw->mac.type == e1000_80003es2lan)
3182 hw->fc.pause_time = 0xFFFF;
3184 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3186 hw->fc.send_xon = TRUE;
3188 /* Device specific overrides/settings */
3189 switch (hw->mac.type) {
3191 /* Workaround: no TX flow ctrl for PCH */
3192 hw->fc.requested_mode = e1000_fc_rx_pause;
3193 hw->fc.pause_time = 0xFFFF; /* override */
3194 if (ifp->if_mtu > ETHERMTU) {
3195 hw->fc.high_water = 0x3500;
3196 hw->fc.low_water = 0x1500;
3198 hw->fc.high_water = 0x5000;
3199 hw->fc.low_water = 0x3000;
3201 hw->fc.refresh_time = 0x1000;
3207 hw->fc.high_water = 0x5C20;
3208 hw->fc.low_water = 0x5048;
3209 hw->fc.pause_time = 0x0650;
3210 hw->fc.refresh_time = 0x0400;
3211 /* Jumbos need adjusted PBA */
3212 if (ifp->if_mtu > ETHERMTU)
3213 E1000_WRITE_REG(hw, E1000_PBA, 12);
3215 E1000_WRITE_REG(hw, E1000_PBA, 26);
3218 case e1000_ich10lan:
3219 if (ifp->if_mtu > ETHERMTU) {
3220 hw->fc.high_water = 0x2800;
3221 hw->fc.low_water = hw->fc.high_water - 8;
3224 /* else fall thru */
3226 if (hw->mac.type == e1000_80003es2lan)
3227 hw->fc.pause_time = 0xFFFF;
3231 /* I219 needs some special flushing to avoid hangs */
3232 if (hw->mac.type == e1000_pch_spt)
3233 em_flush_desc_rings(adapter);
3235 /* Issue a global reset */
3237 E1000_WRITE_REG(hw, E1000_WUC, 0);
3238 em_disable_aspm(adapter);
3240 if (e1000_init_hw(hw) < 0) {
3241 device_printf(dev, "Hardware Initialization Failed\n");
/* Restore the VLAN ethertype (reset clears it) and refresh PHY/link. */
3245 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3246 e1000_get_phy_info(hw);
3247 e1000_check_for_link(hw);
3251 /*********************************************************************
3253 * Setup networking device structure and register an interface.
3255 **********************************************************************/
/*
 * em_setup_interface - create and register the ifnet for this adapter.
 *
 * Allocates the ifnet, wires up init/ioctl/transmit entry points
 * (multiqueue start or legacy if_start depending on EM_MULTIQUEUE),
 * configures TSO limits, attaches to the Ethernet layer, advertises
 * capabilities (checksum offload, VLAN tagging, TSO4 — not enabled by
 * default, see the long comment below — polling, WOL magic), and
 * populates the ifmedia list from the PHY/media type.
 *
 * NOTE(review): lines are elided in this excerpt — the `struct ifnet
 * *ifp` declaration, error returns, the IFCAP_VLAN_MTU bits joined by
 * the visible `|` continuations, and the 1000_SX/LX media-type selection
 * are among the missing text.
 */
3257 em_setup_interface(device_t dev, struct adapter *adapter)
3261 INIT_DEBUGOUT("em_setup_interface: begin");
3263 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3265 device_printf(dev, "can not allocate ifnet structure\n");
3268 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3269 ifp->if_init = em_init;
3270 ifp->if_softc = adapter;
3271 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3272 ifp->if_ioctl = em_ioctl;
3274 /* TSO parameters */
3275 ifp->if_hw_tsomax = IP_MAXPACKET;
3276 /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3277 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3278 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3280 #ifdef EM_MULTIQUEUE
3281 /* Multiqueue stack interface */
3282 ifp->if_transmit = em_mq_start;
3283 ifp->if_qflush = em_qflush;
3285 ifp->if_start = em_start;
3286 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3287 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3288 IFQ_SET_READY(&ifp->if_snd);
3291 ether_ifattach(ifp, adapter->hw.mac.addr);
3293 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3294 ifp->if_capenable = ifp->if_capabilities;
3297 * Tell the upper layer(s) we
3298 * support full VLAN capability
3300 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3301 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3304 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3308 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3309 * - Although the silicon bug of TSO only working at gigabit speed is
3310 * worked around in em_update_link_status() by selectively setting
3311 * CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3312 * descriptors. Thus, such descriptors may still cause the MAC to
3313 * hang and, consequently, TSO is only safe to be used in setups
3314 * where the link isn't expected to switch from gigabit to lower
3316 * - Similarly, there's currently no way to trigger a reconfiguration
3317 * of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3318 * runtime. Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3319 * when link speed changes are not to be expected.
3320 * - Despite all the workarounds for TSO-related silicon bugs, at
3321 * least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3323 ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
3326 ** Don't turn this on by default, if vlans are
3327 ** created on another pseudo device (eg. lagg)
3328 ** then vlan events are not passed thru, breaking
3329 ** operation, but with HW FILTER off it works. If
3330 ** using vlans directly on the em driver you can
3331 ** enable this and get full hardware tag filtering.
3333 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3335 #ifdef DEVICE_POLLING
3336 ifp->if_capabilities |= IFCAP_POLLING;
3339 /* Enable only WOL MAGIC by default */
3341 ifp->if_capabilities |= IFCAP_WOL;
3342 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3346 * Specify the media types supported by this adapter and register
3347 * callbacks to update media and link information
3349 ifmedia_init(&adapter->media, IFM_IMASK,
3350 em_media_change, em_media_status);
3351 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3352 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3353 u_char fiber_type = IFM_1000_SX; /* default type */
3355 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3357 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3359 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3360 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3362 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3364 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
/* ife PHYs cannot do 1000BASE-T, so skip the gigabit entries. */
3366 if (adapter->hw.phy.type != e1000_phy_ife) {
3367 ifmedia_add(&adapter->media,
3368 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3369 ifmedia_add(&adapter->media,
3370 IFM_ETHER | IFM_1000_T, 0, NULL);
3373 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3374 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3380 * Manage DMA'able memory.
/*
 * em_dmamap_cb - bus_dmamap_load(9) callback.
 *
 * Copies the (single-segment) physical address of the loaded map into
 * the bus_addr_t pointed to by arg.  The error/nseg checks are on lines
 * elided from this excerpt.
 */
3383 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3387 *(bus_addr_t *) arg = segs[0].ds_addr;
/*
 * em_dma_malloc - allocate a DMA-coherent memory region.
 *
 * Creates a single-segment DMA tag (EM_DBA_ALIGN alignment, full 64-bit
 * address range), allocates coherent memory, and loads the map,
 * capturing the physical address via em_dmamap_cb into dma->dma_paddr.
 * On failure, partially acquired resources are released via the
 * unwind labels at the bottom (goto-cleanup pattern).
 *
 * NOTE(review): the maxsize/nsegments tag arguments, error-return
 * statements and the unwind labels themselves are elided in this
 * excerpt; only their bodies are visible.
 */
3391 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3392 struct em_dma_alloc *dma, int mapflags)
3396 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3397 EM_DBA_ALIGN, 0, /* alignment, bounds */
3398 BUS_SPACE_MAXADDR, /* lowaddr */
3399 BUS_SPACE_MAXADDR, /* highaddr */
3400 NULL, NULL, /* filter, filterarg */
3403 size, /* maxsegsize */
3405 NULL, /* lockfunc */
3409 device_printf(adapter->dev,
3410 "%s: bus_dma_tag_create failed: %d\n",
3415 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3416 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3418 device_printf(adapter->dev,
3419 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3420 __func__, (uintmax_t)size, error);
3425 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3426 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3427 if (error || dma->dma_paddr == 0) {
3428 device_printf(adapter->dev,
3429 "%s: bus_dmamap_load failed: %d\n",
/* Unwind path: unload map, free memory, destroy tag. */
3437 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3439 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3440 bus_dma_tag_destroy(dma->dma_tag);
3442 dma->dma_tag = NULL;
/*
 * em_dma_free - release a region allocated by em_dma_malloc().
 *
 * Idempotent teardown: returns immediately if the tag was never created,
 * syncs and unloads the map only if it was loaded (paddr != 0), frees
 * the memory if allocated, and destroys the tag.  Fields are NULLed so
 * a second call is harmless.
 *
 * NOTE(review): the early `return`, and the dma_paddr = 0 reset inside
 * the first branch, are among lines elided in this excerpt.
 */
3448 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3450 if (dma->dma_tag == NULL)
3452 if (dma->dma_paddr != 0) {
3453 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3454 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3455 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3458 if (dma->dma_vaddr != NULL) {
3459 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3460 dma->dma_vaddr = NULL;
3462 bus_dma_tag_destroy(dma->dma_tag);
3463 dma->dma_tag = NULL;
3467 /*********************************************************************
3469 * Allocate memory for the transmit and receive rings, and then
3470 * the descriptors associated with each, called only once at attach.
3472 **********************************************************************/
/*
 * em_allocate_queues - allocate the tx_ring/rx_ring arrays, the DMA
 * descriptor rings, and the per-descriptor buffer structures for every
 * queue.  Called once at attach.  txconf/rxconf count how many rings
 * completed so the error path can unwind exactly those.
 */
3474 em_allocate_queues(struct adapter *adapter)
3476 device_t dev = adapter->dev;
3477 struct tx_ring *txr = NULL;
3478 struct rx_ring *rxr = NULL;
3479 int rsize, tsize, error = E1000_SUCCESS;
3480 int txconf = 0, rxconf = 0;
3483 /* Allocate the TX ring struct memory */
3484 if (!(adapter->tx_rings =
3485 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3486 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3487 device_printf(dev, "Unable to allocate TX ring memory\n");
3492 /* Now allocate the RX */
3493 if (!(adapter->rx_rings =
3494 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3495 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3496 device_printf(dev, "Unable to allocate RX ring memory\n");
3501 tsize = roundup2(adapter->num_tx_desc *
3502 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3504 * Now set up the TX queues, txconf is needed to handle the
3505 * possibility that things fail midcourse and we need to
3506 * undo memory gracefully
3508 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3509 /* Set up some basics */
3510 txr = &adapter->tx_rings[i];
3511 txr->adapter = adapter;
3514 /* Initialize the TX lock */
3515 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3516 device_get_nameunit(dev), txr->me);
3517 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3519 if (em_dma_malloc(adapter, tsize,
3520 &txr->txdma, BUS_DMA_NOWAIT)) {
3522 "Unable to allocate TX Descriptor memory\n");
3526 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3527 bzero((void *)txr->tx_base, tsize);
3529 if (em_allocate_transmit_buffers(txr)) {
3531 "Critical Failure setting up transmit buffers\n");
3535 #if __FreeBSD_version >= 800000
3536 /* Allocate a buf ring */
3537 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3538 M_WAITOK, &txr->tx_mtx);
3543 * Next the RX queues...
3545 rsize = roundup2(adapter->num_rx_desc *
3546 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3547 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3548 rxr = &adapter->rx_rings[i];
3549 rxr->adapter = adapter;
3552 /* Initialize the RX lock */
3553 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
/* BUGFIX: label the RX lock with the RX ring index (was txr->me,
 * a copy/paste from the TX loop above, which mis-named RX mutexes) */
3554 device_get_nameunit(dev), rxr->me);
3555 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3557 if (em_dma_malloc(adapter, rsize,
3558 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3560 "Unable to allocate RxDescriptor memory\n");
3564 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3565 bzero((void *)rxr->rx_base, rsize);
3567 /* Allocate receive buffers for the ring*/
3568 if (em_allocate_receive_buffers(rxr)) {
3570 "Critical Failure setting up receive buffers\n");
/* Error unwind: free only the rings that completed (rxconf/txconf) */
3579 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3580 em_dma_free(adapter, &rxr->rxdma);
3582 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3583 em_dma_free(adapter, &txr->txdma);
3584 free(adapter->rx_rings, M_DEVBUF);
3586 #if __FreeBSD_version >= 800000
3587 buf_ring_free(txr->br, M_DEVBUF);
3589 free(adapter->tx_rings, M_DEVBUF);
3595 /*********************************************************************
3597 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3598 * the information needed to transmit a packet on the wire. This is
3599 * called only once at attach, setup is done every reset.
3601 **********************************************************************/
/*
 * em_allocate_transmit_buffers - create the per-ring TX DMA tag, the
 * em_txbuffer array, and one DMA map per descriptor.  Called once at
 * attach.  Any failure unwinds via em_free_transmit_structures(),
 * which tolerates partially completed setup.
 */
3603 em_allocate_transmit_buffers(struct tx_ring *txr)
3605 struct adapter *adapter = txr->adapter;
3606 device_t dev = adapter->dev;
3607 struct em_txbuffer *txbuf;
3611 * Setup DMA descriptor areas.
/* Tag sized for the worst case: a TSO frame of up to EM_TSO_SIZE
 * bytes spread over at most EM_MAX_SCATTER segments */
3613 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3614 1, 0, /* alignment, bounds */
3615 BUS_SPACE_MAXADDR, /* lowaddr */
3616 BUS_SPACE_MAXADDR, /* highaddr */
3617 NULL, NULL, /* filter, filterarg */
3618 EM_TSO_SIZE, /* maxsize */
3619 EM_MAX_SCATTER, /* nsegments */
3620 PAGE_SIZE, /* maxsegsize */
3622 NULL, /* lockfunc */
3623 NULL, /* lockfuncarg */
3625 device_printf(dev,"Unable to allocate TX DMA tag\n");
3629 if (!(txr->tx_buffers =
3630 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3631 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3632 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3637 /* Create the descriptor buffer dma maps */
3638 txbuf = txr->tx_buffers;
3639 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3640 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3642 device_printf(dev, "Unable to create TX DMA map\n");
3649 /* We free all, it handles case where we are in the middle */
3650 em_free_transmit_structures(adapter);
3654 /*********************************************************************
3656 * Initialize a transmit ring.
3658 **********************************************************************/
/*
 * em_setup_transmit_ring - (re)initialize one TX ring: zero the
 * descriptor area, free any mbufs left from a previous run, reset the
 * producer/consumer indices and the cached checksum-offload context.
 * Under DEV_NETMAP the descriptor buffer addresses are pointed at the
 * netmap slots instead.  Called on every reset.
 */
3660 em_setup_transmit_ring(struct tx_ring *txr)
3662 struct adapter *adapter = txr->adapter;
3663 struct em_txbuffer *txbuf;
3666 struct netmap_adapter *na = NA(adapter->ifp);
3667 struct netmap_slot *slot;
3668 #endif /* DEV_NETMAP */
3670 /* Clear the old descriptor contents */
3673 slot = netmap_reset(na, NR_TX, txr->me, 0);
3674 #endif /* DEV_NETMAP */
3676 bzero((void *)txr->tx_base,
3677 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3679 txr->next_avail_desc = 0;
3680 txr->next_to_clean = 0;
3682 /* Free any existing tx buffers. */
3683 txbuf = txr->tx_buffers;
3684 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3685 if (txbuf->m_head != NULL) {
3686 bus_dmamap_sync(txr->txtag, txbuf->map,
3687 BUS_DMASYNC_POSTWRITE);
3688 bus_dmamap_unload(txr->txtag, txbuf->map);
3689 m_freem(txbuf->m_head);
3690 txbuf->m_head = NULL;
/* In netmap mode, point this descriptor at the netmap slot buffer */
3694 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3698 addr = PNMB(na, slot + si, &paddr);
3699 txr->tx_base[i].buffer_addr = htole64(paddr);
3700 /* reload the map for netmap mode */
3701 netmap_load_map(na, txr->txtag, txbuf->map, addr);
3703 #endif /* DEV_NETMAP */
3705 /* clear the watch index */
3706 txbuf->next_eop = -1;
3709 /* Set number of descriptors available */
3710 txr->tx_avail = adapter->num_tx_desc;
3711 txr->busy = EM_TX_IDLE;
3713 /* Clear checksum offload context. */
/* Zeroing last_hw_* forces em_transmit_checksum_setup() to emit a
 * fresh context descriptor on the next offloaded packet */
3714 txr->last_hw_offload = 0;
3715 txr->last_hw_ipcss = 0;
3716 txr->last_hw_ipcso = 0;
3717 txr->last_hw_tucss = 0;
3718 txr->last_hw_tucso = 0;
3720 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3721 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3725 /*********************************************************************
3727 * Initialize all transmit rings.
3729 **********************************************************************/
/*
 * em_setup_transmit_structures - (re)initialize every TX ring by
 * calling em_setup_transmit_ring() on each in turn.
 */
3731 em_setup_transmit_structures(struct adapter *adapter)
3733 struct tx_ring *txr = adapter->tx_rings;
3735 for (int i = 0; i < adapter->num_queues; i++, txr++)
3736 em_setup_transmit_ring(txr);
3741 /*********************************************************************
3743 * Enable transmit unit.
3745 **********************************************************************/
/*
 * em_initialize_transmit_unit - program the hardware TX registers:
 * per-queue ring base/length and head/tail indices, TXDCTL
 * thresholds, the inter-packet gap, interrupt delay timers, per-MAC
 * TARC errata bits, and finally TCTL (whose write enables the
 * transmitter).
 */
3747 em_initialize_transmit_unit(struct adapter *adapter)
3749 struct tx_ring *txr = adapter->tx_rings;
3750 struct e1000_hw *hw = &adapter->hw;
3751 u32 tctl, txdctl = 0, tarc, tipg = 0;
3753 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3755 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3756 u64 bus_addr = txr->txdma.dma_paddr;
3757 /* Base and Len of TX Ring */
3758 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3759 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3760 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3761 (u32)(bus_addr >> 32));
3762 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3764 /* Init the HEAD/TAIL indices */
3765 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3766 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3768 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3769 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3770 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3772 txr->busy = EM_TX_IDLE;
/* Build TXDCTL: prefetch/host/writeback thresholds plus required bits */
3773 txdctl = 0; /* clear txdctl */
3774 txdctl |= 0x1f; /* PTHRESH */
3775 txdctl |= 1 << 8; /* HTHRESH */
3776 txdctl |= 1 << 16;/* WTHRESH */
3777 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3778 txdctl |= E1000_TXDCTL_GRAN;
3779 txdctl |= 1 << 25; /* LWTHRESH */
3781 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3784 /* Set the default values for the Tx Inter Packet Gap timer */
3785 switch (adapter->hw.mac.type) {
3786 case e1000_80003es2lan:
3787 tipg = DEFAULT_82543_TIPG_IPGR1;
3788 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3789 E1000_TIPG_IPGR2_SHIFT;
/* Default case: IPGT depends on media type (fiber/serdes vs copper) */
3792 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3793 (adapter->hw.phy.media_type ==
3794 e1000_media_type_internal_serdes))
3795 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3797 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3798 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3799 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3802 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3803 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3805 if(adapter->hw.mac.type >= e1000_82540)
3806 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3807 adapter->tx_abs_int_delay.value);
/* Per-MAC TARC workarounds, keyed on mac.type */
3809 if ((adapter->hw.mac.type == e1000_82571) ||
3810 (adapter->hw.mac.type == e1000_82572)) {
3811 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3812 tarc |= TARC_SPEED_MODE_BIT;
3813 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3814 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3815 /* errata: program both queues to unweighted RR */
3816 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3818 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3819 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3821 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3822 } else if (adapter->hw.mac.type == e1000_82574) {
3823 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3824 tarc |= TARC_ERRATA_BIT;
3825 if ( adapter->num_queues > 1) {
3826 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3827 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3828 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3830 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3833 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3834 if (adapter->tx_int_delay.value > 0)
3835 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3837 /* Program the Transmit Control Register */
3838 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3839 tctl &= ~E1000_TCTL_CT;
3840 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3841 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3843 if (adapter->hw.mac.type >= e1000_82571)
3844 tctl |= E1000_TCTL_MULR;
3846 /* This write will effectively turn on the transmit unit. */
3847 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3849 /* SPT and KBL errata workarounds */
3850 if (hw->mac.type == e1000_pch_spt) {
/* NOTE(review): E1000_RCTL_RDMTS_HEX is reused here as an IOSFPC bit
 * value — presumably the numeric values coincide; verify against the
 * i218/i219 spec update before changing */
3852 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3853 reg |= E1000_RCTL_RDMTS_HEX;
3854 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3855 /* i218-i219 Specification Update 1.5.4.5 */
3856 reg = E1000_READ_REG(hw, E1000_TARC(0));
3857 reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3858 reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3859 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3864 /*********************************************************************
3866 * Free all transmit rings.
3868 **********************************************************************/
/*
 * em_free_transmit_structures - free every TX ring: per-descriptor
 * buffers, the DMA'd descriptor area, and the ring lock, then the
 * tx_rings array itself.  Counterpart of em_allocate_queues() TX side.
 */
3870 em_free_transmit_structures(struct adapter *adapter)
3872 struct tx_ring *txr = adapter->tx_rings;
3874 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3876 em_free_transmit_buffers(txr);
3877 em_dma_free(adapter, &txr->txdma);
3879 EM_TX_LOCK_DESTROY(txr);
3882 free(adapter->tx_rings, M_DEVBUF);
3885 /*********************************************************************
3887 * Free transmit ring related data structures.
3889 **********************************************************************/
/*
 * em_free_transmit_buffers - release one ring's per-descriptor state:
 * any pending mbufs (synced/unloaded first), the DMA maps, the buf
 * ring (FreeBSD >= 8), the tx_buffers array, and the TX DMA tag.
 * Tolerates partial setup: every resource is NULL-checked.
 */
3891 em_free_transmit_buffers(struct tx_ring *txr)
3893 struct adapter *adapter = txr->adapter;
3894 struct em_txbuffer *txbuf;
3896 INIT_DEBUGOUT("free_transmit_ring: begin");
3898 if (txr->tx_buffers == NULL)
3901 for (int i = 0; i < adapter->num_tx_desc; i++) {
3902 txbuf = &txr->tx_buffers[i];
/* Descriptor still holds a packet: finish the DMA, free the mbuf */
3903 if (txbuf->m_head != NULL) {
3904 bus_dmamap_sync(txr->txtag, txbuf->map,
3905 BUS_DMASYNC_POSTWRITE);
3906 bus_dmamap_unload(txr->txtag,
3908 m_freem(txbuf->m_head);
3909 txbuf->m_head = NULL;
3910 if (txbuf->map != NULL) {
3911 bus_dmamap_destroy(txr->txtag,
/* No mbuf, but a map may still exist: unload then destroy it */
3915 } else if (txbuf->map != NULL) {
3916 bus_dmamap_unload(txr->txtag,
3918 bus_dmamap_destroy(txr->txtag,
3923 #if __FreeBSD_version >= 800000
3924 if (txr->br != NULL)
3925 buf_ring_free(txr->br, M_DEVBUF);
3927 if (txr->tx_buffers != NULL) {
3928 free(txr->tx_buffers, M_DEVBUF);
3929 txr->tx_buffers = NULL;
3931 if (txr->txtag != NULL) {
3932 bus_dma_tag_destroy(txr->txtag);
3939 /*********************************************************************
3940 * The offload context is protocol specific (TCP/UDP) and thus
3941 * only needs to be set when the protocol changes. The occasion
3942 * of a context change can be a performance detriment, and
3943 * might be better just disabled. The reason arises in the way
3944 * in which the controller supports pipelined requests from the
3945 * Tx data DMA. Up to four requests can be pipelined, and they may
3946 * belong to the same packet or to multiple packets. However all
3947 * requests for one packet are issued before a request is issued
3948 * for a subsequent packet and if a request for the next packet
3949 * requires a context change, that request will be stalled
3950 * until the previous request completes. This means setting up
3951 * a new context effectively disables pipelined Tx data DMA which
3952 * in turn greatly slow down performance to send small sized
3954 **********************************************************************/
/*
 * em_transmit_checksum_setup - emit a context descriptor configuring
 * IP and/or TCP/UDP checksum offload for the current packet, and set
 * *txd_upper/*txd_lower for the data descriptors that follow.  On
 * single-queue adapters the last programmed context is cached in
 * txr->last_hw_* so an identical context can be skipped; with more
 * than one queue (82574) a new context is always written (the 82574L
 * only remembers the last context regardless of queue — see hardware
 * spec section 7.2.6 as quoted below).  Consumes at most one
 * descriptor and advances next_avail_desc.
 */
3956 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3957 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3959 struct adapter *adapter = txr->adapter;
3960 struct e1000_context_desc *TXD = NULL;
3961 struct em_txbuffer *tx_buffer;
3965 u8 ipcso, ipcss, tucso, tucss;
3967 ipcss = ipcso = tucss = tucso = 0;
/* hdr_len = end of the IP header = L2 offset + IP header length */
3968 hdr_len = ip_off + (ip->ip_hl << 2);
3969 cur = txr->next_avail_desc;
3971 /* Setup of IP header checksum. */
3972 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3973 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3976 ipcso = ip_off + offsetof(struct ip, ip_sum);
3978 * Start offset for header checksum calculation.
3979 * End offset for header checksum calculation.
3980 * Offset of place to put the checksum.
3982 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3983 TXD->lower_setup.ip_fields.ipcss = ipcss;
3984 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3985 TXD->lower_setup.ip_fields.ipcso = ipcso;
3986 cmd |= E1000_TXD_CMD_IP;
/* TCP payload checksum offload */
3989 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3990 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3991 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3992 offload |= CSUM_TCP;
3994 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3996 * The 82574L can only remember the *last* context used
3997 * regardless of queue that it was use for. We cannot reuse
3998 * contexts on this hardware platform and must generate a new
3999 * context every time. 82574L hardware spec, section 7.2.6,
4002 if (adapter->num_queues < 2) {
4004 * Setting up new checksum offload context for every
4005 * frames takes a lot of processing time for hardware.
4006 * This also reduces performance a lot for small sized
4007 * frames so avoid it if driver can use previously
4008 * configured checksum offload context.
4010 if (txr->last_hw_offload == offload) {
4011 if (offload & CSUM_IP) {
4012 if (txr->last_hw_ipcss == ipcss &&
4013 txr->last_hw_ipcso == ipcso &&
4014 txr->last_hw_tucss == tucss &&
4015 txr->last_hw_tucso == tucso)
4018 if (txr->last_hw_tucss == tucss &&
4019 txr->last_hw_tucso == tucso)
/* Cache the context we are about to program */
4023 txr->last_hw_offload = offload;
4024 txr->last_hw_tucss = tucss;
4025 txr->last_hw_tucso = tucso;
4028 * Start offset for payload checksum calculation.
4029 * End offset for payload checksum calculation.
4030 * Offset of place to put the checksum.
4032 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4033 TXD->upper_setup.tcp_fields.tucss = hdr_len;
4034 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4035 TXD->upper_setup.tcp_fields.tucso = tucso;
4036 cmd |= E1000_TXD_CMD_TCP;
/* UDP payload checksum offload (mirror of the TCP branch above) */
4037 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4038 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4039 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4041 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4043 * The 82574L can only remember the *last* context used
4044 * regardless of queue that it was use for. We cannot reuse
4045 * contexts on this hardware platform and must generate a new
4046 * context every time. 82574L hardware spec, section 7.2.6,
4049 if (adapter->num_queues < 2) {
4051 * Setting up new checksum offload context for every
4052 * frames takes a lot of processing time for hardware.
4053 * This also reduces performance a lot for small sized
4054 * frames so avoid it if driver can use previously
4055 * configured checksum offload context.
4057 if (txr->last_hw_offload == offload) {
4058 if (offload & CSUM_IP) {
4059 if (txr->last_hw_ipcss == ipcss &&
4060 txr->last_hw_ipcso == ipcso &&
4061 txr->last_hw_tucss == tucss &&
4062 txr->last_hw_tucso == tucso)
4065 if (txr->last_hw_tucss == tucss &&
4066 txr->last_hw_tucso == tucso)
4070 txr->last_hw_offload = offload;
4071 txr->last_hw_tucss = tucss;
4072 txr->last_hw_tucso = tucso;
4075 * Start offset for header checksum calculation.
4076 * End offset for header checksum calculation.
4077 * Offset of place to put the checksum.
4079 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4080 TXD->upper_setup.tcp_fields.tucss = tucss;
4081 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4082 TXD->upper_setup.tcp_fields.tucso = tucso;
4085 if (offload & CSUM_IP) {
4086 txr->last_hw_ipcss = ipcss;
4087 txr->last_hw_ipcso = ipcso;
/* Finalize the context descriptor and consume the slot */
4090 TXD->tcp_seg_setup.data = htole32(0);
4091 TXD->cmd_and_length =
4092 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4093 tx_buffer = &txr->tx_buffers[cur];
4094 tx_buffer->m_head = NULL;
4095 tx_buffer->next_eop = -1;
4097 if (++cur == adapter->num_tx_desc)
4101 txr->next_avail_desc = cur;
4105 /**********************************************************************
4107 * Setup work for hardware segmentation offload (TSO)
4109 **********************************************************************/
/*
 * em_tso_setup - build the TSO context descriptor for an IPv4/TCP
 * packet: IP header checksum offsets, TCP payload checksum offsets,
 * MSS and total header length.  Sets *txd_upper/*txd_lower for the
 * data descriptors, consumes one descriptor, and advances
 * next_avail_desc.  A new context is written for every TSO frame
 * (see the comment below on why reuse is not attempted).
 */
4111 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4112 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4114 struct adapter *adapter = txr->adapter;
4115 struct e1000_context_desc *TXD;
4116 struct em_txbuffer *tx_buffer;
4120 * In theory we can use the same TSO context if and only if
4121 * frame is the same type(IP/TCP) and the same MSS. However
4122 * checking whether a frame has the same IP/TCP structure is
4123 * hard thing so just ignore that and always restablish a
/* Total header bytes: L2 offset + IP header + TCP header */
4126 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4127 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
4128 E1000_TXD_DTYP_D | /* Data descr type */
4129 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
4131 /* IP and/or TCP header checksum calculation and insertion. */
4132 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4134 cur = txr->next_avail_desc;
4135 tx_buffer = &txr->tx_buffers[cur];
4136 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4139 * Start offset for header checksum calculation.
4140 * End offset for header checksum calculation.
4141 * Offset of place put the checksum.
4143 TXD->lower_setup.ip_fields.ipcss = ip_off;
4144 TXD->lower_setup.ip_fields.ipcse =
4145 htole16(ip_off + (ip->ip_hl << 2) - 1);
4146 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4148 * Start offset for payload checksum calculation.
4149 * End offset for payload checksum calculation.
4150 * Offset of place to put the checksum.
4152 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4153 TXD->upper_setup.tcp_fields.tucse = 0;
4154 TXD->upper_setup.tcp_fields.tucso =
4155 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4157 * Payload size per packet w/o any headers.
4158 * Length of all headers up to payload.
4160 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4161 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4163 TXD->cmd_and_length = htole32(adapter->txd_cmd |
4164 E1000_TXD_CMD_DEXT | /* Extended descr */
4165 E1000_TXD_CMD_TSE | /* TSE context */
4166 E1000_TXD_CMD_IP | /* Do IP csum */
4167 E1000_TXD_CMD_TCP | /* Do TCP checksum */
4168 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
/* Consume the context descriptor slot */
4170 tx_buffer->m_head = NULL;
4171 tx_buffer->next_eop = -1;
4173 if (++cur == adapter->num_tx_desc)
4177 txr->next_avail_desc = cur;
4182 /**********************************************************************
4184 * Examine each tx_buffer in the used queue. If the hardware is done
4185 * processing the packet then free associated resources. The
4186 * tx_buffer is put back on the free queue.
4188 **********************************************************************/
/*
 * em_txeof - TX completion processing.  Starting at next_to_clean,
 * for each packet whose EOP descriptor has the DD (descriptor done)
 * status bit set: zero the cleaned descriptors, sync/unload the DMA
 * map, free the mbuf, and reclaim tx_avail.  Also drives the hang
 * detection state (busy/HUNG/IDLE) and clears IFF_DRV_OACTIVE once
 * enough descriptors are free.  Caller must hold the TX lock.
 */
4190 em_txeof(struct tx_ring *txr)
4192 struct adapter *adapter = txr->adapter;
4193 int first, last, done, processed;
4194 struct em_txbuffer *tx_buffer;
4195 struct e1000_tx_desc *tx_desc, *eop_desc;
4196 struct ifnet *ifp = adapter->ifp;
4198 EM_TX_LOCK_ASSERT(txr);
/* In netmap mode the netmap stack handles TX completion */
4200 if (netmap_tx_irq(ifp, txr->me))
4202 #endif /* DEV_NETMAP */
4204 /* No work, make sure hang detection is disabled */
4205 if (txr->tx_avail == adapter->num_tx_desc) {
4206 txr->busy = EM_TX_IDLE;
4211 first = txr->next_to_clean;
4212 tx_desc = &txr->tx_base[first];
4213 tx_buffer = &txr->tx_buffers[first];
4214 last = tx_buffer->next_eop;
4215 eop_desc = &txr->tx_base[last];
4218 * What this does is get the index of the
4219 * first descriptor AFTER the EOP of the
4220 * first packet, that way we can do the
4221 * simple comparison on the inner while loop.
4223 if (++last == adapter->num_tx_desc)
4227 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4228 BUS_DMASYNC_POSTREAD);
/* Outer loop: one iteration per completed packet (EOP has DD set) */
4230 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4231 /* We clean the range of the packet */
4232 while (first != done) {
4233 tx_desc->upper.data = 0;
4234 tx_desc->lower.data = 0;
4235 tx_desc->buffer_addr = 0;
4239 if (tx_buffer->m_head) {
4240 bus_dmamap_sync(txr->txtag,
4242 BUS_DMASYNC_POSTWRITE);
4243 bus_dmamap_unload(txr->txtag,
4245 m_freem(tx_buffer->m_head);
4246 tx_buffer->m_head = NULL;
4248 tx_buffer->next_eop = -1;
4250 if (++first == adapter->num_tx_desc)
4253 tx_buffer = &txr->tx_buffers[first];
4254 tx_desc = &txr->tx_base[first];
4257 /* See if we can continue to the next packet */
4258 last = tx_buffer->next_eop;
4260 eop_desc = &txr->tx_base[last];
4261 /* Get new done point */
4262 if (++last == adapter->num_tx_desc) last = 0;
4267 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4268 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4270 txr->next_to_clean = first;
4273 ** Hang detection: we know there's work outstanding
4274 ** or the entry return would have been taken, so no
4275 ** descriptor processed here indicates a potential hang.
4276 ** The local timer will examine this and do a reset if needed.
4278 if (processed == 0) {
4279 if (txr->busy != EM_TX_HUNG)
4281 } else /* At least one descriptor was cleaned */
4282 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4285 * If we have a minimum free, clear IFF_DRV_OACTIVE
4286 * to tell the stack that it is OK to send packets.
4287 * Notice that all writes of OACTIVE happen under the
4288 * TX lock which, with a single queue, guarantees
4291 if (txr->tx_avail >= EM_MAX_SCATTER) {
4292 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4295 /* Disable hang detection if all clean */
4296 if (txr->tx_avail == adapter->num_tx_desc)
4297 txr->busy = EM_TX_IDLE;
4300 /*********************************************************************
4302 * Refresh RX descriptor mbufs from system mbuf buffer pool.
4304 **********************************************************************/
/*
 * em_refresh_mbufs - replenish RX descriptors from next_to_refresh up
 * to (but not including) 'limit' with fresh jumbo-cluster mbufs,
 * loading each into its DMA map and rewriting the descriptor.  Stops
 * early on mbuf or DMA-load failure; the RDT tail register is
 * advanced only as far as descriptors were actually refreshed.
 */
4306 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4308 struct adapter *adapter = rxr->adapter;
4310 bus_dma_segment_t segs;
4311 struct em_rxbuffer *rxbuf;
4312 int i, j, error, nsegs;
4313 bool cleaned = FALSE;
4315 i = j = rxr->next_to_refresh;
4317 ** Get one descriptor beyond
4318 ** our work mark to control
4321 if (++j == adapter->num_rx_desc)
4324 while (j != limit) {
4325 rxbuf = &rxr->rx_buffers[i];
4326 if (rxbuf->m_head == NULL) {
4327 m = m_getjcl(M_NOWAIT, MT_DATA,
4328 M_PKTHDR, adapter->rx_mbuf_sz);
4330 ** If we have a temporary resource shortage
4331 ** that causes a failure, just abort refresh
4332 ** for now, we will return to this point when
4333 ** reinvoked from em_rxeof.
/* Normalize the (possibly recycled) mbuf before mapping it */
4340 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4341 m->m_flags |= M_PKTHDR;
4342 m->m_data = m->m_ext.ext_buf;
4344 /* Use bus_dma machinery to setup the memory mapping */
4345 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4346 m, &segs, &nsegs, BUS_DMA_NOWAIT);
4348 printf("Refresh mbufs: hdr dmamap load"
4349 " failure - %d\n", error);
4351 rxbuf->m_head = NULL;
4355 rxbuf->paddr = segs.ds_addr;
4356 bus_dmamap_sync(rxr->rxtag,
4357 rxbuf->map, BUS_DMASYNC_PREREAD);
4358 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4361 i = j; /* Next is precalulated for us */
4362 rxr->next_to_refresh = i;
4363 /* Calculate next controlling index */
4364 if (++j == adapter->num_rx_desc)
4369 ** Update the tail pointer only if,
4370 ** and as far as we have refreshed.
4373 E1000_WRITE_REG(&adapter->hw,
4374 E1000_RDT(rxr->me), rxr->next_to_refresh);
4380 /*********************************************************************
4382 * Allocate memory for rx_buffer structures. Since we use one
4383 * rx_buffer per received packet, the maximum number of rx_buffer's
4384 * that we'll need is equal to the number of receive descriptors
4385 * that we've allocated.
4387 **********************************************************************/
/*
 * em_allocate_receive_buffers - allocate the em_rxbuffer array, the RX
 * DMA tag (single segment, up to MJUM9BYTES), and one DMA map per
 * descriptor.  Called once at attach; any failure unwinds through
 * em_free_receive_structures(), which tolerates partial setup.
 */
4389 em_allocate_receive_buffers(struct rx_ring *rxr)
4391 struct adapter *adapter = rxr->adapter;
4392 device_t dev = adapter->dev;
4393 struct em_rxbuffer *rxbuf;
4396 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4397 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4398 if (rxr->rx_buffers == NULL) {
4399 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4403 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4404 1, 0, /* alignment, bounds */
4405 BUS_SPACE_MAXADDR, /* lowaddr */
4406 BUS_SPACE_MAXADDR, /* highaddr */
4407 NULL, NULL, /* filter, filterarg */
4408 MJUM9BYTES, /* maxsize */
4410 MJUM9BYTES, /* maxsegsize */
4412 NULL, /* lockfunc */
4416 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
/* One DMA map per RX descriptor */
4421 rxbuf = rxr->rx_buffers;
4422 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4423 rxbuf = &rxr->rx_buffers[i];
4424 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4426 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4435 em_free_receive_structures(adapter);
4440 /*********************************************************************
4442 * Initialize a receive ring and its buffers.
4444 **********************************************************************/
/*
 * em_setup_receive_ring - (re)initialize one RX ring: zero the
 * descriptor area, free any mbufs left from a previous run, then
 * attach a fresh jumbo cluster to every descriptor (or, under
 * DEV_NETMAP, point descriptors at the netmap slot buffers) and
 * reset the ring indices.
 */
4446 em_setup_receive_ring(struct rx_ring *rxr)
4448 struct adapter *adapter = rxr->adapter;
4449 struct em_rxbuffer *rxbuf;
4450 bus_dma_segment_t seg[1];
4451 int rsize, nsegs, error = 0;
4453 struct netmap_adapter *na = NA(adapter->ifp);
4454 struct netmap_slot *slot;
4458 /* Clear the ring contents */
4460 rsize = roundup2(adapter->num_rx_desc *
4461 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4462 bzero((void *)rxr->rx_base, rsize);
4464 slot = netmap_reset(na, NR_RX, 0, 0);
4468 ** Free current RX buffer structs and their mbufs
4470 for (int i = 0; i < adapter->num_rx_desc; i++) {
4471 rxbuf = &rxr->rx_buffers[i];
4472 if (rxbuf->m_head != NULL) {
4473 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4474 BUS_DMASYNC_POSTREAD);
4475 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4476 m_freem(rxbuf->m_head);
4477 rxbuf->m_head = NULL; /* mark as freed */
4481 /* Now replenish the mbufs */
4482 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4483 rxbuf = &rxr->rx_buffers[j];
/* Netmap mode: use the netmap slot's buffer instead of an mbuf */
4486 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4490 addr = PNMB(na, slot + si, &paddr);
4491 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4492 rxbuf->paddr = paddr;
4493 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4496 #endif /* DEV_NETMAP */
4497 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4498 M_PKTHDR, adapter->rx_mbuf_sz);
4499 if (rxbuf->m_head == NULL) {
4503 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4504 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4505 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4507 /* Get the memory mapping */
4508 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4509 rxbuf->map, rxbuf->m_head, seg,
4510 &nsegs, BUS_DMA_NOWAIT);
4512 m_freem(rxbuf->m_head);
4513 rxbuf->m_head = NULL;
4516 bus_dmamap_sync(rxr->rxtag,
4517 rxbuf->map, BUS_DMASYNC_PREREAD);
4519 rxbuf->paddr = seg[0].ds_addr;
4520 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4522 rxr->next_to_check = 0;
4523 rxr->next_to_refresh = 0;
4524 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4525 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4532 /*********************************************************************
4534 * Initialize all receive rings.
4536 **********************************************************************/
/*
 * em_setup_receive_structures - initialize every RX ring.  On failure
 * of ring 'q', free the mbufs of the rings that had already completed
 * (the failing ring cleaned up after itself) and return the error.
 */
4538 em_setup_receive_structures(struct adapter *adapter)
4540 struct rx_ring *rxr = adapter->rx_rings;
4543 for (q = 0; q < adapter->num_queues; q++, rxr++)
4544 if (em_setup_receive_ring(rxr))
4550 * Free RX buffers allocated so far, we will only handle
4551 * the rings that completed, the failing case will have
4552 * cleaned up for itself. 'q' failed, so its the terminus.
4554 for (int i = 0; i < q; ++i) {
4555 rxr = &adapter->rx_rings[i];
4556 for (int n = 0; n < adapter->num_rx_desc; n++) {
4557 struct em_rxbuffer *rxbuf;
4558 rxbuf = &rxr->rx_buffers[n];
4559 if (rxbuf->m_head != NULL) {
4560 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4561 BUS_DMASYNC_POSTREAD);
4562 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4563 m_freem(rxbuf->m_head);
4564 rxbuf->m_head = NULL;
4567 rxr->next_to_check = 0;
4568 rxr->next_to_refresh = 0;
4574 /*********************************************************************
4576 * Free all receive rings.
4578 **********************************************************************/
/*
 * em_free_receive_structures - free every RX ring: per-descriptor
 * buffers, the DMA'd descriptor area, and the ring lock, then the
 * rx_rings array itself.  Counterpart of em_allocate_queues() RX side.
 */
4580 em_free_receive_structures(struct adapter *adapter)
4582 struct rx_ring *rxr = adapter->rx_rings;
4584 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4585 em_free_receive_buffers(rxr);
4586 /* Free the ring memory as well */
4587 em_dma_free(adapter, &rxr->rxdma);
4588 EM_RX_LOCK_DESTROY(rxr);
4591 free(adapter->rx_rings, M_DEVBUF);
4595 /*********************************************************************
4597 * Free receive ring data structures
4599 **********************************************************************/
/*
 * em_free_receive_buffers - release one RX ring's per-descriptor
 * state: DMA maps (synced/unloaded first), pending mbufs, the
 * rx_buffers array, and finally the RX DMA tag.  NULL-checks every
 * resource so partial setup can be freed safely.
 */
4601 em_free_receive_buffers(struct rx_ring *rxr)
4603 struct adapter *adapter = rxr->adapter;
4604 struct em_rxbuffer *rxbuf = NULL;
4606 INIT_DEBUGOUT("free_receive_buffers: begin");
4608 if (rxr->rx_buffers != NULL) {
4609 for (int i = 0; i < adapter->num_rx_desc; i++) {
4610 rxbuf = &rxr->rx_buffers[i];
4611 if (rxbuf->map != NULL) {
4612 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4613 BUS_DMASYNC_POSTREAD);
4614 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4615 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4617 if (rxbuf->m_head != NULL) {
4618 m_freem(rxbuf->m_head);
4619 rxbuf->m_head = NULL;
4622 free(rxr->rx_buffers, M_DEVBUF);
4623 rxr->rx_buffers = NULL;
4624 rxr->next_to_check = 0;
4625 rxr->next_to_refresh = 0;
/* Destroy the tag last, after all maps created from it are gone */
4628 if (rxr->rxtag != NULL) {
4629 bus_dma_tag_destroy(rxr->rxtag);
4637 /*********************************************************************
4639 * Enable receive unit.
4641 **********************************************************************/
/*
 * Program the hardware receive unit: disable receives (where safe), build
 * RCTL, set interrupt moderation (RADV/RDTR/ITR, plus per-vector EITR on
 * 82574 MSI-X), enable extended rx descriptors, configure rx checksum
 * offload and (under EM_MULTIQUEUE) RSS, then set up each ring's base,
 * length, head and tail registers and finally write RCTL back.
 * NOTE(review): extract is elided — else-branches, #else/#endif pairs and
 * closing braces are missing between numbered lines; code left byte-identical.
 */
4644 em_initialize_receive_unit(struct adapter *adapter)
4646 	struct rx_ring *rxr = adapter->rx_rings;
4647 	struct ifnet *ifp = adapter->ifp;
4648 	struct e1000_hw *hw = &adapter->hw;
4649 	u32 rctl, rxcsum, rfctl;
4651 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
4654 	 * Make sure receives are disabled while setting
4655 	 * up the descriptor ring
4657 	rctl = E1000_READ_REG(hw, E1000_RCTL);
4658 	/* Do not disable if ever enabled on this hardware */
4659 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4660 	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4662 	/* Setup the Receive Control Register */
4663 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4664 	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4665 	E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4666 	(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4668 	/* Do not store bad packets */
4669 	rctl &= ~E1000_RCTL_SBP;
4671 	/* Enable Long Packet receive */
4672 	if (ifp->if_mtu > ETHERMTU)
4673 	rctl |= E1000_RCTL_LPE;
4675 	rctl &= ~E1000_RCTL_LPE;
	/* Strip the CRC unless the tunable explicitly disables it. */
4678 	if (!em_disable_crc_stripping)
4679 	rctl |= E1000_RCTL_SECRC;
4681 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4682 	adapter->rx_abs_int_delay.value);
4684 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4685 	adapter->rx_int_delay.value);
4687 	 * Set the interrupt throttling rate. Value is calculated
4688 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4690 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4692 	/* Use extended rx descriptor formats */
4693 	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4694 	rfctl |= E1000_RFCTL_EXTEN;
4696 	** When using MSIX interrupts we need to throttle
4697 	** using the EITR register (82574 only)
4699 	if (hw->mac.type == e1000_82574) {
4700 	for (int i = 0; i < 4; i++)
4701 	E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4703 	/* Disable accelerated acknowledge */
4704 	rfctl |= E1000_RFCTL_ACK_DIS;
4706 	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4708 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4709 	if (ifp->if_capenable & IFCAP_RXCSUM) {
4710 #ifdef EM_MULTIQUEUE
4711 	rxcsum |= E1000_RXCSUM_TUOFL |
4712 	E1000_RXCSUM_IPOFL |
4715 	rxcsum |= E1000_RXCSUM_TUOFL;
4718 	rxcsum &= ~E1000_RXCSUM_TUOFL;
4720 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4722 #ifdef EM_MULTIQUEUE
4723 #define RSSKEYLEN 10
4724 	if (adapter->num_queues > 1) {
4725 	uint8_t rss_key[4 * RSSKEYLEN];
	/* Random hash key; RSSRK registers are loaded 32 bits at a time. */
4732 	arc4rand(rss_key, sizeof(rss_key), 0);
4733 	for (i = 0; i < RSSKEYLEN; ++i) {
4736 	rssrk = EM_RSSRK_VAL(rss_key, i);
4737 	E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4741 	 * Configure RSS redirect table in following fashion:
4742 	 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4744 	for (i = 0; i < sizeof(reta); ++i) {
4747 	q = (i % adapter->num_queues) << 7;
4748 	reta |= q << (8 * i);
4751 	for (i = 0; i < 32; ++i) {
4752 	E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4755 	E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4756 	E1000_MRQC_RSS_FIELD_IPV4_TCP |
4757 	E1000_MRQC_RSS_FIELD_IPV4 |
4758 	E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4759 	E1000_MRQC_RSS_FIELD_IPV6_EX |
4760 	E1000_MRQC_RSS_FIELD_IPV6);
4764 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
4765 	** long latencies are observed, like Lenovo X60. This
4766 	** change eliminates the problem, but since having positive
4767 	** values in RDTR is a known source of problems on other
4768 	** platforms another solution is being sought.
4770 	if (hw->mac.type == e1000_82573)
4771 	E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4773 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4774 	/* Setup the Base and Length of the Rx Descriptor Ring */
4775 	u64 bus_addr = rxr->rxdma.dma_paddr;
4776 	u32 rdt = adapter->num_rx_desc - 1; /* default */
4778 	E1000_WRITE_REG(hw, E1000_RDLEN(i),
4779 	adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4780 	E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4781 	E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4782 	/* Setup the Head and Tail Descriptor Pointers */
4783 	E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4786 	 * an init() while a netmap client is active must
4787 	 * preserve the rx buffers passed to userspace.
4789 	if (ifp->if_capenable & IFCAP_NETMAP)
4790 	rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4791 #endif /* DEV_NETMAP */
4792 	E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4796 	 * Set PTHRESH for improved jumbo performance
4797 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
4798 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4799 	 * Only write to RXDCTL(1) if there is a need for different
4802 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
4803 	(adapter->hw.mac.type == e1000_pch2lan) ||
4804 	(adapter->hw.mac.type == e1000_ich10lan)) &&
4805 	(ifp->if_mtu > ETHERMTU)) {
4806 	u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4807 	E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4808 	} else if (adapter->hw.mac.type == e1000_82574) {
4809 	for (int i = 0; i < adapter->num_queues; i++) {
4810 	u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4812 	rxdctl |= 0x20; /* PTHRESH */
4813 	rxdctl |= 4 << 8; /* HTHRESH */
4814 	rxdctl |= 4 << 16;/* WTHRESH */
4815 	rxdctl |= 1 << 24; /* Switch to granularity */
4816 	E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4820 	if (adapter->hw.mac.type >= e1000_pch2lan) {
4821 	if (ifp->if_mtu > ETHERMTU)
4822 	e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4824 	e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4827 	/* Make sure VLAN Filters are off */
4828 	rctl &= ~E1000_RCTL_VFE;
	/* Select the rx buffer size bits in RCTL from the mbuf cluster size. */
4830 	if (adapter->rx_mbuf_sz == MCLBYTES)
4831 	rctl |= E1000_RCTL_SZ_2048;
4832 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4833 	rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4834 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4835 	rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4837 	/* ensure we clear use DTYPE of 00 here */
4838 	rctl &= ~0x00000C00;
4839 	/* Write out the settings */
4840 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4846 /*********************************************************************
4848 * This routine executes in interrupt context. It replenishes
4849 * the mbufs in the descriptor and sends data which has been
4850 * dma'ed into host memory to upper layer.
4852 * We loop at most count times if count is > 0, or until done if
4855 * For polling we also now return the number of cleaned packets
4856 *********************************************************************/
/*
 * Receive completion processing (runs in interrupt context): walk the ring
 * from next_to_check, assemble multi-segment frames into an mbuf chain
 * (fmp/lmp), apply checksum/VLAN metadata, hand completed packets to
 * (*ifp->if_input)(), and refresh mbufs every 8 processed descriptors.
 * Returns TRUE while more descriptors appear ready (DD set); *done (when
 * non-NULL, per the header comment above) presumably receives the count
 * for polling — the store is elided from this extract, so confirm upstream.
 * NOTE(review): extract is elided — braces, 'break'/'continue' statements
 * and some declarations are missing from view; code left byte-identical.
 */
4858 em_rxeof(struct rx_ring *rxr, int count, int *done)
4860 	struct adapter *adapter = rxr->adapter;
4861 	struct ifnet *ifp = adapter->ifp;
4862 	struct mbuf *mp, *sendmp;
4865 	int i, processed, rxdone = 0;
4867 	union e1000_rx_desc_extended *cur;
	/* Make the descriptor writebacks visible to the CPU before reading. */
4872 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4873 	BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4877 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4881 #endif /* DEV_NETMAP */
4883 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
4884 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4887 	cur = &rxr->rx_base[i];
4888 	status = le32toh(cur->wb.upper.status_error);
	/* Descriptor not yet written back by hardware: stop. */
4891 	if ((status & E1000_RXD_STAT_DD) == 0)
4894 	len = le16toh(cur->wb.upper.length);
4895 	eop = (status & E1000_RXD_STAT_EOP) != 0;
4897 	if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4898 	(rxr->discard == TRUE)) {
4899 	adapter->dropped_pkts++;
4900 	++rxr->rx_discarded;
4901 	if (!eop) /* Catch subsequent segs */
4902 	rxr->discard = TRUE;
4904 	rxr->discard = FALSE;
4905 	em_rx_discard(rxr, i);
4908 	bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4910 	/* Assign correct length to the current fragment */
4911 	mp = rxr->rx_buffers[i].m_head;
4914 	/* Trigger for refresh */
4915 	rxr->rx_buffers[i].m_head = NULL;
4917 	/* First segment? */
4918 	if (rxr->fmp == NULL) {
4919 	mp->m_pkthdr.len = len;
4920 	rxr->fmp = rxr->lmp = mp;
4922 	/* Chain mbuf's together */
4923 	mp->m_flags &= ~M_PKTHDR;
4924 	rxr->lmp->m_next = mp;
4926 	rxr->fmp->m_pkthdr.len += len;
4932 	sendmp->m_pkthdr.rcvif = ifp;
4934 	em_receive_checksum(status, sendmp);
4935 #ifndef __NO_STRICT_ALIGNMENT
4936 	if (adapter->hw.mac.max_frame_size >
4937 	(MCLBYTES - ETHER_ALIGN) &&
4938 	em_fixup_rx(rxr) != 0)
4941 	if (status & E1000_RXD_STAT_VP) {
4942 	sendmp->m_pkthdr.ether_vtag =
4943 	le16toh(cur->wb.upper.vlan);
4944 	sendmp->m_flags |= M_VLANTAG;
4946 #ifndef __NO_STRICT_ALIGNMENT
4949 	rxr->fmp = rxr->lmp = NULL;
4953 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4954 	BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4956 	/* Zero out the receive descriptors status. */
4957 	cur->wb.upper.status_error &= htole32(~0xFF);
4958 	++rxdone; /* cumulative for POLL */
4961 	/* Advance our pointers to the next descriptor. */
4962 	if (++i == adapter->num_rx_desc)
4965 	/* Send to the stack */
4966 	if (sendmp != NULL) {
	/* Save the index before if_input(): the stack may re-enter the driver. */
4967 	rxr->next_to_check = i;
4969 	(*ifp->if_input)(ifp, sendmp);
4971 	i = rxr->next_to_check;
4974 	/* Only refresh mbufs every 8 descriptors */
4975 	if (processed == 8) {
4976 	em_refresh_mbufs(rxr, i);
4981 	/* Catch any remaining refresh work */
4982 	if (e1000_rx_unrefreshed(rxr))
4983 	em_refresh_mbufs(rxr, i);
4985 	rxr->next_to_check = i;
4990 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
/*
 * Drop the rx buffer at ring slot i: unload its DMA map, release any
 * partially-assembled chain held in rxr->fmp, and free the slot's mbuf so
 * em_refresh_mbufs() can recharge it.
 * NOTE(review): extract is elided — closing braces and the fmp/lmp reset
 * are missing from view; code left byte-identical.
 */
4993 static __inline void
4994 em_rx_discard(struct rx_ring *rxr, int i)
4996 	struct em_rxbuffer *rbuf;
4998 	rbuf = &rxr->rx_buffers[i];
4999 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
5001 	/* Free any previous pieces */
5002 	if (rxr->fmp != NULL) {
	/* Restore the header flag so m_freem() releases the whole chain. */
5003 	rxr->fmp->m_flags |= M_PKTHDR;
5009 	** Free buffer and allow em_refresh_mbufs()
5010 	** to clean up and recharge buffer.
5013 	m_free(rbuf->m_head);
5014 	rbuf->m_head = NULL;
5019 #ifndef __NO_STRICT_ALIGNMENT
5021 * When jumbo frames are enabled we should realign entire payload on
5022 * architecures with strict alignment. This is serious design mistake of 8254x
5023 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
5024 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
5025 * payload. On architecures without strict alignment restrictions 8254x still
5026 * performs unaligned memory access which would reduce the performance too.
5027 * To avoid copying over an entire frame to align, we allocate a new mbuf and
5028 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
5029 * existing mbuf chain.
5031 * Be aware, best performance of the 8254x is achived only when jumbo frame is
5032 * not used at all on architectures with strict alignment.
/*
 * Re-align a received frame's payload on strict-alignment architectures
 * (see the block comment above): if the ethernet header fits, shift the
 * data by ETHER_HDR_LEN in place; otherwise allocate a new pkthdr mbuf,
 * copy the header into it and prepend it to the chain. Drops the packet
 * (dropped_pkts++) on allocation failure.
 * NOTE(review): extract is elided — the declaration of m/n, return paths
 * and braces are missing from view; code left byte-identical.
 */
5035 em_fixup_rx(struct rx_ring *rxr)
5037 	struct adapter *adapter = rxr->adapter;
5043 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
	/* Overlapping copy — NOTE(review): bcopy handles overlap like memmove. */
5044 	bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5045 	m->m_data += ETHER_HDR_LEN;
5047 	MGETHDR(n, M_NOWAIT, MT_DATA);
5049 	bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5050 	m->m_data += ETHER_HDR_LEN;
5051 	m->m_len -= ETHER_HDR_LEN;
5052 	n->m_len = ETHER_HDR_LEN;
5053 	M_MOVE_PKTHDR(n, m);
5057 	adapter->dropped_pkts++;
/*
 * Initialize an extended rx descriptor for hardware use: point it at the
 * buffer's bus address and clear the writeback status so the DD bit is 0.
 */
5069 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5071 	rxd->read.buffer_addr = htole64(rxbuf->paddr);
5072 	/* DD bits must be cleared */
5073 	rxd->wb.upper.status_error= 0;
5076 /*********************************************************************
5078 * Verify that the hardware indicated that the checksum is valid.
5079 * Inform the stack about the status of checksum so that stack
5080 * doesn't spend time verifying the checksum.
5082 *********************************************************************/
/*
 * Translate the rx descriptor status/error bits into mbuf csum_flags so
 * the stack can skip software checksum verification. Does nothing further
 * when the hardware set the Ignore Checksum (IXSM) bit.
 * NOTE(review): extract is elided — the early return after the IXSM test
 * and closing braces are missing from view; code left byte-identical.
 */
5084 em_receive_checksum(uint32_t status, struct mbuf *mp)
5086 	mp->m_pkthdr.csum_flags = 0;
5088 	/* Ignore Checksum bit is set */
5089 	if (status & E1000_RXD_STAT_IXSM)
5092 	/* If the IP checksum exists and there is no IP Checksum error */
5093 	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5094 	E1000_RXD_STAT_IPCS) {
5095 	mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5098 	/* TCP or UDP checksum */
5099 	if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5100 	E1000_RXD_STAT_TCPCS) {
5101 	mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5102 	mp->m_pkthdr.csum_data = htons(0xffff);
5104 	if (status & E1000_RXD_STAT_UDPCS) {
5105 	mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5106 	mp->m_pkthdr.csum_data = htons(0xffff);
5111 * This routine is run via an vlan
/*
 * VLAN-config event handler: record a newly registered VLAN tag in the
 * shadow VFTA (bit per tag, 128 x 32-bit words) and re-init the interface
 * if hardware VLAN filtering is enabled so the change takes effect.
 * NOTE(review): extract is elided — the 'bit' computation and early
 * returns are missing from view; code left byte-identical.
 */
5115 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5117 	struct adapter *adapter = ifp->if_softc;
5120 	if (ifp->if_softc != arg) /* Not our event */
5123 	if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
5126 	EM_CORE_LOCK(adapter);
	/* VFTA word index = vtag / 32 (upper 7 bits of the 12-bit tag). */
5127 	index = (vtag >> 5) & 0x7F;
5129 	adapter->shadow_vfta[index] |= (1 << bit);
5130 	++adapter->num_vlans;
5131 	/* Re-init to load the changes */
5132 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5133 	em_init_locked(adapter);
5134 	EM_CORE_UNLOCK(adapter);
5138 * This routine is run via an vlan
/*
 * VLAN-unconfig event handler: mirror of em_register_vlan() — clear the
 * tag's bit in the shadow VFTA, decrement the count, and re-init if
 * hardware VLAN filtering is active.
 * NOTE(review): extract is elided — 'bit' computation and early returns
 * are missing from view; code left byte-identical.
 */
5142 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5144 	struct adapter *adapter = ifp->if_softc;
5147 	if (ifp->if_softc != arg)
5150 	if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5153 	EM_CORE_LOCK(adapter);
5154 	index = (vtag >> 5) & 0x7F;
5156 	adapter->shadow_vfta[index] &= ~(1 << bit);
5157 	--adapter->num_vlans;
5158 	/* Re-init to load the changes */
5159 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5160 	em_init_locked(adapter);
5161 	EM_CORE_UNLOCK(adapter);
/*
 * Called from init_locked after a soft reset (which clears the hardware
 * VFTA): repopulate the VLAN filter table from shadow_vfta, enable VLAN
 * tag stripping (CTRL_VME), and enable the VLAN filter (RCTL_VFE) with
 * CFI checking off. No-op when no VLANs are registered.
 */
5165 em_setup_vlan_hw_support(struct adapter *adapter)
5167 	struct e1000_hw *hw = &adapter->hw;
5171 	** We get here thru init_locked, meaning
5172 	** a soft reset, this has already cleared
5173 	** the VFTA and other state, so if there
5174 	** have been no vlan's registered do nothing.
5176 	if (adapter->num_vlans == 0)
5180 	** A soft reset zero's out the VFTA, so
5181 	** we need to repopulate it now.
5183 	for (int i = 0; i < EM_VFTA_SIZE; i++)
5184 	if (adapter->shadow_vfta[i] != 0)
5185 	E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5186 	i, adapter->shadow_vfta[i]);
5188 	reg = E1000_READ_REG(hw, E1000_CTRL);
5189 	reg |= E1000_CTRL_VME;
5190 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5192 	/* Enable the Filter Table */
5193 	reg = E1000_READ_REG(hw, E1000_RCTL);
5194 	reg &= ~E1000_RCTL_CFIEN;
5195 	reg |= E1000_RCTL_VFE;
5196 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
/*
 * Unmask interrupts by writing the enable mask to IMS; on 82574 also
 * program EIAC and widen the mask to cover the MSI-X vectors.
 */
5200 em_enable_intr(struct adapter *adapter)
5202 	struct e1000_hw *hw = &adapter->hw;
5203 	u32 ims_mask = IMS_ENABLE_MASK;
5205 	if (hw->mac.type == e1000_82574) {
5206 	E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5207 	ims_mask |= EM_MSIX_MASK;
5209 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
/*
 * Mask all interrupts: clear EIAC on 82574 (MSI-X auto-clear), then write
 * all-ones to IMC to disable every interrupt cause.
 */
5213 em_disable_intr(struct adapter *adapter)
5215 	struct e1000_hw *hw = &adapter->hw;
5217 	if (hw->mac.type == e1000_82574)
5218 	E1000_WRITE_REG(hw, EM_EIAC, 0);
5219 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5223 * Bit of a misnomer, what this really means is
5224 * to enable OS management of the system... aka
5225 * to disable special hardware management features
/*
 * Hand management traffic to the OS (see the comment above): stop the
 * firmware from intercepting ARP, and route management packets for ports
 * 623/664 up to the host via MANC2H.
 */
5228 em_init_manageability(struct adapter *adapter)
5230 	/* A shared code workaround */
5231 #define E1000_82542_MANC2H E1000_MANC2H
5232 	if (adapter->has_manage) {
5233 	int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5234 	int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5236 	/* disable hardware interception of ARP */
5237 	manc &= ~(E1000_MANC_ARP_EN);
5239 	/* enable receiving management packets to the host */
5240 	manc |= E1000_MANC_EN_MNG2HOST;
5241 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5242 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5243 	manc2h |= E1000_MNG2HOST_PORT_623;
5244 	manc2h |= E1000_MNG2HOST_PORT_664;
5245 	E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5246 	E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5251 * Give control back to hardware management
5252 * controller if there is one.
/*
 * Undo em_init_manageability(): return management traffic handling to the
 * firmware — re-enable ARP interception and stop routing management
 * packets to the host.
 */
5255 em_release_manageability(struct adapter *adapter)
5257 	if (adapter->has_manage) {
5258 	int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5260 	/* re-enable hardware interception of ARP */
5261 	manc |= E1000_MANC_ARP_EN;
5262 	manc &= ~E1000_MANC_EN_MNG2HOST;
5264 	E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5269 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5270 * For ASF and Pass Through versions of f/w this means
5271 * that the driver is loaded. For AMT version type f/w
5272 * this means that the network i/f is open.
/*
 * Assert the DRV_LOAD bit to tell management firmware the driver owns the
 * hardware: via SWSM on 82573, via CTRL_EXT on everything else.
 * NOTE(review): extract is elided — the return after the 82573 branch is
 * missing from view; code left byte-identical.
 */
5275 em_get_hw_control(struct adapter *adapter)
5279 	if (adapter->hw.mac.type == e1000_82573) {
5280 	swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5281 	E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5282 	swsm | E1000_SWSM_DRV_LOAD);
5286 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5287 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5288 	ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5293 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5294 * For ASF and Pass Through versions of f/w this means that
5295 * the driver is no longer loaded. For AMT versions of the
5296 * f/w this means that the network i/f is closed.
/*
 * Clear the DRV_LOAD bit (inverse of em_get_hw_control) so management
 * firmware regains ownership. No-op when the part has no manageability.
 */
5299 em_release_hw_control(struct adapter *adapter)
5303 	if (!adapter->has_manage)
5306 	if (adapter->hw.mac.type == e1000_82573) {
5307 	swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5308 	E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5309 	swsm & ~E1000_SWSM_DRV_LOAD);
5313 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5314 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5315 	ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
/*
 * Reject multicast addresses (low bit of first octet set) and the
 * all-zeros address; everything else is considered a valid unicast MAC.
 * NOTE(review): the return statements are elided from this extract.
 */
5320 em_is_valid_ether_addr(u8 *addr)
5322 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5324 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5332 ** Parse the interface capabilities with regard
5333 ** to both system management and wake-on-lan for
/*
 * Probe-time discovery of manageability and wake-on-lan capability: read
 * the APME setting from NVM (or WUC on newer ICH/PCH parts), set the
 * default WoL filters (magic + multicast), then apply per-device quirks
 * where the EEPROM is wrong or only port A supports wake events.
 * NOTE(review): extract is elided — many case labels, 'break's and the
 * closing of both switches are missing from view; code left byte-identical.
 */
5337 em_get_wakeup(device_t dev)
5339 	struct adapter *adapter = device_get_softc(dev);
5340 	u16 eeprom_data = 0, device_id, apme_mask;
5342 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5343 	apme_mask = EM_EEPROM_APME;
5345 	switch (adapter->hw.mac.type) {
5348 	adapter->has_amt = TRUE;
5352 	case e1000_80003es2lan:
	/* Dual-port parts keep per-port settings in separate NVM words. */
5353 	if (adapter->hw.bus.func == 1) {
5354 	e1000_read_nvm(&adapter->hw,
5355 	NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5358 	e1000_read_nvm(&adapter->hw,
5359 	NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5363 	case e1000_ich10lan:
5369 	apme_mask = E1000_WUC_APME;
5370 	adapter->has_amt = TRUE;
5371 	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5374 	e1000_read_nvm(&adapter->hw,
5375 	NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5378 	if (eeprom_data & apme_mask)
5379 	adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5381 	 * We have the eeprom settings, now apply the special cases
5382 	 * where the eeprom may be wrong or the board won't support
5383 	 * wake on lan on a particular port
5385 	device_id = pci_get_device(dev);
5386 	switch (device_id) {
5387 	case E1000_DEV_ID_82571EB_FIBER:
5388 	/* Wake events only supported on port A for dual fiber
5389 	 * regardless of eeprom setting */
5390 	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5391 	E1000_STATUS_FUNC_1)
5394 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
5395 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
5396 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5397 	/* if quad port adapter, disable WoL on all but port A */
5398 	if (global_quad_port_a != 0)
5400 	/* Reset for multiple quad port adapters */
5401 	if (++global_quad_port_a == 4)
5402 	global_quad_port_a = 0;
5410 * Enable PCI Wake On Lan capability
/*
 * Arm the device for wake-on-lan at suspend: trim adapter->wol to the
 * filters the user enabled (magic/mcast), advertise wakeup in CTRL, keep
 * the laser alive on fiber/serdes, run ICH/PCH suspend workarounds, arm
 * either PHY wakeup (PCH family) or MAC WUC/WUFC, and finally set
 * PME/PME-enable in PCI power-management config space.
 * NOTE(review): extract is elided — early returns, the 'error'/'status'
 * declarations and some braces are missing from view; code byte-identical.
 */
5413 em_enable_wakeup(device_t dev)
5415 	struct adapter *adapter = device_get_softc(dev);
5416 	struct ifnet *ifp = adapter->ifp;
5418 	u32 pmc, ctrl, ctrl_ext, rctl;
	/* No PCI power-management capability: nothing to arm. */
5421 	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5425 	** Determine type of Wakeup: note that wol
5426 	** is set with all bits on by default.
5428 	if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5429 	adapter->wol &= ~E1000_WUFC_MAG;
5431 	if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5432 	adapter->wol &= ~E1000_WUFC_MC;
5434 	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5435 	rctl |= E1000_RCTL_MPE;
5436 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5439 	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5442 	/* Advertise the wakeup capability */
5443 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5444 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5445 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5447 	/* Keep the laser running on Fiber adapters */
5448 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5449 	adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5450 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5451 	ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5452 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5455 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
5456 	(adapter->hw.mac.type == e1000_pchlan) ||
5457 	(adapter->hw.mac.type == e1000_ich9lan) ||
5458 	(adapter->hw.mac.type == e1000_ich10lan))
5459 	e1000_suspend_workarounds_ich8lan(&adapter->hw);
	/* PCH-family parts wake through the PHY rather than the MAC. */
5461 	if ((adapter->hw.mac.type == e1000_pchlan) ||
5462 	(adapter->hw.mac.type == e1000_pch2lan) ||
5463 	(adapter->hw.mac.type == e1000_pch_lpt) ||
5464 	(adapter->hw.mac.type == e1000_pch_spt) ||
5465 	(adapter->hw.mac.type == e1000_pch_cnp)) {
5466 	error = em_enable_phy_wakeup(adapter);
5470 	/* Enable wakeup by the MAC */
5471 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5472 	E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5475 	if (adapter->hw.phy.type == e1000_phy_igp_3)
5476 	e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5479 	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5480 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5481 	if (!error && (ifp->if_capenable & IFCAP_WOL))
5482 	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5483 	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5489 ** WOL in the newer chipset interfaces (pchlan)
5490 ** require thing to be copied into the phy
/*
 * On PCH-class parts wakeup state lives in the PHY (see the comment
 * above): mirror the MAC's receive addresses, multicast table and RCTL
 * settings into the corresponding BM_* PHY registers, enable PHY wake in
 * WUC/WUFC on both MAC and PHY, then set the Host-Wakeup enable bits on
 * PHY page 769 under the PHY semaphore.
 * NOTE(review): extract is elided — mreg/preg/ret declarations, goto/out
 * error paths and some braces are missing from view; code byte-identical.
 */
5493 em_enable_phy_wakeup(struct adapter *adapter)
5495 	struct e1000_hw *hw = &adapter->hw;
5499 	/* copy MAC RARs to PHY RARs */
5500 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5502 	/* copy MAC MTA to PHY MTA */
5503 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5504 	mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
	/* Each 32-bit MTA word becomes two 16-bit PHY registers. */
5505 	e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5506 	e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5507 	(u16)((mreg >> 16) & 0xFFFF));
5510 	/* configure PHY Rx Control register */
5511 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5512 	mreg = E1000_READ_REG(hw, E1000_RCTL);
5513 	if (mreg & E1000_RCTL_UPE)
5514 	preg |= BM_RCTL_UPE;
5515 	if (mreg & E1000_RCTL_MPE)
5516 	preg |= BM_RCTL_MPE;
5517 	preg &= ~(BM_RCTL_MO_MASK);
5518 	if (mreg & E1000_RCTL_MO_3)
5519 	preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5520 	<< BM_RCTL_MO_SHIFT);
5521 	if (mreg & E1000_RCTL_BAM)
5522 	preg |= BM_RCTL_BAM;
5523 	if (mreg & E1000_RCTL_PMCF)
5524 	preg |= BM_RCTL_PMCF;
5525 	mreg = E1000_READ_REG(hw, E1000_CTRL);
5526 	if (mreg & E1000_CTRL_RFCE)
5527 	preg |= BM_RCTL_RFCE;
5528 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5530 	/* enable PHY wakeup in MAC register */
5531 	E1000_WRITE_REG(hw, E1000_WUC,
5532 	E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5533 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5535 	/* configure and enable PHY wakeup in PHY registers */
5536 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5537 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5539 	/* activate PHY wakeup */
5540 	ret = hw->phy.ops.acquire(hw);
5542 	printf("Could not acquire PHY\n");
5545 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5546 	(BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5547 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5549 	printf("Could not read PHY page 769\n");
5552 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5553 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5555 	printf("Could not set PHY Host Wakeup bit\n");
5557 	hw->phy.ops.release(hw);
/*
 * led(4)-style callback: turn the identify LED on (setup + led_on) or
 * off (led_off + cleanup) under the core lock.
 * NOTE(review): the if/else around the onoff test is elided from this
 * extract; code left byte-identical.
 */
5563 em_led_func(void *arg, int onoff)
5565 	struct adapter *adapter = arg;
5567 	EM_CORE_LOCK(adapter);
5569 	e1000_setup_led(&adapter->hw);
5570 	e1000_led_on(&adapter->hw);
5572 	e1000_led_off(&adapter->hw);
5573 	e1000_cleanup_led(&adapter->hw);
5575 	EM_CORE_UNLOCK(adapter);
5579 ** Disable the L0S and L1 LINK states
/*
 * Disable PCIe ASPM L0s/L1 link states for the MAC types selected in the
 * (elided) switch: locate the PCI Express capability, check that the link
 * advertises ASPM at all, then clear the ASPM control bits in Link Control.
 * NOTE(review): case labels/early returns are elided from this extract;
 * code left byte-identical.
 */
5582 em_disable_aspm(struct adapter *adapter)
5585 	u16 link_cap,link_ctrl;
5586 	device_t dev = adapter->dev;
5588 	switch (adapter->hw.mac.type) {
5596 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5598 	reg = base + PCIER_LINK_CAP;
5599 	link_cap = pci_read_config(dev, reg, 2);
5600 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5602 	reg = base + PCIER_LINK_CTL;
5603 	link_ctrl = pci_read_config(dev, reg, 2);
5604 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5605 	pci_write_config(dev, reg, link_ctrl, 2);
5609 /**********************************************************************
5611 * Update the board statistics counters.
5613 **********************************************************************/
/*
 * Accumulate the hardware statistics registers (which clear on read) into
 * the adapter's software counters, then derive the ifnet collision and
 * rx/tx error totals from them. Symbol/sequence errors are only sampled
 * on copper media or when link is up.
 * NOTE(review): extract is elided — some closing braces and the 'ifp'
 * declaration are missing from view; code left byte-identical.
 */
5615 em_update_stats_counters(struct adapter *adapter)
5619 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5620 	(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5621 	adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5622 	adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5624 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5625 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5626 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5627 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5629 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5630 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5631 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5632 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5633 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5634 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5635 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5636 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5637 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5638 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5639 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5640 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5641 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5642 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5643 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5644 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5645 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5646 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5647 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5648 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5650 	/* For the 64-bit byte counters the low dword must be read first. */
5651 	/* Both registers clear on the read of the high dword */
5653 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5654 	((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5655 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5656 	((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5658 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5659 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5660 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5661 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5662 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5664 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5665 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5667 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5668 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5669 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5670 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5671 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5672 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5673 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5674 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5675 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5676 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5678 	/* Interrupt Counts */
5680 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5681 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5682 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5683 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5684 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5685 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5686 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5687 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5688 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5690 	if (adapter->hw.mac.type >= e1000_82543) {
5691 	adapter->stats.algnerrc +=
5692 	E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5693 	adapter->stats.rxerrc +=
5694 	E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5695 	adapter->stats.tncrs +=
5696 	E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5697 	adapter->stats.cexterr +=
5698 	E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5699 	adapter->stats.tsctc +=
5700 	E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5701 	adapter->stats.tsctfc +=
5702 	E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5706 	ifp->if_collisions = adapter->stats.colc;
	/* Rx errors: driver drops plus every hardware-counted rx fault class. */
5709 	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5710 	adapter->stats.crcerrs + adapter->stats.algnerrc +
5711 	adapter->stats.ruc + adapter->stats.roc +
5712 	adapter->stats.mpc + adapter->stats.cexterr;
5715 	ifp->if_oerrors = adapter->stats.ecol +
5716 	adapter->stats.latecol + adapter->watchdog_events;
5719 /* Export a single 32-bit register via a read-only sysctl. */
/*
 * Sysctl handler: oid_arg1 carries the adapter, oid_arg2 the register
 * offset; reads the register fresh on every sysctl query.
 */
5721 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5723 	struct adapter *adapter;
5726 	adapter = oidp->oid_arg1;
5727 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5728 	return (sysctl_handle_int(oidp, &val, 0, req));
5732 * Add sysctl variables, one per statistic, to the system.
5735 em_add_hw_stats(struct adapter *adapter)
5737 device_t dev = adapter->dev;
5739 struct tx_ring *txr = adapter->tx_rings;
5740 struct rx_ring *rxr = adapter->rx_rings;
5742 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5743 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5744 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5745 struct e1000_hw_stats *stats = &adapter->stats;
5747 struct sysctl_oid *stat_node, *queue_node, *int_node;
5748 struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5750 #define QUEUE_NAME_LEN 32
5751 char namebuf[QUEUE_NAME_LEN];
5753 /* Driver Statistics */
5754 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5755 CTLFLAG_RD, &adapter->dropped_pkts,
5756 "Driver dropped packets");
5757 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5758 CTLFLAG_RD, &adapter->link_irq,
5759 "Link MSIX IRQ Handled");
5760 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5761 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5762 "Defragmenting mbuf chain failed");
5763 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5764 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5765 "Driver tx dma failure in xmit");
5766 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5767 CTLFLAG_RD, &adapter->rx_overruns,
5769 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5770 CTLFLAG_RD, &adapter->watchdog_events,
5771 "Watchdog timeouts");
5773 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5774 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5775 em_sysctl_reg_handler, "IU",
5776 "Device Control Register");
5777 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5778 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5779 em_sysctl_reg_handler, "IU",
5780 "Receiver Control Register");
5781 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5782 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5783 "Flow Control High Watermark");
5784 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5785 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5786 "Flow Control Low Watermark");
5788 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5789 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5790 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5791 CTLFLAG_RD, NULL, "TX Queue Name");
5792 queue_list = SYSCTL_CHILDREN(queue_node);
5794 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5795 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5797 em_sysctl_reg_handler, "IU",
5798 "Transmit Descriptor Head");
5799 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5800 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5802 em_sysctl_reg_handler, "IU",
5803 "Transmit Descriptor Tail");
5804 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5805 CTLFLAG_RD, &txr->tx_irq,
5806 "Queue MSI-X Transmit Interrupts");
5807 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5808 CTLFLAG_RD, &txr->no_desc_avail,
5809 "Queue No Descriptor Available");
5811 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5812 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5813 CTLFLAG_RD, NULL, "RX Queue Name");
5814 queue_list = SYSCTL_CHILDREN(queue_node);
5816 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5817 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5819 em_sysctl_reg_handler, "IU",
5820 "Receive Descriptor Head");
5821 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5822 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5824 em_sysctl_reg_handler, "IU",
5825 "Receive Descriptor Tail");
5826 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5827 CTLFLAG_RD, &rxr->rx_irq,
5828 "Queue MSI-X Receive Interrupts");
5831 /* MAC stats get their own sub node */
5833 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5834 CTLFLAG_RD, NULL, "Statistics");
5835 stat_list = SYSCTL_CHILDREN(stat_node);
5837 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5838 CTLFLAG_RD, &stats->ecol,
5839 "Excessive collisions");
5840 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5841 CTLFLAG_RD, &stats->scc,
5842 "Single collisions");
5843 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5844 CTLFLAG_RD, &stats->mcc,
5845 "Multiple collisions");
5846 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5847 CTLFLAG_RD, &stats->latecol,
5849 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5850 CTLFLAG_RD, &stats->colc,
5852 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5853 CTLFLAG_RD, &adapter->stats.symerrs,
5855 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5856 CTLFLAG_RD, &adapter->stats.sec,
5858 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5859 CTLFLAG_RD, &adapter->stats.dc,
5861 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5862 CTLFLAG_RD, &adapter->stats.mpc,
5864 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5865 CTLFLAG_RD, &adapter->stats.rnbc,
5866 "Receive No Buffers");
5867 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5868 CTLFLAG_RD, &adapter->stats.ruc,
5869 "Receive Undersize");
5870 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5871 CTLFLAG_RD, &adapter->stats.rfc,
5872 "Fragmented Packets Received ");
5873 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5874 CTLFLAG_RD, &adapter->stats.roc,
5875 "Oversized Packets Received");
5876 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5877 CTLFLAG_RD, &adapter->stats.rjc,
5879 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5880 CTLFLAG_RD, &adapter->stats.rxerrc,
5882 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5883 CTLFLAG_RD, &adapter->stats.crcerrs,
5885 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5886 CTLFLAG_RD, &adapter->stats.algnerrc,
5887 "Alignment Errors");
5888 /* On 82575 these are collision counts */
5889 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5890 CTLFLAG_RD, &adapter->stats.cexterr,
5891 "Collision/Carrier extension errors");
5892 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5893 CTLFLAG_RD, &adapter->stats.xonrxc,
5895 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5896 CTLFLAG_RD, &adapter->stats.xontxc,
5898 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5899 CTLFLAG_RD, &adapter->stats.xoffrxc,
5901 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5902 CTLFLAG_RD, &adapter->stats.xofftxc,
5903 "XOFF Transmitted");
5905 /* Packet Reception Stats */
5906 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5907 CTLFLAG_RD, &adapter->stats.tpr,
5908 "Total Packets Received ");
5909 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5910 CTLFLAG_RD, &adapter->stats.gprc,
5911 "Good Packets Received");
5912 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5913 CTLFLAG_RD, &adapter->stats.bprc,
5914 "Broadcast Packets Received");
5915 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5916 CTLFLAG_RD, &adapter->stats.mprc,
5917 "Multicast Packets Received");
5918 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5919 CTLFLAG_RD, &adapter->stats.prc64,
5920 "64 byte frames received ");
5921 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5922 CTLFLAG_RD, &adapter->stats.prc127,
5923 "65-127 byte frames received");
5924 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5925 CTLFLAG_RD, &adapter->stats.prc255,
5926 "128-255 byte frames received");
5927 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5928 CTLFLAG_RD, &adapter->stats.prc511,
5929 "256-511 byte frames received");
5930 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5931 CTLFLAG_RD, &adapter->stats.prc1023,
5932 "512-1023 byte frames received");
5933 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5934 CTLFLAG_RD, &adapter->stats.prc1522,
5935 "1023-1522 byte frames received");
5936 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5937 CTLFLAG_RD, &adapter->stats.gorc,
5938 "Good Octets Received");
5940 /* Packet Transmission Stats */
5941 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5942 CTLFLAG_RD, &adapter->stats.gotc,
5943 "Good Octets Transmitted");
5944 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5945 CTLFLAG_RD, &adapter->stats.tpt,
5946 "Total Packets Transmitted");
5947 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5948 CTLFLAG_RD, &adapter->stats.gptc,
5949 "Good Packets Transmitted");
5950 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5951 CTLFLAG_RD, &adapter->stats.bptc,
5952 "Broadcast Packets Transmitted");
5953 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5954 CTLFLAG_RD, &adapter->stats.mptc,
5955 "Multicast Packets Transmitted");
5956 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5957 CTLFLAG_RD, &adapter->stats.ptc64,
5958 "64 byte frames transmitted ");
5959 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5960 CTLFLAG_RD, &adapter->stats.ptc127,
5961 "65-127 byte frames transmitted");
5962 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5963 CTLFLAG_RD, &adapter->stats.ptc255,
5964 "128-255 byte frames transmitted");
5965 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5966 CTLFLAG_RD, &adapter->stats.ptc511,
5967 "256-511 byte frames transmitted");
5968 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5969 CTLFLAG_RD, &adapter->stats.ptc1023,
5970 "512-1023 byte frames transmitted");
5971 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5972 CTLFLAG_RD, &adapter->stats.ptc1522,
5973 "1024-1522 byte frames transmitted");
5974 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5975 CTLFLAG_RD, &adapter->stats.tsctc,
5976 "TSO Contexts Transmitted");
5977 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5978 CTLFLAG_RD, &adapter->stats.tsctfc,
5979 "TSO Contexts Failed");
5982 /* Interrupt Stats */
5984 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5985 CTLFLAG_RD, NULL, "Interrupt Statistics");
5986 int_list = SYSCTL_CHILDREN(int_node);
5988 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5989 CTLFLAG_RD, &adapter->stats.iac,
5990 "Interrupt Assertion Count");
5992 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5993 CTLFLAG_RD, &adapter->stats.icrxptc,
5994 "Interrupt Cause Rx Pkt Timer Expire Count");
5996 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5997 CTLFLAG_RD, &adapter->stats.icrxatc,
5998 "Interrupt Cause Rx Abs Timer Expire Count");
6000 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6001 CTLFLAG_RD, &adapter->stats.ictxptc,
6002 "Interrupt Cause Tx Pkt Timer Expire Count");
6004 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6005 CTLFLAG_RD, &adapter->stats.ictxatc,
6006 "Interrupt Cause Tx Abs Timer Expire Count");
6008 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6009 CTLFLAG_RD, &adapter->stats.ictxqec,
6010 "Interrupt Cause Tx Queue Empty Count");
6012 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6013 CTLFLAG_RD, &adapter->stats.ictxqmtc,
6014 "Interrupt Cause Tx Queue Min Thresh Count");
6016 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6017 CTLFLAG_RD, &adapter->stats.icrxdmtc,
6018 "Interrupt Cause Rx Desc Min Thresh Count");
6020 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6021 CTLFLAG_RD, &adapter->stats.icrxoc,
6022 "Interrupt Cause Receiver Overrun Count");
6025 /**********************************************************************
6027 * This routine provides a way to dump out the adapter eeprom,
6028 * often a useful debug/service tool. This only dumps the first
6029 * 32 words, stuff that matters is in that extent.
6031 **********************************************************************/
/*
 * Sysctl handler: writing a trigger value to this node dumps the first
 * 32 16-bit words of the adapter EEPROM via em_print_nvm_info().
 * NOTE(review): this listing has gaps (return type, braces, the
 * `result`/`error` declarations, the trigger comparison and return
 * statements were dropped) -- consult the full if_em.c source.
 */
6033 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6035 struct adapter *adapter = (struct adapter *)arg1;
/* Copy the value in/out; a plain read (req->newptr == NULL) is a no-op. */
6040 error = sysctl_handle_int(oidp, &result, 0, req);
6042 if (error || !req->newptr)
6046 * This value will cause a hex dump of the
6047 * first 32 16-bit words of the EEPROM to
6051 em_print_nvm_info(adapter);
/*
 * Print the first 32 16-bit EEPROM words to the console, eight words
 * per row, each row prefixed with its starting word offset.
 * NOTE(review): declarations of i, j, row and eeprom_data, plus the
 * row-advance statements and closing braces, are missing from this
 * listing.
 */
6057 em_print_nvm_info(struct adapter *adapter)
6062 /* It's a bit crude, but it gets the job done */
6063 printf("\nInterface EEPROM Dump:\n");
6064 printf("Offset\n0x0000 ");
6065 for (i = 0, j = 0; i < 32; i++, j++) {
6066 if (j == 8) { /* Make the offset block */
6068 printf("\n0x00%x0 ",row);
6070 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6071 printf("%04x ", eeprom_data);
/*
 * Sysctl handler backing one interrupt-delay tuning knob (see
 * em_add_int_delay_sysctl).  Validates the user-supplied microsecond
 * value, converts it to device ticks, and rewrites the low 16 bits of
 * the associated delay register under the core lock.
 * NOTE(review): lines are missing from this listing -- the return
 * type, braces, the `regval` declaration, the return statements, and
 * the switch's case labels surrounding the TIDV / TXD_CMD_IDE
 * handling were dropped.
 */
6077 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6079 struct em_int_delay_info *info;
6080 struct adapter *adapter;
6082 int error, usecs, ticks;
6084 info = (struct em_int_delay_info *)arg1;
6085 usecs = info->value;
6086 error = sysctl_handle_int(oidp, &usecs, 0, req);
6087 if (error != 0 || req->newptr == NULL)
/* Reject values the 16-bit delay field cannot express. */
6089 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6091 info->value = usecs;
6092 ticks = EM_USECS_TO_TICKS(usecs);
6093 if (info->offset == E1000_ITR) /* units are 256ns here */
6096 adapter = info->adapter;
6098 EM_CORE_LOCK(adapter);
6099 regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
/* Preserve the register's upper bits; only the delay field changes. */
6100 regval = (regval & ~0xffff) | (ticks & 0xffff);
6101 /* Handle a few special cases. */
6102 switch (info->offset) {
6107 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6108 /* Don't write 0 into the TIDV register. */
6111 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6114 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6115 EM_CORE_UNLOCK(adapter);
/*
 * Register one interrupt-delay sysctl node (serviced by
 * em_sysctl_int_delay) and seed its em_int_delay_info with the device
 * register offset and default value.
 * NOTE(review): the return type line and surrounding braces are
 * missing from this listing.
 */
6120 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6121 const char *description, struct em_int_delay_info *info,
6122 int offset, int value)
6124 info->adapter = adapter;
6125 info->offset = offset;
6126 info->value = value;
6127 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6128 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6129 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6130 info, 0, em_sysctl_int_delay, "I", description);
/*
 * Convenience wrapper: expose an int limit as a read/write sysctl
 * under the device's sysctl tree, initialized to `value`.
 * NOTE(review): the return type line, braces, and lines 6136-6137 are
 * missing from this listing; the dropped code presumably assigns
 * *limit = value -- confirm against the full source.
 */
6134 em_set_sysctl_value(struct adapter *adapter, const char *name,
6135 const char *description, int *limit, int value)
6138 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6139 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6140 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6145 ** Set flow control using sysctl:
6146 ** Flow control values:
**	0 - off, 1 - rx pause, 2 - tx pause, 3 - full (default)
/*
 * Sysctl handler selecting the flow-control mode.  On a valid change
 * it records the new requested mode and forces the MAC flow-control
 * settings immediately.
 * NOTE(review): this listing is missing the return type, braces, the
 * `error` declaration, several return statements, and the remaining
 * switch cases (none/full/default) around the rx/tx_pause labels.
 * NOTE(review): `input` is function-local `static`, so its value is
 * shared across all em devices and sysctl writers -- verify this is
 * intentional upstream.
 */
6153 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6156 static int input = 3; /* default is full */
6157 struct adapter *adapter = (struct adapter *) arg1;
6159 error = sysctl_handle_int(oidp, &input, 0, req);
6161 if ((error) || (req->newptr == NULL))
6164 if (input == adapter->fc) /* no change? */
6168 case e1000_fc_rx_pause:
6169 case e1000_fc_tx_pause:
6172 adapter->hw.fc.requested_mode = input;
6173 adapter->fc = input;
6180 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6181 e1000_force_mac_fc(&adapter->hw);
6186 ** Manage Energy Efficient Ethernet:
6188 ** 0/1 - enabled/disabled
/*
 * Sysctl handler for Energy Efficient Ethernet: any non-zero write
 * sets the ich8lan eee_disable flag and reinitializes the interface
 * under the core lock so the change takes effect.
 * NOTE(review): the return type, braces, the `error`/`value`
 * declarations and the return statements are missing from this
 * listing.
 */
6191 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6193 struct adapter *adapter = (struct adapter *) arg1;
6196 value = adapter->hw.dev_spec.ich8lan.eee_disable;
6197 error = sysctl_handle_int(oidp, &value, 0, req);
6198 if (error || req->newptr == NULL)
6200 EM_CORE_LOCK(adapter);
/* Normalize to 0/1; em_init_locked() re-applies config to hardware. */
6201 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6202 em_init_locked(adapter);
6203 EM_CORE_UNLOCK(adapter);
/*
 * Sysctl handler: writing a trigger value prints the per-queue debug
 * dump (em_print_debug_info) for this adapter.
 * NOTE(review): the return type, braces, the `result`/`error`
 * declarations, the trigger comparison and the return statements are
 * missing from this listing.
 */
6208 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6210 struct adapter *adapter;
6215 error = sysctl_handle_int(oidp, &result, 0, req);
6217 if (error || !req->newptr)
6221 adapter = (struct adapter *)arg1;
6222 em_print_debug_info(adapter);
6229 ** This routine is meant to be fluid, add whatever is
6230 ** needed for debugging a problem. -jfv
/*
 * Console dump of driver and hardware state: interface run state,
 * then per-queue TX/RX head/tail registers, queue status, and
 * descriptor/discard counters.  Ad-hoc debugging aid, extended as
 * needed (see the comment above this function).
 * NOTE(review): several lines are missing from this listing -- e.g.
 * the `else` keywords for the printf pairs, the value arguments of
 * the "TX descriptors avail" and "RX discarded packets" printfs, and
 * the loop's closing brace.
 */
6233 em_print_debug_info(struct adapter *adapter)
6235 device_t dev = adapter->dev;
6236 struct tx_ring *txr = adapter->tx_rings;
6237 struct rx_ring *rxr = adapter->rx_rings;
6239 if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6240 printf("Interface is RUNNING ");
6242 printf("Interface is NOT RUNNING\n");
6244 if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6245 printf("and INACTIVE\n");
6247 printf("and ACTIVE\n");
/* Walk every queue pair; txr/rxr advance in lockstep with i. */
6249 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6250 device_printf(dev, "TX Queue %d ------\n", i);
6251 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6252 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6253 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6254 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6255 device_printf(dev, "TX descriptors avail = %d\n",
6257 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6258 txr->no_desc_avail);
6259 device_printf(dev, "RX Queue %d ------\n", i);
6260 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6261 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6262 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6263 device_printf(dev, "RX discarded packets = %ld\n",
6265 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6266 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6270 #ifdef EM_MULTIQUEUE
6273 * Write a new value to the EEPROM increasing the number of MSIX
6274 * vectors from 3 to 5, for proper multiqueue support.
/*
 * 82574 multiqueue support: if the EEPROM's PCIe control word does not
 * already encode 5 MSI-X vectors (field value 4, per the driver's own
 * "from 3 to 5" message), rewrite the field and update the NVM
 * checksum so the new vector count is reported after reset.
 * NOTE(review): the return type, braces and the `edata` declaration
 * are missing from this listing.
 * NOTE(review): the bare printf of "Current cap" bypasses
 * device_printf and looks like leftover debug output -- consider
 * removing or converting it upstream.
 */
6277 em_enable_vectors_82574(struct adapter *adapter)
6279 struct e1000_hw *hw = &adapter->hw;
6280 device_t dev = adapter->dev;
6283 e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6284 printf("Current cap: %#06x\n", edata);
6285 if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6286 device_printf(dev, "Writing to eeprom: increasing "
6287 "reported MSIX vectors from 3 to 5...\n");
6288 edata &= ~(EM_NVM_MSIX_N_MASK);
6289 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6290 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6291 e1000_update_nvm_checksum(hw);
6292 device_printf(dev, "Writing to eeprom: done\n");
/*
 * DDB command: reinitialize every attached em(4) device by calling
 * em_init_locked() on each softc under its core lock.
 * NOTE(review): braces and the dc/max_em/dev declarations are partly
 * missing from this listing.
 * NOTE(review): the loop bound `index < (max_em - 1)` appears to skip
 * the highest unit(s) reported by devclass_get_maxunit(), and `dev`
 * is not NULL-checked before device_get_driver() (unit numbers may be
 * sparse) -- verify both against upstream.
 */
6298 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6303 dc = devclass_find("em");
6304 max_em = devclass_get_maxunit(dc);
6306 for (int index = 0; index < (max_em - 1); index++) {
6308 dev = devclass_get_device(dc, index);
6309 if (device_get_driver(dev) == &em_driver) {
6310 struct adapter *adapter = device_get_softc(dev);
6311 EM_CORE_LOCK(adapter);
6312 em_init_locked(adapter);
6313 EM_CORE_UNLOCK(adapter);
6317 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6322 dc = devclass_find("em");
6323 max_em = devclass_get_maxunit(dc);
6325 for (int index = 0; index < (max_em - 1); index++) {
6327 dev = devclass_get_device(dc, index);
6328 if (device_get_driver(dev) == &em_driver)
6329 em_print_debug_info(device_get_softc(dev));