1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
38 #include "opt_inet6.h"
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/types.h>
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
59 #include <sys/module.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
71 #include <net/ethernet.h>
73 #include <net/if_arp.h>
74 #include <net/if_dl.h>
75 #include <net/if_media.h>
77 #include <net/if_types.h>
78 #include <net/if_vlan_var.h>
80 #include <netinet/in_systm.h>
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip6.h>
85 #include <netinet/tcp.h>
86 #include <netinet/udp.h>
88 #include <machine/in_cksum.h>
89 #include <dev/led/led.h>
90 #include <dev/pci/pcivar.h>
91 #include <dev/pci/pcireg.h>
93 #include "e1000_api.h"
94 #include "e1000_82571.h"
97 /*********************************************************************
99 *********************************************************************/
100 char em_driver_version[] = "7.6.1-k";
102 /*********************************************************************
103 * PCI Device ID Table
105 * Used by probe to select devices to load on
106 * Last field stores an index into e1000_strings
107 * Last entry must be all 0s
109 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110 *********************************************************************/
112 static em_vendor_info_t em_vendor_info_array[] =
114 /* Intel(R) PRO/1000 Network Connection */
115 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
116 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
119 PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
123 PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
125 PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
127 PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
129 PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
140 PCI_ANY_ID, PCI_ANY_ID, 0},
141 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
142 PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
144 PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
181 PCI_ANY_ID, PCI_ANY_ID, 0},
182 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
183 PCI_ANY_ID, PCI_ANY_ID, 0},
184 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
187 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
191 PCI_ANY_ID, PCI_ANY_ID, 0},
192 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
194 PCI_ANY_ID, PCI_ANY_ID, 0},
195 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
196 PCI_ANY_ID, PCI_ANY_ID, 0},
197 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
198 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
199 PCI_ANY_ID, PCI_ANY_ID, 0},
200 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
201 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
202 PCI_ANY_ID, PCI_ANY_ID, 0},
203 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
204 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
205 PCI_ANY_ID, PCI_ANY_ID, 0},
206 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
207 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
208 PCI_ANY_ID, PCI_ANY_ID, 0},
209 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
210 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
211 PCI_ANY_ID, PCI_ANY_ID, 0},
212 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
213 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
214 PCI_ANY_ID, PCI_ANY_ID, 0},
215 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
216 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
217 PCI_ANY_ID, PCI_ANY_ID, 0},
218 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
219 /* required last entry */
223 /*********************************************************************
224 * Table of branding strings for all supported NICs.
225 *********************************************************************/
/* Branding strings; indexed by the last field of em_vendor_info_array */
static char *em_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
231 /*********************************************************************
232 * Function prototypes
233 *********************************************************************/
234 static int em_probe(device_t);
235 static int em_attach(device_t);
236 static int em_detach(device_t);
237 static int em_shutdown(device_t);
238 static int em_suspend(device_t);
239 static int em_resume(device_t);
241 static int em_mq_start(struct ifnet *, struct mbuf *);
242 static int em_mq_start_locked(struct ifnet *,
244 static void em_qflush(struct ifnet *);
246 static void em_start(struct ifnet *);
247 static void em_start_locked(struct ifnet *, struct tx_ring *);
249 static int em_ioctl(struct ifnet *, u_long, caddr_t);
250 static void em_init(void *);
251 static void em_init_locked(struct adapter *);
252 static void em_stop(void *);
253 static void em_media_status(struct ifnet *, struct ifmediareq *);
254 static int em_media_change(struct ifnet *);
255 static void em_identify_hardware(struct adapter *);
256 static int em_allocate_pci_resources(struct adapter *);
257 static int em_allocate_legacy(struct adapter *);
258 static int em_allocate_msix(struct adapter *);
259 static int em_allocate_queues(struct adapter *);
260 static int em_setup_msix(struct adapter *);
261 static void em_free_pci_resources(struct adapter *);
262 static void em_local_timer(void *);
263 static void em_reset(struct adapter *);
264 static int em_setup_interface(device_t, struct adapter *);
265 static void em_flush_desc_rings(struct adapter *);
267 static void em_setup_transmit_structures(struct adapter *);
268 static void em_initialize_transmit_unit(struct adapter *);
269 static int em_allocate_transmit_buffers(struct tx_ring *);
270 static void em_free_transmit_structures(struct adapter *);
271 static void em_free_transmit_buffers(struct tx_ring *);
273 static int em_setup_receive_structures(struct adapter *);
274 static int em_allocate_receive_buffers(struct rx_ring *);
275 static void em_initialize_receive_unit(struct adapter *);
276 static void em_free_receive_structures(struct adapter *);
277 static void em_free_receive_buffers(struct rx_ring *);
279 static void em_enable_intr(struct adapter *);
280 static void em_disable_intr(struct adapter *);
281 static void em_update_stats_counters(struct adapter *);
282 static void em_add_hw_stats(struct adapter *adapter);
283 static void em_txeof(struct tx_ring *);
284 static bool em_rxeof(struct rx_ring *, int, int *);
285 #ifndef __NO_STRICT_ALIGNMENT
286 static int em_fixup_rx(struct rx_ring *);
288 static void em_setup_rxdesc(union e1000_rx_desc_extended *,
289 const struct em_rxbuffer *rxbuf);
290 static void em_receive_checksum(uint32_t status, struct mbuf *);
291 static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
292 struct ip *, u32 *, u32 *);
293 static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
294 struct tcphdr *, u32 *, u32 *);
295 static void em_set_promisc(struct adapter *);
296 static void em_disable_promisc(struct adapter *);
297 static void em_set_multi(struct adapter *);
298 static void em_update_link_status(struct adapter *);
299 static void em_refresh_mbufs(struct rx_ring *, int);
300 static void em_register_vlan(void *, struct ifnet *, u16);
301 static void em_unregister_vlan(void *, struct ifnet *, u16);
302 static void em_setup_vlan_hw_support(struct adapter *);
303 static int em_xmit(struct tx_ring *, struct mbuf **);
304 static int em_dma_malloc(struct adapter *, bus_size_t,
305 struct em_dma_alloc *, int);
306 static void em_dma_free(struct adapter *, struct em_dma_alloc *);
307 static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
308 static void em_print_nvm_info(struct adapter *);
309 static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
310 static void em_print_debug_info(struct adapter *);
311 static int em_is_valid_ether_addr(u8 *);
312 static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
313 static void em_add_int_delay_sysctl(struct adapter *, const char *,
314 const char *, struct em_int_delay_info *, int, int);
315 /* Management and WOL Support */
316 static void em_init_manageability(struct adapter *);
317 static void em_release_manageability(struct adapter *);
318 static void em_get_hw_control(struct adapter *);
319 static void em_release_hw_control(struct adapter *);
320 static void em_get_wakeup(device_t);
321 static void em_enable_wakeup(device_t);
322 static int em_enable_phy_wakeup(struct adapter *);
323 static void em_led_func(void *, int);
324 static void em_disable_aspm(struct adapter *);
326 static int em_irq_fast(void *);
329 static void em_msix_tx(void *);
330 static void em_msix_rx(void *);
331 static void em_msix_link(void *);
332 static void em_handle_tx(void *context, int pending);
333 static void em_handle_rx(void *context, int pending);
334 static void em_handle_link(void *context, int pending);
337 static void em_enable_vectors_82574(struct adapter *);
340 static void em_set_sysctl_value(struct adapter *, const char *,
341 const char *, int *, int);
342 static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
343 static int em_sysctl_eee(SYSCTL_HANDLER_ARGS);
345 static __inline void em_rx_discard(struct rx_ring *, int);
347 #ifdef DEVICE_POLLING
348 static poll_handler_t em_poll;
351 /*********************************************************************
352 * FreeBSD Device Interface Entry Points
353 *********************************************************************/
static device_method_t em_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, em_probe),
	DEVMETHOD(device_attach, em_attach),
	DEVMETHOD(device_detach, em_detach),
	DEVMETHOD(device_shutdown, em_shutdown),
	DEVMETHOD(device_suspend, em_suspend),
	DEVMETHOD(device_resume, em_resume),
static driver_t em_driver = {
	"em", em_methods, sizeof(struct adapter),
devclass_t em_devclass;
/* Register with newbus on the pci bus and declare module dependencies */
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
375 /*********************************************************************
376 * Tunable default values.
377 *********************************************************************/
379 #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
380 #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
383 #define MAX_INTS_PER_SEC 8000
384 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
386 #define TSO_WORKAROUND 4
388 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
390 static int em_disable_crc_stripping = 0;
391 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
392 &em_disable_crc_stripping, 0, "Disable CRC Stripping");
394 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
395 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
396 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
397 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
398 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
399 0, "Default transmit interrupt delay in usecs");
400 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
401 0, "Default receive interrupt delay in usecs");
403 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
404 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
405 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
406 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
407 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
408 &em_tx_abs_int_delay_dflt, 0,
409 "Default transmit interrupt delay limit in usecs");
410 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
411 &em_rx_abs_int_delay_dflt, 0,
412 "Default receive interrupt delay limit in usecs");
414 static int em_rxd = EM_DEFAULT_RXD;
415 static int em_txd = EM_DEFAULT_TXD;
416 TUNABLE_INT("hw.em.rxd", &em_rxd);
417 TUNABLE_INT("hw.em.txd", &em_txd);
418 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
419 "Number of receive descriptors per queue");
420 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
421 "Number of transmit descriptors per queue");
423 static int em_smart_pwr_down = FALSE;
424 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
425 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
426 0, "Set to true to leave smart power down enabled on newer adapters");
428 /* Controls whether promiscuous also shows bad packets */
429 static int em_debug_sbp = FALSE;
430 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
431 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
432 "Show bad packets in promiscuous mode");
434 static int em_enable_msix = TRUE;
435 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
436 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
437 "Enable MSI-X interrupts");
440 static int em_num_queues = 1;
441 TUNABLE_INT("hw.em.num_queues", &em_num_queues);
442 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
443 "82574 only: Number of queues to configure, 0 indicates autoconfigure");
/*
** Global variable to store last used CPU when binding queues
** to CPUs in em_allocate_msix. Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
451 static int em_last_bind_cpu = -1;
453 /* How many packets rxeof tries to clean at a time */
454 static int em_rx_process_limit = 100;
455 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
456 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
457 &em_rx_process_limit, 0,
458 "Maximum number of received packets to process "
459 "at a time, -1 means unlimited");
461 /* Energy efficient ethernet - default to OFF */
462 static int eee_setting = 1;
463 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
464 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
465 "Enable Energy Efficient Ethernet");
467 /* Global used in WOL setup with multiport cards */
468 static int global_quad_port_a = 0;
470 #ifdef DEV_NETMAP /* see ixgbe.c for details */
471 #include <dev/netmap/if_em_netmap.h>
472 #endif /* DEV_NETMAP */
474 /*********************************************************************
475 * Device identification routine
477 * em_probe determines if the driver should be loaded on
478 * adapter based on PCI vendor/device id of the adapter.
480 * return BUS_PROBE_DEFAULT on success, positive on failure
481 *********************************************************************/
em_probe(device_t dev)
	char adapter_name[60];
	uint16_t pci_vendor_id = 0;
	uint16_t pci_device_id = 0;
	uint16_t pci_subvendor_id = 0;
	uint16_t pci_subdevice_id = 0;
	em_vendor_info_t *ent;
	INIT_DEBUGOUT("em_probe: begin");
	pci_vendor_id = pci_get_vendor(dev);
	/* Fast reject: only Intel (EM_VENDOR_ID) parts can match */
	if (pci_vendor_id != EM_VENDOR_ID)
	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);
	/* Walk the device table until the all-zero terminator entry */
	ent = em_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			/*
			 * NOTE(review): sprintf into a fixed 60-byte buffer;
			 * safe only while em_strings entries stay short —
			 * consider snprintf.
			 */
			sprintf(adapter_name, "%s %s",
			    em_strings[ent->index],
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
525 /*********************************************************************
526 * Device initialization routine
528 * The attach entry point is called when the driver is being loaded.
529 * This routine identifies the type of hardware, allocates all resources
530 * and initializes the hardware.
532 * return 0 on success, positive on failure
533 *********************************************************************/
/*
 * NOTE(review): this attach path is truncated in this view — declarations
 * of `hw`/`error`, error labels and several statements are missing; code
 * below is kept byte-identical, only comments repaired/added.
 */
em_attach(device_t dev)
	struct adapter *adapter;
	INIT_DEBUGOUT("em_attach: begin");
	/* Honor a "disabled" hint for this unit */
	if (resource_disabled("em", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
	/* Driver-private sysctl nodes: NVM dump, debug dump, flow control */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_nvm_info, "I", "NVM Information");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_sysctl_debug_info, "I", "Debug Information");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    em_set_flowcntl, "I", "Flow Control");
	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
	/* Determine hardware and mac info */
	em_identify_hardware(adapter);
	/* Setup PCI resources */
	if (em_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
	/*
	** For ICH8 and family we need to
	** map the flash memory, and this
	** must happen after the MAC is
	*/
	if ((hw->mac.type == e1000_ich8lan) ||
	    (hw->mac.type == e1000_ich9lan) ||
	    (hw->mac.type == e1000_ich10lan) ||
	    (hw->mac.type == e1000_pchlan) ||
	    (hw->mac.type == e1000_pch2lan) ||
	    (hw->mac.type == e1000_pch_lpt)) {
		int rid = EM_BAR_TYPE_FLASH;
		adapter->flash = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
		if (adapter->flash == NULL) {
			device_printf(dev, "Mapping of Flash failed\n");
		/* This is used in the shared code */
		hw->flash_address = (u8 *)adapter->flash;
		adapter->osdep.flash_bus_space_tag =
		    rman_get_bustag(adapter->flash);
		adapter->osdep.flash_bus_space_handle =
		    rman_get_bushandle(adapter->flash);
	/*
	** In the new SPT device flash is not a
	** separate BAR, rather it is also in BAR0,
	** so use the same tag and an offset handle for the
	** FLASH read/write macros in the shared code.
	*/
	else if (hw->mac.type >= e1000_pch_spt) {
		adapter->osdep.flash_bus_space_tag =
		    adapter->osdep.mem_bus_space_tag;
		adapter->osdep.flash_bus_space_handle =
		    adapter->osdep.mem_bus_space_handle
		    + E1000_FLASH_BASE_ADDR;
	/* Do Shared Code initialization */
	error = e1000_setup_init_funcs(hw, TRUE);
		device_printf(dev, "Setup of Shared code failed, error %d\n",
	/*
	 * Setup MSI/X or MSI if PCI Express
	 */
	adapter->msix = em_setup_msix(adapter);
	e1000_get_bus_info(hw);
	/* Set up some sysctls for the tunable interrupt delays */
	em_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_RADV),
	    em_rx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(hw, E1000_TADV),
	    em_tx_abs_int_delay_dflt);
	em_add_int_delay_sysctl(adapter, "itr",
	    "interrupt delay limit in usecs/4",
	    E1000_REGISTER(hw, E1000_ITR),
	/* Sysctl for limiting the amount of work done in the taskqueue */
	em_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    em_rx_process_limit);
	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    EM_DEFAULT_TXD, em_txd);
		adapter->num_tx_desc = EM_DEFAULT_TXD;
		adapter->num_tx_desc = em_txd;
	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    EM_DEFAULT_RXD, em_rxd);
		adapter->num_rx_desc = EM_DEFAULT_RXD;
		adapter->num_rx_desc = em_rxd;
	hw->mac.autoneg = DO_AUTO_NEG;
	hw->phy.autoneg_wait_to_complete = FALSE;
	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	/* Copper-specific PHY options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = FALSE;
		hw->phy.ms_type = EM_MASTER_SLAVE;
	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->hw.mac.max_frame_size =
	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	/*
	 * This controls when hardware reports transmit completion
	 */
	hw->mac.report_tx_early = 1;
	/*
	** Get queue/ring memory
	*/
	if (em_allocate_queues(adapter)) {
	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
	/* Check SOL/IDER usage */
	if (e1000_check_reset_block(hw))
		device_printf(dev, "PHY reset is blocked"
		    " due to SOL/IDER session.\n");
	/* Sysctl for setting Energy Efficient Ethernet */
	hw->dev_spec.ich8lan.eee_disable = eee_setting;
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, em_sysctl_eee, "I",
	    "Disable Energy Efficient Ethernet");
	/*
	** Start from a known state, this is
	** important in reading the nvm and
	*/
	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(hw) < 0) {
			    "The EEPROM Checksum Is Not Valid\n");
	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
	if (!em_is_valid_ether_addr(hw->mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
	/* Disable ULP support */
	e1000_disable_ulp_lpt_lp(hw, TRUE);
	/*
	** Do interrupt configuration
	*/
	if (adapter->msix > 1) /* Do MSIX */
		error = em_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = em_allocate_legacy(adapter);
	/*
	 * Get Wake-on-Lan and Management info for later use
	 */
	/* Setup OS specific network interface */
	if (em_setup_interface(dev, adapter) != 0)
	/* Initialize statistics */
	em_update_stats_counters(adapter);
	hw->mac.get_link_status = 1;
	em_update_link_status(adapter);
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	em_add_hw_stats(adapter);
	/* Non-AMT based hardware can now take control from firmware */
	if (adapter->has_manage && !adapter->has_amt)
		em_get_hw_control(adapter);
	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
	adapter->led_dev = led_create(em_led_func, adapter,
	    device_get_nameunit(dev));
	em_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("em_attach: end");
	/* Error-unwind path (goto labels truncated in this view) */
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	em_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	EM_CORE_LOCK_DESTROY(adapter);
849 /*********************************************************************
850 * Device removal routine
852 * The detach entry point is called when the driver is being removed.
853 * This routine stops the adapter and deallocates all the resources
854 * that were allocated for driver operation.
856 * return 0 on success, positive on failure
857 *********************************************************************/
/*
 * Detach: stop the adapter and release every resource taken in attach.
 * NOTE(review): several statements (em_stop, taskqueue teardown, final
 * return) are missing from this view; code kept byte-identical.
 */
em_detach(device_t dev)
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;
	INIT_DEBUGOUT("em_detach: begin");
	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);
	EM_CORE_LOCK(adapter);
	/* Flag teardown in progress */
	adapter->in_detach = 1;
	EM_CORE_UNLOCK(adapter);
	EM_CORE_LOCK_DESTROY(adapter);
	e1000_phy_hw_reset(&adapter->hw);
	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);
#endif /* DEV_NETMAP */
	em_free_pci_resources(adapter);
	bus_generic_detach(dev);
	em_free_transmit_structures(adapter);
	em_free_receive_structures(adapter);
	em_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);
918 /*********************************************************************
920 * Shutdown entry point
922 **********************************************************************/
/* Shutdown is handled identically to suspend */
em_shutdown(device_t dev)
	return em_suspend(dev);
/* Suspend/resume device methods. */
/*
 * Suspend: release manageability/HW control and arm wake-up support
 * before handing off to the generic bus suspend.
 */
em_suspend(device_t dev)
	struct adapter *adapter = device_get_softc(dev);
	EM_CORE_LOCK(adapter);
	em_release_manageability(adapter);
	em_release_hw_control(adapter);
	em_enable_wakeup(dev);
	EM_CORE_UNLOCK(adapter);
	return bus_generic_suspend(dev);
/* Resume: re-run PCH-specific workarounds where required, fully
 * re-initialize the adapter, then restart any transmit work that was
 * queued while suspended. */
950 em_resume(device_t dev)
952 struct adapter *adapter = device_get_softc(dev);
953 struct tx_ring *txr = adapter->tx_rings;
954 struct ifnet *ifp = adapter->ifp;
956 EM_CORE_LOCK(adapter);
/* pch2lan parts need explicit resume workarounds before init. */
957 if (adapter->hw.mac.type == e1000_pch2lan)
958 e1000_resume_workarounds_pchlan(&adapter->hw);
959 em_init_locked(adapter);
960 em_init_manageability(adapter);
/* Kick each TX ring only if the interface was up and link is good. */
962 if ((ifp->if_flags & IFF_UP) &&
963 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
964 for (int i = 0; i < adapter->num_queues; i++, txr++) {
967 if (!drbr_empty(ifp, txr->br))
968 em_mq_start_locked(ifp, txr);
970 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
971 em_start_locked(ifp, txr);
976 EM_CORE_UNLOCK(adapter);
978 return bus_generic_resume(dev);
982 #ifndef EM_MULTIQUEUE
/* Legacy single-queue transmit: drain ifp->if_snd into the TX ring.
 * Caller must hold the TX ring lock (asserted below). */
984 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
986 struct adapter *adapter = ifp->if_softc;
989 EM_TX_LOCK_ASSERT(txr);
/* Bail unless the interface is RUNNING, not OACTIVE, and has link. */
991 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
995 if (!adapter->link_active)
998 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
999 /* Call cleanup if number of TX descriptors low */
1000 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
/* Stall the queue when a worst-case scatter frame no longer fits. */
1002 if (txr->tx_avail < EM_MAX_SCATTER) {
1003 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1006 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1010 * Encapsulation can modify our pointer, and or make it
1011 * NULL on failure. In that event, we can't requeue.
1013 if (em_xmit(txr, &m_head)) {
1016 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1020 /* Mark the queue as having work */
1021 if (txr->busy == EM_TX_IDLE)
1022 txr->busy = EM_TX_BUSY;
1024 /* Send a copy of the frame to the BPF listener */
1025 ETHER_BPF_MTAP(ifp, m_head);
/* Stack-visible if_start hook: take the first (only) TX ring's lock
 * and hand off to em_start_locked when the interface is running. */
1033 em_start(struct ifnet *ifp)
1035 struct adapter *adapter = ifp->if_softc;
1036 struct tx_ring *txr = adapter->tx_rings;
1038 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1040 em_start_locked(ifp, txr);
1045 #else /* EM_MULTIQUEUE */
1046 /*********************************************************************
1047 * Multiqueue Transmit routines
1049 * em_mq_start is called by the stack to initiate a transmit.
1050 * however, if busy the driver can queue the request rather
1051 * than do an immediate send. It is this that is an advantage
1052 * in this driver, rather than also having multiple tx queues.
1053 **********************************************************************/
1055 ** Multiqueue capable stack interface
1058 em_mq_start(struct ifnet *ifp, struct mbuf *m)
1060 struct adapter *adapter = ifp->if_softc;
1061 struct tx_ring *txr = adapter->tx_rings;
1062 unsigned int i, error;
/* Choose a TX ring: by the stack-supplied flow id when present,
 * otherwise by the current CPU. */
1064 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1065 i = m->m_pkthdr.flowid % adapter->num_queues;
1067 i = curcpu % adapter->num_queues;
1069 txr = &adapter->tx_rings[i];
1071 error = drbr_enqueue(ifp, txr->br, m);
/* Service the ring immediately if the lock is free; otherwise
 * defer to the per-ring taskqueue. */
1075 if (EM_TX_TRYLOCK(txr)) {
1076 em_mq_start_locked(ifp, txr);
1079 taskqueue_enqueue(txr->tq, &txr->tx_task);
/* Drain this ring's buf_ring into the hardware TX ring.
 * Caller holds the TX ring lock. */
1085 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1087 struct adapter *adapter = txr->adapter;
1089 int err = 0, enq = 0;
1091 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1092 IFF_DRV_RUNNING || adapter->link_active == 0) {
1096 /* Process the queue */
/* peek/advance/putback protocol: the mbuf stays on the ring until
 * em_xmit() accepts it, so a busy ring never drops the packet. */
1097 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1098 if ((err = em_xmit(txr, &next)) != 0) {
1100 /* It was freed, move forward */
1101 drbr_advance(ifp, txr->br);
1104 * Still have one left, it may not be
1105 * the same since the transmit function
1106 * may have changed it.
1108 drbr_putback(ifp, txr->br, next);
1112 drbr_advance(ifp, txr->br);
1114 ifp->if_obytes += next->m_pkthdr.len;
1115 if (next->m_flags & M_MCAST)
1117 ETHER_BPF_MTAP(ifp, next);
1118 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1122 /* Mark the queue as having work */
1123 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1124 txr->busy = EM_TX_BUSY;
/* Out of descriptor headroom: clean, then stall if still short. */
1126 if (txr->tx_avail < EM_MAX_SCATTER)
1128 if (txr->tx_avail < EM_MAX_SCATTER) {
1129 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1135 ** Flush all ring buffers
/* qflush hook: free every mbuf still queued in each ring's buf_ring. */
1138 em_qflush(struct ifnet *ifp)
1140 struct adapter *adapter = ifp->if_softc;
1141 struct tx_ring *txr = adapter->tx_rings;
1144 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1146 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1152 #endif /* EM_MULTIQUEUE */
1154 /*********************************************************************
1157 * em_ioctl is called when the user wants to configure the
1160 * return 0 on success, positive on failure
1161 **********************************************************************/
1164 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1166 struct adapter *adapter = ifp->if_softc;
1167 struct ifreq *ifr = (struct ifreq *)data;
1168 #if defined(INET) || defined(INET6)
1169 struct ifaddr *ifa = (struct ifaddr *)data;
1171 bool avoid_reset = FALSE;
/* Refuse configuration once detach has begun (in_detach set there). */
1174 if (adapter->in_detach)
1180 if (ifa->ifa_addr->sa_family == AF_INET)
1184 if (ifa->ifa_addr->sa_family == AF_INET6)
1188 ** Calling init results in link renegotiation,
1189 ** so we avoid doing it when possible.
1192 ifp->if_flags |= IFF_UP;
1193 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1196 if (!(ifp->if_flags & IFF_NOARP))
1197 arp_ifinit(ifp, ifa);
1200 error = ether_ioctl(ifp, command, data);
1206 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
/* Jumbo-frame capability (hence the MTU ceiling) depends on the MAC. */
1208 EM_CORE_LOCK(adapter);
1209 switch (adapter->hw.mac.type) {
1213 case e1000_ich10lan:
1220 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1221 max_frame_size = 9234;
1224 max_frame_size = 4096;
1226 /* Adapters that do not support jumbo frames */
1228 max_frame_size = ETHER_MAX_LEN;
1231 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1233 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1235 EM_CORE_UNLOCK(adapter);
/* Commit the new MTU and re-init if the interface is running. */
1240 ifp->if_mtu = ifr->ifr_mtu;
1241 adapter->hw.mac.max_frame_size =
1242 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1243 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1244 em_init_locked(adapter);
1245 EM_CORE_UNLOCK(adapter);
1249 IOCTL_DEBUGOUT("ioctl rcv'd:\
1250 SIOCSIFFLAGS (Set Interface Flags)");
1251 EM_CORE_LOCK(adapter);
1252 if (ifp->if_flags & IFF_UP) {
1253 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
/* PROMISC/ALLMULTI toggles can be applied without a full re-init. */
1254 if ((ifp->if_flags ^ adapter->if_flags) &
1255 (IFF_PROMISC | IFF_ALLMULTI)) {
1256 em_disable_promisc(adapter);
1257 em_set_promisc(adapter);
1260 em_init_locked(adapter);
1262 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1264 adapter->if_flags = ifp->if_flags;
1265 EM_CORE_UNLOCK(adapter);
1269 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
/* Reload the multicast filter with interrupts masked. */
1270 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1271 EM_CORE_LOCK(adapter);
1272 em_disable_intr(adapter);
1273 em_set_multi(adapter);
1274 #ifdef DEVICE_POLLING
1275 if (!(ifp->if_capenable & IFCAP_POLLING))
1277 em_enable_intr(adapter);
1278 EM_CORE_UNLOCK(adapter);
1282 /* Check SOL/IDER usage */
1283 EM_CORE_LOCK(adapter);
1284 if (e1000_check_reset_block(&adapter->hw)) {
1285 EM_CORE_UNLOCK(adapter);
1286 device_printf(adapter->dev, "Media change is"
1287 " blocked due to SOL/IDER session.\n");
1290 EM_CORE_UNLOCK(adapter);
1293 IOCTL_DEBUGOUT("ioctl rcv'd: \
1294 SIOCxIFMEDIA (Get/Set Interface Media)");
1295 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1301 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
/* mask holds the capability bits the caller wants to flip. */
1303 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1304 #ifdef DEVICE_POLLING
1305 if (mask & IFCAP_POLLING) {
1306 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1307 error = ether_poll_register(em_poll, ifp);
1310 EM_CORE_LOCK(adapter);
1311 em_disable_intr(adapter);
1312 ifp->if_capenable |= IFCAP_POLLING;
1313 EM_CORE_UNLOCK(adapter);
1315 error = ether_poll_deregister(ifp);
1316 /* Enable interrupt even in error case */
1317 EM_CORE_LOCK(adapter);
1318 em_enable_intr(adapter);
1319 ifp->if_capenable &= ~IFCAP_POLLING;
1320 EM_CORE_UNLOCK(adapter);
1324 if (mask & IFCAP_HWCSUM) {
1325 ifp->if_capenable ^= IFCAP_HWCSUM;
1328 if (mask & IFCAP_TSO4) {
1329 ifp->if_capenable ^= IFCAP_TSO4;
1332 if (mask & IFCAP_VLAN_HWTAGGING) {
1333 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1336 if (mask & IFCAP_VLAN_HWFILTER) {
1337 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1340 if (mask & IFCAP_VLAN_HWTSO) {
1341 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1344 if ((mask & IFCAP_WOL) &&
1345 (ifp->if_capabilities & IFCAP_WOL) != 0) {
1346 if (mask & IFCAP_WOL_MCAST)
1347 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1348 if (mask & IFCAP_WOL_MAGIC)
1349 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1351 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1353 VLAN_CAPABILITIES(ifp);
1358 error = ether_ioctl(ifp, command, data);
1366 /*********************************************************************
1369 * This routine is used in two ways. It is used by the stack as
1370 * init entry point in network interface structure. It is also used
1371 * by the driver as a hw/sw initialization routine to get to a
1374 * return 0 on success, positive on failure
1375 **********************************************************************/
1378 em_init_locked(struct adapter *adapter)
1380 struct ifnet *ifp = adapter->ifp;
1381 device_t dev = adapter->dev;
1383 INIT_DEBUGOUT("em_init: begin");
1385 EM_CORE_LOCK_ASSERT(adapter);
/* Quiesce: mask interrupts and stop the watchdog before reconfig. */
1387 em_disable_intr(adapter);
1388 callout_stop(&adapter->timer);
1390 /* Get the latest mac address, User can use a LAA */
1391 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1394 /* Put the address into the Receive Address Array */
1395 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1398 * With the 82571 adapter, RAR[0] may be overwritten
1399 * when the other port is reset, we make a duplicate
1400 * in RAR[14] for that eventuality, this assures
1401 * the interface continues to function.
1403 if (adapter->hw.mac.type == e1000_82571) {
1404 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1405 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1406 E1000_RAR_ENTRIES - 1);
1409 /* Initialize the hardware */
1411 em_update_link_status(adapter);
1413 /* Setup VLAN support, basic and offload if available */
1414 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1416 /* Set hardware offload abilities */
1417 if (ifp->if_capenable & IFCAP_TXCSUM)
1418 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1420 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1422 /* Configure for OS presence */
1423 em_init_manageability(adapter);
1425 /* Prepare transmit descriptors and buffers */
1426 em_setup_transmit_structures(adapter);
1427 em_initialize_transmit_unit(adapter);
1429 /* Setup Multicast table */
1430 em_set_multi(adapter);
1433 ** Figure out the desired mbuf
1434 ** pool for doing jumbos
1436 if (adapter->hw.mac.max_frame_size <= 2048)
1437 adapter->rx_mbuf_sz = MCLBYTES;
1438 else if (adapter->hw.mac.max_frame_size <= 4096)
1439 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1441 adapter->rx_mbuf_sz = MJUM9BYTES;
1443 /* Prepare receive descriptors and buffers */
1444 if (em_setup_receive_structures(adapter)) {
1445 device_printf(dev, "Could not setup receive structures\n");
1449 em_initialize_receive_unit(adapter);
1451 /* Use real VLAN Filter support? */
1452 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1453 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1454 /* Use real VLAN Filter support */
1455 em_setup_vlan_hw_support(adapter);
/* Otherwise just enable VLAN tag stripping (CTRL.VME). */
1458 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1459 ctrl |= E1000_CTRL_VME;
1460 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1464 /* Don't lose promiscuous settings */
1465 em_set_promisc(adapter);
1467 /* Set the interface as ACTIVE */
1468 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1469 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
/* Restart the one-second watchdog/stats timer and clear counters. */
1471 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1472 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1474 /* MSI/X configuration for 82574 */
1475 if (adapter->hw.mac.type == e1000_82574) {
1477 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1478 tmp |= E1000_CTRL_EXT_PBA_CLR;
1479 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1480 /* Set the IVAR - interrupt vector routing. */
1481 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1484 #ifdef DEVICE_POLLING
1486 * Only enable interrupts if we are not polling, make sure
1487 * they are off otherwise.
1489 if (ifp->if_capenable & IFCAP_POLLING)
1490 em_disable_intr(adapter);
1492 #endif /* DEVICE_POLLING */
1493 em_enable_intr(adapter);
1495 /* AMT based hardware can now take control from firmware */
1496 if (adapter->has_manage && adapter->has_amt)
1497 em_get_hw_control(adapter);
/* NOTE(review): the em_init() function header line is elided in this
 * listing; the visible body simply wraps em_init_locked() with the
 * adapter core lock. */
1503 struct adapter *adapter = arg;
1505 EM_CORE_LOCK(adapter);
1506 em_init_locked(adapter);
1507 EM_CORE_UNLOCK(adapter);
1511 #ifdef DEVICE_POLLING
1512 /*********************************************************************
1514 * Legacy polling routine: note this only works with single queue
1516 *********************************************************************/
1518 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1520 struct adapter *adapter = ifp->if_softc;
1521 struct tx_ring *txr = adapter->tx_rings;
1522 struct rx_ring *rxr = adapter->rx_rings;
1526 EM_CORE_LOCK(adapter);
1527 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1528 EM_CORE_UNLOCK(adapter);
/* On POLL_AND_CHECK_STATUS, also probe ICR for link-state changes. */
1532 if (cmd == POLL_AND_CHECK_STATUS) {
1533 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1534 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1535 callout_stop(&adapter->timer);
1536 adapter->hw.mac.get_link_status = 1;
1537 em_update_link_status(adapter);
1538 callout_reset(&adapter->timer, hz,
1539 em_local_timer, adapter);
1542 EM_CORE_UNLOCK(adapter);
/* Run RX up to the poll budget, then service pending TX. */
1544 em_rxeof(rxr, count, &rx_done);
1548 #ifdef EM_MULTIQUEUE
1549 if (!drbr_empty(ifp, txr->br))
1550 em_mq_start_locked(ifp, txr);
1552 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1553 em_start_locked(ifp, txr);
1559 #endif /* DEVICE_POLLING */
1562 /*********************************************************************
1564 * Fast Legacy/MSI Combined Interrupt Service routine
1566 *********************************************************************/
1568 em_irq_fast(void *arg)
1570 struct adapter *adapter = arg;
1576 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
/* All-ones read means the device is gone (hot-unplug): stray. */
1579 if (reg_icr == 0xffffffff)
1580 return FILTER_STRAY;
1582 /* Definitely not our interrupt. */
1584 return FILTER_STRAY;
1587 * Starting with the 82571 chip, bit 31 should be used to
1588 * determine whether the interrupt belongs to us.
1590 if (adapter->hw.mac.type >= e1000_82571 &&
1591 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1592 return FILTER_STRAY;
/* Mask further interrupts and defer the real work to the taskqueue. */
1594 em_disable_intr(adapter);
1595 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1597 /* Link status change */
1598 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1599 adapter->hw.mac.get_link_status = 1;
1600 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1603 if (reg_icr & E1000_ICR_RXO)
1604 adapter->rx_overruns++;
1605 return FILTER_HANDLED;
1608 /* Combined RX/TX handler, used by Legacy and MSI */
1610 em_handle_que(void *context, int pending)
1612 struct adapter *adapter = context;
1613 struct ifnet *ifp = adapter->ifp;
1614 struct tx_ring *txr = adapter->tx_rings;
1615 struct rx_ring *rxr = adapter->rx_rings;
1617 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/* Process RX up to the configured limit; 'more' requests a re-run. */
1618 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1622 #ifdef EM_MULTIQUEUE
1623 if (!drbr_empty(ifp, txr->br))
1624 em_mq_start_locked(ifp, txr);
1626 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1627 em_start_locked(ifp, txr);
/* Work remains: requeue ourselves instead of re-enabling interrupts. */
1631 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1636 em_enable_intr(adapter);
1641 /*********************************************************************
1643 * MSIX Interrupt Service Routines
1645 **********************************************************************/
/* Per-ring MSI-X TX interrupt: service pending transmits, then
 * re-arm this vector via IMS. */
1647 em_msix_tx(void *arg)
1649 struct tx_ring *txr = arg;
1650 struct adapter *adapter = txr->adapter;
1651 struct ifnet *ifp = adapter->ifp;
1656 #ifdef EM_MULTIQUEUE
1657 if (!drbr_empty(ifp, txr->br))
1658 em_mq_start_locked(ifp, txr);
1660 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1661 em_start_locked(ifp, txr);
1664 /* Reenable this interrupt */
1665 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1670 /*********************************************************************
1672 * MSIX RX Interrupt Service routine
1674 **********************************************************************/
1677 em_msix_rx(void *arg)
1679 struct rx_ring *rxr = arg;
1680 struct adapter *adapter = rxr->adapter;
1684 if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
/* Budgeted RX processing; leftover work is pushed to the taskqueue. */
1686 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1688 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1690 /* Reenable this interrupt */
1691 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1696 /*********************************************************************
1698 * MSIX Link Fast Interrupt Service routine
1700 **********************************************************************/
1702 em_msix_link(void *arg)
1704 struct adapter *adapter = arg;
1707 ++adapter->link_irq;
1708 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1710 if (reg_icr & E1000_ICR_RXO)
1711 adapter->rx_overruns++;
1713 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1714 adapter->hw.mac.get_link_status = 1;
1715 em_handle_link(adapter, 0);
/* Re-arm the link vector. */
1717 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1718 EM_MSIX_LINK | E1000_IMS_LSC);
1720 ** Because we must read the ICR for this interrupt
1721 ** it may clear other causes using autoclear, for
1722 ** this reason we simply create a soft interrupt
1723 ** for all these vectors.
1726 E1000_WRITE_REG(&adapter->hw,
1727 E1000_ICS, adapter->ims);
/* Taskqueue deferred RX handler: keep draining the ring until the
 * budget-limited em_rxeof reports no more work, then re-arm. */
1733 em_handle_rx(void *context, int pending)
1735 struct rx_ring *rxr = context;
1736 struct adapter *adapter = rxr->adapter;
1739 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1741 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1743 /* Reenable this interrupt */
1744 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
/* Taskqueue deferred TX handler: push out queued frames for this
 * ring and re-arm its interrupt vector. */
1749 em_handle_tx(void *context, int pending)
1751 struct tx_ring *txr = context;
1752 struct adapter *adapter = txr->adapter;
1753 struct ifnet *ifp = adapter->ifp;
1757 #ifdef EM_MULTIQUEUE
1758 if (!drbr_empty(ifp, txr->br))
1759 em_mq_start_locked(ifp, txr);
1761 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1762 em_start_locked(ifp, txr);
1764 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
/* Link-change task: refresh link state under the core lock and, if
 * the link came up, restart transmit on every ring. */
1769 em_handle_link(void *context, int pending)
1771 struct adapter *adapter = context;
1772 struct tx_ring *txr = adapter->tx_rings;
1773 struct ifnet *ifp = adapter->ifp;
1775 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1778 EM_CORE_LOCK(adapter);
1779 callout_stop(&adapter->timer);
1780 em_update_link_status(adapter);
1781 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
/* Re-arm the link MSI-X vector. */
1782 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1783 EM_MSIX_LINK | E1000_IMS_LSC);
1784 if (adapter->link_active) {
1785 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1787 #ifdef EM_MULTIQUEUE
1788 if (!drbr_empty(ifp, txr->br))
1789 em_mq_start_locked(ifp, txr);
1791 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1792 em_start_locked(ifp, txr);
1797 EM_CORE_UNLOCK(adapter);
1801 /*********************************************************************
1803 * Media Ioctl callback
1805 * This routine is called whenever the user queries the status of
1806 * the interface using ifconfig.
1808 **********************************************************************/
1810 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1812 struct adapter *adapter = ifp->if_softc;
1813 u_char fiber_type = IFM_1000_SX;
1815 INIT_DEBUGOUT("em_media_status: begin");
1817 EM_CORE_LOCK(adapter);
1818 em_update_link_status(adapter);
1820 ifmr->ifm_status = IFM_AVALID;
1821 ifmr->ifm_active = IFM_ETHER;
/* No link: report AVALID only and return. */
1823 if (!adapter->link_active) {
1824 EM_CORE_UNLOCK(adapter);
1828 ifmr->ifm_status |= IFM_ACTIVE;
/* Fiber/SerDes report as 1000SX full-duplex; copper reports the
 * negotiated speed and duplex. */
1830 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1831 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1832 ifmr->ifm_active |= fiber_type | IFM_FDX;
1834 switch (adapter->link_speed) {
1836 ifmr->ifm_active |= IFM_10_T;
1839 ifmr->ifm_active |= IFM_100_TX;
1842 ifmr->ifm_active |= IFM_1000_T;
1845 if (adapter->link_duplex == FULL_DUPLEX)
1846 ifmr->ifm_active |= IFM_FDX;
1848 ifmr->ifm_active |= IFM_HDX;
1850 EM_CORE_UNLOCK(adapter);
1853 /*********************************************************************
1855 * Media Ioctl callback
1857 * This routine is called when the user changes speed/duplex using
1858 * media/mediaopt option with ifconfig.
1860 **********************************************************************/
1862 em_media_change(struct ifnet *ifp)
1864 struct adapter *adapter = ifp->if_softc;
1865 struct ifmedia *ifm = &adapter->media;
1867 INIT_DEBUGOUT("em_media_change: begin");
1869 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1872 EM_CORE_LOCK(adapter);
1873 switch (IFM_SUBTYPE(ifm->ifm_media)) {
/* Auto: advertise everything and let the PHY negotiate. */
1875 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1876 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1881 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1882 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
/* Forced 100 Mb: disable autoneg and pin speed/duplex. */
1885 adapter->hw.mac.autoneg = FALSE;
1886 adapter->hw.phy.autoneg_advertised = 0;
1887 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1888 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1890 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
/* Forced 10 Mb: same, at 10 Mb. */
1893 adapter->hw.mac.autoneg = FALSE;
1894 adapter->hw.phy.autoneg_advertised = 0;
1895 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1896 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1898 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1901 device_printf(adapter->dev, "Unsupported media type\n");
/* Apply the new media selection with a full re-init. */
1904 em_init_locked(adapter);
1905 EM_CORE_UNLOCK(adapter);
1910 /*********************************************************************
1912 * This routine maps the mbufs to tx descriptors.
1914 * return 0 on success, positive on failure
1915 **********************************************************************/
1918 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1920 struct adapter *adapter = txr->adapter;
1921 bus_dma_segment_t segs[EM_MAX_SCATTER];
1923 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1924 struct e1000_tx_desc *ctxd = NULL;
1925 struct mbuf *m_head;
1926 struct ether_header *eh;
1927 struct ip *ip = NULL;
1928 struct tcphdr *tp = NULL;
1929 u32 txd_upper = 0, txd_lower = 0;
1931 int nsegs, i, j, first, last = 0;
1933 bool do_tso, tso_desc, remap = TRUE;
1936 do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1941 * Intel recommends entire IP/TCP header length reside in a single
1942 * buffer. If multiple descriptors are used to describe the IP and
1943 * TCP header, each descriptor should describe one or more
1944 * complete headers; descriptors referencing only parts of headers
1945 * are not supported. If all layer headers are not coalesced into
1946 * a single buffer, each buffer should not cross a 4KB boundary,
1947 * or be larger than the maximum read request size.
1948 * Controller also requires modifying IP/TCP header to make TSO work
1949 * so we firstly get a writable mbuf chain then coalesce ethernet/
1950 * IP/TCP header into a single buffer to meet the requirement of
1951 * controller. This also simplifies IP/TCP/UDP checksum offloading
1952 * which also has similar restrictions.
1954 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1955 if (do_tso || (m_head->m_next != NULL &&
1956 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
/* The headers must be writable; duplicate the chain if not. */
1957 if (M_WRITABLE(*m_headp) == 0) {
1958 m_head = m_dup(*m_headp, M_NOWAIT);
1960 if (m_head == NULL) {
1969 * Assume IPv4, we don't have TSO/checksum offload support
1972 ip_off = sizeof(struct ether_header);
1973 if (m_head->m_len < ip_off) {
1974 m_head = m_pullup(m_head, ip_off);
1975 if (m_head == NULL) {
/* Account for a VLAN header if the frame carries one. */
1980 eh = mtod(m_head, struct ether_header *);
1981 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1982 ip_off = sizeof(struct ether_vlan_header);
1983 if (m_head->m_len < ip_off) {
1984 m_head = m_pullup(m_head, ip_off);
1985 if (m_head == NULL) {
1991 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1992 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1993 if (m_head == NULL) {
1998 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
1999 poff = ip_off + (ip->ip_hl << 2);
2001 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
2002 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2003 m_head = m_pullup(m_head, poff +
2004 sizeof(struct tcphdr));
2005 if (m_head == NULL) {
2010 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2013 * pull 4 more bytes of data into it.
2015 if (m_head->m_len < poff + (tp->th_off << 2)) {
2016 m_head = m_pullup(m_head, poff +
2019 if (m_head == NULL) {
/* m_pullup may have replaced the chain; refresh both pointers. */
2024 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2025 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2027 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2032 * The pseudo TCP checksum does not include TCP
2033 * payload length so driver should recompute
2034 * the checksum here what hardware expect to
2035 * see. This is adherence of Microsoft's Large
2036 * Send specification.
2038 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2039 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2041 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2042 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2043 m_head = m_pullup(m_head, poff +
2044 sizeof(struct udphdr));
2045 if (m_head == NULL) {
2050 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2056 * Map the packet for DMA
2058 * Capture the first descriptor index,
2059 * this descriptor will have the index
2060 * of the EOP which is the only one that
2061 * now gets a DONE bit writeback.
2063 first = txr->next_avail_desc;
2064 tx_buffer = &txr->tx_buffers[first];
2065 tx_buffer_mapped = tx_buffer;
2066 map = tx_buffer->map;
2069 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2070 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2073 * There are two types of errors we can (try) to handle:
2074 * - EFBIG means the mbuf chain was too long and bus_dma ran
2075 * out of segments. Defragment the mbuf chain and try again.
2076 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2077 * at this point in time. Defer sending and try again later.
2078 * All other errors, in particular EINVAL, are fatal and prevent the
2079 * mbuf chain from ever going through. Drop it and report error.
2081 if (error == EFBIG && remap) {
2084 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2086 adapter->mbuf_defrag_failed++;
2093 /* Try it again, but only once */
2096 } else if (error != 0) {
2097 adapter->no_tx_dma_setup++;
2104 * TSO Hardware workaround, if this packet is not
2105 * TSO, and is only a single descriptor long, and
2106 * it follows a TSO burst, then we need to add a
2107 * sentinel descriptor to prevent premature writeback.
2109 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2112 txr->tx_tso = FALSE;
/* Reserve headroom for the segments plus the sentinel descriptor. */
2115 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2116 txr->no_desc_avail++;
2117 bus_dmamap_unload(txr->txtag, map);
2122 /* Do hardware assists */
2123 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2124 em_tso_setup(txr, m_head, ip_off, ip, tp,
2125 &txd_upper, &txd_lower);
2126 /* we need to make a final sentinel transmit desc */
2128 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2129 em_transmit_checksum_setup(txr, m_head,
2130 ip_off, ip, &txd_upper, &txd_lower);
2132 if (m_head->m_flags & M_VLANTAG) {
2133 /* Set the vlan id. */
2135 (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2136 /* Tell hardware to add tag */
2137 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2140 i = txr->next_avail_desc;
2142 /* Set up our transmit descriptors */
2143 for (j = 0; j < nsegs; j++) {
2145 bus_addr_t seg_addr;
2147 tx_buffer = &txr->tx_buffers[i];
2148 ctxd = &txr->tx_base[i];
2149 seg_addr = segs[j].ds_addr;
2150 seg_len = segs[j].ds_len;
2153 ** If this is the last descriptor, we want to
2154 ** split it so we have a small final sentinel
2156 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2157 seg_len -= TSO_WORKAROUND;
2158 ctxd->buffer_addr = htole64(seg_addr);
2159 ctxd->lower.data = htole32(
2160 adapter->txd_cmd | txd_lower | seg_len);
2161 ctxd->upper.data = htole32(txd_upper);
2162 if (++i == adapter->num_tx_desc)
2165 /* Now make the sentinel */
2167 ctxd = &txr->tx_base[i];
2168 tx_buffer = &txr->tx_buffers[i];
2170 htole64(seg_addr + seg_len);
2171 ctxd->lower.data = htole32(
2172 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2176 if (++i == adapter->num_tx_desc)
2179 ctxd->buffer_addr = htole64(seg_addr);
2180 ctxd->lower.data = htole32(
2181 adapter->txd_cmd | txd_lower | seg_len);
2182 ctxd->upper.data = htole32(txd_upper);
2184 if (++i == adapter->num_tx_desc)
2187 tx_buffer->m_head = NULL;
2188 tx_buffer->next_eop = -1;
2191 txr->next_avail_desc = i;
2192 txr->tx_avail -= nsegs;
2194 tx_buffer->m_head = m_head;
2196 ** Here we swap the map so the last descriptor,
2197 ** which gets the completion interrupt has the
2198 ** real map, and the first descriptor gets the
2199 ** unused map from this descriptor.
2201 tx_buffer_mapped->map = tx_buffer->map;
2202 tx_buffer->map = map;
2203 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2206 * Last Descriptor of Packet
2207 * needs End Of Packet (EOP)
2208 * and Report Status (RS)
2211 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2213 * Keep track in the first buffer which
2214 * descriptor will be written back
2216 tx_buffer = &txr->tx_buffers[first];
2217 tx_buffer->next_eop = last;
2220 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2221 * that this frame is available to transmit.
2223 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2224 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2225 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
/* Program RCTL promiscuous bits from the interface flags:
 * IFF_PROMISC sets unicast+multicast promiscuous; IFF_ALLMULTI sets
 * multicast promiscuous only. */
2231 em_set_promisc(struct adapter *adapter)
2233 struct ifnet *ifp = adapter->ifp;
2236 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2238 if (ifp->if_flags & IFF_PROMISC) {
2239 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2240 /* Turn this on if you want to see bad packets */
2242 reg_rctl |= E1000_RCTL_SBP;
2243 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2244 } else if (ifp->if_flags & IFF_ALLMULTI) {
2245 reg_rctl |= E1000_RCTL_MPE;
2246 reg_rctl &= ~E1000_RCTL_UPE;
2247 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
/* Clear promiscuous modes, but keep multicast-promiscuous (MPE) set
 * when the multicast group count is at the hardware filter limit. */
2252 em_disable_promisc(struct adapter *adapter)
2254 struct ifnet *ifp = adapter->ifp;
2258 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2259 reg_rctl &= (~E1000_RCTL_UPE);
2260 if (ifp->if_flags & IFF_ALLMULTI)
2261 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2263 struct ifmultiaddr *ifma;
2264 #if __FreeBSD_version < 800000
2267 if_maddr_rlock(ifp);
/* Count link-level multicast memberships up to the filter limit. */
2269 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2270 if (ifma->ifma_addr->sa_family != AF_LINK)
2272 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2276 #if __FreeBSD_version < 800000
2277 IF_ADDR_UNLOCK(ifp);
2279 if_maddr_runlock(ifp);
2282 /* Don't disable if in MAX groups */
2283 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2284 reg_rctl &= (~E1000_RCTL_MPE);
2285 reg_rctl &= (~E1000_RCTL_SBP);
2286 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2290 /*********************************************************************
2293 * This routine is called whenever multicast address list is updated.
2295 **********************************************************************/
2298 em_set_multi(struct adapter *adapter)
2300 struct ifnet *ifp = adapter->ifp;
2301 struct ifmultiaddr *ifma;
2303 u8 *mta; /* Multicast array memory */
2306 IOCTL_DEBUGOUT("em_set_multi: begin");
2309 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
/* 82542 rev2 errata: reset the receiver (and drop MWI) around
 * multicast table updates. */
2311 if (adapter->hw.mac.type == e1000_82542 &&
2312 adapter->hw.revision_id == E1000_REVISION_2) {
2313 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2314 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2315 e1000_pci_clear_mwi(&adapter->hw);
2316 reg_rctl |= E1000_RCTL_RST;
2317 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2321 #if __FreeBSD_version < 800000
2324 if_maddr_rlock(ifp);
/* Copy each link-level group address into the mta array, stopping
 * at the hardware limit. */
2326 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2327 if (ifma->ifma_addr->sa_family != AF_LINK)
2330 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2333 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2334 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2337 #if __FreeBSD_version < 800000
2338 IF_ADDR_UNLOCK(ifp);
2340 if_maddr_runlock(ifp);
/* Too many groups for the filter: fall back to multicast promisc. */
2342 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2343 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2344 reg_rctl |= E1000_RCTL_MPE;
2345 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2347 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
/* Undo the 82542 rev2 receiver reset and restore MWI. */
2349 if (adapter->hw.mac.type == e1000_82542 &&
2350 adapter->hw.revision_id == E1000_REVISION_2) {
2351 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2352 reg_rctl &= ~E1000_RCTL_RST;
2353 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2355 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2356 e1000_pci_set_mwi(&adapter->hw);
2361 /*********************************************************************
2364 * This routine checks for link status and updates statistics.
2366 **********************************************************************/
/*
 * em_local_timer - once-per-second callout.
 *
 * Updates link state and statistics, re-arms the LAA on 82571, checks
 * each TX ring for hang conditions, and re-schedules itself via
 * callout_reset().  Must run with the core lock held (asserted below).
 * The tail (after the callout_reset) is the watchdog path: declare the
 * queue hung, mark the interface down, and reinitialize.
 */
2369 em_local_timer(void *arg)
2371 struct adapter *adapter = arg;
2372 struct ifnet *ifp = adapter->ifp;
2373 struct tx_ring *txr = adapter->tx_rings;
2374 struct rx_ring *rxr = adapter->rx_rings;
2377 EM_CORE_LOCK_ASSERT(adapter);
2379 em_update_link_status(adapter);
2380 em_update_stats_counters(adapter);
2382 /* Reset LAA into RAR[0] on 82571 */
2383 if ((adapter->hw.mac.type == e1000_82571) &&
2384 e1000_get_laa_state_82571(&adapter->hw))
2385 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2387 /* Mask to use in the irq trigger */
/* MSI-X: OR together every RX ring's IMS bit; legacy: RXDMT0 */
2388 if (adapter->msix_mem) {
2389 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2390 trigger |= rxr->ims;
2391 rxr = adapter->rx_rings;
2393 trigger = E1000_ICS_RXDMT0;
2396 ** Check on the state of the TX queue(s), this
2397 ** can be done without the lock because it's RO
2398 ** and the HUNG state will be static if set.
/* Each tick a busy ring ages toward EM_TX_MAXTRIES, then is declared HUNG */
2400 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2401 if (txr->busy == EM_TX_HUNG)
2403 if (txr->busy >= EM_TX_MAXTRIES)
2404 txr->busy = EM_TX_HUNG;
2405 /* Schedule a TX tasklet if needed */
2406 if (txr->tx_avail <= EM_MAX_SCATTER)
2407 taskqueue_enqueue(txr->tq, &txr->tx_task);
2410 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2411 #ifndef DEVICE_POLLING
2412 /* Trigger an RX interrupt to guarantee mbuf refresh */
2413 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
/* Watchdog path (reached via the hung-queue branch above) */
2417 /* Looks like we're hung */
2418 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2420 em_print_debug_info(adapter);
2421 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2422 adapter->watchdog_events++;
2423 em_init_locked(adapter);
/*
 * em_update_link_status - poll/refresh link state and notify the stack.
 *
 * Determines link presence per media type (copper via PHY, fiber/serdes
 * via MAC status), then on an up/down transition updates link speed,
 * duplex, baudrate, the TSO hardware-assist flag, and calls
 * if_link_state_change().  On link-down, hang detection on all TX rings
 * is disarmed.
 */
2428 em_update_link_status(struct adapter *adapter)
2430 struct e1000_hw *hw = &adapter->hw;
2431 struct ifnet *ifp = adapter->ifp;
2432 device_t dev = adapter->dev;
2433 struct tx_ring *txr = adapter->tx_rings;
2436 /* Get the cached link value or read phy for real */
2437 switch (hw->phy.media_type) {
2438 case e1000_media_type_copper:
2439 if (hw->mac.get_link_status) {
/* pch_spt (I219) gets special handling here (elided in this view) */
2440 if (hw->mac.type == e1000_pch_spt)
2442 /* Do the work to read phy */
2443 e1000_check_for_link(hw);
2444 link_check = !hw->mac.get_link_status;
2445 if (link_check) /* ESB2 fix */
2446 e1000_cfg_on_link_up(hw);
2450 case e1000_media_type_fiber:
2451 e1000_check_for_link(hw);
2452 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2455 case e1000_media_type_internal_serdes:
2456 e1000_check_for_link(hw);
2457 link_check = adapter->hw.mac.serdes_has_link;
2460 case e1000_media_type_unknown:
2464 /* Now check for a transition */
2465 if (link_check && (adapter->link_active == 0)) {
2466 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2467 &adapter->link_duplex);
2470 ** There have proven to be problems with TSO when not at full
2471 ** gigabit speed, so disable the assist automatically when at
2472 ** lower speeds. -jfv
2474 if (ifp->if_capenable & IFCAP_TSO4) {
2475 if (adapter->link_speed == SPEED_1000)
2476 ifp->if_hwassist |= CSUM_IP_TSO;
2478 ifp->if_hwassist &= ~CSUM_IP_TSO;
2481 /* Check if we must disable SPEED_MODE bit on PCI-E */
2482 if ((adapter->link_speed != SPEED_1000) &&
2483 ((hw->mac.type == e1000_82571) ||
2484 (hw->mac.type == e1000_82572))) {
2486 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2487 tarc0 &= ~TARC_SPEED_MODE_BIT;
2488 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2491 device_printf(dev, "Link is up %d Mbps %s\n",
2492 adapter->link_speed,
2493 ((adapter->link_duplex == FULL_DUPLEX) ?
2494 "Full Duplex" : "Half Duplex"));
2495 adapter->link_active = 1;
2496 adapter->smartspeed = 0;
/* link_speed is in Mbps; if_baudrate wants bits per second */
2497 ifp->if_baudrate = adapter->link_speed * 1000000;
2498 if_link_state_change(ifp, LINK_STATE_UP);
2499 } else if (!link_check && (adapter->link_active == 1)) {
2500 ifp->if_baudrate = adapter->link_speed = 0;
2501 adapter->link_duplex = 0;
2503 device_printf(dev, "Link is Down\n");
2504 adapter->link_active = 0;
2505 /* Link down, disable hang detection */
2506 for (int i = 0; i < adapter->num_queues; i++, txr++)
2507 txr->busy = EM_TX_IDLE;
2508 if_link_state_change(ifp, LINK_STATE_DOWN);
2512 /*********************************************************************
2514 * This routine disables all traffic on the adapter by issuing a
2515 * global reset on the MAC and deallocates TX/RX buffers.
2517 * This routine should always be called with BOTH the CORE
2519 **********************************************************************/
/*
 * em_stop (interior; the definition line is elided in this view).
 *
 * Quiesces the adapter: disables interrupts, stops the timer callout,
 * marks the interface not-running, disarms per-ring hang detection,
 * flushes the I219 descriptor rings if needed, then issues a global
 * MAC reset and turns off the LED.  Caller must hold the core lock
 * (asserted below).
 */
2524 struct adapter *adapter = arg;
2525 struct ifnet *ifp = adapter->ifp;
2526 struct tx_ring *txr = adapter->tx_rings;
2528 EM_CORE_LOCK_ASSERT(adapter);
2530 INIT_DEBUGOUT("em_stop: begin");
2532 em_disable_intr(adapter);
2533 callout_stop(&adapter->timer);
2535 /* Tell the stack that the interface is no longer active */
2536 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2537 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2539 /* Disarm Hang Detection. */
2540 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2542 txr->busy = EM_TX_IDLE;
2546 /* I219 needs some special flushing to avoid hangs */
2547 if (adapter->hw.mac.type == e1000_pch_spt)
2548 em_flush_desc_rings(adapter);
2550 e1000_reset_hw(&adapter->hw);
/* Clear Wake-Up Control so WoL state doesn't persist */
2551 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2553 e1000_led_off(&adapter->hw);
2554 e1000_cleanup_led(&adapter->hw);
2558 /*********************************************************************
2560 * Determine hardware revision.
2562 **********************************************************************/
/*
 * em_identify_hardware - read PCI config space identity and let the
 * shared code map it to a MAC type.
 *
 * Enables bus mastering, caches the PCI command word and the
 * vendor/device/revision/subsystem IDs into the shared-code hw struct,
 * then calls e1000_set_mac_type() to classify the part.
 */
2564 em_identify_hardware(struct adapter *adapter)
2566 device_t dev = adapter->dev;
2568 /* Make sure our PCI config space has the necessary stuff set */
2569 pci_enable_busmaster(dev);
2570 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2572 /* Save off the information about this board */
2573 adapter->hw.vendor_id = pci_get_vendor(dev);
2574 adapter->hw.device_id = pci_get_device(dev);
2575 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2576 adapter->hw.subsystem_vendor_id =
2577 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2578 adapter->hw.subsystem_device_id =
2579 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2581 /* Do Shared Code Init and Setup */
2582 if (e1000_set_mac_type(&adapter->hw)) {
2583 device_printf(dev, "Setup init failure\n");
/*
 * em_allocate_pci_resources - map the device's register BAR.
 *
 * Allocates the memory-mapped register resource and wires the bus
 * tag/handle into the osdep structure so the shared code's register
 * macros can reach the hardware.
 */
2589 em_allocate_pci_resources(struct adapter *adapter)
2591 device_t dev = adapter->dev;
2595 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2597 if (adapter->memory == NULL) {
2598 device_printf(dev, "Unable to allocate bus resource: memory\n");
2601 adapter->osdep.mem_bus_space_tag =
2602 rman_get_bustag(adapter->memory);
2603 adapter->osdep.mem_bus_space_handle =
2604 rman_get_bushandle(adapter->memory);
/* hw_addr is not a direct pointer: it holds the bus-space handle
 * that the E1000_READ/WRITE_REG macros dereference via osdep */
2605 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2607 adapter->hw.back = &adapter->osdep;
2612 /*********************************************************************
2614 * Setup the Legacy or MSI Interrupt handler
2616 **********************************************************************/
/*
 * em_allocate_legacy - set up the legacy INTx or single-MSI interrupt.
 *
 * Allocates one shared IRQ resource, creates the fast taskqueues for
 * deferred RX/TX/link processing, and registers em_irq_fast() as the
 * filter handler.  On setup failure the taskqueue is freed again.
 */
2618 em_allocate_legacy(struct adapter *adapter)
2620 device_t dev = adapter->dev;
2621 struct tx_ring *txr = adapter->tx_rings;
2624 /* Manually turn off all interrupts */
2625 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
/* rid selection for MSI vs INTx (elided in this view) */
2627 if (adapter->msix == 1) /* using MSI */
2629 /* We allocate a single interrupt resource */
2630 adapter->res = bus_alloc_resource_any(dev,
2631 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2632 if (adapter->res == NULL) {
2633 device_printf(dev, "Unable to allocate bus resource: "
2639 * Allocate a fast interrupt and the associated
2640 * deferred processing contexts.
2642 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2643 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2644 taskqueue_thread_enqueue, &adapter->tq);
2645 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2646 device_get_nameunit(adapter->dev));
2647 /* Use a TX only tasklet for local timer */
2648 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2649 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2650 taskqueue_thread_enqueue, &txr->tq);
2651 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2652 device_get_nameunit(adapter->dev));
2653 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
/* em_irq_fast is a filter (runs in interrupt context); no ithread handler */
2654 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2655 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2656 device_printf(dev, "Failed to register fast interrupt "
2657 "handler: %d\n", error);
2658 taskqueue_free(adapter->tq);
2666 /*********************************************************************
2668 * Setup the MSIX Interrupt handlers
2669 * This is not really Multiqueue, rather
2670 * it's just separate interrupt vectors
2671 * for TX, RX, and Link.
2673 **********************************************************************/
/*
 * em_allocate_msix - allocate and wire up the MSI-X vectors.
 *
 * Not true multiqueue: one vector per RX ring, one per TX ring, plus a
 * link vector.  For each ring vector this allocates the IRQ resource,
 * installs the handler, optionally binds it to a CPU (round-robin via
 * em_last_bind_cpu), creates a per-ring taskqueue, and accumulates the
 * IMS/IVAR programming values in the adapter softc.  The IVAR register
 * itself is written later, not here.
 */
2675 em_allocate_msix(struct adapter *adapter)
2677 device_t dev = adapter->dev;
2678 struct tx_ring *txr = adapter->tx_rings;
2679 struct rx_ring *rxr = adapter->rx_rings;
2680 int error, rid, vector = 0;
2684 /* Make sure all interrupts are disabled */
2685 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2687 /* First set up ring resources */
2688 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2693 rxr->res = bus_alloc_resource_any(dev,
2694 SYS_RES_IRQ, &rid, RF_ACTIVE);
2695 if (rxr->res == NULL) {
2697 "Unable to allocate bus resource: "
2698 "RX MSIX Interrupt %d\n", i);
2701 if ((error = bus_setup_intr(dev, rxr->res,
2702 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2703 rxr, &rxr->tag)) != 0) {
2704 device_printf(dev, "Failed to register RX handler");
2707 #if __FreeBSD_version >= 800504
2708 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
/* Round-robin CPU binding for the RX vector */
2712 if (em_last_bind_cpu < 0)
2713 em_last_bind_cpu = CPU_FIRST();
2714 cpu_id = em_last_bind_cpu;
2715 bus_bind_intr(dev, rxr->res, cpu_id);
2717 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2718 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2719 taskqueue_thread_enqueue, &rxr->tq);
2720 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2721 device_get_nameunit(adapter->dev), cpu_id);
2723 ** Set the bit to enable interrupt
2724 ** in E1000_IMS -- bits 20 and 21
2725 ** are for RX0 and RX1, note this has
2726 ** NOTHING to do with the MSIX vector
2728 rxr->ims = 1 << (20 + i);
2729 adapter->ims |= rxr->ims;
/* IVAR: nibble i selects the RX vector; bit 3 (0x8) = valid */
2730 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2732 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
/* Same pattern for the TX rings */
2735 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2738 txr->res = bus_alloc_resource_any(dev,
2739 SYS_RES_IRQ, &rid, RF_ACTIVE);
2740 if (txr->res == NULL) {
2742 "Unable to allocate bus resource: "
2743 "TX MSIX Interrupt %d\n", i);
2746 if ((error = bus_setup_intr(dev, txr->res,
2747 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2748 txr, &txr->tag)) != 0) {
2749 device_printf(dev, "Failed to register TX handler");
2752 #if __FreeBSD_version >= 800504
2753 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2757 if (em_last_bind_cpu < 0)
2758 em_last_bind_cpu = CPU_FIRST();
2759 cpu_id = em_last_bind_cpu;
2760 bus_bind_intr(dev, txr->res, cpu_id);
2762 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2763 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2764 taskqueue_thread_enqueue, &txr->tq);
2765 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2766 device_get_nameunit(adapter->dev), cpu_id);
2768 ** Set the bit to enable interrupt
2769 ** in E1000_IMS -- bits 22 and 23
2770 ** are for TX0 and TX1, note this has
2771 ** NOTHING to do with the MSIX vector
2773 txr->ims = 1 << (22 + i);
2774 adapter->ims |= txr->ims;
/* TX IVAR nibbles start at bit 8 */
2775 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2777 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2780 /* Link interrupt */
2782 adapter->res = bus_alloc_resource_any(dev,
2783 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2784 if (!adapter->res) {
2785 device_printf(dev,"Unable to allocate "
2786 "bus resource: Link interrupt [%d]\n", rid);
2789 /* Set the link handler function */
2790 error = bus_setup_intr(dev, adapter->res,
2791 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2792 em_msix_link, adapter, &adapter->tag);
2794 adapter->res = NULL;
2795 device_printf(dev, "Failed to register LINK handler");
2798 #if __FreeBSD_version >= 800504
2799 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2801 adapter->linkvec = vector;
/* Link vector in IVAR bits 16-19; 0x80000000 = "other cause" enable */
2802 adapter->ivars |= (8 | vector) << 16;
2803 adapter->ivars |= 0x80000000;
/*
 * em_free_pci_resources - release interrupt and memory resources.
 *
 * Tears down each per-queue TX/RX interrupt (handler then resource),
 * then the link/legacy interrupt (rid depends on MSI-X vs MSI vs INTx),
 * releases any MSI vectors, and finally the MSI-X table BAR, register
 * BAR, and flash mapping.  Safe against partial attach: every pointer
 * is NULL-checked before release.
 */
2810 em_free_pci_resources(struct adapter *adapter)
2812 device_t dev = adapter->dev;
2813 struct tx_ring *txr;
2814 struct rx_ring *rxr;
2819 ** Release all the queue interrupt resources:
2821 for (int i = 0; i < adapter->num_queues; i++) {
2822 txr = &adapter->tx_rings[i];
2823 /* an early abort? */
2827 if (txr->tag != NULL) {
2828 bus_teardown_intr(dev, txr->res, txr->tag);
2831 if (txr->res != NULL)
2832 bus_release_resource(dev, SYS_RES_IRQ,
2835 rxr = &adapter->rx_rings[i];
2836 /* an early abort? */
2840 if (rxr->tag != NULL) {
2841 bus_teardown_intr(dev, rxr->res, rxr->tag);
2844 if (rxr->res != NULL)
2845 bus_release_resource(dev, SYS_RES_IRQ,
/* Pick the rid the link/legacy interrupt was allocated with */
2849 if (adapter->linkvec) /* we are doing MSIX */
2850 rid = adapter->linkvec + 1;
2852 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2854 if (adapter->tag != NULL) {
2855 bus_teardown_intr(dev, adapter->res, adapter->tag);
2856 adapter->tag = NULL;
2859 if (adapter->res != NULL)
2860 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2864 pci_release_msi(dev);
2866 if (adapter->msix_mem != NULL)
2867 bus_release_resource(dev, SYS_RES_MEMORY,
2868 adapter->memrid, adapter->msix_mem);
2870 if (adapter->memory != NULL)
2871 bus_release_resource(dev, SYS_RES_MEMORY,
2872 PCIR_BAR(0), adapter->memory);
2874 if (adapter->flash != NULL)
2875 bus_release_resource(dev, SYS_RES_MEMORY,
2876 EM_FLASH, adapter->flash);
2880 * Setup MSI or MSI/X
/*
 * em_setup_msix - decide the interrupt scheme: MSI-X, MSI, or legacy.
 *
 * MSI-X is attempted only on 82574 (Hartwell) when em_enable_msix is
 * set; with EM_MULTIQUEUE it may use two queues (5 vectors needed),
 * otherwise a single queue.  On any MSI-X shortfall the code releases
 * vectors/BAR and falls back to MSI, and failing that to a legacy IRQ.
 * NOTE(review): this chunk appears elided — the return statements that
 * report the chosen vector count are not visible here.
 */
2883 em_setup_msix(struct adapter *adapter)
2885 device_t dev = adapter->dev;
2888 /* Nearly always going to use one queue */
2889 adapter->num_queues = 1;
2892 ** Try using MSI-X for Hartwell adapters
2894 if ((adapter->hw.mac.type == e1000_82574) &&
2895 (em_enable_msix == TRUE)) {
2896 #ifdef EM_MULTIQUEUE
2897 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2898 if (adapter->num_queues > 1)
2899 em_enable_vectors_82574(adapter);
2901 /* Map the MSIX BAR */
2902 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2903 adapter->msix_mem = bus_alloc_resource_any(dev,
2904 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2905 if (adapter->msix_mem == NULL) {
2906 /* May not be enabled */
2907 device_printf(adapter->dev,
2908 "Unable to map MSIX table \n");
2911 val = pci_msix_count(dev);
2913 #ifdef EM_MULTIQUEUE
2914 /* We need 5 vectors in the multiqueue case */
2915 if (adapter->num_queues > 1 ) {
/* Not enough vectors for 2 queues: drop back to one */
2919 adapter->num_queues = 1;
2920 device_printf(adapter->dev,
2921 "Insufficient MSIX vectors for >1 queue, "
2922 "using single queue...\n");
2931 device_printf(adapter->dev,
2932 "Insufficient MSIX vectors, using MSI\n");
2935 #ifdef EM_MULTIQUEUE
2939 if ((pci_alloc_msix(dev, &val) == 0)) {
2940 device_printf(adapter->dev,
2941 "Using MSIX interrupts "
2942 "with %d vectors\n", val);
2947 ** If MSIX alloc failed or provided us with
2948 ** less than needed, free and fall through to MSI
2950 pci_release_msi(dev);
2953 if (adapter->msix_mem != NULL) {
2954 bus_release_resource(dev, SYS_RES_MEMORY,
2955 adapter->memrid, adapter->msix_mem);
2956 adapter->msix_mem = NULL;
2959 if (pci_alloc_msi(dev, &val) == 0) {
2960 device_printf(adapter->dev, "Using an MSI interrupt\n");
2963 /* Should only happen due to manual configuration */
2964 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2970 ** The 3 following flush routines are used as a workaround in the
2971 ** I219 client parts and only for them.
2973 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2975 ** We want to clear all pending descriptors from the TX ring.
2976 ** zeroing happens when the HW reads the regs. We assign the ring itself as
2977 ** the data of the next descriptor. We don't care about the data we are about
/*
 * em_flush_tx_ring - I219 workaround: push one dummy descriptor through
 * the TX ring so the hardware drains all pending descriptors.
 *
 * The ring's own physical address is used as the dummy buffer address;
 * the data transmitted is irrelevant.  TDT is then bumped to hand the
 * descriptor to the hardware.
 */
2981 em_flush_tx_ring(struct adapter *adapter)
2983 struct e1000_hw *hw = &adapter->hw;
2984 struct tx_ring *txr = adapter->tx_rings;
2985 struct e1000_tx_desc *txd;
2986 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
/* Make sure the transmitter is enabled before queuing the dummy */
2989 tctl = E1000_READ_REG(hw, E1000_TCTL);
2990 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2992 txd = &txr->tx_base[txr->next_avail_desc++];
2993 if (txr->next_avail_desc == adapter->num_tx_desc)
2994 txr->next_avail_desc = 0;
2996 /* Just use the ring as a dummy buffer addr */
2997 txd->buffer_addr = txr->txdma.dma_paddr;
2998 txd->lower.data = htole32(txd_lower | size);
2999 txd->upper.data = 0;
3001 /* flush descriptors to memory before notifying the HW */
3004 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
3010 ** em_flush_rx_ring - remove all descriptors from the rx_ring
3012 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
/*
 * em_flush_rx_ring - I219 workaround: mark all RX descriptors consumed
 * and leave the RX unit disabled.
 *
 * Temporarily re-enables RCTL_EN so the updated RXDCTL thresholds take
 * effect, then disables receive again.
 */
3015 em_flush_rx_ring(struct adapter *adapter)
3017 struct e1000_hw *hw = &adapter->hw;
3020 rctl = E1000_READ_REG(hw, E1000_RCTL);
3021 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3022 E1000_WRITE_FLUSH(hw);
3025 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3026 /* zero the lower 14 bits (prefetch and host thresholds) */
3027 rxdctl &= 0xffffc000;
3029 * update thresholds: prefetch threshold to 31, host threshold to 1
3030 * and make sure the granularity is "descriptors" and not "cache lines"
3032 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3033 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3035 /* momentarily enable the RX ring for the changes to take effect */
3036 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3037 E1000_WRITE_FLUSH(hw);
3039 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3043 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3045 ** In i219, the descriptor rings must be emptied before resetting the HW
3046 ** or before changing the device state to D3 during runtime (runtime PM).
3048 ** Failure to do this will cause the HW to enter a unit hang state which can
3049 ** only be released by PCI reset on the device
/*
 * em_flush_desc_rings - I219 workaround entry point.
 *
 * The I219 descriptor rings must be emptied before a HW reset or D3
 * transition, otherwise the MAC can hang in a state only a PCI reset
 * clears.  Disables the MULR fix first, flushes TX if the PCI config
 * "descriptor ring status" word reports a flush is required, then
 * rechecks and flushes RX if the fault persists.
 */
3053 em_flush_desc_rings(struct adapter *adapter)
3055 struct e1000_hw *hw = &adapter->hw;
3056 device_t dev = adapter->dev;
3058 u32 fext_nvm11, tdlen;
3060 /* First, disable MULR fix in FEXTNVM11 */
3061 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3062 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3063 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3065 /* do nothing if we're not in faulty state, or if the queue is empty */
3066 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
/* Hang state is reported via a PCI config word, not a MMIO register */
3067 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3068 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3070 em_flush_tx_ring(adapter);
3072 /* recheck, maybe the fault is caused by the rx ring */
3073 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3074 if (hang_state & FLUSH_DESC_REQUIRED)
3075 em_flush_rx_ring(adapter);
3079 /*********************************************************************
3081 * Initialize the hardware to a configuration
3082 * as specified by the adapter structure.
3084 **********************************************************************/
/*
 * em_reset - bring the hardware to a known-good configured state.
 *
 * Disables smart power down on 82571/82572, programs the Packet Buffer
 * Allocation (PBA) RX/TX split per MAC type, derives flow-control high
 * and low water marks from the resulting RX buffer size, applies
 * per-family flow-control overrides, flushes I219 rings if needed, then
 * issues the global reset and e1000_init_hw().
 */
3086 em_reset(struct adapter *adapter)
3088 device_t dev = adapter->dev;
3089 struct ifnet *ifp = adapter->ifp;
3090 struct e1000_hw *hw = &adapter->hw;
3094 INIT_DEBUGOUT("em_reset: begin");
3096 /* Set up smart power down as default off on newer adapters. */
3097 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3098 hw->mac.type == e1000_82572)) {
3101 /* Speed up time to link by disabling smart power down. */
3102 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3103 phy_tmp &= ~IGP02E1000_PM_SPD;
3104 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3108 * Packet Buffer Allocation (PBA)
3109 * Writing PBA sets the receive portion of the buffer
3110 * the remainder is used for the transmit buffer.
3112 switch (hw->mac.type) {
3113 /* Total Packet Buffer on these is 48K */
3116 case e1000_80003es2lan:
3117 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3119 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3120 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3124 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3130 case e1000_ich10lan:
3131 /* Boost Receive side for jumbo frames */
3132 if (adapter->hw.mac.max_frame_size > 4096)
3133 pba = E1000_PBA_14K;
3135 pba = E1000_PBA_10K;
3142 pba = E1000_PBA_26K;
/* default case (elided in this view): size by max frame */
3145 if (adapter->hw.mac.max_frame_size > 8192)
3146 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3148 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3150 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3153 * These parameters control the automatic generation (Tx) and
3154 * response (Rx) to Ethernet PAUSE frames.
3155 * - High water mark should allow for at least two frames to be
3156 * received after sending an XOFF.
3157 * - Low water mark works best when it is very near the high water mark.
3158 * This allows the receiver to restart by sending XON when it has
3159 * drained a bit. Here we use an arbitrary value of 1500 which will
3160 * restart after one full frame is pulled from the buffer. There
3161 * could be several smaller frames in the buffer and if so they will
3162 * not trigger the XON until their total number reduces the buffer
3164 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
/* PBA's RX field is in KB; shift left 10 converts to bytes */
3166 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3167 hw->fc.high_water = rx_buffer_size -
3168 roundup2(adapter->hw.mac.max_frame_size, 1024);
3169 hw->fc.low_water = hw->fc.high_water - 1500;
3171 if (adapter->fc) /* locally set flow control value? */
3172 hw->fc.requested_mode = adapter->fc;
3174 hw->fc.requested_mode = e1000_fc_full;
3176 if (hw->mac.type == e1000_80003es2lan)
3177 hw->fc.pause_time = 0xFFFF;
3179 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3181 hw->fc.send_xon = TRUE;
3183 /* Device specific overrides/settings */
3184 switch (hw->mac.type) {
3186 /* Workaround: no TX flow ctrl for PCH */
3187 hw->fc.requested_mode = e1000_fc_rx_pause;
3188 hw->fc.pause_time = 0xFFFF; /* override */
3189 if (ifp->if_mtu > ETHERMTU) {
3190 hw->fc.high_water = 0x3500;
3191 hw->fc.low_water = 0x1500;
3193 hw->fc.high_water = 0x5000;
3194 hw->fc.low_water = 0x3000;
3196 hw->fc.refresh_time = 0x1000;
3202 hw->fc.high_water = 0x5C20;
3203 hw->fc.low_water = 0x5048;
3204 hw->fc.pause_time = 0x0650;
3205 hw->fc.refresh_time = 0x0400;
3206 /* Jumbos need adjusted PBA */
3207 if (ifp->if_mtu > ETHERMTU)
3208 E1000_WRITE_REG(hw, E1000_PBA, 12);
3210 E1000_WRITE_REG(hw, E1000_PBA, 26);
3213 case e1000_ich10lan:
3214 if (ifp->if_mtu > ETHERMTU) {
3215 hw->fc.high_water = 0x2800;
3216 hw->fc.low_water = hw->fc.high_water - 8;
3219 /* else fall thru */
3221 if (hw->mac.type == e1000_80003es2lan)
3222 hw->fc.pause_time = 0xFFFF;
3226 /* I219 needs some special flushing to avoid hangs */
3227 if (hw->mac.type == e1000_pch_spt)
3228 em_flush_desc_rings(adapter);
3230 /* Issue a global reset */
3232 E1000_WRITE_REG(hw, E1000_WUC, 0);
3233 em_disable_aspm(adapter);
3235 if (e1000_init_hw(hw) < 0) {
3236 device_printf(dev, "Hardware Initialization Failed\n");
3240 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3241 e1000_get_phy_info(hw);
3242 e1000_check_for_link(hw);
3246 /*********************************************************************
3248 * Setup networking device structure and register an interface.
3250 **********************************************************************/
/*
 * em_setup_interface - allocate and register the ifnet.
 *
 * Fills in the ifnet callbacks (init/ioctl/transmit or start), TSO
 * limits, capability flags (checksum offload, VLAN tagging, optional
 * TSO/HWFILTER/POLLING/WOL), attaches the Ethernet layer, and builds
 * the ifmedia list for copper vs fiber/serdes parts.
 */
3252 em_setup_interface(device_t dev, struct adapter *adapter)
3256 INIT_DEBUGOUT("em_setup_interface: begin");
3258 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3260 device_printf(dev, "can not allocate ifnet structure\n");
3263 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3264 ifp->if_init = em_init;
3265 ifp->if_softc = adapter;
3266 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3267 ifp->if_ioctl = em_ioctl;
3269 /* TSO parameters */
3270 ifp->if_hw_tsomax = IP_MAXPACKET;
3271 /* Take m_pullup(9)'s in em_xmit() w/ TSO into account. */
3272 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3273 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3275 #ifdef EM_MULTIQUEUE
3276 /* Multiqueue stack interface */
3277 ifp->if_transmit = em_mq_start;
3278 ifp->if_qflush = em_qflush;
/* Legacy if_start path with a bounded send queue */
3280 ifp->if_start = em_start;
3281 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3282 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3283 IFQ_SET_READY(&ifp->if_snd);
3286 ether_ifattach(ifp, adapter->hw.mac.addr);
3288 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3289 ifp->if_capenable = ifp->if_capabilities;
3292 * Tell the upper layer(s) we
3293 * support full VLAN capability
3295 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3296 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3299 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3303 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3304 * - Although the silicon bug of TSO only working at gigabit speed is
3305 * worked around in em_update_link_status() by selectively setting
3306 * CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3307 * descriptors. Thus, such descriptors may still cause the MAC to
3308 * hang and, consequently, TSO is only safe to be used in setups
3309 * where the link isn't expected to switch from gigabit to lower
3311 * - Similarly, there's currently no way to trigger a reconfiguration
3312 * of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3313 * runtime. Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3314 * when link speed changes are not to be expected.
3315 * - Despite all the workarounds for TSO-related silicon bugs, at
3316 * least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3318 ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
3321 ** Don't turn this on by default, if vlans are
3322 ** created on another pseudo device (eg. lagg)
3323 ** then vlan events are not passed thru, breaking
3324 ** operation, but with HW FILTER off it works. If
3325 ** using vlans directly on the em driver you can
3326 ** enable this and get full hardware tag filtering.
3328 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3330 #ifdef DEVICE_POLLING
3331 ifp->if_capabilities |= IFCAP_POLLING;
3334 /* Enable only WOL MAGIC by default */
3336 ifp->if_capabilities |= IFCAP_WOL;
3337 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3341 * Specify the media types supported by this adapter and register
3342 * callbacks to update media and link information
3344 ifmedia_init(&adapter->media, IFM_IMASK,
3345 em_media_change, em_media_status);
3346 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3347 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3348 u_char fiber_type = IFM_1000_SX; /* default type */
3350 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3352 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
/* Copper: advertise 10/100 always, 1000 unless the PHY is an ife */
3354 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3355 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3357 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3359 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3361 if (adapter->hw.phy.type != e1000_phy_ife) {
3362 ifmedia_add(&adapter->media,
3363 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3364 ifmedia_add(&adapter->media,
3365 IFM_ETHER | IFM_1000_T, 0, NULL);
3368 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3369 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3375 * Manage DMA'able memory.
/*
 * em_dmamap_cb - bus_dmamap_load() callback: store the single mapped
 * segment's bus address into the caller-supplied bus_addr_t.
 */
3378 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3382 *(bus_addr_t *) arg = segs[0].ds_addr;
/*
 * em_dma_malloc - allocate a coherent DMA-able memory region.
 *
 * Creates a DMA tag, allocates the memory, and loads the map; the bus
 * address is captured via em_dmamap_cb into dma->dma_paddr.  On any
 * failure the already-acquired resources are unwound (goto-cleanup
 * labels are elided in this view) and dma->dma_tag is NULLed.
 */
3386 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3387 struct em_dma_alloc *dma, int mapflags)
3391 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3392 EM_DBA_ALIGN, 0, /* alignment, bounds */
3393 BUS_SPACE_MAXADDR, /* lowaddr */
3394 BUS_SPACE_MAXADDR, /* highaddr */
3395 NULL, NULL, /* filter, filterarg */
3398 size, /* maxsegsize */
3400 NULL, /* lockfunc */
3404 device_printf(adapter->dev,
3405 "%s: bus_dma_tag_create failed: %d\n",
3410 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3411 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3413 device_printf(adapter->dev,
3414 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3415 __func__, (uintmax_t)size, error);
3420 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3421 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
/* dma_paddr == 0 means the callback never fired with a valid segment */
3422 if (error || dma->dma_paddr == 0) {
3423 device_printf(adapter->dev,
3424 "%s: bus_dmamap_load failed: %d\n",
/* Error unwind: release in reverse order of acquisition */
3432 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3434 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3435 bus_dma_tag_destroy(dma->dma_tag);
3437 dma->dma_tag = NULL;
/*
 * em_dma_free - release a region allocated by em_dma_malloc().
 *
 * No-op when the tag is NULL (never allocated or already freed);
 * otherwise syncs/unloads the map, frees the memory, destroys the tag,
 * and NULLs the pointers so a double free is harmless.
 */
3443 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3445 if (dma->dma_tag == NULL)
3447 if (dma->dma_paddr != 0) {
3448 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3449 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3450 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3453 if (dma->dma_vaddr != NULL) {
3454 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3455 dma->dma_vaddr = NULL;
3457 bus_dma_tag_destroy(dma->dma_tag);
3458 dma->dma_tag = NULL;
3462 /*********************************************************************
3464 * Allocate memory for the transmit and receive rings, and then
3465 * the descriptors associated with each, called only once at attach.
3467 **********************************************************************/
/*
 * em_allocate_queues - one-time (attach) allocation of TX/RX ring
 * structures, their descriptor DMA areas, and buffer arrays.
 *
 * txconf/rxconf count how many rings were fully set up so the error
 * path can unwind exactly the DMA areas that were allocated before the
 * failure.  On FreeBSD >= 8 each TX ring also gets a buf_ring for the
 * multiqueue transmit path.
 */
3469 em_allocate_queues(struct adapter *adapter)
3471 device_t dev = adapter->dev;
3472 struct tx_ring *txr = NULL;
3473 struct rx_ring *rxr = NULL;
3474 int rsize, tsize, error = E1000_SUCCESS;
3475 int txconf = 0, rxconf = 0;
3478 /* Allocate the TX ring struct memory */
3479 if (!(adapter->tx_rings =
3480 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3481 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3482 device_printf(dev, "Unable to allocate TX ring memory\n");
3487 /* Now allocate the RX */
3488 if (!(adapter->rx_rings =
3489 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3490 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3491 device_printf(dev, "Unable to allocate RX ring memory\n");
3496 tsize = roundup2(adapter->num_tx_desc *
3497 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3499 * Now set up the TX queues, txconf is needed to handle the
3500 * possibility that things fail midcourse and we need to
3501 * undo memory gracefully
3503 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3504 /* Set up some basics */
3505 txr = &adapter->tx_rings[i];
3506 txr->adapter = adapter;
3509 /* Initialize the TX lock */
3510 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3511 device_get_nameunit(dev), txr->me);
3512 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3514 if (em_dma_malloc(adapter, tsize,
3515 &txr->txdma, BUS_DMA_NOWAIT)) {
3517 "Unable to allocate TX Descriptor memory\n");
3521 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3522 bzero((void *)txr->tx_base, tsize);
3524 if (em_allocate_transmit_buffers(txr)) {
3526 "Critical Failure setting up transmit buffers\n");
3530 #if __FreeBSD_version >= 800000
3531 /* Allocate a buf ring */
3532 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3533 M_WAITOK, &txr->tx_mtx);
3538 * Next the RX queues...
3540 rsize = roundup2(adapter->num_rx_desc *
3541 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3542 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3543 rxr = &adapter->rx_rings[i];
3544 rxr->adapter = adapter;
3547 /* Initialize the RX lock */
/* NOTE(review): name uses txr->me here, not rxr->me — looks like a
 * copy/paste slip in the original; verify before changing */
3548 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3549 device_get_nameunit(dev), txr->me);
3550 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3552 if (em_dma_malloc(adapter, rsize,
3553 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3555 "Unable to allocate RxDescriptor memory\n");
3559 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3560 bzero((void *)rxr->rx_base, rsize);
3562 /* Allocate receive buffers for the ring*/
3563 if (em_allocate_receive_buffers(rxr)) {
3565 "Critical Failure setting up receive buffers\n");
/* Error unwind: free exactly the rings that were configured */
3574 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3575 em_dma_free(adapter, &rxr->rxdma);
3577 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3578 em_dma_free(adapter, &txr->txdma);
3579 free(adapter->rx_rings, M_DEVBUF);
3581 #if __FreeBSD_version >= 800000
3582 buf_ring_free(txr->br, M_DEVBUF);
3584 free(adapter->tx_rings, M_DEVBUF);
3590 /*********************************************************************
3592 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3593 * the information needed to transmit a packet on the wire. This is
3594 * called only once at attach, setup is done every reset.
3596 **********************************************************************/
/*
 * em_allocate_transmit_buffers - create the TX DMA tag, the per-ring
 * tx_buffer array, and one DMA map per descriptor. Called once at
 * attach; on any failure everything is unwound through
 * em_free_transmit_structures(), which handles partial setup.
 * NOTE(review): fragment is missing lines (return type, braces, error
 * gotos) lost in extraction.
 */
3598 em_allocate_transmit_buffers(struct tx_ring *txr)
3600 	struct adapter *adapter = txr->adapter;
3601 	device_t dev = adapter->dev;
3602 	struct em_txbuffer *txbuf;
3606 	 * Setup DMA descriptor areas.
/* TX tag: up to EM_TSO_SIZE bytes in at most EM_MAX_SCATTER segments. */
3608 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3609 			       1, 0,			/* alignment, bounds */
3610 			       BUS_SPACE_MAXADDR,	/* lowaddr */
3611 			       BUS_SPACE_MAXADDR,	/* highaddr */
3612 			       NULL, NULL,		/* filter, filterarg */
3613 			       EM_TSO_SIZE,		/* maxsize */
3614 			       EM_MAX_SCATTER,		/* nsegments */
3615 			       PAGE_SIZE,		/* maxsegsize */
3617 			       NULL,			/* lockfunc */
3618 			       NULL,			/* lockfuncarg */
3620 		device_printf(dev,"Unable to allocate TX DMA tag\n");
/* One em_txbuffer per TX descriptor, zeroed. */
3624 	if (!(txr->tx_buffers =
3625 	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3626 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3627 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3632 	/* Create the descriptor buffer dma maps */
3633 	txbuf = txr->tx_buffers;
3634 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3635 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3637 			device_printf(dev, "Unable to create TX DMA map\n");
3644 	/* We free all, it handles case where we are in the middle */
3645 	em_free_transmit_structures(adapter);
3649 /*********************************************************************
3651 * Initialize a transmit ring.
3653 **********************************************************************/
/*
 * em_setup_transmit_ring - (re)initialize one TX ring: clear the
 * descriptor area, free any mbufs left from a previous run, reset the
 * avail/clean indices and the cached checksum-offload context, then
 * sync the descriptor DMA area for the hardware. Under DEV_NETMAP the
 * slots are instead pointed at netmap-owned buffers.
 * NOTE(review): fragment is missing lines (braces, #ifdef DEV_NETMAP
 * guards) lost in extraction.
 */
3655 em_setup_transmit_ring(struct tx_ring *txr)
3657 	struct adapter *adapter = txr->adapter;
3658 	struct em_txbuffer *txbuf;
3661 	struct netmap_adapter *na = NA(adapter->ifp);
3662 	struct netmap_slot *slot;
3663 #endif /* DEV_NETMAP */
3665 	/* Clear the old descriptor contents */
3668 		slot = netmap_reset(na, NR_TX, txr->me, 0);
3669 #endif /* DEV_NETMAP */
3671 	bzero((void *)txr->tx_base,
3672 	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
/* Reset ring state: producer and consumer both at slot 0. */
3674 	txr->next_avail_desc = 0;
3675 	txr->next_to_clean = 0;
3677 	/* Free any existing tx buffers. */
3678 	txbuf = txr->tx_buffers;
3679 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3680 		if (txbuf->m_head != NULL) {
3681 			bus_dmamap_sync(txr->txtag, txbuf->map,
3682 			    BUS_DMASYNC_POSTWRITE);
3683 			bus_dmamap_unload(txr->txtag, txbuf->map);
3684 			m_freem(txbuf->m_head);
3685 			txbuf->m_head = NULL;
/* netmap mode: point this descriptor at the netmap slot's buffer. */
3689 		int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3693 		addr = PNMB(na, slot + si, &paddr);
3694 		txr->tx_base[i].buffer_addr = htole64(paddr);
3695 		/* reload the map for netmap mode */
3696 		netmap_load_map(na, txr->txtag, txbuf->map, addr);
3698 #endif /* DEV_NETMAP */
3700 		/* clear the watch index */
3701 		txbuf->next_eop = -1;
3704 	/* Set number of descriptors available */
3705 	txr->tx_avail = adapter->num_tx_desc;
3706 	txr->busy = EM_TX_IDLE;
3708 	/* Clear checksum offload context. */
3709 	txr->last_hw_offload = 0;
3710 	txr->last_hw_ipcss = 0;
3711 	txr->last_hw_ipcso = 0;
3712 	txr->last_hw_tucss = 0;
3713 	txr->last_hw_tucso = 0;
/* Push the cleared descriptor ring out to the device. */
3715 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3716 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3720 /*********************************************************************
3722 * Initialize all transmit rings.
3724 **********************************************************************/
/*
 * em_setup_transmit_structures - initialize every TX ring by calling
 * em_setup_transmit_ring() on each in turn.
 * NOTE(review): return type/statement missing from this fragment.
 */
3726 em_setup_transmit_structures(struct adapter *adapter)
3728 	struct tx_ring *txr = adapter->tx_rings;
3730 	for (int i = 0; i < adapter->num_queues; i++, txr++)
3731 		em_setup_transmit_ring(txr);
3736 /*********************************************************************
3738 * Enable transmit unit.
3740 **********************************************************************/
/*
 * em_initialize_transmit_unit - program the hardware transmit unit:
 * per-queue ring base/length/head/tail and TXDCTL thresholds, the
 * inter-packet-gap and interrupt-delay timers, MAC-specific TARC
 * errata workarounds, and finally TCTL (which enables transmit).
 * NOTE(review): fragment is missing lines (braces, some case labels,
 * a local 'reg' declaration) lost in extraction.
 */
3742 em_initialize_transmit_unit(struct adapter *adapter)
3744 	struct tx_ring *txr = adapter->tx_rings;
3745 	struct e1000_hw *hw = &adapter->hw;
3746 	u32 tctl, txdctl = 0, tarc, tipg = 0;
3748 	INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3750 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3751 		u64 bus_addr = txr->txdma.dma_paddr;
3752 		/* Base and Len of TX Ring */
3753 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3754 		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3755 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3756 		    (u32)(bus_addr >> 32));
3757 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3759 		/* Init the HEAD/TAIL indices */
3760 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3761 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3763 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3764 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3765 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3767 		txr->busy = EM_TX_IDLE;
/* Build TXDCTL from scratch: prefetch/host/writeback thresholds. */
3768 		txdctl = 0; /* clear txdctl */
3769 		txdctl |= 0x1f; /* PTHRESH */
3770 		txdctl |= 1 << 8; /* HTHRESH */
3771 		txdctl |= 1 << 16;/* WTHRESH */
3772 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3773 		txdctl |= E1000_TXDCTL_GRAN;
3774 		txdctl |= 1 << 25; /* LWTHRESH */
3776 		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3779 	/* Set the default values for the Tx Inter Packet Gap timer */
3780 	switch (adapter->hw.mac.type) {
3781 	case e1000_80003es2lan:
3782 		tipg = DEFAULT_82543_TIPG_IPGR1;
3783 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3784 		    E1000_TIPG_IPGR2_SHIFT;
/* default case: IPGT depends on fiber/serdes vs copper media. */
3787 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3788 		    (adapter->hw.phy.media_type ==
3789 		    e1000_media_type_internal_serdes))
3790 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3792 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3793 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3794 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3797 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3798 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3800 	if(adapter->hw.mac.type >= e1000_82540)
3801 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3802 		    adapter->tx_abs_int_delay.value);
/* Per-MAC TARC tweaks (speed-mode / round-robin / 82574 errata). */
3804 	if ((adapter->hw.mac.type == e1000_82571) ||
3805 	    (adapter->hw.mac.type == e1000_82572)) {
3806 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3807 		tarc |= TARC_SPEED_MODE_BIT;
3808 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3809 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
3810 		/* errata: program both queues to unweighted RR */
3811 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3813 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3814 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3816 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3817 	} else if (adapter->hw.mac.type == e1000_82574) {
3818 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3819 		tarc |= TARC_ERRATA_BIT;
3820 		if ( adapter->num_queues > 1) {
3821 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3822 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3823 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3825 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3828 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3829 	if (adapter->tx_int_delay.value > 0)
3830 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3832 	/* Program the Transmit Control Register */
3833 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3834 	tctl &= ~E1000_TCTL_CT;
3835 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3836 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3838 	if (adapter->hw.mac.type >= e1000_82571)
3839 		tctl |= E1000_TCTL_MULR;
3841 	/* This write will effectively turn on the transmit unit. */
3842 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3844 	/* SPT and KBL errata workarounds */
3845 	if (hw->mac.type == e1000_pch_spt) {
3847 		reg = E1000_READ_REG(hw, E1000_IOSFPC);
3848 		reg |= E1000_RCTL_RDMTS_HEX;
3849 		E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3850 		/* i218-i219 Specification Update 1.5.4.5 */
3851 		reg = E1000_READ_REG(hw, E1000_TARC(0));
3852 		reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3853 		reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3854 		E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3859 /*********************************************************************
3861 * Free all transmit rings.
3863 **********************************************************************/
/*
 * em_free_transmit_structures - tear down all TX rings: free each
 * ring's buffers, its descriptor DMA area, its lock, then the
 * tx_rings array itself. Counterpart of em_allocate_queues()'s TX
 * side; tolerant of partial setup via the callees' NULL checks.
 */
3865 em_free_transmit_structures(struct adapter *adapter)
3867 	struct tx_ring *txr = adapter->tx_rings;
3869 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3871 		em_free_transmit_buffers(txr);
3872 		em_dma_free(adapter, &txr->txdma);
3874 		EM_TX_LOCK_DESTROY(txr);
3877 	free(adapter->tx_rings, M_DEVBUF);
3880 /*********************************************************************
3882 * Free transmit ring related data structures.
3884 **********************************************************************/
/*
 * em_free_transmit_buffers - free one TX ring's per-descriptor state:
 * any in-flight mbufs and their DMA maps, the optional buf_ring, the
 * tx_buffers array, and the TX DMA tag. Every step is NULL-guarded so
 * it can clean up after a partially failed allocation.
 * NOTE(review): fragment is missing lines (braces, map argument
 * continuations, early return) lost in extraction.
 */
3886 em_free_transmit_buffers(struct tx_ring *txr)
3888 	struct adapter *adapter = txr->adapter;
3889 	struct em_txbuffer *txbuf;
3891 	INIT_DEBUGOUT("free_transmit_ring: begin");
3893 	if (txr->tx_buffers == NULL)
3896 	for (int i = 0; i < adapter->num_tx_desc; i++) {
3897 		txbuf = &txr->tx_buffers[i];
/* Descriptor still holds a packet: sync, unload, free the mbuf chain. */
3898 		if (txbuf->m_head != NULL) {
3899 			bus_dmamap_sync(txr->txtag, txbuf->map,
3900 			    BUS_DMASYNC_POSTWRITE);
3901 			bus_dmamap_unload(txr->txtag,
3903 			m_freem(txbuf->m_head);
3904 			txbuf->m_head = NULL;
3905 			if (txbuf->map != NULL) {
3906 				bus_dmamap_destroy(txr->txtag,
/* No mbuf but a map exists: just unload and destroy the map. */
3910 		} else if (txbuf->map != NULL) {
3911 			bus_dmamap_unload(txr->txtag,
3913 			bus_dmamap_destroy(txr->txtag,
3918 #if __FreeBSD_version >= 800000
3919 	if (txr->br != NULL)
3920 		buf_ring_free(txr->br, M_DEVBUF);
3922 	if (txr->tx_buffers != NULL) {
3923 		free(txr->tx_buffers, M_DEVBUF);
3924 		txr->tx_buffers = NULL;
3926 	if (txr->txtag != NULL) {
3927 		bus_dma_tag_destroy(txr->txtag);
3934 /*********************************************************************
3935 * The offload context is protocol specific (TCP/UDP) and thus
3936 * only needs to be set when the protocol changes. The occasion
3937 * of a context change can be a performance detriment, and
3938 * might be better just disabled. The reason arises in the way
3939 * in which the controller supports pipelined requests from the
3940 * Tx data DMA. Up to four requests can be pipelined, and they may
3941 * belong to the same packet or to multiple packets. However all
3942 * requests for one packet are issued before a request is issued
3943 * for a subsequent packet and if a request for the next packet
3944 * requires a context change, that request will be stalled
3945 * until the previous request completes. This means setting up
3946 * a new context effectively disables pipelined Tx data DMA which
3947 * in turn greatly slow down performance to send small sized
3949 **********************************************************************/
/*
 * em_transmit_checksum_setup - emit a TX context descriptor programming
 * IP and/or TCP/UDP checksum offload for the frame in 'mp', and set
 * *txd_upper/*txd_lower for the following data descriptors. On a
 * single-queue adapter the last programmed context is cached in the
 * ring (last_hw_*) and reused when unchanged, since writing a new
 * context stalls the controller's pipelined TX DMA (see banner above).
 * On multi-queue 82574 the cache is bypassed: the chip only remembers
 * one context across all queues (82574L spec section 7.2.6).
 * NOTE(review): fragment is missing lines (return type, braces,
 * 'return' statements inside the cache hits, local declarations for
 * cur/cmd/offload/hdr_len) lost in extraction.
 */
3951 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3952     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3954 	struct adapter *adapter = txr->adapter;
3955 	struct e1000_context_desc *TXD = NULL;
3956 	struct em_txbuffer *tx_buffer;
3960 	u8 ipcso, ipcss, tucso, tucss;
3962 	ipcss = ipcso = tucss = tucso = 0;
/* End of the IP header = L2 header length + IHL in bytes. */
3963 	hdr_len = ip_off + (ip->ip_hl << 2);
3964 	cur = txr->next_avail_desc;
3966 	/* Setup of IP header checksum. */
3967 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3968 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3971 		ipcso = ip_off + offsetof(struct ip, ip_sum);
3973 		 * Start offset for header checksum calculation.
3974 		 * End offset for header checksum calculation.
3975 		 * Offset of place to put the checksum.
3977 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3978 		TXD->lower_setup.ip_fields.ipcss = ipcss;
3979 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3980 		TXD->lower_setup.ip_fields.ipcso = ipcso;
3981 		cmd |= E1000_TXD_CMD_IP;
/* TCP payload checksum offload. */
3984 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3985 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3986 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3987 		offload |= CSUM_TCP;
3989 		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3991 		 * The 82574L can only remember the *last* context used
3992 		 * regardless of queue that it was use for. We cannot reuse
3993 		 * contexts on this hardware platform and must generate a new
3994 		 * context every time. 82574L hardware spec, section 7.2.6,
3997 		if (adapter->num_queues < 2) {
3999 			 * Setting up new checksum offload context for every
4000 			 * frames takes a lot of processing time for hardware.
4001 			 * This also reduces performance a lot for small sized
4002 			 * frames so avoid it if driver can use previously
4003 			 * configured checksum offload context.
/* Cache hit: same offload mode and same offsets -> reuse context. */
4005 			if (txr->last_hw_offload == offload) {
4006 				if (offload & CSUM_IP) {
4007 					if (txr->last_hw_ipcss == ipcss &&
4008 					    txr->last_hw_ipcso == ipcso &&
4009 					    txr->last_hw_tucss == tucss &&
4010 					    txr->last_hw_tucso == tucso)
4013 					if (txr->last_hw_tucss == tucss &&
4014 					    txr->last_hw_tucso == tucso)
4018 			txr->last_hw_offload = offload;
4019 			txr->last_hw_tucss = tucss;
4020 			txr->last_hw_tucso = tucso;
4023 		 * Start offset for payload checksum calculation.
4024 		 * End offset for payload checksum calculation.
4025 		 * Offset of place to put the checksum.
4027 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4028 		TXD->upper_setup.tcp_fields.tucss = hdr_len;
4029 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
4030 		TXD->upper_setup.tcp_fields.tucso = tucso;
4031 		cmd |= E1000_TXD_CMD_TCP;
/* UDP payload checksum offload (mutually exclusive with TCP). */
4032 	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4033 		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4034 		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4036 		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4038 		 * The 82574L can only remember the *last* context used
4039 		 * regardless of queue that it was use for. We cannot reuse
4040 		 * contexts on this hardware platform and must generate a new
4041 		 * context every time. 82574L hardware spec, section 7.2.6,
4044 		if (adapter->num_queues < 2) {
4046 			 * Setting up new checksum offload context for every
4047 			 * frames takes a lot of processing time for hardware.
4048 			 * This also reduces performance a lot for small sized
4049 			 * frames so avoid it if driver can use previously
4050 			 * configured checksum offload context.
4052 			if (txr->last_hw_offload == offload) {
4053 				if (offload & CSUM_IP) {
4054 					if (txr->last_hw_ipcss == ipcss &&
4055 					    txr->last_hw_ipcso == ipcso &&
4056 					    txr->last_hw_tucss == tucss &&
4057 					    txr->last_hw_tucso == tucso)
4060 					if (txr->last_hw_tucss == tucss &&
4061 					    txr->last_hw_tucso == tucso)
4065 			txr->last_hw_offload = offload;
4066 			txr->last_hw_tucss = tucss;
4067 			txr->last_hw_tucso = tucso;
4070 		 * Start offset for header checksum calculation.
4071 		 * End offset for header checksum calculation.
4072 		 * Offset of place to put the checksum.
4074 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4075 		TXD->upper_setup.tcp_fields.tucss = tucss;
4076 		TXD->upper_setup.tcp_fields.tucse = htole16(0);
4077 		TXD->upper_setup.tcp_fields.tucso = tucso;
/* Remember the IP offsets for the next cache comparison. */
4080 	if (offload & CSUM_IP) {
4081 		txr->last_hw_ipcss = ipcss;
4082 		txr->last_hw_ipcso = ipcso;
/* Finalize the context descriptor and consume one ring slot. */
4085 	TXD->tcp_seg_setup.data = htole32(0);
4086 	TXD->cmd_and_length =
4087 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4088 	tx_buffer = &txr->tx_buffers[cur];
4089 	tx_buffer->m_head = NULL;
4090 	tx_buffer->next_eop = -1;
4092 	if (++cur == adapter->num_tx_desc)
4096 	txr->next_avail_desc = cur;
4100 /**********************************************************************
4102 * Setup work for hardware segmentation offload (TSO)
4104 **********************************************************************/
/*
 * em_tso_setup - emit a TSO context descriptor for the IPv4/TCP frame
 * in 'mp': programs IP and TCP checksum insertion offsets, the MSS and
 * total header length, and sets *txd_upper/*txd_lower for the data
 * descriptors that follow. A fresh context is always written (no
 * caching — see the comment at 4115). Consumes one ring slot.
 * NOTE(review): fragment is missing lines (return type, braces, local
 * declarations for cur/hdr_len) lost in extraction.
 */
4106 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4107     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4109 	struct adapter *adapter = txr->adapter;
4110 	struct e1000_context_desc *TXD;
4111 	struct em_txbuffer *tx_buffer;
4115 	 * In theory we can use the same TSO context if and only if
4116 	 * frame is the same type(IP/TCP) and the same MSS. However
4117 	 * checking whether a frame has the same IP/TCP structure is
4118 	 * hard thing so just ignore that and always restablish a
/* Total header length: L2 + IP header + TCP header (both in bytes). */
4121 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4122 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
4123 		      E1000_TXD_DTYP_D |	/* Data descr type */
4124 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
4126 	/* IP and/or TCP header checksum calculation and insertion. */
4127 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4129 	cur = txr->next_avail_desc;
4130 	tx_buffer = &txr->tx_buffers[cur];
4131 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4134 	 * Start offset for header checksum calculation.
4135 	 * End offset for header checksum calculation.
4136 	 * Offset of place put the checksum.
4138 	TXD->lower_setup.ip_fields.ipcss = ip_off;
4139 	TXD->lower_setup.ip_fields.ipcse =
4140 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
4141 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4143 	 * Start offset for payload checksum calculation.
4144 	 * End offset for payload checksum calculation.
4145 	 * Offset of place to put the checksum.
4147 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4148 	TXD->upper_setup.tcp_fields.tucse = 0;
4149 	TXD->upper_setup.tcp_fields.tucso =
4150 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4152 	 * Payload size per packet w/o any headers.
4153 	 * Length of all headers up to payload.
4155 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4156 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4158 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
4159 				E1000_TXD_CMD_DEXT |	/* Extended descr */
4160 				E1000_TXD_CMD_TSE |	/* TSE context */
4161 				E1000_TXD_CMD_IP |	/* Do IP csum */
4162 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
4163 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
/* The context slot carries no mbuf and is not an end-of-packet. */
4165 	tx_buffer->m_head = NULL;
4166 	tx_buffer->next_eop = -1;
4168 	if (++cur == adapter->num_tx_desc)
4172 	txr->next_avail_desc = cur;
4177 /**********************************************************************
4179 * Examine each tx_buffer in the used queue. If the hardware is done
4180 * processing the packet then free associated resources. The
4181 * tx_buffer is put back on the free queue.
4183 **********************************************************************/
/*
 * em_txeof - reclaim completed TX descriptors. Walks from
 * next_to_clean, packet by packet: when a packet's EOP descriptor has
 * the DD (descriptor-done) status bit set, all of that packet's
 * descriptors are zeroed, its mbuf unmapped and freed, and tx_avail
 * grows. Also drives the hang-detection state machine (txr->busy) and
 * clears IFF_DRV_OACTIVE once enough descriptors are free.
 * Caller must hold the TX lock (asserted below).
 * NOTE(review): fragment is missing lines (braces, tx_avail/processed
 * increments, the 'done' assignment, returns) lost in extraction.
 */
4185 em_txeof(struct tx_ring *txr)
4187 	struct adapter *adapter = txr->adapter;
4188 	int first, last, done, processed;
4189 	struct em_txbuffer *tx_buffer;
4190 	struct e1000_tx_desc *tx_desc, *eop_desc;
4191 	struct ifnet *ifp = adapter->ifp;
4193 	EM_TX_LOCK_ASSERT(txr);
4195 	if (netmap_tx_irq(ifp, txr->me))
4197 #endif /* DEV_NETMAP */
4199 	/* No work, make sure hang detection is disabled */
4200 	if (txr->tx_avail == adapter->num_tx_desc) {
4201 		txr->busy = EM_TX_IDLE;
4206 	first = txr->next_to_clean;
4207 	tx_desc = &txr->tx_base[first];
4208 	tx_buffer = &txr->tx_buffers[first];
/* next_eop was recorded at transmit time; it marks the packet's EOP. */
4209 	last = tx_buffer->next_eop;
4210 	eop_desc = &txr->tx_base[last];
4213 	 * What this does is get the index of the
4214 	 * first descriptor AFTER the EOP of the
4215 	 * first packet, that way we can do the
4216 	 * simple comparison on the inner while loop.
4218 	if (++last == adapter->num_tx_desc)
4222 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4223 	    BUS_DMASYNC_POSTREAD);
/* Outer loop: one iteration per completed (DD-set) packet. */
4225 	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4226 		/* We clean the range of the packet */
4227 		while (first != done) {
4228 			tx_desc->upper.data = 0;
4229 			tx_desc->lower.data = 0;
4230 			tx_desc->buffer_addr = 0;
4234 			if (tx_buffer->m_head) {
4235 				bus_dmamap_sync(txr->txtag,
4237 				    BUS_DMASYNC_POSTWRITE);
4238 				bus_dmamap_unload(txr->txtag,
4240 				m_freem(tx_buffer->m_head);
4241 				tx_buffer->m_head = NULL;
4243 			tx_buffer->next_eop = -1;
4245 			if (++first == adapter->num_tx_desc)
4248 			tx_buffer = &txr->tx_buffers[first];
4249 			tx_desc = &txr->tx_base[first];
4252 		/* See if we can continue to the next packet */
4253 		last = tx_buffer->next_eop;
4255 		eop_desc = &txr->tx_base[last];
4256 		/* Get new done point */
4257 		if (++last == adapter->num_tx_desc) last = 0;
4262 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4263 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4265 	txr->next_to_clean = first;
4268 	** Hang detection: we know there's work outstanding
4269 	** or the entry return would have been taken, so no
4270 	** descriptor processed here indicates a potential hang.
4271 	** The local timer will examine this and do a reset if needed.
4273 	if (processed == 0) {
4274 		if (txr->busy != EM_TX_HUNG)
4276 	} else /* At least one descriptor was cleaned */
4277 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4280 	 * If we have a minimum free, clear IFF_DRV_OACTIVE
4281 	 * to tell the stack that it is OK to send packets.
4282 	 * Notice that all writes of OACTIVE happen under the
4283 	 * TX lock which, with a single queue, guarantees
4286 	if (txr->tx_avail >= EM_MAX_SCATTER) {
4287 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4290 	/* Disable hang detection if all clean */
4291 	if (txr->tx_avail == adapter->num_tx_desc)
4292 		txr->busy = EM_TX_IDLE;
4295 /*********************************************************************
4297 * Refresh RX descriptor mbufs from system mbuf buffer pool.
4299 **********************************************************************/
/*
 * em_refresh_mbufs - replenish RX descriptors up to (but not
 * including) 'limit' with fresh jumbo-cluster mbufs, DMA-load each
 * one, rewrite its descriptor, and finally bump the hardware RDT tail
 * as far as was refreshed. On transient mbuf shortage it simply stops
 * and will be retried from em_rxeof.
 * NOTE(review): fragment is missing lines (return type, braces, the
 * 'struct mbuf *m' declaration, goto update/abort paths) lost in
 * extraction.
 */
4301 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4303 	struct adapter *adapter = rxr->adapter;
4305 	bus_dma_segment_t segs;
4306 	struct em_rxbuffer *rxbuf;
4307 	int i, j, error, nsegs;
4308 	bool cleaned = FALSE;
/* i walks the slot being refreshed; j is always one slot ahead. */
4310 	i = j = rxr->next_to_refresh;
4312 	** Get one descriptor beyond
4313 	** our work mark to control
4316 	if (++j == adapter->num_rx_desc)
4319 	while (j != limit) {
4320 		rxbuf = &rxr->rx_buffers[i];
4321 		if (rxbuf->m_head == NULL) {
4322 			m = m_getjcl(M_NOWAIT, MT_DATA,
4323 			    M_PKTHDR, adapter->rx_mbuf_sz);
4325 			** If we have a temporary resource shortage
4326 			** that causes a failure, just abort refresh
4327 			** for now, we will return to this point when
4328 			** reinvoked from em_rxeof.
/* Reset the (possibly recycled) mbuf to a full-size empty buffer. */
4335 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4336 		m->m_flags |= M_PKTHDR;
4337 		m->m_data = m->m_ext.ext_buf;
4339 		/* Use bus_dma machinery to setup the memory mapping */
4340 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4341 		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
4343 			printf("Refresh mbufs: hdr dmamap load"
4344 			    " failure - %d\n", error);
4346 			rxbuf->m_head = NULL;
4350 		rxbuf->paddr = segs.ds_addr;
4351 		bus_dmamap_sync(rxr->rxtag,
4352 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4353 		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4356 		i = j; /* Next is precalulated for us */
4357 		rxr->next_to_refresh = i;
4358 		/* Calculate next controlling index */
4359 		if (++j == adapter->num_rx_desc)
4364 	** Update the tail pointer only if,
4365 	** and as far as we have refreshed.
4368 		E1000_WRITE_REG(&adapter->hw,
4369 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4375 /*********************************************************************
4377 * Allocate memory for rx_buffer structures. Since we use one
4378 * rx_buffer per received packet, the maximum number of rx_buffer's
4379 * that we'll need is equal to the number of receive descriptors
4380 * that we've allocated.
4382 **********************************************************************/
/*
 * em_allocate_receive_buffers - create the per-ring rx_buffer array,
 * the RX DMA tag (single segment up to MJUM9BYTES), and one DMA map
 * per receive descriptor. Called once at attach; failures unwind via
 * em_free_receive_structures().
 * NOTE(review): fragment is missing lines (return type, braces, error
 * gotos, nsegments argument of the tag) lost in extraction.
 */
4384 em_allocate_receive_buffers(struct rx_ring *rxr)
4386 	struct adapter *adapter = rxr->adapter;
4387 	device_t dev = adapter->dev;
4388 	struct em_rxbuffer *rxbuf;
4391 	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4392 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4393 	if (rxr->rx_buffers == NULL) {
4394 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4398 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4399 				1, 0,			/* alignment, bounds */
4400 				BUS_SPACE_MAXADDR,	/* lowaddr */
4401 				BUS_SPACE_MAXADDR,	/* highaddr */
4402 				NULL, NULL,		/* filter, filterarg */
4403 				MJUM9BYTES,		/* maxsize */
4405 				MJUM9BYTES,		/* maxsegsize */
4407 				NULL,			/* lockfunc */
4411 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
/* One DMA map per descriptor; rxbuf++ in the header is redundant with
 * the reassignment in the body but harmless. */
4416 	rxbuf = rxr->rx_buffers;
4417 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4418 		rxbuf = &rxr->rx_buffers[i];
4419 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4421 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4430 	em_free_receive_structures(adapter);
4435 /*********************************************************************
4437 * Initialize a receive ring and its buffers.
4439 **********************************************************************/
/*
 * em_setup_receive_ring - (re)initialize one RX ring: clear the
 * descriptor area, free any stale mbufs, then attach and DMA-load a
 * fresh jumbo-cluster mbuf to every descriptor, reset the check/
 * refresh indices, and sync the ring for the hardware. Under
 * DEV_NETMAP the slots are instead pointed at netmap-owned buffers.
 * NOTE(review): fragment is missing lines (return type, braces,
 * #ifdef guards, error handling on allocation failure) lost in
 * extraction. The netmap_reset() at 4459 passes ring index 0 rather
 * than rxr->me — presumably fine for a single queue; verify upstream.
 */
4441 em_setup_receive_ring(struct rx_ring *rxr)
4443 	struct adapter *adapter = rxr->adapter;
4444 	struct em_rxbuffer *rxbuf;
4445 	bus_dma_segment_t seg[1];
4446 	int rsize, nsegs, error = 0;
4448 	struct netmap_adapter *na = NA(adapter->ifp);
4449 	struct netmap_slot *slot;
4453 	/* Clear the ring contents */
4455 	rsize = roundup2(adapter->num_rx_desc *
4456 	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4457 	bzero((void *)rxr->rx_base, rsize);
4459 	slot = netmap_reset(na, NR_RX, 0, 0);
4463 	** Free current RX buffer structs and their mbufs
4465 	for (int i = 0; i < adapter->num_rx_desc; i++) {
4466 		rxbuf = &rxr->rx_buffers[i];
4467 		if (rxbuf->m_head != NULL) {
4468 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4469 			    BUS_DMASYNC_POSTREAD);
4470 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4471 			m_freem(rxbuf->m_head);
4472 			rxbuf->m_head = NULL; /* mark as freed */
4476 	/* Now replenish the mbufs */
4477 	for (int j = 0; j != adapter->num_rx_desc; ++j) {
4478 		rxbuf = &rxr->rx_buffers[j];
/* netmap mode: use the netmap slot's buffer instead of an mbuf. */
4481 		int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4485 		addr = PNMB(na, slot + si, &paddr);
4486 		netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4487 		rxbuf->paddr = paddr;
4488 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4491 #endif /* DEV_NETMAP */
4492 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4493 		    M_PKTHDR, adapter->rx_mbuf_sz);
4494 		if (rxbuf->m_head == NULL) {
4498 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4499 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4500 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4502 		/* Get the memory mapping */
4503 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4504 		    rxbuf->map, rxbuf->m_head, seg,
4505 		    &nsegs, BUS_DMA_NOWAIT);
4507 			m_freem(rxbuf->m_head);
4508 			rxbuf->m_head = NULL;
4511 		bus_dmamap_sync(rxr->rxtag,
4512 		    rxbuf->map, BUS_DMASYNC_PREREAD);
4514 		rxbuf->paddr = seg[0].ds_addr;
4515 		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
/* Ring is full and clean: both indices restart at slot 0. */
4517 	rxr->next_to_check = 0;
4518 	rxr->next_to_refresh = 0;
4519 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4520 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4527 /*********************************************************************
4529 * Initialize all receive rings.
4531 **********************************************************************/
/*
 * em_setup_receive_structures - initialize every RX ring via
 * em_setup_receive_ring(). On failure, frees the mbufs of the rings
 * that had already completed (the failing ring cleans up after
 * itself) and resets their indices.
 * NOTE(review): fragment is missing lines (return type, braces,
 * return statements, the 'int q' success return) lost in extraction.
 */
4533 em_setup_receive_structures(struct adapter *adapter)
4535 	struct rx_ring *rxr = adapter->rx_rings;
4538 	for (q = 0; q < adapter->num_queues; q++, rxr++)
4539 		if (em_setup_receive_ring(rxr))
4545 	 * Free RX buffers allocated so far, we will only handle
4546 	 * the rings that completed, the failing case will have
4547 	 * cleaned up for itself. 'q' failed, so its the terminus.
4549 	for (int i = 0; i < q; ++i) {
4550 		rxr = &adapter->rx_rings[i];
4551 		for (int n = 0; n < adapter->num_rx_desc; n++) {
4552 			struct em_rxbuffer *rxbuf;
4553 			rxbuf = &rxr->rx_buffers[n];
4554 			if (rxbuf->m_head != NULL) {
4555 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4556 				    BUS_DMASYNC_POSTREAD);
4557 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4558 				m_freem(rxbuf->m_head);
4559 				rxbuf->m_head = NULL;
4562 		rxr->next_to_check = 0;
4563 		rxr->next_to_refresh = 0;
4569 /*********************************************************************
4571 * Free all receive rings.
4573 **********************************************************************/
/*
 * em_free_receive_structures - tear down all RX rings: free each
 * ring's buffers, its descriptor DMA area, its lock, then the
 * rx_rings array. Mirror of em_free_transmit_structures().
 */
4575 em_free_receive_structures(struct adapter *adapter)
4577 	struct rx_ring *rxr = adapter->rx_rings;
4579 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4580 		em_free_receive_buffers(rxr);
4581 		/* Free the ring memory as well */
4582 		em_dma_free(adapter, &rxr->rxdma);
4583 		EM_RX_LOCK_DESTROY(rxr);
4586 	free(adapter->rx_rings, M_DEVBUF);
4590 /*********************************************************************
4592 * Free receive ring data structures
4594 **********************************************************************/
/*
 * em_free_receive_buffers - free one RX ring's per-descriptor state:
 * each buffer's DMA map and mbuf, the rx_buffers array, and the RX
 * DMA tag. NULL-guarded throughout so it tolerates partial setup.
 * NOTE(review): fragment is missing lines (braces, rxtag = NULL
 * assignment) lost in extraction.
 */
4596 em_free_receive_buffers(struct rx_ring *rxr)
4598 	struct adapter *adapter = rxr->adapter;
4599 	struct em_rxbuffer *rxbuf = NULL;
4601 	INIT_DEBUGOUT("free_receive_buffers: begin");
4603 	if (rxr->rx_buffers != NULL) {
4604 		for (int i = 0; i < adapter->num_rx_desc; i++) {
4605 			rxbuf = &rxr->rx_buffers[i];
4606 			if (rxbuf->map != NULL) {
4607 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4608 				    BUS_DMASYNC_POSTREAD);
4609 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4610 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4612 			if (rxbuf->m_head != NULL) {
4613 				m_freem(rxbuf->m_head);
4614 				rxbuf->m_head = NULL;
4617 		free(rxr->rx_buffers, M_DEVBUF);
4618 		rxr->rx_buffers = NULL;
4619 		rxr->next_to_check = 0;
4620 		rxr->next_to_refresh = 0;
4623 	if (rxr->rxtag != NULL) {
4624 		bus_dma_tag_destroy(rxr->rxtag);
4632 /*********************************************************************
4634 * Enable receive unit.
4636 **********************************************************************/
/*
 * em_initialize_receive_unit: program the MAC's receive side — RCTL,
 * interrupt moderation (RDTR/RADV/ITR/EITR), checksum offload (RXCSUM),
 * optional RSS (EM_MULTIQUEUE), and per-queue descriptor ring base/len/
 * head/tail registers — then write the final RCTL with buffer sizing.
 * Runs with receives disabled (where the hardware allows it) while the
 * descriptor rings are being set up.
 */
4639 em_initialize_receive_unit(struct adapter *adapter)
4641 struct rx_ring *rxr = adapter->rx_rings;
4642 struct ifnet *ifp = adapter->ifp;
4643 struct e1000_hw *hw = &adapter->hw;
4644 u32 rctl, rxcsum, rfctl;
4646 INIT_DEBUGOUT("em_initialize_receive_units: begin");
4649 * Make sure receives are disabled while setting
4650 * up the descriptor ring
4652 rctl = E1000_READ_REG(hw, E1000_RCTL);
4653 /* Do not disable if ever enabled on this hardware */
4654 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4655 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4657 /* Setup the Receive Control Register */
4658 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4659 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4660 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4661 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4663 /* Do not store bad packets */
4664 rctl &= ~E1000_RCTL_SBP;
4666 /* Enable Long Packet receive */
4667 if (ifp->if_mtu > ETHERMTU)
4668 rctl |= E1000_RCTL_LPE;
4670 rctl &= ~E1000_RCTL_LPE;
/* Strip the CRC unless disabled by the em_disable_crc_stripping tunable */
4673 if (!em_disable_crc_stripping)
4674 rctl |= E1000_RCTL_SECRC;
/* Absolute and packet RX interrupt delays come from sysctl-tunable values */
4676 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4677 adapter->rx_abs_int_delay.value);
4679 E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4680 adapter->rx_int_delay.value);
4682 * Set the interrupt throttling rate. Value is calculated
4683 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4685 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4687 /* Use extended rx descriptor formats */
4688 rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4689 rfctl |= E1000_RFCTL_EXTEN;
4691 ** When using MSIX interrupts we need to throttle
4692 ** using the EITR register (82574 only)
4694 if (hw->mac.type == e1000_82574) {
4695 for (int i = 0; i < 4; i++)
4696 E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4698 /* Disable accelerated acknowledge */
4699 rfctl |= E1000_RFCTL_ACK_DIS;
4701 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4703 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4704 if (ifp->if_capenable & IFCAP_RXCSUM) {
4705 #ifdef EM_MULTIQUEUE
4706 rxcsum |= E1000_RXCSUM_TUOFL |
4707 E1000_RXCSUM_IPOFL |
4710 rxcsum |= E1000_RXCSUM_TUOFL;
4713 rxcsum &= ~E1000_RXCSUM_TUOFL;
4715 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4717 #ifdef EM_MULTIQUEUE
4718 #define RSSKEYLEN 10
4719 if (adapter->num_queues > 1) {
/* Random RSS key: hash secrecy is not required here, arc4rand suffices */
4720 uint8_t rss_key[4 * RSSKEYLEN];
4727 arc4rand(rss_key, sizeof(rss_key), 0);
4728 for (i = 0; i < RSSKEYLEN; ++i) {
4731 rssrk = EM_RSSRK_VAL(rss_key, i);
4732 E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4736 * Configure RSS redirect table in following fashion:
4737 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4739 for (i = 0; i < sizeof(reta); ++i) {
4742 q = (i % adapter->num_queues) << 7;
4743 reta |= q << (8 * i);
4746 for (i = 0; i < 32; ++i) {
4747 E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4750 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4751 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4752 E1000_MRQC_RSS_FIELD_IPV4 |
4753 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4754 E1000_MRQC_RSS_FIELD_IPV6_EX |
4755 E1000_MRQC_RSS_FIELD_IPV6);
4759 ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4760 ** long latencies are observed, like Lenovo X60. This
4761 ** change eliminates the problem, but since having positive
4762 ** values in RDTR is a known source of problems on other
4763 ** platforms another solution is being sought.
4765 if (hw->mac.type == e1000_82573)
4766 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4768 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4769 /* Setup the Base and Length of the Rx Descriptor Ring */
4770 u64 bus_addr = rxr->rxdma.dma_paddr;
4771 u32 rdt = adapter->num_rx_desc - 1; /* default */
4773 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4774 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4775 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4776 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4777 /* Setup the Head and Tail Descriptor Pointers */
4778 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4781 * an init() while a netmap client is active must
4782 * preserve the rx buffers passed to userspace.
4784 if (ifp->if_capenable & IFCAP_NETMAP)
4785 rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4786 #endif /* DEV_NETMAP */
4787 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4791 * Set PTHRESH for improved jumbo performance
4792 * According to 10.2.5.11 of Intel 82574 Datasheet,
4793 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4794 * Only write to RXDCTL(1) if there is a need for different
4797 if (((adapter->hw.mac.type == e1000_ich9lan) ||
4798 (adapter->hw.mac.type == e1000_pch2lan) ||
4799 (adapter->hw.mac.type == e1000_ich10lan)) &&
4800 (ifp->if_mtu > ETHERMTU)) {
4801 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4802 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4803 } else if (adapter->hw.mac.type == e1000_82574) {
4804 for (int i = 0; i < adapter->num_queues; i++) {
4805 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4807 rxdctl |= 0x20; /* PTHRESH */
4808 rxdctl |= 4 << 8; /* HTHRESH */
4809 rxdctl |= 4 << 16;/* WTHRESH */
4810 rxdctl |= 1 << 24; /* Switch to granularity */
4811 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
/* pch2lan and later need the Lynxpoint jumbo workaround toggled per MTU */
4815 if (adapter->hw.mac.type >= e1000_pch2lan) {
4816 if (ifp->if_mtu > ETHERMTU)
4817 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4819 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4822 /* Make sure VLAN Filters are off */
4823 rctl &= ~E1000_RCTL_VFE;
/* Select hardware buffer size class from the mbuf cluster size in use */
4825 if (adapter->rx_mbuf_sz == MCLBYTES)
4826 rctl |= E1000_RCTL_SZ_2048;
4827 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4828 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4829 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4830 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4832 /* ensure we clear use DTYPE of 00 here */
4833 rctl &= ~0x00000C00;
4834 /* Write out the settings */
4835 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4841 /*********************************************************************
4843 * This routine executes in interrupt context. It replenishes
4844 * the mbufs in the descriptor and sends data which has been
4845 * dma'ed into host memory to upper layer.
4847 * We loop at most count times if count is > 0, or until done if
4850 * For polling we also now return the number of cleaned packets
4851 *********************************************************************/
/*
 * em_rxeof: RX completion processing for one ring. Walks descriptors from
 * next_to_check, chains multi-segment frames via fmp/lmp, applies checksum
 * and VLAN-tag metadata on EOP, and hands completed packets to if_input().
 * Refreshes mbufs every 8 processed descriptors to bound tail-pointer lag.
 * Returns TRUE if another descriptor is already done (more work pending).
 */
4853 em_rxeof(struct rx_ring *rxr, int count, int *done)
4855 struct adapter *adapter = rxr->adapter;
4856 struct ifnet *ifp = adapter->ifp;
4857 struct mbuf *mp, *sendmp;
4860 int i, processed, rxdone = 0;
4862 union e1000_rx_desc_extended *cur;
4867 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4868 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/* If netmap owns the ring, it consumes the interrupt and we bail out */
4872 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4876 #endif /* DEV_NETMAP */
4878 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4879 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4882 cur = &rxr->rx_base[i];
4883 status = le32toh(cur->wb.upper.status_error);
/* DD clear means hardware hasn't written this descriptor back yet */
4886 if ((status & E1000_RXD_STAT_DD) == 0)
4889 len = le16toh(cur->wb.upper.length);
4890 eop = (status & E1000_RXD_STAT_EOP) != 0;
4892 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4893 (rxr->discard == TRUE)) {
4894 adapter->dropped_pkts++;
4895 ++rxr->rx_discarded;
4896 if (!eop) /* Catch subsequent segs */
4897 rxr->discard = TRUE;
4899 rxr->discard = FALSE;
4900 em_rx_discard(rxr, i);
4903 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4905 /* Assign correct length to the current fragment */
4906 mp = rxr->rx_buffers[i].m_head;
4909 /* Trigger for refresh */
4910 rxr->rx_buffers[i].m_head = NULL;
4912 /* First segment? */
4913 if (rxr->fmp == NULL) {
4914 mp->m_pkthdr.len = len;
4915 rxr->fmp = rxr->lmp = mp;
4917 /* Chain mbuf's together */
4918 mp->m_flags &= ~M_PKTHDR;
4919 rxr->lmp->m_next = mp;
4921 rxr->fmp->m_pkthdr.len += len;
4927 sendmp->m_pkthdr.rcvif = ifp;
4929 em_receive_checksum(status, sendmp);
4930 #ifndef __NO_STRICT_ALIGNMENT
/* Realign payload for strict-alignment archs when frame can exceed MCLBYTES */
4931 if (adapter->hw.mac.max_frame_size >
4932 (MCLBYTES - ETHER_ALIGN) &&
4933 em_fixup_rx(rxr) != 0)
4936 if (status & E1000_RXD_STAT_VP) {
4937 sendmp->m_pkthdr.ether_vtag =
4938 le16toh(cur->wb.upper.vlan);
4939 sendmp->m_flags |= M_VLANTAG;
4941 #ifndef __NO_STRICT_ALIGNMENT
4944 rxr->fmp = rxr->lmp = NULL;
4948 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4949 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4951 /* Zero out the receive descriptors status. */
4952 cur->wb.upper.status_error &= htole32(~0xFF);
4953 ++rxdone; /* cumulative for POLL */
4956 /* Advance our pointers to the next descriptor. */
4957 if (++i == adapter->num_rx_desc)
4960 /* Send to the stack */
4961 if (sendmp != NULL) {
/* Publish progress before if_input(): the stack may re-enter the driver */
4962 rxr->next_to_check = i;
4964 (*ifp->if_input)(ifp, sendmp);
4966 i = rxr->next_to_check;
4969 /* Only refresh mbufs every 8 descriptors */
4970 if (processed == 8) {
4971 em_refresh_mbufs(rxr, i);
4976 /* Catch any remaining refresh work */
4977 if (e1000_rx_unrefreshed(rxr))
4978 em_refresh_mbufs(rxr, i);
4980 rxr->next_to_check = i;
4985 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
/*
 * em_rx_discard: drop the frame occupying RX slot i. Unloads the DMA map,
 * frees any partially-assembled chain held in fmp, and frees the slot's
 * mbuf (leaving m_head NULL so em_refresh_mbufs() recharges it later).
 */
4988 static __inline void
4989 em_rx_discard(struct rx_ring *rxr, int i)
4991 struct em_rxbuffer *rbuf;
4993 rbuf = &rxr->rx_buffers[i];
4994 bus_dmamap_unload(rxr->rxtag, rbuf->map);
4996 /* Free any previous pieces */
4997 if (rxr->fmp != NULL) {
/* Restore M_PKTHDR (cleared during chaining) so m_freem sees a packet */
4998 rxr->fmp->m_flags |= M_PKTHDR;
5004 ** Free buffer and allow em_refresh_mbufs()
5005 ** to clean up and recharge buffer.
5008 m_free(rbuf->m_head);
5009 rbuf->m_head = NULL;
5014 #ifndef __NO_STRICT_ALIGNMENT
5016 * When jumbo frames are enabled we should realign entire payload on
5017 * architecures with strict alignment. This is serious design mistake of 8254x
5018 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
5019 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
5020 * payload. On architecures without strict alignment restrictions 8254x still
5021 * performs unaligned memory access which would reduce the performance too.
5022 * To avoid copying over an entire frame to align, we allocate a new mbuf and
5023 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
5024 * existing mbuf chain.
5026 * Be aware, best performance of the 8254x is achived only when jumbo frame is
5027 * not used at all on architectures with strict alignment.
 *
 * NOTE(review): the tail of this function (success/failure return paths)
 * is elided from this numbered listing.
5030 em_fixup_rx(struct rx_ring *rxr)
5032 struct adapter *adapter = rxr->adapter;
/* Header fits in-buffer: shift it up by ETHER_HDR_LEN to align the payload */
5038 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5039 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5040 m->m_data += ETHER_HDR_LEN;
/* Otherwise prepend a fresh mbuf carrying just the ethernet header */
5042 MGETHDR(n, M_NOWAIT, MT_DATA);
5044 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5045 m->m_data += ETHER_HDR_LEN;
5046 m->m_len -= ETHER_HDR_LEN;
5047 n->m_len = ETHER_HDR_LEN;
5048 M_MOVE_PKTHDR(n, m);
/* Allocation failure path: account the drop */
5052 adapter->dropped_pkts++;
/*
 * em_setup_rxdesc: write a descriptor back into "read" (fetch) format —
 * program the buffer physical address and clear the writeback status so
 * the DD bit cannot be misread as a completed descriptor.
 */
5064 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5066 rxd->read.buffer_addr = htole64(rxbuf->paddr);
5067 /* DD bits must be cleared */
5068 rxd->wb.upper.status_error= 0;
5071 /*********************************************************************
5073 * Verify that the hardware indicated that the checksum is valid.
5074 * Inform the stack about the status of checksum so that stack
5075 * doesn't spend time verifying the checksum.
5077 *********************************************************************/
/*
 * em_receive_checksum: translate descriptor status bits into mbuf
 * csum_flags. IXSM set means hardware skipped checksumming — report
 * nothing. IP/TCP/UDP results are taken only when the corresponding
 * "checksummed" bit is set without its error bit.
 */
5079 em_receive_checksum(uint32_t status, struct mbuf *mp)
5081 mp->m_pkthdr.csum_flags = 0;
5083 /* Ignore Checksum bit is set */
5084 if (status & E1000_RXD_STAT_IXSM)
5087 /* If the IP checksum exists and there is no IP Checksum error */
5088 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5089 E1000_RXD_STAT_IPCS) {
5090 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5093 /* TCP or UDP checksum */
5094 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5095 E1000_RXD_STAT_TCPCS) {
5096 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
/* 0xffff pseudo-header value tells the stack the payload sum is good */
5097 mp->m_pkthdr.csum_data = htons(0xffff);
5099 if (status & E1000_RXD_STAT_UDPCS) {
5100 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5101 mp->m_pkthdr.csum_data = htons(0xffff);
5106 * This routine is run via an vlan
/*
 * em_register_vlan: VLAN-config event handler. Records vtag in the
 * shadow VFTA (bit index = vtag low 5 bits, word index = vtag >> 5)
 * and re-inits the adapter when hardware VLAN filtering is enabled,
 * so the new table is loaded into the chip.
 */
5110 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5112 struct adapter *adapter = ifp->if_softc;
5115 if (ifp->if_softc != arg) /* Not our event */
5118 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
5121 EM_CORE_LOCK(adapter);
5122 index = (vtag >> 5) & 0x7F;
5124 adapter->shadow_vfta[index] |= (1 << bit);
5125 ++adapter->num_vlans;
5126 /* Re-init to load the changes */
5127 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5128 em_init_locked(adapter);
5129 EM_CORE_UNLOCK(adapter);
5133 * This routine is run via an vlan
/*
 * em_unregister_vlan: inverse of em_register_vlan — clears the vtag's
 * bit in the shadow VFTA, decrements the VLAN count, and re-inits when
 * hardware VLAN filtering is active.
 */
5137 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5139 struct adapter *adapter = ifp->if_softc;
5142 if (ifp->if_softc != arg)
5145 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5148 EM_CORE_LOCK(adapter);
5149 index = (vtag >> 5) & 0x7F;
5151 adapter->shadow_vfta[index] &= ~(1 << bit);
5152 --adapter->num_vlans;
5153 /* Re-init to load the changes */
5154 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5155 em_init_locked(adapter);
5156 EM_CORE_UNLOCK(adapter);
/*
 * em_setup_vlan_hw_support: after a soft reset, repopulate the hardware
 * VFTA from the driver's shadow copy and enable VLAN tag stripping
 * (CTRL.VME) plus the VLAN filter table (RCTL.VFE, CFIEN cleared).
 * No-op when no VLANs are registered.
 */
5160 em_setup_vlan_hw_support(struct adapter *adapter)
5162 struct e1000_hw *hw = &adapter->hw;
5166 ** We get here thru init_locked, meaning
5167 ** a soft reset, this has already cleared
5168 ** the VFTA and other state, so if there
5169 ** have been no vlan's registered do nothing.
5171 if (adapter->num_vlans == 0)
5175 ** A soft reset zero's out the VFTA, so
5176 ** we need to repopulate it now.
5178 for (int i = 0; i < EM_VFTA_SIZE; i++)
5179 if (adapter->shadow_vfta[i] != 0)
5180 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5181 i, adapter->shadow_vfta[i]);
/* VME: strip 802.1Q tags on receive */
5183 reg = E1000_READ_REG(hw, E1000_CTRL);
5184 reg |= E1000_CTRL_VME;
5185 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5187 /* Enable the Filter Table */
5188 reg = E1000_READ_REG(hw, E1000_RCTL);
5189 reg &= ~E1000_RCTL_CFIEN;
5190 reg |= E1000_RCTL_VFE;
5191 E1000_WRITE_REG(hw, E1000_RCTL, reg);
/*
 * em_enable_intr: unmask interrupts via IMS. On 82574 (MSI-X capable),
 * also program EIAC with the MSI-X auto-clear mask and fold the MSI-X
 * vector bits into the enable mask.
 */
5195 em_enable_intr(struct adapter *adapter)
5197 struct e1000_hw *hw = &adapter->hw;
5198 u32 ims_mask = IMS_ENABLE_MASK;
5200 if (hw->mac.type == e1000_82574) {
5201 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5202 ims_mask |= EM_MSIX_MASK;
5204 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
/*
 * em_disable_intr: mask all interrupts (IMC = all ones); on 82574 also
 * clear the MSI-X auto-clear register first.
 */
5208 em_disable_intr(struct adapter *adapter)
5210 struct e1000_hw *hw = &adapter->hw;
5212 if (hw->mac.type == e1000_82574)
5213 E1000_WRITE_REG(hw, EM_EIAC, 0);
5214 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5218 * Bit of a misnomer, what this really means is
5219 * to enable OS management of the system... aka
5220 * to disable special hardware management features
/*
 * em_init_manageability: when a management engine is present, take ARP
 * handling back from the firmware and route management ports 623/664
 * traffic up to the host via MANC2H.
 */
5223 em_init_manageability(struct adapter *adapter)
5225 /* A shared code workaround */
5226 #define E1000_82542_MANC2H E1000_MANC2H
5227 if (adapter->has_manage) {
5228 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5229 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5231 /* disable hardware interception of ARP */
5232 manc &= ~(E1000_MANC_ARP_EN);
5234 /* enable receiving management packets to the host */
5235 manc |= E1000_MANC_EN_MNG2HOST;
5236 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5237 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5238 manc2h |= E1000_MNG2HOST_PORT_623;
5239 manc2h |= E1000_MNG2HOST_PORT_664;
5240 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5241 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5246 * Give control back to hardware management
5247 * controller if there is one.
/*
 * em_release_manageability: undo em_init_manageability — restore firmware
 * ARP interception and stop forwarding management packets to the host.
 */
5250 em_release_manageability(struct adapter *adapter)
5252 if (adapter->has_manage) {
5253 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5255 /* re-enable hardware interception of ARP */
5256 manc |= E1000_MANC_ARP_EN;
5257 manc &= ~E1000_MANC_EN_MNG2HOST;
5259 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5264 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5265 * For ASF and Pass Through versions of f/w this means
5266 * that the driver is loaded. For AMT version type f/w
5267 * this means that the network i/f is open.
/* 82573 signals via SWSM; all other MACs use CTRL_EXT's DRV_LOAD bit */
5270 em_get_hw_control(struct adapter *adapter)
5274 if (adapter->hw.mac.type == e1000_82573) {
5275 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5276 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5277 swsm | E1000_SWSM_DRV_LOAD);
5281 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5282 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5283 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5288 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5289 * For ASF and Pass Through versions of f/w this means that
5290 * the driver is no longer loaded. For AMT versions of the
5291 * f/w this means that the network i/f is closed.
/* Only meaningful when a management engine exists; otherwise a no-op */
5294 em_release_hw_control(struct adapter *adapter)
5298 if (!adapter->has_manage)
5301 if (adapter->hw.mac.type == e1000_82573) {
5302 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5303 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5304 swsm & ~E1000_SWSM_DRV_LOAD);
5308 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5309 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5310 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
/*
 * em_is_valid_ether_addr: reject multicast/broadcast addresses (low bit
 * of first octet set) and the all-zero address.
 */
5315 em_is_valid_ether_addr(u8 *addr)
5317 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5319 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5327 ** Parse the interface capabilities with regard
5328 ** to both system management and wake-on-lan for
/*
 * em_get_wakeup: discover management (AMT/pass-through) support and the
 * APM-enable wake setting from NVM (or the WUC register on newer ICH/PCH
 * parts), then apply per-device quirks — dual-fiber and quad-port 82571
 * boards only support wake on port A.
 */
5332 em_get_wakeup(device_t dev)
5334 struct adapter *adapter = device_get_softc(dev);
5335 u16 eeprom_data = 0, device_id, apme_mask;
5337 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5338 apme_mask = EM_EEPROM_APME;
5340 switch (adapter->hw.mac.type) {
5343 adapter->has_amt = TRUE;
5347 case e1000_80003es2lan:
/* Dual-port NVM: read INIT_CONTROL3 for the function we sit on */
5348 if (adapter->hw.bus.func == 1) {
5349 e1000_read_nvm(&adapter->hw,
5350 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5353 e1000_read_nvm(&adapter->hw,
5354 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5358 case e1000_ich10lan:
/* ICH/PCH class: APME lives in the WUC register, not the NVM word */
5364 apme_mask = E1000_WUC_APME;
5365 adapter->has_amt = TRUE;
5366 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5369 e1000_read_nvm(&adapter->hw,
5370 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5373 if (eeprom_data & apme_mask)
5374 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5376 * We have the eeprom settings, now apply the special cases
5377 * where the eeprom may be wrong or the board won't support
5378 * wake on lan on a particular port
5380 device_id = pci_get_device(dev);
5381 switch (device_id) {
5382 case E1000_DEV_ID_82571EB_FIBER:
5383 /* Wake events only supported on port A for dual fiber
5384 * regardless of eeprom setting */
5385 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5386 E1000_STATUS_FUNC_1)
5389 case E1000_DEV_ID_82571EB_QUAD_COPPER:
5390 case E1000_DEV_ID_82571EB_QUAD_FIBER:
5391 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5392 /* if quad port adapter, disable WoL on all but port A */
5393 if (global_quad_port_a != 0)
5395 /* Reset for multiple quad port adapters */
5396 if (++global_quad_port_a == 4)
5397 global_quad_port_a = 0;
5405 * Enable PCI Wake On Lan capability
/*
 * em_enable_wakeup: arm the device for Wake-on-LAN at suspend time.
 * Filters adapter->wol by the interface's WOL capabilities, applies
 * media/MAC-specific workarounds, enables MAC- or PHY-based wake
 * (PHY-based on pchlan/pch2lan/pch_lpt/pch_spt), and finally sets the
 * PME bits in PCI power-management config space.
 */
5408 em_enable_wakeup(device_t dev)
5410 struct adapter *adapter = device_get_softc(dev);
5411 struct ifnet *ifp = adapter->ifp;
5413 u32 pmc, ctrl, ctrl_ext, rctl;
/* No PCI power-management capability => cannot generate PME, bail */
5416 if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5420 ** Determine type of Wakeup: note that wol
5421 ** is set with all bits on by default.
5423 if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5424 adapter->wol &= ~E1000_WUFC_MAG;
5426 if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5427 adapter->wol &= ~E1000_WUFC_MC;
/* Multicast wake requires promiscuous-multicast reception while asleep */
5429 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5430 rctl |= E1000_RCTL_MPE;
5431 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5434 if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5437 /* Advertise the wakeup capability */
5438 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5439 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5440 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5442 /* Keep the laser running on Fiber adapters */
5443 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5444 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5445 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5446 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5447 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5450 if ((adapter->hw.mac.type == e1000_ich8lan) ||
5451 (adapter->hw.mac.type == e1000_pchlan) ||
5452 (adapter->hw.mac.type == e1000_ich9lan) ||
5453 (adapter->hw.mac.type == e1000_ich10lan))
5454 e1000_suspend_workarounds_ich8lan(&adapter->hw);
/* PCH-family parts wake through the PHY; others through MAC WUC/WUFC */
5456 if ((adapter->hw.mac.type == e1000_pchlan) ||
5457 (adapter->hw.mac.type == e1000_pch2lan) ||
5458 (adapter->hw.mac.type == e1000_pch_lpt) ||
5459 (adapter->hw.mac.type == e1000_pch_spt)) {
5460 error = em_enable_phy_wakeup(adapter);
5464 /* Enable wakeup by the MAC */
5465 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5466 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5469 if (adapter->hw.phy.type == e1000_phy_igp_3)
5470 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
/* Request PME from D3 via the PCI PM capability's status register */
5473 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5474 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5475 if (!error && (ifp->if_capenable & IFCAP_WOL))
5476 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5477 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5483 ** WOL in the newer chipset interfaces (pchlan)
5484 ** require thing to be copied into the phy
/*
 * em_enable_phy_wakeup: mirror the MAC's wake state into the PHY for
 * PCH-class parts — copies RAR/MTA filters and RCTL settings into the
 * BM PHY registers, enables PHY wake in WUC/WUFC, then sets the
 * host-wakeup enable bits on PHY page 769 (BM_WUC_ENABLE) under the
 * PHY semaphore. Returns 0 on success or a PHY access error code.
 */
5487 em_enable_phy_wakeup(struct adapter *adapter)
5489 struct e1000_hw *hw = &adapter->hw;
5493 /* copy MAC RARs to PHY RARs */
5494 e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5496 /* copy MAC MTA to PHY MTA */
5497 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5498 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5499 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5500 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5501 (u16)((mreg >> 16) & 0xFFFF));
5504 /* configure PHY Rx Control register */
5505 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5506 mreg = E1000_READ_REG(hw, E1000_RCTL);
5507 if (mreg & E1000_RCTL_UPE)
5508 preg |= BM_RCTL_UPE;
5509 if (mreg & E1000_RCTL_MPE)
5510 preg |= BM_RCTL_MPE;
5511 preg &= ~(BM_RCTL_MO_MASK);
5512 if (mreg & E1000_RCTL_MO_3)
5513 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5514 << BM_RCTL_MO_SHIFT);
5515 if (mreg & E1000_RCTL_BAM)
5516 preg |= BM_RCTL_BAM;
5517 if (mreg & E1000_RCTL_PMCF)
5518 preg |= BM_RCTL_PMCF;
/* Flow-control state comes from CTRL, not RCTL */
5519 mreg = E1000_READ_REG(hw, E1000_CTRL);
5520 if (mreg & E1000_CTRL_RFCE)
5521 preg |= BM_RCTL_RFCE;
5522 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5524 /* enable PHY wakeup in MAC register */
5525 E1000_WRITE_REG(hw, E1000_WUC,
5526 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5527 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5529 /* configure and enable PHY wakeup in PHY registers */
5530 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5531 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5533 /* activate PHY wakeup */
5534 ret = hw->phy.ops.acquire(hw);
5536 printf("Could not acquire PHY\n");
/* Raw MDIC access: select page 769 where the wakeup-control reg lives */
5539 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5540 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5541 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5543 printf("Could not read PHY page 769\n");
5546 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5547 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5549 printf("Could not set PHY Host Wakeup bit\n");
5551 hw->phy.ops.release(hw);
/*
 * em_led_func: led(4) callback — turn the identify LED on (onoff != 0)
 * or off, serialized under the core lock.
 */
5557 em_led_func(void *arg, int onoff)
5559 struct adapter *adapter = arg;
5561 EM_CORE_LOCK(adapter);
5563 e1000_setup_led(&adapter->hw);
5564 e1000_led_on(&adapter->hw);
5566 e1000_led_off(&adapter->hw);
5567 e1000_cleanup_led(&adapter->hw);
5569 EM_CORE_UNLOCK(adapter);
5573 ** Disable the L0S and L1 LINK states
/*
 * em_disable_aspm: for affected MAC types, clear the ASPM control bits
 * in the PCIe Link Control register (when the Link Capabilities register
 * advertises ASPM support) to avoid L0s/L1 induced problems.
 */
5576 em_disable_aspm(struct adapter *adapter)
5579 u16 link_cap,link_ctrl;
5580 device_t dev = adapter->dev;
5582 switch (adapter->hw.mac.type) {
5590 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5592 reg = base + PCIER_LINK_CAP;
5593 link_cap = pci_read_config(dev, reg, 2);
5594 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5596 reg = base + PCIER_LINK_CTL;
5597 link_ctrl = pci_read_config(dev, reg, 2);
5598 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5599 pci_write_config(dev, reg, link_ctrl, 2);
5603 /**********************************************************************
5605 * Update the board statistics counters.
5607 **********************************************************************/
/*
 * em_update_stats_counters: accumulate the hardware's clear-on-read
 * statistics registers into adapter->stats and derive the ifnet
 * collision/input-error/output-error counters from them.
 */
5609 em_update_stats_counters(struct adapter *adapter)
/* Symbol/sequence errors are only meaningful on copper or with link up */
5613 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5614 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5615 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5616 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5618 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5619 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5620 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5621 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5623 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5624 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5625 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5626 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5627 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5628 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5629 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5630 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5631 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5632 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5633 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5634 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5635 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5636 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5637 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5638 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5639 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5640 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5641 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5642 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5644 /* For the 64-bit byte counters the low dword must be read first. */
5645 /* Both registers clear on the read of the high dword */
5647 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5648 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5649 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5650 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5652 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5653 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5654 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5655 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5656 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
/* NOTE(review): tor/tot accumulate only the high dwords (TORH/TOTH) here;
 * confirm against the low-dword-first rule documented above. */
5658 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5659 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5661 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5662 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5663 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5664 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5665 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5666 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5667 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5668 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5669 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5670 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5672 /* Interrupt Counts */
5674 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5675 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5676 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5677 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5678 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5679 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5680 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5681 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5682 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5684 if (adapter->hw.mac.type >= e1000_82543) {
5685 adapter->stats.algnerrc +=
5686 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5687 adapter->stats.rxerrc +=
5688 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5689 adapter->stats.tncrs +=
5690 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5691 adapter->stats.cexterr +=
5692 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5693 adapter->stats.tsctc +=
5694 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5695 adapter->stats.tsctfc +=
5696 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
/* Derived ifnet counters exported to the network stack */
5700 ifp->if_collisions = adapter->stats.colc;
5703 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5704 adapter->stats.crcerrs + adapter->stats.algnerrc +
5705 adapter->stats.ruc + adapter->stats.roc +
5706 adapter->stats.mpc + adapter->stats.cexterr;
5709 ifp->if_oerrors = adapter->stats.ecol +
5710 adapter->stats.latecol + adapter->watchdog_events;
5713 /* Export a single 32-bit register via a read-only sysctl. */
/* oid_arg1 = adapter softc, oid_arg2 = register offset to read */
5715 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5717 struct adapter *adapter;
5720 adapter = oidp->oid_arg1;
5721 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5722 return (sysctl_handle_int(oidp, &val, 0, req));
5726 * Add sysctl variables, one per statistic, to the system.
5729 em_add_hw_stats(struct adapter *adapter)
5731 device_t dev = adapter->dev;
5733 struct tx_ring *txr = adapter->tx_rings;
5734 struct rx_ring *rxr = adapter->rx_rings;
5736 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5737 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5738 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5739 struct e1000_hw_stats *stats = &adapter->stats;
5741 struct sysctl_oid *stat_node, *queue_node, *int_node;
5742 struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5744 #define QUEUE_NAME_LEN 32
5745 char namebuf[QUEUE_NAME_LEN];
5747 /* Driver Statistics */
5748 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5749 CTLFLAG_RD, &adapter->dropped_pkts,
5750 "Driver dropped packets");
5751 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5752 CTLFLAG_RD, &adapter->link_irq,
5753 "Link MSIX IRQ Handled");
5754 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5755 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5756 "Defragmenting mbuf chain failed");
5757 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5758 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5759 "Driver tx dma failure in xmit");
5760 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5761 CTLFLAG_RD, &adapter->rx_overruns,
5763 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5764 CTLFLAG_RD, &adapter->watchdog_events,
5765 "Watchdog timeouts");
5767 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5768 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5769 em_sysctl_reg_handler, "IU",
5770 "Device Control Register");
5771 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5772 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5773 em_sysctl_reg_handler, "IU",
5774 "Receiver Control Register");
5775 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5776 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5777 "Flow Control High Watermark");
5778 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5779 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5780 "Flow Control Low Watermark");
5782 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5783 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5784 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5785 CTLFLAG_RD, NULL, "TX Queue Name");
5786 queue_list = SYSCTL_CHILDREN(queue_node);
5788 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5789 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5791 em_sysctl_reg_handler, "IU",
5792 "Transmit Descriptor Head");
5793 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5794 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5796 em_sysctl_reg_handler, "IU",
5797 "Transmit Descriptor Tail");
5798 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5799 CTLFLAG_RD, &txr->tx_irq,
5800 "Queue MSI-X Transmit Interrupts");
5801 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5802 CTLFLAG_RD, &txr->no_desc_avail,
5803 "Queue No Descriptor Available");
5805 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5806 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5807 CTLFLAG_RD, NULL, "RX Queue Name");
5808 queue_list = SYSCTL_CHILDREN(queue_node);
5810 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5811 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5813 em_sysctl_reg_handler, "IU",
5814 "Receive Descriptor Head");
5815 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5816 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5818 em_sysctl_reg_handler, "IU",
5819 "Receive Descriptor Tail");
5820 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5821 CTLFLAG_RD, &rxr->rx_irq,
5822 "Queue MSI-X Receive Interrupts");
5825 /* MAC stats get their own sub node */
5827 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5828 CTLFLAG_RD, NULL, "Statistics");
5829 stat_list = SYSCTL_CHILDREN(stat_node);
5831 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5832 CTLFLAG_RD, &stats->ecol,
5833 "Excessive collisions");
5834 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5835 CTLFLAG_RD, &stats->scc,
5836 "Single collisions");
5837 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5838 CTLFLAG_RD, &stats->mcc,
5839 "Multiple collisions");
5840 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5841 CTLFLAG_RD, &stats->latecol,
5843 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5844 CTLFLAG_RD, &stats->colc,
5846 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5847 CTLFLAG_RD, &adapter->stats.symerrs,
5849 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5850 CTLFLAG_RD, &adapter->stats.sec,
5852 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5853 CTLFLAG_RD, &adapter->stats.dc,
5855 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5856 CTLFLAG_RD, &adapter->stats.mpc,
5858 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5859 CTLFLAG_RD, &adapter->stats.rnbc,
5860 "Receive No Buffers");
5861 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5862 CTLFLAG_RD, &adapter->stats.ruc,
5863 "Receive Undersize");
5864 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5865 CTLFLAG_RD, &adapter->stats.rfc,
5866 "Fragmented Packets Received ");
5867 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5868 CTLFLAG_RD, &adapter->stats.roc,
5869 "Oversized Packets Received");
5870 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5871 CTLFLAG_RD, &adapter->stats.rjc,
5873 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5874 CTLFLAG_RD, &adapter->stats.rxerrc,
5876 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5877 CTLFLAG_RD, &adapter->stats.crcerrs,
5879 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5880 CTLFLAG_RD, &adapter->stats.algnerrc,
5881 "Alignment Errors");
5882 /* On 82575 these are collision counts */
5883 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5884 CTLFLAG_RD, &adapter->stats.cexterr,
5885 "Collision/Carrier extension errors");
5886 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5887 CTLFLAG_RD, &adapter->stats.xonrxc,
5889 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5890 CTLFLAG_RD, &adapter->stats.xontxc,
5892 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5893 CTLFLAG_RD, &adapter->stats.xoffrxc,
5895 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5896 CTLFLAG_RD, &adapter->stats.xofftxc,
5897 "XOFF Transmitted");
5899 /* Packet Reception Stats */
5900 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5901 CTLFLAG_RD, &adapter->stats.tpr,
5902 "Total Packets Received ");
5903 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5904 CTLFLAG_RD, &adapter->stats.gprc,
5905 "Good Packets Received");
5906 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5907 CTLFLAG_RD, &adapter->stats.bprc,
5908 "Broadcast Packets Received");
5909 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5910 CTLFLAG_RD, &adapter->stats.mprc,
5911 "Multicast Packets Received");
5912 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5913 CTLFLAG_RD, &adapter->stats.prc64,
5914 "64 byte frames received ");
5915 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5916 CTLFLAG_RD, &adapter->stats.prc127,
5917 "65-127 byte frames received");
5918 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5919 CTLFLAG_RD, &adapter->stats.prc255,
5920 "128-255 byte frames received");
5921 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5922 CTLFLAG_RD, &adapter->stats.prc511,
5923 "256-511 byte frames received");
5924 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5925 CTLFLAG_RD, &adapter->stats.prc1023,
5926 "512-1023 byte frames received");
5927 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5928 CTLFLAG_RD, &adapter->stats.prc1522,
5929 "1023-1522 byte frames received");
5930 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5931 CTLFLAG_RD, &adapter->stats.gorc,
5932 "Good Octets Received");
5934 /* Packet Transmission Stats */
5935 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5936 CTLFLAG_RD, &adapter->stats.gotc,
5937 "Good Octets Transmitted");
5938 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5939 CTLFLAG_RD, &adapter->stats.tpt,
5940 "Total Packets Transmitted");
5941 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5942 CTLFLAG_RD, &adapter->stats.gptc,
5943 "Good Packets Transmitted");
5944 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5945 CTLFLAG_RD, &adapter->stats.bptc,
5946 "Broadcast Packets Transmitted");
5947 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5948 CTLFLAG_RD, &adapter->stats.mptc,
5949 "Multicast Packets Transmitted");
5950 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5951 CTLFLAG_RD, &adapter->stats.ptc64,
5952 "64 byte frames transmitted ");
5953 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5954 CTLFLAG_RD, &adapter->stats.ptc127,
5955 "65-127 byte frames transmitted");
5956 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5957 CTLFLAG_RD, &adapter->stats.ptc255,
5958 "128-255 byte frames transmitted");
5959 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5960 CTLFLAG_RD, &adapter->stats.ptc511,
5961 "256-511 byte frames transmitted");
5962 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5963 CTLFLAG_RD, &adapter->stats.ptc1023,
5964 "512-1023 byte frames transmitted");
5965 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5966 CTLFLAG_RD, &adapter->stats.ptc1522,
5967 "1024-1522 byte frames transmitted");
5968 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5969 CTLFLAG_RD, &adapter->stats.tsctc,
5970 "TSO Contexts Transmitted");
5971 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5972 CTLFLAG_RD, &adapter->stats.tsctfc,
5973 "TSO Contexts Failed");
5976 /* Interrupt Stats */
5978 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5979 CTLFLAG_RD, NULL, "Interrupt Statistics");
5980 int_list = SYSCTL_CHILDREN(int_node);
5982 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5983 CTLFLAG_RD, &adapter->stats.iac,
5984 "Interrupt Assertion Count");
5986 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5987 CTLFLAG_RD, &adapter->stats.icrxptc,
5988 "Interrupt Cause Rx Pkt Timer Expire Count");
5990 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5991 CTLFLAG_RD, &adapter->stats.icrxatc,
5992 "Interrupt Cause Rx Abs Timer Expire Count");
5994 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5995 CTLFLAG_RD, &adapter->stats.ictxptc,
5996 "Interrupt Cause Tx Pkt Timer Expire Count");
5998 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5999 CTLFLAG_RD, &adapter->stats.ictxatc,
6000 "Interrupt Cause Tx Abs Timer Expire Count");
6002 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6003 CTLFLAG_RD, &adapter->stats.ictxqec,
6004 "Interrupt Cause Tx Queue Empty Count");
6006 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6007 CTLFLAG_RD, &adapter->stats.ictxqmtc,
6008 "Interrupt Cause Tx Queue Min Thresh Count");
6010 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6011 CTLFLAG_RD, &adapter->stats.icrxdmtc,
6012 "Interrupt Cause Rx Desc Min Thresh Count");
6014 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6015 CTLFLAG_RD, &adapter->stats.icrxoc,
6016 "Interrupt Cause Receiver Overrun Count");
6019 /**********************************************************************
6021 * This routine provides a way to dump out the adapter eeprom,
6022 * often a useful debug/service tool. This only dumps the first
6023 * 32 words, stuff that matters is in that extent.
6025 **********************************************************************/
6027 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6029 struct adapter *adapter = (struct adapter *)arg1;
6034 error = sysctl_handle_int(oidp, &result, 0, req);
6036 if (error || !req->newptr)
6040 * This value will cause a hex dump of the
6041 * first 32 16-bit words of the EEPROM to
6045 em_print_nvm_info(adapter);
6051 em_print_nvm_info(struct adapter *adapter)
6056 /* Its a bit crude, but it gets the job done */
6057 printf("\nInterface EEPROM Dump:\n");
6058 printf("Offset\n0x0000 ");
6059 for (i = 0, j = 0; i < 32; i++, j++) {
6060 if (j == 8) { /* Make the offset block */
6062 printf("\n0x00%x0 ",row);
6064 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6065 printf("%04x ", eeprom_data);
6071 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6073 struct em_int_delay_info *info;
6074 struct adapter *adapter;
6076 int error, usecs, ticks;
6078 info = (struct em_int_delay_info *)arg1;
6079 usecs = info->value;
6080 error = sysctl_handle_int(oidp, &usecs, 0, req);
6081 if (error != 0 || req->newptr == NULL)
6083 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6085 info->value = usecs;
6086 ticks = EM_USECS_TO_TICKS(usecs);
6087 if (info->offset == E1000_ITR) /* units are 256ns here */
6090 adapter = info->adapter;
6092 EM_CORE_LOCK(adapter);
6093 regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6094 regval = (regval & ~0xffff) | (ticks & 0xffff);
6095 /* Handle a few special cases. */
6096 switch (info->offset) {
6101 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6102 /* Don't write 0 into the TIDV register. */
6105 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6108 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6109 EM_CORE_UNLOCK(adapter);
6114 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6115 const char *description, struct em_int_delay_info *info,
6116 int offset, int value)
6118 info->adapter = adapter;
6119 info->offset = offset;
6120 info->value = value;
6121 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6122 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6123 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6124 info, 0, em_sysctl_int_delay, "I", description);
6128 em_set_sysctl_value(struct adapter *adapter, const char *name,
6129 const char *description, int *limit, int value)
6132 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6133 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6134 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6139 ** Set flow control using sysctl:
6140 ** Flow control values:
6147 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6150 static int input = 3; /* default is full */
6151 struct adapter *adapter = (struct adapter *) arg1;
6153 error = sysctl_handle_int(oidp, &input, 0, req);
6155 if ((error) || (req->newptr == NULL))
6158 if (input == adapter->fc) /* no change? */
6162 case e1000_fc_rx_pause:
6163 case e1000_fc_tx_pause:
6166 adapter->hw.fc.requested_mode = input;
6167 adapter->fc = input;
6174 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6175 e1000_force_mac_fc(&adapter->hw);
6180 ** Manage Energy Efficient Ethernet:
6182 ** 0/1 - enabled/disabled
6185 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6187 struct adapter *adapter = (struct adapter *) arg1;
6190 value = adapter->hw.dev_spec.ich8lan.eee_disable;
6191 error = sysctl_handle_int(oidp, &value, 0, req);
6192 if (error || req->newptr == NULL)
6194 EM_CORE_LOCK(adapter);
6195 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6196 em_init_locked(adapter);
6197 EM_CORE_UNLOCK(adapter);
6202 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6204 struct adapter *adapter;
6209 error = sysctl_handle_int(oidp, &result, 0, req);
6211 if (error || !req->newptr)
6215 adapter = (struct adapter *)arg1;
6216 em_print_debug_info(adapter);
6223 ** This routine is meant to be fluid, add whatever is
6224 ** needed for debugging a problem. -jfv
6227 em_print_debug_info(struct adapter *adapter)
6229 device_t dev = adapter->dev;
6230 struct tx_ring *txr = adapter->tx_rings;
6231 struct rx_ring *rxr = adapter->rx_rings;
6233 if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6234 printf("Interface is RUNNING ");
6236 printf("Interface is NOT RUNNING\n");
6238 if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6239 printf("and INACTIVE\n");
6241 printf("and ACTIVE\n");
6243 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6244 device_printf(dev, "TX Queue %d ------\n", i);
6245 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6246 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6247 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6248 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6249 device_printf(dev, "TX descriptors avail = %d\n",
6251 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6252 txr->no_desc_avail);
6253 device_printf(dev, "RX Queue %d ------\n", i);
6254 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6255 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6256 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6257 device_printf(dev, "RX discarded packets = %ld\n",
6259 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6260 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
#ifdef EM_MULTIQUEUE
/*
 * 82574 only:
 * Write a new value to the EEPROM increasing the number of MSIX
 * vectors from 3 to 5, for proper multiqueue support.
 */
static void
em_enable_vectors_82574(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	device_t dev = adapter->dev;
	u16 edata;

	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
	printf("Current cap: %#06x\n", edata);
	/* Only rewrite the NVM if the vector count field is not already 4. */
	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
		device_printf(dev, "Writing to eeprom: increasing "
		    "reported MSIX vectors from 3 to 5...\n");
		edata &= ~(EM_NVM_MSIX_N_MASK);
		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
		e1000_update_nvm_checksum(hw);
		device_printf(dev, "Writing to eeprom: done\n");
	}
}
#endif
6292 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6297 dc = devclass_find("em");
6298 max_em = devclass_get_maxunit(dc);
6300 for (int index = 0; index < (max_em - 1); index++) {
6302 dev = devclass_get_device(dc, index);
6303 if (device_get_driver(dev) == &em_driver) {
6304 struct adapter *adapter = device_get_softc(dev);
6305 EM_CORE_LOCK(adapter);
6306 em_init_locked(adapter);
6307 EM_CORE_UNLOCK(adapter);
6311 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6316 dc = devclass_find("em");
6317 max_em = devclass_get_maxunit(dc);
6319 for (int index = 0; index < (max_em - 1); index++) {
6321 dev = devclass_get_device(dc, index);
6322 if (device_get_driver(dev) == &em_driver)
6323 em_print_debug_info(device_get_softc(dev));