1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
38 #include "opt_inet6.h"
40 #ifdef HAVE_KERNEL_OPTION_HEADERS
41 #include "opt_device_polling.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/types.h>
50 #if __FreeBSD_version >= 800000
51 #include <sys/buf_ring.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
59 #include <sys/module.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/sysctl.h>
65 #include <sys/taskqueue.h>
66 #include <sys/eventhandler.h>
67 #include <machine/bus.h>
68 #include <machine/resource.h>
71 #include <net/ethernet.h>
73 #include <net/if_arp.h>
74 #include <net/if_dl.h>
75 #include <net/if_media.h>
77 #include <net/if_types.h>
78 #include <net/if_vlan_var.h>
80 #include <netinet/in_systm.h>
81 #include <netinet/in.h>
82 #include <netinet/if_ether.h>
83 #include <netinet/ip.h>
84 #include <netinet/ip6.h>
85 #include <netinet/tcp.h>
86 #include <netinet/udp.h>
88 #include <machine/in_cksum.h>
89 #include <dev/led/led.h>
90 #include <dev/pci/pcivar.h>
91 #include <dev/pci/pcireg.h>
93 #include "e1000_api.h"
94 #include "e1000_82571.h"
97 /*********************************************************************
99 *********************************************************************/
100 char em_driver_version[] = "7.6.1-k";
102 /*********************************************************************
103 * PCI Device ID Table
105 * Used by probe to select devices to load on
106 * Last field stores an index into e1000_strings
107 * Last entry must be all 0s
109 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110 *********************************************************************/
112 static em_vendor_info_t em_vendor_info_array[] =
114 /* Intel(R) PRO/1000 Network Connection */
115 { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
116 { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
117 { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
118 { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
119 PCI_ANY_ID, PCI_ANY_ID, 0},
120 { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
121 PCI_ANY_ID, PCI_ANY_ID, 0},
122 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
123 PCI_ANY_ID, PCI_ANY_ID, 0},
124 { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
125 PCI_ANY_ID, PCI_ANY_ID, 0},
126 { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
127 PCI_ANY_ID, PCI_ANY_ID, 0},
128 { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
129 PCI_ANY_ID, PCI_ANY_ID, 0},
130 { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
131 { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0},
132 { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
133 { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0},
135 { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0},
136 { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0},
137 { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0},
138 { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0},
139 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
140 PCI_ANY_ID, PCI_ANY_ID, 0},
141 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
142 PCI_ANY_ID, PCI_ANY_ID, 0},
143 { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
144 PCI_ANY_ID, PCI_ANY_ID, 0},
145 { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
146 PCI_ANY_ID, PCI_ANY_ID, 0},
147 { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
148 { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
149 { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
150 { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
151 { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
152 { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
153 { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
154 { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0},
155 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
156 { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0},
157 { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0},
158 { 0x8086, E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0},
159 { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0},
160 { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0},
161 { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0},
162 { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0},
163 { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0},
164 { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0},
165 { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0},
166 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
167 { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
168 { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
169 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
170 { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0},
171 { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0},
172 { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
173 { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0},
174 { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0},
175 { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0},
176 { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
177 { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0},
178 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
179 { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0},
180 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
181 PCI_ANY_ID, PCI_ANY_ID, 0},
182 { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
183 PCI_ANY_ID, PCI_ANY_ID, 0},
184 { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0},
185 { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
186 { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0},
187 { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0},
188 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0},
189 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0},
190 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2,
191 PCI_ANY_ID, PCI_ANY_ID, 0},
192 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0},
193 { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3,
194 PCI_ANY_ID, PCI_ANY_ID, 0},
195 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
196 PCI_ANY_ID, PCI_ANY_ID, 0},
197 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
198 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
199 PCI_ANY_ID, PCI_ANY_ID, 0},
200 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
201 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4,
202 PCI_ANY_ID, PCI_ANY_ID, 0},
203 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, PCI_ANY_ID, PCI_ANY_ID, 0},
204 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5,
205 PCI_ANY_ID, PCI_ANY_ID, 0},
206 { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, PCI_ANY_ID, PCI_ANY_ID, 0},
207 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6,
208 PCI_ANY_ID, PCI_ANY_ID, 0},
209 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, PCI_ANY_ID, PCI_ANY_ID, 0},
210 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7,
211 PCI_ANY_ID, PCI_ANY_ID, 0},
212 { 0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, PCI_ANY_ID, PCI_ANY_ID, 0},
213 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8,
214 PCI_ANY_ID, PCI_ANY_ID, 0},
215 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, PCI_ANY_ID, PCI_ANY_ID, 0},
216 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9,
217 PCI_ANY_ID, PCI_ANY_ID, 0},
218 { 0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, PCI_ANY_ID, PCI_ANY_ID, 0},
219 /* required last entry */
223 /*********************************************************************
224 * Table of branding strings for all supported NICs.
225 *********************************************************************/
227 static char *em_strings[] = {
228 "Intel(R) PRO/1000 Network Connection"
231 /*********************************************************************
232 * Function prototypes
233 *********************************************************************/
234 static int em_probe(device_t);
235 static int em_attach(device_t);
236 static int em_detach(device_t);
237 static int em_shutdown(device_t);
238 static int em_suspend(device_t);
239 static int em_resume(device_t);
241 static int em_mq_start(struct ifnet *, struct mbuf *);
242 static int em_mq_start_locked(struct ifnet *,
244 static void em_qflush(struct ifnet *);
246 static void em_start(struct ifnet *);
247 static void em_start_locked(struct ifnet *, struct tx_ring *);
249 static int em_ioctl(struct ifnet *, u_long, caddr_t);
250 static void em_init(void *);
251 static void em_init_locked(struct adapter *);
252 static void em_stop(void *);
253 static void em_media_status(struct ifnet *, struct ifmediareq *);
254 static int em_media_change(struct ifnet *);
255 static void em_identify_hardware(struct adapter *);
256 static int em_allocate_pci_resources(struct adapter *);
257 static int em_allocate_legacy(struct adapter *);
258 static int em_allocate_msix(struct adapter *);
259 static int em_allocate_queues(struct adapter *);
260 static int em_setup_msix(struct adapter *);
261 static void em_free_pci_resources(struct adapter *);
262 static void em_local_timer(void *);
263 static void em_reset(struct adapter *);
264 static int em_setup_interface(device_t, struct adapter *);
265 static void em_flush_desc_rings(struct adapter *);
267 static void em_setup_transmit_structures(struct adapter *);
268 static void em_initialize_transmit_unit(struct adapter *);
269 static int em_allocate_transmit_buffers(struct tx_ring *);
270 static void em_free_transmit_structures(struct adapter *);
271 static void em_free_transmit_buffers(struct tx_ring *);
273 static int em_setup_receive_structures(struct adapter *);
274 static int em_allocate_receive_buffers(struct rx_ring *);
275 static void em_initialize_receive_unit(struct adapter *);
276 static void em_free_receive_structures(struct adapter *);
277 static void em_free_receive_buffers(struct rx_ring *);
279 static void em_enable_intr(struct adapter *);
280 static void em_disable_intr(struct adapter *);
281 static void em_update_stats_counters(struct adapter *);
282 static void em_add_hw_stats(struct adapter *adapter);
283 static void em_txeof(struct tx_ring *);
284 static bool em_rxeof(struct rx_ring *, int, int *);
285 #ifndef __NO_STRICT_ALIGNMENT
286 static int em_fixup_rx(struct rx_ring *);
288 static void em_setup_rxdesc(union e1000_rx_desc_extended *,
289 const struct em_rxbuffer *rxbuf);
290 static void em_receive_checksum(uint32_t status, struct mbuf *);
291 static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
292 struct ip *, u32 *, u32 *);
293 static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
294 struct tcphdr *, u32 *, u32 *);
295 static void em_set_promisc(struct adapter *);
296 static void em_disable_promisc(struct adapter *);
297 static void em_set_multi(struct adapter *);
298 static void em_update_link_status(struct adapter *);
299 static void em_refresh_mbufs(struct rx_ring *, int);
300 static void em_register_vlan(void *, struct ifnet *, u16);
301 static void em_unregister_vlan(void *, struct ifnet *, u16);
302 static void em_setup_vlan_hw_support(struct adapter *);
303 static int em_xmit(struct tx_ring *, struct mbuf **);
304 static int em_dma_malloc(struct adapter *, bus_size_t,
305 struct em_dma_alloc *, int);
306 static void em_dma_free(struct adapter *, struct em_dma_alloc *);
307 static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
308 static void em_print_nvm_info(struct adapter *);
309 static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
310 static void em_print_debug_info(struct adapter *);
311 static int em_is_valid_ether_addr(u8 *);
312 static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
313 static void em_add_int_delay_sysctl(struct adapter *, const char *,
314 const char *, struct em_int_delay_info *, int, int);
315 /* Management and WOL Support */
316 static void em_init_manageability(struct adapter *);
317 static void em_release_manageability(struct adapter *);
318 static void em_get_hw_control(struct adapter *);
319 static void em_release_hw_control(struct adapter *);
320 static void em_get_wakeup(device_t);
321 static void em_enable_wakeup(device_t);
322 static int em_enable_phy_wakeup(struct adapter *);
323 static void em_led_func(void *, int);
324 static void em_disable_aspm(struct adapter *);
326 static int em_irq_fast(void *);
329 static void em_msix_tx(void *);
330 static void em_msix_rx(void *);
331 static void em_msix_link(void *);
332 static void em_handle_tx(void *context, int pending);
333 static void em_handle_rx(void *context, int pending);
334 static void em_handle_link(void *context, int pending);
337 static void em_enable_vectors_82574(struct adapter *);
340 static void em_set_sysctl_value(struct adapter *, const char *,
341 const char *, int *, int);
342 static int em_set_flowcntl(SYSCTL_HANDLER_ARGS);
343 static int em_sysctl_eee(SYSCTL_HANDLER_ARGS);
345 static __inline void em_rx_discard(struct rx_ring *, int);
347 #ifdef DEVICE_POLLING
348 static poll_handler_t em_poll;
351 /*********************************************************************
352 * FreeBSD Device Interface Entry Points
353 *********************************************************************/
355 static device_method_t em_methods[] = {
356 /* Device interface */
357 DEVMETHOD(device_probe, em_probe),
358 DEVMETHOD(device_attach, em_attach),
359 DEVMETHOD(device_detach, em_detach),
360 DEVMETHOD(device_shutdown, em_shutdown),
361 DEVMETHOD(device_suspend, em_suspend),
362 DEVMETHOD(device_resume, em_resume),
366 static driver_t em_driver = {
367 "em", em_methods, sizeof(struct adapter),
370 devclass_t em_devclass;
371 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
372 MODULE_DEPEND(em, pci, 1, 1, 1);
373 MODULE_DEPEND(em, ether, 1, 1, 1);
375 /*********************************************************************
376 * Tunable default values.
377 *********************************************************************/
379 #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
380 #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
383 #define MAX_INTS_PER_SEC 8000
384 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256))
386 #define TSO_WORKAROUND 4
388 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
390 static int em_disable_crc_stripping = 0;
391 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
392 &em_disable_crc_stripping, 0, "Disable CRC Stripping");
394 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
395 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
396 TUNABLE_INT("hw.em.tx_int_delay", &em_tx_int_delay_dflt);
397 TUNABLE_INT("hw.em.rx_int_delay", &em_rx_int_delay_dflt);
398 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
399 0, "Default transmit interrupt delay in usecs");
400 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
401 0, "Default receive interrupt delay in usecs");
403 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
404 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
405 TUNABLE_INT("hw.em.tx_abs_int_delay", &em_tx_abs_int_delay_dflt);
406 TUNABLE_INT("hw.em.rx_abs_int_delay", &em_rx_abs_int_delay_dflt);
407 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
408 &em_tx_abs_int_delay_dflt, 0,
409 "Default transmit interrupt delay limit in usecs");
410 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
411 &em_rx_abs_int_delay_dflt, 0,
412 "Default receive interrupt delay limit in usecs");
414 static int em_rxd = EM_DEFAULT_RXD;
415 static int em_txd = EM_DEFAULT_TXD;
416 TUNABLE_INT("hw.em.rxd", &em_rxd);
417 TUNABLE_INT("hw.em.txd", &em_txd);
418 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
419 "Number of receive descriptors per queue");
420 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
421 "Number of transmit descriptors per queue");
423 static int em_smart_pwr_down = FALSE;
424 TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down);
425 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
426 0, "Set to true to leave smart power down enabled on newer adapters");
428 /* Controls whether promiscuous also shows bad packets */
429 static int em_debug_sbp = FALSE;
430 TUNABLE_INT("hw.em.sbp", &em_debug_sbp);
431 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
432 "Show bad packets in promiscuous mode");
434 static int em_enable_msix = TRUE;
435 TUNABLE_INT("hw.em.enable_msix", &em_enable_msix);
436 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
437 "Enable MSI-X interrupts");
440 static int em_num_queues = 1;
441 TUNABLE_INT("hw.em.num_queues", &em_num_queues);
442 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
443 "82574 only: Number of queues to configure, 0 indicates autoconfigure");
447 ** Global variable to store last used CPU when binding queues
448 ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
449 ** queue is bound to a cpu.
451 static int em_last_bind_cpu = -1;
453 /* How many packets rxeof tries to clean at a time */
454 static int em_rx_process_limit = 100;
455 TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit);
456 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
457 &em_rx_process_limit, 0,
458 "Maximum number of received packets to process "
459 "at a time, -1 means unlimited");
461 /* Energy efficient ethernet - default to OFF */
462 static int eee_setting = 1;
463 TUNABLE_INT("hw.em.eee_setting", &eee_setting);
464 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
465 "Enable Energy Efficient Ethernet");
467 /* Global used in WOL setup with multiport cards */
468 static int global_quad_port_a = 0;
470 #ifdef DEV_NETMAP /* see ixgbe.c for details */
471 #include <dev/netmap/if_em_netmap.h>
472 #endif /* DEV_NETMAP */
474 /*********************************************************************
475 * Device identification routine
477 * em_probe determines if the driver should be loaded on
478 * adapter based on PCI vendor/device id of the adapter.
480 * return BUS_PROBE_DEFAULT on success, positive on failure
481 *********************************************************************/
484 em_probe(device_t dev)
486 char adapter_name[60];
487 uint16_t pci_vendor_id = 0;
488 uint16_t pci_device_id = 0;
489 uint16_t pci_subvendor_id = 0;
490 uint16_t pci_subdevice_id = 0;
491 em_vendor_info_t *ent;
493 INIT_DEBUGOUT("em_probe: begin");
495 pci_vendor_id = pci_get_vendor(dev);
496 if (pci_vendor_id != EM_VENDOR_ID)
499 pci_device_id = pci_get_device(dev);
500 pci_subvendor_id = pci_get_subvendor(dev);
501 pci_subdevice_id = pci_get_subdevice(dev);
503 ent = em_vendor_info_array;
504 while (ent->vendor_id != 0) {
505 if ((pci_vendor_id == ent->vendor_id) &&
506 (pci_device_id == ent->device_id) &&
508 ((pci_subvendor_id == ent->subvendor_id) ||
509 (ent->subvendor_id == PCI_ANY_ID)) &&
511 ((pci_subdevice_id == ent->subdevice_id) ||
512 (ent->subdevice_id == PCI_ANY_ID))) {
513 sprintf(adapter_name, "%s %s",
514 em_strings[ent->index],
516 device_set_desc_copy(dev, adapter_name);
517 return (BUS_PROBE_DEFAULT);
525 /*********************************************************************
526 * Device initialization routine
528 * The attach entry point is called when the driver is being loaded.
529 * This routine identifies the type of hardware, allocates all resources
530 * and initializes the hardware.
532 * return 0 on success, positive on failure
533 *********************************************************************/
536 em_attach(device_t dev)
538 struct adapter *adapter;
542 INIT_DEBUGOUT("em_attach: begin");
544 if (resource_disabled("em", device_get_unit(dev))) {
545 device_printf(dev, "Disabled by device hint\n");
549 adapter = device_get_softc(dev);
550 adapter->dev = adapter->osdep.dev = dev;
552 EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
555 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
556 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
557 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
558 em_sysctl_nvm_info, "I", "NVM Information");
560 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
561 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
562 OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
563 em_sysctl_debug_info, "I", "Debug Information");
565 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
566 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
567 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
568 em_set_flowcntl, "I", "Flow Control");
570 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
572 /* Determine hardware and mac info */
573 em_identify_hardware(adapter);
575 /* Setup PCI resources */
576 if (em_allocate_pci_resources(adapter)) {
577 device_printf(dev, "Allocation of PCI resources failed\n");
583 ** For ICH8 and family we need to
584 ** map the flash memory, and this
585 ** must happen after the MAC is
588 if ((hw->mac.type == e1000_ich8lan) ||
589 (hw->mac.type == e1000_ich9lan) ||
590 (hw->mac.type == e1000_ich10lan) ||
591 (hw->mac.type == e1000_pchlan) ||
592 (hw->mac.type == e1000_pch2lan) ||
593 (hw->mac.type == e1000_pch_lpt)) {
594 int rid = EM_BAR_TYPE_FLASH;
595 adapter->flash = bus_alloc_resource_any(dev,
596 SYS_RES_MEMORY, &rid, RF_ACTIVE);
597 if (adapter->flash == NULL) {
598 device_printf(dev, "Mapping of Flash failed\n");
602 /* This is used in the shared code */
603 hw->flash_address = (u8 *)adapter->flash;
604 adapter->osdep.flash_bus_space_tag =
605 rman_get_bustag(adapter->flash);
606 adapter->osdep.flash_bus_space_handle =
607 rman_get_bushandle(adapter->flash);
610 ** In the new SPT device flash is not a
611 ** seperate BAR, rather it is also in BAR0,
612 ** so use the same tag and an offset handle for the
613 ** FLASH read/write macros in the shared code.
615 else if (hw->mac.type >= e1000_pch_spt) {
616 adapter->osdep.flash_bus_space_tag =
617 adapter->osdep.mem_bus_space_tag;
618 adapter->osdep.flash_bus_space_handle =
619 adapter->osdep.mem_bus_space_handle
620 + E1000_FLASH_BASE_ADDR;
623 /* Do Shared Code initialization */
624 error = e1000_setup_init_funcs(hw, TRUE);
626 device_printf(dev, "Setup of Shared code failed, error %d\n",
633 * Setup MSI/X or MSI if PCI Express
635 adapter->msix = em_setup_msix(adapter);
637 e1000_get_bus_info(hw);
639 /* Set up some sysctls for the tunable interrupt delays */
640 em_add_int_delay_sysctl(adapter, "rx_int_delay",
641 "receive interrupt delay in usecs", &adapter->rx_int_delay,
642 E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
643 em_add_int_delay_sysctl(adapter, "tx_int_delay",
644 "transmit interrupt delay in usecs", &adapter->tx_int_delay,
645 E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
646 em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
647 "receive interrupt delay limit in usecs",
648 &adapter->rx_abs_int_delay,
649 E1000_REGISTER(hw, E1000_RADV),
650 em_rx_abs_int_delay_dflt);
651 em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
652 "transmit interrupt delay limit in usecs",
653 &adapter->tx_abs_int_delay,
654 E1000_REGISTER(hw, E1000_TADV),
655 em_tx_abs_int_delay_dflt);
656 em_add_int_delay_sysctl(adapter, "itr",
657 "interrupt delay limit in usecs/4",
659 E1000_REGISTER(hw, E1000_ITR),
662 /* Sysctl for limiting the amount of work done in the taskqueue */
663 em_set_sysctl_value(adapter, "rx_processing_limit",
664 "max number of rx packets to process", &adapter->rx_process_limit,
665 em_rx_process_limit);
668 * Validate number of transmit and receive descriptors. It
669 * must not exceed hardware maximum, and must be multiple
670 * of E1000_DBA_ALIGN.
672 if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
673 (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
674 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
675 EM_DEFAULT_TXD, em_txd);
676 adapter->num_tx_desc = EM_DEFAULT_TXD;
678 adapter->num_tx_desc = em_txd;
680 if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
681 (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
682 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
683 EM_DEFAULT_RXD, em_rxd);
684 adapter->num_rx_desc = EM_DEFAULT_RXD;
686 adapter->num_rx_desc = em_rxd;
688 hw->mac.autoneg = DO_AUTO_NEG;
689 hw->phy.autoneg_wait_to_complete = FALSE;
690 hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
693 if (hw->phy.media_type == e1000_media_type_copper) {
694 hw->phy.mdix = AUTO_ALL_MODES;
695 hw->phy.disable_polarity_correction = FALSE;
696 hw->phy.ms_type = EM_MASTER_SLAVE;
700 * Set the frame limits assuming
701 * standard ethernet sized frames.
703 adapter->hw.mac.max_frame_size =
704 ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
707 * This controls when hardware reports transmit completion
710 hw->mac.report_tx_early = 1;
713 ** Get queue/ring memory
715 if (em_allocate_queues(adapter)) {
720 /* Allocate multicast array memory. */
721 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
722 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
723 if (adapter->mta == NULL) {
724 device_printf(dev, "Can not allocate multicast setup array\n");
729 /* Check SOL/IDER usage */
730 if (e1000_check_reset_block(hw))
731 device_printf(dev, "PHY reset is blocked"
732 " due to SOL/IDER session.\n");
734 /* Sysctl for setting Energy Efficient Ethernet */
735 hw->dev_spec.ich8lan.eee_disable = eee_setting;
736 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
737 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
738 OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
739 adapter, 0, em_sysctl_eee, "I",
740 "Disable Energy Efficient Ethernet");
743 ** Start from a known state, this is
744 ** important in reading the nvm and
750 /* Make sure we have a good EEPROM before we read from it */
751 if (e1000_validate_nvm_checksum(hw) < 0) {
753 ** Some PCI-E parts fail the first check due to
754 ** the link being in sleep state, call it again,
755 ** if it fails a second time its a real issue.
757 if (e1000_validate_nvm_checksum(hw) < 0) {
759 "The EEPROM Checksum Is Not Valid\n");
765 /* Copy the permanent MAC address out of the EEPROM */
766 if (e1000_read_mac_addr(hw) < 0) {
767 device_printf(dev, "EEPROM read error while reading MAC"
773 if (!em_is_valid_ether_addr(hw->mac.addr)) {
774 device_printf(dev, "Invalid MAC address\n");
779 /* Disable ULP support */
780 e1000_disable_ulp_lpt_lp(hw, TRUE);
783 ** Do interrupt configuration
785 if (adapter->msix > 1) /* Do MSIX */
786 error = em_allocate_msix(adapter);
787 else /* MSI or Legacy */
788 error = em_allocate_legacy(adapter);
793 * Get Wake-on-Lan and Management info for later use
797 /* Setup OS specific network interface */
798 if (em_setup_interface(dev, adapter) != 0)
803 /* Initialize statistics */
804 em_update_stats_counters(adapter);
806 hw->mac.get_link_status = 1;
807 em_update_link_status(adapter);
809 /* Register for VLAN events */
810 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
811 em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
812 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
813 em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
815 em_add_hw_stats(adapter);
817 /* Non-AMT based hardware can now take control from firmware */
818 if (adapter->has_manage && !adapter->has_amt)
819 em_get_hw_control(adapter);
821 /* Tell the stack that the interface is not active */
822 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
823 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
825 adapter->led_dev = led_create(em_led_func, adapter,
826 device_get_nameunit(dev));
828 em_netmap_attach(adapter);
829 #endif /* DEV_NETMAP */
831 INIT_DEBUGOUT("em_attach: end");
836 em_free_transmit_structures(adapter);
837 em_free_receive_structures(adapter);
838 em_release_hw_control(adapter);
839 if (adapter->ifp != NULL)
840 if_free(adapter->ifp);
842 em_free_pci_resources(adapter);
843 free(adapter->mta, M_DEVBUF);
844 EM_CORE_LOCK_DESTROY(adapter);
849 /*********************************************************************
850 * Device removal routine
852 * The detach entry point is called when the driver is being removed.
853 * This routine stops the adapter and deallocates all the resources
854 * that were allocated for driver operation.
856 * return 0 on success, positive on failure
857 *********************************************************************/
860 em_detach(device_t dev)
862 struct adapter *adapter = device_get_softc(dev);
863 struct ifnet *ifp = adapter->ifp;
865 INIT_DEBUGOUT("em_detach: begin");
867 /* Make sure VLANS are not using driver */
868 if (adapter->ifp->if_vlantrunk != NULL) {
869 device_printf(dev,"Vlan in use, detach first\n");
873 #ifdef DEVICE_POLLING
874 if (ifp->if_capenable & IFCAP_POLLING)
875 ether_poll_deregister(ifp);
878 if (adapter->led_dev != NULL)
879 led_destroy(adapter->led_dev);
881 EM_CORE_LOCK(adapter);
882 adapter->in_detach = 1;
884 EM_CORE_UNLOCK(adapter);
885 EM_CORE_LOCK_DESTROY(adapter);
887 e1000_phy_hw_reset(&adapter->hw);
889 em_release_manageability(adapter);
890 em_release_hw_control(adapter);
892 /* Unregister VLAN events */
893 if (adapter->vlan_attach != NULL)
894 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
895 if (adapter->vlan_detach != NULL)
896 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
898 ether_ifdetach(adapter->ifp);
899 callout_drain(&adapter->timer);
903 #endif /* DEV_NETMAP */
905 em_free_pci_resources(adapter);
906 bus_generic_detach(dev);
909 em_free_transmit_structures(adapter);
910 em_free_receive_structures(adapter);
912 em_release_hw_control(adapter);
913 free(adapter->mta, M_DEVBUF);
918 /*********************************************************************
920 * Shutdown entry point
922 **********************************************************************/
925 em_shutdown(device_t dev)
927 return em_suspend(dev);
931 * Suspend/resume device methods.
934 em_suspend(device_t dev)
936 struct adapter *adapter = device_get_softc(dev);
938 EM_CORE_LOCK(adapter);
940 em_release_manageability(adapter);
941 em_release_hw_control(adapter);
942 em_enable_wakeup(dev);
944 EM_CORE_UNLOCK(adapter);
946 return bus_generic_suspend(dev);
950 em_resume(device_t dev)
952 struct adapter *adapter = device_get_softc(dev);
953 struct tx_ring *txr = adapter->tx_rings;
954 struct ifnet *ifp = adapter->ifp;
956 EM_CORE_LOCK(adapter);
957 if (adapter->hw.mac.type == e1000_pch2lan)
958 e1000_resume_workarounds_pchlan(&adapter->hw);
959 em_init_locked(adapter);
960 em_init_manageability(adapter);
962 if ((ifp->if_flags & IFF_UP) &&
963 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
964 for (int i = 0; i < adapter->num_queues; i++, txr++) {
967 if (!drbr_empty(ifp, txr->br))
968 em_mq_start_locked(ifp, txr);
970 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
971 em_start_locked(ifp, txr);
976 EM_CORE_UNLOCK(adapter);
978 return bus_generic_resume(dev);
982 #ifndef EM_MULTIQUEUE
984 em_start_locked(struct ifnet *ifp, struct tx_ring *txr)
986 struct adapter *adapter = ifp->if_softc;
989 EM_TX_LOCK_ASSERT(txr);
991 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
995 if (!adapter->link_active)
998 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
999 /* Call cleanup if number of TX descriptors low */
1000 if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
1002 if (txr->tx_avail < EM_MAX_SCATTER) {
1003 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1006 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1010 * Encapsulation can modify our pointer, and or make it
1011 * NULL on failure. In that event, we can't requeue.
1013 if (em_xmit(txr, &m_head)) {
1016 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1020 /* Mark the queue as having work */
1021 if (txr->busy == EM_TX_IDLE)
1022 txr->busy = EM_TX_BUSY;
1024 /* Send a copy of the frame to the BPF listener */
1025 ETHER_BPF_MTAP(ifp, m_head);
1033 em_start(struct ifnet *ifp)
1035 struct adapter *adapter = ifp->if_softc;
1036 struct tx_ring *txr = adapter->tx_rings;
1038 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1040 em_start_locked(ifp, txr);
1045 #else /* EM_MULTIQUEUE */
1046 /*********************************************************************
1047 * Multiqueue Transmit routines
1049 * em_mq_start is called by the stack to initiate a transmit.
1050 * however, if busy the driver can queue the request rather
1051 * than do an immediate send. It is this that is an advantage
1052 * in this driver, rather than also having multiple tx queues.
1053 **********************************************************************/
1055 ** Multiqueue capable stack interface
1058 em_mq_start(struct ifnet *ifp, struct mbuf *m)
1060 struct adapter *adapter = ifp->if_softc;
1061 struct tx_ring *txr = adapter->tx_rings;
1062 unsigned int i, error;
1064 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1065 i = m->m_pkthdr.flowid % adapter->num_queues;
1067 i = curcpu % adapter->num_queues;
1069 txr = &adapter->tx_rings[i];
1071 error = drbr_enqueue(ifp, txr->br, m);
1075 if (EM_TX_TRYLOCK(txr)) {
1076 em_mq_start_locked(ifp, txr);
1079 taskqueue_enqueue(txr->tq, &txr->tx_task);
1085 em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1087 struct adapter *adapter = txr->adapter;
1089 int err = 0, enq = 0;
1091 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1092 IFF_DRV_RUNNING || adapter->link_active == 0) {
1096 /* Process the queue */
1097 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1098 if ((err = em_xmit(txr, &next)) != 0) {
1100 /* It was freed, move forward */
1101 drbr_advance(ifp, txr->br);
1104 * Still have one left, it may not be
1105 * the same since the transmit function
1106 * may have changed it.
1108 drbr_putback(ifp, txr->br, next);
1112 drbr_advance(ifp, txr->br);
1114 ifp->if_obytes += next->m_pkthdr.len;
1115 if (next->m_flags & M_MCAST)
1117 ETHER_BPF_MTAP(ifp, next);
1118 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1122 /* Mark the queue as having work */
1123 if ((enq > 0) && (txr->busy == EM_TX_IDLE))
1124 txr->busy = EM_TX_BUSY;
1126 if (txr->tx_avail < EM_MAX_SCATTER)
1128 if (txr->tx_avail < EM_MAX_SCATTER) {
1129 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
1135 ** Flush all ring buffers
1138 em_qflush(struct ifnet *ifp)
1140 struct adapter *adapter = ifp->if_softc;
1141 struct tx_ring *txr = adapter->tx_rings;
1144 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1146 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1152 #endif /* EM_MULTIQUEUE */
1154 /*********************************************************************
1157 * em_ioctl is called when the user wants to configure the
1160 * return 0 on success, positive on failure
1161 **********************************************************************/
1164 em_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1166 struct adapter *adapter = ifp->if_softc;
1167 struct ifreq *ifr = (struct ifreq *)data;
1168 #if defined(INET) || defined(INET6)
1169 struct ifaddr *ifa = (struct ifaddr *)data;
1171 bool avoid_reset = FALSE;
1174 if (adapter->in_detach)
1180 if (ifa->ifa_addr->sa_family == AF_INET)
1184 if (ifa->ifa_addr->sa_family == AF_INET6)
1188 ** Calling init results in link renegotiation,
1189 ** so we avoid doing it when possible.
1192 ifp->if_flags |= IFF_UP;
1193 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1196 if (!(ifp->if_flags & IFF_NOARP))
1197 arp_ifinit(ifp, ifa);
1200 error = ether_ioctl(ifp, command, data);
1206 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1208 EM_CORE_LOCK(adapter);
1209 switch (adapter->hw.mac.type) {
1213 case e1000_ich10lan:
1220 case e1000_80003es2lan: /* 9K Jumbo Frame size */
1221 max_frame_size = 9234;
1224 max_frame_size = 4096;
1226 /* Adapters that do not support jumbo frames */
1228 max_frame_size = ETHER_MAX_LEN;
1231 max_frame_size = MAX_JUMBO_FRAME_SIZE;
1233 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1235 EM_CORE_UNLOCK(adapter);
1240 ifp->if_mtu = ifr->ifr_mtu;
1241 adapter->hw.mac.max_frame_size =
1242 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1243 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1244 em_init_locked(adapter);
1245 EM_CORE_UNLOCK(adapter);
1249 IOCTL_DEBUGOUT("ioctl rcv'd:\
1250 SIOCSIFFLAGS (Set Interface Flags)");
1251 EM_CORE_LOCK(adapter);
1252 if (ifp->if_flags & IFF_UP) {
1253 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1254 if ((ifp->if_flags ^ adapter->if_flags) &
1255 (IFF_PROMISC | IFF_ALLMULTI)) {
1256 em_disable_promisc(adapter);
1257 em_set_promisc(adapter);
1260 em_init_locked(adapter);
1262 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1264 adapter->if_flags = ifp->if_flags;
1265 EM_CORE_UNLOCK(adapter);
1269 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1270 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1271 EM_CORE_LOCK(adapter);
1272 em_disable_intr(adapter);
1273 em_set_multi(adapter);
1274 #ifdef DEVICE_POLLING
1275 if (!(ifp->if_capenable & IFCAP_POLLING))
1277 em_enable_intr(adapter);
1278 EM_CORE_UNLOCK(adapter);
1282 /* Check SOL/IDER usage */
1283 EM_CORE_LOCK(adapter);
1284 if (e1000_check_reset_block(&adapter->hw)) {
1285 EM_CORE_UNLOCK(adapter);
1286 device_printf(adapter->dev, "Media change is"
1287 " blocked due to SOL/IDER session.\n");
1290 EM_CORE_UNLOCK(adapter);
1293 IOCTL_DEBUGOUT("ioctl rcv'd: \
1294 SIOCxIFMEDIA (Get/Set Interface Media)");
1295 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1301 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1303 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1304 #ifdef DEVICE_POLLING
1305 if (mask & IFCAP_POLLING) {
1306 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1307 error = ether_poll_register(em_poll, ifp);
1310 EM_CORE_LOCK(adapter);
1311 em_disable_intr(adapter);
1312 ifp->if_capenable |= IFCAP_POLLING;
1313 EM_CORE_UNLOCK(adapter);
1315 error = ether_poll_deregister(ifp);
1316 /* Enable interrupt even in error case */
1317 EM_CORE_LOCK(adapter);
1318 em_enable_intr(adapter);
1319 ifp->if_capenable &= ~IFCAP_POLLING;
1320 EM_CORE_UNLOCK(adapter);
1324 if (mask & IFCAP_HWCSUM) {
1325 ifp->if_capenable ^= IFCAP_HWCSUM;
1328 if (mask & IFCAP_TSO4) {
1329 ifp->if_capenable ^= IFCAP_TSO4;
1332 if (mask & IFCAP_VLAN_HWTAGGING) {
1333 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1336 if (mask & IFCAP_VLAN_HWFILTER) {
1337 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1340 if (mask & IFCAP_VLAN_HWTSO) {
1341 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1344 if ((mask & IFCAP_WOL) &&
1345 (ifp->if_capabilities & IFCAP_WOL) != 0) {
1346 if (mask & IFCAP_WOL_MCAST)
1347 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1348 if (mask & IFCAP_WOL_MAGIC)
1349 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1351 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1353 VLAN_CAPABILITIES(ifp);
1358 error = ether_ioctl(ifp, command, data);
1366 /*********************************************************************
1369 * This routine is used in two ways. It is used by the stack as
1370 * init entry point in network interface structure. It is also used
1371 * by the driver as a hw/sw initialization routine to get to a
1374 * return 0 on success, positive on failure
1375 **********************************************************************/
1378 em_init_locked(struct adapter *adapter)
1380 struct ifnet *ifp = adapter->ifp;
1381 device_t dev = adapter->dev;
1383 INIT_DEBUGOUT("em_init: begin");
1385 EM_CORE_LOCK_ASSERT(adapter);
1387 em_disable_intr(adapter);
1388 callout_stop(&adapter->timer);
1390 /* Get the latest mac address, User can use a LAA */
1391 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1394 /* Put the address into the Receive Address Array */
1395 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1398 * With the 82571 adapter, RAR[0] may be overwritten
1399 * when the other port is reset, we make a duplicate
1400 * in RAR[14] for that eventuality, this assures
1401 * the interface continues to function.
1403 if (adapter->hw.mac.type == e1000_82571) {
1404 e1000_set_laa_state_82571(&adapter->hw, TRUE);
1405 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
1406 E1000_RAR_ENTRIES - 1);
1409 /* Initialize the hardware */
1411 em_update_link_status(adapter);
1413 /* Setup VLAN support, basic and offload if available */
1414 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1416 /* Set hardware offload abilities */
1417 if (ifp->if_capenable & IFCAP_TXCSUM)
1418 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1420 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
1422 /* Configure for OS presence */
1423 em_init_manageability(adapter);
1425 /* Prepare transmit descriptors and buffers */
1426 em_setup_transmit_structures(adapter);
1427 em_initialize_transmit_unit(adapter);
1429 /* Setup Multicast table */
1430 em_set_multi(adapter);
1433 ** Figure out the desired mbuf
1434 ** pool for doing jumbos
1436 if (adapter->hw.mac.max_frame_size <= 2048)
1437 adapter->rx_mbuf_sz = MCLBYTES;
1438 #ifndef CONTIGMALLOC_WORKS
1440 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1442 else if (adapter->hw.mac.max_frame_size <= 4096)
1443 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1445 adapter->rx_mbuf_sz = MJUM9BYTES;
1448 /* Prepare receive descriptors and buffers */
1449 if (em_setup_receive_structures(adapter)) {
1450 device_printf(dev, "Could not setup receive structures\n");
1454 em_initialize_receive_unit(adapter);
1456 /* Use real VLAN Filter support? */
1457 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1458 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1459 /* Use real VLAN Filter support */
1460 em_setup_vlan_hw_support(adapter);
1463 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1464 ctrl |= E1000_CTRL_VME;
1465 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1469 /* Don't lose promiscuous settings */
1470 em_set_promisc(adapter);
1472 /* Set the interface as ACTIVE */
1473 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1474 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1476 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1477 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1479 /* MSI/X configuration for 82574 */
1480 if (adapter->hw.mac.type == e1000_82574) {
1482 tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
1483 tmp |= E1000_CTRL_EXT_PBA_CLR;
1484 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
1485 /* Set the IVAR - interrupt vector routing. */
1486 E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
1489 #ifdef DEVICE_POLLING
1491 * Only enable interrupts if we are not polling, make sure
1492 * they are off otherwise.
1494 if (ifp->if_capenable & IFCAP_POLLING)
1495 em_disable_intr(adapter);
1497 #endif /* DEVICE_POLLING */
1498 em_enable_intr(adapter);
1500 /* AMT based hardware can now take control from firmware */
1501 if (adapter->has_manage && adapter->has_amt)
1502 em_get_hw_control(adapter);
1508 struct adapter *adapter = arg;
1510 EM_CORE_LOCK(adapter);
1511 em_init_locked(adapter);
1512 EM_CORE_UNLOCK(adapter);
1516 #ifdef DEVICE_POLLING
1517 /*********************************************************************
1519 * Legacy polling routine: note this only works with single queue
1521 *********************************************************************/
1523 em_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1525 struct adapter *adapter = ifp->if_softc;
1526 struct tx_ring *txr = adapter->tx_rings;
1527 struct rx_ring *rxr = adapter->rx_rings;
1531 EM_CORE_LOCK(adapter);
1532 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1533 EM_CORE_UNLOCK(adapter);
1537 if (cmd == POLL_AND_CHECK_STATUS) {
1538 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1539 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1540 callout_stop(&adapter->timer);
1541 adapter->hw.mac.get_link_status = 1;
1542 em_update_link_status(adapter);
1543 callout_reset(&adapter->timer, hz,
1544 em_local_timer, adapter);
1547 EM_CORE_UNLOCK(adapter);
1549 em_rxeof(rxr, count, &rx_done);
1553 #ifdef EM_MULTIQUEUE
1554 if (!drbr_empty(ifp, txr->br))
1555 em_mq_start_locked(ifp, txr);
1557 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1558 em_start_locked(ifp, txr);
1564 #endif /* DEVICE_POLLING */
1567 /*********************************************************************
1569 * Fast Legacy/MSI Combined Interrupt Service routine
1571 *********************************************************************/
1573 em_irq_fast(void *arg)
1575 struct adapter *adapter = arg;
1581 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1584 if (reg_icr == 0xffffffff)
1585 return FILTER_STRAY;
1587 /* Definitely not our interrupt. */
1589 return FILTER_STRAY;
1592 * Starting with the 82571 chip, bit 31 should be used to
1593 * determine whether the interrupt belongs to us.
1595 if (adapter->hw.mac.type >= e1000_82571 &&
1596 (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1597 return FILTER_STRAY;
1599 em_disable_intr(adapter);
1600 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1602 /* Link status change */
1603 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1604 adapter->hw.mac.get_link_status = 1;
1605 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1608 if (reg_icr & E1000_ICR_RXO)
1609 adapter->rx_overruns++;
1610 return FILTER_HANDLED;
1613 /* Combined RX/TX handler, used by Legacy and MSI */
1615 em_handle_que(void *context, int pending)
1617 struct adapter *adapter = context;
1618 struct ifnet *ifp = adapter->ifp;
1619 struct tx_ring *txr = adapter->tx_rings;
1620 struct rx_ring *rxr = adapter->rx_rings;
1622 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1623 bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1627 #ifdef EM_MULTIQUEUE
1628 if (!drbr_empty(ifp, txr->br))
1629 em_mq_start_locked(ifp, txr);
1631 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1632 em_start_locked(ifp, txr);
1636 taskqueue_enqueue(adapter->tq, &adapter->que_task);
1641 em_enable_intr(adapter);
1646 /*********************************************************************
1648 * MSIX Interrupt Service Routines
1650 **********************************************************************/
1652 em_msix_tx(void *arg)
1654 struct tx_ring *txr = arg;
1655 struct adapter *adapter = txr->adapter;
1656 struct ifnet *ifp = adapter->ifp;
1661 #ifdef EM_MULTIQUEUE
1662 if (!drbr_empty(ifp, txr->br))
1663 em_mq_start_locked(ifp, txr);
1665 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1666 em_start_locked(ifp, txr);
1669 /* Reenable this interrupt */
1670 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1675 /*********************************************************************
1677 * MSIX RX Interrupt Service routine
1679 **********************************************************************/
1682 em_msix_rx(void *arg)
1684 struct rx_ring *rxr = arg;
1685 struct adapter *adapter = rxr->adapter;
1689 if (!(adapter->ifp->if_drv_flags & IFF_DRV_RUNNING))
1691 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1693 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1695 /* Reenable this interrupt */
1696 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1701 /*********************************************************************
1703 * MSIX Link Fast Interrupt Service routine
1705 **********************************************************************/
1707 em_msix_link(void *arg)
1709 struct adapter *adapter = arg;
1712 ++adapter->link_irq;
1713 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1715 if (reg_icr & E1000_ICR_RXO)
1716 adapter->rx_overruns++;
1718 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1719 adapter->hw.mac.get_link_status = 1;
1720 em_handle_link(adapter, 0);
1722 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
1723 EM_MSIX_LINK | E1000_IMS_LSC);
1725 ** Because we must read the ICR for this interrupt
1726 ** it may clear other causes using autoclear, for
1727 ** this reason we simply create a soft interrupt
1728 ** for all these vectors.
1731 E1000_WRITE_REG(&adapter->hw,
1732 E1000_ICS, adapter->ims);
1738 em_handle_rx(void *context, int pending)
1740 struct rx_ring *rxr = context;
1741 struct adapter *adapter = rxr->adapter;
1744 more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
1746 taskqueue_enqueue(rxr->tq, &rxr->rx_task);
1748 /* Reenable this interrupt */
1749 E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
1754 em_handle_tx(void *context, int pending)
1756 struct tx_ring *txr = context;
1757 struct adapter *adapter = txr->adapter;
1758 struct ifnet *ifp = adapter->ifp;
1762 #ifdef EM_MULTIQUEUE
1763 if (!drbr_empty(ifp, txr->br))
1764 em_mq_start_locked(ifp, txr);
1766 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1767 em_start_locked(ifp, txr);
1769 E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
1774 em_handle_link(void *context, int pending)
1776 struct adapter *adapter = context;
1777 struct e1000_hw *hw = &adapter->hw;
1778 struct tx_ring *txr = adapter->tx_rings;
1779 struct ifnet *ifp = adapter->ifp;
1781 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1784 EM_CORE_LOCK(adapter);
1785 callout_stop(&adapter->timer);
1786 em_update_link_status(adapter);
1787 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
1788 if (hw->mac.type == e1000_82574 && adapter->msix_mem != NULL)
1789 E1000_WRITE_REG(hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
1790 if (adapter->link_active) {
1791 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1793 #ifdef EM_MULTIQUEUE
1794 if (!drbr_empty(ifp, txr->br))
1795 em_mq_start_locked(ifp, txr);
1797 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1798 em_start_locked(ifp, txr);
1803 EM_CORE_UNLOCK(adapter);
1807 /*********************************************************************
1809 * Media Ioctl callback
1811 * This routine is called whenever the user queries the status of
1812 * the interface using ifconfig.
1814 **********************************************************************/
1816 em_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1818 struct adapter *adapter = ifp->if_softc;
1819 u_char fiber_type = IFM_1000_SX;
1821 INIT_DEBUGOUT("em_media_status: begin");
1823 EM_CORE_LOCK(adapter);
1824 em_update_link_status(adapter);
1826 ifmr->ifm_status = IFM_AVALID;
1827 ifmr->ifm_active = IFM_ETHER;
1829 if (!adapter->link_active) {
1830 EM_CORE_UNLOCK(adapter);
1834 ifmr->ifm_status |= IFM_ACTIVE;
1836 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1837 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1838 ifmr->ifm_active |= fiber_type | IFM_FDX;
1840 switch (adapter->link_speed) {
1842 ifmr->ifm_active |= IFM_10_T;
1845 ifmr->ifm_active |= IFM_100_TX;
1848 ifmr->ifm_active |= IFM_1000_T;
1851 if (adapter->link_duplex == FULL_DUPLEX)
1852 ifmr->ifm_active |= IFM_FDX;
1854 ifmr->ifm_active |= IFM_HDX;
1856 EM_CORE_UNLOCK(adapter);
1859 /*********************************************************************
1861 * Media Ioctl callback
1863 * This routine is called when the user changes speed/duplex using
1864 * media/mediopt option with ifconfig.
1866 **********************************************************************/
1868 em_media_change(struct ifnet *ifp)
1870 struct adapter *adapter = ifp->if_softc;
1871 struct ifmedia *ifm = &adapter->media;
1873 INIT_DEBUGOUT("em_media_change: begin");
1875 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1878 EM_CORE_LOCK(adapter);
1879 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1881 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1882 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1887 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1888 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1891 adapter->hw.mac.autoneg = FALSE;
1892 adapter->hw.phy.autoneg_advertised = 0;
1893 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1894 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1896 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1899 adapter->hw.mac.autoneg = FALSE;
1900 adapter->hw.phy.autoneg_advertised = 0;
1901 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1902 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1904 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1907 device_printf(adapter->dev, "Unsupported media type\n");
1910 em_init_locked(adapter);
1911 EM_CORE_UNLOCK(adapter);
1916 /*********************************************************************
1918 * This routine maps the mbufs to tx descriptors.
1920 * return 0 on success, positive on failure
1921 **********************************************************************/
1924 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1926 struct adapter *adapter = txr->adapter;
1927 bus_dma_segment_t segs[EM_MAX_SCATTER];
1929 struct em_txbuffer *tx_buffer, *tx_buffer_mapped;
1930 struct e1000_tx_desc *ctxd = NULL;
1931 struct mbuf *m_head;
1932 struct ether_header *eh;
1933 struct ip *ip = NULL;
1934 struct tcphdr *tp = NULL;
1935 u32 txd_upper = 0, txd_lower = 0;
1937 int nsegs, i, j, first, last = 0;
1939 bool do_tso, tso_desc, remap = TRUE;
1942 do_tso = m_head->m_pkthdr.csum_flags & CSUM_IP_TSO;
1947 * Intel recommends entire IP/TCP header length reside in a single
1948 * buffer. If multiple descriptors are used to describe the IP and
1949 * TCP header, each descriptor should describe one or more
1950 * complete headers; descriptors referencing only parts of headers
1951 * are not supported. If all layer headers are not coalesced into
1952 * a single buffer, each buffer should not cross a 4KB boundary,
1953 * or be larger than the maximum read request size.
1954 * Controller also requires modifing IP/TCP header to make TSO work
1955 * so we firstly get a writable mbuf chain then coalesce ethernet/
1956 * IP/TCP header into a single buffer to meet the requirement of
1957 * controller. This also simplifies IP/TCP/UDP checksum offloading
1958 * which also has similiar restrictions.
1960 if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1961 if (do_tso || (m_head->m_next != NULL &&
1962 m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1963 if (M_WRITABLE(*m_headp) == 0) {
1964 m_head = m_dup(*m_headp, M_NOWAIT);
1966 if (m_head == NULL) {
1975 * Assume IPv4, we don't have TSO/checksum offload support
1978 ip_off = sizeof(struct ether_header);
1979 if (m_head->m_len < ip_off) {
1980 m_head = m_pullup(m_head, ip_off);
1981 if (m_head == NULL) {
1986 eh = mtod(m_head, struct ether_header *);
1987 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1988 ip_off = sizeof(struct ether_vlan_header);
1989 if (m_head->m_len < ip_off) {
1990 m_head = m_pullup(m_head, ip_off);
1991 if (m_head == NULL) {
1997 if (m_head->m_len < ip_off + sizeof(struct ip)) {
1998 m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
1999 if (m_head == NULL) {
2004 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2005 poff = ip_off + (ip->ip_hl << 2);
2007 if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
2008 if (m_head->m_len < poff + sizeof(struct tcphdr)) {
2009 m_head = m_pullup(m_head, poff +
2010 sizeof(struct tcphdr));
2011 if (m_head == NULL) {
2016 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2019 * pull 4 more bytes of data into it.
2021 if (m_head->m_len < poff + (tp->th_off << 2)) {
2022 m_head = m_pullup(m_head, poff +
2025 if (m_head == NULL) {
2030 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2031 tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
2033 ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
2038 * The pseudo TCP checksum does not include TCP
2039 * payload length so driver should recompute
2040 * the checksum here what hardware expect to
2041 * see. This is adherence of Microsoft's Large
2042 * Send specification.
2044 tp->th_sum = in_pseudo(ip->ip_src.s_addr,
2045 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2047 } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
2048 if (m_head->m_len < poff + sizeof(struct udphdr)) {
2049 m_head = m_pullup(m_head, poff +
2050 sizeof(struct udphdr));
2051 if (m_head == NULL) {
2056 ip = (struct ip *)(mtod(m_head, char *) + ip_off);
2062 * Map the packet for DMA
2064 * Capture the first descriptor index,
2065 * this descriptor will have the index
2066 * of the EOP which is the only one that
2067 * now gets a DONE bit writeback.
2069 first = txr->next_avail_desc;
2070 tx_buffer = &txr->tx_buffers[first];
2071 tx_buffer_mapped = tx_buffer;
2072 map = tx_buffer->map;
2075 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
2076 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
2079 * There are two types of errors we can (try) to handle:
2080 * - EFBIG means the mbuf chain was too long and bus_dma ran
2081 * out of segments. Defragment the mbuf chain and try again.
2082 * - ENOMEM means bus_dma could not obtain enough bounce buffers
2083 * at this point in time. Defer sending and try again later.
2084 * All other errors, in particular EINVAL, are fatal and prevent the
2085 * mbuf chain from ever going through. Drop it and report error.
2087 if (error == EFBIG && remap) {
2090 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
2092 adapter->mbuf_defrag_failed++;
2099 /* Try it again, but only once */
2102 } else if (error != 0) {
2103 adapter->no_tx_dma_setup++;
2110 * TSO Hardware workaround, if this packet is not
2111 * TSO, and is only a single descriptor long, and
2112 * it follows a TSO burst, then we need to add a
2113 * sentinel descriptor to prevent premature writeback.
2115 if ((!do_tso) && (txr->tx_tso == TRUE)) {
2118 txr->tx_tso = FALSE;
2121 if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) {
2122 txr->no_desc_avail++;
2123 bus_dmamap_unload(txr->txtag, map);
2128 /* Do hardware assists */
2129 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
2130 em_tso_setup(txr, m_head, ip_off, ip, tp,
2131 &txd_upper, &txd_lower);
2132 /* we need to make a final sentinel transmit desc */
2134 } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
2135 em_transmit_checksum_setup(txr, m_head,
2136 ip_off, ip, &txd_upper, &txd_lower);
2138 if (m_head->m_flags & M_VLANTAG) {
2139 /* Set the vlan id. */
2141 (htole16(m_head->m_pkthdr.ether_vtag) << 16);
2142 /* Tell hardware to add tag */
2143 txd_lower |= htole32(E1000_TXD_CMD_VLE);
2146 i = txr->next_avail_desc;
2148 /* Set up our transmit descriptors */
2149 for (j = 0; j < nsegs; j++) {
2151 bus_addr_t seg_addr;
2153 tx_buffer = &txr->tx_buffers[i];
2154 ctxd = &txr->tx_base[i];
2155 seg_addr = segs[j].ds_addr;
2156 seg_len = segs[j].ds_len;
2159 ** If this is the last descriptor, we want to
2160 ** split it so we have a small final sentinel
2162 if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
2163 seg_len -= TSO_WORKAROUND;
2164 ctxd->buffer_addr = htole64(seg_addr);
2165 ctxd->lower.data = htole32(
2166 adapter->txd_cmd | txd_lower | seg_len);
2167 ctxd->upper.data = htole32(txd_upper);
2168 if (++i == adapter->num_tx_desc)
2171 /* Now make the sentinel */
2173 ctxd = &txr->tx_base[i];
2174 tx_buffer = &txr->tx_buffers[i];
2176 htole64(seg_addr + seg_len);
2177 ctxd->lower.data = htole32(
2178 adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
2182 if (++i == adapter->num_tx_desc)
2185 ctxd->buffer_addr = htole64(seg_addr);
2186 ctxd->lower.data = htole32(
2187 adapter->txd_cmd | txd_lower | seg_len);
2188 ctxd->upper.data = htole32(txd_upper);
2190 if (++i == adapter->num_tx_desc)
2193 tx_buffer->m_head = NULL;
2194 tx_buffer->next_eop = -1;
2197 txr->next_avail_desc = i;
2198 txr->tx_avail -= nsegs;
2200 tx_buffer->m_head = m_head;
2202 ** Here we swap the map so the last descriptor,
2203 ** which gets the completion interrupt has the
2204 ** real map, and the first descriptor gets the
2205 ** unused map from this descriptor.
2207 tx_buffer_mapped->map = tx_buffer->map;
2208 tx_buffer->map = map;
2209 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2212 * Last Descriptor of Packet
2213 * needs End Of Packet (EOP)
2214 * and Report Status (RS)
2217 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
2219 * Keep track in the first buffer which
2220 * descriptor will be written back
2222 tx_buffer = &txr->tx_buffers[first];
2223 tx_buffer->next_eop = last;
2226 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2227 * that this frame is available to transmit.
2229 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2230 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2231 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2237 em_set_promisc(struct adapter *adapter)
2239 struct ifnet *ifp = adapter->ifp;
2242 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2244 if (ifp->if_flags & IFF_PROMISC) {
2245 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2246 /* Turn this on if you want to see bad packets */
2248 reg_rctl |= E1000_RCTL_SBP;
2249 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2250 } else if (ifp->if_flags & IFF_ALLMULTI) {
2251 reg_rctl |= E1000_RCTL_MPE;
2252 reg_rctl &= ~E1000_RCTL_UPE;
2253 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2258 em_disable_promisc(struct adapter *adapter)
2260 struct ifnet *ifp = adapter->ifp;
2264 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2265 reg_rctl &= (~E1000_RCTL_UPE);
2266 if (ifp->if_flags & IFF_ALLMULTI)
2267 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2269 struct ifmultiaddr *ifma;
2270 #if __FreeBSD_version < 800000
2273 if_maddr_rlock(ifp);
2275 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2276 if (ifma->ifma_addr->sa_family != AF_LINK)
2278 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2282 #if __FreeBSD_version < 800000
2283 IF_ADDR_UNLOCK(ifp);
2285 if_maddr_runlock(ifp);
2288 /* Don't disable if in MAX groups */
2289 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2290 reg_rctl &= (~E1000_RCTL_MPE);
2291 reg_rctl &= (~E1000_RCTL_SBP);
2292 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2296 /*********************************************************************
2299 * This routine is called whenever multicast address list is updated.
2301 **********************************************************************/
2304 em_set_multi(struct adapter *adapter)
2306 struct ifnet *ifp = adapter->ifp;
2307 struct ifmultiaddr *ifma;
2309 u8 *mta; /* Multicast array memory */
2312 IOCTL_DEBUGOUT("em_set_multi: begin");
2315 bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2317 if (adapter->hw.mac.type == e1000_82542 &&
2318 adapter->hw.revision_id == E1000_REVISION_2) {
2319 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2320 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2321 e1000_pci_clear_mwi(&adapter->hw);
2322 reg_rctl |= E1000_RCTL_RST;
2323 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2327 #if __FreeBSD_version < 800000
2330 if_maddr_rlock(ifp);
2332 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2333 if (ifma->ifma_addr->sa_family != AF_LINK)
2336 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2339 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2340 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2343 #if __FreeBSD_version < 800000
2344 IF_ADDR_UNLOCK(ifp);
2346 if_maddr_runlock(ifp);
2348 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2349 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2350 reg_rctl |= E1000_RCTL_MPE;
2351 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2353 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2355 if (adapter->hw.mac.type == e1000_82542 &&
2356 adapter->hw.revision_id == E1000_REVISION_2) {
2357 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2358 reg_rctl &= ~E1000_RCTL_RST;
2359 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2361 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2362 e1000_pci_set_mwi(&adapter->hw);
2367 /*********************************************************************
2370 * This routine checks for link status and updates statistics.
2372 **********************************************************************/
2375 em_local_timer(void *arg)
2377 struct adapter *adapter = arg;
2378 struct ifnet *ifp = adapter->ifp;
2379 struct tx_ring *txr = adapter->tx_rings;
2380 struct rx_ring *rxr = adapter->rx_rings;
2383 EM_CORE_LOCK_ASSERT(adapter);
2385 em_update_link_status(adapter);
2386 em_update_stats_counters(adapter);
2388 /* Reset LAA into RAR[0] on 82571 */
2389 if ((adapter->hw.mac.type == e1000_82571) &&
2390 e1000_get_laa_state_82571(&adapter->hw))
2391 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
2393 /* Mask to use in the irq trigger */
2394 if (adapter->msix_mem) {
2395 for (int i = 0; i < adapter->num_queues; i++, rxr++)
2396 trigger |= rxr->ims;
2397 rxr = adapter->rx_rings;
2399 trigger = E1000_ICS_RXDMT0;
2402 ** Check on the state of the TX queue(s), this
2403 ** can be done without the lock because its RO
2404 ** and the HUNG state will be static if set.
2406 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2407 if (txr->busy == EM_TX_HUNG)
2409 if (txr->busy >= EM_TX_MAXTRIES)
2410 txr->busy = EM_TX_HUNG;
2411 /* Schedule a TX tasklet if needed */
2412 if (txr->tx_avail <= EM_MAX_SCATTER)
2413 taskqueue_enqueue(txr->tq, &txr->tx_task);
2416 callout_reset(&adapter->timer, hz, em_local_timer, adapter);
2417 #ifndef DEVICE_POLLING
2418 /* Trigger an RX interrupt to guarantee mbuf refresh */
2419 E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
2423 /* Looks like we're hung */
2424 device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
2426 em_print_debug_info(adapter);
2427 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2428 adapter->watchdog_events++;
2429 em_init_locked(adapter);
2434 em_update_link_status(struct adapter *adapter)
2436 struct e1000_hw *hw = &adapter->hw;
2437 struct ifnet *ifp = adapter->ifp;
2438 device_t dev = adapter->dev;
2439 struct tx_ring *txr = adapter->tx_rings;
2442 /* Get the cached link value or read phy for real */
2443 switch (hw->phy.media_type) {
2444 case e1000_media_type_copper:
2445 if (hw->mac.get_link_status) {
2446 if (hw->mac.type == e1000_pch_spt)
2448 /* Do the work to read phy */
2449 e1000_check_for_link(hw);
2450 link_check = !hw->mac.get_link_status;
2451 if (link_check) /* ESB2 fix */
2452 e1000_cfg_on_link_up(hw);
2456 case e1000_media_type_fiber:
2457 e1000_check_for_link(hw);
2458 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2461 case e1000_media_type_internal_serdes:
2462 e1000_check_for_link(hw);
2463 link_check = adapter->hw.mac.serdes_has_link;
2466 case e1000_media_type_unknown:
2470 /* Now check for a transition */
2471 if (link_check && (adapter->link_active == 0)) {
2472 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2473 &adapter->link_duplex);
2476 ** There have proven to be problems with TSO when not at full
2477 ** gigabit speed, so disable the assist automatically when at
2478 ** lower speeds. -jfv
2480 if (ifp->if_capenable & IFCAP_TSO4) {
2481 if (adapter->link_speed == SPEED_1000)
2482 ifp->if_hwassist |= CSUM_IP_TSO;
2484 ifp->if_hwassist &= ~CSUM_IP_TSO;
2487 /* Check if we must disable SPEED_MODE bit on PCI-E */
2488 if ((adapter->link_speed != SPEED_1000) &&
2489 ((hw->mac.type == e1000_82571) ||
2490 (hw->mac.type == e1000_82572))) {
2492 tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
2493 tarc0 &= ~TARC_SPEED_MODE_BIT;
2494 E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
2497 device_printf(dev, "Link is up %d Mbps %s\n",
2498 adapter->link_speed,
2499 ((adapter->link_duplex == FULL_DUPLEX) ?
2500 "Full Duplex" : "Half Duplex"));
2501 adapter->link_active = 1;
2502 adapter->smartspeed = 0;
2503 ifp->if_baudrate = adapter->link_speed * 1000000;
2504 if_link_state_change(ifp, LINK_STATE_UP);
2505 } else if (!link_check && (adapter->link_active == 1)) {
2506 ifp->if_baudrate = adapter->link_speed = 0;
2507 adapter->link_duplex = 0;
2509 device_printf(dev, "Link is Down\n");
2510 adapter->link_active = 0;
2511 /* Link down, disable hang detection */
2512 for (int i = 0; i < adapter->num_queues; i++, txr++)
2513 txr->busy = EM_TX_IDLE;
2514 if_link_state_change(ifp, LINK_STATE_DOWN);
2518 /*********************************************************************
2520 * This routine disables all traffic on the adapter by issuing a
2521 * global reset on the MAC and deallocates TX/RX buffers.
2523 * This routine should always be called with BOTH the CORE
2525 **********************************************************************/
2530 struct adapter *adapter = arg;
2531 struct ifnet *ifp = adapter->ifp;
2532 struct tx_ring *txr = adapter->tx_rings;
2534 EM_CORE_LOCK_ASSERT(adapter);
2536 INIT_DEBUGOUT("em_stop: begin");
2538 em_disable_intr(adapter);
2539 callout_stop(&adapter->timer);
2541 /* Tell the stack that the interface is no longer active */
2542 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2543 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2545 /* Disarm Hang Detection. */
2546 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2548 txr->busy = EM_TX_IDLE;
2552 /* I219 needs some special flushing to avoid hangs */
2553 if (adapter->hw.mac.type == e1000_pch_spt)
2554 em_flush_desc_rings(adapter);
2556 e1000_reset_hw(&adapter->hw);
2557 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2559 e1000_led_off(&adapter->hw);
2560 e1000_cleanup_led(&adapter->hw);
2564 /*********************************************************************
2566 * Determine hardware revision.
2568 **********************************************************************/
2570 em_identify_hardware(struct adapter *adapter)
2572 device_t dev = adapter->dev;
2574 /* Make sure our PCI config space has the necessary stuff set */
2575 pci_enable_busmaster(dev);
2576 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2578 /* Save off the information about this board */
2579 adapter->hw.vendor_id = pci_get_vendor(dev);
2580 adapter->hw.device_id = pci_get_device(dev);
2581 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2582 adapter->hw.subsystem_vendor_id =
2583 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2584 adapter->hw.subsystem_device_id =
2585 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2587 /* Do Shared Code Init and Setup */
2588 if (e1000_set_mac_type(&adapter->hw)) {
2589 device_printf(dev, "Setup init failure\n");
2595 em_allocate_pci_resources(struct adapter *adapter)
2597 device_t dev = adapter->dev;
2601 adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2603 if (adapter->memory == NULL) {
2604 device_printf(dev, "Unable to allocate bus resource: memory\n");
2607 adapter->osdep.mem_bus_space_tag =
2608 rman_get_bustag(adapter->memory);
2609 adapter->osdep.mem_bus_space_handle =
2610 rman_get_bushandle(adapter->memory);
2611 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2613 adapter->hw.back = &adapter->osdep;
2618 /*********************************************************************
2620 * Setup the Legacy or MSI Interrupt handler
2622 **********************************************************************/
2624 em_allocate_legacy(struct adapter *adapter)
2626 device_t dev = adapter->dev;
2627 struct tx_ring *txr = adapter->tx_rings;
2630 /* Manually turn off all interrupts */
2631 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2633 if (adapter->msix == 1) /* using MSI */
2635 /* We allocate a single interrupt resource */
2636 adapter->res = bus_alloc_resource_any(dev,
2637 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2638 if (adapter->res == NULL) {
2639 device_printf(dev, "Unable to allocate bus resource: "
2645 * Allocate a fast interrupt and the associated
2646 * deferred processing contexts.
2648 TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
2649 adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
2650 taskqueue_thread_enqueue, &adapter->tq);
2651 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
2652 device_get_nameunit(adapter->dev));
2653 /* Use a TX only tasklet for local timer */
2654 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2655 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2656 taskqueue_thread_enqueue, &txr->tq);
2657 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
2658 device_get_nameunit(adapter->dev));
2659 TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
2660 if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
2661 em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
2662 device_printf(dev, "Failed to register fast interrupt "
2663 "handler: %d\n", error);
2664 taskqueue_free(adapter->tq);
2672 /*********************************************************************
2674 * Setup the MSIX Interrupt handlers
2675 * This is not really Multiqueue, rather
2676 * its just seperate interrupt vectors
2677 * for TX, RX, and Link.
2679 **********************************************************************/
2681 em_allocate_msix(struct adapter *adapter)
2683 device_t dev = adapter->dev;
2684 struct tx_ring *txr = adapter->tx_rings;
2685 struct rx_ring *rxr = adapter->rx_rings;
2686 int error, rid, vector = 0;
2690 /* Make sure all interrupts are disabled */
2691 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2693 /* First set up ring resources */
2694 for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
2699 rxr->res = bus_alloc_resource_any(dev,
2700 SYS_RES_IRQ, &rid, RF_ACTIVE);
2701 if (rxr->res == NULL) {
2703 "Unable to allocate bus resource: "
2704 "RX MSIX Interrupt %d\n", i);
2707 if ((error = bus_setup_intr(dev, rxr->res,
2708 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
2709 rxr, &rxr->tag)) != 0) {
2710 device_printf(dev, "Failed to register RX handler");
2713 #if __FreeBSD_version >= 800504
2714 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
2718 if (em_last_bind_cpu < 0)
2719 em_last_bind_cpu = CPU_FIRST();
2720 cpu_id = em_last_bind_cpu;
2721 bus_bind_intr(dev, rxr->res, cpu_id);
2723 TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
2724 rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
2725 taskqueue_thread_enqueue, &rxr->tq);
2726 taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
2727 device_get_nameunit(adapter->dev), cpu_id);
2729 ** Set the bit to enable interrupt
2730 ** in E1000_IMS -- bits 20 and 21
2731 ** are for RX0 and RX1, note this has
2732 ** NOTHING to do with the MSIX vector
2734 rxr->ims = 1 << (20 + i);
2735 adapter->ims |= rxr->ims;
2736 adapter->ivars |= (8 | rxr->msix) << (i * 4);
2738 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2741 for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
2744 txr->res = bus_alloc_resource_any(dev,
2745 SYS_RES_IRQ, &rid, RF_ACTIVE);
2746 if (txr->res == NULL) {
2748 "Unable to allocate bus resource: "
2749 "TX MSIX Interrupt %d\n", i);
2752 if ((error = bus_setup_intr(dev, txr->res,
2753 INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
2754 txr, &txr->tag)) != 0) {
2755 device_printf(dev, "Failed to register TX handler");
2758 #if __FreeBSD_version >= 800504
2759 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
2763 if (em_last_bind_cpu < 0)
2764 em_last_bind_cpu = CPU_FIRST();
2765 cpu_id = em_last_bind_cpu;
2766 bus_bind_intr(dev, txr->res, cpu_id);
2768 TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
2769 txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
2770 taskqueue_thread_enqueue, &txr->tq);
2771 taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
2772 device_get_nameunit(adapter->dev), cpu_id);
2774 ** Set the bit to enable interrupt
2775 ** in E1000_IMS -- bits 22 and 23
2776 ** are for TX0 and TX1, note this has
2777 ** NOTHING to do with the MSIX vector
2779 txr->ims = 1 << (22 + i);
2780 adapter->ims |= txr->ims;
2781 adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
2783 em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
2786 /* Link interrupt */
2788 adapter->res = bus_alloc_resource_any(dev,
2789 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2790 if (!adapter->res) {
2791 device_printf(dev,"Unable to allocate "
2792 "bus resource: Link interrupt [%d]\n", rid);
2795 /* Set the link handler function */
2796 error = bus_setup_intr(dev, adapter->res,
2797 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2798 em_msix_link, adapter, &adapter->tag);
2800 adapter->res = NULL;
2801 device_printf(dev, "Failed to register LINK handler");
2804 #if __FreeBSD_version >= 800504
2805 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2807 adapter->linkvec = vector;
2808 adapter->ivars |= (8 | vector) << 16;
2809 adapter->ivars |= 0x80000000;
2816 em_free_pci_resources(struct adapter *adapter)
2818 device_t dev = adapter->dev;
2819 struct tx_ring *txr;
2820 struct rx_ring *rxr;
2825 ** Release all the queue interrupt resources:
2827 for (int i = 0; i < adapter->num_queues; i++) {
2828 txr = &adapter->tx_rings[i];
2829 /* an early abort? */
2833 if (txr->tag != NULL) {
2834 bus_teardown_intr(dev, txr->res, txr->tag);
2837 if (txr->res != NULL)
2838 bus_release_resource(dev, SYS_RES_IRQ,
2841 rxr = &adapter->rx_rings[i];
2842 /* an early abort? */
2846 if (rxr->tag != NULL) {
2847 bus_teardown_intr(dev, rxr->res, rxr->tag);
2850 if (rxr->res != NULL)
2851 bus_release_resource(dev, SYS_RES_IRQ,
2855 if (adapter->linkvec) /* we are doing MSIX */
2856 rid = adapter->linkvec + 1;
2858 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2860 if (adapter->tag != NULL) {
2861 bus_teardown_intr(dev, adapter->res, adapter->tag);
2862 adapter->tag = NULL;
2865 if (adapter->res != NULL)
2866 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2870 pci_release_msi(dev);
2872 if (adapter->msix_mem != NULL)
2873 bus_release_resource(dev, SYS_RES_MEMORY,
2874 adapter->memrid, adapter->msix_mem);
2876 if (adapter->memory != NULL)
2877 bus_release_resource(dev, SYS_RES_MEMORY,
2878 PCIR_BAR(0), adapter->memory);
2880 if (adapter->flash != NULL)
2881 bus_release_resource(dev, SYS_RES_MEMORY,
2882 EM_FLASH, adapter->flash);
2886 * Setup MSI or MSI/X
2889 em_setup_msix(struct adapter *adapter)
2891 device_t dev = adapter->dev;
2894 /* Nearly always going to use one queue */
2895 adapter->num_queues = 1;
2898 ** Try using MSI-X for Hartwell adapters
2900 if ((adapter->hw.mac.type == e1000_82574) &&
2901 (em_enable_msix == TRUE)) {
2902 #ifdef EM_MULTIQUEUE
2903 adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
2904 if (adapter->num_queues > 1)
2905 em_enable_vectors_82574(adapter);
2907 /* Map the MSIX BAR */
2908 adapter->memrid = PCIR_BAR(EM_MSIX_BAR);
2909 adapter->msix_mem = bus_alloc_resource_any(dev,
2910 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2911 if (adapter->msix_mem == NULL) {
2912 /* May not be enabled */
2913 device_printf(adapter->dev,
2914 "Unable to map MSIX table \n");
2917 val = pci_msix_count(dev);
2919 #ifdef EM_MULTIQUEUE
2920 /* We need 5 vectors in the multiqueue case */
2921 if (adapter->num_queues > 1 ) {
2925 adapter->num_queues = 1;
2926 device_printf(adapter->dev,
2927 "Insufficient MSIX vectors for >1 queue, "
2928 "using single queue...\n");
2937 device_printf(adapter->dev,
2938 "Insufficient MSIX vectors, using MSI\n");
2941 #ifdef EM_MULTIQUEUE
2945 if ((pci_alloc_msix(dev, &val) == 0)) {
2946 device_printf(adapter->dev,
2947 "Using MSIX interrupts "
2948 "with %d vectors\n", val);
2953 ** If MSIX alloc failed or provided us with
2954 ** less than needed, free and fall through to MSI
2956 pci_release_msi(dev);
2959 if (adapter->msix_mem != NULL) {
2960 bus_release_resource(dev, SYS_RES_MEMORY,
2961 adapter->memrid, adapter->msix_mem);
2962 adapter->msix_mem = NULL;
2965 if (pci_alloc_msi(dev, &val) == 0) {
2966 device_printf(adapter->dev, "Using an MSI interrupt\n");
2969 /* Should only happen due to manual configuration */
2970 device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
2976 ** The 3 following flush routines are used as a workaround in the
2977 ** I219 client parts and only for them.
2979 ** em_flush_tx_ring - remove all descriptors from the tx_ring
2981 ** We want to clear all pending descriptors from the TX ring.
2982 ** zeroing happens when the HW reads the regs. We assign the ring itself as
2983 ** the data of the next descriptor. We don't care about the data we are about
2987 em_flush_tx_ring(struct adapter *adapter)
2989 struct e1000_hw *hw = &adapter->hw;
2990 struct tx_ring *txr = adapter->tx_rings;
2991 struct e1000_tx_desc *txd;
2992 u32 tctl, txd_lower = E1000_TXD_CMD_IFCS;
2995 tctl = E1000_READ_REG(hw, E1000_TCTL);
2996 E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);
2998 txd = &txr->tx_base[txr->next_avail_desc++];
2999 if (txr->next_avail_desc == adapter->num_tx_desc)
3000 txr->next_avail_desc = 0;
3002 /* Just use the ring as a dummy buffer addr */
3003 txd->buffer_addr = txr->txdma.dma_paddr;
3004 txd->lower.data = htole32(txd_lower | size);
3005 txd->upper.data = 0;
3007 /* flush descriptors to memory before notifying the HW */
3010 E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc);
3016 ** em_flush_rx_ring - remove all descriptors from the rx_ring
3018 ** Mark all descriptors in the RX ring as consumed and disable the rx ring
3021 em_flush_rx_ring(struct adapter *adapter)
3023 struct e1000_hw *hw = &adapter->hw;
3026 rctl = E1000_READ_REG(hw, E1000_RCTL);
3027 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3028 E1000_WRITE_FLUSH(hw);
3031 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
3032 /* zero the lower 14 bits (prefetch and host thresholds) */
3033 rxdctl &= 0xffffc000;
3035 * update thresholds: prefetch threshold to 31, host threshold to 1
3036 * and make sure the granularity is "descriptors" and not "cache lines"
3038 rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC);
3039 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl);
3041 /* momentarily enable the RX ring for the changes to take effect */
3042 E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN);
3043 E1000_WRITE_FLUSH(hw);
3045 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3049 ** em_flush_desc_rings - remove all descriptors from the descriptor rings
3051 ** In i219, the descriptor rings must be emptied before resetting the HW
3052 ** or before changing the device state to D3 during runtime (runtime PM).
3054 ** Failure to do this will cause the HW to enter a unit hang state which can
3055 ** only be released by PCI reset on the device
3059 em_flush_desc_rings(struct adapter *adapter)
3061 struct e1000_hw *hw = &adapter->hw;
3062 device_t dev = adapter->dev;
3064 u32 fext_nvm11, tdlen;
3066 /* First, disable MULR fix in FEXTNVM11 */
3067 fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11);
3068 fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
3069 E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11);
3071 /* do nothing if we're not in faulty state, or if the queue is empty */
3072 tdlen = E1000_READ_REG(hw, E1000_TDLEN(0));
3073 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3074 if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
3076 em_flush_tx_ring(adapter);
3078 /* recheck, maybe the fault is caused by the rx ring */
3079 hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2);
3080 if (hang_state & FLUSH_DESC_REQUIRED)
3081 em_flush_rx_ring(adapter);
3085 /*********************************************************************
3087 * Initialize the hardware to a configuration
3088 * as specified by the adapter structure.
3090 **********************************************************************/
3092 em_reset(struct adapter *adapter)
3094 device_t dev = adapter->dev;
3095 struct ifnet *ifp = adapter->ifp;
3096 struct e1000_hw *hw = &adapter->hw;
3100 INIT_DEBUGOUT("em_reset: begin");
3102 /* Set up smart power down as default off on newer adapters. */
3103 if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 ||
3104 hw->mac.type == e1000_82572)) {
3107 /* Speed up time to link by disabling smart power down. */
3108 e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp);
3109 phy_tmp &= ~IGP02E1000_PM_SPD;
3110 e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp);
3114 * Packet Buffer Allocation (PBA)
3115 * Writing PBA sets the receive portion of the buffer
3116 * the remainder is used for the transmit buffer.
3118 switch (hw->mac.type) {
3119 /* Total Packet Buffer on these is 48K */
3122 case e1000_80003es2lan:
3123 pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
3125 case e1000_82573: /* 82573: Total Packet Buffer is 32K */
3126 pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */
3130 pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */
3136 case e1000_ich10lan:
3137 /* Boost Receive side for jumbo frames */
3138 if (adapter->hw.mac.max_frame_size > 4096)
3139 pba = E1000_PBA_14K;
3141 pba = E1000_PBA_10K;
3148 pba = E1000_PBA_26K;
3151 if (adapter->hw.mac.max_frame_size > 8192)
3152 pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
3154 pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
3156 E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
3159 * These parameters control the automatic generation (Tx) and
3160 * response (Rx) to Ethernet PAUSE frames.
3161 * - High water mark should allow for at least two frames to be
3162 * received after sending an XOFF.
3163 * - Low water mark works best when it is very near the high water mark.
3164 * This allows the receiver to restart by sending XON when it has
3165 * drained a bit. Here we use an arbitary value of 1500 which will
3166 * restart after one full frame is pulled from the buffer. There
3167 * could be several smaller frames in the buffer and if so they will
3168 * not trigger the XON until their total number reduces the buffer
3170 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
3172 rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
3173 hw->fc.high_water = rx_buffer_size -
3174 roundup2(adapter->hw.mac.max_frame_size, 1024);
3175 hw->fc.low_water = hw->fc.high_water - 1500;
3177 if (adapter->fc) /* locally set flow control value? */
3178 hw->fc.requested_mode = adapter->fc;
3180 hw->fc.requested_mode = e1000_fc_full;
3182 if (hw->mac.type == e1000_80003es2lan)
3183 hw->fc.pause_time = 0xFFFF;
3185 hw->fc.pause_time = EM_FC_PAUSE_TIME;
3187 hw->fc.send_xon = TRUE;
3189 /* Device specific overrides/settings */
3190 switch (hw->mac.type) {
3192 /* Workaround: no TX flow ctrl for PCH */
3193 hw->fc.requested_mode = e1000_fc_rx_pause;
3194 hw->fc.pause_time = 0xFFFF; /* override */
3195 if (ifp->if_mtu > ETHERMTU) {
3196 hw->fc.high_water = 0x3500;
3197 hw->fc.low_water = 0x1500;
3199 hw->fc.high_water = 0x5000;
3200 hw->fc.low_water = 0x3000;
3202 hw->fc.refresh_time = 0x1000;
3208 hw->fc.high_water = 0x5C20;
3209 hw->fc.low_water = 0x5048;
3210 hw->fc.pause_time = 0x0650;
3211 hw->fc.refresh_time = 0x0400;
3212 /* Jumbos need adjusted PBA */
3213 if (ifp->if_mtu > ETHERMTU)
3214 E1000_WRITE_REG(hw, E1000_PBA, 12);
3216 E1000_WRITE_REG(hw, E1000_PBA, 26);
3219 case e1000_ich10lan:
3220 if (ifp->if_mtu > ETHERMTU) {
3221 hw->fc.high_water = 0x2800;
3222 hw->fc.low_water = hw->fc.high_water - 8;
3225 /* else fall thru */
3227 if (hw->mac.type == e1000_80003es2lan)
3228 hw->fc.pause_time = 0xFFFF;
3232 /* I219 needs some special flushing to avoid hangs */
3233 if (hw->mac.type == e1000_pch_spt)
3234 em_flush_desc_rings(adapter);
3236 /* Issue a global reset */
3238 E1000_WRITE_REG(hw, E1000_WUC, 0);
3239 em_disable_aspm(adapter);
3241 if (e1000_init_hw(hw) < 0) {
3242 device_printf(dev, "Hardware Initialization Failed\n");
3246 E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
3247 e1000_get_phy_info(hw);
3248 e1000_check_for_link(hw);
3252 /*********************************************************************
3254 * Setup networking device structure and register an interface.
3256 **********************************************************************/
3258 em_setup_interface(device_t dev, struct adapter *adapter)
3262 INIT_DEBUGOUT("em_setup_interface: begin");
3264 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3266 device_printf(dev, "can not allocate ifnet structure\n");
3269 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3270 ifp->if_init = em_init;
3271 ifp->if_softc = adapter;
3272 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3273 ifp->if_ioctl = em_ioctl;
3275 /* TSO parameters */
3276 ifp->if_hw_tsomax = IP_MAXPACKET;
3277 /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */
3278 ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5;
3279 ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
3281 #ifdef EM_MULTIQUEUE
3282 /* Multiqueue stack interface */
3283 ifp->if_transmit = em_mq_start;
3284 ifp->if_qflush = em_qflush;
3286 ifp->if_start = em_start;
3287 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3288 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3289 IFQ_SET_READY(&ifp->if_snd);
3292 ether_ifattach(ifp, adapter->hw.mac.addr);
3294 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3295 ifp->if_capenable = ifp->if_capabilities;
3298 * Tell the upper layer(s) we
3299 * support full VLAN capability
3301 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3302 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3305 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3309 * We don't enable IFCAP_{TSO4,VLAN_HWTSO} by default because:
3310 * - Although the silicon bug of TSO only working at gigabit speed is
3311 * worked around in em_update_link_status() by selectively setting
3312 * CSUM_IP_TSO, we cannot atomically flush already queued TSO-using
3313 * descriptors. Thus, such descriptors may still cause the MAC to
3314 * hang and, consequently, TSO is only safe to be used in setups
3315 * where the link isn't expected to switch from gigabit to lower
3317 * - Similarly, there's currently no way to trigger a reconfiguration
3318 * of vlan(4) when the state of IFCAP_VLAN_HWTSO support changes at
3319 * runtime. Therefore, IFCAP_VLAN_HWTSO also only is safe to use
3320 * when link speed changes are not to be expected.
3321 * - Despite all the workarounds for TSO-related silicon bugs, at
3322 * least 82579 still may hang at gigabit speed with IFCAP_TSO4.
3324 ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
3327 ** Don't turn this on by default, if vlans are
3328 ** created on another pseudo device (eg. lagg)
3329 ** then vlan events are not passed thru, breaking
3330 ** operation, but with HW FILTER off it works. If
3331 ** using vlans directly on the em driver you can
3332 ** enable this and get full hardware tag filtering.
3334 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3336 #ifdef DEVICE_POLLING
3337 ifp->if_capabilities |= IFCAP_POLLING;
3340 /* Enable only WOL MAGIC by default */
3342 ifp->if_capabilities |= IFCAP_WOL;
3343 ifp->if_capenable |= IFCAP_WOL_MAGIC;
3347 * Specify the media types supported by this adapter and register
3348 * callbacks to update media and link information
3350 ifmedia_init(&adapter->media, IFM_IMASK,
3351 em_media_change, em_media_status);
3352 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3353 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3354 u_char fiber_type = IFM_1000_SX; /* default type */
3356 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
3358 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
3360 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3361 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3363 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3365 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3367 if (adapter->hw.phy.type != e1000_phy_ife) {
3368 ifmedia_add(&adapter->media,
3369 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3370 ifmedia_add(&adapter->media,
3371 IFM_ETHER | IFM_1000_T, 0, NULL);
3374 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3375 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3381 * Manage DMA'able memory.
3384 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3388 *(bus_addr_t *) arg = segs[0].ds_addr;
3392 em_dma_malloc(struct adapter *adapter, bus_size_t size,
3393 struct em_dma_alloc *dma, int mapflags)
3397 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3398 EM_DBA_ALIGN, 0, /* alignment, bounds */
3399 BUS_SPACE_MAXADDR, /* lowaddr */
3400 BUS_SPACE_MAXADDR, /* highaddr */
3401 NULL, NULL, /* filter, filterarg */
3404 size, /* maxsegsize */
3406 NULL, /* lockfunc */
3410 device_printf(adapter->dev,
3411 "%s: bus_dma_tag_create failed: %d\n",
3416 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3417 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3419 device_printf(adapter->dev,
3420 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3421 __func__, (uintmax_t)size, error);
3426 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3427 size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3428 if (error || dma->dma_paddr == 0) {
3429 device_printf(adapter->dev,
3430 "%s: bus_dmamap_load failed: %d\n",
3438 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3440 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3441 bus_dma_tag_destroy(dma->dma_tag);
3443 dma->dma_tag = NULL;
3449 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
3451 if (dma->dma_tag == NULL)
3453 if (dma->dma_paddr != 0) {
3454 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3455 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3456 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3459 if (dma->dma_vaddr != NULL) {
3460 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3461 dma->dma_vaddr = NULL;
3463 bus_dma_tag_destroy(dma->dma_tag);
3464 dma->dma_tag = NULL;
3468 /*********************************************************************
3470 * Allocate memory for the transmit and receive rings, and then
3471 * the descriptors associated with each, called only once at attach.
3473 **********************************************************************/
3475 em_allocate_queues(struct adapter *adapter)
3477 device_t dev = adapter->dev;
3478 struct tx_ring *txr = NULL;
3479 struct rx_ring *rxr = NULL;
3480 int rsize, tsize, error = E1000_SUCCESS;
3481 int txconf = 0, rxconf = 0;
3484 /* Allocate the TX ring struct memory */
3485 if (!(adapter->tx_rings =
3486 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3487 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3488 device_printf(dev, "Unable to allocate TX ring memory\n");
3493 /* Now allocate the RX */
3494 if (!(adapter->rx_rings =
3495 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3496 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3497 device_printf(dev, "Unable to allocate RX ring memory\n");
3502 tsize = roundup2(adapter->num_tx_desc *
3503 sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
3505 * Now set up the TX queues, txconf is needed to handle the
3506 * possibility that things fail midcourse and we need to
3507 * undo memory gracefully
3509 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3510 /* Set up some basics */
3511 txr = &adapter->tx_rings[i];
3512 txr->adapter = adapter;
3515 /* Initialize the TX lock */
3516 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3517 device_get_nameunit(dev), txr->me);
3518 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3520 if (em_dma_malloc(adapter, tsize,
3521 &txr->txdma, BUS_DMA_NOWAIT)) {
3523 "Unable to allocate TX Descriptor memory\n");
3527 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3528 bzero((void *)txr->tx_base, tsize);
3530 if (em_allocate_transmit_buffers(txr)) {
3532 "Critical Failure setting up transmit buffers\n");
3536 #if __FreeBSD_version >= 800000
3537 /* Allocate a buf ring */
3538 txr->br = buf_ring_alloc(4096, M_DEVBUF,
3539 M_WAITOK, &txr->tx_mtx);
3544 * Next the RX queues...
3546 rsize = roundup2(adapter->num_rx_desc *
3547 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
3548 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3549 rxr = &adapter->rx_rings[i];
3550 rxr->adapter = adapter;
3553 /* Initialize the RX lock */
3554 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3555 device_get_nameunit(dev), txr->me);
3556 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3558 if (em_dma_malloc(adapter, rsize,
3559 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3561 "Unable to allocate RxDescriptor memory\n");
3565 rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
3566 bzero((void *)rxr->rx_base, rsize);
3568 /* Allocate receive buffers for the ring*/
3569 if (em_allocate_receive_buffers(rxr)) {
3571 "Critical Failure setting up receive buffers\n");
3580 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3581 em_dma_free(adapter, &rxr->rxdma);
3583 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3584 em_dma_free(adapter, &txr->txdma);
3585 free(adapter->rx_rings, M_DEVBUF);
3587 #if __FreeBSD_version >= 800000
3588 buf_ring_free(txr->br, M_DEVBUF);
3590 free(adapter->tx_rings, M_DEVBUF);
3596 /*********************************************************************
3598 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3599 * the information needed to transmit a packet on the wire. This is
3600 * called only once at attach, setup is done every reset.
3602 **********************************************************************/
3604 em_allocate_transmit_buffers(struct tx_ring *txr)
3606 struct adapter *adapter = txr->adapter;
3607 device_t dev = adapter->dev;
3608 struct em_txbuffer *txbuf;
3612 * Setup DMA descriptor areas.
3614 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3615 1, 0, /* alignment, bounds */
3616 BUS_SPACE_MAXADDR, /* lowaddr */
3617 BUS_SPACE_MAXADDR, /* highaddr */
3618 NULL, NULL, /* filter, filterarg */
3619 EM_TSO_SIZE, /* maxsize */
3620 EM_MAX_SCATTER, /* nsegments */
3621 PAGE_SIZE, /* maxsegsize */
3623 NULL, /* lockfunc */
3624 NULL, /* lockfuncarg */
3626 device_printf(dev,"Unable to allocate TX DMA tag\n");
3630 if (!(txr->tx_buffers =
3631 (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
3632 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3633 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3638 /* Create the descriptor buffer dma maps */
3639 txbuf = txr->tx_buffers;
3640 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3641 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3643 device_printf(dev, "Unable to create TX DMA map\n");
3650 /* We free all, it handles case where we are in the middle */
3651 em_free_transmit_structures(adapter);
3655 /*********************************************************************
3657 * Initialize a transmit ring.
3659 **********************************************************************/
3661 em_setup_transmit_ring(struct tx_ring *txr)
3663 struct adapter *adapter = txr->adapter;
3664 struct em_txbuffer *txbuf;
3667 struct netmap_adapter *na = NA(adapter->ifp);
3668 struct netmap_slot *slot;
3669 #endif /* DEV_NETMAP */
3671 /* Clear the old descriptor contents */
3674 slot = netmap_reset(na, NR_TX, txr->me, 0);
3675 #endif /* DEV_NETMAP */
3677 bzero((void *)txr->tx_base,
3678 (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
3680 txr->next_avail_desc = 0;
3681 txr->next_to_clean = 0;
3683 /* Free any existing tx buffers. */
3684 txbuf = txr->tx_buffers;
3685 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3686 if (txbuf->m_head != NULL) {
3687 bus_dmamap_sync(txr->txtag, txbuf->map,
3688 BUS_DMASYNC_POSTWRITE);
3689 bus_dmamap_unload(txr->txtag, txbuf->map);
3690 m_freem(txbuf->m_head);
3691 txbuf->m_head = NULL;
3695 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3699 addr = PNMB(na, slot + si, &paddr);
3700 txr->tx_base[i].buffer_addr = htole64(paddr);
3701 /* reload the map for netmap mode */
3702 netmap_load_map(na, txr->txtag, txbuf->map, addr);
3704 #endif /* DEV_NETMAP */
3706 /* clear the watch index */
3707 txbuf->next_eop = -1;
3710 /* Set number of descriptors available */
3711 txr->tx_avail = adapter->num_tx_desc;
3712 txr->busy = EM_TX_IDLE;
3714 /* Clear checksum offload context. */
3715 txr->last_hw_offload = 0;
3716 txr->last_hw_ipcss = 0;
3717 txr->last_hw_ipcso = 0;
3718 txr->last_hw_tucss = 0;
3719 txr->last_hw_tucso = 0;
3721 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3722 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3726 /*********************************************************************
3728 * Initialize all transmit rings.
3730 **********************************************************************/
3732 em_setup_transmit_structures(struct adapter *adapter)
3734 struct tx_ring *txr = adapter->tx_rings;
3736 for (int i = 0; i < adapter->num_queues; i++, txr++)
3737 em_setup_transmit_ring(txr);
3742 /*********************************************************************
3744 * Enable transmit unit.
3746 **********************************************************************/
3748 em_initialize_transmit_unit(struct adapter *adapter)
3750 struct tx_ring *txr = adapter->tx_rings;
3751 struct e1000_hw *hw = &adapter->hw;
3752 u32 tctl, txdctl = 0, tarc, tipg = 0;
3754 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
3756 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3757 u64 bus_addr = txr->txdma.dma_paddr;
3758 /* Base and Len of TX Ring */
3759 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3760 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3761 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3762 (u32)(bus_addr >> 32));
3763 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3765 /* Init the HEAD/TAIL indices */
3766 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3767 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3769 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3770 E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
3771 E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
3773 txr->busy = EM_TX_IDLE;
3774 txdctl = 0; /* clear txdctl */
3775 txdctl |= 0x1f; /* PTHRESH */
3776 txdctl |= 1 << 8; /* HTHRESH */
3777 txdctl |= 1 << 16;/* WTHRESH */
3778 txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
3779 txdctl |= E1000_TXDCTL_GRAN;
3780 txdctl |= 1 << 25; /* LWTHRESH */
3782 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3785 /* Set the default values for the Tx Inter Packet Gap timer */
3786 switch (adapter->hw.mac.type) {
3787 case e1000_80003es2lan:
3788 tipg = DEFAULT_82543_TIPG_IPGR1;
3789 tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
3790 E1000_TIPG_IPGR2_SHIFT;
3793 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3794 (adapter->hw.phy.media_type ==
3795 e1000_media_type_internal_serdes))
3796 tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
3798 tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
3799 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
3800 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
3803 E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
3804 E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
3806 if(adapter->hw.mac.type >= e1000_82540)
3807 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
3808 adapter->tx_abs_int_delay.value);
3810 if ((adapter->hw.mac.type == e1000_82571) ||
3811 (adapter->hw.mac.type == e1000_82572)) {
3812 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3813 tarc |= TARC_SPEED_MODE_BIT;
3814 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3815 } else if (adapter->hw.mac.type == e1000_80003es2lan) {
3816 /* errata: program both queues to unweighted RR */
3817 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3819 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3820 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
3822 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3823 } else if (adapter->hw.mac.type == e1000_82574) {
3824 tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
3825 tarc |= TARC_ERRATA_BIT;
3826 if ( adapter->num_queues > 1) {
3827 tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
3828 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3829 E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
3831 E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
3834 adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3835 if (adapter->tx_int_delay.value > 0)
3836 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
3838 /* Program the Transmit Control Register */
3839 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3840 tctl &= ~E1000_TCTL_CT;
3841 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3842 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3844 if (adapter->hw.mac.type >= e1000_82571)
3845 tctl |= E1000_TCTL_MULR;
3847 /* This write will effectively turn on the transmit unit. */
3848 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3850 /* SPT and KBL errata workarounds */
3851 if (hw->mac.type == e1000_pch_spt) {
3853 reg = E1000_READ_REG(hw, E1000_IOSFPC);
3854 reg |= E1000_RCTL_RDMTS_HEX;
3855 E1000_WRITE_REG(hw, E1000_IOSFPC, reg);
3856 /* i218-i219 Specification Update 1.5.4.5 */
3857 reg = E1000_READ_REG(hw, E1000_TARC(0));
3858 reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
3859 reg |= E1000_TARC0_CB_MULTIQ_2_REQ;
3860 E1000_WRITE_REG(hw, E1000_TARC(0), reg);
3865 /*********************************************************************
3867 * Free all transmit rings.
3869 **********************************************************************/
3871 em_free_transmit_structures(struct adapter *adapter)
3873 struct tx_ring *txr = adapter->tx_rings;
3875 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3877 em_free_transmit_buffers(txr);
3878 em_dma_free(adapter, &txr->txdma);
3880 EM_TX_LOCK_DESTROY(txr);
3883 free(adapter->tx_rings, M_DEVBUF);
3886 /*********************************************************************
3888 * Free transmit ring related data structures.
3890 **********************************************************************/
3892 em_free_transmit_buffers(struct tx_ring *txr)
3894 struct adapter *adapter = txr->adapter;
3895 struct em_txbuffer *txbuf;
3897 INIT_DEBUGOUT("free_transmit_ring: begin");
3899 if (txr->tx_buffers == NULL)
3902 for (int i = 0; i < adapter->num_tx_desc; i++) {
3903 txbuf = &txr->tx_buffers[i];
3904 if (txbuf->m_head != NULL) {
3905 bus_dmamap_sync(txr->txtag, txbuf->map,
3906 BUS_DMASYNC_POSTWRITE);
3907 bus_dmamap_unload(txr->txtag,
3909 m_freem(txbuf->m_head);
3910 txbuf->m_head = NULL;
3911 if (txbuf->map != NULL) {
3912 bus_dmamap_destroy(txr->txtag,
3916 } else if (txbuf->map != NULL) {
3917 bus_dmamap_unload(txr->txtag,
3919 bus_dmamap_destroy(txr->txtag,
3924 #if __FreeBSD_version >= 800000
3925 if (txr->br != NULL)
3926 buf_ring_free(txr->br, M_DEVBUF);
3928 if (txr->tx_buffers != NULL) {
3929 free(txr->tx_buffers, M_DEVBUF);
3930 txr->tx_buffers = NULL;
3932 if (txr->txtag != NULL) {
3933 bus_dma_tag_destroy(txr->txtag);
3940 /*********************************************************************
3941 * The offload context is protocol specific (TCP/UDP) and thus
3942 * only needs to be set when the protocol changes. The occasion
3943 * of a context change can be a performance detriment, and
3944 * might be better just disabled. The reason arises in the way
3945 * in which the controller supports pipelined requests from the
3946 * Tx data DMA. Up to four requests can be pipelined, and they may
3947 * belong to the same packet or to multiple packets. However all
3948 * requests for one packet are issued before a request is issued
3949 * for a subsequent packet and if a request for the next packet
3950 * requires a context change, that request will be stalled
3951 * until the previous request completes. This means setting up
3952 * a new context effectively disables pipelined Tx data DMA which
3953 * in turn greatly slow down performance to send small sized
3955 **********************************************************************/
3957 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
3958 struct ip *ip, u32 *txd_upper, u32 *txd_lower)
3960 struct adapter *adapter = txr->adapter;
3961 struct e1000_context_desc *TXD = NULL;
3962 struct em_txbuffer *tx_buffer;
3966 u8 ipcso, ipcss, tucso, tucss;
3968 ipcss = ipcso = tucss = tucso = 0;
3969 hdr_len = ip_off + (ip->ip_hl << 2);
3970 cur = txr->next_avail_desc;
3972 /* Setup of IP header checksum. */
3973 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3974 *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3977 ipcso = ip_off + offsetof(struct ip, ip_sum);
3979 * Start offset for header checksum calculation.
3980 * End offset for header checksum calculation.
3981 * Offset of place to put the checksum.
3983 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
3984 TXD->lower_setup.ip_fields.ipcss = ipcss;
3985 TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
3986 TXD->lower_setup.ip_fields.ipcso = ipcso;
3987 cmd |= E1000_TXD_CMD_IP;
3990 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3991 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3992 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3993 offload |= CSUM_TCP;
3995 tucso = hdr_len + offsetof(struct tcphdr, th_sum);
3997 * The 82574L can only remember the *last* context used
3998 * regardless of queue that it was use for. We cannot reuse
3999 * contexts on this hardware platform and must generate a new
4000 * context every time. 82574L hardware spec, section 7.2.6,
4003 if (adapter->num_queues < 2) {
4005 * Setting up new checksum offload context for every
4006 * frames takes a lot of processing time for hardware.
4007 * This also reduces performance a lot for small sized
4008 * frames so avoid it if driver can use previously
4009 * configured checksum offload context.
4011 if (txr->last_hw_offload == offload) {
4012 if (offload & CSUM_IP) {
4013 if (txr->last_hw_ipcss == ipcss &&
4014 txr->last_hw_ipcso == ipcso &&
4015 txr->last_hw_tucss == tucss &&
4016 txr->last_hw_tucso == tucso)
4019 if (txr->last_hw_tucss == tucss &&
4020 txr->last_hw_tucso == tucso)
4024 txr->last_hw_offload = offload;
4025 txr->last_hw_tucss = tucss;
4026 txr->last_hw_tucso = tucso;
4029 * Start offset for payload checksum calculation.
4030 * End offset for payload checksum calculation.
4031 * Offset of place to put the checksum.
4033 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4034 TXD->upper_setup.tcp_fields.tucss = hdr_len;
4035 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4036 TXD->upper_setup.tcp_fields.tucso = tucso;
4037 cmd |= E1000_TXD_CMD_TCP;
4038 } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
4039 *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
4040 *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
4042 tucso = hdr_len + offsetof(struct udphdr, uh_sum);
4044 * The 82574L can only remember the *last* context used
4045 * regardless of queue that it was use for. We cannot reuse
4046 * contexts on this hardware platform and must generate a new
4047 * context every time. 82574L hardware spec, section 7.2.6,
4050 if (adapter->num_queues < 2) {
4052 * Setting up new checksum offload context for every
4053 * frames takes a lot of processing time for hardware.
4054 * This also reduces performance a lot for small sized
4055 * frames so avoid it if driver can use previously
4056 * configured checksum offload context.
4058 if (txr->last_hw_offload == offload) {
4059 if (offload & CSUM_IP) {
4060 if (txr->last_hw_ipcss == ipcss &&
4061 txr->last_hw_ipcso == ipcso &&
4062 txr->last_hw_tucss == tucss &&
4063 txr->last_hw_tucso == tucso)
4066 if (txr->last_hw_tucss == tucss &&
4067 txr->last_hw_tucso == tucso)
4071 txr->last_hw_offload = offload;
4072 txr->last_hw_tucss = tucss;
4073 txr->last_hw_tucso = tucso;
4076 * Start offset for header checksum calculation.
4077 * End offset for header checksum calculation.
4078 * Offset of place to put the checksum.
4080 TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
4081 TXD->upper_setup.tcp_fields.tucss = tucss;
4082 TXD->upper_setup.tcp_fields.tucse = htole16(0);
4083 TXD->upper_setup.tcp_fields.tucso = tucso;
4086 if (offload & CSUM_IP) {
4087 txr->last_hw_ipcss = ipcss;
4088 txr->last_hw_ipcso = ipcso;
4091 TXD->tcp_seg_setup.data = htole32(0);
4092 TXD->cmd_and_length =
4093 htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
4094 tx_buffer = &txr->tx_buffers[cur];
4095 tx_buffer->m_head = NULL;
4096 tx_buffer->next_eop = -1;
4098 if (++cur == adapter->num_tx_desc)
4102 txr->next_avail_desc = cur;
4106 /**********************************************************************
4108 * Setup work for hardware segmentation offload (TSO)
4110 **********************************************************************/
4112 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
4113 struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
4115 struct adapter *adapter = txr->adapter;
4116 struct e1000_context_desc *TXD;
4117 struct em_txbuffer *tx_buffer;
4121 * In theory we can use the same TSO context if and only if
4122 * frame is the same type(IP/TCP) and the same MSS. However
4123 * checking whether a frame has the same IP/TCP structure is
4124 * hard thing so just ignore that and always restablish a
4127 hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
4128 *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */
4129 E1000_TXD_DTYP_D | /* Data descr type */
4130 E1000_TXD_CMD_TSE); /* Do TSE on this packet */
4132 /* IP and/or TCP header checksum calculation and insertion. */
4133 *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
4135 cur = txr->next_avail_desc;
4136 tx_buffer = &txr->tx_buffers[cur];
4137 TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
4140 * Start offset for header checksum calculation.
4141 * End offset for header checksum calculation.
4142 * Offset of place put the checksum.
4144 TXD->lower_setup.ip_fields.ipcss = ip_off;
4145 TXD->lower_setup.ip_fields.ipcse =
4146 htole16(ip_off + (ip->ip_hl << 2) - 1);
4147 TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
4149 * Start offset for payload checksum calculation.
4150 * End offset for payload checksum calculation.
4151 * Offset of place to put the checksum.
4153 TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
4154 TXD->upper_setup.tcp_fields.tucse = 0;
4155 TXD->upper_setup.tcp_fields.tucso =
4156 ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
4158 * Payload size per packet w/o any headers.
4159 * Length of all headers up to payload.
4161 TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
4162 TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
4164 TXD->cmd_and_length = htole32(adapter->txd_cmd |
4165 E1000_TXD_CMD_DEXT | /* Extended descr */
4166 E1000_TXD_CMD_TSE | /* TSE context */
4167 E1000_TXD_CMD_IP | /* Do IP csum */
4168 E1000_TXD_CMD_TCP | /* Do TCP checksum */
4169 (mp->m_pkthdr.len - (hdr_len))); /* Total len */
4171 tx_buffer->m_head = NULL;
4172 tx_buffer->next_eop = -1;
4174 if (++cur == adapter->num_tx_desc)
4178 txr->next_avail_desc = cur;
4183 /**********************************************************************
4185 * Examine each tx_buffer in the used queue. If the hardware is done
4186 * processing the packet then free associated resources. The
4187 * tx_buffer is put back on the free queue.
4189 **********************************************************************/
4191 em_txeof(struct tx_ring *txr)
4193 struct adapter *adapter = txr->adapter;
4194 int first, last, done, processed;
4195 struct em_txbuffer *tx_buffer;
4196 struct e1000_tx_desc *tx_desc, *eop_desc;
4197 struct ifnet *ifp = adapter->ifp;
4199 EM_TX_LOCK_ASSERT(txr);
4201 if (netmap_tx_irq(ifp, txr->me))
4203 #endif /* DEV_NETMAP */
4205 /* No work, make sure hang detection is disabled */
4206 if (txr->tx_avail == adapter->num_tx_desc) {
4207 txr->busy = EM_TX_IDLE;
4212 first = txr->next_to_clean;
4213 tx_desc = &txr->tx_base[first];
4214 tx_buffer = &txr->tx_buffers[first];
4215 last = tx_buffer->next_eop;
4216 eop_desc = &txr->tx_base[last];
4219 * What this does is get the index of the
4220 * first descriptor AFTER the EOP of the
4221 * first packet, that way we can do the
4222 * simple comparison on the inner while loop.
4224 if (++last == adapter->num_tx_desc)
4228 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4229 BUS_DMASYNC_POSTREAD);
4231 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
4232 /* We clean the range of the packet */
4233 while (first != done) {
4234 tx_desc->upper.data = 0;
4235 tx_desc->lower.data = 0;
4236 tx_desc->buffer_addr = 0;
4240 if (tx_buffer->m_head) {
4241 bus_dmamap_sync(txr->txtag,
4243 BUS_DMASYNC_POSTWRITE);
4244 bus_dmamap_unload(txr->txtag,
4246 m_freem(tx_buffer->m_head);
4247 tx_buffer->m_head = NULL;
4249 tx_buffer->next_eop = -1;
4251 if (++first == adapter->num_tx_desc)
4254 tx_buffer = &txr->tx_buffers[first];
4255 tx_desc = &txr->tx_base[first];
4258 /* See if we can continue to the next packet */
4259 last = tx_buffer->next_eop;
4261 eop_desc = &txr->tx_base[last];
4262 /* Get new done point */
4263 if (++last == adapter->num_tx_desc) last = 0;
4268 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4269 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4271 txr->next_to_clean = first;
4274 ** Hang detection: we know there's work outstanding
4275 ** or the entry return would have been taken, so no
4276 ** descriptor processed here indicates a potential hang.
4277 ** The local timer will examine this and do a reset if needed.
4279 if (processed == 0) {
4280 if (txr->busy != EM_TX_HUNG)
4282 } else /* At least one descriptor was cleaned */
4283 txr->busy = EM_TX_BUSY; /* note this clears HUNG */
4286 * If we have a minimum free, clear IFF_DRV_OACTIVE
4287 * to tell the stack that it is OK to send packets.
4288 * Notice that all writes of OACTIVE happen under the
4289 * TX lock which, with a single queue, guarantees
4292 if (txr->tx_avail >= EM_MAX_SCATTER) {
4293 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
4296 /* Disable hang detection if all clean */
4297 if (txr->tx_avail == adapter->num_tx_desc)
4298 txr->busy = EM_TX_IDLE;
4301 /*********************************************************************
4303 * Refresh RX descriptor mbufs from system mbuf buffer pool.
4305 **********************************************************************/
4307 em_refresh_mbufs(struct rx_ring *rxr, int limit)
4309 struct adapter *adapter = rxr->adapter;
4311 bus_dma_segment_t segs;
4312 struct em_rxbuffer *rxbuf;
4313 int i, j, error, nsegs;
4314 bool cleaned = FALSE;
4316 i = j = rxr->next_to_refresh;
4318 ** Get one descriptor beyond
4319 ** our work mark to control
4322 if (++j == adapter->num_rx_desc)
4325 while (j != limit) {
4326 rxbuf = &rxr->rx_buffers[i];
4327 if (rxbuf->m_head == NULL) {
4328 m = m_getjcl(M_NOWAIT, MT_DATA,
4329 M_PKTHDR, adapter->rx_mbuf_sz);
4331 ** If we have a temporary resource shortage
4332 ** that causes a failure, just abort refresh
4333 ** for now, we will return to this point when
4334 ** reinvoked from em_rxeof.
4341 m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
4342 m->m_flags |= M_PKTHDR;
4343 m->m_data = m->m_ext.ext_buf;
4345 /* Use bus_dma machinery to setup the memory mapping */
4346 error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
4347 m, &segs, &nsegs, BUS_DMA_NOWAIT);
4349 printf("Refresh mbufs: hdr dmamap load"
4350 " failure - %d\n", error);
4352 rxbuf->m_head = NULL;
4356 rxbuf->paddr = segs.ds_addr;
4357 bus_dmamap_sync(rxr->rxtag,
4358 rxbuf->map, BUS_DMASYNC_PREREAD);
4359 em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4362 i = j; /* Next is precalulated for us */
4363 rxr->next_to_refresh = i;
4364 /* Calculate next controlling index */
4365 if (++j == adapter->num_rx_desc)
4370 ** Update the tail pointer only if,
4371 ** and as far as we have refreshed.
4374 E1000_WRITE_REG(&adapter->hw,
4375 E1000_RDT(rxr->me), rxr->next_to_refresh);
4381 /*********************************************************************
4383 * Allocate memory for rx_buffer structures. Since we use one
4384 * rx_buffer per received packet, the maximum number of rx_buffer's
4385 * that we'll need is equal to the number of receive descriptors
4386 * that we've allocated.
4388 **********************************************************************/
4390 em_allocate_receive_buffers(struct rx_ring *rxr)
4392 struct adapter *adapter = rxr->adapter;
4393 device_t dev = adapter->dev;
4394 struct em_rxbuffer *rxbuf;
4397 rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
4398 adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
4399 if (rxr->rx_buffers == NULL) {
4400 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4404 error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
4405 1, 0, /* alignment, bounds */
4406 BUS_SPACE_MAXADDR, /* lowaddr */
4407 BUS_SPACE_MAXADDR, /* highaddr */
4408 NULL, NULL, /* filter, filterarg */
4409 MJUM9BYTES, /* maxsize */
4411 MJUM9BYTES, /* maxsegsize */
4413 NULL, /* lockfunc */
4417 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
4422 rxbuf = rxr->rx_buffers;
4423 for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
4424 rxbuf = &rxr->rx_buffers[i];
4425 error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
4427 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
4436 em_free_receive_structures(adapter);
4441 /*********************************************************************
4443 * Initialize a receive ring and its buffers.
4445 **********************************************************************/
4447 em_setup_receive_ring(struct rx_ring *rxr)
4449 struct adapter *adapter = rxr->adapter;
4450 struct em_rxbuffer *rxbuf;
4451 bus_dma_segment_t seg[1];
4452 int rsize, nsegs, error = 0;
4454 struct netmap_adapter *na = NA(adapter->ifp);
4455 struct netmap_slot *slot;
4459 /* Clear the ring contents */
4461 rsize = roundup2(adapter->num_rx_desc *
4462 sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
4463 bzero((void *)rxr->rx_base, rsize);
4465 slot = netmap_reset(na, NR_RX, 0, 0);
4469 ** Free current RX buffer structs and their mbufs
4471 for (int i = 0; i < adapter->num_rx_desc; i++) {
4472 rxbuf = &rxr->rx_buffers[i];
4473 if (rxbuf->m_head != NULL) {
4474 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4475 BUS_DMASYNC_POSTREAD);
4476 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4477 m_freem(rxbuf->m_head);
4478 rxbuf->m_head = NULL; /* mark as freed */
4482 /* Now replenish the mbufs */
4483 for (int j = 0; j != adapter->num_rx_desc; ++j) {
4484 rxbuf = &rxr->rx_buffers[j];
4487 int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4491 addr = PNMB(na, slot + si, &paddr);
4492 netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
4493 rxbuf->paddr = paddr;
4494 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4497 #endif /* DEV_NETMAP */
4498 rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
4499 M_PKTHDR, adapter->rx_mbuf_sz);
4500 if (rxbuf->m_head == NULL) {
4504 rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
4505 rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
4506 rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
4508 /* Get the memory mapping */
4509 error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
4510 rxbuf->map, rxbuf->m_head, seg,
4511 &nsegs, BUS_DMA_NOWAIT);
4513 m_freem(rxbuf->m_head);
4514 rxbuf->m_head = NULL;
4517 bus_dmamap_sync(rxr->rxtag,
4518 rxbuf->map, BUS_DMASYNC_PREREAD);
4520 rxbuf->paddr = seg[0].ds_addr;
4521 em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
4523 rxr->next_to_check = 0;
4524 rxr->next_to_refresh = 0;
4525 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4526 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4533 /*********************************************************************
4535 * Initialize all receive rings.
4537 **********************************************************************/
4539 em_setup_receive_structures(struct adapter *adapter)
4541 struct rx_ring *rxr = adapter->rx_rings;
4544 for (q = 0; q < adapter->num_queues; q++, rxr++)
4545 if (em_setup_receive_ring(rxr))
4551 * Free RX buffers allocated so far, we will only handle
4552 * the rings that completed, the failing case will have
4553 * cleaned up for itself. 'q' failed, so its the terminus.
4555 for (int i = 0; i < q; ++i) {
4556 rxr = &adapter->rx_rings[i];
4557 for (int n = 0; n < adapter->num_rx_desc; n++) {
4558 struct em_rxbuffer *rxbuf;
4559 rxbuf = &rxr->rx_buffers[n];
4560 if (rxbuf->m_head != NULL) {
4561 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4562 BUS_DMASYNC_POSTREAD);
4563 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4564 m_freem(rxbuf->m_head);
4565 rxbuf->m_head = NULL;
4568 rxr->next_to_check = 0;
4569 rxr->next_to_refresh = 0;
4575 /*********************************************************************
4577 * Free all receive rings.
4579 **********************************************************************/
4581 em_free_receive_structures(struct adapter *adapter)
4583 struct rx_ring *rxr = adapter->rx_rings;
4585 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4586 em_free_receive_buffers(rxr);
4587 /* Free the ring memory as well */
4588 em_dma_free(adapter, &rxr->rxdma);
4589 EM_RX_LOCK_DESTROY(rxr);
4592 free(adapter->rx_rings, M_DEVBUF);
4596 /*********************************************************************
4598 * Free receive ring data structures
4600 **********************************************************************/
4602 em_free_receive_buffers(struct rx_ring *rxr)
4604 struct adapter *adapter = rxr->adapter;
4605 struct em_rxbuffer *rxbuf = NULL;
4607 INIT_DEBUGOUT("free_receive_buffers: begin");
4609 if (rxr->rx_buffers != NULL) {
4610 for (int i = 0; i < adapter->num_rx_desc; i++) {
4611 rxbuf = &rxr->rx_buffers[i];
4612 if (rxbuf->map != NULL) {
4613 bus_dmamap_sync(rxr->rxtag, rxbuf->map,
4614 BUS_DMASYNC_POSTREAD);
4615 bus_dmamap_unload(rxr->rxtag, rxbuf->map);
4616 bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
4618 if (rxbuf->m_head != NULL) {
4619 m_freem(rxbuf->m_head);
4620 rxbuf->m_head = NULL;
4623 free(rxr->rx_buffers, M_DEVBUF);
4624 rxr->rx_buffers = NULL;
4625 rxr->next_to_check = 0;
4626 rxr->next_to_refresh = 0;
4629 if (rxr->rxtag != NULL) {
4630 bus_dma_tag_destroy(rxr->rxtag);
4638 /*********************************************************************
4640 * Enable receive unit.
4642 **********************************************************************/
4645 em_initialize_receive_unit(struct adapter *adapter)
4647 struct rx_ring *rxr = adapter->rx_rings;
4648 struct ifnet *ifp = adapter->ifp;
4649 struct e1000_hw *hw = &adapter->hw;
4650 u32 rctl, rxcsum, rfctl;
4652 INIT_DEBUGOUT("em_initialize_receive_units: begin");
4655 * Make sure receives are disabled while setting
4656 * up the descriptor ring
4658 rctl = E1000_READ_REG(hw, E1000_RCTL);
4659 /* Do not disable if ever enabled on this hardware */
4660 if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
4661 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4663 /* Setup the Receive Control Register */
4664 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4665 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
4666 E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
4667 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4669 /* Do not store bad packets */
4670 rctl &= ~E1000_RCTL_SBP;
4672 /* Enable Long Packet receive */
4673 if (ifp->if_mtu > ETHERMTU)
4674 rctl |= E1000_RCTL_LPE;
4676 rctl &= ~E1000_RCTL_LPE;
4679 if (!em_disable_crc_stripping)
4680 rctl |= E1000_RCTL_SECRC;
4682 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
4683 adapter->rx_abs_int_delay.value);
4685 E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
4686 adapter->rx_int_delay.value);
4688 * Set the interrupt throttling rate. Value is calculated
4689 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
4691 E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
4693 /* Use extended rx descriptor formats */
4694 rfctl = E1000_READ_REG(hw, E1000_RFCTL);
4695 rfctl |= E1000_RFCTL_EXTEN;
4697 ** When using MSIX interrupts we need to throttle
4698 ** using the EITR register (82574 only)
4700 if (hw->mac.type == e1000_82574) {
4701 for (int i = 0; i < 4; i++)
4702 E1000_WRITE_REG(hw, E1000_EITR_82574(i),
4704 /* Disable accelerated acknowledge */
4705 rfctl |= E1000_RFCTL_ACK_DIS;
4707 E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
4709 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4710 if (ifp->if_capenable & IFCAP_RXCSUM) {
4711 #ifdef EM_MULTIQUEUE
4712 rxcsum |= E1000_RXCSUM_TUOFL |
4713 E1000_RXCSUM_IPOFL |
4716 rxcsum |= E1000_RXCSUM_TUOFL;
4719 rxcsum &= ~E1000_RXCSUM_TUOFL;
4721 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4723 #ifdef EM_MULTIQUEUE
4724 #define RSSKEYLEN 10
4725 if (adapter->num_queues > 1) {
4726 uint8_t rss_key[4 * RSSKEYLEN];
4733 arc4rand(rss_key, sizeof(rss_key), 0);
4734 for (i = 0; i < RSSKEYLEN; ++i) {
4737 rssrk = EM_RSSRK_VAL(rss_key, i);
4738 E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
4742 * Configure RSS redirect table in following fashion:
4743 * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
4745 for (i = 0; i < sizeof(reta); ++i) {
4748 q = (i % adapter->num_queues) << 7;
4749 reta |= q << (8 * i);
4752 for (i = 0; i < 32; ++i) {
4753 E1000_WRITE_REG(hw, E1000_RETA(i), reta);
4756 E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q |
4757 E1000_MRQC_RSS_FIELD_IPV4_TCP |
4758 E1000_MRQC_RSS_FIELD_IPV4 |
4759 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
4760 E1000_MRQC_RSS_FIELD_IPV6_EX |
4761 E1000_MRQC_RSS_FIELD_IPV6);
4765 ** XXX TEMPORARY WORKAROUND: on some systems with 82573
4766 ** long latencies are observed, like Lenovo X60. This
4767 ** change eliminates the problem, but since having positive
4768 ** values in RDTR is a known source of problems on other
4769 ** platforms another solution is being sought.
4771 if (hw->mac.type == e1000_82573)
4772 E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
4774 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4775 /* Setup the Base and Length of the Rx Descriptor Ring */
4776 u64 bus_addr = rxr->rxdma.dma_paddr;
4777 u32 rdt = adapter->num_rx_desc - 1; /* default */
4779 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4780 adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
4781 E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
4782 E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
4783 /* Setup the Head and Tail Descriptor Pointers */
4784 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4787 * an init() while a netmap client is active must
4788 * preserve the rx buffers passed to userspace.
4790 if (ifp->if_capenable & IFCAP_NETMAP)
4791 rdt -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[i]);
4792 #endif /* DEV_NETMAP */
4793 E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
4797 * Set PTHRESH for improved jumbo performance
4798 * According to 10.2.5.11 of Intel 82574 Datasheet,
4799 * RXDCTL(1) is written whenever RXDCTL(0) is written.
4800 * Only write to RXDCTL(1) if there is a need for different
4803 if (((adapter->hw.mac.type == e1000_ich9lan) ||
4804 (adapter->hw.mac.type == e1000_pch2lan) ||
4805 (adapter->hw.mac.type == e1000_ich10lan)) &&
4806 (ifp->if_mtu > ETHERMTU)) {
4807 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
4808 E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
4809 } else if (adapter->hw.mac.type == e1000_82574) {
4810 for (int i = 0; i < adapter->num_queues; i++) {
4811 u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4813 rxdctl |= 0x20; /* PTHRESH */
4814 rxdctl |= 4 << 8; /* HTHRESH */
4815 rxdctl |= 4 << 16;/* WTHRESH */
4816 rxdctl |= 1 << 24; /* Switch to granularity */
4817 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4821 if (adapter->hw.mac.type >= e1000_pch2lan) {
4822 if (ifp->if_mtu > ETHERMTU)
4823 e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
4825 e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
4828 /* Make sure VLAN Filters are off */
4829 rctl &= ~E1000_RCTL_VFE;
4831 if (adapter->rx_mbuf_sz == MCLBYTES)
4832 rctl |= E1000_RCTL_SZ_2048;
4833 else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
4834 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4835 else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
4836 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4838 /* ensure we clear use DTYPE of 00 here */
4839 rctl &= ~0x00000C00;
4840 /* Write out the settings */
4841 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4847 /*********************************************************************
4849 * This routine executes in interrupt context. It replenishes
4850 * the mbufs in the descriptor and sends data which has been
4851 * dma'ed into host memory to upper layer.
4853 * We loop at most count times if count is > 0, or until done if
4856 * For polling we also now return the number of cleaned packets
4857 *********************************************************************/
4859 em_rxeof(struct rx_ring *rxr, int count, int *done)
4861 struct adapter *adapter = rxr->adapter;
4862 struct ifnet *ifp = adapter->ifp;
4863 struct mbuf *mp, *sendmp;
4866 int i, processed, rxdone = 0;
4868 union e1000_rx_desc_extended *cur;
4873 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4874 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4878 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4882 #endif /* DEV_NETMAP */
4884 for (i = rxr->next_to_check, processed = 0; count != 0;) {
4885 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4888 cur = &rxr->rx_base[i];
4889 status = le32toh(cur->wb.upper.status_error);
4892 if ((status & E1000_RXD_STAT_DD) == 0)
4895 len = le16toh(cur->wb.upper.length);
4896 eop = (status & E1000_RXD_STAT_EOP) != 0;
4898 if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
4899 (rxr->discard == TRUE)) {
4900 adapter->dropped_pkts++;
4901 ++rxr->rx_discarded;
4902 if (!eop) /* Catch subsequent segs */
4903 rxr->discard = TRUE;
4905 rxr->discard = FALSE;
4906 em_rx_discard(rxr, i);
4909 bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
4911 /* Assign correct length to the current fragment */
4912 mp = rxr->rx_buffers[i].m_head;
4915 /* Trigger for refresh */
4916 rxr->rx_buffers[i].m_head = NULL;
4918 /* First segment? */
4919 if (rxr->fmp == NULL) {
4920 mp->m_pkthdr.len = len;
4921 rxr->fmp = rxr->lmp = mp;
4923 /* Chain mbuf's together */
4924 mp->m_flags &= ~M_PKTHDR;
4925 rxr->lmp->m_next = mp;
4927 rxr->fmp->m_pkthdr.len += len;
4933 sendmp->m_pkthdr.rcvif = ifp;
4935 em_receive_checksum(status, sendmp);
4936 #ifndef __NO_STRICT_ALIGNMENT
4937 if (adapter->hw.mac.max_frame_size >
4938 (MCLBYTES - ETHER_ALIGN) &&
4939 em_fixup_rx(rxr) != 0)
4942 if (status & E1000_RXD_STAT_VP) {
4943 sendmp->m_pkthdr.ether_vtag =
4944 le16toh(cur->wb.upper.vlan);
4945 sendmp->m_flags |= M_VLANTAG;
4947 #ifndef __NO_STRICT_ALIGNMENT
4950 rxr->fmp = rxr->lmp = NULL;
4954 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4955 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4957 /* Zero out the receive descriptors status. */
4958 cur->wb.upper.status_error &= htole32(~0xFF);
4959 ++rxdone; /* cumulative for POLL */
4962 /* Advance our pointers to the next descriptor. */
4963 if (++i == adapter->num_rx_desc)
4966 /* Send to the stack */
4967 if (sendmp != NULL) {
4968 rxr->next_to_check = i;
4970 (*ifp->if_input)(ifp, sendmp);
4972 i = rxr->next_to_check;
4975 /* Only refresh mbufs every 8 descriptors */
4976 if (processed == 8) {
4977 em_refresh_mbufs(rxr, i);
4982 /* Catch any remaining refresh work */
4983 if (e1000_rx_unrefreshed(rxr))
4984 em_refresh_mbufs(rxr, i);
4986 rxr->next_to_check = i;
4991 return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4994 static __inline void
4995 em_rx_discard(struct rx_ring *rxr, int i)
4997 struct em_rxbuffer *rbuf;
4999 rbuf = &rxr->rx_buffers[i];
5000 bus_dmamap_unload(rxr->rxtag, rbuf->map);
5002 /* Free any previous pieces */
5003 if (rxr->fmp != NULL) {
5004 rxr->fmp->m_flags |= M_PKTHDR;
5010 ** Free buffer and allow em_refresh_mbufs()
5011 ** to clean up and recharge buffer.
5014 m_free(rbuf->m_head);
5015 rbuf->m_head = NULL;
5020 #ifndef __NO_STRICT_ALIGNMENT
5022 * When jumbo frames are enabled we should realign entire payload on
5023 * architecures with strict alignment. This is serious design mistake of 8254x
5024 * as it nullifies DMA operations. 8254x just allows RX buffer size to be
5025 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
5026 * payload. On architecures without strict alignment restrictions 8254x still
5027 * performs unaligned memory access which would reduce the performance too.
5028 * To avoid copying over an entire frame to align, we allocate a new mbuf and
5029 * copy ethernet header to the new mbuf. The new mbuf is prepended into the
5030 * existing mbuf chain.
5032 * Be aware, best performance of the 8254x is achived only when jumbo frame is
5033 * not used at all on architectures with strict alignment.
5036 em_fixup_rx(struct rx_ring *rxr)
5038 struct adapter *adapter = rxr->adapter;
5044 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
5045 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
5046 m->m_data += ETHER_HDR_LEN;
5048 MGETHDR(n, M_NOWAIT, MT_DATA);
5050 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
5051 m->m_data += ETHER_HDR_LEN;
5052 m->m_len -= ETHER_HDR_LEN;
5053 n->m_len = ETHER_HDR_LEN;
5054 M_MOVE_PKTHDR(n, m);
5058 adapter->dropped_pkts++;
5070 em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf)
5072 rxd->read.buffer_addr = htole64(rxbuf->paddr);
5073 /* DD bits must be cleared */
5074 rxd->wb.upper.status_error= 0;
5077 /*********************************************************************
5079 * Verify that the hardware indicated that the checksum is valid.
5080 * Inform the stack about the status of checksum so that stack
5081 * doesn't spend time verifying the checksum.
5083 *********************************************************************/
5085 em_receive_checksum(uint32_t status, struct mbuf *mp)
5087 mp->m_pkthdr.csum_flags = 0;
5089 /* Ignore Checksum bit is set */
5090 if (status & E1000_RXD_STAT_IXSM)
5093 /* If the IP checksum exists and there is no IP Checksum error */
5094 if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
5095 E1000_RXD_STAT_IPCS) {
5096 mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
5099 /* TCP or UDP checksum */
5100 if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) ==
5101 E1000_RXD_STAT_TCPCS) {
5102 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5103 mp->m_pkthdr.csum_data = htons(0xffff);
5105 if (status & E1000_RXD_STAT_UDPCS) {
5106 mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5107 mp->m_pkthdr.csum_data = htons(0xffff);
5112 * This routine is run via an vlan
5116 em_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5118 struct adapter *adapter = ifp->if_softc;
5121 if (ifp->if_softc != arg) /* Not our event */
5124 if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */
5127 EM_CORE_LOCK(adapter);
5128 index = (vtag >> 5) & 0x7F;
5130 adapter->shadow_vfta[index] |= (1 << bit);
5131 ++adapter->num_vlans;
5132 /* Re-init to load the changes */
5133 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5134 em_init_locked(adapter);
5135 EM_CORE_UNLOCK(adapter);
5139 * This routine is run via an vlan
5143 em_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5145 struct adapter *adapter = ifp->if_softc;
5148 if (ifp->if_softc != arg)
5151 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5154 EM_CORE_LOCK(adapter);
5155 index = (vtag >> 5) & 0x7F;
5157 adapter->shadow_vfta[index] &= ~(1 << bit);
5158 --adapter->num_vlans;
5159 /* Re-init to load the changes */
5160 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5161 em_init_locked(adapter);
5162 EM_CORE_UNLOCK(adapter);
5166 em_setup_vlan_hw_support(struct adapter *adapter)
5168 struct e1000_hw *hw = &adapter->hw;
5172 ** We get here thru init_locked, meaning
5173 ** a soft reset, this has already cleared
5174 ** the VFTA and other state, so if there
5175 ** have been no vlan's registered do nothing.
5177 if (adapter->num_vlans == 0)
5181 ** A soft reset zero's out the VFTA, so
5182 ** we need to repopulate it now.
5184 for (int i = 0; i < EM_VFTA_SIZE; i++)
5185 if (adapter->shadow_vfta[i] != 0)
5186 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
5187 i, adapter->shadow_vfta[i]);
5189 reg = E1000_READ_REG(hw, E1000_CTRL);
5190 reg |= E1000_CTRL_VME;
5191 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5193 /* Enable the Filter Table */
5194 reg = E1000_READ_REG(hw, E1000_RCTL);
5195 reg &= ~E1000_RCTL_CFIEN;
5196 reg |= E1000_RCTL_VFE;
5197 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5201 em_enable_intr(struct adapter *adapter)
5203 struct e1000_hw *hw = &adapter->hw;
5204 u32 ims_mask = IMS_ENABLE_MASK;
5206 if (hw->mac.type == e1000_82574) {
5207 E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK);
5208 ims_mask |= EM_MSIX_MASK;
5210 E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
5214 em_disable_intr(struct adapter *adapter)
5216 struct e1000_hw *hw = &adapter->hw;
5218 if (hw->mac.type == e1000_82574)
5219 E1000_WRITE_REG(hw, EM_EIAC, 0);
5220 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
5224 * Bit of a misnomer, what this really means is
5225 * to enable OS management of the system... aka
5226 * to disable special hardware management features
5229 em_init_manageability(struct adapter *adapter)
5231 /* A shared code workaround */
5232 #define E1000_82542_MANC2H E1000_MANC2H
5233 if (adapter->has_manage) {
5234 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5235 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5237 /* disable hardware interception of ARP */
5238 manc &= ~(E1000_MANC_ARP_EN);
5240 /* enable receiving management packets to the host */
5241 manc |= E1000_MANC_EN_MNG2HOST;
5242 #define E1000_MNG2HOST_PORT_623 (1 << 5)
5243 #define E1000_MNG2HOST_PORT_664 (1 << 6)
5244 manc2h |= E1000_MNG2HOST_PORT_623;
5245 manc2h |= E1000_MNG2HOST_PORT_664;
5246 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5247 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5252 * Give control back to hardware management
5253 * controller if there is one.
5256 em_release_manageability(struct adapter *adapter)
5258 if (adapter->has_manage) {
5259 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5261 /* re-enable hardware interception of ARP */
5262 manc |= E1000_MANC_ARP_EN;
5263 manc &= ~E1000_MANC_EN_MNG2HOST;
5265 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5270 * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
5271 * For ASF and Pass Through versions of f/w this means
5272 * that the driver is loaded. For AMT version type f/w
5273 * this means that the network i/f is open.
5276 em_get_hw_control(struct adapter *adapter)
5280 if (adapter->hw.mac.type == e1000_82573) {
5281 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5282 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5283 swsm | E1000_SWSM_DRV_LOAD);
5287 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5288 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5289 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5294 * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
5295 * For ASF and Pass Through versions of f/w this means that
5296 * the driver is no longer loaded. For AMT versions of the
5297 * f/w this means that the network i/f is closed.
5300 em_release_hw_control(struct adapter *adapter)
5304 if (!adapter->has_manage)
5307 if (adapter->hw.mac.type == e1000_82573) {
5308 swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
5309 E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
5310 swsm & ~E1000_SWSM_DRV_LOAD);
5314 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5315 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5316 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5321 em_is_valid_ether_addr(u8 *addr)
5323 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5325 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5333 ** Parse the interface capabilities with regard
5334 ** to both system management and wake-on-lan for
5338 em_get_wakeup(device_t dev)
5340 struct adapter *adapter = device_get_softc(dev);
5341 u16 eeprom_data = 0, device_id, apme_mask;
5343 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
5344 apme_mask = EM_EEPROM_APME;
5346 switch (adapter->hw.mac.type) {
5349 adapter->has_amt = TRUE;
5353 case e1000_80003es2lan:
5354 if (adapter->hw.bus.func == 1) {
5355 e1000_read_nvm(&adapter->hw,
5356 NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
5359 e1000_read_nvm(&adapter->hw,
5360 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5364 case e1000_ich10lan:
5370 apme_mask = E1000_WUC_APME;
5371 adapter->has_amt = TRUE;
5372 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
5375 e1000_read_nvm(&adapter->hw,
5376 NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
5379 if (eeprom_data & apme_mask)
5380 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
5382 * We have the eeprom settings, now apply the special cases
5383 * where the eeprom may be wrong or the board won't support
5384 * wake on lan on a particular port
5386 device_id = pci_get_device(dev);
5387 switch (device_id) {
5388 case E1000_DEV_ID_82571EB_FIBER:
5389 /* Wake events only supported on port A for dual fiber
5390 * regardless of eeprom setting */
5391 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
5392 E1000_STATUS_FUNC_1)
5395 case E1000_DEV_ID_82571EB_QUAD_COPPER:
5396 case E1000_DEV_ID_82571EB_QUAD_FIBER:
5397 case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
5398 /* if quad port adapter, disable WoL on all but port A */
5399 if (global_quad_port_a != 0)
5401 /* Reset for multiple quad port adapters */
5402 if (++global_quad_port_a == 4)
5403 global_quad_port_a = 0;
5411 * Enable PCI Wake On Lan capability
5414 em_enable_wakeup(device_t dev)
5416 struct adapter *adapter = device_get_softc(dev);
5417 struct ifnet *ifp = adapter->ifp;
5419 u32 pmc, ctrl, ctrl_ext, rctl;
5422 if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5426 ** Determine type of Wakeup: note that wol
5427 ** is set with all bits on by default.
5429 if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
5430 adapter->wol &= ~E1000_WUFC_MAG;
5432 if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
5433 adapter->wol &= ~E1000_WUFC_MC;
5435 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5436 rctl |= E1000_RCTL_MPE;
5437 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5440 if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5443 /* Advertise the wakeup capability */
5444 ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5445 ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5446 E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5448 /* Keep the laser running on Fiber adapters */
5449 if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5450 adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5451 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5452 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5453 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5456 if ((adapter->hw.mac.type == e1000_ich8lan) ||
5457 (adapter->hw.mac.type == e1000_pchlan) ||
5458 (adapter->hw.mac.type == e1000_ich9lan) ||
5459 (adapter->hw.mac.type == e1000_ich10lan))
5460 e1000_suspend_workarounds_ich8lan(&adapter->hw);
5462 if ((adapter->hw.mac.type == e1000_pchlan) ||
5463 (adapter->hw.mac.type == e1000_pch2lan) ||
5464 (adapter->hw.mac.type == e1000_pch_lpt) ||
5465 (adapter->hw.mac.type == e1000_pch_spt) ||
5466 (adapter->hw.mac.type == e1000_pch_cnp)) {
5467 error = em_enable_phy_wakeup(adapter);
5471 /* Enable wakeup by the MAC */
5472 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
5473 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5476 if (adapter->hw.phy.type == e1000_phy_igp_3)
5477 e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
5480 status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5481 status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5482 if (!error && (ifp->if_capenable & IFCAP_WOL))
5483 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5484 pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5490 ** WOL in the newer chipset interfaces (pchlan)
5491 ** require thing to be copied into the phy
5494 em_enable_phy_wakeup(struct adapter *adapter)
5496 struct e1000_hw *hw = &adapter->hw;
5500 /* copy MAC RARs to PHY RARs */
5501 e1000_copy_rx_addrs_to_phy_ich8lan(hw);
5503 /* copy MAC MTA to PHY MTA */
5504 for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
5505 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
5506 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
5507 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
5508 (u16)((mreg >> 16) & 0xFFFF));
5511 /* configure PHY Rx Control register */
5512 e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
5513 mreg = E1000_READ_REG(hw, E1000_RCTL);
5514 if (mreg & E1000_RCTL_UPE)
5515 preg |= BM_RCTL_UPE;
5516 if (mreg & E1000_RCTL_MPE)
5517 preg |= BM_RCTL_MPE;
5518 preg &= ~(BM_RCTL_MO_MASK);
5519 if (mreg & E1000_RCTL_MO_3)
5520 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
5521 << BM_RCTL_MO_SHIFT);
5522 if (mreg & E1000_RCTL_BAM)
5523 preg |= BM_RCTL_BAM;
5524 if (mreg & E1000_RCTL_PMCF)
5525 preg |= BM_RCTL_PMCF;
5526 mreg = E1000_READ_REG(hw, E1000_CTRL);
5527 if (mreg & E1000_CTRL_RFCE)
5528 preg |= BM_RCTL_RFCE;
5529 e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
5531 /* enable PHY wakeup in MAC register */
5532 E1000_WRITE_REG(hw, E1000_WUC,
5533 E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
5534 E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
5536 /* configure and enable PHY wakeup in PHY registers */
5537 e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
5538 e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
5540 /* activate PHY wakeup */
5541 ret = hw->phy.ops.acquire(hw);
5543 printf("Could not acquire PHY\n");
5546 e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
5547 (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
5548 ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
5550 printf("Could not read PHY page 769\n");
5553 preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
5554 ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
5556 printf("Could not set PHY Host Wakeup bit\n");
5558 hw->phy.ops.release(hw);
5564 em_led_func(void *arg, int onoff)
5566 struct adapter *adapter = arg;
5568 EM_CORE_LOCK(adapter);
5570 e1000_setup_led(&adapter->hw);
5571 e1000_led_on(&adapter->hw);
5573 e1000_led_off(&adapter->hw);
5574 e1000_cleanup_led(&adapter->hw);
5576 EM_CORE_UNLOCK(adapter);
5580 ** Disable the L0S and L1 LINK states
5583 em_disable_aspm(struct adapter *adapter)
5586 u16 link_cap,link_ctrl;
5587 device_t dev = adapter->dev;
5589 switch (adapter->hw.mac.type) {
5597 if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
5599 reg = base + PCIER_LINK_CAP;
5600 link_cap = pci_read_config(dev, reg, 2);
5601 if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
5603 reg = base + PCIER_LINK_CTL;
5604 link_ctrl = pci_read_config(dev, reg, 2);
5605 link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
5606 pci_write_config(dev, reg, link_ctrl, 2);
5610 /**********************************************************************
5612 * Update the board statistics counters.
5614 **********************************************************************/
5616 em_update_stats_counters(struct adapter *adapter)
5620 if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5621 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
5622 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
5623 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
5625 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
5626 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
5627 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
5628 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
5630 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
5631 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
5632 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
5633 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
5634 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
5635 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
5636 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
5637 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5638 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
5639 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
5640 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
5641 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
5642 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
5643 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
5644 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
5645 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
5646 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
5647 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
5648 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
5649 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
5651 /* For the 64-bit byte counters the low dword must be read first. */
5652 /* Both registers clear on the read of the high dword */
5654 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
5655 ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
5656 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
5657 ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
5659 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
5660 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
5661 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
5662 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
5663 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
5665 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
5666 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
5668 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
5669 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
5670 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
5671 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
5672 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
5673 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
5674 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
5675 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
5676 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
5677 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
5679 /* Interrupt Counts */
5681 adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
5682 adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
5683 adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
5684 adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
5685 adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
5686 adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
5687 adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
5688 adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
5689 adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
5691 if (adapter->hw.mac.type >= e1000_82543) {
5692 adapter->stats.algnerrc +=
5693 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
5694 adapter->stats.rxerrc +=
5695 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
5696 adapter->stats.tncrs +=
5697 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
5698 adapter->stats.cexterr +=
5699 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
5700 adapter->stats.tsctc +=
5701 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
5702 adapter->stats.tsctfc +=
5703 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
5707 ifp->if_collisions = adapter->stats.colc;
5710 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
5711 adapter->stats.crcerrs + adapter->stats.algnerrc +
5712 adapter->stats.ruc + adapter->stats.roc +
5713 adapter->stats.mpc + adapter->stats.cexterr;
5716 ifp->if_oerrors = adapter->stats.ecol +
5717 adapter->stats.latecol + adapter->watchdog_events;
5720 /* Export a single 32-bit register via a read-only sysctl. */
5722 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5724 struct adapter *adapter;
5727 adapter = oidp->oid_arg1;
5728 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5729 return (sysctl_handle_int(oidp, &val, 0, req));
5733 * Add sysctl variables, one per statistic, to the system.
5736 em_add_hw_stats(struct adapter *adapter)
5738 device_t dev = adapter->dev;
5740 struct tx_ring *txr = adapter->tx_rings;
5741 struct rx_ring *rxr = adapter->rx_rings;
5743 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5744 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5745 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5746 struct e1000_hw_stats *stats = &adapter->stats;
5748 struct sysctl_oid *stat_node, *queue_node, *int_node;
5749 struct sysctl_oid_list *stat_list, *queue_list, *int_list;
5751 #define QUEUE_NAME_LEN 32
5752 char namebuf[QUEUE_NAME_LEN];
5754 /* Driver Statistics */
5755 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5756 CTLFLAG_RD, &adapter->dropped_pkts,
5757 "Driver dropped packets");
5758 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5759 CTLFLAG_RD, &adapter->link_irq,
5760 "Link MSIX IRQ Handled");
5761 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5762 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5763 "Defragmenting mbuf chain failed");
5764 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5765 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5766 "Driver tx dma failure in xmit");
5767 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5768 CTLFLAG_RD, &adapter->rx_overruns,
5770 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5771 CTLFLAG_RD, &adapter->watchdog_events,
5772 "Watchdog timeouts");
5774 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
5775 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
5776 em_sysctl_reg_handler, "IU",
5777 "Device Control Register");
5778 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
5779 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
5780 em_sysctl_reg_handler, "IU",
5781 "Receiver Control Register");
5782 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5783 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5784 "Flow Control High Watermark");
5785 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5786 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5787 "Flow Control Low Watermark");
5789 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
5790 snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
5791 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5792 CTLFLAG_RD, NULL, "TX Queue Name");
5793 queue_list = SYSCTL_CHILDREN(queue_node);
5795 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5796 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5798 em_sysctl_reg_handler, "IU",
5799 "Transmit Descriptor Head");
5800 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5801 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5803 em_sysctl_reg_handler, "IU",
5804 "Transmit Descriptor Tail");
5805 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
5806 CTLFLAG_RD, &txr->tx_irq,
5807 "Queue MSI-X Transmit Interrupts");
5808 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail",
5809 CTLFLAG_RD, &txr->no_desc_avail,
5810 "Queue No Descriptor Available");
5812 snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
5813 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5814 CTLFLAG_RD, NULL, "RX Queue Name");
5815 queue_list = SYSCTL_CHILDREN(queue_node);
5817 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5818 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5820 em_sysctl_reg_handler, "IU",
5821 "Receive Descriptor Head");
5822 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5823 CTLTYPE_UINT | CTLFLAG_RD, adapter,
5825 em_sysctl_reg_handler, "IU",
5826 "Receive Descriptor Tail");
5827 SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
5828 CTLFLAG_RD, &rxr->rx_irq,
5829 "Queue MSI-X Receive Interrupts");
5832 /* MAC stats get their own sub node */
5834 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5835 CTLFLAG_RD, NULL, "Statistics");
5836 stat_list = SYSCTL_CHILDREN(stat_node);
5838 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5839 CTLFLAG_RD, &stats->ecol,
5840 "Excessive collisions");
5841 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5842 CTLFLAG_RD, &stats->scc,
5843 "Single collisions");
5844 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5845 CTLFLAG_RD, &stats->mcc,
5846 "Multiple collisions");
5847 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5848 CTLFLAG_RD, &stats->latecol,
5850 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5851 CTLFLAG_RD, &stats->colc,
5853 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5854 CTLFLAG_RD, &adapter->stats.symerrs,
5856 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5857 CTLFLAG_RD, &adapter->stats.sec,
5859 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5860 CTLFLAG_RD, &adapter->stats.dc,
5862 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5863 CTLFLAG_RD, &adapter->stats.mpc,
5865 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5866 CTLFLAG_RD, &adapter->stats.rnbc,
5867 "Receive No Buffers");
5868 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5869 CTLFLAG_RD, &adapter->stats.ruc,
5870 "Receive Undersize");
5871 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5872 CTLFLAG_RD, &adapter->stats.rfc,
5873 "Fragmented Packets Received ");
5874 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5875 CTLFLAG_RD, &adapter->stats.roc,
5876 "Oversized Packets Received");
5877 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5878 CTLFLAG_RD, &adapter->stats.rjc,
5880 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5881 CTLFLAG_RD, &adapter->stats.rxerrc,
5883 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5884 CTLFLAG_RD, &adapter->stats.crcerrs,
5886 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5887 CTLFLAG_RD, &adapter->stats.algnerrc,
5888 "Alignment Errors");
5889 /* On 82575 these are collision counts */
5890 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5891 CTLFLAG_RD, &adapter->stats.cexterr,
5892 "Collision/Carrier extension errors");
5893 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5894 CTLFLAG_RD, &adapter->stats.xonrxc,
5896 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5897 CTLFLAG_RD, &adapter->stats.xontxc,
5899 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5900 CTLFLAG_RD, &adapter->stats.xoffrxc,
5902 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5903 CTLFLAG_RD, &adapter->stats.xofftxc,
5904 "XOFF Transmitted");
5906 /* Packet Reception Stats */
5907 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5908 CTLFLAG_RD, &adapter->stats.tpr,
5909 "Total Packets Received ");
5910 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5911 CTLFLAG_RD, &adapter->stats.gprc,
5912 "Good Packets Received");
5913 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5914 CTLFLAG_RD, &adapter->stats.bprc,
5915 "Broadcast Packets Received");
5916 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5917 CTLFLAG_RD, &adapter->stats.mprc,
5918 "Multicast Packets Received");
5919 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5920 CTLFLAG_RD, &adapter->stats.prc64,
5921 "64 byte frames received ");
5922 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5923 CTLFLAG_RD, &adapter->stats.prc127,
5924 "65-127 byte frames received");
5925 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5926 CTLFLAG_RD, &adapter->stats.prc255,
5927 "128-255 byte frames received");
5928 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5929 CTLFLAG_RD, &adapter->stats.prc511,
5930 "256-511 byte frames received");
5931 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5932 CTLFLAG_RD, &adapter->stats.prc1023,
5933 "512-1023 byte frames received");
5934 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5935 CTLFLAG_RD, &adapter->stats.prc1522,
5936 "1023-1522 byte frames received");
5937 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5938 CTLFLAG_RD, &adapter->stats.gorc,
5939 "Good Octets Received");
5941 /* Packet Transmission Stats */
5942 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5943 CTLFLAG_RD, &adapter->stats.gotc,
5944 "Good Octets Transmitted");
5945 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5946 CTLFLAG_RD, &adapter->stats.tpt,
5947 "Total Packets Transmitted");
5948 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5949 CTLFLAG_RD, &adapter->stats.gptc,
5950 "Good Packets Transmitted");
5951 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5952 CTLFLAG_RD, &adapter->stats.bptc,
5953 "Broadcast Packets Transmitted");
5954 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5955 CTLFLAG_RD, &adapter->stats.mptc,
5956 "Multicast Packets Transmitted");
5957 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5958 CTLFLAG_RD, &adapter->stats.ptc64,
5959 "64 byte frames transmitted ");
5960 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5961 CTLFLAG_RD, &adapter->stats.ptc127,
5962 "65-127 byte frames transmitted");
5963 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5964 CTLFLAG_RD, &adapter->stats.ptc255,
5965 "128-255 byte frames transmitted");
5966 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5967 CTLFLAG_RD, &adapter->stats.ptc511,
5968 "256-511 byte frames transmitted");
5969 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5970 CTLFLAG_RD, &adapter->stats.ptc1023,
5971 "512-1023 byte frames transmitted");
5972 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5973 CTLFLAG_RD, &adapter->stats.ptc1522,
5974 "1024-1522 byte frames transmitted");
5975 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5976 CTLFLAG_RD, &adapter->stats.tsctc,
5977 "TSO Contexts Transmitted");
5978 SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5979 CTLFLAG_RD, &adapter->stats.tsctfc,
5980 "TSO Contexts Failed");
5983 /* Interrupt Stats */
5985 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5986 CTLFLAG_RD, NULL, "Interrupt Statistics");
5987 int_list = SYSCTL_CHILDREN(int_node);
5989 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5990 CTLFLAG_RD, &adapter->stats.iac,
5991 "Interrupt Assertion Count");
5993 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5994 CTLFLAG_RD, &adapter->stats.icrxptc,
5995 "Interrupt Cause Rx Pkt Timer Expire Count");
5997 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5998 CTLFLAG_RD, &adapter->stats.icrxatc,
5999 "Interrupt Cause Rx Abs Timer Expire Count");
6001 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6002 CTLFLAG_RD, &adapter->stats.ictxptc,
6003 "Interrupt Cause Tx Pkt Timer Expire Count");
6005 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6006 CTLFLAG_RD, &adapter->stats.ictxatc,
6007 "Interrupt Cause Tx Abs Timer Expire Count");
6009 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6010 CTLFLAG_RD, &adapter->stats.ictxqec,
6011 "Interrupt Cause Tx Queue Empty Count");
6013 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6014 CTLFLAG_RD, &adapter->stats.ictxqmtc,
6015 "Interrupt Cause Tx Queue Min Thresh Count");
6017 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6018 CTLFLAG_RD, &adapter->stats.icrxdmtc,
6019 "Interrupt Cause Rx Desc Min Thresh Count");
6021 SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6022 CTLFLAG_RD, &adapter->stats.icrxoc,
6023 "Interrupt Cause Receiver Overrun Count");
6026 /**********************************************************************
6028 * This routine provides a way to dump out the adapter eeprom,
6029 * often a useful debug/service tool. This only dumps the first
6030 * 32 words, stuff that matters is in that extent.
6032 **********************************************************************/
6034 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6036 struct adapter *adapter = (struct adapter *)arg1;
6041 error = sysctl_handle_int(oidp, &result, 0, req);
6043 if (error || !req->newptr)
6047 * This value will cause a hex dump of the
6048 * first 32 16-bit words of the EEPROM to
6052 em_print_nvm_info(adapter);
6058 em_print_nvm_info(struct adapter *adapter)
6063 /* Its a bit crude, but it gets the job done */
6064 printf("\nInterface EEPROM Dump:\n");
6065 printf("Offset\n0x0000 ");
6066 for (i = 0, j = 0; i < 32; i++, j++) {
6067 if (j == 8) { /* Make the offset block */
6069 printf("\n0x00%x0 ",row);
6071 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6072 printf("%04x ", eeprom_data);
6078 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
6080 struct em_int_delay_info *info;
6081 struct adapter *adapter;
6083 int error, usecs, ticks;
6085 info = (struct em_int_delay_info *)arg1;
6086 usecs = info->value;
6087 error = sysctl_handle_int(oidp, &usecs, 0, req);
6088 if (error != 0 || req->newptr == NULL)
6090 if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
6092 info->value = usecs;
6093 ticks = EM_USECS_TO_TICKS(usecs);
6094 if (info->offset == E1000_ITR) /* units are 256ns here */
6097 adapter = info->adapter;
6099 EM_CORE_LOCK(adapter);
6100 regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
6101 regval = (regval & ~0xffff) | (ticks & 0xffff);
6102 /* Handle a few special cases. */
6103 switch (info->offset) {
6108 adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
6109 /* Don't write 0 into the TIDV register. */
6112 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
6115 E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
6116 EM_CORE_UNLOCK(adapter);
6121 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
6122 const char *description, struct em_int_delay_info *info,
6123 int offset, int value)
6125 info->adapter = adapter;
6126 info->offset = offset;
6127 info->value = value;
6128 SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
6129 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6130 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
6131 info, 0, em_sysctl_int_delay, "I", description);
6135 em_set_sysctl_value(struct adapter *adapter, const char *name,
6136 const char *description, int *limit, int value)
6139 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6140 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6141 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6146 ** Set flow control using sysctl:
6147 ** Flow control values:
6154 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
6157 static int input = 3; /* default is full */
6158 struct adapter *adapter = (struct adapter *) arg1;
6160 error = sysctl_handle_int(oidp, &input, 0, req);
6162 if ((error) || (req->newptr == NULL))
6165 if (input == adapter->fc) /* no change? */
6169 case e1000_fc_rx_pause:
6170 case e1000_fc_tx_pause:
6173 adapter->hw.fc.requested_mode = input;
6174 adapter->fc = input;
6181 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6182 e1000_force_mac_fc(&adapter->hw);
6187 ** Manage Energy Efficient Ethernet:
6189 ** 0/1 - enabled/disabled
6192 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
6194 struct adapter *adapter = (struct adapter *) arg1;
6197 value = adapter->hw.dev_spec.ich8lan.eee_disable;
6198 error = sysctl_handle_int(oidp, &value, 0, req);
6199 if (error || req->newptr == NULL)
6201 EM_CORE_LOCK(adapter);
6202 adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0);
6203 em_init_locked(adapter);
6204 EM_CORE_UNLOCK(adapter);
6209 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
6211 struct adapter *adapter;
6216 error = sysctl_handle_int(oidp, &result, 0, req);
6218 if (error || !req->newptr)
6222 adapter = (struct adapter *)arg1;
6223 em_print_debug_info(adapter);
6230 ** This routine is meant to be fluid, add whatever is
6231 ** needed for debugging a problem. -jfv
6234 em_print_debug_info(struct adapter *adapter)
6236 device_t dev = adapter->dev;
6237 struct tx_ring *txr = adapter->tx_rings;
6238 struct rx_ring *rxr = adapter->rx_rings;
6240 if (adapter->ifp->if_drv_flags & IFF_DRV_RUNNING)
6241 printf("Interface is RUNNING ");
6243 printf("Interface is NOT RUNNING\n");
6245 if (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE)
6246 printf("and INACTIVE\n");
6248 printf("and ACTIVE\n");
6250 for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
6251 device_printf(dev, "TX Queue %d ------\n", i);
6252 device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
6253 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
6254 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
6255 device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
6256 device_printf(dev, "TX descriptors avail = %d\n",
6258 device_printf(dev, "Tx Descriptors avail failure = %ld\n",
6259 txr->no_desc_avail);
6260 device_printf(dev, "RX Queue %d ------\n", i);
6261 device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
6262 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
6263 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
6264 device_printf(dev, "RX discarded packets = %ld\n",
6266 device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
6267 device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
6271 #ifdef EM_MULTIQUEUE
6274 * Write a new value to the EEPROM increasing the number of MSIX
6275 * vectors from 3 to 5, for proper multiqueue support.
6278 em_enable_vectors_82574(struct adapter *adapter)
6280 struct e1000_hw *hw = &adapter->hw;
6281 device_t dev = adapter->dev;
6284 e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6285 printf("Current cap: %#06x\n", edata);
6286 if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
6287 device_printf(dev, "Writing to eeprom: increasing "
6288 "reported MSIX vectors from 3 to 5...\n");
6289 edata &= ~(EM_NVM_MSIX_N_MASK);
6290 edata |= 4 << EM_NVM_MSIX_N_SHIFT;
6291 e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
6292 e1000_update_nvm_checksum(hw);
6293 device_printf(dev, "Writing to eeprom: done\n");
6299 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
6304 dc = devclass_find("em");
6305 max_em = devclass_get_maxunit(dc);
6307 for (int index = 0; index < (max_em - 1); index++) {
6309 dev = devclass_get_device(dc, index);
6310 if (device_get_driver(dev) == &em_driver) {
6311 struct adapter *adapter = device_get_softc(dev);
6312 EM_CORE_LOCK(adapter);
6313 em_init_locked(adapter);
6314 EM_CORE_UNLOCK(adapter);
6318 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
6323 dc = devclass_find("em");
6324 max_em = devclass_get_maxunit(dc);
6326 for (int index = 0; index < (max_em - 1); index++) {
6328 dev = devclass_get_device(dc, index);
6329 if (device_get_driver(dev) == &em_driver)
6330 em_print_debug_info(device_get_softc(dev));