1 /******************************************************************************
2
3   Copyright (c) 2001-2015, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD$*/
34
35 /*
36  * Uncomment the following extensions for better performance in a VM,
37  * especially if you have support in the hypervisor.
38  * See http://info.iet.unipi.it/~luigi/netmap/
39  */
40 // #define BATCH_DISPATCH
41 // #define NIC_SEND_COMBINING
42 // #define NIC_PARAVIRT /* enable virtio-like synchronization */
43
44 #include "opt_inet.h"
45 #include "opt_inet6.h"
46
47 #ifdef HAVE_KERNEL_OPTION_HEADERS
48 #include "opt_device_polling.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/bus.h>
54 #include <sys/endian.h>
55 #include <sys/kernel.h>
56 #include <sys/kthread.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/module.h>
60 #include <sys/rman.h>
61 #include <sys/socket.h>
62 #include <sys/sockio.h>
63 #include <sys/sysctl.h>
64 #include <sys/taskqueue.h>
65 #include <sys/eventhandler.h>
66 #include <machine/bus.h>
67 #include <machine/resource.h>
68
69 #include <net/bpf.h>
70 #include <net/ethernet.h>
71 #include <net/if.h>
72 #include <net/if_arp.h>
73 #include <net/if_dl.h>
74 #include <net/if_media.h>
75
76 #include <net/if_types.h>
77 #include <net/if_vlan_var.h>
78
79 #include <netinet/in_systm.h>
80 #include <netinet/in.h>
81 #include <netinet/if_ether.h>
82 #include <netinet/ip.h>
83 #include <netinet/ip6.h>
84 #include <netinet/tcp.h>
85 #include <netinet/udp.h>
86
87 #include <machine/in_cksum.h>
88 #include <dev/led/led.h>
89 #include <dev/pci/pcivar.h>
90 #include <dev/pci/pcireg.h>
91
92 #include "e1000_api.h"
93 #include "if_lem.h"
94
95 /*********************************************************************
96  *  Legacy Em Driver version:
97  *********************************************************************/
98 char lem_driver_version[] = "1.1.0";
99
100 /*********************************************************************
101  *  PCI Device ID Table
102  *
103  *  Used by probe to select which devices to attach to
104  *  Last field stores an index into lem_strings
105  *  Last entry must be all 0s
106  *
107  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
108  *********************************************************************/
109
110 static em_vendor_info_t lem_vendor_info_array[] =
111 {
112         /* Intel(R) PRO/1000 Network Connection */
113         { 0x8086, E1000_DEV_ID_82540EM,         PCI_ANY_ID, PCI_ANY_ID, 0},
114         { 0x8086, E1000_DEV_ID_82540EM_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
115         { 0x8086, E1000_DEV_ID_82540EP,         PCI_ANY_ID, PCI_ANY_ID, 0},
116         { 0x8086, E1000_DEV_ID_82540EP_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
117         { 0x8086, E1000_DEV_ID_82540EP_LP,      PCI_ANY_ID, PCI_ANY_ID, 0},
118
119         { 0x8086, E1000_DEV_ID_82541EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82541ER,         PCI_ANY_ID, PCI_ANY_ID, 0},
121         { 0x8086, E1000_DEV_ID_82541ER_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82541EI_MOBILE,  PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82541GI,         PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82541GI_LF,      PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82541GI_MOBILE,  PCI_ANY_ID, PCI_ANY_ID, 0},
126
127         { 0x8086, E1000_DEV_ID_82542,           PCI_ANY_ID, PCI_ANY_ID, 0},
128
129         { 0x8086, E1000_DEV_ID_82543GC_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
130         { 0x8086, E1000_DEV_ID_82543GC_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
131
132         { 0x8086, E1000_DEV_ID_82544EI_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82544EI_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82544GC_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82544GC_LOM,     PCI_ANY_ID, PCI_ANY_ID, 0},
136
137         { 0x8086, E1000_DEV_ID_82545EM_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
138         { 0x8086, E1000_DEV_ID_82545EM_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
139         { 0x8086, E1000_DEV_ID_82545GM_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
140         { 0x8086, E1000_DEV_ID_82545GM_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
141         { 0x8086, E1000_DEV_ID_82545GM_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
142
143         { 0x8086, E1000_DEV_ID_82546EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
144         { 0x8086, E1000_DEV_ID_82546EB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
145         { 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
146         { 0x8086, E1000_DEV_ID_82546GB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
147         { 0x8086, E1000_DEV_ID_82546GB_FIBER,   PCI_ANY_ID, PCI_ANY_ID, 0},
148         { 0x8086, E1000_DEV_ID_82546GB_SERDES,  PCI_ANY_ID, PCI_ANY_ID, 0},
149         { 0x8086, E1000_DEV_ID_82546GB_PCIE,    PCI_ANY_ID, PCI_ANY_ID, 0},
150         { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
151         { 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
152                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
153
154         { 0x8086, E1000_DEV_ID_82547EI,         PCI_ANY_ID, PCI_ANY_ID, 0},
155         { 0x8086, E1000_DEV_ID_82547EI_MOBILE,  PCI_ANY_ID, PCI_ANY_ID, 0},
156         { 0x8086, E1000_DEV_ID_82547GI,         PCI_ANY_ID, PCI_ANY_ID, 0},
157         /* required last entry */
158         { 0, 0, 0, 0, 0}
159 };
160
161 /*********************************************************************
162  *  Table of branding strings for all supported NICs.
163  *********************************************************************/
164
165 static char *lem_strings[] = {
166         "Intel(R) PRO/1000 Legacy Network Connection"
167 };
168
169 /*********************************************************************
170  *  Function prototypes
171  *********************************************************************/
172 static int      lem_probe(device_t);
173 static int      lem_attach(device_t);
174 static int      lem_detach(device_t);
175 static int      lem_shutdown(device_t);
176 static int      lem_suspend(device_t);
177 static int      lem_resume(device_t);
178 static void     lem_start(struct ifnet *);
179 static void     lem_start_locked(struct ifnet *ifp);
180 static int      lem_ioctl(struct ifnet *, u_long, caddr_t);
181 static void     lem_init(void *);
182 static void     lem_init_locked(struct adapter *);
183 static void     lem_stop(void *);
184 static void     lem_media_status(struct ifnet *, struct ifmediareq *);
185 static int      lem_media_change(struct ifnet *);
186 static void     lem_identify_hardware(struct adapter *);
187 static int      lem_allocate_pci_resources(struct adapter *);
188 static int      lem_allocate_irq(struct adapter *adapter);
189 static void     lem_free_pci_resources(struct adapter *);
190 static void     lem_local_timer(void *);
191 static int      lem_hardware_init(struct adapter *);
192 static int      lem_setup_interface(device_t, struct adapter *);
193 static void     lem_setup_transmit_structures(struct adapter *);
194 static void     lem_initialize_transmit_unit(struct adapter *);
195 static int      lem_setup_receive_structures(struct adapter *);
196 static void     lem_initialize_receive_unit(struct adapter *);
197 static void     lem_enable_intr(struct adapter *);
198 static void     lem_disable_intr(struct adapter *);
199 static void     lem_free_transmit_structures(struct adapter *);
200 static void     lem_free_receive_structures(struct adapter *);
201 static void     lem_update_stats_counters(struct adapter *);
202 static void     lem_add_hw_stats(struct adapter *adapter);
203 static void     lem_txeof(struct adapter *);
204 static void     lem_tx_purge(struct adapter *);
205 static int      lem_allocate_receive_structures(struct adapter *);
206 static int      lem_allocate_transmit_structures(struct adapter *);
207 static bool     lem_rxeof(struct adapter *, int, int *);
208 #ifndef __NO_STRICT_ALIGNMENT
209 static int      lem_fixup_rx(struct adapter *);
210 #endif
211 static void     lem_receive_checksum(struct adapter *, struct e1000_rx_desc *,
212                     struct mbuf *);
213 static void     lem_transmit_checksum_setup(struct adapter *, struct mbuf *,
214                     u32 *, u32 *);
215 static void     lem_set_promisc(struct adapter *);
216 static void     lem_disable_promisc(struct adapter *);
217 static void     lem_set_multi(struct adapter *);
218 static void     lem_update_link_status(struct adapter *);
219 static int      lem_get_buf(struct adapter *, int);
220 static void     lem_register_vlan(void *, struct ifnet *, u16);
221 static void     lem_unregister_vlan(void *, struct ifnet *, u16);
222 static void     lem_setup_vlan_hw_support(struct adapter *);
223 static int      lem_xmit(struct adapter *, struct mbuf **);
224 static void     lem_smartspeed(struct adapter *);
225 static int      lem_82547_fifo_workaround(struct adapter *, int);
226 static void     lem_82547_update_fifo_head(struct adapter *, int);
227 static int      lem_82547_tx_fifo_reset(struct adapter *);
228 static void     lem_82547_move_tail(void *);
229 static int      lem_dma_malloc(struct adapter *, bus_size_t,
230                     struct em_dma_alloc *, int);
231 static void     lem_dma_free(struct adapter *, struct em_dma_alloc *);
232 static int      lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
233 static void     lem_print_nvm_info(struct adapter *);
234 static int      lem_is_valid_ether_addr(u8 *);
235 static u32      lem_fill_descriptors (bus_addr_t address, u32 length,
236                     PDESC_ARRAY desc_array);
237 static int      lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
238 static void     lem_add_int_delay_sysctl(struct adapter *, const char *,
239                     const char *, struct em_int_delay_info *, int, int);
240 static void     lem_set_flow_cntrl(struct adapter *, const char *,
241                     const char *, int *, int);
242 /* Management and WOL Support */
243 static void     lem_init_manageability(struct adapter *);
244 static void     lem_release_manageability(struct adapter *);
245 static void     lem_get_hw_control(struct adapter *);
246 static void     lem_release_hw_control(struct adapter *);
247 static void     lem_get_wakeup(device_t);
248 static void     lem_enable_wakeup(device_t);
249 static int      lem_enable_phy_wakeup(struct adapter *);
250 static void     lem_led_func(void *, int);
251
252 static void     lem_intr(void *);
253 static int      lem_irq_fast(void *);
254 static void     lem_handle_rxtx(void *context, int pending);
255 static void     lem_handle_link(void *context, int pending);
256 static void     lem_add_rx_process_limit(struct adapter *, const char *,
257                     const char *, int *, int);
258
259 #ifdef DEVICE_POLLING
260 static poll_handler_t lem_poll;
261 #endif /* DEVICE_POLLING */
262
263 /*********************************************************************
264  *  FreeBSD Device Interface Entry Points
265  *********************************************************************/
266
267 static device_method_t lem_methods[] = {
268         /* Device interface */
269         DEVMETHOD(device_probe, lem_probe),
270         DEVMETHOD(device_attach, lem_attach),
271         DEVMETHOD(device_detach, lem_detach),
272         DEVMETHOD(device_shutdown, lem_shutdown),
273         DEVMETHOD(device_suspend, lem_suspend),
274         DEVMETHOD(device_resume, lem_resume),
275         DEVMETHOD_END
276 };
277
278 static driver_t lem_driver = {
279         "em", lem_methods, sizeof(struct adapter),
280 };
281
282 extern devclass_t em_devclass;
283 DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0);
284 MODULE_DEPEND(lem, pci, 1, 1, 1);
285 MODULE_DEPEND(lem, ether, 1, 1, 1);
286
287 /*********************************************************************
288  *  Tunable default values.
289  *********************************************************************/
290
291 #define EM_TICKS_TO_USECS(ticks)        ((1024 * (ticks) + 500) / 1000)
292 #define EM_USECS_TO_TICKS(usecs)        ((1000 * (usecs) + 512) / 1024)
293
294 #define MAX_INTS_PER_SEC        8000
295 #define DEFAULT_ITR             (1000000000/(MAX_INTS_PER_SEC * 256))
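/*
 * A worked example of the conversions above (values assume the usual
 * e1000 timer granularities: the RDTR/TIDV/RADV/TADV delay registers
 * count 1.024 usec ticks, while the ITR register counts 256 ns units):
 *
 *   EM_USECS_TO_TICKS(100) = (1000*100 + 512) / 1024 = 98 ticks
 *   EM_TICKS_TO_USECS(98)  = (1024*98 + 500) / 1000  = 100 usecs
 *
 *   DEFAULT_ITR = 1000000000 / (8000 * 256) = 488, i.e. an interrupt
 *   interval of roughly 488 * 256 ns ~= 125 usec, or about
 *   MAX_INTS_PER_SEC interrupts per second.
 */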
296
297 static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
298 static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
299 static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
300 static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
301 /*
302  * Increase lem_rxd and lem_txd to at least 2048 in netmap mode
303  * for better performance.
304  */
305 static int lem_rxd = EM_DEFAULT_RXD;
306 static int lem_txd = EM_DEFAULT_TXD;
307 static int lem_smart_pwr_down = FALSE;
308
309 /* Controls whether promiscuous mode also shows bad packets */
310 static int lem_debug_sbp = FALSE;
311
312 TUNABLE_INT("hw.em.tx_int_delay", &lem_tx_int_delay_dflt);
313 TUNABLE_INT("hw.em.rx_int_delay", &lem_rx_int_delay_dflt);
314 TUNABLE_INT("hw.em.tx_abs_int_delay", &lem_tx_abs_int_delay_dflt);
315 TUNABLE_INT("hw.em.rx_abs_int_delay", &lem_rx_abs_int_delay_dflt);
316 TUNABLE_INT("hw.em.rxd", &lem_rxd);
317 TUNABLE_INT("hw.em.txd", &lem_txd);
318 TUNABLE_INT("hw.em.smart_pwr_down", &lem_smart_pwr_down);
319 TUNABLE_INT("hw.em.sbp", &lem_debug_sbp);
320
321 /* Interrupt style - default to fast */
322 static int lem_use_legacy_irq = 0;
323 TUNABLE_INT("hw.em.use_legacy_irq", &lem_use_legacy_irq);
324
325 /* How many packets rxeof tries to clean at a time */
326 static int lem_rx_process_limit = 100;
327 TUNABLE_INT("hw.em.rx_process_limit", &lem_rx_process_limit);
328
329 /* Flow control setting - default to FULL */
330 static int lem_fc_setting = e1000_fc_full;
331 TUNABLE_INT("hw.em.fc_setting", &lem_fc_setting);
332
333 /* Global used in WOL setup with multiport cards */
334 static int global_quad_port_a = 0;
335
336 #ifdef DEV_NETMAP       /* see ixgbe.c for details */
337 #include <dev/netmap/if_lem_netmap.h>
338 #endif /* DEV_NETMAP */
339
340 /*********************************************************************
341  *  Device identification routine
342  *
343  *  em_probe determines if the driver should be loaded on
344  *  adapter based on PCI vendor/device id of the adapter.
345  *
346  *  return BUS_PROBE_DEFAULT on success, positive on failure
347  *********************************************************************/
348
349 static int
350 lem_probe(device_t dev)
351 {
352         char            adapter_name[60];
353         u16             pci_vendor_id = 0;
354         u16             pci_device_id = 0;
355         u16             pci_subvendor_id = 0;
356         u16             pci_subdevice_id = 0;
357         em_vendor_info_t *ent;
358
359         INIT_DEBUGOUT("lem_probe: begin");
360
361         pci_vendor_id = pci_get_vendor(dev);
362         if (pci_vendor_id != EM_VENDOR_ID)
363                 return (ENXIO);
364
365         pci_device_id = pci_get_device(dev);
366         pci_subvendor_id = pci_get_subvendor(dev);
367         pci_subdevice_id = pci_get_subdevice(dev);
368
369         ent = lem_vendor_info_array;
370         while (ent->vendor_id != 0) {
371                 if ((pci_vendor_id == ent->vendor_id) &&
372                     (pci_device_id == ent->device_id) &&
373
374                     ((pci_subvendor_id == ent->subvendor_id) ||
375                     (ent->subvendor_id == PCI_ANY_ID)) &&
376
377                     ((pci_subdevice_id == ent->subdevice_id) ||
378                     (ent->subdevice_id == PCI_ANY_ID))) {
379                         sprintf(adapter_name, "%s %s",
380                                 lem_strings[ent->index],
381                                 lem_driver_version);
382                         device_set_desc_copy(dev, adapter_name);
383                         return (BUS_PROBE_DEFAULT);
384                 }
385                 ent++;
386         }
387
388         return (ENXIO);
389 }
390
391 /*********************************************************************
392  *  Device initialization routine
393  *
394  *  The attach entry point is called when the driver is being loaded.
395  *  This routine identifies the type of hardware, allocates all resources
396  *  and initializes the hardware.
397  *
398  *  return 0 on success, positive on failure
399  *********************************************************************/
400
401 static int
402 lem_attach(device_t dev)
403 {
404         struct adapter  *adapter;
405         int             tsize, rsize;
406         int             error = 0;
407
408         INIT_DEBUGOUT("lem_attach: begin");
409
410         adapter = device_get_softc(dev);
411         adapter->dev = adapter->osdep.dev = dev;
412         EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
413         EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev));
414         EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev));
415
416         /* SYSCTL stuff */
417         SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
418             SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
419             OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
420             lem_sysctl_nvm_info, "I", "NVM Information");
421
422         callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
423         callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0);
424
425         /* Determine hardware and mac info */
426         lem_identify_hardware(adapter);
427
428         /* Setup PCI resources */
429         if (lem_allocate_pci_resources(adapter)) {
430                 device_printf(dev, "Allocation of PCI resources failed\n");
431                 error = ENXIO;
432                 goto err_pci;
433         }
434
435         /* Do Shared Code initialization */
436         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
437                 device_printf(dev, "Setup of Shared code failed\n");
438                 error = ENXIO;
439                 goto err_pci;
440         }
441
442         e1000_get_bus_info(&adapter->hw);
443
444         /* Set up some sysctls for the tunable interrupt delays */
445         lem_add_int_delay_sysctl(adapter, "rx_int_delay",
446             "receive interrupt delay in usecs", &adapter->rx_int_delay,
447             E1000_REGISTER(&adapter->hw, E1000_RDTR), lem_rx_int_delay_dflt);
448         lem_add_int_delay_sysctl(adapter, "tx_int_delay",
449             "transmit interrupt delay in usecs", &adapter->tx_int_delay,
450             E1000_REGISTER(&adapter->hw, E1000_TIDV), lem_tx_int_delay_dflt);
451         if (adapter->hw.mac.type >= e1000_82540) {
452                 lem_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
453                     "receive interrupt delay limit in usecs",
454                     &adapter->rx_abs_int_delay,
455                     E1000_REGISTER(&adapter->hw, E1000_RADV),
456                     lem_rx_abs_int_delay_dflt);
457                 lem_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
458                     "transmit interrupt delay limit in usecs",
459                     &adapter->tx_abs_int_delay,
460                     E1000_REGISTER(&adapter->hw, E1000_TADV),
461                     lem_tx_abs_int_delay_dflt);
462                 lem_add_int_delay_sysctl(adapter, "itr",
463                     "interrupt delay limit in usecs/4",
464                     &adapter->tx_itr,
465                     E1000_REGISTER(&adapter->hw, E1000_ITR),
466                     DEFAULT_ITR);
467         }
468
469         /* Sysctls for limiting the amount of work done in the taskqueue */
470         lem_add_rx_process_limit(adapter, "rx_processing_limit",
471             "max number of rx packets to process", &adapter->rx_process_limit,
472             lem_rx_process_limit);
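        /*
         * These helpers hang their OIDs off the device sysctl tree, so at
         * run time they appear as dev.em.<unit>.<name>; for example
         * (assuming unit 0):
         *
         *   sysctl dev.em.0.rx_processing_limit=200
         *   sysctl dev.em.0.rx_int_delay
         */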
473
474 #ifdef NIC_SEND_COMBINING
475         /* Sysctls to control mitigation */
476         lem_add_rx_process_limit(adapter, "sc_enable",
477             "driver TDT mitigation", &adapter->sc_enable, 0);
478 #endif /* NIC_SEND_COMBINING */
479 #ifdef BATCH_DISPATCH
480         lem_add_rx_process_limit(adapter, "batch_enable",
481             "driver rx batch", &adapter->batch_enable, 0);
482 #endif /* BATCH_DISPATCH */
483 #ifdef NIC_PARAVIRT
484         lem_add_rx_process_limit(adapter, "rx_retries",
485             "driver rx retries", &adapter->rx_retries, 0);
486 #endif /* NIC_PARAVIRT */
487
488         /* Sysctl for setting the interface flow control */
489         lem_set_flow_cntrl(adapter, "flow_control",
490             "flow control setting",
491             &adapter->fc_setting, lem_fc_setting);
492
493         /*
494          * Validate the number of transmit and receive descriptors.  It
495          * must not exceed the hardware maximum, and it must be a
496          * multiple of EM_DBA_ALIGN.
497          */
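        /*
         * For illustration, assuming the usual if_lem.h values (16-byte
         * legacy descriptors, EM_DBA_ALIGN of 128): the modulo test below
         * effectively requires lem_txd and lem_rxd to be multiples of 8,
         * so 256, 1024 and 2048 pass, while something like 500 would be
         * rejected and replaced with the default.
         */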
498         if (((lem_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
499             (adapter->hw.mac.type >= e1000_82544 && lem_txd > EM_MAX_TXD) ||
500             (adapter->hw.mac.type < e1000_82544 && lem_txd > EM_MAX_TXD_82543) ||
501             (lem_txd < EM_MIN_TXD)) {
502                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
503                     EM_DEFAULT_TXD, lem_txd);
504                 adapter->num_tx_desc = EM_DEFAULT_TXD;
505         } else
506                 adapter->num_tx_desc = lem_txd;
507         if (((lem_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
508             (adapter->hw.mac.type >= e1000_82544 && lem_rxd > EM_MAX_RXD) ||
509             (adapter->hw.mac.type < e1000_82544 && lem_rxd > EM_MAX_RXD_82543) ||
510             (lem_rxd < EM_MIN_RXD)) {
511                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
512                     EM_DEFAULT_RXD, lem_rxd);
513                 adapter->num_rx_desc = EM_DEFAULT_RXD;
514         } else
515                 adapter->num_rx_desc = lem_rxd;
516
517         adapter->hw.mac.autoneg = DO_AUTO_NEG;
518         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
519         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
520         adapter->rx_buffer_len = 2048;
521
522         e1000_init_script_state_82541(&adapter->hw, TRUE);
523         e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);
524
525         /* Copper options */
526         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
527                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
528                 adapter->hw.phy.disable_polarity_correction = FALSE;
529                 adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
530         }
531
532         /*
533          * Set the frame limits assuming
534          * standard ethernet sized frames.
535          */
536         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
537         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
538
539         /*
540          * This controls when hardware reports transmit completion
541          * status.
542          */
543         adapter->hw.mac.report_tx_early = 1;
544
545 #ifdef NIC_PARAVIRT
546         device_printf(dev, "driver supports paravirt, subdev 0x%x\n",
547                 adapter->hw.subsystem_device_id);
548         if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) {
549                 uint64_t bus_addr;
550
551                 device_printf(dev, "paravirt support on dev %p\n", adapter);
552                 tsize = 4096; // XXX one page for the csb
553                 if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) {
554                         device_printf(dev, "Unable to allocate csb memory\n");
555                         error = ENOMEM;
556                         goto err_csb;
557                 }
558                 /* Setup the Base of the CSB */
559                 adapter->csb = (struct paravirt_csb *)adapter->csb_mem.dma_vaddr;
560                 /* force the first kick */
561                 adapter->csb->host_need_txkick = 1; /* txring empty */
562                 adapter->csb->guest_need_rxkick = 1; /* no rx packets */
563                 bus_addr = adapter->csb_mem.dma_paddr;
564                 lem_add_rx_process_limit(adapter, "csb_on",
565                     "enable paravirt.", &adapter->csb->guest_csb_on, 0);
566                 lem_add_rx_process_limit(adapter, "txc_lim",
567                     "txc_lim", &adapter->csb->host_txcycles_lim, 1);
568
569                 /* some stats */
570 #define PA_SC(name, var, val)           \
571         lem_add_rx_process_limit(adapter, name, name, var, val)
572                 PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1);
573                 PA_SC("host_rxkick_at",&adapter->csb->host_rxkick_at, ~0);
574                 PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0);
575                 PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1);
576                 PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0);
577                 PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0);
578                 PA_SC("tdt_int_count",&adapter->tdt_int_count, 0);
579                 PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0);
580                 /* tell the host where the block is */
581                 E1000_WRITE_REG(&adapter->hw, E1000_CSBAH,
582                         (u32)(bus_addr >> 32));
583                 E1000_WRITE_REG(&adapter->hw, E1000_CSBAL,
584                         (u32)bus_addr);
585         }
586 #endif /* NIC_PARAVIRT */
587
588         tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
589             EM_DBA_ALIGN);
590
591         /* Allocate Transmit Descriptor ring */
592         if (lem_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
593                 device_printf(dev, "Unable to allocate tx_desc memory\n");
594                 error = ENOMEM;
595                 goto err_tx_desc;
596         }
597         adapter->tx_desc_base = 
598             (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
599
600         rsize = roundup2(adapter->num_rx_desc * sizeof(struct e1000_rx_desc),
601             EM_DBA_ALIGN);
602
603         /* Allocate Receive Descriptor ring */
604         if (lem_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
605                 device_printf(dev, "Unable to allocate rx_desc memory\n");
606                 error = ENOMEM;
607                 goto err_rx_desc;
608         }
609         adapter->rx_desc_base =
610             (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;
611
612         /* Allocate multicast array memory. */
613         adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
614             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
615         if (adapter->mta == NULL) {
616                 device_printf(dev, "Can not allocate multicast setup array\n");
617                 error = ENOMEM;
618                 goto err_hw_init;
619         }
620
621         /*
622         ** Start from a known state; this is
623         ** important for reading the NVM and
624         ** MAC address from it.
625         */
626         e1000_reset_hw(&adapter->hw);
627
628         /* Make sure we have a good EEPROM before we read from it */
629         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
630                 /*
631                 ** Some PCI-E parts fail the first check due to
632                 ** the link being in a sleep state; call it again.
633                 ** If it fails a second time, it is a real issue.
634                 */
635                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
636                         device_printf(dev,
637                             "The EEPROM Checksum Is Not Valid\n");
638                         error = EIO;
639                         goto err_hw_init;
640                 }
641         }
642
643         /* Copy the permanent MAC address out of the EEPROM */
644         if (e1000_read_mac_addr(&adapter->hw) < 0) {
645                 device_printf(dev, "EEPROM read error while reading MAC"
646                     " address\n");
647                 error = EIO;
648                 goto err_hw_init;
649         }
650
651         if (!lem_is_valid_ether_addr(adapter->hw.mac.addr)) {
652                 device_printf(dev, "Invalid MAC address\n");
653                 error = EIO;
654                 goto err_hw_init;
655         }
656
657         /* Initialize the hardware */
658         if (lem_hardware_init(adapter)) {
659                 device_printf(dev, "Unable to initialize the hardware\n");
660                 error = EIO;
661                 goto err_hw_init;
662         }
663
664         /* Allocate transmit descriptors and buffers */
665         if (lem_allocate_transmit_structures(adapter)) {
666                 device_printf(dev, "Could not setup transmit structures\n");
667                 error = ENOMEM;
668                 goto err_tx_struct;
669         }
670
671         /* Allocate receive descriptors and buffers */
672         if (lem_allocate_receive_structures(adapter)) {
673                 device_printf(dev, "Could not setup receive structures\n");
674                 error = ENOMEM;
675                 goto err_rx_struct;
676         }
677
678         /*
679         **  Do interrupt configuration
680         */
681         error = lem_allocate_irq(adapter);
682         if (error)
683                 goto err_rx_struct;
684
685         /*
686          * Get Wake-on-Lan and Management info for later use
687          */
688         lem_get_wakeup(dev);
689
690         /* Setup OS specific network interface */
691         if (lem_setup_interface(dev, adapter) != 0)
692                 goto err_rx_struct;
693
694         /* Initialize statistics */
695         lem_update_stats_counters(adapter);
696
697         adapter->hw.mac.get_link_status = 1;
698         lem_update_link_status(adapter);
699
700         /* Indicate SOL/IDER usage */
701         if (e1000_check_reset_block(&adapter->hw))
702                 device_printf(dev,
703                     "PHY reset is blocked due to SOL/IDER session.\n");
704
705         /* Do we need workaround for 82544 PCI-X adapter? */
706         if (adapter->hw.bus.type == e1000_bus_type_pcix &&
707             adapter->hw.mac.type == e1000_82544)
708                 adapter->pcix_82544 = TRUE;
709         else
710                 adapter->pcix_82544 = FALSE;
711
712         /* Register for VLAN events */
713         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
714             lem_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
715         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
716             lem_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); 
717
718         lem_add_hw_stats(adapter);
719
720         /* Non-AMT based hardware can now take control from firmware */
721         if (adapter->has_manage && !adapter->has_amt)
722                 lem_get_hw_control(adapter);
723
724         /* Tell the stack that the interface is not active */
725         adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
726
727         adapter->led_dev = led_create(lem_led_func, adapter,
728             device_get_nameunit(dev));
729
730 #ifdef DEV_NETMAP
731         lem_netmap_attach(adapter);
732 #endif /* DEV_NETMAP */
733         INIT_DEBUGOUT("lem_attach: end");
734
735         return (0);
736
737 err_rx_struct:
738         lem_free_transmit_structures(adapter);
739 err_tx_struct:
740 err_hw_init:
741         lem_release_hw_control(adapter);
742         lem_dma_free(adapter, &adapter->rxdma);
743 err_rx_desc:
744         lem_dma_free(adapter, &adapter->txdma);
745 err_tx_desc:
746 #ifdef NIC_PARAVIRT
747         lem_dma_free(adapter, &adapter->csb_mem);
748 err_csb:
749 #endif /* NIC_PARAVIRT */
750
751 err_pci:
752         if (adapter->ifp != NULL)
753                 if_free(adapter->ifp);
754         lem_free_pci_resources(adapter);
755         free(adapter->mta, M_DEVBUF);
756         EM_TX_LOCK_DESTROY(adapter);
757         EM_RX_LOCK_DESTROY(adapter);
758         EM_CORE_LOCK_DESTROY(adapter);
759
760         return (error);
761 }
762
763 /*********************************************************************
764  *  Device removal routine
765  *
766  *  The detach entry point is called when the driver is being removed.
767  *  This routine stops the adapter and deallocates all the resources
768  *  that were allocated for driver operation.
769  *
770  *  return 0 on success, positive on failure
771  *********************************************************************/
772
773 static int
774 lem_detach(device_t dev)
775 {
776         struct adapter  *adapter = device_get_softc(dev);
777         struct ifnet    *ifp = adapter->ifp;
778
779         INIT_DEBUGOUT("lem_detach: begin");
780
781         /* Make sure VLANs are not using the driver */
782         if (adapter->ifp->if_vlantrunk != NULL) {
783                 device_printf(dev,"Vlan in use, detach first\n");
784                 return (EBUSY);
785         }
786
787 #ifdef DEVICE_POLLING
788         if (ifp->if_capenable & IFCAP_POLLING)
789                 ether_poll_deregister(ifp);
790 #endif
791
792         if (adapter->led_dev != NULL)
793                 led_destroy(adapter->led_dev);
794
795         EM_CORE_LOCK(adapter);
796         EM_TX_LOCK(adapter);
797         adapter->in_detach = 1;
798         lem_stop(adapter);
799         e1000_phy_hw_reset(&adapter->hw);
800
801         lem_release_manageability(adapter);
802
803         EM_TX_UNLOCK(adapter);
804         EM_CORE_UNLOCK(adapter);
805
806         /* Unregister VLAN events */
807         if (adapter->vlan_attach != NULL)
808                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
809         if (adapter->vlan_detach != NULL)
810                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); 
811
812         ether_ifdetach(adapter->ifp);
813         callout_drain(&adapter->timer);
814         callout_drain(&adapter->tx_fifo_timer);
815
816 #ifdef DEV_NETMAP
817         netmap_detach(ifp);
818 #endif /* DEV_NETMAP */
819         lem_free_pci_resources(adapter);
820         bus_generic_detach(dev);
821         if_free(ifp);
822
823         lem_free_transmit_structures(adapter);
824         lem_free_receive_structures(adapter);
825
826         /* Free Transmit Descriptor ring */
827         if (adapter->tx_desc_base) {
828                 lem_dma_free(adapter, &adapter->txdma);
829                 adapter->tx_desc_base = NULL;
830         }
831
832         /* Free Receive Descriptor ring */
833         if (adapter->rx_desc_base) {
834                 lem_dma_free(adapter, &adapter->rxdma);
835                 adapter->rx_desc_base = NULL;
836         }
837
838 #ifdef NIC_PARAVIRT
839         if (adapter->csb) {
840                 lem_dma_free(adapter, &adapter->csb_mem);
841                 adapter->csb = NULL;
842         }
843 #endif /* NIC_PARAVIRT */
844         lem_release_hw_control(adapter);
845         free(adapter->mta, M_DEVBUF);
846         EM_TX_LOCK_DESTROY(adapter);
847         EM_RX_LOCK_DESTROY(adapter);
848         EM_CORE_LOCK_DESTROY(adapter);
849
850         return (0);
851 }
852
853 /*********************************************************************
854  *
855  *  Shutdown entry point
856  *
857  **********************************************************************/
858
859 static int
860 lem_shutdown(device_t dev)
861 {
862         return lem_suspend(dev);
863 }
864
865 /*
866  * Suspend/resume device methods.
867  */
868 static int
869 lem_suspend(device_t dev)
870 {
871         struct adapter *adapter = device_get_softc(dev);
872
873         EM_CORE_LOCK(adapter);
874
875         lem_release_manageability(adapter);
876         lem_release_hw_control(adapter);
877         lem_enable_wakeup(dev);
878
879         EM_CORE_UNLOCK(adapter);
880
881         return bus_generic_suspend(dev);
882 }
883
884 static int
885 lem_resume(device_t dev)
886 {
887         struct adapter *adapter = device_get_softc(dev);
888         struct ifnet *ifp = adapter->ifp;
889
890         EM_CORE_LOCK(adapter);
891         lem_init_locked(adapter);
892         lem_init_manageability(adapter);
893         EM_CORE_UNLOCK(adapter);
894         lem_start(ifp);
895
896         return bus_generic_resume(dev);
897 }
898
899
900 static void
901 lem_start_locked(struct ifnet *ifp)
902 {
903         struct adapter  *adapter = ifp->if_softc;
904         struct mbuf     *m_head;
905
906         EM_TX_LOCK_ASSERT(adapter);
907
908         if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
909             IFF_DRV_RUNNING)
910                 return;
911         if (!adapter->link_active)
912                 return;
913
914         /*
915          * Force a cleanup if the number of available TX
916          * descriptors hits the threshold
917          */
918         if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
919                 lem_txeof(adapter);
920                 /* Do we now have at least the minimum? */
921                 if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
922                         adapter->no_tx_desc_avail1++;
923                         return;
924                 }
925         }
926
927         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
928
929                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
930                 if (m_head == NULL)
931                         break;
932                 /*
933                  *  Encapsulation can modify our pointer, and/or make it
934                  *  NULL on failure.  In that event, we can't requeue.
935                  */
936                 if (lem_xmit(adapter, &m_head)) {
937                         if (m_head == NULL)
938                                 break;
939                         ifp->if_drv_flags |= IFF_DRV_OACTIVE;
940                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
941                         break;
942                 }
943
944                 /* Send a copy of the frame to the BPF listener */
945                 ETHER_BPF_MTAP(ifp, m_head);
946
947                 /* Set timeout in case hardware has problems transmitting. */
948                 adapter->watchdog_check = TRUE;
949                 adapter->watchdog_time = ticks;
950         }
951         if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
952                 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
953 #ifdef NIC_PARAVIRT
954         if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && adapter->csb &&
955             adapter->csb->guest_csb_on &&
956             !(adapter->csb->guest_need_txkick & 1))  {
957                 adapter->csb->guest_need_txkick = 1;
958                 adapter->guest_need_kick_count++;
959                 // XXX memory barrier
960                 lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE
961         }
962 #endif /* NIC_PARAVIRT */
963
964         return;
965 }
966
967 static void
968 lem_start(struct ifnet *ifp)
969 {
970         struct adapter *adapter = ifp->if_softc;
971
972         EM_TX_LOCK(adapter);
973         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
974                 lem_start_locked(ifp);
975         EM_TX_UNLOCK(adapter);
976 }
977
978 /*********************************************************************
979  *  Ioctl entry point
980  *
981  *  lem_ioctl is called when the user wants to configure the
982  *  interface.
983  *
984  *  return 0 on success, positive on failure
985  **********************************************************************/
986
987 static int
988 lem_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
989 {
990         struct adapter  *adapter = ifp->if_softc;
991         struct ifreq    *ifr = (struct ifreq *)data;
992 #if defined(INET) || defined(INET6)
993         struct ifaddr   *ifa = (struct ifaddr *)data;
994 #endif
995         bool            avoid_reset = FALSE;
996         int             error = 0;
997
998         if (adapter->in_detach)
999                 return (error);
1000
1001         switch (command) {
1002         case SIOCSIFADDR:
1003 #ifdef INET
1004                 if (ifa->ifa_addr->sa_family == AF_INET)
1005                         avoid_reset = TRUE;
1006 #endif
1007 #ifdef INET6
1008                 if (ifa->ifa_addr->sa_family == AF_INET6)
1009                         avoid_reset = TRUE;
1010 #endif
1011                 /*
1012                 ** Calling init results in link renegotiation,
1013                 ** so we avoid doing it when possible.
1014                 */
1015                 if (avoid_reset) {
1016                         ifp->if_flags |= IFF_UP;
1017                         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1018                                 lem_init(adapter);
1019 #ifdef INET
1020                         if (!(ifp->if_flags & IFF_NOARP))
1021                                 arp_ifinit(ifp, ifa);
1022 #endif
1023                 } else
1024                         error = ether_ioctl(ifp, command, data);
1025                 break;
1026         case SIOCSIFMTU:
1027             {
1028                 int max_frame_size;
1029
1030                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1031
1032                 EM_CORE_LOCK(adapter);
1033                 switch (adapter->hw.mac.type) {
1034                 case e1000_82542:
1035                         max_frame_size = ETHER_MAX_LEN;
1036                         break;
1037                 default:
1038                         max_frame_size = MAX_JUMBO_FRAME_SIZE;
1039                 }
1040                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1041                     ETHER_CRC_LEN) {
1042                         EM_CORE_UNLOCK(adapter);
1043                         error = EINVAL;
1044                         break;
1045                 }
1046
1047                 ifp->if_mtu = ifr->ifr_mtu;
1048                 adapter->max_frame_size =
1049                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1050                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1051                         lem_init_locked(adapter);
1052                 EM_CORE_UNLOCK(adapter);
1053                 break;
1054             }
1055         case SIOCSIFFLAGS:
1056                 IOCTL_DEBUGOUT("ioctl rcv'd:\
1057                     SIOCSIFFLAGS (Set Interface Flags)");
1058                 EM_CORE_LOCK(adapter);
1059                 if (ifp->if_flags & IFF_UP) {
1060                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1061                                 if ((ifp->if_flags ^ adapter->if_flags) &
1062                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1063                                         lem_disable_promisc(adapter);
1064                                         lem_set_promisc(adapter);
1065                                 }
1066                         } else
1067                                 lem_init_locked(adapter);
1068                 } else
1069                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1070                                 EM_TX_LOCK(adapter);
1071                                 lem_stop(adapter);
1072                                 EM_TX_UNLOCK(adapter);
1073                         }
1074                 adapter->if_flags = ifp->if_flags;
1075                 EM_CORE_UNLOCK(adapter);
1076                 break;
1077         case SIOCADDMULTI:
1078         case SIOCDELMULTI:
1079                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1080                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1081                         EM_CORE_LOCK(adapter);
1082                         lem_disable_intr(adapter);
1083                         lem_set_multi(adapter);
1084                         if (adapter->hw.mac.type == e1000_82542 && 
1085                             adapter->hw.revision_id == E1000_REVISION_2) {
1086                                 lem_initialize_receive_unit(adapter);
1087                         }
1088 #ifdef DEVICE_POLLING
1089                         if (!(ifp->if_capenable & IFCAP_POLLING))
1090 #endif
1091                                 lem_enable_intr(adapter);
1092                         EM_CORE_UNLOCK(adapter);
1093                 }
1094                 break;
1095         case SIOCSIFMEDIA:
1096                 /* Check SOL/IDER usage */
1097                 EM_CORE_LOCK(adapter);
1098                 if (e1000_check_reset_block(&adapter->hw)) {
1099                         EM_CORE_UNLOCK(adapter);
1100                         device_printf(adapter->dev, "Media change is"
1101                             " blocked due to SOL/IDER session.\n");
1102                         break;
1103                 }
1104                 EM_CORE_UNLOCK(adapter);
1105         case SIOCGIFMEDIA:
1106                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1107                     SIOCxIFMEDIA (Get/Set Interface Media)");
1108                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1109                 break;
1110         case SIOCSIFCAP:
1111             {
1112                 int mask, reinit;
1113
1114                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1115                 reinit = 0;
1116                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1117 #ifdef DEVICE_POLLING
1118                 if (mask & IFCAP_POLLING) {
1119                         if (ifr->ifr_reqcap & IFCAP_POLLING) {
1120                                 error = ether_poll_register(lem_poll, ifp);
1121                                 if (error)
1122                                         return (error);
1123                                 EM_CORE_LOCK(adapter);
1124                                 lem_disable_intr(adapter);
1125                                 ifp->if_capenable |= IFCAP_POLLING;
1126                                 EM_CORE_UNLOCK(adapter);
1127                         } else {
1128                                 error = ether_poll_deregister(ifp);
1129                                 /* Enable interrupt even in error case */
1130                                 EM_CORE_LOCK(adapter);
1131                                 lem_enable_intr(adapter);
1132                                 ifp->if_capenable &= ~IFCAP_POLLING;
1133                                 EM_CORE_UNLOCK(adapter);
1134                         }
1135                 }
1136 #endif
1137                 if (mask & IFCAP_HWCSUM) {
1138                         ifp->if_capenable ^= IFCAP_HWCSUM;
1139                         reinit = 1;
1140                 }
1141                 if (mask & IFCAP_VLAN_HWTAGGING) {
1142                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1143                         reinit = 1;
1144                 }
1145                 if ((mask & IFCAP_WOL) &&
1146                     (ifp->if_capabilities & IFCAP_WOL) != 0) {
1147                         if (mask & IFCAP_WOL_MCAST)
1148                                 ifp->if_capenable ^= IFCAP_WOL_MCAST;
1149                         if (mask & IFCAP_WOL_MAGIC)
1150                                 ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1151                 }
1152                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1153                         lem_init(adapter);
1154                 VLAN_CAPABILITIES(ifp);
1155                 break;
1156             }
1157
1158         default:
1159                 error = ether_ioctl(ifp, command, data);
1160                 break;
1161         }
1162
1163         return (error);
1164 }
1165
1166
1167 /*********************************************************************
1168  *  Init entry point
1169  *
1170  *  This routine is used in two ways: the stack uses it as the init
1171  *  entry point in the network interface structure, and the driver
1172  *  uses it as a hw/sw initialization routine to get to a consistent
1173  *  state.
1174  *
1175  *  It returns nothing; failures are reported via device_printf().
1176  **********************************************************************/
1177
1178 static void
1179 lem_init_locked(struct adapter *adapter)
1180 {
1181         struct ifnet    *ifp = adapter->ifp;
1182         device_t        dev = adapter->dev;
1183         u32             pba;
1184
1185         INIT_DEBUGOUT("lem_init: begin");
1186
1187         EM_CORE_LOCK_ASSERT(adapter);
1188
1189         EM_TX_LOCK(adapter);
1190         lem_stop(adapter);
1191         EM_TX_UNLOCK(adapter);
1192
1193         /*
1194          * Packet Buffer Allocation (PBA)
1195          * Writing PBA sets the receive portion of the buffer
1196          * the remainder is used for the transmit buffer.
1197          *
1198          * Devices before the 82547 had a Packet Buffer of 64K.
1199          *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
1200          * After the 82547 the buffer was reduced to 40K.
1201          *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
1202          *   Note: default does not leave enough room for Jumbo Frame >10k.
1203          */
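        /*
         * Worked example for the 82547 case below, assuming the usual
         * header values (E1000_PBA_30K == 30, E1000_PBA_40K == 40, and
         * EM_PBA_BYTES_SHIFT converting KB to bytes): with a standard
         * MTU, pba = 30, so tx_fifo_size = (40 - 30) << 10 = 10240 bytes
         * of on-chip packet buffer remain for the transmit FIFO.
         */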
1204         switch (adapter->hw.mac.type) {
1205         case e1000_82547:
1206         case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
1207                 if (adapter->max_frame_size > 8192)
1208                         pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
1209                 else
1210                         pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
1211                 adapter->tx_fifo_head = 0;
1212                 adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
1213                 adapter->tx_fifo_size =
1214                     (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
1215                 break;
1216         default:
1217                 /* Devices before 82547 had a Packet Buffer of 64K.   */
1218                 if (adapter->max_frame_size > 8192)
1219                         pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
1220                 else
1221                         pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
1222         }
1223
1224         INIT_DEBUGOUT1("lem_init: pba=%dK",pba);
1225         E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1226         
1227         /* Get the latest MAC address; the user may have set an LAA */
1228         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1229               ETHER_ADDR_LEN);
1230
1231         /* Put the address into the Receive Address Array */
1232         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1233
1234         /* Initialize the hardware */
1235         if (lem_hardware_init(adapter)) {
1236                 device_printf(dev, "Unable to initialize the hardware\n");
1237                 return;
1238         }
1239         lem_update_link_status(adapter);
1240
1241         /* Setup VLAN support, basic and offload if available */
1242         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1243
1244         /* Set hardware offload abilities */
1245         ifp->if_hwassist = 0;
1246         if (adapter->hw.mac.type >= e1000_82543) {
1247                 if (ifp->if_capenable & IFCAP_TXCSUM)
1248                         ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1249         }
1250
1251         /* Configure for OS presence */
1252         lem_init_manageability(adapter);
1253
1254         /* Prepare transmit descriptors and buffers */
1255         lem_setup_transmit_structures(adapter);
1256         lem_initialize_transmit_unit(adapter);
1257
1258         /* Setup Multicast table */
1259         lem_set_multi(adapter);
1260
1261         /* Prepare receive descriptors and buffers */
1262         if (lem_setup_receive_structures(adapter)) {
1263                 device_printf(dev, "Could not setup receive structures\n");
1264                 EM_TX_LOCK(adapter);
1265                 lem_stop(adapter);
1266                 EM_TX_UNLOCK(adapter);
1267                 return;
1268         }
1269         lem_initialize_receive_unit(adapter);
1270
1271         /* Use real VLAN Filter support? */
1272         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1273                 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1274                         /* Use real VLAN Filter support */
1275                         lem_setup_vlan_hw_support(adapter);
1276                 else {
1277                         u32 ctrl;
1278                         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1279                         ctrl |= E1000_CTRL_VME;
1280                         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1281                 }
1282         }
1283
1284         /* Don't lose promiscuous settings */
1285         lem_set_promisc(adapter);
1286
1287         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1288         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1289
1290         callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
1291         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1292
1293 #ifdef DEVICE_POLLING
1294         /*
1295          * Only enable interrupts if we are not polling; make sure
1296          * they are off otherwise.
1297          */
1298         if (ifp->if_capenable & IFCAP_POLLING)
1299                 lem_disable_intr(adapter);
1300         else
1301 #endif /* DEVICE_POLLING */
1302                 lem_enable_intr(adapter);
1303
1304         /* AMT based hardware can now take control from firmware */
1305         if (adapter->has_manage && adapter->has_amt)
1306                 lem_get_hw_control(adapter);
1307 }
1308
1309 static void
1310 lem_init(void *arg)
1311 {
1312         struct adapter *adapter = arg;
1313
1314         EM_CORE_LOCK(adapter);
1315         lem_init_locked(adapter);
1316         EM_CORE_UNLOCK(adapter);
1317 }
1318
1319
1320 #ifdef DEVICE_POLLING
1321 /*********************************************************************
1322  *
1323  *  Legacy polling routine  
1324  *
1325  *********************************************************************/
1326 static int
1327 lem_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1328 {
1329         struct adapter *adapter = ifp->if_softc;
1330         u32             reg_icr, rx_done = 0;
1331
1332         EM_CORE_LOCK(adapter);
1333         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1334                 EM_CORE_UNLOCK(adapter);
1335                 return (rx_done);
1336         }
1337
1338         if (cmd == POLL_AND_CHECK_STATUS) {
1339                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1340                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1341                         callout_stop(&adapter->timer);
1342                         adapter->hw.mac.get_link_status = 1;
1343                         lem_update_link_status(adapter);
1344                         callout_reset(&adapter->timer, hz,
1345                             lem_local_timer, adapter);
1346                 }
1347         }
1348         EM_CORE_UNLOCK(adapter);
1349
1350         lem_rxeof(adapter, count, &rx_done);
1351
1352         EM_TX_LOCK(adapter);
1353         lem_txeof(adapter);
1354         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1355                 lem_start_locked(ifp);
1356         EM_TX_UNLOCK(adapter);
1357         return (rx_done);
1358 }
1359 #endif /* DEVICE_POLLING */
1360
1361 /*********************************************************************
1362  *
1363  *  Legacy Interrupt Service routine  
1364  *
1365  *********************************************************************/
1366 static void
1367 lem_intr(void *arg)
1368 {
1369         struct adapter  *adapter = arg;
1370         struct ifnet    *ifp = adapter->ifp;
1371         u32             reg_icr;
1372
1373
1374         if ((ifp->if_capenable & IFCAP_POLLING) ||
1375             ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
1376                 return;
1377
1378         EM_CORE_LOCK(adapter);
1379         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1380         if (reg_icr & E1000_ICR_RXO)
1381                 adapter->rx_overruns++;
1382
1383         if ((reg_icr == 0xffffffff) || (reg_icr == 0)) {
1384                 EM_CORE_UNLOCK(adapter);
1385                 return;
1386         }
1387
1388         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1389                 callout_stop(&adapter->timer);
1390                 adapter->hw.mac.get_link_status = 1;
1391                 lem_update_link_status(adapter);
1392                 /* Deal with TX cruft when link lost */
1393                 lem_tx_purge(adapter);
1394                 callout_reset(&adapter->timer, hz,
1395                     lem_local_timer, adapter);
1396                 EM_CORE_UNLOCK(adapter);
1397                 return;
1398         }
1399
1400         EM_CORE_UNLOCK(adapter);
1401         lem_rxeof(adapter, -1, NULL);
1402
1403         EM_TX_LOCK(adapter);
1404         lem_txeof(adapter);
1405         if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1406             !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1407                 lem_start_locked(ifp);
1408         EM_TX_UNLOCK(adapter);
1409         return;
1410 }
1411
1412
1413 static void
1414 lem_handle_link(void *context, int pending)
1415 {
1416         struct adapter  *adapter = context;
1417         struct ifnet *ifp = adapter->ifp;
1418
1419         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1420                 return;
1421
1422         EM_CORE_LOCK(adapter);
1423         callout_stop(&adapter->timer);
1424         lem_update_link_status(adapter);
1425         /* Deal with TX cruft when link lost */
1426         lem_tx_purge(adapter);
1427         callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
1428         EM_CORE_UNLOCK(adapter);
1429 }
1430
1431
1432 /* Combined RX/TX handler, used by Legacy and MSI */
1433 static void
1434 lem_handle_rxtx(void *context, int pending)
1435 {
1436         struct adapter  *adapter = context;
1437         struct ifnet    *ifp = adapter->ifp;
1438
1439
1440         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1441                 bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL);
1442                 EM_TX_LOCK(adapter);
1443                 lem_txeof(adapter);
1444                 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1445                         lem_start_locked(ifp);
1446                 EM_TX_UNLOCK(adapter);
1447                 if (more) {
1448                         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1449                         return;
1450                 }
1451         }
1452
1453         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1454                 lem_enable_intr(adapter);
1455 }
1456
1457 /*********************************************************************
1458  *
1459  *  Fast Legacy/MSI Combined Interrupt Service routine  
1460  *
1461  *********************************************************************/
1462 static int
1463 lem_irq_fast(void *arg)
1464 {
1465         struct adapter  *adapter = arg;
1466         struct ifnet    *ifp;
1467         u32             reg_icr;
1468
1469         ifp = adapter->ifp;
1470
1471         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1472
1473         /* Hot eject?  */
1474         if (reg_icr == 0xffffffff)
1475                 return FILTER_STRAY;
1476
1477         /* Definitely not our interrupt.  */
1478         if (reg_icr == 0x0)
1479                 return FILTER_STRAY;
1480
1481         /*
1482          * Mask interrupts until the taskqueue is finished running.  This is
1483          * cheap; just assume that it is needed.  This also works around the
1484          * MSI message reordering errata on certain systems.
1485          */
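        /*
         * Descriptive note: the deferred handler, lem_handle_rxtx(),
         * re-enables interrupts once it has drained the RX/TX work, so the
         * device stays masked only while the taskqueue is pending.
         */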
1486         lem_disable_intr(adapter);
1487         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1488
1489         /* Link status change */
1490         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1491                 adapter->hw.mac.get_link_status = 1;
1492                 taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1493         }
1494
1495         if (reg_icr & E1000_ICR_RXO)
1496                 adapter->rx_overruns++;
1497         return FILTER_HANDLED;
1498 }
1499
1500
1501 /*********************************************************************
1502  *
1503  *  Media Ioctl callback
1504  *
1505  *  This routine is called whenever the user queries the status of
1506  *  the interface using ifconfig.
1507  *
1508  **********************************************************************/
1509 static void
1510 lem_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1511 {
1512         struct adapter *adapter = ifp->if_softc;
1513         u_char fiber_type = IFM_1000_SX;
1514
1515         INIT_DEBUGOUT("lem_media_status: begin");
1516
1517         EM_CORE_LOCK(adapter);
1518         lem_update_link_status(adapter);
1519
1520         ifmr->ifm_status = IFM_AVALID;
1521         ifmr->ifm_active = IFM_ETHER;
1522
1523         if (!adapter->link_active) {
1524                 EM_CORE_UNLOCK(adapter);
1525                 return;
1526         }
1527
1528         ifmr->ifm_status |= IFM_ACTIVE;
1529
1530         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1531             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1532                 if (adapter->hw.mac.type == e1000_82545)
1533                         fiber_type = IFM_1000_LX;
1534                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1535         } else {
1536                 switch (adapter->link_speed) {
1537                 case 10:
1538                         ifmr->ifm_active |= IFM_10_T;
1539                         break;
1540                 case 100:
1541                         ifmr->ifm_active |= IFM_100_TX;
1542                         break;
1543                 case 1000:
1544                         ifmr->ifm_active |= IFM_1000_T;
1545                         break;
1546                 }
1547                 if (adapter->link_duplex == FULL_DUPLEX)
1548                         ifmr->ifm_active |= IFM_FDX;
1549                 else
1550                         ifmr->ifm_active |= IFM_HDX;
1551         }
1552         EM_CORE_UNLOCK(adapter);
1553 }
1554
1555 /*********************************************************************
1556  *
1557  *  Media Ioctl callback
1558  *
1559  *  This routine is called when the user changes speed/duplex using
1560  *  media/mediaopt options with ifconfig.
1561  *
1562  **********************************************************************/
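/*
 * Illustrative usage (interface name assumed): forcing 100 Mb/s full-duplex
 * would look like "ifconfig em0 media 100baseTX mediaopt full-duplex", and
 * "ifconfig em0 media autoselect" restores autonegotiation.
 */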
1563 static int
1564 lem_media_change(struct ifnet *ifp)
1565 {
1566         struct adapter *adapter = ifp->if_softc;
1567         struct ifmedia  *ifm = &adapter->media;
1568
1569         INIT_DEBUGOUT("lem_media_change: begin");
1570
1571         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1572                 return (EINVAL);
1573
1574         EM_CORE_LOCK(adapter);
1575         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1576         case IFM_AUTO:
1577                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1578                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1579                 break;
1580         case IFM_1000_LX:
1581         case IFM_1000_SX:
1582         case IFM_1000_T:
1583                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1584                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1585                 break;
1586         case IFM_100_TX:
1587                 adapter->hw.mac.autoneg = FALSE;
1588                 adapter->hw.phy.autoneg_advertised = 0;
1589                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1590                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1591                 else
1592                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1593                 break;
1594         case IFM_10_T:
1595                 adapter->hw.mac.autoneg = FALSE;
1596                 adapter->hw.phy.autoneg_advertised = 0;
1597                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1598                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1599                 else
1600                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1601                 break;
1602         default:
1603                 device_printf(adapter->dev, "Unsupported media type\n");
1604         }
1605
1606         lem_init_locked(adapter);
1607         EM_CORE_UNLOCK(adapter);
1608
1609         return (0);
1610 }
1611
1612 /*********************************************************************
1613  *
1614  *  This routine maps the mbufs to tx descriptors.
1615  *
1616  *  return 0 on success, positive on failure
1617  **********************************************************************/
1618
1619 static int
1620 lem_xmit(struct adapter *adapter, struct mbuf **m_headp)
1621 {
1622         bus_dma_segment_t       segs[EM_MAX_SCATTER];
1623         bus_dmamap_t            map;
1624         struct em_buffer        *tx_buffer, *tx_buffer_mapped;
1625         struct e1000_tx_desc    *ctxd = NULL;
1626         struct mbuf             *m_head;
1627         u32                     txd_upper, txd_lower, txd_used, txd_saved;
1628         int                     error, nsegs, i, j, first, last = 0;
1629
1630         m_head = *m_headp;
1631         txd_upper = txd_lower = txd_used = txd_saved = 0;
1632
1633         /*
1634         ** When doing checksum offload, it is critical to
1635         ** make sure the first mbuf contains more than just the header,
1636         ** because the checksum setup routine expects header data to be present.
1637         */
1638         if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
1639             (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
1640                 m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
1641                 *m_headp = m_head;
1642                 if (m_head == NULL)
1643                         return (ENOBUFS);
1644         }
1645
1646         /*
1647          * Map the packet for DMA
1648          *
1649          * Capture the first descriptor index;
1650          * this descriptor will record the index
1651          * of the EOP descriptor, which is the only
1652          * one that gets a DONE bit writeback.
1653          */
1654         first = adapter->next_avail_tx_desc;
1655         tx_buffer = &adapter->tx_buffer_area[first];
1656         tx_buffer_mapped = tx_buffer;
1657         map = tx_buffer->map;
1658
1659         error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1660             *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1661
1662         /*
1663          * There are two types of errors we can (try) to handle:
1664          * - EFBIG means the mbuf chain was too long and bus_dma ran
1665          *   out of segments.  Defragment the mbuf chain and try again.
1666          * - ENOMEM means bus_dma could not obtain enough bounce buffers
1667          *   at this point in time.  Defer sending and try again later.
1668          * All other errors, in particular EINVAL, are fatal and prevent the
1669          * mbuf chain from ever going through.  Drop it and report error.
1670          */
1671         if (error == EFBIG) {
1672                 struct mbuf *m;
1673
1674                 m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
1675                 if (m == NULL) {
1676                         adapter->mbuf_defrag_failed++;
1677                         m_freem(*m_headp);
1678                         *m_headp = NULL;
1679                         return (ENOBUFS);
1680                 }
1681                 *m_headp = m;
1682
1683                 /* Try it again */
1684                 error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
1685                     *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1686
1687                 if (error) {
1688                         adapter->no_tx_dma_setup++;
1689                         m_freem(*m_headp);
1690                         *m_headp = NULL;
1691                         return (error);
1692                 }
1693         } else if (error != 0) {
1694                 adapter->no_tx_dma_setup++;
1695                 return (error);
1696         }
1697
1698         if (adapter->num_tx_desc_avail < (nsegs + 2)) {
1699                 adapter->no_tx_desc_avail2++;
1700                 bus_dmamap_unload(adapter->txtag, map);
1701                 return (ENOBUFS);
1702         }
1703         m_head = *m_headp;
1704
1705         /* Do hardware assists */
1706         if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
1707                 lem_transmit_checksum_setup(adapter,  m_head,
1708                     &txd_upper, &txd_lower);
1709
1710         i = adapter->next_avail_tx_desc;
1711         if (adapter->pcix_82544) 
1712                 txd_saved = i;
1713
1714         /* Set up our transmit descriptors */
1715         for (j = 0; j < nsegs; j++) {
1716                 bus_size_t seg_len;
1717                 bus_addr_t seg_addr;
1718                 /* If the adapter is an 82544 on a PCI-X bus */
1719                 if(adapter->pcix_82544) {
1720                         DESC_ARRAY      desc_array;
1721                         u32             array_elements, counter;
1722                         /*
1723                          * Check the Address and Length combination and
1724                          * split the data accordingly
1725                          */
1726                         array_elements = lem_fill_descriptors(segs[j].ds_addr,
1727                             segs[j].ds_len, &desc_array);
1728                         for (counter = 0; counter < array_elements; counter++) {
1729                                 if (txd_used == adapter->num_tx_desc_avail) {
1730                                         adapter->next_avail_tx_desc = txd_saved;
1731                                         adapter->no_tx_desc_avail2++;
1732                                         bus_dmamap_unload(adapter->txtag, map);
1733                                         return (ENOBUFS);
1734                                 }
1735                                 tx_buffer = &adapter->tx_buffer_area[i];
1736                                 ctxd = &adapter->tx_desc_base[i];
1737                                 ctxd->buffer_addr = htole64(
1738                                     desc_array.descriptor[counter].address);
1739                                 ctxd->lower.data = htole32(
1740                                     (adapter->txd_cmd | txd_lower | (u16)
1741                                     desc_array.descriptor[counter].length));
1742                                 ctxd->upper.data =
1743                                     htole32((txd_upper));
1744                                 last = i;
1745                                 if (++i == adapter->num_tx_desc)
1746                                          i = 0;
1747                                 tx_buffer->m_head = NULL;
1748                                 tx_buffer->next_eop = -1;
1749                                 txd_used++;
1750                         }
1751                 } else {
1752                         tx_buffer = &adapter->tx_buffer_area[i];
1753                         ctxd = &adapter->tx_desc_base[i];
1754                         seg_addr = segs[j].ds_addr;
1755                         seg_len  = segs[j].ds_len;
1756                         ctxd->buffer_addr = htole64(seg_addr);
1757                         ctxd->lower.data = htole32(
1758                         adapter->txd_cmd | txd_lower | seg_len);
1759                         ctxd->upper.data =
1760                             htole32(txd_upper);
1761                         last = i;
1762                         if (++i == adapter->num_tx_desc)
1763                                 i = 0;
1764                         tx_buffer->m_head = NULL;
1765                         tx_buffer->next_eop = -1;
1766                 }
1767         }
1768
1769         adapter->next_avail_tx_desc = i;
1770
1771         if (adapter->pcix_82544)
1772                 adapter->num_tx_desc_avail -= txd_used;
1773         else
1774                 adapter->num_tx_desc_avail -= nsegs;
1775
1776         if (m_head->m_flags & M_VLANTAG) {
1777                 /* Set the vlan id. */
1778                 ctxd->upper.fields.special =
1779                     htole16(m_head->m_pkthdr.ether_vtag);
1780                 /* Tell hardware to add tag */
1781                 ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
1782         }
1783
1784         tx_buffer->m_head = m_head;
1785         tx_buffer_mapped->map = tx_buffer->map;
1786         tx_buffer->map = map;
1787         bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);
1788
1789         /*
1790          * Last Descriptor of Packet
1791          * needs End Of Packet (EOP)
1792          * and Report Status (RS)
1793          */
1794         ctxd->lower.data |=
1795             htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1796         /*
1797          * Keep track in the first buffer of which
1798          * descriptor will be written back.
1799          */
1800         tx_buffer = &adapter->tx_buffer_area[first];
1801         tx_buffer->next_eop = last;
1802         adapter->watchdog_time = ticks;
1803
1804         /*
1805          * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1806          * that this frame is available to transmit.
1807          */
1808         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
1809             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1810
1811 #ifdef NIC_PARAVIRT
1812         if (adapter->csb) {
1813                 adapter->csb->guest_tdt = i;
1814                 /* XXX memory barrier ? */
1815                 if (adapter->csb->guest_csb_on &&
1816                     !(adapter->csb->host_need_txkick & 1)) {
1817                         /* XXX maybe useless:
1818                          * clean the ring; maybe do it before?
1819                          * maybe add a little bit of hysteresis?
1820                          */
1821                         if (adapter->num_tx_desc_avail <= 64) {// XXX
1822                                 lem_txeof(adapter);
1823                         }
1824                         return (0);
1825                 }
1826         }
1827 #endif /* NIC_PARAVIRT */
1828
1829 #ifdef NIC_SEND_COMBINING
1830         if (adapter->sc_enable) {
1831                 if (adapter->shadow_tdt & MIT_PENDING_INT) {
1832                         /* signal intr and data pending */
1833                         adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff);
1834                         return (0);
1835                 } else {
1836                         adapter->shadow_tdt = MIT_PENDING_INT;
1837                 }
1838         }
1839 #endif /* NIC_SEND_COMBINING */
1840
1841         if (adapter->hw.mac.type == e1000_82547 &&
1842             adapter->link_duplex == HALF_DUPLEX)
1843                 lem_82547_move_tail(adapter);
1844         else {
1845                 E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i);
1846                 if (adapter->hw.mac.type == e1000_82547)
1847                         lem_82547_update_fifo_head(adapter,
1848                             m_head->m_pkthdr.len);
1849         }
1850
1851         return (0);
1852 }
1853
1854 /*********************************************************************
1855  *
1856  * 82547 workaround to avoid a controller hang in a half-duplex environment.
1857  * The workaround is to avoid queuing a large packet that would span
1858  * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1859  * in this case, and we do that only when the FIFO is quiescent.
1860  *
1861  **********************************************************************/
1862 static void
1863 lem_82547_move_tail(void *arg)
1864 {
1865         struct adapter *adapter = arg;
1866         struct e1000_tx_desc *tx_desc;
1867         u16     hw_tdt, sw_tdt, length = 0;
1868         bool    eop = 0;
1869
1870         EM_TX_LOCK_ASSERT(adapter);
1871
1872         hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0));
1873         sw_tdt = adapter->next_avail_tx_desc;
1874         
1875         while (hw_tdt != sw_tdt) {
1876                 tx_desc = &adapter->tx_desc_base[hw_tdt];
1877                 length += tx_desc->lower.flags.length;
1878                 eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1879                 if (++hw_tdt == adapter->num_tx_desc)
1880                         hw_tdt = 0;
1881
1882                 if (eop) {
1883                         if (lem_82547_fifo_workaround(adapter, length)) {
1884                                 adapter->tx_fifo_wrk_cnt++;
1885                                 callout_reset(&adapter->tx_fifo_timer, 1,
1886                                         lem_82547_move_tail, adapter);
1887                                 break;
1888                         }
1889                         E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt);
1890                         lem_82547_update_fifo_head(adapter, length);
1891                         length = 0;
1892                 }
1893         }       
1894 }
1895
1896 static int
1897 lem_82547_fifo_workaround(struct adapter *adapter, int len)
1898 {       
1899         int fifo_space, fifo_pkt_len;
1900
1901         fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
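        /*
         * Illustrative arithmetic (assuming EM_FIFO_HDR is 16 bytes): a
         * 1514-byte frame needs roundup2(1514 + 16, 16) = 1536 bytes of
         * FIFO space.
         */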
1902
1903         if (adapter->link_duplex == HALF_DUPLEX) {
1904                 fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1905
1906                 if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1907                         if (lem_82547_tx_fifo_reset(adapter))
1908                                 return (0);
1909                         else
1910                                 return (1);
1911                 }
1912         }
1913
1914         return (0);
1915 }
1916
1917 static void
1918 lem_82547_update_fifo_head(struct adapter *adapter, int len)
1919 {
1920         int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1921         
1922         /* tx_fifo_head is always 16 byte aligned */
1923         adapter->tx_fifo_head += fifo_pkt_len;
1924         if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1925                 adapter->tx_fifo_head -= adapter->tx_fifo_size;
1926         }
1927 }
1928
1929
1930 static int
1931 lem_82547_tx_fifo_reset(struct adapter *adapter)
1932 {
1933         u32 tctl;
1934
1935         if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) ==
1936             E1000_READ_REG(&adapter->hw, E1000_TDH(0))) &&
1937             (E1000_READ_REG(&adapter->hw, E1000_TDFT) == 
1938             E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
1939             (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
1940             E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
1941             (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
1942                 /* Disable TX unit */
1943                 tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
1944                 E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
1945                     tctl & ~E1000_TCTL_EN);
1946
1947                 /* Reset FIFO pointers */
1948                 E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
1949                     adapter->tx_head_addr);
1950                 E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
1951                     adapter->tx_head_addr);
1952                 E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
1953                     adapter->tx_head_addr);
1954                 E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
1955                     adapter->tx_head_addr);
1956
1957                 /* Re-enable TX unit */
1958                 E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
1959                 E1000_WRITE_FLUSH(&adapter->hw);
1960
1961                 adapter->tx_fifo_head = 0;
1962                 adapter->tx_fifo_reset_cnt++;
1963
1964                 return (TRUE);
1965         }
1966         else {
1967                 return (FALSE);
1968         }
1969 }
1970
1971 static void
1972 lem_set_promisc(struct adapter *adapter)
1973 {
1974         struct ifnet    *ifp = adapter->ifp;
1975         u32             reg_rctl;
1976
1977         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1978
1979         if (ifp->if_flags & IFF_PROMISC) {
1980                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1981                 /* Turn this on if you want to see bad packets */
1982                 if (lem_debug_sbp)
1983                         reg_rctl |= E1000_RCTL_SBP;
1984                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1985         } else if (ifp->if_flags & IFF_ALLMULTI) {
1986                 reg_rctl |= E1000_RCTL_MPE;
1987                 reg_rctl &= ~E1000_RCTL_UPE;
1988                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1989         }
1990 }
1991
1992 static void
1993 lem_disable_promisc(struct adapter *adapter)
1994 {
1995         struct ifnet    *ifp = adapter->ifp;
1996         u32             reg_rctl;
1997         int             mcnt = 0;
1998
1999         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2000         reg_rctl &=  (~E1000_RCTL_UPE);
2001         if (ifp->if_flags & IFF_ALLMULTI)
2002                 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2003         else {
2004                 struct  ifmultiaddr *ifma;
2005 #if __FreeBSD_version < 800000
2006                 IF_ADDR_LOCK(ifp);
2007 #else   
2008                 if_maddr_rlock(ifp);
2009 #endif
2010                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2011                         if (ifma->ifma_addr->sa_family != AF_LINK)
2012                                 continue;
2013                         if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2014                                 break;
2015                         mcnt++;
2016                 }
2017 #if __FreeBSD_version < 800000
2018                 IF_ADDR_UNLOCK(ifp);
2019 #else
2020                 if_maddr_runlock(ifp);
2021 #endif
2022         }
2023         /* Don't disable if in MAX groups */
2024         if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2025                 reg_rctl &=  (~E1000_RCTL_MPE);
2026         reg_rctl &=  (~E1000_RCTL_SBP);
2027         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2028 }
2029
2030
2031 /*********************************************************************
2032  *  Multicast Update
2033  *
2034  *  This routine is called whenever the multicast address list is updated.
2035  *
2036  **********************************************************************/
2037
2038 static void
2039 lem_set_multi(struct adapter *adapter)
2040 {
2041         struct ifnet    *ifp = adapter->ifp;
2042         struct ifmultiaddr *ifma;
2043         u32 reg_rctl = 0;
2044         u8  *mta; /* Multicast array memory */
2045         int mcnt = 0;
2046
2047         IOCTL_DEBUGOUT("lem_set_multi: begin");
2048
2049         mta = adapter->mta;
2050         bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
2051
2052         if (adapter->hw.mac.type == e1000_82542 && 
2053             adapter->hw.revision_id == E1000_REVISION_2) {
2054                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2055                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2056                         e1000_pci_clear_mwi(&adapter->hw);
2057                 reg_rctl |= E1000_RCTL_RST;
2058                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2059                 msec_delay(5);
2060         }
2061
2062 #if __FreeBSD_version < 800000
2063         IF_ADDR_LOCK(ifp);
2064 #else
2065         if_maddr_rlock(ifp);
2066 #endif
2067         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2068                 if (ifma->ifma_addr->sa_family != AF_LINK)
2069                         continue;
2070
2071                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2072                         break;
2073
2074                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2075                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2076                 mcnt++;
2077         }
2078 #if __FreeBSD_version < 800000
2079         IF_ADDR_UNLOCK(ifp);
2080 #else
2081         if_maddr_runlock(ifp);
2082 #endif
2083         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2084                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2085                 reg_rctl |= E1000_RCTL_MPE;
2086                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2087         } else
2088                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2089
2090         if (adapter->hw.mac.type == e1000_82542 && 
2091             adapter->hw.revision_id == E1000_REVISION_2) {
2092                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2093                 reg_rctl &= ~E1000_RCTL_RST;
2094                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2095                 msec_delay(5);
2096                 if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
2097                         e1000_pci_set_mwi(&adapter->hw);
2098         }
2099 }
2100
2101
2102 /*********************************************************************
2103  *  Timer routine
2104  *
2105  *  This routine checks for link status and updates statistics.
2106  *
2107  **********************************************************************/
2108
2109 static void
2110 lem_local_timer(void *arg)
2111 {
2112         struct adapter  *adapter = arg;
2113
2114         EM_CORE_LOCK_ASSERT(adapter);
2115
2116         lem_update_link_status(adapter);
2117         lem_update_stats_counters(adapter);
2118
2119         lem_smartspeed(adapter);
2120
2121 #ifdef NIC_PARAVIRT
2122         /* recover space if needed */
2123         if (adapter->csb && adapter->csb->guest_csb_on &&
2124             (adapter->watchdog_check == TRUE) &&
2125             (ticks - adapter->watchdog_time > EM_WATCHDOG) &&
2126             (adapter->num_tx_desc_avail != adapter->num_tx_desc) ) {
2127                 lem_txeof(adapter);
2128                 /*
2129                  * lem_txeof() normally (except when space in the queue
2130                  * runs low XXX) clears watchdog_check so that
2131                  * we do not hang.
2132                  */
2133         }
2134 #endif /* NIC_PARAVIRT */
2135         /*
2136          * Check the watchdog: if too much time has passed since
2137          * the last TX descriptor was cleaned, assume the TX
2138          * engine is hung and reinitialize the interface.
2139          */
2140         if ((adapter->watchdog_check == TRUE) &&
2141             (ticks - adapter->watchdog_time > EM_WATCHDOG))
2142                 goto hung;
2143
2144         callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
2145         return;
2146 hung:
2147         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2148         adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2149         adapter->watchdog_events++;
2150         lem_init_locked(adapter);
2151 }
2152
2153 static void
2154 lem_update_link_status(struct adapter *adapter)
2155 {
2156         struct e1000_hw *hw = &adapter->hw;
2157         struct ifnet *ifp = adapter->ifp;
2158         device_t dev = adapter->dev;
2159         u32 link_check = 0;
2160
2161         /* Get the cached link value or read phy for real */
2162         switch (hw->phy.media_type) {
2163         case e1000_media_type_copper:
2164                 if (hw->mac.get_link_status) {
2165                         /* Do the work to read phy */
2166                         e1000_check_for_link(hw);
2167                         link_check = !hw->mac.get_link_status;
2168                         if (link_check) /* ESB2 fix */
2169                                 e1000_cfg_on_link_up(hw);
2170                 } else
2171                         link_check = TRUE;
2172                 break;
2173         case e1000_media_type_fiber:
2174                 e1000_check_for_link(hw);
2175                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2176                                  E1000_STATUS_LU);
2177                 break;
2178         case e1000_media_type_internal_serdes:
2179                 e1000_check_for_link(hw);
2180                 link_check = adapter->hw.mac.serdes_has_link;
2181                 break;
2182         default:
2183         case e1000_media_type_unknown:
2184                 break;
2185         }
2186
2187         /* Now check for a transition */
2188         if (link_check && (adapter->link_active == 0)) {
2189                 e1000_get_speed_and_duplex(hw, &adapter->link_speed,
2190                     &adapter->link_duplex);
2191                 if (bootverbose)
2192                         device_printf(dev, "Link is up %d Mbps %s\n",
2193                             adapter->link_speed,
2194                             ((adapter->link_duplex == FULL_DUPLEX) ?
2195                             "Full Duplex" : "Half Duplex"));
2196                 adapter->link_active = 1;
2197                 adapter->smartspeed = 0;
2198                 ifp->if_baudrate = adapter->link_speed * 1000000;
2199                 if_link_state_change(ifp, LINK_STATE_UP);
2200         } else if (!link_check && (adapter->link_active == 1)) {
2201                 ifp->if_baudrate = adapter->link_speed = 0;
2202                 adapter->link_duplex = 0;
2203                 if (bootverbose)
2204                         device_printf(dev, "Link is Down\n");
2205                 adapter->link_active = 0;
2206                 /* Link down, disable watchdog */
2207                 adapter->watchdog_check = FALSE;
2208                 if_link_state_change(ifp, LINK_STATE_DOWN);
2209         }
2210 }
2211
2212 /*********************************************************************
2213  *
2214  *  This routine disables all traffic on the adapter by issuing a
2215  *  global reset on the MAC; TX/RX buffers are freed separately.
2216  *
2217  *  This routine should always be called with BOTH the CORE
2218  *  and TX locks held.
2219  **********************************************************************/
2220
2221 static void
2222 lem_stop(void *arg)
2223 {
2224         struct adapter  *adapter = arg;
2225         struct ifnet    *ifp = adapter->ifp;
2226
2227         EM_CORE_LOCK_ASSERT(adapter);
2228         EM_TX_LOCK_ASSERT(adapter);
2229
2230         INIT_DEBUGOUT("lem_stop: begin");
2231
2232         lem_disable_intr(adapter);
2233         callout_stop(&adapter->timer);
2234         callout_stop(&adapter->tx_fifo_timer);
2235
2236         /* Tell the stack that the interface is no longer active */
2237         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2238
2239         e1000_reset_hw(&adapter->hw);
2240         if (adapter->hw.mac.type >= e1000_82544)
2241                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2242
2243         e1000_led_off(&adapter->hw);
2244         e1000_cleanup_led(&adapter->hw);
2245 }
2246
2247
2248 /*********************************************************************
2249  *
2250  *  Determine hardware revision.
2251  *
2252  **********************************************************************/
2253 static void
2254 lem_identify_hardware(struct adapter *adapter)
2255 {
2256         device_t dev = adapter->dev;
2257
2258         /* Make sure our PCI config space has the necessary bits set */
2259         pci_enable_busmaster(dev);
2260         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2261
2262         /* Save off the information about this board */
2263         adapter->hw.vendor_id = pci_get_vendor(dev);
2264         adapter->hw.device_id = pci_get_device(dev);
2265         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2266         adapter->hw.subsystem_vendor_id =
2267             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2268         adapter->hw.subsystem_device_id =
2269             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2270
2271         /* Do Shared Code Init and Setup */
2272         if (e1000_set_mac_type(&adapter->hw)) {
2273                 device_printf(dev, "Setup init failure\n");
2274                 return;
2275         }
2276 }
2277
2278 static int
2279 lem_allocate_pci_resources(struct adapter *adapter)
2280 {
2281         device_t        dev = adapter->dev;
2282         int             val, rid, error = E1000_SUCCESS;
2283
2284         rid = PCIR_BAR(0);
2285         adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2286             &rid, RF_ACTIVE);
2287         if (adapter->memory == NULL) {
2288                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2289                 return (ENXIO);
2290         }
2291         adapter->osdep.mem_bus_space_tag =
2292             rman_get_bustag(adapter->memory);
2293         adapter->osdep.mem_bus_space_handle =
2294             rman_get_bushandle(adapter->memory);
2295         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2296
2297         /* Only older adapters use IO mapping */
2298         if (adapter->hw.mac.type > e1000_82543) {
2299                 /* Figure out where our IO BAR is */
2300                 for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
2301                         val = pci_read_config(dev, rid, 4);
2302                         if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
2303                                 adapter->io_rid = rid;
2304                                 break;
2305                         }
2306                         rid += 4;
2307                         /* check for 64bit BAR */
2308                         if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
2309                                 rid += 4;
2310                 }
2311                 if (rid >= PCIR_CIS) {
2312                         device_printf(dev, "Unable to locate IO BAR\n");
2313                         return (ENXIO);
2314                 }
2315                 adapter->ioport = bus_alloc_resource_any(dev,
2316                     SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
2317                 if (adapter->ioport == NULL) {
2318                         device_printf(dev, "Unable to allocate bus resource: "
2319                             "ioport\n");
2320                         return (ENXIO);
2321                 }
2322                 adapter->hw.io_base = 0;
2323                 adapter->osdep.io_bus_space_tag =
2324                     rman_get_bustag(adapter->ioport);
2325                 adapter->osdep.io_bus_space_handle =
2326                     rman_get_bushandle(adapter->ioport);
2327         }
2328
2329         adapter->hw.back = &adapter->osdep;
2330
2331         return (error);
2332 }
2333
2334 /*********************************************************************
2335  *
2336  *  Setup the Legacy or MSI Interrupt handler
2337  *
2338  **********************************************************************/
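/*
 * Descriptive note: whether a traditional threaded handler (lem_intr) or a
 * fast filter handler plus taskqueue (lem_irq_fast / lem_handle_rxtx) is
 * installed is selected by the lem_use_legacy_irq tunable checked below.
 */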
2339 static int
2340 lem_allocate_irq(struct adapter *adapter)
2341 {
2342         device_t dev = adapter->dev;
2343         int error, rid = 0;
2344
2345         /* Manually turn off all interrupts */
2346         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2347
2348         /* We allocate a single interrupt resource */
2349         adapter->res[0] = bus_alloc_resource_any(dev,
2350             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2351         if (adapter->res[0] == NULL) {
2352                 device_printf(dev, "Unable to allocate bus resource: "
2353                     "interrupt\n");
2354                 return (ENXIO);
2355         }
2356
2357         /* Do Legacy setup? */
2358         if (lem_use_legacy_irq) {
2359                 if ((error = bus_setup_intr(dev, adapter->res[0],
2360                     INTR_TYPE_NET | INTR_MPSAFE, NULL, lem_intr, adapter,
2361                     &adapter->tag[0])) != 0) {
2362                         device_printf(dev,
2363                             "Failed to register interrupt handler\n");
2364                         return (error);
2365                 }
2366                 return (0);
2367         }
2368
2369         /*
2370          * Use a Fast interrupt and the associated
2371          * deferred processing contexts.
2372          */
2373         TASK_INIT(&adapter->rxtx_task, 0, lem_handle_rxtx, adapter);
2374         TASK_INIT(&adapter->link_task, 0, lem_handle_link, adapter);
2375         adapter->tq = taskqueue_create_fast("lem_taskq", M_NOWAIT,
2376             taskqueue_thread_enqueue, &adapter->tq);
2377         taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2378             device_get_nameunit(adapter->dev));
2379         if ((error = bus_setup_intr(dev, adapter->res[0],
2380             INTR_TYPE_NET, lem_irq_fast, NULL, adapter,
2381             &adapter->tag[0])) != 0) {
2382                 device_printf(dev, "Failed to register fast interrupt "
2383                             "handler: %d\n", error);
2384                 taskqueue_free(adapter->tq);
2385                 adapter->tq = NULL;
2386                 return (error);
2387         }
2388         
2389         return (0);
2390 }
2391
2392
2393 static void
2394 lem_free_pci_resources(struct adapter *adapter)
2395 {
2396         device_t dev = adapter->dev;
2397
2398
2399         if (adapter->tag[0] != NULL) {
2400                 bus_teardown_intr(dev, adapter->res[0],
2401                     adapter->tag[0]);
2402                 adapter->tag[0] = NULL;
2403         }
2404
2405         if (adapter->res[0] != NULL) {
2406                 bus_release_resource(dev, SYS_RES_IRQ,
2407                     0, adapter->res[0]);
2408         }
2409
2410         if (adapter->memory != NULL)
2411                 bus_release_resource(dev, SYS_RES_MEMORY,
2412                     PCIR_BAR(0), adapter->memory);
2413
2414         if (adapter->ioport != NULL)
2415                 bus_release_resource(dev, SYS_RES_IOPORT,
2416                     adapter->io_rid, adapter->ioport);
2417 }
2418
2419
2420 /*********************************************************************
2421  *
2422  *  Initialize the hardware to a configuration
2423  *  as specified by the adapter structure.
2424  *
2425  **********************************************************************/
2426 static int
2427 lem_hardware_init(struct adapter *adapter)
2428 {
2429         device_t dev = adapter->dev;
2430         u16     rx_buffer_size;
2431
2432         INIT_DEBUGOUT("lem_hardware_init: begin");
2433
2434         /* Issue a global reset */
2435         e1000_reset_hw(&adapter->hw);
2436
2437         /* When hardware is reset, fifo_head is also reset */
2438         adapter->tx_fifo_head = 0;
2439
2440         /*
2441          * These parameters control the automatic generation (Tx) of and
2442          * response (Rx) to Ethernet PAUSE frames.
2443          * - The high water mark should allow for at least two frames to be
2444          *   received after sending an XOFF.
2445          * - The low water mark works best when it is very near the high water
2446          *   mark. This allows the receiver to restart by sending XON when it
2447          *   has drained a bit. Here we use an arbitrary value of 1500, which
2448          *   restarts transmission after one full frame is pulled from the
2449          *   buffer. There could be several smaller frames in the buffer, and
2450          *   if so they will not trigger the XON until their total size
2451          *   reduces the buffer by 1500 bytes.
2452          * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2453          */
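        /*
         * Worked example (illustrative; the register value is an assumption):
         * if the low 16 bits of PBA read back as 48, i.e. 48 KB reserved for
         * receive, then rx_buffer_size = 48 * 1024 = 49152 bytes.  With a
         * standard 1518-byte max frame, roundup2(1518, 1024) = 2048, so the
         * assignments below yield high_water = 47104 and low_water = 45604.
         */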
2454         rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
2455             0xffff) << 10 );
2456
2457         adapter->hw.fc.high_water = rx_buffer_size -
2458             roundup2(adapter->max_frame_size, 1024);
2459         adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2460
2461         adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME;
2462         adapter->hw.fc.send_xon = TRUE;
2463
2464         /* Set flow control; use the tunable value if it is sane */
2465         if ((lem_fc_setting >= 0) && (lem_fc_setting < 4))
2466                 adapter->hw.fc.requested_mode = lem_fc_setting;
2467         else
2468                 adapter->hw.fc.requested_mode = e1000_fc_none;
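        /*
         * Descriptive note (mapping assumed from the e1000 shared code):
         * the tunable value corresponds directly to enum e1000_fc_mode,
         * i.e. 0 = none, 1 = rx_pause, 2 = tx_pause, 3 = full.
         */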
2469
2470         if (e1000_init_hw(&adapter->hw) < 0) {
2471                 device_printf(dev, "Hardware Initialization Failed\n");
2472                 return (EIO);
2473         }
2474
2475         e1000_check_for_link(&adapter->hw);
2476
2477         return (0);
2478 }
2479
2480 /*********************************************************************
2481  *
2482  *  Setup networking device structure and register an interface.
2483  *
2484  **********************************************************************/
2485 static int
2486 lem_setup_interface(device_t dev, struct adapter *adapter)
2487 {
2488         struct ifnet   *ifp;
2489
2490         INIT_DEBUGOUT("lem_setup_interface: begin");
2491
2492         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2493         if (ifp == NULL) {
2494                 device_printf(dev, "can not allocate ifnet structure\n");
2495                 return (-1);
2496         }
2497         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2498         ifp->if_init =  lem_init;
2499         ifp->if_softc = adapter;
2500         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2501         ifp->if_ioctl = lem_ioctl;
2502         ifp->if_start = lem_start;
2503         IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2504         ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2505         IFQ_SET_READY(&ifp->if_snd);
2506
2507         ether_ifattach(ifp, adapter->hw.mac.addr);
2508
2509         ifp->if_capabilities = ifp->if_capenable = 0;
2510
2511         if (adapter->hw.mac.type >= e1000_82543) {
2512                 ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2513                 ifp->if_capenable |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2514         }
2515
2516         /*
2517          * Tell the upper layer(s) we support long frames.
2518          */
2519         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2520         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2521         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2522
2523         /*
2524         ** Don't turn this on by default: if vlans are
2525         ** created on another pseudo device (e.g. lagg),
2526         ** then vlan events are not passed through, breaking
2527         ** operation, but with HW FILTER off it works. If
2528         ** using vlans directly on the em driver you can
2529         ** enable this and get full hardware tag filtering.
2530         */
2531         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
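        /*
         * Usage note (illustrative, interface name assumed): hardware VLAN
         * filtering can then be toggled at run time with
         * "ifconfig em0 vlanhwfilter" / "ifconfig em0 -vlanhwfilter".
         */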
2532
2533 #ifdef DEVICE_POLLING
2534         ifp->if_capabilities |= IFCAP_POLLING;
2535 #endif
2536
2537         /* Enable only WOL MAGIC by default */
2538         if (adapter->wol) {
2539                 ifp->if_capabilities |= IFCAP_WOL;
2540                 ifp->if_capenable |= IFCAP_WOL_MAGIC;
2541         }
2542                 
2543         /*
2544          * Specify the media types supported by this adapter and register
2545          * callbacks to update media and link information
2546          */
2547         ifmedia_init(&adapter->media, IFM_IMASK,
2548             lem_media_change, lem_media_status);
2549         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2550             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2551                 u_char fiber_type = IFM_1000_SX;        /* default type */
2552
2553                 if (adapter->hw.mac.type == e1000_82545)
2554                         fiber_type = IFM_1000_LX;
2555                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 
2556                             0, NULL);
2557                 ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
2558         } else {
2559                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2560                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2561                             0, NULL);
2562                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2563                             0, NULL);
2564                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2565                             0, NULL);
2566                 if (adapter->hw.phy.type != e1000_phy_ife) {
2567                         ifmedia_add(&adapter->media,
2568                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2569                         ifmedia_add(&adapter->media,
2570                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2571                 }
2572         }
2573         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2574         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2575         return (0);
2576 }
2577
2578
2579 /*********************************************************************
2580  *
2581  *  Workaround for SmartSpeed on 82541 and 82547 controllers
2582  *
2583  **********************************************************************/
2584 static void
2585 lem_smartspeed(struct adapter *adapter)
2586 {
2587         u16 phy_tmp;
2588
2589         if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
2590             adapter->hw.mac.autoneg == 0 ||
2591             (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
2592                 return;
2593
2594         if (adapter->smartspeed == 0) {
2595                 /* If the Master/Slave config fault is asserted twice,
2596                  * we assume back-to-back faults */
2597                 e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2598                 if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
2599                         return;
2600                 e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
2601                 if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
2602                         e1000_read_phy_reg(&adapter->hw,
2603                             PHY_1000T_CTRL, &phy_tmp);
2604                         if(phy_tmp & CR_1000T_MS_ENABLE) {
2605                                 phy_tmp &= ~CR_1000T_MS_ENABLE;
2606                                 e1000_write_phy_reg(&adapter->hw,
2607                                     PHY_1000T_CTRL, phy_tmp);
2608                                 adapter->smartspeed++;
2609                                 if(adapter->hw.mac.autoneg &&
2610                                    !e1000_copper_link_autoneg(&adapter->hw) &&
2611                                    !e1000_read_phy_reg(&adapter->hw,
2612                                     PHY_CONTROL, &phy_tmp)) {
2613                                         phy_tmp |= (MII_CR_AUTO_NEG_EN |
2614                                                     MII_CR_RESTART_AUTO_NEG);
2615                                         e1000_write_phy_reg(&adapter->hw,
2616                                             PHY_CONTROL, phy_tmp);
2617                                 }
2618                         }
2619                 }
2620                 return;
2621         } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
2622                 /* If still no link, perhaps we are using a 2- or 3-pair cable */
2623                 e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
2624                 phy_tmp |= CR_1000T_MS_ENABLE;
2625                 e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
2626                 if(adapter->hw.mac.autoneg &&
2627                    !e1000_copper_link_autoneg(&adapter->hw) &&
2628                    !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
2629                         phy_tmp |= (MII_CR_AUTO_NEG_EN |
2630                                     MII_CR_RESTART_AUTO_NEG);
2631                         e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
2632                 }
2633         }
2634         /* Restart process after EM_SMARTSPEED_MAX iterations */
2635         if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
2636                 adapter->smartspeed = 0;
2637 }
2638
2639
2640 /*
2641  * Manage DMA'able memory.
2642  */
2643 static void
2644 lem_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2645 {
2646         if (error)
2647                 return;
2648         *(bus_addr_t *) arg = segs[0].ds_addr;
2649 }
2650
2651 static int
2652 lem_dma_malloc(struct adapter *adapter, bus_size_t size,
2653         struct em_dma_alloc *dma, int mapflags)
2654 {
2655         int error;
2656
2657         error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2658                                 EM_DBA_ALIGN, 0,        /* alignment, bounds */
2659                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2660                                 BUS_SPACE_MAXADDR,      /* highaddr */
2661                                 NULL, NULL,             /* filter, filterarg */
2662                                 size,                   /* maxsize */
2663                                 1,                      /* nsegments */
2664                                 size,                   /* maxsegsize */
2665                                 0,                      /* flags */
2666                                 NULL,                   /* lockfunc */
2667                                 NULL,                   /* lockarg */
2668                                 &dma->dma_tag);
2669         if (error) {
2670                 device_printf(adapter->dev,
2671                     "%s: bus_dma_tag_create failed: %d\n",
2672                     __func__, error);
2673                 goto fail_0;
2674         }
2675
2676         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2677             BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2678         if (error) {
2679                 device_printf(adapter->dev,
2680                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2681                     __func__, (uintmax_t)size, error);
2682                 goto fail_2;
2683         }
2684
2685         dma->dma_paddr = 0;
2686         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2687             size, lem_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2688         if (error || dma->dma_paddr == 0) {
2689                 device_printf(adapter->dev,
2690                     "%s: bus_dmamap_load failed: %d\n",
2691                     __func__, error);
2692                 goto fail_3;
2693         }
2694
2695         return (0);
2696
2697 fail_3:
2698         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2699 fail_2:
2700         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2701         bus_dma_tag_destroy(dma->dma_tag);
2702 fail_0:
2703         dma->dma_tag = NULL;
2704
2705         return (error);
2706 }
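/*
 * Typical call pattern (illustrative sketch, names assumed): a descriptor
 * ring of "size" bytes would be obtained and released with
 *
 *      if (lem_dma_malloc(adapter, size, &adapter->txdma, BUS_DMA_NOWAIT))
 *              return (ENOMEM);
 *      ...
 *      lem_dma_free(adapter, &adapter->txdma);
 */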
2707
2708 static void
2709 lem_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
2710 {
2711         if (dma->dma_tag == NULL)
2712                 return;
2713         if (dma->dma_paddr != 0) {
2714                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2715                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2716                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2717                 dma->dma_paddr = 0;
2718         }
2719         if (dma->dma_vaddr != NULL) {
2720                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2721                 dma->dma_vaddr = NULL;
2722         }
2723         bus_dma_tag_destroy(dma->dma_tag);
2724         dma->dma_tag = NULL;
2725 }
2726
2727
2728 /*********************************************************************
2729  *
2730  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2731  *  the information needed to transmit a packet on the wire.
2732  *
2733  **********************************************************************/
2734 static int
2735 lem_allocate_transmit_structures(struct adapter *adapter)
2736 {
2737         device_t dev = adapter->dev;
2738         struct em_buffer *tx_buffer;
2739         int error;
2740
2741         /*
2742          * Create the DMA tag used to map transmit mbufs
2743          */
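             /*
              * A single mapping made with this tag may scatter a frame across
              * up to EM_MAX_SCATTER cluster-sized (MCLBYTES) segments, hence
              * the maxsize of MCLBYTES * EM_MAX_SCATTER below.
              */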
2744         if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
2745                                 1, 0,                   /* alignment, bounds */
2746                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2747                                 BUS_SPACE_MAXADDR,      /* highaddr */
2748                                 NULL, NULL,             /* filter, filterarg */
2749                                 MCLBYTES * EM_MAX_SCATTER,      /* maxsize */
2750                                 EM_MAX_SCATTER,         /* nsegments */
2751                                 MCLBYTES,               /* maxsegsize */
2752                                 0,                      /* flags */
2753                                 NULL,                   /* lockfunc */
2754                                 NULL,                   /* lockarg */
2755                                 &adapter->txtag)) != 0) {
2756                 device_printf(dev, "Unable to allocate TX DMA tag\n");
2757                 goto fail;
2758         }
2759
2760         adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
2761             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
2762         if (adapter->tx_buffer_area == NULL) {
2763                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2764                 error = ENOMEM;
2765                 goto fail;
2766         }
2767
2768         /* Create the descriptor buffer dma maps */
2769         for (int i = 0; i < adapter->num_tx_desc; i++) {
2770                 tx_buffer = &adapter->tx_buffer_area[i];
2771                 error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
2772                 if (error != 0) {
2773                         device_printf(dev, "Unable to create TX DMA map\n");
2774                         goto fail;
2775                 }
2776                 tx_buffer->next_eop = -1;
2777         }
2778
2779         return (0);
2780 fail:
2781         lem_free_transmit_structures(adapter);
2782         return (error);
2783 }
2784
2785 /*********************************************************************
2786  *
2787  *  (Re)Initialize transmit structures.
2788  *
2789  **********************************************************************/
2790 static void
2791 lem_setup_transmit_structures(struct adapter *adapter)
2792 {
2793         struct em_buffer *tx_buffer;
2794 #ifdef DEV_NETMAP
2795         /* we are already locked */
2796         struct netmap_adapter *na = NA(adapter->ifp);
2797         struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
2798 #endif /* DEV_NETMAP */
2799
2800         /* Clear the old ring contents */
2801         bzero(adapter->tx_desc_base,
2802             (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
2803
2804         /* Free any existing TX buffers */
2805         for (int i = 0; i < adapter->num_tx_desc; i++) {
2806                 tx_buffer = &adapter->tx_buffer_area[i];
2807                 bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2808                     BUS_DMASYNC_POSTWRITE);
2809                 bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2810                 m_freem(tx_buffer->m_head);
2811                 tx_buffer->m_head = NULL;
2812 #ifdef DEV_NETMAP
2813                 if (slot) {
2814                         /* the i-th NIC entry goes to slot si */
2815                         int si = netmap_idx_n2k(&na->tx_rings[0], i);
2816                         uint64_t paddr;
2817                         void *addr;
2818
2819                         addr = PNMB(na, slot + si, &paddr);
2820                         adapter->tx_desc_base[i].buffer_addr = htole64(paddr);
2821                         /* reload the map for netmap mode */
2822                         netmap_load_map(na, adapter->txtag, tx_buffer->map, addr);
2823                 }
2824 #endif /* DEV_NETMAP */
2825                 tx_buffer->next_eop = -1;
2826         }
2827
2828         /* Reset state */
2829         adapter->last_hw_offload = 0;
2830         adapter->next_avail_tx_desc = 0;
2831         adapter->next_tx_to_clean = 0;
2832         adapter->num_tx_desc_avail = adapter->num_tx_desc;
2833
2834         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2835             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2836
2837         return;
2838 }
2839
2840 /*********************************************************************
2841  *
2842  *  Enable transmit unit.
2843  *
2844  **********************************************************************/
2845 static void
2846 lem_initialize_transmit_unit(struct adapter *adapter)
2847 {
2848         u32     tctl, tipg = 0;
2849         u64     bus_addr;
2850
2851         INIT_DEBUGOUT("lem_initialize_transmit_unit: begin");
2852         /* Setup the Base and Length of the Tx Descriptor Ring */
2853         bus_addr = adapter->txdma.dma_paddr;
2854         E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0),
2855             adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2856         E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0),
2857             (u32)(bus_addr >> 32));
2858         E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0),
2859             (u32)bus_addr);
2860         /* Setup the HW Tx Head and Tail descriptor pointers */
2861         E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0);
2862         E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0);
2863
2864         HW_DEBUGOUT2("Base = %x, Length = %x\n",
2865             E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)),
2866             E1000_READ_REG(&adapter->hw, E1000_TDLEN(0)));
2867
2868         /* Set the default values for the Tx Inter Packet Gap timer */
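             /*
              * TIPG packs three inter-packet gap fields into one register:
              * IPGT in the low bits plus IPGR1/IPGR2 at their respective
              * shift positions, which is why the per-MAC defaults below are
              * OR'd together before the single register write.
              */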
2869         switch (adapter->hw.mac.type) {
2870         case e1000_82542:
2871                 tipg = DEFAULT_82542_TIPG_IPGT;
2872                 tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2873                 tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2874                 break;
2875         default:
2876                 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2877                     (adapter->hw.phy.media_type ==
2878                     e1000_media_type_internal_serdes))
2879                         tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2880                 else
2881                         tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2882                 tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2883                 tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2884         }
2885
2886         E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
2887         E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
2888         if (adapter->hw.mac.type >= e1000_82540)
2889                 E1000_WRITE_REG(&adapter->hw, E1000_TADV,
2890                     adapter->tx_abs_int_delay.value);
2891
2892         /* Program the Transmit Control Register */
2893         tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2894         tctl &= ~E1000_TCTL_CT;
2895         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2896                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2897
2898         /* This write will effectively turn on the transmit unit. */
2899         E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2900
2901         /* Setup Transmit Descriptor Base Settings */   
2902         adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2903
2904         if (adapter->tx_int_delay.value > 0)
2905                 adapter->txd_cmd |= E1000_TXD_CMD_IDE;
2906 }
2907
2908 /*********************************************************************
2909  *
2910  *  Free all transmit related data structures.
2911  *
2912  **********************************************************************/
2913 static void
2914 lem_free_transmit_structures(struct adapter *adapter)
2915 {
2916         struct em_buffer *tx_buffer;
2917
2918         INIT_DEBUGOUT("free_transmit_structures: begin");
2919
2920         if (adapter->tx_buffer_area != NULL) {
2921                 for (int i = 0; i < adapter->num_tx_desc; i++) {
2922                         tx_buffer = &adapter->tx_buffer_area[i];
2923                         if (tx_buffer->m_head != NULL) {
2924                                 bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2925                                     BUS_DMASYNC_POSTWRITE);
2926                                 bus_dmamap_unload(adapter->txtag,
2927                                     tx_buffer->map);
2928                                 m_freem(tx_buffer->m_head);
2929                                 tx_buffer->m_head = NULL;
2930                         } else if (tx_buffer->map != NULL)
2931                                 bus_dmamap_unload(adapter->txtag,
2932                                     tx_buffer->map);
2933                         if (tx_buffer->map != NULL) {
2934                                 bus_dmamap_destroy(adapter->txtag,
2935                                     tx_buffer->map);
2936                                 tx_buffer->map = NULL;
2937                         }
2938                 }
2939         }
2940         if (adapter->tx_buffer_area != NULL) {
2941                 free(adapter->tx_buffer_area, M_DEVBUF);
2942                 adapter->tx_buffer_area = NULL;
2943         }
2944         if (adapter->txtag != NULL) {
2945                 bus_dma_tag_destroy(adapter->txtag);
2946                 adapter->txtag = NULL;
2947         }
2948 }
2949
2950 /*********************************************************************
2951  *
2952  *  The offload context needs to be set when we transfer the first
2953  *  packet of a particular protocol (TCP/UDP). This routine has been
2954  *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
2955  *
2956  *  Added back the old method of keeping the current context type
2957  *  and not setting if unnecessary, as this is reported to be a
2958  *  big performance win.  -jfv
2959  **********************************************************************/
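     /*
      * Note that the context descriptor built here occupies a regular slot in
      * the TX ring (see the num_tx_desc_avail accounting at the bottom), and
      * the protocol of the last context written is cached in
      * adapter->last_hw_offload so back-to-back packets of the same type can
      * skip writing a new one.
      */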
2960 static void
2961 lem_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
2962     u32 *txd_upper, u32 *txd_lower)
2963 {
2964         struct e1000_context_desc *TXD = NULL;
2965         struct em_buffer *tx_buffer;
2966         struct ether_vlan_header *eh;
2967         struct ip *ip = NULL;
2968         struct ip6_hdr *ip6;
2969         int curr_txd, ehdrlen;
2970         u32 cmd, hdr_len, ip_hlen;
2971         u16 etype;
2972         u8 ipproto;
2973
2974
2975         cmd = hdr_len = ipproto = 0;
2976         *txd_upper = *txd_lower = 0;
2977         curr_txd = adapter->next_avail_tx_desc;
2978
2979         /*
2980          * Determine where frame payload starts.
2981          * Jump over vlan headers if already present,
2982          * helpful for QinQ too.
2983          */
2984         eh = mtod(mp, struct ether_vlan_header *);
2985         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2986                 etype = ntohs(eh->evl_proto);
2987                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2988         } else {
2989                 etype = ntohs(eh->evl_encap_proto);
2990                 ehdrlen = ETHER_HDR_LEN;
2991         }
2992
2993         /*
2994          * We only support TCP/UDP for IPv4 and IPv6 for the moment.
2995          * TODO: Support SCTP too when it hits the tree.
2996          */
2997         switch (etype) {
2998         case ETHERTYPE_IP:
2999                 ip = (struct ip *)(mp->m_data + ehdrlen);
3000                 ip_hlen = ip->ip_hl << 2;
3001
3002                 /* Setup of IP header checksum. */
3003                 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
3004                         /*
3005                          * Start offset for header checksum calculation.
3006                          * End offset for header checksum calculation.
3007                          * Offset of place to put the checksum.
3008                          */
3009                         TXD = (struct e1000_context_desc *)
3010                             &adapter->tx_desc_base[curr_txd];
3011                         TXD->lower_setup.ip_fields.ipcss = ehdrlen;
3012                         TXD->lower_setup.ip_fields.ipcse =
3013                             htole16(ehdrlen + ip_hlen);
3014                         TXD->lower_setup.ip_fields.ipcso =
3015                             ehdrlen + offsetof(struct ip, ip_sum);
3016                         cmd |= E1000_TXD_CMD_IP;
3017                         *txd_upper |= E1000_TXD_POPTS_IXSM << 8;
3018                 }
3019
3020                 hdr_len = ehdrlen + ip_hlen;
3021                 ipproto = ip->ip_p;
3022
3023                 break;
3024         case ETHERTYPE_IPV6:
3025                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3026                 ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */
3027
3028                 /* IPv6 doesn't have a header checksum. */
3029
3030                 hdr_len = ehdrlen + ip_hlen;
3031                 ipproto = ip6->ip6_nxt;
3032                 break;
3033
3034         default:
3035                 return;
3036         }
3037
3038         switch (ipproto) {
3039         case IPPROTO_TCP:
3040                 if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3041                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3042                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3043                         /* no need for context if already set */
3044                         if (adapter->last_hw_offload == CSUM_TCP)
3045                                 return;
3046                         adapter->last_hw_offload = CSUM_TCP;
3047                         /*
3048                          * Start offset for payload checksum calculation.
3049                          * End offset for payload checksum calculation.
3050                          * Offset of place to put the checksum.
3051                          */
3052                         TXD = (struct e1000_context_desc *)
3053                             &adapter->tx_desc_base[curr_txd];
3054                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3055                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3056                         TXD->upper_setup.tcp_fields.tucso =
3057                             hdr_len + offsetof(struct tcphdr, th_sum);
3058                         cmd |= E1000_TXD_CMD_TCP;
3059                 }
3060                 break;
3061         case IPPROTO_UDP:
3062         {
3063                 if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3064                         *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
3065                         *txd_upper |= E1000_TXD_POPTS_TXSM << 8;
3066                         /* no need for context if already set */
3067                         if (adapter->last_hw_offload == CSUM_UDP)
3068                                 return;
3069                         adapter->last_hw_offload = CSUM_UDP;
3070                         /*
3071                          * Start offset for payload checksum calculation.
3072                          * End offset for payload checksum calculation.
3073                          * Offset of place to put the checksum.
3074                          */
3075                         TXD = (struct e1000_context_desc *)
3076                             &adapter->tx_desc_base[curr_txd];
3077                         TXD->upper_setup.tcp_fields.tucss = hdr_len;
3078                         TXD->upper_setup.tcp_fields.tucse = htole16(0);
3079                         TXD->upper_setup.tcp_fields.tucso =
3080                             hdr_len + offsetof(struct udphdr, uh_sum);
3081                 }
3082                 /* Fall Thru */
3083         }
3084         default:
3085                 break;
3086         }
3087
3088         if (TXD == NULL)
3089                 return;
3090         TXD->tcp_seg_setup.data = htole32(0);
3091         TXD->cmd_and_length =
3092             htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
3093         tx_buffer = &adapter->tx_buffer_area[curr_txd];
3094         tx_buffer->m_head = NULL;
3095         tx_buffer->next_eop = -1;
3096
3097         if (++curr_txd == adapter->num_tx_desc)
3098                 curr_txd = 0;
3099
3100         adapter->num_tx_desc_avail--;
3101         adapter->next_avail_tx_desc = curr_txd;
3102 }
3103
3104
3105 /**********************************************************************
3106  *
3107  *  Examine each tx_buffer in the used queue. If the hardware is done
3108  *  processing the packet then free associated resources. The
3109  *  tx_buffer is put back on the free queue.
3110  *
3111  **********************************************************************/
3112 static void
3113 lem_txeof(struct adapter *adapter)
3114 {
3115         int first, last, done, num_avail;
3116         struct em_buffer *tx_buffer;
3117         struct e1000_tx_desc   *tx_desc, *eop_desc;
3118         struct ifnet   *ifp = adapter->ifp;
3119
3120         EM_TX_LOCK_ASSERT(adapter);
3121
3122 #ifdef DEV_NETMAP
3123         if (netmap_tx_irq(ifp, 0))
3124                 return;
3125 #endif /* DEV_NETMAP */
3126         if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
3127                 return;
3128
3129         num_avail = adapter->num_tx_desc_avail;
3130         first = adapter->next_tx_to_clean;
3131         tx_desc = &adapter->tx_desc_base[first];
3132         tx_buffer = &adapter->tx_buffer_area[first];
3133         last = tx_buffer->next_eop;
3134         eop_desc = &adapter->tx_desc_base[last];
3135
3136         /*
3137          * What this does is get the index of the
3138          * first descriptor AFTER the EOP of the 
3139          * first packet, that way we can do the
3140          * simple comparison on the inner while loop.
3141          */
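             /*
              * For example, if the first pending packet occupies descriptors
              * 5..7 (next_eop == 7), "done" becomes 8 and the loop below
              * cleans descriptors 5, 6 and 7, stopping once "first" reaches 8.
              */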
3142         if (++last == adapter->num_tx_desc)
3143                 last = 0;
3144         done = last;
3145
3146         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3147             BUS_DMASYNC_POSTREAD);
3148
3149         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3150                 /* We clean the range of the packet */
3151                 while (first != done) {
3152                         tx_desc->upper.data = 0;
3153                         tx_desc->lower.data = 0;
3154                         tx_desc->buffer_addr = 0;
3155                         ++num_avail;
3156
3157                         if (tx_buffer->m_head) {
3158                                 ifp->if_opackets++;
3159                                 bus_dmamap_sync(adapter->txtag,
3160                                     tx_buffer->map,
3161                                     BUS_DMASYNC_POSTWRITE);
3162                                 bus_dmamap_unload(adapter->txtag,
3163                                     tx_buffer->map);
3164
3165                                 m_freem(tx_buffer->m_head);
3166                                 tx_buffer->m_head = NULL;
3167                         }
3168                         tx_buffer->next_eop = -1;
3169                         adapter->watchdog_time = ticks;
3170
3171                         if (++first == adapter->num_tx_desc)
3172                                 first = 0;
3173
3174                         tx_buffer = &adapter->tx_buffer_area[first];
3175                         tx_desc = &adapter->tx_desc_base[first];
3176                 }
3177                 /* See if we can continue to the next packet */
3178                 last = tx_buffer->next_eop;
3179                 if (last != -1) {
3180                         eop_desc = &adapter->tx_desc_base[last];
3181                         /* Get new done point */
3182                         if (++last == adapter->num_tx_desc) last = 0;
3183                         done = last;
3184                 } else
3185                         break;
3186         }
3187         bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
3188             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3189
3190         adapter->next_tx_to_clean = first;
3191         adapter->num_tx_desc_avail = num_avail;
3192
3193 #ifdef NIC_SEND_COMBINING
3194         if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) {
3195                 /* a tdt write is pending, do it */
3196                 E1000_WRITE_REG(&adapter->hw, E1000_TDT(0),
3197                         0xffff & adapter->shadow_tdt);
3198                 adapter->shadow_tdt = MIT_PENDING_INT;
3199         } else {
3200                 adapter->shadow_tdt = 0; // disable
3201         }
3202 #endif /* NIC_SEND_COMBINING */
3203         /*
3204          * If we have enough room, clear IFF_DRV_OACTIVE to
3205          * tell the stack that it is OK to send packets.
3206          * If there are no pending descriptors, clear the watchdog.
3207          */
3208         if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) {                
3209                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3210 #ifdef NIC_PARAVIRT
3211                 if (adapter->csb) { // XXX also csb_on ?
3212                         adapter->csb->guest_need_txkick = 2; /* acked */
3213                         // XXX memory barrier
3214                 }
3215 #endif /* NIC_PARAVIRT */
3216                 if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
3217                         adapter->watchdog_check = FALSE;
3218                         return;
3219                 } 
3220         }
3221 }
3222
3223 /*********************************************************************
3224  *
3225  *  When Link is lost sometimes there is work still in the TX ring
3226  *  which may result in a watchdog; rather than allow that, we do an
3227  *  attempted cleanup and then reinit here. Note that this has been
3228  *  seen mostly with fiber adapters.
3229  *
3230  **********************************************************************/
3231 static void
3232 lem_tx_purge(struct adapter *adapter)
3233 {
3234         if ((!adapter->link_active) && (adapter->watchdog_check)) {
3235                 EM_TX_LOCK(adapter);
3236                 lem_txeof(adapter);
3237                 EM_TX_UNLOCK(adapter);
3238                 if (adapter->watchdog_check) /* Still outstanding? */
3239                         lem_init_locked(adapter);
3240         }
3241 }
3242
3243 /*********************************************************************
3244  *
3245  *  Get a buffer from system mbuf buffer pool.
3246  *
3247  **********************************************************************/
3248 static int
3249 lem_get_buf(struct adapter *adapter, int i)
3250 {
3251         struct mbuf             *m;
3252         bus_dma_segment_t       segs[1];
3253         bus_dmamap_t            map;
3254         struct em_buffer        *rx_buffer;
3255         int                     error, nsegs;
3256
3257         m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
3258         if (m == NULL) {
3259                 adapter->mbuf_cluster_failed++;
3260                 return (ENOBUFS);
3261         }
3262         m->m_len = m->m_pkthdr.len = MCLBYTES;
3263
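             /*
              * Shift the start of the payload by ETHER_ALIGN (2 bytes) so the
              * IP header following the 14-byte Ethernet header lands on a
              * 4-byte boundary; skip this when a maximum-sized frame would no
              * longer fit within the cluster.
              */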
3264         if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3265                 m_adj(m, ETHER_ALIGN);
3266
3267         /*
3268          * Using memory from the mbuf cluster pool, invoke the
3269          * bus_dma machinery to arrange the memory mapping.
3270          */
3271         error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
3272             adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
3273         if (error != 0) {
3274                 m_free(m);
3275                 return (error);
3276         }
3277
3278         /* If nsegs is wrong then the stack is corrupt. */
3279         KASSERT(nsegs == 1, ("Too many segments returned!"));
3280
3281         rx_buffer = &adapter->rx_buffer_area[i];
3282         if (rx_buffer->m_head != NULL)
3283                 bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3284
3285         map = rx_buffer->map;
3286         rx_buffer->map = adapter->rx_sparemap;
3287         adapter->rx_sparemap = map;
3288         bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3289         rx_buffer->m_head = m;
3290
3291         adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3292         return (0);
3293 }
3294
3295 /*********************************************************************
3296  *
3297  *  Allocate memory for rx_buffer structures. Since we use one
3298  *  rx_buffer per received packet, the maximum number of rx_buffer's
3299  *  that we'll need is equal to the number of receive descriptors
3300  *  that we've allocated.
3301  *
3302  **********************************************************************/
3303 static int
3304 lem_allocate_receive_structures(struct adapter *adapter)
3305 {
3306         device_t dev = adapter->dev;
3307         struct em_buffer *rx_buffer;
3308         int i, error;
3309
3310         adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3311             adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3312         if (adapter->rx_buffer_area == NULL) {
3313                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3314                 return (ENOMEM);
3315         }
3316
3317         error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3318                                 1, 0,                   /* alignment, bounds */
3319                                 BUS_SPACE_MAXADDR,      /* lowaddr */
3320                                 BUS_SPACE_MAXADDR,      /* highaddr */
3321                                 NULL, NULL,             /* filter, filterarg */
3322                                 MCLBYTES,               /* maxsize */
3323                                 1,                      /* nsegments */
3324                                 MCLBYTES,               /* maxsegsize */
3325                                 0,                      /* flags */
3326                                 NULL,                   /* lockfunc */
3327                                 NULL,                   /* lockarg */
3328                                 &adapter->rxtag);
3329         if (error) {
3330                 device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3331                     __func__, error);
3332                 goto fail;
3333         }
3334
3335         /* Create the spare map (used by getbuf) */
3336         error = bus_dmamap_create(adapter->rxtag, 0, &adapter->rx_sparemap);
3337         if (error) {
3338                 device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3339                     __func__, error);
3340                 goto fail;
3341         }
3342
3343         rx_buffer = adapter->rx_buffer_area;
3344         for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3345                 error = bus_dmamap_create(adapter->rxtag, 0, &rx_buffer->map);
3346                 if (error) {
3347                         device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3348                             __func__, error);
3349                         goto fail;
3350                 }
3351         }
3352
3353         return (0);
3354
3355 fail:
3356         lem_free_receive_structures(adapter);
3357         return (error);
3358 }
3359
3360 /*********************************************************************
3361  *
3362  *  (Re)initialize receive structures.
3363  *
3364  **********************************************************************/
3365 static int
3366 lem_setup_receive_structures(struct adapter *adapter)
3367 {
3368         struct em_buffer *rx_buffer;
3369         int i, error;
3370 #ifdef DEV_NETMAP
3371         /* we are already under lock */
3372         struct netmap_adapter *na = NA(adapter->ifp);
3373         struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
3374 #endif
3375
3376         /* Reset descriptor ring */
3377         bzero(adapter->rx_desc_base,
3378             (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3379
3380         /* Free current RX buffers. */
3381         rx_buffer = adapter->rx_buffer_area;
3382         for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3383                 if (rx_buffer->m_head != NULL) {
3384                         bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3385                             BUS_DMASYNC_POSTREAD);
3386                         bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3387                         m_freem(rx_buffer->m_head);
3388                         rx_buffer->m_head = NULL;
3389                 }
3390         }
3391
3392         /* Allocate new ones. */
3393         for (i = 0; i < adapter->num_rx_desc; i++) {
3394 #ifdef DEV_NETMAP
3395                 if (slot) {
3396                         /* the i-th NIC entry goes to slot si */
3397                         int si = netmap_idx_n2k(&na->rx_rings[0], i);
3398                         uint64_t paddr;
3399                         void *addr;
3400
3401                         addr = PNMB(na, slot + si, &paddr);
3402                         netmap_load_map(na, adapter->rxtag,
                            adapter->rx_buffer_area[i].map, addr);
3403                         /* Update descriptor */
3404                         adapter->rx_desc_base[i].buffer_addr = htole64(paddr);
3405                         continue;
3406                 }
3407 #endif /* DEV_NETMAP */
3408                 error = lem_get_buf(adapter, i);
3409                 if (error)
3410                         return (error);
3411         }
3412
3413         /* Setup our descriptor pointers */
3414         adapter->next_rx_desc_to_check = 0;
3415         bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3416             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3417
3418         return (0);
3419 }
3420
3421 /*********************************************************************
3422  *
3423  *  Enable receive unit.
3424  *
3425  **********************************************************************/
3426
3427 static void
3428 lem_initialize_receive_unit(struct adapter *adapter)
3429 {
3430         struct ifnet    *ifp = adapter->ifp;
3431         u64     bus_addr;
3432         u32     rctl, rxcsum;
3433
3434         INIT_DEBUGOUT("lem_initialize_receive_unit: begin");
3435
3436         /*
3437          * Make sure receives are disabled while setting
3438          * up the descriptor ring
3439          */
3440         rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3441         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3442
3443         if (adapter->hw.mac.type >= e1000_82540) {
3444                 E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3445                     adapter->rx_abs_int_delay.value);
3446                 /*
3447                  * Set the interrupt throttling rate. Value is calculated
3448                  * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
3449                  */
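                     /*
                      * For example, a target of 8000 interrupts per second
                      * works out to 1e9 / (8000 * 256) ~= 488 ticks of 256ns
                      * each being written to ITR.
                      */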
3450                 E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
3451         }
3452
3453         /* Setup the Base and Length of the Rx Descriptor Ring */
3454         bus_addr = adapter->rxdma.dma_paddr;
3455         E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0),
3456             adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3457         E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0),
3458             (u32)(bus_addr >> 32));
3459         E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0),
3460             (u32)bus_addr);
3461
3462         /* Setup the Receive Control Register */
3463         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3464         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3465                    E1000_RCTL_RDMTS_HALF |
3466                    (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3467
3468         /* Make sure VLAN Filters are off */
3469         rctl &= ~E1000_RCTL_VFE;
3470
3471         if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
3472                 rctl |= E1000_RCTL_SBP;
3473         else
3474                 rctl &= ~E1000_RCTL_SBP;
3475
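             /*
              * Sizes above 2048 reuse the small-buffer RCTL size encodings
              * with E1000_RCTL_BSEX set, which scales them up by a factor of
              * 16 (256/512/1024 become 4096/8192/16384); LPE additionally
              * allows long packets to be accepted.
              */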
3476         switch (adapter->rx_buffer_len) {
3477         default:
3478         case 2048:
3479                 rctl |= E1000_RCTL_SZ_2048;
3480                 break;
3481         case 4096:
3482                 rctl |= E1000_RCTL_SZ_4096 |
3483                     E1000_RCTL_BSEX | E1000_RCTL_LPE;
3484                 break;
3485         case 8192:
3486                 rctl |= E1000_RCTL_SZ_8192 |
3487                     E1000_RCTL_BSEX | E1000_RCTL_LPE;
3488                 break;
3489         case 16384:
3490                 rctl |= E1000_RCTL_SZ_16384 |
3491                     E1000_RCTL_BSEX | E1000_RCTL_LPE;
3492                 break;
3493         }
3494
3495         if (ifp->if_mtu > ETHERMTU)
3496                 rctl |= E1000_RCTL_LPE;
3497         else
3498                 rctl &= ~E1000_RCTL_LPE;
3499
3500         /* Enable 82543 Receive Checksum Offload for TCP and UDP */
3501         if ((adapter->hw.mac.type >= e1000_82543) &&
3502             (ifp->if_capenable & IFCAP_RXCSUM)) {
3503                 rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3504                 rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3505                 E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3506         }
3507
3508         /* Enable Receives */
3509         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3510
3511         /*
3512          * Setup the HW Rx Head and
3513          * Tail Descriptor Pointers
3514          */
3515         E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0);
3516         rctl = adapter->num_rx_desc - 1; /* default RDT value */
3517 #ifdef DEV_NETMAP
3518         /* preserve buffers already made available to clients */
3519         if (ifp->if_capenable & IFCAP_NETMAP)
3520                 rctl -= nm_kr_rxspace(&NA(adapter->ifp)->rx_rings[0]);
3521 #endif /* DEV_NETMAP */
3522         E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl);
3523
3524         return;
3525 }
3526
3527 /*********************************************************************
3528  *
3529  *  Free receive related data structures.
3530  *
3531  **********************************************************************/
3532 static void
3533 lem_free_receive_structures(struct adapter *adapter)
3534 {
3535         struct em_buffer *rx_buffer;
3536         int i;
3537
3538         INIT_DEBUGOUT("free_receive_structures: begin");
3539
3540         if (adapter->rx_sparemap) {
3541                 bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3542                 adapter->rx_sparemap = NULL;
3543         }
3544
3545         /* Cleanup any existing buffers */
3546         if (adapter->rx_buffer_area != NULL) {
3547                 rx_buffer = adapter->rx_buffer_area;
3548                 for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3549                         if (rx_buffer->m_head != NULL) {
3550                                 bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3551                                     BUS_DMASYNC_POSTREAD);
3552                                 bus_dmamap_unload(adapter->rxtag,
3553                                     rx_buffer->map);
3554                                 m_freem(rx_buffer->m_head);
3555                                 rx_buffer->m_head = NULL;
3556                         } else if (rx_buffer->map != NULL)
3557                                 bus_dmamap_unload(adapter->rxtag,
3558                                     rx_buffer->map);
3559                         if (rx_buffer->map != NULL) {
3560                                 bus_dmamap_destroy(adapter->rxtag,
3561                                     rx_buffer->map);
3562                                 rx_buffer->map = NULL;
3563                         }
3564                 }
3565         }
3566
3567         if (adapter->rx_buffer_area != NULL) {
3568                 free(adapter->rx_buffer_area, M_DEVBUF);
3569                 adapter->rx_buffer_area = NULL;
3570         }
3571
3572         if (adapter->rxtag != NULL) {
3573                 bus_dma_tag_destroy(adapter->rxtag);
3574                 adapter->rxtag = NULL;
3575         }
3576 }
3577
3578 /*********************************************************************
3579  *
3580  *  This routine executes in interrupt context. It replenishes
3581  *  the mbufs in the descriptor ring and sends data which has been
3582  *  dma'ed into host memory to the upper layer.
3583  *
3584  *  We loop at most count times if count is > 0, or until done if
3585  *  count < 0.
3586  *  
3587  *  For polling we also now return the number of cleaned packets
3588  *********************************************************************/
3589 static bool
3590 lem_rxeof(struct adapter *adapter, int count, int *done)
3591 {
3592         struct ifnet    *ifp = adapter->ifp;
3593         struct mbuf     *mp;
3594         u8              status = 0, accept_frame = 0, eop = 0;
3595         u16             len, desc_len, prev_len_adj;
3596         int             i, rx_sent = 0;
3597         struct e1000_rx_desc   *current_desc;
3598
3599 #ifdef BATCH_DISPATCH
3600         struct mbuf *mh = NULL, *mt = NULL;
3601 #endif /* BATCH_DISPATCH */
3602 #ifdef NIC_PARAVIRT
3603         int retries = 0;
3604         struct paravirt_csb* csb = adapter->csb;
3605         int csb_mode = csb && csb->guest_csb_on;
3606
3607         //ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check);
3608         if (csb_mode && csb->guest_need_rxkick)
3609                 csb->guest_need_rxkick = 0;
3610 #endif /* NIC_PARAVIRT */
3611         EM_RX_LOCK(adapter);
3612
3613 #ifdef BATCH_DISPATCH
3614     batch_again:
3615 #endif /* BATCH_DISPATCH */
3616         i = adapter->next_rx_desc_to_check;
3617         current_desc = &adapter->rx_desc_base[i];
3618         bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3619             BUS_DMASYNC_POSTREAD);
3620
3621 #ifdef DEV_NETMAP
3622         if (netmap_rx_irq(ifp, 0, &rx_sent)) {
3623                 EM_RX_UNLOCK(adapter);
3624                 return (FALSE);
3625         }
3626 #endif /* DEV_NETMAP */
3627
3628 #if 1 // XXX optimization ?
3629         if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
3630                 if (done != NULL)
3631                         *done = rx_sent;
3632                 EM_RX_UNLOCK(adapter);
3633                 return (FALSE);
3634         }
3635 #endif /* 1 */
3636
3637         while (count != 0 && ifp->if_drv_flags & IFF_DRV_RUNNING) {
3638                 struct mbuf *m = NULL;
3639
3640                 status = current_desc->status;
3641                 if ((status & E1000_RXD_STAT_DD) == 0) {
3642 #ifdef NIC_PARAVIRT
3643                     if (csb_mode) {
3644                         /* buffer not ready yet. Retry a few times before giving up */
3645                         if (++retries <= adapter->rx_retries) {
3646                                 continue;
3647                         }
3648                         if (csb->guest_need_rxkick == 0) {
3649                                 // ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check);
3650                                 csb->guest_need_rxkick = 1;
3651                                 // XXX memory barrier, status volatile ?
3652                                 continue; /* double check */
3653                         }
3654                     }
3655                     /* no buffer ready, give up */
3656 #endif /* NIC_PARAVIRT */
3657                         break;
3658                 }
3659 #ifdef NIC_PARAVIRT
3660                 if (csb_mode) {
3661                         /* Clear any pending rxkick request now that a buffer is ready. */
3662                         // ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check);
3663                         csb->guest_need_rxkick = 0;
3664                         retries = 0;
3665                 }
3666 #endif /* NIC_PARAVIRT */
3667
3668                 mp = adapter->rx_buffer_area[i].m_head;
3669                 /*
3670                  * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3671                  * needs to access the last received byte in the mbuf.
3672                  */
3673                 bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
3674                     BUS_DMASYNC_POSTREAD);
3675
3676                 accept_frame = 1;
3677                 prev_len_adj = 0;
3678                 desc_len = le16toh(current_desc->length);
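                     /*
                      * On the final descriptor of a frame the hardware length
                      * includes the 4-byte Ethernet CRC, which is stripped
                      * below; prev_len_adj covers the case where part of the
                      * CRC spilled into the previous descriptor of the chain.
                      */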
3679                 if (status & E1000_RXD_STAT_EOP) {
3680                         count--;
3681                         eop = 1;
3682                         if (desc_len < ETHER_CRC_LEN) {
3683                                 len = 0;
3684                                 prev_len_adj = ETHER_CRC_LEN - desc_len;
3685                         } else
3686                                 len = desc_len - ETHER_CRC_LEN;
3687                 } else {
3688                         eop = 0;
3689                         len = desc_len;
3690                 }
3691
3692                 if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
3693                         u8      last_byte;
3694                         u32     pkt_len = desc_len;
3695
3696                         if (adapter->fmp != NULL)
3697                                 pkt_len += adapter->fmp->m_pkthdr.len;
3698
3699                         last_byte = *(mtod(mp, caddr_t) + desc_len - 1);                        
3700                         if (TBI_ACCEPT(&adapter->hw, status,
3701                             current_desc->errors, pkt_len, last_byte,
3702                             adapter->min_frame_size, adapter->max_frame_size)) {
3703                                 e1000_tbi_adjust_stats_82543(&adapter->hw,
3704                                     &adapter->stats, pkt_len,
3705                                     adapter->hw.mac.addr,
3706                                     adapter->max_frame_size);
3707                                 if (len > 0)
3708                                         len--;
3709                         } else
3710                                 accept_frame = 0;
3711                 }
3712
3713                 if (accept_frame) {
3714                         if (lem_get_buf(adapter, i) != 0) {
3715                                 ifp->if_iqdrops++;
3716                                 goto discard;
3717                         }
3718
3719                         /* Assign correct length to the current fragment */
3720                         mp->m_len = len;
3721
3722                         if (adapter->fmp == NULL) {
3723                                 mp->m_pkthdr.len = len;
3724                                 adapter->fmp = mp; /* Store the first mbuf */
3725                                 adapter->lmp = mp;
3726                         } else {
3727                                 /* Chain mbuf's together */
3728                                 mp->m_flags &= ~M_PKTHDR;
3729                                 /*
3730                                  * Adjust length of previous mbuf in chain if
3731                                  * we received less than 4 bytes in the last
3732                                  * descriptor.
3733                                  */
3734                                 if (prev_len_adj > 0) {
3735                                         adapter->lmp->m_len -= prev_len_adj;
3736                                         adapter->fmp->m_pkthdr.len -=
3737                                             prev_len_adj;
3738                                 }
3739                                 adapter->lmp->m_next = mp;
3740                                 adapter->lmp = adapter->lmp->m_next;
3741                                 adapter->fmp->m_pkthdr.len += len;
3742                         }
3743
3744                         if (eop) {
3745                                 adapter->fmp->m_pkthdr.rcvif = ifp;
3746                                 ifp->if_ipackets++;
3747                                 lem_receive_checksum(adapter, current_desc,
3748                                     adapter->fmp);
3749 #ifndef __NO_STRICT_ALIGNMENT
3750                                 if (adapter->max_frame_size >
3751                                     (MCLBYTES - ETHER_ALIGN) &&
3752                                     lem_fixup_rx(adapter) != 0)
3753                                         goto skip;
3754 #endif
3755                                 if (status & E1000_RXD_STAT_VP) {
3756                                         adapter->fmp->m_pkthdr.ether_vtag =
3757                                             le16toh(current_desc->special);
3758                                         adapter->fmp->m_flags |= M_VLANTAG;
3759                                 }
3760 #ifndef __NO_STRICT_ALIGNMENT
3761 skip:
3762 #endif
3763                                 m = adapter->fmp;
3764                                 adapter->fmp = NULL;
3765                                 adapter->lmp = NULL;
3766                         }
3767                 } else {
3768                         adapter->dropped_pkts++;
3769 discard:
3770                         /* Reuse loaded DMA map and just update mbuf chain */
3771                         mp = adapter->rx_buffer_area[i].m_head;
3772                         mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3773                         mp->m_data = mp->m_ext.ext_buf;
3774                         mp->m_next = NULL;
3775                         if (adapter->max_frame_size <=
3776                             (MCLBYTES - ETHER_ALIGN))
3777                                 m_adj(mp, ETHER_ALIGN);
3778                         if (adapter->fmp != NULL) {
3779                                 m_freem(adapter->fmp);
3780                                 adapter->fmp = NULL;
3781                                 adapter->lmp = NULL;
3782                         }
3783                         m = NULL;
3784                 }
3785
3786                 /* Zero out the receive descriptors status. */
3787                 current_desc->status = 0;
3788                 bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3789                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3790
3791 #ifdef NIC_PARAVIRT
3792                 if (csb_mode) {
3793                         /* the buffer at i has been already replaced by lem_get_buf()
3794                          * so it is safe to set guest_rdt = i and possibly send a kick.
3795                          * XXX see if we can optimize it later.
3796                          */
3797                         csb->guest_rdt = i;
3798                         // XXX memory barrier
3799                         if (i == csb->host_rxkick_at)
3800                                 E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
3801                 }
3802 #endif /* NIC_PARAVIRT */
3803                 /* Advance our pointers to the next descriptor. */
3804                 if (++i == adapter->num_rx_desc)
3805                         i = 0;
3806                 /* Call into the stack */
3807                 if (m != NULL) {
3808 #ifdef BATCH_DISPATCH
3809                     if (adapter->batch_enable) {
3810                         if (mh == NULL)
3811                                 mh = mt = m;
3812                         else
3813                                 mt->m_nextpkt = m;
3814                         mt = m;
3815                         m->m_nextpkt = NULL;
3816                         rx_sent++;
3817                         current_desc = &adapter->rx_desc_base[i];
3818                         continue;
3819                     }
3820 #endif /* BATCH_DISPATCH */
3821                         adapter->next_rx_desc_to_check = i;
3822                         EM_RX_UNLOCK(adapter);
3823                         (*ifp->if_input)(ifp, m);
3824                         EM_RX_LOCK(adapter);
3825                         rx_sent++;
3826                         i = adapter->next_rx_desc_to_check;
3827                 }
3828                 current_desc = &adapter->rx_desc_base[i];
3829         }
3830         adapter->next_rx_desc_to_check = i;
3831 #ifdef BATCH_DISPATCH
3832         if (mh) {
3833                 EM_RX_UNLOCK(adapter);
3834                 while ( (mt = mh) != NULL) {
3835                         mh = mh->m_nextpkt;
3836                         mt->m_nextpkt = NULL;
3837                         (*ifp->if_input)(ifp, mt);
3838                 }
3839                 EM_RX_LOCK(adapter);
3840                 i = adapter->next_rx_desc_to_check; /* in case of interrupts */
3841                 if (count > 0)
3842                         goto batch_again;
3843         }
3844 #endif /* BATCH_DISPATCH */
3845
3846         /* Advance the E1000's Receive Queue #0  "Tail Pointer". */
3847         if (--i < 0)
3848                 i = adapter->num_rx_desc - 1;
3849 #ifdef NIC_PARAVIRT
3850         if (!csb_mode) /* filter out writes */
3851 #endif /* NIC_PARAVIRT */
3852         E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
3853         if (done != NULL)
3854                 *done = rx_sent;
3855         EM_RX_UNLOCK(adapter);
3856         return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
3857 }
3858
3859 #ifndef __NO_STRICT_ALIGNMENT
3860 /*
3861  * When jumbo frames are enabled we should realign the entire payload on
3862  * architectures with strict alignment. This is a serious design mistake of
3863  * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
3864  * buffer size to be 2048/4096/8192/16384. What we really want is
3865  * 2048 - ETHER_ALIGN to align its payload. On architectures without strict
3866  * alignment restrictions the 8254x still performs unaligned memory accesses,
3867  * which reduces performance too. To avoid copying over an entire frame to
3868  * align, we allocate a new mbuf and copy the ethernet header to the new
3869  * mbuf. The new mbuf is prepended into the existing mbuf chain.
3870  *
3871  * Be aware, best performance of the 8254x is achieved only when jumbo frames
3872  * are not used at all on architectures with strict alignment.
3873  */
3874 static int
3875 lem_fixup_rx(struct adapter *adapter)
3876 {
3877         struct mbuf *m, *n;
3878         int error;
3879
3880         error = 0;
3881         m = adapter->fmp;
3882         if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3883                 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3884                 m->m_data += ETHER_HDR_LEN;
3885         } else {
3886                 MGETHDR(n, M_NOWAIT, MT_DATA);
3887                 if (n != NULL) {
3888                         bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3889                         m->m_data += ETHER_HDR_LEN;
3890                         m->m_len -= ETHER_HDR_LEN;
3891                         n->m_len = ETHER_HDR_LEN;
3892                         M_MOVE_PKTHDR(n, m);
3893                         n->m_next = m;
3894                         adapter->fmp = n;
3895                 } else {
3896                         adapter->dropped_pkts++;
3897                         m_freem(adapter->fmp);
3898                         adapter->fmp = NULL;
3899                         error = ENOMEM;
3900                 }
3901         }
3902
3903         return (error);
3904 }
3905 #endif
3906
3907 /*********************************************************************
3908  *
3909  *  Verify that the hardware indicated that the checksum is valid.
3910  *  Inform the stack about the status of the checksum so that the stack
3911  *  doesn't spend time verifying the checksum.
3912  *
3913  *********************************************************************/
3914 static void
3915 lem_receive_checksum(struct adapter *adapter,
3916             struct e1000_rx_desc *rx_desc, struct mbuf *mp)
3917 {
3918         /* 82543 or newer only */
3919         if ((adapter->hw.mac.type < e1000_82543) ||
3920             /* Ignore Checksum bit is set */
3921             (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3922                 mp->m_pkthdr.csum_flags = 0;
3923                 return;
3924         }
3925
3926         if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3927                 /* Did it pass? */
3928                 if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3929                         /* IP Checksum Good */
3930                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3931                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3932
3933                 } else {
3934                         mp->m_pkthdr.csum_flags = 0;
3935                 }
3936         }
3937
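             /*
              * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data set to 0xffff
              * tells the stack that the TCP/UDP checksum, pseudo-header
              * included, has already been verified, so no software checksum
              * pass is needed.
              */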
3938         if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3939                 /* Did it pass? */
3940                 if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3941                         mp->m_pkthdr.csum_flags |=
3942                         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3943                         mp->m_pkthdr.csum_data = htons(0xffff);
3944                 }
3945         }
3946 }
3947
3948 /*
3949  * This routine is run via a vlan
3950  * config EVENT
3951  */
3952 static void
3953 lem_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
3954 {
3955         struct adapter  *adapter = ifp->if_softc;
3956         u32             index, bit;
3957
3958         if (ifp->if_softc !=  arg)   /* Not our event */
3959                 return;
3960
3961         if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
3962                 return;
3963
3964         EM_CORE_LOCK(adapter);
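             /*
              * The VFTA is a 128-entry array of 32-bit words: bits 11..5 of
              * the tag select the word and bits 4..0 select the bit within
              * it.  For example, vtag 100 sets bit 4 of shadow_vfta[3].
              */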
3965         index = (vtag >> 5) & 0x7F;
3966         bit = vtag & 0x1F;
3967         adapter->shadow_vfta[index] |= (1 << bit);
3968         ++adapter->num_vlans;
3969         /* Re-init to load the changes */
3970         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
3971                 lem_init_locked(adapter);
3972         EM_CORE_UNLOCK(adapter);
3973 }
3974
3975 /*
3976  * This routine is run via a vlan
3977  * unconfig EVENT
3978  */
3979 static void
3980 lem_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
3981 {
3982         struct adapter  *adapter = ifp->if_softc;
3983         u32             index, bit;
3984
3985         if (ifp->if_softc !=  arg)
3986                 return;
3987
3988         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
3989                 return;
3990
3991         EM_CORE_LOCK(adapter);
3992         index = (vtag >> 5) & 0x7F;
3993         bit = vtag & 0x1F;
3994         adapter->shadow_vfta[index] &= ~(1 << bit);
3995         --adapter->num_vlans;
3996         /* Re-init to load the changes */
3997         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
3998                 lem_init_locked(adapter);
3999         EM_CORE_UNLOCK(adapter);
4000 }
4001
4002 static void
4003 lem_setup_vlan_hw_support(struct adapter *adapter)
4004 {
4005         struct e1000_hw *hw = &adapter->hw;
4006         u32             reg;
4007
4008         /*
4009         ** We get here thru init_locked, meaning
4010         ** a soft reset, this has already cleared
4011         ** the VFTA and other state, so if there
4012         ** have been no vlan's registered do nothing.
4013         */
4014         if (adapter->num_vlans == 0)
4015                 return;
4016
4017         /*
4018         ** A soft reset zero's out the VFTA, so
4019         ** we need to repopulate it now.
4020         */
4021         for (int i = 0; i < EM_VFTA_SIZE; i++)
4022                 if (adapter->shadow_vfta[i] != 0)
4023                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4024                             i, adapter->shadow_vfta[i]);
4025
4026         reg = E1000_READ_REG(hw, E1000_CTRL);
4027         reg |= E1000_CTRL_VME;
4028         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4029
4030         /* Enable the Filter Table */
4031         reg = E1000_READ_REG(hw, E1000_RCTL);
4032         reg &= ~E1000_RCTL_CFIEN;
4033         reg |= E1000_RCTL_VFE;
4034         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4035 }
4036
4037 static void
4038 lem_enable_intr(struct adapter *adapter)
4039 {
4040         struct e1000_hw *hw = &adapter->hw;
4041         u32 ims_mask = IMS_ENABLE_MASK;
4042
4043         E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4044 }
4045
4046 static void
4047 lem_disable_intr(struct adapter *adapter)
4048 {
4049         struct e1000_hw *hw = &adapter->hw;
4050
4051         E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4052 }
4053
4054 /*
4055  * Bit of a misnomer: what this really means is
4056  * to enable OS management of the system, i.e.,
4057  * to disable the special hardware management features.
4058  */
4059 static void
4060 lem_init_manageability(struct adapter *adapter)
4061 {
4062         /* A shared code workaround */
4063         if (adapter->has_manage) {
4064                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4065                 /* disable hardware interception of ARP */
4066                 manc &= ~(E1000_MANC_ARP_EN);
4067                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4068         }
4069 }
4070
4071 /*
4072  * Give control back to hardware management
4073  * controller if there is one.
4074  */
4075 static void
4076 lem_release_manageability(struct adapter *adapter)
4077 {
4078         if (adapter->has_manage) {
4079                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4080
4081                 /* re-enable hardware interception of ARP */
4082                 manc |= E1000_MANC_ARP_EN;
4083                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4084         }
4085 }
4086
4087 /*
4088  * lem_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4089  * For ASF and Pass Through versions of f/w this means
4090  * that the driver is loaded. For AMT version type f/w
4091  * this means that the network i/f is open.
4092  */
4093 static void
4094 lem_get_hw_control(struct adapter *adapter)
4095 {
4096         u32 ctrl_ext;
4097
4098         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4099         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4100             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4101         return;
4102 }
4103
4104 /*
4105  * lem_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4106  * For ASF and Pass Through versions of f/w this means that
4107  * the driver is no longer loaded. For AMT versions of the
4108  * f/w this means that the network i/f is closed.
4109  */
4110 static void
4111 lem_release_hw_control(struct adapter *adapter)
4112 {
4113         u32 ctrl_ext;
4114
4115         if (!adapter->has_manage)
4116                 return;
4117
4118         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4119         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4120             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4121         return;
4122 }
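/*
 * Illustration (not part of the original driver): the two routines above are
 * meant to be used as a pair around the time the OS owns the device; the real
 * call sites live elsewhere in this file.  The intended shape is sketched
 * below; the surrounding function is hypothetical.
 */
#if 0	/* illustrative sketch, not compiled */
static void
lem_example_hw_control_pairing(struct adapter *adapter)
{
	lem_get_hw_control(adapter);	/* driver announces ownership (DRV_LOAD set) */
	/* ... the interface is brought up and used ... */
	lem_release_hw_control(adapter);	/* DRV_LOAD cleared for the firmware */
}
#endif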
4123
4124 static int
4125 lem_is_valid_ether_addr(u8 *addr)
4126 {
4127         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4128
4129         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4130                 return (FALSE);
4131         }
4132
4133         return (TRUE);
4134 }
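/*
 * Illustration (not part of the original driver): the check above rejects an
 * address whose group/multicast bit (bit 0 of the first octet) is set, or one
 * that is all zero.  Sample inputs; the function below is hypothetical.
 */
#if 0	/* illustrative sketch, not compiled */
static int
lem_example_addr_checks(void)
{
	u8 uni[ETHER_ADDR_LEN]   = { 0x00, 0x1b, 0x21, 0x0a, 0x0b, 0x0c };
	u8 mcast[ETHER_ADDR_LEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	u8 zero[ETHER_ADDR_LEN]  = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };

	return (lem_is_valid_ether_addr(uni) == TRUE &&	/* unicast: accepted */
	    lem_is_valid_ether_addr(mcast) == FALSE &&	/* group bit set: rejected */
	    lem_is_valid_ether_addr(zero) == FALSE);	/* all zero: rejected */
}
#endif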
4135
4136 /*
4137 ** Parse the interface capabilities with regard
4138 ** to both system management and wake-on-lan for
4139 ** later use.
4140 */
4141 static void
4142 lem_get_wakeup(device_t dev)
4143 {
4144         struct adapter  *adapter = device_get_softc(dev);
4145         u16             eeprom_data = 0, device_id, apme_mask;
4146
4147         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
4148         apme_mask = EM_EEPROM_APME;
4149
4150         switch (adapter->hw.mac.type) {
4151         case e1000_82542:
4152         case e1000_82543:
4153                 break;
4154         case e1000_82544:
4155                 e1000_read_nvm(&adapter->hw,
4156                     NVM_INIT_CONTROL2_REG, 1, &eeprom_data);
4157                 apme_mask = EM_82544_APME;
4158                 break;
4159         case e1000_82546:
4160         case e1000_82546_rev_3:
4161                 if (adapter->hw.bus.func == 1) {
4162                         e1000_read_nvm(&adapter->hw,
4163                             NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
4164                         break;
4165                 } else
4166                         e1000_read_nvm(&adapter->hw,
4167                             NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4168                 break;
4169         default:
4170                 e1000_read_nvm(&adapter->hw,
4171                     NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
4172                 break;
4173         }
4174         if (eeprom_data & apme_mask)
4175                 adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
4176         /*
4177          * We have the eeprom settings, now apply the special cases
4178          * where the eeprom may be wrong or the board won't support
4179          * wake on lan on a particular port
4180          */
4181         device_id = pci_get_device(dev);
4182         switch (device_id) {
4183         case E1000_DEV_ID_82546GB_PCIE:
4184                 adapter->wol = 0;
4185                 break;
4186         case E1000_DEV_ID_82546EB_FIBER:
4187         case E1000_DEV_ID_82546GB_FIBER:
4188                 /* Wake events only supported on port A for dual fiber
4189                  * regardless of eeprom setting */
4190                 if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
4191                     E1000_STATUS_FUNC_1)
4192                         adapter->wol = 0;
4193                 break;
4194         case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
4195                 /* if quad port adapter, disable WoL on all but port A */
4196                 if (global_quad_port_a != 0)
4197                         adapter->wol = 0;
4198                 /* Reset for multiple quad port adapters */
4199                 if (++global_quad_port_a == 4)
4200                         global_quad_port_a = 0;
4201                 break;
4202         }
4203         return;
4204 }
4205
4206
4207 /*
4208  * Enable PCI Wake On Lan capability
4209  */
4210 static void
4211 lem_enable_wakeup(device_t dev)
4212 {
4213         struct adapter  *adapter = device_get_softc(dev);
4214         struct ifnet    *ifp = adapter->ifp;
4215         u32             pmc, ctrl, ctrl_ext, rctl;
4216         u16             status;
4217
4218         if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
4219                 return;
4220
4221         /* Advertise the wakeup capability */
4222         ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4223         ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
4224         E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4225         E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4226
4227         /* Keep the laser running on Fiber adapters */
4228         if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
4229             adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
4230                 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4231                 ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
4232                 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
4233         }
4234
4235         /*
4236         ** Determine type of Wakeup: note that wol
4237         ** is set with all bits on by default.
4238         */
4239         if ((ifp->if_capenable & IFCAP_WOL_MAGIC) == 0)
4240                 adapter->wol &= ~E1000_WUFC_MAG;
4241
4242         if ((ifp->if_capenable & IFCAP_WOL_MCAST) == 0)
4243                 adapter->wol &= ~E1000_WUFC_MC;
4244         else {
4245                 rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4246                 rctl |= E1000_RCTL_MPE;
4247                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4248         }
4249
4250         if (adapter->hw.mac.type == e1000_pchlan) {
4251                 if (lem_enable_phy_wakeup(adapter))
4252                         return;
4253         } else {
4254                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
4255                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
4256         }
4257
4258
4259         /* Request PME */
4260         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
4261         status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
4262         if (ifp->if_capenable & IFCAP_WOL)
4263                 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4264         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
4265
4266         return;
4267 }
4268
4269 /*
4270 ** WOL in the newer chipset interfaces (pchlan)
4271 ** requires things to be copied into the PHY
4272 */
4273 static int
4274 lem_enable_phy_wakeup(struct adapter *adapter)
4275 {
4276         struct e1000_hw *hw = &adapter->hw;
4277         u32 mreg, ret = 0;
4278         u16 preg;
4279
4280         /* copy MAC RARs to PHY RARs */
4281         for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
4282                 mreg = E1000_READ_REG(hw, E1000_RAL(i));
4283                 e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
4284                 e1000_write_phy_reg(hw, BM_RAR_M(i),
4285                     (u16)((mreg >> 16) & 0xFFFF));
4286                 mreg = E1000_READ_REG(hw, E1000_RAH(i));
4287                 e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
4288                 e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
4289                     (u16)((mreg >> 16) & 0xFFFF));
4290         }
4291
4292         /* copy MAC MTA to PHY MTA */
4293         for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
4294                 mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
4295                 e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
4296                 e1000_write_phy_reg(hw, BM_MTA(i) + 1,
4297                     (u16)((mreg >> 16) & 0xFFFF));
4298         }
4299
4300         /* configure PHY Rx Control register */
4301         e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
4302         mreg = E1000_READ_REG(hw, E1000_RCTL);
4303         if (mreg & E1000_RCTL_UPE)
4304                 preg |= BM_RCTL_UPE;
4305         if (mreg & E1000_RCTL_MPE)
4306                 preg |= BM_RCTL_MPE;
4307         preg &= ~(BM_RCTL_MO_MASK);
4308         if (mreg & E1000_RCTL_MO_3)
4309                 preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
4310                                 << BM_RCTL_MO_SHIFT);
4311         if (mreg & E1000_RCTL_BAM)
4312                 preg |= BM_RCTL_BAM;
4313         if (mreg & E1000_RCTL_PMCF)
4314                 preg |= BM_RCTL_PMCF;
4315         mreg = E1000_READ_REG(hw, E1000_CTRL);
4316         if (mreg & E1000_CTRL_RFCE)
4317                 preg |= BM_RCTL_RFCE;
4318         e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
4319
4320         /* enable PHY wakeup in MAC register */
4321         E1000_WRITE_REG(hw, E1000_WUC,
4322             E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
4323         E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
4324
4325         /* configure and enable PHY wakeup in PHY registers */
4326         e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
4327         e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
4328
4329         /* activate PHY wakeup */
4330         ret = hw->phy.ops.acquire(hw);
4331         if (ret) {
4332                 printf("Could not acquire PHY\n");
4333                 return ret;
4334         }
4335         e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
4336                                  (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
4337         ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
4338         if (ret) {
4339                 printf("Could not read PHY page 769\n");
4340                 goto out;
4341         }
4342         preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
4343         ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
4344         if (ret)
4345                 printf("Could not set PHY Host Wakeup bit\n");
4346 out:
4347         hw->phy.ops.release(hw);
4348
4349         return ret;
4350 }
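/*
 * Illustration (not part of the original driver): the BM_* PHY registers are
 * 16 bits wide, so each 32-bit MAC register above is copied as two halves.
 * If RAL(0) reads 0xAABBCCDD, the loop writes BM_RAR_L(0) = 0xCCDD and
 * BM_RAR_M(0) = 0xAABB.  The split restated; the helper is hypothetical.
 */
#if 0	/* illustrative sketch, not compiled */
static __inline void
lem_example_split32(u32 mreg, u16 *lo, u16 *hi)
{
	*lo = (u16)(mreg & 0xFFFF);		/* low half, first PHY register */
	*hi = (u16)((mreg >> 16) & 0xFFFF);	/* high half, second PHY register */
}
#endif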
4351
4352 static void
4353 lem_led_func(void *arg, int onoff)
4354 {
4355         struct adapter  *adapter = arg;
4356
4357         EM_CORE_LOCK(adapter);
4358         if (onoff) {
4359                 e1000_setup_led(&adapter->hw);
4360                 e1000_led_on(&adapter->hw);
4361         } else {
4362                 e1000_led_off(&adapter->hw);
4363                 e1000_cleanup_led(&adapter->hw);
4364         }
4365         EM_CORE_UNLOCK(adapter);
4366 }
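/*
 * Illustration (not part of the original driver): lem_led_func() is the
 * callback registered with the led(4) framework.  The attach/detach hookup
 * lives elsewhere in this file; the usual pattern is sketched below, and the
 * led_dev field name shown here is an assumption.
 */
#if 0	/* illustrative sketch, not compiled */
	/* At attach time: */
	adapter->led_dev = led_create(lem_led_func, adapter,
	    device_get_nameunit(adapter->dev));
	/* At detach time: */
	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);
#endif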
4367
4368 /*********************************************************************
4369 * 82544 Coexistence issue workaround.
4370 *    There are 2 issues.
4371 *       1. Transmit Hang issue.
4372 *    To detect this issue, the following equation can be used:
4373 *         SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4374 *         If SUM[3:0] is between 1 and 4, this issue occurs.
4375 *
4376 *       2. DAC issue.
4377 *    To detect this issue, the following equation can be used:
4378 *         SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4379 *         If SUM[3:0] is between 9 and 0xC, this issue occurs.
4380 *
4381 *
4382 *    WORKAROUND:
4383 *         Make sure the ending address does not fall in the
4384 *         1,2,3,4 (Hang) or 9,a,b,c (DAC) ranges.
4385 *         (A worked example follows the function below.)
4386 *************************************************************************/
4387 static u32
4388 lem_fill_descriptors (bus_addr_t address, u32 length,
4389                 PDESC_ARRAY desc_array)
4390 {
4391         u32 safe_terminator;
4392
4393         /* The issue is sensitive to both length and address. */
4394         /* Buffers of 4 bytes or less always fit safely in one descriptor. */
4395         if (length <= 4) {
4396                 desc_array->descriptor[0].address = address;
4397                 desc_array->descriptor[0].length = length;
4398                 desc_array->elements = 1;
4399                 return (desc_array->elements);
4400         }
4401         safe_terminator = (u32)((((u32)address & 0x7) +
4402             (length & 0xF)) & 0xF);
4403         /* If it falls outside both the 0x1-0x4 and 0x9-0xC windows, no split is needed. */
4404         if (safe_terminator == 0 ||
4405             (safe_terminator > 4 &&
4406             safe_terminator < 9) ||
4407             (safe_terminator > 0xC &&
4408             safe_terminator <= 0xF)) {
4409                 desc_array->descriptor[0].address = address;
4410                 desc_array->descriptor[0].length = length;
4411                 desc_array->elements = 1;
4412                 return (desc_array->elements);
4413         }
4414
4415         desc_array->descriptor[0].address = address;
4416         desc_array->descriptor[0].length = length - 4;
4417         desc_array->descriptor[1].address = address + (length - 4);
4418         desc_array->descriptor[1].length = 4;
4419         desc_array->elements = 2;
4420         return (desc_array->elements);
4421 }
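/*
 * Worked example (not part of the original driver): suppose a buffer has
 * address 0x1006 and length 0xE.  Then
 *
 *     safe_terminator = ((0x1006 & 0x7) + (0xE & 0xF)) & 0xF
 *                     = (0x6 + 0xE) & 0xF = 0x4,
 *
 * which lands in the 0x1-0x4 transmit-hang window, so the buffer is split:
 *
 *     descriptor[0]: address = 0x1006, length = 0xA  (terminator now 0x0)
 *     descriptor[1]: address = 0x1010, length = 4
 *
 * The first piece now ends on a safe boundary, and buffers of 4 bytes or
 * less are treated as safe by the early return at the top of the routine.
 */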
4422
4423 /**********************************************************************
4424  *
4425  *  Update the board statistics counters.
4426  *
4427  **********************************************************************/
4428 static void
4429 lem_update_stats_counters(struct adapter *adapter)
4430 {
4431         struct ifnet   *ifp;
4432
4433         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4434            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4435                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4436                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4437         }
4438         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4439         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4440         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4441         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4442
4443         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4444         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4445         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4446         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4447         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4448         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4449         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4450         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4451         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4452         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4453         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4454         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4455         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4456         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4457         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4458         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4459         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4460         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4461         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4462         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4463
4464         /* For the 64-bit byte counters the low dword must be read first. */
4465         /* Both registers clear on the read of the high dword */
4466
4467         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
4468             ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4469         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
4470             ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4471
4472         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4473         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4474         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4475         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4476         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4477
4478         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4479         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4480
4481         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4482         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4483         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4484         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4485         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4486         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4487         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4488         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4489         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4490         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4491
4492         if (adapter->hw.mac.type >= e1000_82543) {
4493                 adapter->stats.algnerrc +=
4494                     E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4495                 adapter->stats.rxerrc +=
4496                     E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4497                 adapter->stats.tncrs +=
4498                     E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4499                 adapter->stats.cexterr +=
4500                     E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4501                 adapter->stats.tsctc +=
4502                     E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4503                 adapter->stats.tsctfc +=
4504                     E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4505         }
4506         ifp = adapter->ifp;
4507
4508         ifp->if_collisions = adapter->stats.colc;
4509
4510         /* Rx Errors */
4511         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4512             adapter->stats.crcerrs + adapter->stats.algnerrc +
4513             adapter->stats.ruc + adapter->stats.roc +
4514             adapter->stats.mpc + adapter->stats.cexterr;
4515
4516         /* Tx Errors */
4517         ifp->if_oerrors = adapter->stats.ecol +
4518             adapter->stats.latecol + adapter->watchdog_events;
4519 }
4520
4521 /* Export a single 32-bit register via a read-only sysctl. */
4522 static int
4523 lem_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
4524 {
4525         struct adapter *adapter;
4526         u_int val;
4527
4528         adapter = oidp->oid_arg1;
4529         val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
4530         return (sysctl_handle_int(oidp, &val, 0, req));
4531 }
4532
4533 /*
4534  * Add sysctl variables, one per statistic, to the system.
4535  */
4536 static void
4537 lem_add_hw_stats(struct adapter *adapter)
4538 {
4539         device_t dev = adapter->dev;
4540
4541         struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4542         struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4543         struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4544         struct e1000_hw_stats *stats = &adapter->stats;
4545
4546         struct sysctl_oid *stat_node;
4547         struct sysctl_oid_list *stat_list;
4548
4549         /* Driver Statistics */
4550         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
4551                          CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4552                          "Std mbuf cluster failed");
4553         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", 
4554                          CTLFLAG_RD, &adapter->mbuf_defrag_failed,
4555                          "Defragmenting mbuf chain failed");
4556         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
4557                         CTLFLAG_RD, &adapter->dropped_pkts,
4558                         "Driver dropped packets");
4559         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
4560                         CTLFLAG_RD, &adapter->no_tx_dma_setup,
4561                         "Driver tx dma failure in xmit");
4562         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail1",
4563                         CTLFLAG_RD, &adapter->no_tx_desc_avail1,
4564                         "Not enough tx descriptors failure in xmit");
4565         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail2",
4566                         CTLFLAG_RD, &adapter->no_tx_desc_avail2,
4567                         "Not enough tx descriptors failure in xmit");
4568         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
4569                         CTLFLAG_RD, &adapter->rx_overruns,
4570                         "RX overruns");
4571         SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
4572                         CTLFLAG_RD, &adapter->watchdog_events,
4573                         "Watchdog timeouts");
4574
4575         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
4576                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
4577                         lem_sysctl_reg_handler, "IU",
4578                         "Device Control Register");
4579         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
4580                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
4581                         lem_sysctl_reg_handler, "IU",
4582                         "Receiver Control Register");
4583         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4584                         CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4585                         "Flow Control High Watermark");
4586         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
4587                         CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4588                         "Flow Control Low Watermark");
4589         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_workaround",
4590                         CTLFLAG_RD, &adapter->tx_fifo_wrk_cnt,
4591                         "TX FIFO workaround events");
4592         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_reset",
4593                         CTLFLAG_RD, &adapter->tx_fifo_reset_cnt,
4594                         "TX FIFO resets");
4595
4596         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_head", 
4597                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(0),
4598                         lem_sysctl_reg_handler, "IU",
4599                         "Transmit Descriptor Head");
4600         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_tail", 
4601                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(0),
4602                         lem_sysctl_reg_handler, "IU",
4603                         "Transmit Descriptor Tail");
4604         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_head", 
4605                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(0),
4606                         lem_sysctl_reg_handler, "IU",
4607                         "Receive Descriptor Head");
4608         SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_tail", 
4609                         CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(0),
4610                         lem_sysctl_reg_handler, "IU",
4611                         "Receive Descriptor Tail");
4612         
4613
4614         /* MAC stats get their own sub node */
4615
4616         stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
4617                                     CTLFLAG_RD, NULL, "Statistics");
4618         stat_list = SYSCTL_CHILDREN(stat_node);
4619
4620         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
4621                         CTLFLAG_RD, &stats->ecol,
4622                         "Excessive collisions");
4623         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
4624                         CTLFLAG_RD, &stats->scc,
4625                         "Single collisions");
4626         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
4627                         CTLFLAG_RD, &stats->mcc,
4628                         "Multiple collisions");
4629         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
4630                         CTLFLAG_RD, &stats->latecol,
4631                         "Late collisions");
4632         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
4633                         CTLFLAG_RD, &stats->colc,
4634                         "Collision Count");
4635         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4636                         CTLFLAG_RD, &adapter->stats.symerrs,
4637                         "Symbol Errors");
4638         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4639                         CTLFLAG_RD, &adapter->stats.sec,
4640                         "Sequence Errors");
4641         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
4642                         CTLFLAG_RD, &adapter->stats.dc,
4643                         "Defer Count");
4644         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4645                         CTLFLAG_RD, &adapter->stats.mpc,
4646                         "Missed Packets");
4647         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4648                         CTLFLAG_RD, &adapter->stats.rnbc,
4649                         "Receive No Buffers");
4650         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
4651                         CTLFLAG_RD, &adapter->stats.ruc,
4652                         "Receive Undersize");
4653         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
4654                         CTLFLAG_RD, &adapter->stats.rfc,
4655                         "Fragmented Packets Received ");
4656         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
4657                         CTLFLAG_RD, &adapter->stats.roc,
4658                         "Oversized Packets Received");
4659         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
4660                         CTLFLAG_RD, &adapter->stats.rjc,
4661                         "Received Jabber");
4662         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4663                         CTLFLAG_RD, &adapter->stats.rxerrc,
4664                         "Receive Errors");
4665         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4666                         CTLFLAG_RD, &adapter->stats.crcerrs,
4667                         "CRC errors");
4668         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4669                         CTLFLAG_RD, &adapter->stats.algnerrc,
4670                         "Alignment Errors");
4671         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4672                         CTLFLAG_RD, &adapter->stats.cexterr,
4673                         "Collision/Carrier extension errors");
4674         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4675                         CTLFLAG_RD, &adapter->stats.xonrxc,
4676                         "XON Received");
4677         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4678                         CTLFLAG_RD, &adapter->stats.xontxc,
4679                         "XON Transmitted");
4680         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4681                         CTLFLAG_RD, &adapter->stats.xoffrxc,
4682                         "XOFF Received");
4683         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4684                         CTLFLAG_RD, &adapter->stats.xofftxc,
4685                         "XOFF Transmitted");
4686
4687         /* Packet Reception Stats */
4688         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
4689                         CTLFLAG_RD, &adapter->stats.tpr,
4690                         "Total Packets Received ");
4691         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
4692                         CTLFLAG_RD, &adapter->stats.gprc,
4693                         "Good Packets Received");
4694         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
4695                         CTLFLAG_RD, &adapter->stats.bprc,
4696                         "Broadcast Packets Received");
4697         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
4698                         CTLFLAG_RD, &adapter->stats.mprc,
4699                         "Multicast Packets Received");
4700         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
4701                         CTLFLAG_RD, &adapter->stats.prc64,
4702                         "64 byte frames received ");
4703         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
4704                         CTLFLAG_RD, &adapter->stats.prc127,
4705                         "65-127 byte frames received");
4706         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
4707                         CTLFLAG_RD, &adapter->stats.prc255,
4708                         "128-255 byte frames received");
4709         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
4710                         CTLFLAG_RD, &adapter->stats.prc511,
4711                         "256-511 byte frames received");
4712         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
4713                         CTLFLAG_RD, &adapter->stats.prc1023,
4714                         "512-1023 byte frames received");
4715         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
4716                         CTLFLAG_RD, &adapter->stats.prc1522,
4717                         "1024-1522 byte frames received");
4718         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
4719                         CTLFLAG_RD, &adapter->stats.gorc, 
4720                         "Good Octets Received");
4721
4722         /* Packet Transmission Stats */
4723         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
4724                         CTLFLAG_RD, &adapter->stats.gotc, 
4725                         "Good Octets Transmitted"); 
4726         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
4727                         CTLFLAG_RD, &adapter->stats.tpt,
4728                         "Total Packets Transmitted");
4729         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
4730                         CTLFLAG_RD, &adapter->stats.gptc,
4731                         "Good Packets Transmitted");
4732         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
4733                         CTLFLAG_RD, &adapter->stats.bptc,
4734                         "Broadcast Packets Transmitted");
4735         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
4736                         CTLFLAG_RD, &adapter->stats.mptc,
4737                         "Multicast Packets Transmitted");
4738         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
4739                         CTLFLAG_RD, &adapter->stats.ptc64,
4740                         "64 byte frames transmitted ");
4741         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
4742                         CTLFLAG_RD, &adapter->stats.ptc127,
4743                         "65-127 byte frames transmitted");
4744         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
4745                         CTLFLAG_RD, &adapter->stats.ptc255,
4746                         "128-255 byte frames transmitted");
4747         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
4748                         CTLFLAG_RD, &adapter->stats.ptc511,
4749                         "256-511 byte frames transmitted");
4750         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
4751                         CTLFLAG_RD, &adapter->stats.ptc1023,
4752                         "512-1023 byte frames transmitted");
4753         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
4754                         CTLFLAG_RD, &adapter->stats.ptc1522,
4755                         "1024-1522 byte frames transmitted");
4756         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
4757                         CTLFLAG_RD, &adapter->stats.tsctc,
4758                         "TSO Contexts Transmitted");
4759         SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
4760                         CTLFLAG_RD, &adapter->stats.tsctfc,
4761                         "TSO Contexts Failed");
4762 }
4763
4764 /**********************************************************************
4765  *
4766  *  This routine provides a way to dump out the adapter eeprom,
4767  *  often a useful debug/service tool. This only dumps the first
4768  *  32 words; the settings that matter live in that range.
4769  *
4770  **********************************************************************/
4771
4772 static int
4773 lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
4774 {
4775         struct adapter *adapter;
4776         int error;
4777         int result;
4778
4779         result = -1;
4780         error = sysctl_handle_int(oidp, &result, 0, req);
4781
4782         if (error || !req->newptr)
4783                 return (error);
4784
4785         /*
4786          * This value will cause a hex dump of the
4787          * first 32 16-bit words of the EEPROM to
4788          * the screen.
4789          */
4790         if (result == 1) {
4791                 adapter = (struct adapter *)arg1;
4792                 lem_print_nvm_info(adapter);
4793         }
4794
4795         return (error);
4796 }
4797
4798 static void
4799 lem_print_nvm_info(struct adapter *adapter)
4800 {
4801         u16     eeprom_data;
4802         int     i, j, row = 0;
4803
4804         /* It's a bit crude, but it gets the job done */
4805         printf("\nInterface EEPROM Dump:\n");
4806         printf("Offset\n0x0000  ");
4807         for (i = 0, j = 0; i < 32; i++, j++) {
4808                 if (j == 8) { /* Make the offset block */
4809                         j = 0; ++row;
4810                         printf("\n0x00%x0  ",row);
4811                 }
4812                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4813                 printf("%04x ", eeprom_data);
4814         }
4815         printf("\n");
4816 }
4817
4818 static int
4819 lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4820 {
4821         struct em_int_delay_info *info;
4822         struct adapter *adapter;
4823         u32 regval;
4824         int error;
4825         int usecs;
4826         int ticks;
4827
4828         info = (struct em_int_delay_info *)arg1;
4829         usecs = info->value;
4830         error = sysctl_handle_int(oidp, &usecs, 0, req);
4831         if (error != 0 || req->newptr == NULL)
4832                 return (error);
4833         if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4834                 return (EINVAL);
4835         info->value = usecs;
4836         ticks = EM_USECS_TO_TICKS(usecs);
4837         if (info->offset == E1000_ITR)  /* units are 256ns here */
4838                 ticks *= 4;
4839
4840         adapter = info->adapter;
4841         
4842         EM_CORE_LOCK(adapter);
4843         regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4844         regval = (regval & ~0xffff) | (ticks & 0xffff);
4845         /* Handle a few special cases. */
4846         switch (info->offset) {
4847         case E1000_RDTR:
4848                 break;
4849         case E1000_TIDV:
4850                 if (ticks == 0) {
4851                         adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4852                         /* Don't write 0 into the TIDV register. */
4853                         regval++;
4854                 } else
4855                         adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4856                 break;
4857         }
4858         E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4859         EM_CORE_UNLOCK(adapter);
4860         return (0);
4861 }
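/*
 * Worked example (not part of the original driver): E1000_ITR counts in
 * 256 ns units, while the other delay registers use a coarser tick; the
 * "ticks *= 4" above implies that coarser tick is 4 x 256 ns = 1.024 usec.
 * So usecs = 1024 converts to 1000 generic ticks (1024 us / 1.024 us), and
 * to 4000 when the target register is E1000_ITR.
 */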
4862
4863 static void
4864 lem_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4865         const char *description, struct em_int_delay_info *info,
4866         int offset, int value)
4867 {
4868         info->adapter = adapter;
4869         info->offset = offset;
4870         info->value = value;
4871         SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4872             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4873             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4874             info, 0, lem_sysctl_int_delay, "I", description);
4875 }
4876
4877 static void
4878 lem_set_flow_cntrl(struct adapter *adapter, const char *name,
4879         const char *description, int *limit, int value)
4880 {
4881         *limit = value;
4882         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4883             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4884             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
4885 }
4886
4887 static void
4888 lem_add_rx_process_limit(struct adapter *adapter, const char *name,
4889         const char *description, int *limit, int value)
4890 {
4891         *limit = value;
4892         SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4893             SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4894             OID_AUTO, name, CTLFLAG_RW, limit, value, description);
4895 }
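/*
 * Illustration (not part of the original driver): these helpers simply expose
 * a driver variable as a read/write sysctl under the device's tree.  A typical
 * call from the attach path elsewhere in this file looks like the sketch
 * below; the node name, field, description, and default value shown here are
 * assumptions for illustration.
 */
#if 0	/* illustrative sketch, not compiled */
	lem_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process per interrupt",
	    &adapter->rx_process_limit, 100);
#endif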