1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78
79 #include <cxgb_include.h>
80
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_ext_intr_handler(void *, int);
99 static void cxgb_tick_handler(void *, int);
100 static void cxgb_tick(void *);
101 static void setup_rss(adapter_t *sc);
102
103 /* Attachment glue for the PCI controller end of the device.  Each port of
104  * the device is attached separately, as defined later.
105  */
106 static int cxgb_controller_probe(device_t);
107 static int cxgb_controller_attach(device_t);
108 static int cxgb_controller_detach(device_t);
109 static void cxgb_free(struct adapter *);
110 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
111     unsigned int end);
112 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
113 static int cxgb_get_regs_len(void);
114 static int offload_open(struct port_info *pi);
115 static void touch_bars(device_t dev);
116 static int offload_close(struct t3cdev *tdev);
117 static void cxgb_update_mac_settings(struct port_info *p);
118
119 static device_method_t cxgb_controller_methods[] = {
120         DEVMETHOD(device_probe,         cxgb_controller_probe),
121         DEVMETHOD(device_attach,        cxgb_controller_attach),
122         DEVMETHOD(device_detach,        cxgb_controller_detach),
123
124         /* bus interface */
125         DEVMETHOD(bus_print_child,      bus_generic_print_child),
126         DEVMETHOD(bus_driver_added,     bus_generic_driver_added),
127
128         { 0, 0 }
129 };
130
131 static driver_t cxgb_controller_driver = {
132         "cxgbc",
133         cxgb_controller_methods,
134         sizeof(struct adapter)
135 };
136
137 static devclass_t       cxgb_controller_devclass;
138 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
139
140 /*
141  * Attachment glue for the ports.  Attachment is done directly to the
142  * controller device.
143  */
144 static int cxgb_port_probe(device_t);
145 static int cxgb_port_attach(device_t);
146 static int cxgb_port_detach(device_t);
147
148 static device_method_t cxgb_port_methods[] = {
149         DEVMETHOD(device_probe,         cxgb_port_probe),
150         DEVMETHOD(device_attach,        cxgb_port_attach),
151         DEVMETHOD(device_detach,        cxgb_port_detach),
152         { 0, 0 }
153 };
154
155 static driver_t cxgb_port_driver = {
156         "cxgb",
157         cxgb_port_methods,
158         0
159 };
160
161 static d_ioctl_t cxgb_extension_ioctl;
162 static d_open_t cxgb_extension_open;
163 static d_close_t cxgb_extension_close;
164
165 static struct cdevsw cxgb_cdevsw = {
166        .d_version =    D_VERSION,
167        .d_flags =      0,
168        .d_open =       cxgb_extension_open,
169        .d_close =      cxgb_extension_close,
170        .d_ioctl =      cxgb_extension_ioctl,
171        .d_name =       "cxgb",
172 };
173
174 static devclass_t       cxgb_port_devclass;
175 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
176
177 /*
178  * The driver uses the best interrupt scheme available on a platform in the
179  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
180  * of these schemes the driver may consider as follows:
181  *
182  * msi = 2: choose from among all three options
183  * msi = 1: only consider MSI and pin interrupts
184  * msi = 0: force pin interrupts
185  */
186 static int msi_allowed = 2;
187
188 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
189 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
190 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
191     "MSI-X, MSI, INTx selector");
192
193 /*
194  * The driver enables offload by default.
195  * To disable it, set ofld_disable to 1.
196  */
197 static int ofld_disable = 0;
198 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
199 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
200     "disable ULP offload");
201
202 /*
203  * The driver uses an auto-queue algorithm by default.
204  * To disable it and force a single queue-set per port, use multiq = 0
205  */
206 static int multiq = 1;
207 TUNABLE_INT("hw.cxgb.multiq", &multiq);
208 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
209     "use min(ncpus/ports, 8) queue-sets per port");
210
211 /*
212  * By default the driver will not update the firmware unless
213  * it was compiled against a newer version.
214  */
216 static int force_fw_update = 0;
217 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
218 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
219     "update firmware even if up to date");
220
221 int cxgb_use_16k_clusters = -1;
222 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
223 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
224     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
225
226 /*
227  * Tune the size of the output queue.
228  */
229 int cxgb_snd_queue_len = IFQ_MAXLEN;
230 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
231 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
232     &cxgb_snd_queue_len, 0, "send queue size");
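
/*
 * All of the knobs above are boot-time tunables.  A loader.conf(5) sketch
 * (illustrative values, not recommendations):
 *
 *      hw.cxgb.msi_allowed="1"         # consider MSI and INTx only
 *      hw.cxgb.multiq="0"              # one queue set per port
 *      hw.cxgb.snd_queue_len="4096"    # deeper software send queue
 */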
233
234
235 enum {
236         MAX_TXQ_ENTRIES      = 16384,
237         MAX_CTRL_TXQ_ENTRIES = 1024,
238         MAX_RSPQ_ENTRIES     = 16384,
239         MAX_RX_BUFFERS       = 16384,
240         MAX_RX_JUMBO_BUFFERS = 16384,
241         MIN_TXQ_ENTRIES      = 4,
242         MIN_CTRL_TXQ_ENTRIES = 4,
243         MIN_RSPQ_ENTRIES     = 32,
244         MIN_FL_ENTRIES       = 32,
245         MIN_FL_JUMBO_ENTRIES = 32
246 };
247
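/*
 * Software image of one hardware filter: the IP/port match tuple plus
 * VLAN/MAC matching state and the action to take (pass or drop, optionally
 * steering to a specific RSS queue set).
 */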
248 struct filter_info {
249         u32 sip;
250         u32 sip_mask;
251         u32 dip;
252         u16 sport;
253         u16 dport;
254         u32 vlan:12;
255         u32 vlan_prio:3;
256         u32 mac_hit:1;
257         u32 mac_idx:4;
258         u32 mac_vld:1;
259         u32 pkt_type:2;
260         u32 report_filter_id:1;
261         u32 pass:1;
262         u32 rss:1;
263         u32 qset:3;
264         u32 locked:1;
265         u32 valid:1;
266 };
267
268 enum { FILTER_NO_VLAN_PRI = 7 };
269
270 #define EEPROM_MAGIC 0x38E2F10C
271
272 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
273
274 /* Table for probing the cards.  The desc field isn't actually used */
275 struct cxgb_ident {
276         uint16_t        vendor;
277         uint16_t        device;
278         int             index;
279         char            *desc;
280 } cxgb_identifiers[] = {
281         {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282         {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283         {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284         {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285         {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286         {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287         {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288         {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289         {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290         {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291         {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292         {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293         {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294         {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295         {0, 0, 0, NULL}
296 };
297
298 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
299
300
301 static __inline char
302 t3rev2char(struct adapter *adapter)
303 {
304         char rev = 'z';
305
306         switch(adapter->params.rev) {
307         case T3_REV_A:
308                 rev = 'a';
309                 break;
310         case T3_REV_B:
311         case T3_REV_B2:
312                 rev = 'b';
313                 break;
314         case T3_REV_C:
315                 rev = 'c';
316                 break;
317         }
318         return rev;
319 }
320
321 static struct cxgb_ident *
322 cxgb_get_ident(device_t dev)
323 {
324         struct cxgb_ident *id;
325
326         for (id = cxgb_identifiers; id->desc != NULL; id++) {
327                 if ((id->vendor == pci_get_vendor(dev)) &&
328                     (id->device == pci_get_device(dev))) {
329                         return (id);
330                 }
331         }
332         return (NULL);
333 }
334
335 static const struct adapter_info *
336 cxgb_get_adapter_info(device_t dev)
337 {
338         struct cxgb_ident *id;
339         const struct adapter_info *ai;
340
341         id = cxgb_get_ident(dev);
342         if (id == NULL)
343                 return (NULL);
344
345         ai = t3_get_adapter_info(id->index);
346
347         return (ai);
348 }
349
350 static int
351 cxgb_controller_probe(device_t dev)
352 {
353         const struct adapter_info *ai;
354         char *ports, buf[80];
355         int nports;
356
357         ai = cxgb_get_adapter_info(dev);
358         if (ai == NULL)
359                 return (ENXIO);
360
361         nports = ai->nports0 + ai->nports1;
362         if (nports == 1)
363                 ports = "port";
364         else
365                 ports = "ports";
366
367         snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368         device_set_desc_copy(dev, buf);
369         return (BUS_PROBE_DEFAULT);
370 }
371
372 #define FW_FNAME "cxgb_t3fw"
373 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
374 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
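
/*
 * These names are resolved through firmware(9): FW_FNAME is the image
 * registered by the cxgb_t3fw module, and the %c in the TP names is
 * filled in with the chip revision (see t3rev2char() below).
 */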
375
376 static int
377 upgrade_fw(adapter_t *sc)
378 {
379         const struct firmware *fw;
380         int status;
381         u32 vers;
382         
383         if ((fw = firmware_get(FW_FNAME)) == NULL)  {
384                 device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
385                 return (ENOENT);
386         } else
387                 device_printf(sc->dev, "installing firmware on card\n");
388         status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
389
390         if (status != 0) {
391                 device_printf(sc->dev, "failed to install firmware: %d\n",
392                     status);
393         } else {
394                 t3_get_fw_version(sc, &vers);
395                 snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
396                     G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
397                     G_FW_VERSION_MICRO(vers));
398         }
399
400         firmware_put(fw, FIRMWARE_UNLOAD);
401
402         return (status);        
403 }
404
405 /*
406  * The cxgb_controller_attach function is responsible for the initial
407  * bringup of the device.  Its responsibilities include:
408  *
409  *  1. Determine if the device supports MSI or MSI-X.
410  *  2. Allocate bus resources so that we can access the Base Address Register.
411  *  3. Create and initialize mutexes for the controller and its control
412  *     logic such as SGE and MDIO.
413  *  4. Call hardware specific setup routine for the adapter as a whole.
414  *  5. Allocate the BAR for doing MSI-X.
415  *  6. Setup the line interrupt iff MSI-X is not supported.
416  *  7. Create the driver's taskq.
417  *  8. Start one task queue service thread.
418  *  9. Check if the firmware and SRAM are up-to-date.  They will be
419  *     auto-updated later (before FULL_INIT_DONE), if required.
420 * 10. Create a child device for each MAC (port).
421 * 11. Initialize T3 private state.
422 * 12. Trigger the LED.
423  * 13. Setup offload iff supported.
424  * 14. Reset/restart the tick callout.
425  * 15. Attach sysctls
426  *
427  * NOTE: Any modification or deviation from this list MUST be reflected in
428  * the above comment.  Failure to do so will result in problems on various
429  * error conditions including link flapping.
430  */
431 static int
432 cxgb_controller_attach(device_t dev)
433 {
434         device_t child;
435         const struct adapter_info *ai;
436         struct adapter *sc;
437         int i, error = 0;
438         uint32_t vers;
439         int port_qsets = 1;
440         int msi_needed, reg;
441         char buf[80];
442
443         sc = device_get_softc(dev);
444         sc->dev = dev;
445         sc->msi_count = 0;
446         ai = cxgb_get_adapter_info(dev);
447
448         /* Find the PCIe link width and set the max read request size to 4KB. */
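        /*
         * Offsets below are relative to the PCIe capability: 0x12 is the
         * Link Status register (negotiated width in bits 9:4) and 0x8 is
         * the Device Control register (Max_Read_Request_Size in bits 14:12,
         * where an encoding of 5 selects 4096 bytes).
         */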
449         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
450                 uint16_t lnk, pectl;
451                 lnk = pci_read_config(dev, reg + 0x12, 2);
452                 sc->link_width = (lnk >> 4) & 0x3f;
453                 
454                 pectl = pci_read_config(dev, reg + 0x8, 2);
455                 pectl = (pectl & ~0x7000) | (5 << 12);
456                 pci_write_config(dev, reg + 0x8, pectl, 2);
457         }
458
459         if (sc->link_width != 0 && sc->link_width <= 4 &&
460             (ai->nports0 + ai->nports1) <= 2) {
461                 device_printf(sc->dev,
462                     "PCIe x%d Link, expect reduced performance\n",
463                     sc->link_width);
464         }
465
466         touch_bars(dev);
467         pci_enable_busmaster(dev);
468         /*
469          * Allocate the registers and make them available to the driver.
470          * The registers that we care about for NIC mode are in BAR 0
471          */
472         sc->regs_rid = PCIR_BAR(0);
473         if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
474             &sc->regs_rid, RF_ACTIVE)) == NULL) {
475                 device_printf(dev, "Cannot allocate BAR region 0\n");
476                 return (ENXIO);
477         }
478         sc->udbs_rid = PCIR_BAR(2);
479         sc->udbs_res = NULL;
480         if (is_offload(sc) &&
481             ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
482                    &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
483                 device_printf(dev, "Cannot allocate BAR region 2\n");
484                 error = ENXIO;
485                 goto out;
486         }
487
488         snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
489             device_get_unit(dev));
490         ADAPTER_LOCK_INIT(sc, sc->lockbuf);
491
492         snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
493             device_get_unit(dev));
494         snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
495             device_get_unit(dev));
496         snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
497             device_get_unit(dev));
498         
499         MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
500         MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
501         MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
502         
503         sc->bt = rman_get_bustag(sc->regs_res);
504         sc->bh = rman_get_bushandle(sc->regs_res);
505         sc->mmio_len = rman_get_size(sc->regs_res);
506
507         for (i = 0; i < MAX_NPORTS; i++)
508                 sc->port[i].adapter = sc;
509
510         if (t3_prep_adapter(sc, ai, 1) < 0) {
511                 printf("prep adapter failed\n");
512                 error = ENODEV;
513                 goto out;
514         }
515         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516          * enough messages for the queue sets.  If that fails, try falling
517          * back to MSI.  If that fails, then try falling back to the legacy
518          * interrupt pin model.
519          */
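        /* rid 0x20 is PCIR_BAR(4), the BAR used for MSI-X. */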
520         sc->msix_regs_rid = 0x20;
521         if ((msi_allowed >= 2) &&
522             (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523             &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
524
525                 if (multiq)
526                         port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
527                 msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
528
529                 if (pci_msix_count(dev) == 0 ||
530                     (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
531                     sc->msi_count != msi_needed) {
532                         device_printf(dev, "alloc msix failed - "
533                                       "msi_count=%d, msi_needed=%d, err=%d; "
534                                       "will try MSI\n", sc->msi_count,
535                                       msi_needed, error);
536                         sc->msi_count = 0;
537                         port_qsets = 1;
538                         pci_release_msi(dev);
539                         bus_release_resource(dev, SYS_RES_MEMORY,
540                             sc->msix_regs_rid, sc->msix_regs_res);
541                         sc->msix_regs_res = NULL;
542                 } else {
543                         sc->flags |= USING_MSIX;
544                         sc->cxgb_intr = cxgb_async_intr;
545                         device_printf(dev,
546                                       "using MSI-X interrupts (%u vectors)\n",
547                                       sc->msi_count);
548                 }
549         }
550
551         if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
552                 sc->msi_count = 1;
553                 if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
554                         device_printf(dev, "alloc msi failed - "
555                                       "err=%d; will try INTx\n", error);
556                         sc->msi_count = 0;
557                         port_qsets = 1;
558                         pci_release_msi(dev);
559                 } else {
560                         sc->flags |= USING_MSI;
561                         sc->cxgb_intr = t3_intr_msi;
562                         device_printf(dev, "using MSI interrupts\n");
563                 }
564         }
565         if (sc->msi_count == 0) {
566                 device_printf(dev, "using line interrupts\n");
567                 sc->cxgb_intr = t3b_intr;
568         }
569
570         /* Create a private taskqueue thread for handling driver events */
571         sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
572             taskqueue_thread_enqueue, &sc->tq);
573         if (sc->tq == NULL) {
574                 device_printf(dev, "failed to allocate controller task queue\n");
                    error = ENOMEM;
575                 goto out;
576         }
577
578         taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
579             device_get_nameunit(dev));
580         TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
581         TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
582
583         
584         /* Create a periodic callout for checking adapter status */
585         callout_init(&sc->cxgb_tick_ch, TRUE);
586         
587         if (t3_check_fw_version(sc) < 0 || force_fw_update) {
588                 /*
589                  * Warn user that a firmware update will be attempted in init.
590                  */
591                 device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
592                     FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
593                 sc->flags &= ~FW_UPTODATE;
594         } else {
595                 sc->flags |= FW_UPTODATE;
596         }
597
598         if (t3_check_tpsram_version(sc) < 0) {
599                 /*
600                  * Warn user that a firmware update will be attempted in init.
601                  */
602                 device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
603                     t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
604                 sc->flags &= ~TPS_UPTODATE;
605         } else {
606                 sc->flags |= TPS_UPTODATE;
607         }
608         
609         /*
610          * Create a child device for each MAC.  The ethernet attachment
611          * will be done in these children.
612          */     
613         for (i = 0; i < (sc)->params.nports; i++) {
614                 struct port_info *pi;
615                 
616                 if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
617                         device_printf(dev, "failed to add child port\n");
618                         error = EINVAL;
619                         goto out;
620                 }
621                 pi = &sc->port[i];
622                 pi->adapter = sc;
623                 pi->nqsets = port_qsets;
624                 pi->first_qset = i*port_qsets;
625                 pi->port_id = i;
626                 pi->tx_chan = i >= ai->nports0;
627                 pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
628                 sc->rxpkt_map[pi->txpkt_intf] = i;
630                 sc->portdev[i] = child;
631                 device_set_softc(child, pi);
632         }
633         if ((error = bus_generic_attach(dev)) != 0)
634                 goto out;
635
636         /* initialize sge private state */
637         t3_sge_init_adapter(sc);
638
639         t3_led_ready(sc);
640         
641         cxgb_offload_init();
642         if (is_offload(sc)) {
643                 setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
644                 cxgb_adapter_ofld(sc);
645         }
646         error = t3_get_fw_version(sc, &vers);
647         if (error)
648                 goto out;
649
650         snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
651             G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
652             G_FW_VERSION_MICRO(vers));
653
654         snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
655                  ai->desc, is_offload(sc) ? "R" : "",
656                  sc->params.vpd.ec, sc->params.vpd.sn);
657         device_set_desc_copy(dev, buf);
658
659         snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
660                  sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
661                  sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
662
663         device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
664         callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
665         t3_add_attach_sysctls(sc);
666 out:
667         if (error)
668                 cxgb_free(sc);
669
670         return (error);
671 }
672
673 /*
674  * The cxgb_controller_detach routine is called when the device is
675  * unloaded from the system.
676  */
677
678 static int
679 cxgb_controller_detach(device_t dev)
680 {
681         struct adapter *sc;
682
683         sc = device_get_softc(dev);
684
685         cxgb_free(sc);
686
687         return (0);
688 }
689
690 /*
691  * cxgb_free() is called by the cxgb_controller_detach() routine
692  * to tear down the structures that were built up in
693  * cxgb_controller_attach(), and should be the final piece of work
694  * done when fully unloading the driver.  Its responsibilities include:
695  *
697  *  1. Shutting down the threads started by the cxgb_controller_attach()
698  *     routine.
699  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
700  *  3. Detaching all of the port devices created during the
701  *     cxgb_controller_attach() routine.
702  *  4. Removing the device children created via cxgb_controller_attach().
703  *  5. Releasing PCI resources associated with the device.
704  *  6. Turning off the offload support, iff it was turned on.
705  *  7. Destroying the mutexes created in cxgb_controller_attach().
706  *
707  */
708 static void
709 cxgb_free(struct adapter *sc)
710 {
711         int i;
712
713         ADAPTER_LOCK(sc);
714         sc->flags |= CXGB_SHUTDOWN;
715         ADAPTER_UNLOCK(sc);
716
717         /*
718          * Make sure all child devices are gone.
719          */
720         bus_generic_detach(sc->dev);
721         for (i = 0; i < (sc)->params.nports; i++) {
722                 if (sc->portdev[i] &&
723                     device_delete_child(sc->dev, sc->portdev[i]) != 0)
724                         device_printf(sc->dev, "failed to delete child port\n");
725         }
726
727         /*
728          * At this point, it is as if cxgb_port_detach has run on all ports, and
729          * cxgb_down has run on the adapter.  All interrupts have been silenced,
730          * all open devices have been closed.
731          */
732         KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
733                                            __func__, sc->open_device_map));
734         for (i = 0; i < sc->params.nports; i++) {
735                 KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
736                                                   __func__, i));
737         }
738
739         /*
740          * Finish off the adapter's callouts.
741          */
742         callout_drain(&sc->cxgb_tick_ch);
743         callout_drain(&sc->sge_timer_ch);
744
745         /*
746          * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
747          * sysctls are cleaned up by the kernel linker.
748          */
749         if (sc->flags & FULL_INIT_DONE) {
750                 t3_free_sge_resources(sc);
751                 sc->flags &= ~FULL_INIT_DONE;
752         }
753
754         /*
755          * Release all interrupt resources.
756          */
757         cxgb_teardown_interrupts(sc);
758         if (sc->flags & (USING_MSI | USING_MSIX)) {
759                 device_printf(sc->dev, "releasing msi message(s)\n");
760                 pci_release_msi(sc->dev);
761         } else {
762                 device_printf(sc->dev, "no msi message to release\n");
763         }
764
765         if (sc->msix_regs_res != NULL) {
766                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
767                     sc->msix_regs_res);
768         }
769
770         /*
771          * Free the adapter's taskqueue.
772          */
773         if (sc->tq != NULL) {
774                 taskqueue_free(sc->tq);
775                 sc->tq = NULL;
776         }
777         
778         if (is_offload(sc)) {
779                 clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
780                 cxgb_adapter_unofld(sc);
781         }
782
783 #ifdef notyet
784         if (sc->flags & CXGB_OFLD_INIT)
785                 cxgb_offload_deactivate(sc);
786 #endif
787         free(sc->filters, M_DEVBUF);
788         t3_sge_free(sc);
789
790         cxgb_offload_exit();
791
792         if (sc->udbs_res != NULL)
793                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
794                     sc->udbs_res);
795
796         if (sc->regs_res != NULL)
797                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
798                     sc->regs_res);
799
800         MTX_DESTROY(&sc->mdio_lock);
801         MTX_DESTROY(&sc->sge.reg_lock);
802         MTX_DESTROY(&sc->elmer_lock);
803         ADAPTER_LOCK_DEINIT(sc);
804 }
805
806 /**
807  *      setup_sge_qsets - configure SGE Tx/Rx/response queues
808  *      @sc: the controller softc
809  *
810  *      Determines how many sets of SGE queues to use and initializes them.
811  *      We support multiple queue sets per port if we have MSI-X, otherwise
812  *      just one queue set per port.
813  */
814 static int
815 setup_sge_qsets(adapter_t *sc)
816 {
817         int i, j, err, irq_idx = 0, qset_idx = 0;
818         u_int ntxq = SGE_TXQ_PER_SET;
819
820         if ((err = t3_sge_alloc(sc)) != 0) {
821                 device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
822                 return (err);
823         }
824
825         if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
826                 irq_idx = -1;
827
828         for (i = 0; i < (sc)->params.nports; i++) {
829                 struct port_info *pi = &sc->port[i];
830
831                 for (j = 0; j < pi->nqsets; j++, qset_idx++) {
832                         err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
833                             (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
834                             &sc->params.sge.qset[qset_idx], ntxq, pi);
835                         if (err) {
836                                 t3_free_sge_resources(sc);
837                                 device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
838                                     err);
839                                 return (err);
840                         }
841                 }
842         }
843
844         return (0);
845 }
846
847 static void
848 cxgb_teardown_interrupts(adapter_t *sc)
849 {
850         int i;
851
852         for (i = 0; i < SGE_QSETS; i++) {
853                 if (sc->msix_intr_tag[i] == NULL) {
854
855                         /* Should have been setup fully or not at all */
856                         KASSERT(sc->msix_irq_res[i] == NULL &&
857                                 sc->msix_irq_rid[i] == 0,
858                                 ("%s: half-done interrupt (%d).", __func__, i));
859
860                         continue;
861                 }
862
863                 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
864                                   sc->msix_intr_tag[i]);
865                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
866                                      sc->msix_irq_res[i]);
867
868                 sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
869                 sc->msix_irq_rid[i] = 0;
870         }
871
872         if (sc->intr_tag) {
873                 KASSERT(sc->irq_res != NULL,
874                         ("%s: half-done interrupt.", __func__));
875
876                 bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
877                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
878                                      sc->irq_res);
879
880                 sc->irq_res = sc->intr_tag = NULL;
881                 sc->irq_rid = 0;
882         }
883 }
884
885 static int
886 cxgb_setup_interrupts(adapter_t *sc)
887 {
888         struct resource *res;
889         void *tag;
890         int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
891
892         sc->irq_rid = intr_flag ? 1 : 0;
893         sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
894                                              RF_SHAREABLE | RF_ACTIVE);
895         if (sc->irq_res == NULL) {
896                 device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
897                               intr_flag, sc->irq_rid);
898                 err = EINVAL;
899                 sc->irq_rid = 0;
900         } else {
901                 err = bus_setup_intr(sc->dev, sc->irq_res,
902                     INTR_MPSAFE | INTR_TYPE_NET, NULL,
903                     sc->cxgb_intr, sc, &sc->intr_tag);
904
905                 if (err) {
906                         device_printf(sc->dev,
907                                       "Cannot set up interrupt (%x, %u, %d)\n",
908                                       intr_flag, sc->irq_rid, err);
909                         bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
910                                              sc->irq_res);
911                         sc->irq_res = sc->intr_tag = NULL;
912                         sc->irq_rid = 0;
913                 }
914         }
915
916         /* That's all for INTx or MSI */
917         if (!(intr_flag & USING_MSIX) || err)
918                 return (err);
919
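        /*
         * MSI-X: the slow/async interrupt was set up above on rid 1; each
         * of the remaining msi_count - 1 vectors services one queue set and
         * is allocated on rid i + 2 below.
         */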
920         for (i = 0; i < sc->msi_count - 1; i++) {
921                 rid = i + 2;
922                 res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
923                                              RF_SHAREABLE | RF_ACTIVE);
924                 if (res == NULL) {
925                         device_printf(sc->dev, "Cannot allocate interrupt "
926                                       "for message %d\n", rid);
927                         err = EINVAL;
928                         break;
929                 }
930
931                 err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
932                                      NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
933                 if (err) {
934                         device_printf(sc->dev, "Cannot set up interrupt "
935                                       "for message %d (%d)\n", rid, err);
936                         bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
937                         break;
938                 }
939
940                 sc->msix_irq_rid[i] = rid;
941                 sc->msix_irq_res[i] = res;
942                 sc->msix_intr_tag[i] = tag;
943         }
944
945         if (err)
946                 cxgb_teardown_interrupts(sc);
947
948         return (err);
949 }
950
951
952 static int
953 cxgb_port_probe(device_t dev)
954 {
955         struct port_info *p;
956         char buf[80];
957         const char *desc;
958         
959         p = device_get_softc(dev);
960         desc = p->phy.desc;
961         snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
962         device_set_desc_copy(dev, buf);
963         return (0);
964 }
965
966
967 static int
968 cxgb_makedev(struct port_info *pi)
969 {
970         
971         pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
972             UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
973         
974         if (pi->port_cdev == NULL)
975                 return (ENOMEM);
976
977         pi->port_cdev->si_drv1 = (void *)pi;
978         
979         return (0);
980 }
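
/*
 * The node created above appears as /dev/<ifname> (e.g. /dev/cxgb0) and
 * accepts the extension ioctls handled by cxgb_extension_ioctl().
 */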
981
982 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
983     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
984     IFCAP_VLAN_HWTSO)
985 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
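/*
 * Note: IFCAP_TSO covers both IFCAP_TSO4 and IFCAP_TSO6, so TSO6 is
 * advertised as a capability but only TSO4 is enabled by default.
 */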
986
987 static int
988 cxgb_port_attach(device_t dev)
989 {
990         struct port_info *p;
991         struct ifnet *ifp;
992         int err;
993         struct adapter *sc;
994
995         p = device_get_softc(dev);
996         sc = p->adapter;
997         snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
998             device_get_unit(device_get_parent(dev)), p->port_id);
999         PORT_LOCK_INIT(p, p->lockbuf);
1000
1001         /* Allocate an ifnet object and set it up */
1002         ifp = p->ifp = if_alloc(IFT_ETHER);
1003         if (ifp == NULL) {
1004                 device_printf(dev, "Cannot allocate ifnet\n");
1005                 return (ENOMEM);
1006         }
1007         
1008         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1009         ifp->if_init = cxgb_init;
1010         ifp->if_softc = p;
1011         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1012         ifp->if_ioctl = cxgb_ioctl;
1013         ifp->if_start = cxgb_start;
1014
1015
1016         ifp->if_timer = 0;      /* Disable ifnet watchdog */
1017         ifp->if_watchdog = NULL;
1018
1019         ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
1020         IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1021         IFQ_SET_READY(&ifp->if_snd);
1022
1023         ifp->if_capabilities = CXGB_CAP;
1024         ifp->if_capenable = CXGB_CAP_ENABLE;
1025         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1026
1027         /*
1028          * Disable TSO on 4-port - it isn't supported by the firmware.
1029          */     
1030         if (sc->params.nports > 2) {
1031                 ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1032                 ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1033                 ifp->if_hwassist &= ~CSUM_TSO;
1034         }
1035
1036         ether_ifattach(ifp, p->hw_addr);
1037         ifp->if_transmit = cxgb_transmit;
1038         ifp->if_qflush = cxgb_qflush;
1039
1040 #ifdef DEFAULT_JUMBO
1041         if (sc->params.nports <= 2)
1042                 ifp->if_mtu = ETHERMTU_JUMBO;
1043 #endif
1044         if ((err = cxgb_makedev(p)) != 0) {
1045                 printf("makedev failed %d\n", err);
1046                 return (err);
1047         }
1048
1049         /* Create a list of media supported by this port */
1050         ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1051             cxgb_media_status);
1052         cxgb_build_medialist(p);
1053       
1054         t3_sge_init_port(p);
1055
1056         return (err);
1057 }
1058
1059 /*
1060  * cxgb_port_detach() is called via the device_detach method when
1061  * cxgb_free() calls bus_generic_detach().  It is responsible for
1062  * removing the device from the view of the kernel, i.e. from all
1063  * interface lists etc.  This routine is only called when the driver is
1064  * being unloaded, not when the link goes down.
1065  */
1066 static int
1067 cxgb_port_detach(device_t dev)
1068 {
1069         struct port_info *p;
1070         struct adapter *sc;
1071         int i;
1072
1073         p = device_get_softc(dev);
1074         sc = p->adapter;
1075
1076         /* Tell cxgb_ioctl and if_init that the port is going away */
1077         ADAPTER_LOCK(sc);
1078         SET_DOOMED(p);
1079         wakeup(&sc->flags);
1080         while (IS_BUSY(sc))
1081                 mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1082         SET_BUSY(sc);
1083         ADAPTER_UNLOCK(sc);
1084
1085         if (p->port_cdev != NULL)
1086                 destroy_dev(p->port_cdev);
1087
1088         cxgb_uninit_synchronized(p);
1089         ether_ifdetach(p->ifp);
1090
1091         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1092                 struct sge_qset *qs = &sc->sge.qs[i];
1093                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1094
1095                 callout_drain(&txq->txq_watchdog);
1096                 callout_drain(&txq->txq_timer);
1097         }
1098
1099         PORT_LOCK_DEINIT(p);
1100         if_free(p->ifp);
1101         p->ifp = NULL;
1102
1103         ADAPTER_LOCK(sc);
1104         CLR_BUSY(sc);
1105         wakeup_one(&sc->flags);
1106         ADAPTER_UNLOCK(sc);
1107         return (0);
1108 }
1109
1110 void
1111 t3_fatal_err(struct adapter *sc)
1112 {
1113         u_int fw_status[4];
1114
1115         if (sc->flags & FULL_INIT_DONE) {
1116                 t3_sge_stop(sc);
1117                 t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1118                 t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1119                 t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1120                 t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1121                 t3_intr_disable(sc);
1122         }
1123         device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1124         if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1125                 device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1126                     fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1127 }
1128
1129 int
1130 t3_os_find_pci_capability(adapter_t *sc, int cap)
1131 {
1132         device_t dev;
1133         struct pci_devinfo *dinfo;
1134         pcicfgregs *cfg;
1135         uint32_t status;
1136         uint8_t ptr;
1137
1138         dev = sc->dev;
1139         dinfo = device_get_ivars(dev);
1140         cfg = &dinfo->cfg;
1141
1142         status = pci_read_config(dev, PCIR_STATUS, 2);
1143         if (!(status & PCIM_STATUS_CAPPRESENT))
1144                 return (0);
1145
1146         switch (cfg->hdrtype & PCIM_HDRTYPE) {
1147         case 0:
1148         case 1:
1149                 ptr = PCIR_CAP_PTR;
1150                 break;
1151         case 2:
1152                 ptr = PCIR_CAP_PTR_2;
1153                 break;
1154         default:
1155                 return (0);
1156                 break;
1157         }
1158         ptr = pci_read_config(dev, ptr, 1);
1159
1160         while (ptr != 0) {
1161                 if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1162                         return (ptr);
1163                 ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1164         }
1165
1166         return (0);
1167 }
1168
1169 int
1170 t3_os_pci_save_state(struct adapter *sc)
1171 {
1172         device_t dev;
1173         struct pci_devinfo *dinfo;
1174
1175         dev = sc->dev;
1176         dinfo = device_get_ivars(dev);
1177
1178         pci_cfg_save(dev, dinfo, 0);
1179         return (0);
1180 }
1181
1182 int
1183 t3_os_pci_restore_state(struct adapter *sc)
1184 {
1185         device_t dev;
1186         struct pci_devinfo *dinfo;
1187
1188         dev = sc->dev;
1189         dinfo = device_get_ivars(dev);
1190
1191         pci_cfg_restore(dev, dinfo);
1192         return (0);
1193 }
1194
1195 /**
1196  *      t3_os_link_changed - handle link status changes
1197  *      @sc: the adapter associated with the link change
1198  *      @port_id: the port index whose link status has changed
1199  *      @link_status: the new status of the link
1200  *      @speed: the new speed setting
1201  *      @duplex: the new duplex setting
1202  *      @fc: the new flow-control setting
1203  *
1204  *      This is the OS-dependent handler for link status changes.  The OS
1205  *      neutral handler takes care of most of the processing for these events,
1206  *      then calls this handler for any OS-specific processing.
1207  */
1208 void
1209 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1210      int duplex, int fc, int mac_was_reset)
1211 {
1212         struct port_info *pi = &adapter->port[port_id];
1213         struct ifnet *ifp = pi->ifp;
1214
1215         /* no race with detach, so ifp should always be good */
1216         KASSERT(ifp, ("%s: if detached.", __func__));
1217
1218         /* Reapply mac settings if they were lost due to a reset */
1219         if (mac_was_reset) {
1220                 PORT_LOCK(pi);
1221                 cxgb_update_mac_settings(pi);
1222                 PORT_UNLOCK(pi);
1223         }
1224
1225         if (link_status) {
1226                 ifp->if_baudrate = IF_Mbps(speed);
1227                 if_link_state_change(ifp, LINK_STATE_UP);
1228         } else
1229                 if_link_state_change(ifp, LINK_STATE_DOWN);
1230 }
1231
1232 /**
1233  *      t3_os_phymod_changed - handle PHY module changes
1234  *      @adap: the adapter associated with the change
1235  *      @port_id: the port index whose PHY module changed
1236  *
1237  *      This is the OS-dependent handler for PHY module changes.  It is
1238  *      invoked when a PHY module is removed or inserted for any OS-specific
1239  *      processing.
1240  */
1241 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1242 {
1243         static const char *mod_str[] = {
1244                 NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1245         };
1246         struct port_info *pi = &adap->port[port_id];
1247         int mod = pi->phy.modtype;
1248
1249         if (mod != pi->media.ifm_cur->ifm_data)
1250                 cxgb_build_medialist(pi);
1251
1252         if (mod == phy_modtype_none)
1253                 if_printf(pi->ifp, "PHY module unplugged\n");
1254         else {
1255                 KASSERT(mod < ARRAY_SIZE(mod_str),
1256                         ("invalid PHY module type %d", mod));
1257                 if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1258         }
1259 }
1260
1261 /*
1262  * Interrupt-context handler for external (PHY) interrupts.
1263  */
1264 void
1265 t3_os_ext_intr_handler(adapter_t *sc)
1266 {
1267         if (cxgb_debug)
1268                 printf("t3_os_ext_intr_handler\n");
1269         /*
1270          * Schedule a task to handle external interrupts as they may be slow
1271          * and we use a mutex to protect MDIO registers.  We disable PHY
1272          * interrupts in the meantime and let the task reenable them when
1273          * it's done.
1274          */
1275         if (sc->slow_intr_mask) {
1276                 ADAPTER_LOCK(sc);
1277                 sc->slow_intr_mask &= ~F_T3DBG;
1278                 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1279                 taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1280                 ADAPTER_UNLOCK(sc);
1281         }
1282 }
1283
1284 void
1285 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1286 {
1287
1288         /*
1289          * The ifnet might not be allocated when this gets called,
1290          * as this is called early on in attach by t3_prep_adapter,
1291          * so save the address off in the port structure.
1292          */
1293         if (cxgb_debug)
1294                 printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1295         bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1296 }
1297
1298 /*
1299  * Programs the XGMAC based on the settings in the ifnet.  These settings
1300  * include MTU, MAC address, mcast addresses, etc.
1301  */
1302 static void
1303 cxgb_update_mac_settings(struct port_info *p)
1304 {
1305         struct ifnet *ifp = p->ifp;
1306         struct t3_rx_mode rm;
1307         struct cmac *mac = &p->mac;
1308         int mtu, hwtagging;
1309
1310         PORT_LOCK_ASSERT_OWNED(p);
1311
1312         bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1313
1314         mtu = ifp->if_mtu;
1315         if (ifp->if_capenable & IFCAP_VLAN_MTU)
1316                 mtu += ETHER_VLAN_ENCAP_LEN;
1317
1318         hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1319
1320         t3_mac_set_mtu(mac, mtu);
1321         t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1322         t3_mac_set_address(mac, 0, p->hw_addr);
1323         t3_init_rx_mode(&rm, p);
1324         t3_mac_set_rx_mode(mac, &rm);
1325 }
1326
1327
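/*
 * Poll (up to roughly 50 ms) until response queue 0 has seen 'n' more
 * offload replies than the 'init_cnt' snapshot; ETIMEDOUT otherwise.
 */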
1328 static int
1329 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1330                               unsigned long n)
1331 {
1332         int attempts = 5;
1333
1334         while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1335                 if (!--attempts)
1336                         return (ETIMEDOUT);
1337                 t3_os_sleep(10);
1338         }
1339         return (0);
1340 }
1341
1342 static int
1343 init_tp_parity(struct adapter *adap)
1344 {
1345         int i;
1346         struct mbuf *m;
1347         struct cpl_set_tcb_field *greq;
1348         unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1349
1350         t3_tp_set_offload_mode(adap, 1);
1351
1352         for (i = 0; i < 16; i++) {
1353                 struct cpl_smt_write_req *req;
1354
1355                 m = m_gethdr(M_WAITOK, MT_DATA);
1356                 req = mtod(m, struct cpl_smt_write_req *);
1357                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1358                 memset(req, 0, sizeof(*req));
1359                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1360                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1361                 req->iff = i;
1362                 t3_mgmt_tx(adap, m);
1363         }
1364
1365         for (i = 0; i < 2048; i++) {
1366                 struct cpl_l2t_write_req *req;
1367
1368                 m = m_gethdr(M_WAITOK, MT_DATA);
1369                 req = mtod(m, struct cpl_l2t_write_req *);
1370                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1371                 memset(req, 0, sizeof(*req));
1372                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1373                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1374                 req->params = htonl(V_L2T_W_IDX(i));
1375                 t3_mgmt_tx(adap, m);
1376         }
1377
1378         for (i = 0; i < 2048; i++) {
1379                 struct cpl_rte_write_req *req;
1380
1381                 m = m_gethdr(M_WAITOK, MT_DATA);
1382                 req = mtod(m, struct cpl_rte_write_req *);
1383                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1384                 memset(req, 0, sizeof(*req));
1385                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1386                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1387                 req->l2t_idx = htonl(V_L2T_W_IDX(i));
1388                 t3_mgmt_tx(adap, m);
1389         }
1390
1391         m = m_gethdr(M_WAITOK, MT_DATA);
1392         greq = mtod(m, struct cpl_set_tcb_field *);
1393         m->m_len = m->m_pkthdr.len = sizeof(*greq);
1394         memset(greq, 0, sizeof(*greq));
1395         greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1396         OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1397         greq->mask = htobe64(1);
1398         t3_mgmt_tx(adap, m);
1399
1400         i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1401         t3_tp_set_offload_mode(adap, 0);
1402         return (i);
1403 }
1404
1405 /**
1406  *      setup_rss - configure Receive Side Steering (per-queue connection demux) 
1407  *      @adap: the adapter
1408  *
1409  *      Sets up RSS to distribute packets to multiple receive queues.  We
1410  *      configure the RSS CPU lookup table to distribute to the number of HW
1411  *      receive queues, and the response queue lookup table to narrow that
1412  *      down to the response queues actually configured for each port.
1413  *      We always configure the RSS mapping for two ports since the mapping
1414  *      table has plenty of entries.
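 *
 *      A concrete sketch: with two ports of two queue sets each, nq ends
 *      up {2, 2}, so the first half of the lookup table alternates between
 *      response queues 0 and 1 and the second half between queues 2 and 3.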
1415  */
1416 static void
1417 setup_rss(adapter_t *adap)
1418 {
1419         int i;
1420         u_int nq[2]; 
1421         uint8_t cpus[SGE_QSETS + 1];
1422         uint16_t rspq_map[RSS_TABLE_SIZE];
1423         
1424         for (i = 0; i < SGE_QSETS; ++i)
1425                 cpus[i] = i;
1426         cpus[SGE_QSETS] = 0xff;
1427
1428         nq[0] = nq[1] = 0;
1429         for_each_port(adap, i) {
1430                 const struct port_info *pi = adap2pinfo(adap, i);
1431
1432                 nq[pi->tx_chan] += pi->nqsets;
1433         }
1434         for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1435                 rspq_map[i] = nq[0] ? i % nq[0] : 0;
1436                 rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1437         }
1438
1439         /* Calculate the reverse RSS map table */
1440         for (i = 0; i < SGE_QSETS; ++i)
1441                 adap->rrss_map[i] = 0xff;
1442         for (i = 0; i < RSS_TABLE_SIZE; ++i)
1443                 if (adap->rrss_map[rspq_map[i]] == 0xff)
1444                         adap->rrss_map[rspq_map[i]] = i;
1445
1446         t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1447                       F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1448                       F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1449                       cpus, rspq_map);
1450
1451 }
1452
1453 /*
1454  * Sends an mbuf to an offload queue driver
1455  * after dealing with any active network taps.
1456  */
1457 static inline int
1458 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1459 {
1460         int ret;
1461
1462         ret = t3_offload_tx(tdev, m);
1463         return (ret);
1464 }
1465
1466 static int
1467 write_smt_entry(struct adapter *adapter, int idx)
1468 {
1469         struct port_info *pi = &adapter->port[idx];
1470         struct cpl_smt_write_req *req;
1471         struct mbuf *m;
1472
1473         if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1474                 return (ENOMEM);
1475
1476         req = mtod(m, struct cpl_smt_write_req *);
1477         m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1478         
1479         req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1480         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1481         req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1482         req->iff = idx;
1483         memset(req->src_mac1, 0, sizeof(req->src_mac1));
1484         memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1485
1486         m_set_priority(m, 1);
1487
1488         offload_tx(&adapter->tdev, m);
1489
1490         return (0);
1491 }
1492
1493 static int
1494 init_smt(struct adapter *adapter)
1495 {
1496         int i;
1497
1498         for_each_port(adapter, i)
1499                 write_smt_entry(adapter, i);
1500         return (0);
1501 }
1502
1503 static void
1504 init_port_mtus(adapter_t *adapter)
1505 {
1506         unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1507
1508         t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1509 }
1510
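/*
 * Build and send a FW_MNGTOPCODE_PKTSCHED_SET work request for TX queue
 * set 'qidx' (scheduler 'sched', min/max 'lo'/'hi', bound to channel
 * 'port').  bind_qsets() below issues one per queue set with lo = hi = -1.
 */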
1511 static void
1512 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1513                               int hi, int port)
1514 {
1515         struct mbuf *m;
1516         struct mngt_pktsched_wr *req;
1517
1518         m = m_gethdr(M_DONTWAIT, MT_DATA);
1519         if (m) {        
1520                 req = mtod(m, struct mngt_pktsched_wr *);
1521                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1522                 req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1523                 req->sched = sched;
1524                 req->idx = qidx;
1525                 req->min = lo;
1526                 req->max = hi;
1527                 req->binding = port;
1528                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1529                 t3_mgmt_tx(adap, m);
1530         }
1531 }
1532
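     /*
      * Bind every configured qset to its port's TX channel with a pktsched
      * management command (a min/max of -1 presumably leaves the rate
      * limits at their defaults).
      */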
1533 static void
1534 bind_qsets(adapter_t *sc)
1535 {
1536         int i, j;
1537
1538         for (i = 0; i < (sc)->params.nports; ++i) {
1539                 const struct port_info *pi = adap2pinfo(sc, i);
1540
1541                 for (j = 0; j < pi->nqsets; ++j) {
1542                         send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1543                                           -1, pi->tx_chan);
1544
1545                 }
1546         }
1547 }
1548
1549 static void
1550 update_tpeeprom(struct adapter *adap)
1551 {
1552         const struct firmware *tpeeprom;
1553
1554         uint32_t version;
1555         unsigned int major, minor;
1556         int ret, len;
1557         char rev, name[32];
1558
1559         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1560
1561         major = G_TP_VERSION_MAJOR(version);
1562         minor = G_TP_VERSION_MINOR(version);
1563         if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
1564                 return; 
1565
1566         rev = t3rev2char(adap);
1567         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1568
1569         tpeeprom = firmware_get(name);
1570         if (tpeeprom == NULL) {
1571                 device_printf(adap->dev,
1572                               "could not load TP EEPROM: unable to load %s\n",
1573                               name);
1574                 return;
1575         }
1576
1577         len = tpeeprom->datasize - 4;
1578         
1579         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1580         if (ret)
1581                 goto release_tpeeprom;
1582
1583         if (len != TP_SRAM_LEN) {
1584                 device_printf(adap->dev,
1585                               "%s length is wrong len=%d expected=%d\n", name,
1586                               len, TP_SRAM_LEN);
1587                 goto release_tpeeprom;
1588         }
1589         
1590         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1591             TP_SRAM_OFFSET);
1592         
1593         if (!ret) {
1594                 device_printf(adap->dev,
1595                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1596                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1597         } else 
1598                 device_printf(adap->dev,
1599                               "Protocol SRAM image update in EEPROM failed\n");
1600
1601 release_tpeeprom:
1602         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1603         
1604         return;
1605 }
1606
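     /*
      * Refresh the protocol SRAM image stored in the EEPROM if it is
      * stale, then load the matching tpsram firmware image into the chip.
      */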
1607 static int
1608 update_tpsram(struct adapter *adap)
1609 {
1610         const struct firmware *tpsram;
1611         int ret;
1612         char rev, name[32];
1613
1614         rev = t3rev2char(adap);
1615         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1616
1617         update_tpeeprom(adap);
1618
1619         tpsram = firmware_get(name);
1620         if (tpsram == NULL) {
1621                 device_printf(adap->dev, "could not load TP SRAM\n");
1622                 return (EINVAL);
1623         } else
1624                 device_printf(adap->dev, "updating TP SRAM\n");
1625         
1626         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1627         if (ret)
1628                 goto release_tpsram;    
1629
1630         ret = t3_set_proto_sram(adap, tpsram->data);
1631         if (ret)
1632                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1633
1634 release_tpsram:
1635         firmware_put(tpsram, FIRMWARE_UNLOAD);
1636         
1637         return (ret);
1638 }
1639
1640 /**
1641  *      cxgb_up - enable the adapter
1642  *      @adap: adapter being enabled
1643  *
1644  *      Called when the first port is enabled, this function performs the
1645  *      actions necessary to make an adapter operational, such as completing
1646  *      the initialization of HW modules, and enabling interrupts.
1647  */
1648 static int
1649 cxgb_up(struct adapter *sc)
1650 {
1651         int err = 0;
1652
1653         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1654                                            __func__, sc->open_device_map));
1655
1656         if ((sc->flags & FULL_INIT_DONE) == 0) {
1657
1658                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1659
1660                 if ((sc->flags & FW_UPTODATE) == 0)
1661                         if ((err = upgrade_fw(sc)))
1662                                 goto out;
1663
1664                 if ((sc->flags & TPS_UPTODATE) == 0)
1665                         if ((err = update_tpsram(sc)))
1666                                 goto out;
1667
1668                 err = t3_init_hw(sc, 0);
1669                 if (err)
1670                         goto out;
1671
1672                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1673                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1674
1675                 err = setup_sge_qsets(sc);
1676                 if (err)
1677                         goto out;
1678
1679                 setup_rss(sc);
1680
1681                 t3_intr_clear(sc);
1682                 err = cxgb_setup_interrupts(sc);
1683                 if (err)
1684                         goto out;
1685
1686                 t3_add_configured_sysctls(sc);
1687                 sc->flags |= FULL_INIT_DONE;
1688         }
1689
1690         t3_intr_clear(sc);
1691         t3_sge_start(sc);
1692         t3_intr_enable(sc);
1693
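             /*
              * One-time TP parity error protection setup for offload-capable
              * T3C and later adapters; once initialized, unmask the TP
              * interrupts.
              */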
1694         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1695             is_offload(sc) && init_tp_parity(sc) == 0)
1696                 sc->flags |= TP_PARITY_INIT;
1697
1698         if (sc->flags & TP_PARITY_INIT) {
1699                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1700                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1701         }
1702         
1703         if (!(sc->flags & QUEUES_BOUND)) {
1704                 bind_qsets(sc);
1705                 sc->flags |= QUEUES_BOUND;              
1706         }
1707
1708         t3_sge_reset_adapter(sc);
1709 out:
1710         return (err);
1711 }
1712
1713 /*
1714  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1715  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1716  * during controller_detach, not here.
1717  */
1718 static void
1719 cxgb_down(struct adapter *sc)
1720 {
1721         t3_sge_stop(sc);
1722         t3_intr_disable(sc);
1723 }
1724
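     /*
      * Marks the offload device open, enables TP offload mode, programs
      * the MTU and SMT tables, and notifies registered offload clients.
      */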
1725 static int
1726 offload_open(struct port_info *pi)
1727 {
1728         struct adapter *sc = pi->adapter;
1729         struct t3cdev *tdev = &sc->tdev;
1730
1731         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1732
1733         t3_tp_set_offload_mode(sc, 1);
1734         tdev->lldev = pi->ifp;
1735         init_port_mtus(sc);
1736         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1737                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1738         init_smt(sc);
1739         cxgb_add_clients(tdev);
1740
1741         return (0);
1742 }
1743
1744 static int
1745 offload_close(struct t3cdev *tdev)
1746 {
1747         struct adapter *adapter = tdev2adap(tdev);
1748
1749         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1750                 return (0);
1751
1752         /* Call back all registered clients */
1753         cxgb_remove_clients(tdev);
1754
1755         tdev->lldev = NULL;
1756         cxgb_set_dummy_ops(tdev);
1757         t3_tp_set_offload_mode(adapter, 0);
1758
1759         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1760
1761         return (0);
1762 }
1763
1764 /*
1765  * if_init for cxgb ports.
1766  */
1767 static void
1768 cxgb_init(void *arg)
1769 {
1770         struct port_info *p = arg;
1771         struct adapter *sc = p->adapter;
1772
1773         ADAPTER_LOCK(sc);
1774         cxgb_init_locked(p); /* releases adapter lock */
1775         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1776 }
1777
1778 static int
1779 cxgb_init_locked(struct port_info *p)
1780 {
1781         struct adapter *sc = p->adapter;
1782         struct ifnet *ifp = p->ifp;
1783         struct cmac *mac = &p->mac;
1784         int i, rc = 0, may_sleep = 0;
1785
1786         ADAPTER_LOCK_ASSERT_OWNED(sc);
1787
1788         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1789                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1790                         rc = EINTR;
1791                         goto done;
1792                 }
1793         }
1794         if (IS_DOOMED(p)) {
1795                 rc = ENXIO;
1796                 goto done;
1797         }
1798         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1799
1800         /*
1801          * The code that runs during one-time adapter initialization can sleep
1802          * so it's important not to hold any locks across it.
1803          */
1804         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1805
1806         if (may_sleep) {
1807                 SET_BUSY(sc);
1808                 ADAPTER_UNLOCK(sc);
1809         }
1810
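             /* Opening the first port also brings up the adapter itself. */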
1811         if (sc->open_device_map == 0) {
1812                 if ((rc = cxgb_up(sc)) != 0)
1813                         goto done;
1814
1815                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1816                         log(LOG_WARNING,
1817                             "Could not initialize offload capabilities\n");
1818         }
1819
1820         PORT_LOCK(p);
1821         if (isset(&sc->open_device_map, p->port_id) &&
1822             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1823                 PORT_UNLOCK(p);
1824                 goto done;
1825         }
1826         t3_port_intr_enable(sc, p->port_id);
1827         if (!mac->multiport) 
1828                 t3_mac_init(mac);
1829         cxgb_update_mac_settings(p);
1830         t3_link_start(&p->phy, mac, &p->link_config);
1831         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1832         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1833         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1834         PORT_UNLOCK(p);
1835
1836         t3_link_changed(sc, p->port_id);
1837
1838         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1839                 struct sge_qset *qs = &sc->sge.qs[i];
1840                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1841
1842                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1843                                  txq->txq_watchdog.c_cpu);
1844         }
1845
1846         /* all ok */
1847         setbit(&sc->open_device_map, p->port_id);
1848
1849 done:
1850         if (may_sleep) {
1851                 ADAPTER_LOCK(sc);
1852                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1853                 CLR_BUSY(sc);
1854                 wakeup_one(&sc->flags);
1855         }
1856         ADAPTER_UNLOCK(sc);
1857         return (rc);
1858 }
1859
1860 static int
1861 cxgb_uninit_locked(struct port_info *p)
1862 {
1863         struct adapter *sc = p->adapter;
1864         int rc;
1865
1866         ADAPTER_LOCK_ASSERT_OWNED(sc);
1867
1868         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1869                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1870                         rc = EINTR;
1871                         goto done;
1872                 }
1873         }
1874         if (IS_DOOMED(p)) {
1875                 rc = ENXIO;
1876                 goto done;
1877         }
1878         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1879         SET_BUSY(sc);
1880         ADAPTER_UNLOCK(sc);
1881
1882         rc = cxgb_uninit_synchronized(p);
1883
1884         ADAPTER_LOCK(sc);
1885         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1886         CLR_BUSY(sc);
1887         wakeup_one(&sc->flags);
1888 done:
1889         ADAPTER_UNLOCK(sc);
1890         return (rc);
1891 }
1892
1893 /*
1894  * Called on "ifconfig down", and from port_detach
1895  */
1896 static int
1897 cxgb_uninit_synchronized(struct port_info *pi)
1898 {
1899         struct adapter *sc = pi->adapter;
1900         struct ifnet *ifp = pi->ifp;
1901
1902         /*
1903          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1904          */
1905         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1906
1907         /*
1908          * Clear this port's bit from the open device map, and then drain all
1909          * the tasks that can access/manipulate this port's port_info or ifp.
1910          * We disable this port's interrupts here, so the slow/ext
1911          * interrupt tasks won't be enqueued.  The tick task will continue to
1912          * be enqueued every second but the runs after this drain will not see
1913          * this port in the open device map.
1914          *
1915          * A well behaved task must take open_device_map into account and ignore
1916          * ports that are not open.
1917          */
1918         clrbit(&sc->open_device_map, pi->port_id);
1919         t3_port_intr_disable(sc, pi->port_id);
1920         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1921         taskqueue_drain(sc->tq, &sc->ext_intr_task);
1922         taskqueue_drain(sc->tq, &sc->tick_task);
1923
1924         PORT_LOCK(pi);
1925         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1926
1927         /* disable pause frames */
1928         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1929
1930         /* Reset RX FIFO HWM */
1931         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1932                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1933
1934         DELAY(100 * 1000);
1935
1936         /* Wait for TXFIFO empty */
1937         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1938                         F_TXFIFO_EMPTY, 1, 20, 5);
1939
1940         DELAY(100 * 1000);
1941         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1942
1944         pi->phy.ops->power_down(&pi->phy, 1);
1945
1946         PORT_UNLOCK(pi);
1947
1948         pi->link_config.link_ok = 0;
1949         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1950
1951         if ((sc->open_device_map & PORT_MASK) == 0)
1952                 offload_close(&sc->tdev);
1953
1954         if (sc->open_device_map == 0)
1955                 cxgb_down(pi->adapter);
1956
1957         return (0);
1958 }
1959
1960 /*
1961  * Mark lro enabled or disabled in all qsets for this port
1962  */
1963 static int
1964 cxgb_set_lro(struct port_info *p, int enabled)
1965 {
1966         int i;
1967         struct adapter *adp = p->adapter;
1968         struct sge_qset *q;
1969
1970         PORT_LOCK_ASSERT_OWNED(p);
1971         for (i = 0; i < p->nqsets; i++) {
1972                 q = &adp->sge.qs[p->first_qset + i];
1973                 q->lro.enabled = (enabled != 0);
1974         }
1975         return (0);
1976 }
1977
1978 static int
1979 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1980 {
1981         struct port_info *p = ifp->if_softc;
1982         struct adapter *sc = p->adapter;
1983         struct ifreq *ifr = (struct ifreq *)data;
1984         int flags, error = 0, mtu;
1985         uint32_t mask;
1986
1987         switch (command) {
1988         case SIOCSIFMTU:
1989                 ADAPTER_LOCK(sc);
1990                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1991                 if (error) {
1992 fail:
1993                         ADAPTER_UNLOCK(sc);
1994                         return (error);
1995                 }
1996
1997                 mtu = ifr->ifr_mtu;
1998                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1999                         error = EINVAL;
2000                 } else {
2001                         ifp->if_mtu = mtu;
2002                         PORT_LOCK(p);
2003                         cxgb_update_mac_settings(p);
2004                         PORT_UNLOCK(p);
2005                 }
2006                 ADAPTER_UNLOCK(sc);
2007                 break;
2008         case SIOCSIFFLAGS:
2009                 ADAPTER_LOCK(sc);
2010                 if (IS_DOOMED(p)) {
2011                         error = ENXIO;
2012                         goto fail;
2013                 }
2014                 if (ifp->if_flags & IFF_UP) {
2015                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2016                                 flags = p->if_flags;
2017                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2018                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2019                                         if (IS_BUSY(sc)) {
2020                                                 error = EBUSY;
2021                                                 goto fail;
2022                                         }
2023                                         PORT_LOCK(p);
2024                                         cxgb_update_mac_settings(p);
2025                                         PORT_UNLOCK(p);
2026                                 }
2027                                 ADAPTER_UNLOCK(sc);
2028                         } else
2029                                 error = cxgb_init_locked(p);
2030                         p->if_flags = ifp->if_flags;
2031                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2032                         error = cxgb_uninit_locked(p);
2033                 else
2034                         ADAPTER_UNLOCK(sc);
2035
2036                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2037                 break;
2038         case SIOCADDMULTI:
2039         case SIOCDELMULTI:
2040                 ADAPTER_LOCK(sc);
2041                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2042                 if (error)
2043                         goto fail;
2044
2045                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2046                         PORT_LOCK(p);
2047                         cxgb_update_mac_settings(p);
2048                         PORT_UNLOCK(p);
2049                 }
2050                 ADAPTER_UNLOCK(sc);
2051
2052                 break;
2053         case SIOCSIFCAP:
2054                 ADAPTER_LOCK(sc);
2055                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2056                 if (error)
2057                         goto fail;
2058
2059                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
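                     /*
                      * TSO depends on TX checksumming, so turning TXCSUM off
                      * forces TSO off as well.
                      */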
2060                 if (mask & IFCAP_TXCSUM) {
2061                         ifp->if_capenable ^= IFCAP_TXCSUM;
2062                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2063
2064                         if (IFCAP_TSO & ifp->if_capenable &&
2065                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2066                                 ifp->if_capenable &= ~IFCAP_TSO;
2067                                 ifp->if_hwassist &= ~CSUM_TSO;
2068                                 if_printf(ifp,
2069                                     "tso disabled due to -txcsum.\n");
2070                         }
2071                 }
2072                 if (mask & IFCAP_RXCSUM)
2073                         ifp->if_capenable ^= IFCAP_RXCSUM;
2074                 if (mask & IFCAP_TSO4) {
2075                         ifp->if_capenable ^= IFCAP_TSO4;
2076
2077                         if (IFCAP_TSO & ifp->if_capenable) {
2078                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2079                                         ifp->if_hwassist |= CSUM_TSO;
2080                                 else {
2081                                         ifp->if_capenable &= ~IFCAP_TSO;
2082                                         ifp->if_hwassist &= ~CSUM_TSO;
2083                                         if_printf(ifp,
2084                                             "enable txcsum first.\n");
2085                                         error = EAGAIN;
2086                                 }
2087                         } else
2088                                 ifp->if_hwassist &= ~CSUM_TSO;
2089                 }
2090                 if (mask & IFCAP_LRO) {
2091                         ifp->if_capenable ^= IFCAP_LRO;
2092
2093                         /* Safe to do this even if cxgb_up not called yet */
2094                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2095                 }
2096                 if (mask & IFCAP_VLAN_HWTAGGING) {
2097                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2098                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2099                                 PORT_LOCK(p);
2100                                 cxgb_update_mac_settings(p);
2101                                 PORT_UNLOCK(p);
2102                         }
2103                 }
2104                 if (mask & IFCAP_VLAN_MTU) {
2105                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2106                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2107                                 PORT_LOCK(p);
2108                                 cxgb_update_mac_settings(p);
2109                                 PORT_UNLOCK(p);
2110                         }
2111                 }
2112                 if (mask & IFCAP_VLAN_HWTSO)
2113                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2114                 if (mask & IFCAP_VLAN_HWCSUM)
2115                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2116
2117 #ifdef VLAN_CAPABILITIES
2118                 VLAN_CAPABILITIES(ifp);
2119 #endif
2120                 ADAPTER_UNLOCK(sc);
2121                 break;
2122         case SIOCSIFMEDIA:
2123         case SIOCGIFMEDIA:
2124                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2125                 break;
2126         default:
2127                 error = ether_ioctl(ifp, command, data);
2128         }
2129
2130         return (error);
2131 }
2132
2133 static int
2134 cxgb_media_change(struct ifnet *ifp)
2135 {
2136         return (EOPNOTSUPP);
2137 }
2138
2139 /*
2140  * Translates phy->modtype to the correct Ethernet media subtype.
2141  */
2142 static int
2143 cxgb_ifm_type(int mod)
2144 {
2145         switch (mod) {
2146         case phy_modtype_sr:
2147                 return (IFM_10G_SR);
2148         case phy_modtype_lr:
2149                 return (IFM_10G_LR);
2150         case phy_modtype_lrm:
2151                 return (IFM_10G_LRM);
2152         case phy_modtype_twinax:
2153                 return (IFM_10G_TWINAX);
2154         case phy_modtype_twinax_long:
2155                 return (IFM_10G_TWINAX_LONG);
2156         case phy_modtype_none:
2157                 return (IFM_NONE);
2158         case phy_modtype_unknown:
2159                 return (IFM_UNKNOWN);
2160         }
2161
2162         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2163         return (IFM_UNKNOWN);
2164 }
2165
2166 /*
2167  * Rebuilds the ifmedia list for this port, and sets the current media.
2168  */
2169 static void
2170 cxgb_build_medialist(struct port_info *p)
2171 {
2172         struct cphy *phy = &p->phy;
2173         struct ifmedia *media = &p->media;
2174         int mod = phy->modtype;
2175         int m = IFM_ETHER | IFM_FDX;
2176
2177         PORT_LOCK(p);
2178
2179         ifmedia_removeall(media);
2180         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2181                 /* Copper (RJ45) */
2182
2183                 if (phy->caps & SUPPORTED_10000baseT_Full)
2184                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2185
2186                 if (phy->caps & SUPPORTED_1000baseT_Full)
2187                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2188
2189                 if (phy->caps & SUPPORTED_100baseT_Full)
2190                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2191
2192                 if (phy->caps & SUPPORTED_10baseT_Full)
2193                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2194
2195                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2196                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2197
2198         } else if (phy->caps & SUPPORTED_TP) {
2199                 /* Copper (CX4) */
2200
2201                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2202                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2203
2204                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2205                 ifmedia_set(media, m | IFM_10G_CX4);
2206
2207         } else if (phy->caps & SUPPORTED_FIBRE &&
2208                    phy->caps & SUPPORTED_10000baseT_Full) {
2209                 /* 10G optical (but includes SFP+ twinax) */
2210
2211                 m |= cxgb_ifm_type(mod);
2212                 if (IFM_SUBTYPE(m) == IFM_NONE)
2213                         m &= ~IFM_FDX;  /* no module present, no duplex */
2214
2215                 ifmedia_add(media, m, mod, NULL);
2216                 ifmedia_set(media, m);
2217
2218         } else if (phy->caps & SUPPORTED_FIBRE &&
2219                    phy->caps & SUPPORTED_1000baseT_Full) {
2220                 /* 1G optical */
2221
2222                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2223                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2224                 ifmedia_set(media, m | IFM_1000_SX);
2225
2226         } else {
2227                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2228                             phy->caps));
2229         }
2230
2231         PORT_UNLOCK(p);
2232 }
2233
2234 static void
2235 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2236 {
2237         struct port_info *p = ifp->if_softc;
2238         struct ifmedia_entry *cur = p->media.ifm_cur;
2239         int speed = p->link_config.speed;
2240
2241         if (cur->ifm_data != p->phy.modtype) {
2242                 cxgb_build_medialist(p);
2243                 cur = p->media.ifm_cur;
2244         }
2245
2246         ifmr->ifm_status = IFM_AVALID;
2247         if (!p->link_config.link_ok)
2248                 return;
2249
2250         ifmr->ifm_status |= IFM_ACTIVE;
2251
2252         /*
2253          * active and current will differ iff current media is autoselect.  That
2254          * can happen only for copper RJ45.
2255          */
2256         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2257                 return;
2258         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2259                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2260
2261         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2262         if (speed == SPEED_10000)
2263                 ifmr->ifm_active |= IFM_10G_T;
2264         else if (speed == SPEED_1000)
2265                 ifmr->ifm_active |= IFM_1000_T;
2266         else if (speed == SPEED_100)
2267                 ifmr->ifm_active |= IFM_100_TX;
2268         else if (speed == SPEED_10)
2269                 ifmr->ifm_active |= IFM_10_T;
2270         else
2271                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2272                             speed));
2273 }
2274
2275 static void
2276 cxgb_async_intr(void *data)
2277 {
2278         adapter_t *sc = data;
2279
2280         if (cxgb_debug)
2281                 device_printf(sc->dev, "cxgb_async_intr\n");
2282         /*
2283          * May need to sleep - defer to taskqueue
2284          */
2285         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2286 }
2287
2288 static void
2289 cxgb_ext_intr_handler(void *arg, int count)
2290 {
2291         adapter_t *sc = (adapter_t *)arg;
2292
2293         if (cxgb_debug)
2294                 printf("cxgb_ext_intr_handler\n");
2295
2296         t3_phy_intr_handler(sc);
2297
2298         /* Now reenable external interrupts */
2299         ADAPTER_LOCK(sc);
2300         if (sc->slow_intr_mask) {
2301                 sc->slow_intr_mask |= F_T3DBG;
2302                 t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2303                 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2304         }
2305         ADAPTER_UNLOCK(sc);
2306 }
2307
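     /*
      * The link must be polled on the first check after attach, while a
      * link fault is pending, and always on PHYs that lack a link
      * interrupt (SUPPORTED_LINK_IRQ).
      */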
2308 static inline int
2309 link_poll_needed(struct port_info *p)
2310 {
2311         struct cphy *phy = &p->phy;
2312
2313         if (phy->caps & POLL_LINK_1ST_TIME) {
2314                 p->phy.caps &= ~POLL_LINK_1ST_TIME;
2315                 return (1);
2316         }
2317
2318         return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2319 }
2320
2321 static void
2322 check_link_status(adapter_t *sc)
2323 {
2324         int i;
2325
2326         for (i = 0; i < (sc)->params.nports; ++i) {
2327                 struct port_info *p = &sc->port[i];
2328
2329                 if (!isset(&sc->open_device_map, p->port_id))
2330                         continue;
2331
2332                 if (link_poll_needed(p))
2333                         t3_link_changed(sc, i);
2334         }
2335 }
2336
2337 static void
2338 check_t3b2_mac(struct adapter *sc)
2339 {
2340         int i;
2341
2342         if (sc->flags & CXGB_SHUTDOWN)
2343                 return;
2344
2345         for_each_port(sc, i) {
2346                 struct port_info *p = &sc->port[i];
2347                 int status;
2348 #ifdef INVARIANTS
2349                 struct ifnet *ifp = p->ifp;
2350 #endif          
2351
2352                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2353                     !p->link_config.link_ok)
2354                         continue;
2355
2356                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2357                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2358                          __func__, ifp->if_drv_flags, sc->open_device_map));
2359
2360                 PORT_LOCK(p);
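                     /*
                      * Watchdog status 1 means the MAC was toggled; status 2
                      * means it needs to be reinitialized from scratch, which
                      * is done below.
                      */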
2361                 status = t3b2_mac_watchdog_task(&p->mac);
2362                 if (status == 1)
2363                         p->mac.stats.num_toggled++;
2364                 else if (status == 2) {
2365                         struct cmac *mac = &p->mac;
2366
2367                         cxgb_update_mac_settings(p);
2368                         t3_link_start(&p->phy, mac, &p->link_config);
2369                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2370                         t3_port_intr_enable(sc, p->port_id);
2371                         p->mac.stats.num_resets++;
2372                 }
2373                 PORT_UNLOCK(p);
2374         }
2375 }
2376
2377 static void
2378 cxgb_tick(void *arg)
2379 {
2380         adapter_t *sc = (adapter_t *)arg;
2381
2382         if (sc->flags & CXGB_SHUTDOWN)
2383                 return;
2384
2385         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2386         callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2387 }
2388
2389 static void
2390 cxgb_tick_handler(void *arg, int count)
2391 {
2392         adapter_t *sc = (adapter_t *)arg;
2393         const struct adapter_params *p = &sc->params;
2394         int i;
2395         uint32_t cause, reset;
2396
2397         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2398                 return;
2399
2400         check_link_status(sc);
2401
2402         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2403                 check_t3b2_mac(sc);
2404
2405         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2406         if (cause) {
2407                 struct sge_qset *qs = &sc->sge.qs[0];
2408                 uint32_t mask, v;
2409
2410                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2411
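                     /*
                      * Bits 0..SGE_QSETS-1 flag starved response queues; the
                      * RSPQXDISABLED bits (masked off above) are skipped; the
                      * next 2 * SGE_QSETS bits flag empty free lists, two per
                      * qset.
                      */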
2412                 mask = 1;
2413                 for (i = 0; i < SGE_QSETS; i++) {
2414                         if (v & mask)
2415                                 qs[i].rspq.starved++;
2416                         mask <<= 1;
2417                 }
2418
2419                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2420
2421                 for (i = 0; i < SGE_QSETS * 2; i++) {
2422                         if (v & mask) {
2423                                 qs[i / 2].fl[i % 2].empty++;
2424                         }
2425                         mask <<= 1;
2426                 }
2427
2428                 /* clear */
2429                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2430                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2431         }
2432
2433         for (i = 0; i < sc->params.nports; i++) {
2434                 struct port_info *pi = &sc->port[i];
2435                 struct ifnet *ifp = pi->ifp;
2436                 struct cmac *mac = &pi->mac;
2437                 struct mac_stats *mstats = &mac->stats;
2438                 int drops, j;
2439
2440                 if (!isset(&sc->open_device_map, pi->port_id))
2441                         continue;
2442
2443                 PORT_LOCK(pi);
2444                 t3_mac_update_stats(mac);
2445                 PORT_UNLOCK(pi);
2446
2447                 ifp->if_opackets = mstats->tx_frames;
2448                 ifp->if_ipackets = mstats->rx_frames;
2449                 ifp->if_obytes = mstats->tx_octets;
2450                 ifp->if_ibytes = mstats->rx_octets;
2451                 ifp->if_omcasts = mstats->tx_mcast_frames;
2452                 ifp->if_imcasts = mstats->rx_mcast_frames;
2453                 ifp->if_collisions = mstats->tx_total_collisions;
2454                 ifp->if_iqdrops = mstats->rx_cong_drops;
2455
2456                 drops = 0;
2457                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2458                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2459                 ifp->if_snd.ifq_drops = drops;
2460
2461                 ifp->if_oerrors =
2462                     mstats->tx_excess_collisions +
2463                     mstats->tx_underrun +
2464                     mstats->tx_len_errs +
2465                     mstats->tx_mac_internal_errs +
2466                     mstats->tx_excess_deferral +
2467                     mstats->tx_fcs_errs;
2468                 ifp->if_ierrors =
2469                     mstats->rx_jabber +
2470                     mstats->rx_data_errs +
2471                     mstats->rx_sequence_errs +
2472                     mstats->rx_runt + 
2473                     mstats->rx_too_long +
2474                     mstats->rx_mac_internal_errs +
2475                     mstats->rx_short +
2476                     mstats->rx_fcs_errs;
2477
2478                 if (mac->multiport)
2479                         continue;
2480
2481                 /* Count rx fifo overflows, once per second */
2482                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2483                 reset = 0;
2484                 if (cause & F_RXFIFO_OVERFLOW) {
2485                         mac->stats.rx_fifo_ovfl++;
2486                         reset |= F_RXFIFO_OVERFLOW;
2487                 }
2488                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2489         }
2490 }
2491
2492 static void
2493 touch_bars(device_t dev)
2494 {
2495         /*
2496          * Don't enable yet
2497          */
2498 #if !defined(__LP64__) && 0
2499         u32 v;
2500
2501         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2502         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2503         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2504         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2505         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2506         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2507 #endif
2508 }
2509
2510 static int
2511 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2512 {
2513         uint8_t *buf;
2514         int err = 0;
2515         u32 aligned_offset, aligned_len, *p;
2516         struct adapter *adapter = pi->adapter;
2517
2519         aligned_offset = offset & ~3;
2520         aligned_len = (len + (offset & 3) + 3) & ~3;
2521
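             /*
              * If the write is not 32-bit aligned at either end, read the
              * boundary words first so the untouched bytes survive the
              * read-modify-write.
              */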
2522         if (aligned_offset != offset || aligned_len != len) {
2523                 buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);              
2524                 if (!buf)
2525                         return (ENOMEM);
2526                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2527                 if (!err && aligned_len > 4)
2528                         err = t3_seeprom_read(adapter,
2529                                               aligned_offset + aligned_len - 4,
2530                                               (u32 *)&buf[aligned_len - 4]);
2531                 if (err)
2532                         goto out;
2533                 memcpy(buf + (offset & 3), data, len);
2534         } else
2535                 buf = (uint8_t *)(uintptr_t)data;
2536
2537         err = t3_seeprom_wp(adapter, 0);
2538         if (err)
2539                 goto out;
2540
2541         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2542                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2543                 aligned_offset += 4;
2544         }
2545
2546         if (!err)
2547                 err = t3_seeprom_wp(adapter, 1);
2548 out:
2549         if (buf != data)
2550                 free(buf, M_DEVBUF);
2551         return (err);
2552 }
2553
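     /*
      * A negative value means "parameter not supplied"; it passes the
      * check so callers can skip settings they did not specify.
      */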
2555 static int
2556 in_range(int val, int lo, int hi)
2557 {
2558         return (val < 0 || (val <= hi && val >= lo));
2559 }
2560
2561 static int
2562 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2563 {
2564        return (0);
2565 }
2566
2567 static int
2568 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2569 {
2570        return (0);
2571 }
2572
2573 static int
2574 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2575     int fflag, struct thread *td)
2576 {
2577         int mmd, error = 0;
2578         struct port_info *pi = dev->si_drv1;
2579         adapter_t *sc = pi->adapter;
2580
2581 #ifdef PRIV_SUPPORTED   
2582         if (priv_check(td, PRIV_DRIVER)) {
2583                 if (cxgb_debug) 
2584                         printf("user does not have access to privileged ioctls\n");
2585                 return (EPERM);
2586         }
2587 #else
2588         if (suser(td)) {
2589                 if (cxgb_debug)
2590                         printf("user does not have access to privileged ioctls\n");
2591                 return (EPERM);
2592         }
2593 #endif
2594         
2595         switch (cmd) {
2596         case CHELSIO_GET_MIIREG: {
2597                 uint32_t val;
2598                 struct cphy *phy = &pi->phy;
2599                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2600                 
2601                 if (!phy->mdio_read)
2602                         return (EOPNOTSUPP);
2603                 if (is_10G(sc)) {
2604                         mmd = mid->phy_id >> 8;
2605                         if (!mmd)
2606                                 mmd = MDIO_DEV_PCS;
2607                         else if (mmd > MDIO_DEV_VEND2)
2608                                 return (EINVAL);
2609
2610                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2611                                              mid->reg_num, &val);
2612                 } else
2613                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2614                                              mid->reg_num & 0x1f, &val);
2615                 if (error == 0)
2616                         mid->val_out = val;
2617                 break;
2618         }
2619         case CHELSIO_SET_MIIREG: {
2620                 struct cphy *phy = &pi->phy;
2621                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2622
2623                 if (!phy->mdio_write)
2624                         return (EOPNOTSUPP);
2625                 if (is_10G(sc)) {
2626                         mmd = mid->phy_id >> 8;
2627                         if (!mmd)
2628                                 mmd = MDIO_DEV_PCS;
2629                         else if (mmd > MDIO_DEV_VEND2)
2630                                 return (EINVAL);
2631                         
2632                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2633                                               mmd, mid->reg_num, mid->val_in);
2634                 } else
2635                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2636                                               mid->reg_num & 0x1f,
2637                                               mid->val_in);
2638                 break;
2639         }
2640         case CHELSIO_SETREG: {
2641                 struct ch_reg *edata = (struct ch_reg *)data;
2642                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2643                         return (EFAULT);
2644                 t3_write_reg(sc, edata->addr, edata->val);
2645                 break;
2646         }
2647         case CHELSIO_GETREG: {
2648                 struct ch_reg *edata = (struct ch_reg *)data;
2649                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2650                         return (EFAULT);
2651                 edata->val = t3_read_reg(sc, edata->addr);
2652                 break;
2653         }
2654         case CHELSIO_GET_SGE_CONTEXT: {
2655                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2656                 mtx_lock_spin(&sc->sge.reg_lock);
2657                 switch (ecntxt->cntxt_type) {
2658                 case CNTXT_TYPE_EGRESS:
2659                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2660                             ecntxt->data);
2661                         break;
2662                 case CNTXT_TYPE_FL:
2663                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2664                             ecntxt->data);
2665                         break;
2666                 case CNTXT_TYPE_RSP:
2667                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2668                             ecntxt->data);
2669                         break;
2670                 case CNTXT_TYPE_CQ:
2671                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2672                             ecntxt->data);
2673                         break;
2674                 default:
2675                         error = EINVAL;
2676                         break;
2677                 }
2678                 mtx_unlock_spin(&sc->sge.reg_lock);
2679                 break;
2680         }
2681         case CHELSIO_GET_SGE_DESC: {
2682                 struct ch_desc *edesc = (struct ch_desc *)data;
2683                 int ret;
2684                 if (edesc->queue_num >= SGE_QSETS * 6)
2685                         return (EINVAL);
2686                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2687                     edesc->queue_num % 6, edesc->idx, edesc->data);
2688                 if (ret < 0)
2689                         return (EINVAL);
2690                 edesc->size = ret;
2691                 break;
2692         }
2693         case CHELSIO_GET_QSET_PARAMS: {
2694                 struct qset_params *q;
2695                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2696                 int q1 = pi->first_qset;
2697                 int nqsets = pi->nqsets;
2698                 int i;
2699
2700                 if (t->qset_idx >= nqsets)
2701                         return (EINVAL);
2702
2703                 i = q1 + t->qset_idx;
2704                 q = &sc->params.sge.qset[i];
2705                 t->rspq_size   = q->rspq_size;
2706                 t->txq_size[0] = q->txq_size[0];
2707                 t->txq_size[1] = q->txq_size[1];
2708                 t->txq_size[2] = q->txq_size[2];
2709                 t->fl_size[0]  = q->fl_size;
2710                 t->fl_size[1]  = q->jumbo_size;
2711                 t->polling     = q->polling;
2712                 t->lro         = q->lro;
2713                 t->intr_lat    = q->coalesce_usecs;
2714                 t->cong_thres  = q->cong_thres;
2715                 t->qnum        = i;
2716
2717                 if ((sc->flags & FULL_INIT_DONE) == 0)
2718                         t->vector = 0;
2719                 else if (sc->flags & USING_MSIX)
2720                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2721                 else
2722                         t->vector = rman_get_start(sc->irq_res);
2723
2724                 break;
2725         }
2726         case CHELSIO_GET_QSET_NUM: {
2727                 struct ch_reg *edata = (struct ch_reg *)data;
2728                 edata->val = pi->nqsets;
2729                 break;
2730         }
2731         case CHELSIO_LOAD_FW: {
2732                 uint8_t *fw_data;
2733                 uint32_t vers;
2734                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2735
2736                 /*
2737                  * You're allowed to load a firmware only before FULL_INIT_DONE
2738                  *
2739                  * FW_UPTODATE is also set so the rest of the initialization
2740                  * will not overwrite what was loaded here.  This gives you the
2741                  * flexibility to load any firmware (and maybe shoot yourself in
2742                  * the foot).
2743                  */
2744
2745                 ADAPTER_LOCK(sc);
2746                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2747                         ADAPTER_UNLOCK(sc);
2748                         return (EBUSY);
2749                 }
2750
2751                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2752                 if (!fw_data)
2753                         error = ENOMEM;
2754                 else
2755                         error = copyin(t->buf, fw_data, t->len);
2756
2757                 if (!error)
2758                         error = -t3_load_fw(sc, fw_data, t->len);
2759
2760                 if (t3_get_fw_version(sc, &vers) == 0) {
2761                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2762                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2763                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2764                 }
2765
2766                 if (!error)
2767                         sc->flags |= FW_UPTODATE;
2768
2769                 free(fw_data, M_DEVBUF);
2770                 ADAPTER_UNLOCK(sc);
2771                 break;
2772         }
2773         case CHELSIO_LOAD_BOOT: {
2774                 uint8_t *boot_data;
2775                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2776
2777                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2778                 if (!boot_data)
2779                         return (ENOMEM);
2780
2781                 error = copyin(t->buf, boot_data, t->len);
2782                 if (!error)
2783                         error = -t3_load_boot(sc, boot_data, t->len);
2784
2785                 free(boot_data, M_DEVBUF);
2786                 break;
2787         }
2788         case CHELSIO_GET_PM: {
2789                 struct ch_pm *m = (struct ch_pm *)data;
2790                 struct tp_params *p = &sc->params.tp;
2791
2792                 if (!is_offload(sc))
2793                         return (EOPNOTSUPP);
2794
2795                 m->tx_pg_sz = p->tx_pg_size;
2796                 m->tx_num_pg = p->tx_num_pgs;
2797                 m->rx_pg_sz  = p->rx_pg_size;
2798                 m->rx_num_pg = p->rx_num_pgs;
2799                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2800
2801                 break;
2802         }
2803         case CHELSIO_SET_PM: {
2804                 struct ch_pm *m = (struct ch_pm *)data;
2805                 struct tp_params *p = &sc->params.tp;
2806
2807                 if (!is_offload(sc))
2808                         return (EOPNOTSUPP);
2809                 if (sc->flags & FULL_INIT_DONE)
2810                         return (EBUSY);
2811
2812                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2813                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2814                         return (EINVAL);        /* not power of 2 */
2815                 if (!(m->rx_pg_sz & 0x14000))
2816                         return (EINVAL);        /* not 16KB or 64KB */
2817                 if (!(m->tx_pg_sz & 0x1554000))
2818                         return (EINVAL);        /* not a supported TX page size */
2819                 if (m->tx_num_pg == -1)
2820                         m->tx_num_pg = p->tx_num_pgs;
2821                 if (m->rx_num_pg == -1)
2822                         m->rx_num_pg = p->rx_num_pgs;
2823                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2824                         return (EINVAL);
2825                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2826                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2827                         return (EINVAL);
2828
2829                 p->rx_pg_size = m->rx_pg_sz;
2830                 p->tx_pg_size = m->tx_pg_sz;
2831                 p->rx_num_pgs = m->rx_num_pg;
2832                 p->tx_num_pgs = m->tx_num_pg;
2833                 break;
2834         }
2835         case CHELSIO_SETMTUTAB: {
2836                 struct ch_mtus *m = (struct ch_mtus *)data;
2837                 int i;
2838                 
2839                 if (!is_offload(sc))
2840                         return (EOPNOTSUPP);
2841                 if (offload_running(sc))
2842                         return (EBUSY);
2843                 if (m->nmtus != NMTUS)
2844                         return (EINVAL);
2845                 if (m->mtus[0] < 81)         /* accommodate SACK */
2846                         return (EINVAL);
2847                 
2848                 /*
2849                  * MTUs must be in ascending order
2850                  */
2851                 for (i = 1; i < NMTUS; ++i)
2852                         if (m->mtus[i] < m->mtus[i - 1])
2853                                 return (EINVAL);
2854
2855                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2856                 break;
2857         }
2858         case CHELSIO_GETMTUTAB: {
2859                 struct ch_mtus *m = (struct ch_mtus *)data;
2860
2861                 if (!is_offload(sc))
2862                         return (EOPNOTSUPP);
2863
2864                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2865                 m->nmtus = NMTUS;
2866                 break;
2867         }
2868         case CHELSIO_GET_MEM: {
2869                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2870                 struct mc7 *mem;
2871                 uint8_t *useraddr;
2872                 u64 buf[32];
2873
2874                 /*
2875                  * Use these to avoid modifying len/addr in the return
2876                  * struct
2877                  */
2878                 uint32_t len = t->len, addr = t->addr;
2879
2880                 if (!is_offload(sc))
2881                         return (EOPNOTSUPP);
2882                 if (!(sc->flags & FULL_INIT_DONE))
2883                         return (EIO);         /* need the memory controllers */
2884                 if ((addr & 0x7) || (len & 0x7))
2885                         return (EINVAL);
2886                 if (t->mem_id == MEM_CM)
2887                         mem = &sc->cm;
2888                 else if (t->mem_id == MEM_PMRX)
2889                         mem = &sc->pmrx;
2890                 else if (t->mem_id == MEM_PMTX)
2891                         mem = &sc->pmtx;
2892                 else
2893                         return (EINVAL);
2894
2895                 /*
2896                  * Version scheme:
2897                  * bits 0..9: chip version
2898                  * bits 10..15: chip revision
2899                  */
2900                 t->version = 3 | (sc->params.rev << 10);
2901                 
2902                 /*
2903                  * Read 256 bytes at a time as len can be large and we don't
2904                  * want to use huge intermediate buffers.
2905                  */
2906                 useraddr = (uint8_t *)t->buf; 
2907                 while (len) {
2908                         unsigned int chunk = min(len, sizeof(buf));
2909
2910                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2911                         if (error)
2912                                 return (-error);
2913                         if (copyout(buf, useraddr, chunk))
2914                                 return (EFAULT);
2915                         useraddr += chunk;
2916                         addr += chunk;
2917                         len -= chunk;
2918                 }
2919                 break;
2920         }
2921         case CHELSIO_READ_TCAM_WORD: {
2922                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2923
2924                 if (!is_offload(sc))
2925                         return (EOPNOTSUPP);
2926                 if (!(sc->flags & FULL_INIT_DONE))
2927                         return (EIO);         /* need MC5 */            
2928                 return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2930         }
2931         case CHELSIO_SET_TRACE_FILTER: {
2932                 struct ch_trace *t = (struct ch_trace *)data;
2933                 const struct trace_params *tp;
2934
2935                 tp = (const struct trace_params *)&t->sip;
2936                 if (t->config_tx)
2937                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2938                                                t->trace_tx);
2939                 if (t->config_rx)
2940                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2941                                                t->trace_rx);
2942                 break;
2943         }
2944         case CHELSIO_SET_PKTSCHED: {
2945                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2946                 if (sc->open_device_map == 0)
2947                         return (EAGAIN);
2948                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2949                     p->binding);
2950                 break;
2951         }
2952         case CHELSIO_IFCONF_GETREGS: {
2953                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2954                 int reglen = cxgb_get_regs_len();
2955                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2956                 if (buf == NULL) {
2957                         return (ENOMEM);
2958                 }
2959                 if (regs->len > reglen)
2960                         regs->len = reglen;
2961                 else if (regs->len < reglen)
2962                         error = ENOBUFS;
2963
2964                 if (!error) {
2965                         cxgb_get_regs(sc, regs, buf);
2966                         error = copyout(buf, regs->data, reglen);
2967                 }
2968                 free(buf, M_DEVBUF);
2969
2970                 break;
2971         }
        case CHELSIO_SET_HW_SCHED: {
                struct ch_hw_sched *t = (struct ch_hw_sched *)data;
                unsigned int ticks_per_usec = core_ticks_per_usec(sc);

                if ((sc->flags & FULL_INIT_DONE) == 0)
                        return (EAGAIN);       /* need TP to be initialized */
                if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
                    !in_range(t->channel, 0, 1) ||
                    !in_range(t->kbps, 0, 10000000) ||
                    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
                    !in_range(t->flow_ipg, 0,
                              dack_ticks_to_usec(sc, 0x7ff)))
                        return (EINVAL);

                if (t->kbps >= 0) {
                        error = t3_config_sched(sc, t->kbps, t->sched);
                        if (error < 0)
                                return (-error);
                }
                if (t->class_ipg >= 0)
                        t3_set_sched_ipg(sc, t->sched, t->class_ipg);
                if (t->flow_ipg >= 0) {
                        t->flow_ipg *= 1000;     /* us -> ns */
                        t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
                }
                if (t->mode >= 0) {
                        int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);

                        t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
                            bit, t->mode ? bit : 0);
                }
                if (t->channel >= 0)
                        t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
                            1 << t->sched, t->channel << t->sched);
                break;
        }
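        /*
         * CHELSIO_GET_EEPROM reads the serial EEPROM.  The device is read
         * in 32-bit words, so the requested window is widened to the
         * enclosing word-aligned range and only the bytes that were asked
         * for are copied out.
         */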
        case CHELSIO_GET_EEPROM: {
                int i;
                struct ch_eeprom *e = (struct ch_eeprom *)data;
                uint8_t *buf;

                /* Reject windows that would run past the end of the EEPROM. */
                if (e->offset > EEPROMSIZE || e->len > EEPROMSIZE - e->offset)
                        return (EINVAL);

                buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
                if (buf == NULL)
                        return (ENOMEM);
                e->magic = EEPROM_MAGIC;
                for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
                        error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);

                if (!error)
                        error = copyout(buf + e->offset, e->data, e->len);

                free(buf, M_DEVBUF);
                break;
        }
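        /*
         * CHELSIO_CLEAR_STATS zeroes the port's accumulated MAC statistics.
         * The hardware counters are folded in one final time under the port
         * lock so the clear cannot race the periodic stats update.
         */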
        case CHELSIO_CLEAR_STATS: {
                if (!(sc->flags & FULL_INIT_DONE))
                        return (EAGAIN);

                PORT_LOCK(pi);
                t3_mac_update_stats(&pi->mac);
                memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
                PORT_UNLOCK(pi);
                break;
        }
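        /*
         * CHELSIO_GET_UP_LA dumps the uP's logic analyzer buffer.  The
         * caller must provide at least LA_BUFSIZE bytes; t3_get_up_la()
         * rewrites la->bufsize to the number of bytes actually captured.
         */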
        case CHELSIO_GET_UP_LA: {
                struct ch_up_la *la = (struct ch_up_la *)data;
                uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);

                if (buf == NULL)
                        return (ENOMEM);
                if (la->bufsize < LA_BUFSIZE)
                        error = ENOBUFS;

                if (!error)
                        error = -t3_get_up_la(sc, &la->stopped, &la->idx,
                            &la->bufsize, buf);
                if (!error)
                        error = copyout(buf, la->data, la->bufsize);

                free(buf, M_DEVBUF);
                break;
        }
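        /*
         * CHELSIO_GET_UP_IOQS dumps the uP's IOQ state.  The first four
         * 32-bit words of the raw dump are global enable/status registers;
         * they are peeled off into dedicated fields before the per-queue
         * records are copied out.
         */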
        case CHELSIO_GET_UP_IOQS: {
                struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
                uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
                uint32_t *v;

                if (buf == NULL)
                        return (ENOMEM);
                if (ioqs->bufsize < IOQS_BUFSIZE)
                        error = ENOBUFS;

                if (!error)
                        error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);

                if (!error) {
                        v = (uint32_t *)buf;

                        ioqs->bufsize -= 4 * sizeof(uint32_t);
                        ioqs->ioq_rx_enable = *v++;
                        ioqs->ioq_tx_enable = *v++;
                        ioqs->ioq_rx_status = *v++;
                        ioqs->ioq_tx_status = *v++;

                        error = copyout(v, ioqs->data, ioqs->bufsize);
                }

                free(buf, M_DEVBUF);
                break;
        }
        default:
                return (EOPNOTSUPP);
        }

        return (error);
}
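
/*
 * Illustrative only -- how userland reaches the ioctl switch above.  A
 * minimal sketch, assuming the adapter's control node is /dev/cxgbc0 (the
 * name format used by make_dev() elsewhere in this driver) and that the
 * ch_* structures and CHELSIO_* command codes come from the driver's
 * ioctl header:
 *
 *      struct ch_ifconf_regs regs;
 *      uint8_t dump[3 * 1024];         /@ T3_REGMAP_SIZE bytes @/
 *      int fd = open("/dev/cxgbc0", O_RDWR);
 *
 *      memset(&regs, 0, sizeof(regs));
 *      regs.len = sizeof(dump);
 *      regs.data = dump;
 *      if (fd >= 0 && ioctl(fd, CHELSIO_IFCONF_GETREGS, &regs) == 0)
 *              printf("%u bytes of registers, version 0x%x\n",
 *                  regs.len, regs.version);
 *
 * ("/@ ... @/" stands in for a nested comment, which C does not allow.)
 */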

/*
 * Read registers [start, end] (byte offsets, inclusive) into buf at the
 * same offsets, so the resulting dump is indexed by register address.
 */
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
        uint32_t *p = (uint32_t *)(buf + start);

        for ( ; start <= end; start += sizeof(uint32_t))
                *p++ = t3_read_reg(ap, start);
}

/* Size in bytes of the register dump produced by cxgb_get_regs(). */
#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
        return (T3_REGMAP_SIZE);
}

static void
cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
{

        /*
         * Version scheme:
         * bits 0..9: chip version
         * bits 10..15: chip revision
         * bit 31: set for PCIe cards
         */
        regs->version = 3 | (sc->params.rev << 10) |
            ((uint32_t)is_pcie(sc) << 31);

        /*
         * We skip the MAC statistics registers because they are clear-on-read.
         * Also reading multi-register stats would need to synchronize with the
         * periodic mac stats accumulation.  Hard to justify the complexity.
         */
        memset(buf, 0, cxgb_get_regs_len());
        reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
        reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
        reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
        reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
        reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
        reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
            XGM_REG(A_XGM_SERDES_STAT3, 1));
        reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
            XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}

MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);