/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ktr.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <cxgb_include.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_interrupts(adapter_t *);
static void cxgb_teardown_interrupts(adapter_t *);
static void cxgb_init(void *);
static int cxgb_init_locked(struct port_info *);
static int cxgb_uninit_locked(struct port_info *);
static int cxgb_uninit_synchronized(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgb_media_change(struct ifnet *);
static int cxgb_ifm_type(int);
static void cxgb_build_medialist(struct port_info *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_tick_handler(void *, int);
static void cxgb_tick(void *);
static void link_check_callout(void *);
static void check_link_status(void *, int);
static void setup_rss(adapter_t *sc);
static int alloc_filters(struct adapter *);
static int setup_hw_filters(struct adapter *);
static int set_filter(struct adapter *, int, const struct filter_info *);
static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);
static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_update_mac_settings(struct port_info *p);

static device_method_t cxgb_controller_methods[] = {
        DEVMETHOD(device_probe,         cxgb_controller_probe),
        DEVMETHOD(device_attach,        cxgb_controller_attach),
        DEVMETHOD(device_detach,        cxgb_controller_detach),

        /* bus interface */
        DEVMETHOD(bus_print_child,      bus_generic_print_child),
        DEVMETHOD(bus_driver_added,     bus_generic_driver_added),

        { 0, 0 }
};

static driver_t cxgb_controller_driver = {
        "cxgbc",
        cxgb_controller_methods,
        sizeof(struct adapter)
};

static devclass_t       cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
        DEVMETHOD(device_probe,         cxgb_port_probe),
        DEVMETHOD(device_attach,        cxgb_port_attach),
        DEVMETHOD(device_detach,        cxgb_port_detach),
        { 0, 0 }
};

static driver_t cxgb_port_driver = {
        "cxgb",
        cxgb_port_methods,
        0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

static struct cdevsw cxgb_cdevsw = {
       .d_version =    D_VERSION,
       .d_flags =      0,
       .d_open =       cxgb_extension_open,
       .d_close =      cxgb_extension_close,
       .d_ioctl =      cxgb_extension_ioctl,
       .d_name =       "cxgb",
};

static devclass_t       cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");
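
/*
 * Example (illustrative, not part of the driver): because msi_allowed is
 * registered with TUNABLE_INT and CTLFLAG_RDTUN, it is set from
 * loader.conf(5) before the module initializes, e.g.:
 *
 *      hw.cxgb.msi_allowed="1"
 *
 * The other hw.cxgb.* knobs declared below are read-only tunables as well
 * and are set the same way.
 */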

/*
 * The driver enables offload by default.
 * To disable it, set ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver uses an auto-queue algorithm by default.
 * To disable it and force a single queue-set per port, set multiq = 0.
 */
static int multiq = 1;
TUNABLE_INT("hw.cxgb.multiq", &multiq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
    "use min(ncpus/ports, 8) queue-sets per port");

/*
 * By default the driver will not update the firmware unless it was compiled
 * against a newer version.
 */
static int force_fw_update = 0;
TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
    "update firmware even if up to date");

int cxgb_use_16k_clusters = -1;
TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");

/*
 * Tune the size of the output queue.
 */
int cxgb_snd_queue_len = IFQ_MAXLEN;
TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
    &cxgb_snd_queue_len, 0, "send queue size");

static int nfilters = -1;
TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
    &nfilters, 0, "max number of entries in the filter table");

enum {
        MAX_TXQ_ENTRIES      = 16384,
        MAX_CTRL_TXQ_ENTRIES = 1024,
        MAX_RSPQ_ENTRIES     = 16384,
        MAX_RX_BUFFERS       = 16384,
        MAX_RX_JUMBO_BUFFERS = 16384,
        MIN_TXQ_ENTRIES      = 4,
        MIN_CTRL_TXQ_ENTRIES = 4,
        MIN_RSPQ_ENTRIES     = 32,
        MIN_FL_ENTRIES       = 32,
        MIN_FL_JUMBO_ENTRIES = 32
};

struct filter_info {
        u32 sip;
        u32 sip_mask;
        u32 dip;
        u16 sport;
        u16 dport;
        u32 vlan:12;
        u32 vlan_prio:3;
        u32 mac_hit:1;
        u32 mac_idx:4;
        u32 mac_vld:1;
        u32 pkt_type:2;
        u32 report_filter_id:1;
        u32 pass:1;
        u32 rss:1;
        u32 qset:3;
        u32 locked:1;
        u32 valid:1;
};

enum { FILTER_NO_VLAN_PRI = 7 };
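
/*
 * Illustrative sketch (an assumption for clarity, not part of the driver):
 * a filter_info entry that passes traffic with destination port 80 to the
 * host via queue-set 0.  Field semantics follow the definitions above;
 * FILTER_NO_VLAN_PRI marks the entry as not matching on VLAN priority,
 * and the hardware encoding is built later by set_filter().
 *
 *      struct filter_info f = {
 *              .dport = 80,
 *              .vlan_prio = FILTER_NO_VLAN_PRI,
 *              .pass = 1,
 *              .qset = 0,
 *              .valid = 1,
 *      };
 */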

#define EEPROM_MAGIC 0x38E2F10C

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
        uint16_t        vendor;
        uint16_t        device;
        int             index;
        char            *desc;
} cxgb_identifiers[] = {
        {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
        {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
        {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
        {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
        {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
        {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
        {0, 0, 0, NULL}
};

static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);

static __inline char
t3rev2char(struct adapter *adapter)
{
        char rev = 'z';

        switch (adapter->params.rev) {
        case T3_REV_A:
                rev = 'a';
                break;
        case T3_REV_B:
        case T3_REV_B2:
                rev = 'b';
                break;
        case T3_REV_C:
                rev = 'c';
                break;
        }
        return (rev);
}

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
        struct cxgb_ident *id;

        for (id = cxgb_identifiers; id->desc != NULL; id++) {
                if ((id->vendor == pci_get_vendor(dev)) &&
                    (id->device == pci_get_device(dev))) {
                        return (id);
                }
        }
        return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
        struct cxgb_ident *id;
        const struct adapter_info *ai;

        id = cxgb_get_ident(dev);
        if (id == NULL)
                return (NULL);

        ai = t3_get_adapter_info(id->index);

        return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
        const struct adapter_info *ai;
        char *ports, buf[80];
        int nports;

        ai = cxgb_get_adapter_info(dev);
        if (ai == NULL)
                return (ENXIO);

        nports = ai->nports0 + ai->nports1;
        if (nports == 1)
                ports = "port";
        else
                ports = "ports";

        snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
        device_set_desc_copy(dev, buf);
        return (BUS_PROBE_DEFAULT);
}

#define FW_FNAME "cxgb_t3fw"
#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"

static int
upgrade_fw(adapter_t *sc)
{
        const struct firmware *fw;
        int status;
        u32 vers;

        if ((fw = firmware_get(FW_FNAME)) == NULL) {
                device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
                return (ENOENT);
        } else
                device_printf(sc->dev, "installing firmware on card\n");
        status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

        if (status != 0) {
                device_printf(sc->dev, "failed to install firmware: %d\n",
                    status);
        } else {
                t3_get_fw_version(sc, &vers);
                snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
                    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
                    G_FW_VERSION_MICRO(vers));
        }

        firmware_put(fw, FIRMWARE_UNLOAD);

        return (status);
}

/*
 * The cxgb_controller_attach function is responsible for the initial
 * bringup of the device.  Its responsibilities include:
 *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register.
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call hardware specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Set up the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port).
 * 11. Initialize T3 private state.
 * 12. Trigger the LED.
 * 13. Set up offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls.
 *
 * NOTE: Any modification or deviation from this list MUST be reflected in
 * the above comment.  Failure to do so will result in problems on various
 * error conditions including link flapping.
 */
static int
cxgb_controller_attach(device_t dev)
{
        device_t child;
        const struct adapter_info *ai;
        struct adapter *sc;
        int i, error = 0;
        uint32_t vers;
        int port_qsets = 1;
        int msi_needed, reg;
        char buf[80];

        sc = device_get_softc(dev);
        sc->dev = dev;
        sc->msi_count = 0;
        ai = cxgb_get_adapter_info(dev);

        /* find the PCIe link width and set max read request to 4KB */
        if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
                uint16_t lnk;

                lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
                sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
                if (sc->link_width < 8 &&
                    (ai->caps & SUPPORTED_10000baseT_Full)) {
                        device_printf(sc->dev,
                            "PCIe x%d Link, expect reduced performance\n",
                            sc->link_width);
                }

                pci_set_max_read_req(dev, 4096);
        }

        touch_bars(dev);
        pci_enable_busmaster(dev);
        /*
         * Allocate the registers and make them available to the driver.
         * The registers that we care about for NIC mode are in BAR 0.
         */
        sc->regs_rid = PCIR_BAR(0);
        if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->regs_rid, RF_ACTIVE)) == NULL) {
                device_printf(dev, "Cannot allocate BAR region 0\n");
                return (ENXIO);
        }
        sc->udbs_rid = PCIR_BAR(2);
        sc->udbs_res = NULL;
        if (is_offload(sc) &&
            ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
                   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
                device_printf(dev, "Cannot allocate BAR region 2\n");
                error = ENXIO;
                goto out;
        }

        snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
            device_get_unit(dev));
        ADAPTER_LOCK_INIT(sc, sc->lockbuf);

        snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
            device_get_unit(dev));
        snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
            device_get_unit(dev));
        snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
            device_get_unit(dev));

        MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
        MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
        MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);

        sc->bt = rman_get_bustag(sc->regs_res);
        sc->bh = rman_get_bushandle(sc->regs_res);
        sc->mmio_len = rman_get_size(sc->regs_res);

        for (i = 0; i < MAX_NPORTS; i++)
                sc->port[i].adapter = sc;

        if (t3_prep_adapter(sc, ai, 1) < 0) {
                printf("prep adapter failed\n");
                error = ENODEV;
                goto out;
        }
        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
         * enough messages for the queue sets.  If that fails, try falling
         * back to MSI.  If that fails, then try falling back to the legacy
         * interrupt pin model.
         */
        sc->msix_regs_rid = 0x20;
        if ((msi_allowed >= 2) &&
            (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

                if (multiq)
                        port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
                msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;

                if (pci_msix_count(dev) == 0 ||
                    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
                    sc->msi_count != msi_needed) {
                        device_printf(dev, "alloc msix failed - "
                                      "msi_count=%d, msi_needed=%d, err=%d; "
                                      "will try MSI\n", sc->msi_count,
                                      msi_needed, error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                        bus_release_resource(dev, SYS_RES_MEMORY,
                            sc->msix_regs_rid, sc->msix_regs_res);
                        sc->msix_regs_res = NULL;
                } else {
                        sc->flags |= USING_MSIX;
                        sc->cxgb_intr = cxgb_async_intr;
                        device_printf(dev,
                                      "using MSI-X interrupts (%u vectors)\n",
                                      sc->msi_count);
                }
        }

        if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
                sc->msi_count = 1;
                if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
                        device_printf(dev, "alloc msi failed - "
                                      "err=%d; will try INTx\n", error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                } else {
                        sc->flags |= USING_MSI;
                        sc->cxgb_intr = t3_intr_msi;
                        device_printf(dev, "using MSI interrupts\n");
                }
        }
        if (sc->msi_count == 0) {
                device_printf(dev, "using line interrupts\n");
                sc->cxgb_intr = t3b_intr;
        }

        /* Create a private taskqueue thread for handling driver events */
        sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->tq);
        if (sc->tq == NULL) {
                device_printf(dev, "failed to allocate controller task queue\n");
                error = ENOMEM;
                goto out;
        }

        taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
            device_get_nameunit(dev));
        TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);

        /* Create a periodic callout for checking adapter status */
        callout_init(&sc->cxgb_tick_ch, TRUE);

        if (t3_check_fw_version(sc) < 0 || force_fw_update) {
                /*
                 * Warn user that a firmware update will be attempted in init.
                 */
                device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
                    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
                sc->flags &= ~FW_UPTODATE;
        } else {
                sc->flags |= FW_UPTODATE;
        }

        if (t3_check_tpsram_version(sc) < 0) {
                /*
                 * Warn user that an SRAM update will be attempted in init.
                 */
                device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
                    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
                sc->flags &= ~TPS_UPTODATE;
        } else {
                sc->flags |= TPS_UPTODATE;
        }

        /*
         * Create a child device for each MAC.  The ethernet attachment
         * will be done in these children.
         */
        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi;

                if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
                        device_printf(dev, "failed to add child port\n");
                        error = EINVAL;
                        goto out;
                }
                pi = &sc->port[i];
                pi->adapter = sc;
                pi->nqsets = port_qsets;
                pi->first_qset = i*port_qsets;
                pi->port_id = i;
                pi->tx_chan = i >= ai->nports0;
                pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
                sc->rxpkt_map[pi->txpkt_intf] = i;
                sc->port[i].tx_chan = i >= ai->nports0;
                sc->portdev[i] = child;
                device_set_softc(child, pi);
        }
        if ((error = bus_generic_attach(dev)) != 0)
                goto out;

        /* initialize sge private state */
        t3_sge_init_adapter(sc);

        t3_led_ready(sc);

        cxgb_offload_init();
        if (is_offload(sc)) {
                setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_ofld(sc);
        }
        error = t3_get_fw_version(sc, &vers);
        if (error)
                goto out;

        snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
            G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
            G_FW_VERSION_MICRO(vers));

        snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
                 ai->desc, is_offload(sc) ? "R" : "",
                 sc->params.vpd.ec, sc->params.vpd.sn);
        device_set_desc_copy(dev, buf);

        snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
                 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
                 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);

        device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
        callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
        t3_add_attach_sysctls(sc);
out:
        if (error)
                cxgb_free(sc);

        return (error);
}

/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
 */
static int
cxgb_controller_detach(device_t dev)
{
        struct adapter *sc;

        sc = device_get_softc(dev);

        cxgb_free(sc);

        return (0);
}

/*
 * cxgb_free() is called by the cxgb_controller_detach() routine
 * to tear down the structures that were built up in
 * cxgb_controller_attach(), and should be the final piece of work
 * done when fully unloading the driver.  It is responsible for:
 *
 *  1. Shutting down the threads started by the cxgb_controller_attach()
 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
 *  3. Detaching all of the port devices created during the
 *     cxgb_controller_attach() routine.
 *  4. Removing the device children created via cxgb_controller_attach().
 *  5. Releasing PCI resources associated with the device.
 *  6. Turning off the offload support, iff it was turned on.
 *  7. Destroying the mutexes created in cxgb_controller_attach().
 */
static void
cxgb_free(struct adapter *sc)
{
        int i;

        ADAPTER_LOCK(sc);
        sc->flags |= CXGB_SHUTDOWN;
        ADAPTER_UNLOCK(sc);

        /*
         * Make sure all child devices are gone.
         */
        bus_generic_detach(sc->dev);
        for (i = 0; i < (sc)->params.nports; i++) {
                if (sc->portdev[i] &&
                    device_delete_child(sc->dev, sc->portdev[i]) != 0)
                        device_printf(sc->dev, "failed to delete child port\n");
        }

        /*
         * At this point, it is as if cxgb_port_detach has run on all ports, and
         * cxgb_down has run on the adapter.  All interrupts have been silenced,
         * all open devices have been closed.
         */
        KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
                                           __func__, sc->open_device_map));
        for (i = 0; i < sc->params.nports; i++) {
                KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
                                                  __func__, i));
        }

        /*
         * Finish off the adapter's callouts.
         */
        callout_drain(&sc->cxgb_tick_ch);
        callout_drain(&sc->sge_timer_ch);

        /*
         * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
         * sysctls are cleaned up by the kernel linker.
         */
        if (sc->flags & FULL_INIT_DONE) {
                t3_free_sge_resources(sc);
                sc->flags &= ~FULL_INIT_DONE;
        }

        /*
         * Release all interrupt resources.
         */
        cxgb_teardown_interrupts(sc);
        if (sc->flags & (USING_MSI | USING_MSIX)) {
                device_printf(sc->dev, "releasing msi message(s)\n");
                pci_release_msi(sc->dev);
        } else {
                device_printf(sc->dev, "no msi message to release\n");
        }

        if (sc->msix_regs_res != NULL) {
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
                    sc->msix_regs_res);
        }

        /*
         * Free the adapter's taskqueue.
         */
        if (sc->tq != NULL) {
                taskqueue_free(sc->tq);
                sc->tq = NULL;
        }

        if (is_offload(sc)) {
                clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_unofld(sc);
        }

#ifdef notyet
        if (sc->flags & CXGB_OFLD_INIT)
                cxgb_offload_deactivate(sc);
#endif
        free(sc->filters, M_DEVBUF);
        t3_sge_free(sc);

        cxgb_offload_exit();

        if (sc->udbs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
                    sc->udbs_res);

        if (sc->regs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
                    sc->regs_res);

        MTX_DESTROY(&sc->mdio_lock);
        MTX_DESTROY(&sc->sge.reg_lock);
        MTX_DESTROY(&sc->elmer_lock);
        ADAPTER_LOCK_DEINIT(sc);
}

/**
 *      setup_sge_qsets - configure SGE Tx/Rx/response queues
 *      @sc: the controller softc
 *
 *      Determines how many sets of SGE queues to use and initializes them.
 *      We support multiple queue sets per port if we have MSI-X, otherwise
 *      just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
        int i, j, err, irq_idx = 0, qset_idx = 0;
        u_int ntxq = SGE_TXQ_PER_SET;

        if ((err = t3_sge_alloc(sc)) != 0) {
                device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
                return (err);
        }

        if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
                irq_idx = -1;

        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi = &sc->port[i];

                for (j = 0; j < pi->nqsets; j++, qset_idx++) {
                        err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
                            (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
                            &sc->params.sge.qset[qset_idx], ntxq, pi);
                        if (err) {
                                t3_free_sge_resources(sc);
                                device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
                                    err);
                                return (err);
                        }
                }
        }

        return (0);
}

static void
cxgb_teardown_interrupts(adapter_t *sc)
{
        int i;

        for (i = 0; i < SGE_QSETS; i++) {
                if (sc->msix_intr_tag[i] == NULL) {

                        /* Should have been set up fully or not at all */
                        KASSERT(sc->msix_irq_res[i] == NULL &&
                                sc->msix_irq_rid[i] == 0,
                                ("%s: half-done interrupt (%d).", __func__, i));

                        continue;
                }

                bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
                                  sc->msix_intr_tag[i]);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
                                     sc->msix_irq_res[i]);

                sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
                sc->msix_irq_rid[i] = 0;
        }

        if (sc->intr_tag) {
                KASSERT(sc->irq_res != NULL,
                        ("%s: half-done interrupt.", __func__));

                bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                     sc->irq_res);

                sc->irq_res = sc->intr_tag = NULL;
                sc->irq_rid = 0;
        }
}

static int
cxgb_setup_interrupts(adapter_t *sc)
{
        struct resource *res;
        void *tag;
        int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);

        sc->irq_rid = intr_flag ? 1 : 0;
        sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
                                             RF_SHAREABLE | RF_ACTIVE);
        if (sc->irq_res == NULL) {
                device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
                              intr_flag, sc->irq_rid);
                err = EINVAL;
                sc->irq_rid = 0;
        } else {
                err = bus_setup_intr(sc->dev, sc->irq_res,
                    INTR_MPSAFE | INTR_TYPE_NET, NULL,
                    sc->cxgb_intr, sc, &sc->intr_tag);

                if (err) {
                        device_printf(sc->dev,
                                      "Cannot set up interrupt (%x, %u, %d)\n",
                                      intr_flag, sc->irq_rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                             sc->irq_res);
                        sc->irq_res = sc->intr_tag = NULL;
                        sc->irq_rid = 0;
                }
        }

        /* That's all for INTx or MSI */
        if (!(intr_flag & USING_MSIX) || err)
                return (err);

        for (i = 0; i < sc->msi_count - 1; i++) {
                rid = i + 2;
                res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
                                             RF_SHAREABLE | RF_ACTIVE);
                if (res == NULL) {
                        device_printf(sc->dev, "Cannot allocate interrupt "
                                      "for message %d\n", rid);
                        err = EINVAL;
                        break;
                }

                err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
                                     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
                if (err) {
                        device_printf(sc->dev, "Cannot set up interrupt "
                                      "for message %d (%d)\n", rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
                        break;
                }

                sc->msix_irq_rid[i] = rid;
                sc->msix_irq_res[i] = res;
                sc->msix_intr_tag[i] = tag;
        }

        if (err)
                cxgb_teardown_interrupts(sc);

        return (err);
}

static int
cxgb_port_probe(device_t dev)
{
        struct port_info *p;
        char buf[80];
        const char *desc;

        p = device_get_softc(dev);
        desc = p->phy.desc;
        snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
        device_set_desc_copy(dev, buf);
        return (0);
}

static int
cxgb_makedev(struct port_info *pi)
{

        pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
            UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));

        if (pi->port_cdev == NULL)
                return (ENOMEM);

        pi->port_cdev->si_drv1 = (void *)pi;

        return (0);
}

#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)

static int
cxgb_port_attach(device_t dev)
{
        struct port_info *p;
        struct ifnet *ifp;
        int err;
        struct adapter *sc;

        p = device_get_softc(dev);
        sc = p->adapter;
        snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
            device_get_unit(device_get_parent(dev)), p->port_id);
        PORT_LOCK_INIT(p, p->lockbuf);

        callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
        TASK_INIT(&p->link_check_task, 0, check_link_status, p);

        /* Allocate an ifnet object and set it up */
        ifp = p->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "Cannot allocate ifnet\n");
                return (ENOMEM);
        }

        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_init = cxgb_init;
        ifp->if_softc = p;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = cxgb_ioctl;
        ifp->if_start = cxgb_start;

        ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
        IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
        IFQ_SET_READY(&ifp->if_snd);

        ifp->if_capabilities = CXGB_CAP;
        ifp->if_capenable = CXGB_CAP_ENABLE;
        ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;

        /*
         * Disable TSO on 4-port - it isn't supported by the firmware.
         */
        if (sc->params.nports > 2) {
                ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_hwassist &= ~CSUM_TSO;
        }

        ether_ifattach(ifp, p->hw_addr);
        ifp->if_transmit = cxgb_transmit;
        ifp->if_qflush = cxgb_qflush;

#ifdef DEFAULT_JUMBO
        if (sc->params.nports <= 2)
                ifp->if_mtu = ETHERMTU_JUMBO;
#endif
        if ((err = cxgb_makedev(p)) != 0) {
                printf("makedev failed %d\n", err);
                return (err);
        }

        /* Create a list of media supported by this port */
        ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
            cxgb_media_status);
        cxgb_build_medialist(p);

        t3_sge_init_port(p);

        return (err);
}

/*
 * cxgb_port_detach() is called via the device_detach method when
 * cxgb_free() calls bus_generic_detach().  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interface lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 */
static int
cxgb_port_detach(device_t dev)
{
        struct port_info *p;
        struct adapter *sc;
        int i;

        p = device_get_softc(dev);
        sc = p->adapter;

        /* Tell cxgb_ioctl and if_init that the port is going away */
        ADAPTER_LOCK(sc);
        SET_DOOMED(p);
        wakeup(&sc->flags);
        while (IS_BUSY(sc))
                mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
        SET_BUSY(sc);
        ADAPTER_UNLOCK(sc);

        if (p->port_cdev != NULL)
                destroy_dev(p->port_cdev);

        cxgb_uninit_synchronized(p);
        ether_ifdetach(p->ifp);

        for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
                struct sge_qset *qs = &sc->sge.qs[i];
                struct sge_txq *txq = &qs->txq[TXQ_ETH];

                callout_drain(&txq->txq_watchdog);
                callout_drain(&txq->txq_timer);
        }

        PORT_LOCK_DEINIT(p);
        if_free(p->ifp);
        p->ifp = NULL;

        ADAPTER_LOCK(sc);
        CLR_BUSY(sc);
        wakeup_one(&sc->flags);
        ADAPTER_UNLOCK(sc);
        return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
        u_int fw_status[4];

        if (sc->flags & FULL_INIT_DONE) {
                t3_sge_stop(sc);
                t3_write_reg(sc, A_XGM_TX_CTRL, 0);
                t3_write_reg(sc, A_XGM_RX_CTRL, 0);
                t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
                t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
                t3_intr_disable(sc);
        }
        device_printf(sc->dev, "encountered fatal error, operation suspended\n");
        if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
                device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
                    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
        device_t dev;
        struct pci_devinfo *dinfo;
        pcicfgregs *cfg;
        uint32_t status;
        uint8_t ptr;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);
        cfg = &dinfo->cfg;

        status = pci_read_config(dev, PCIR_STATUS, 2);
        if (!(status & PCIM_STATUS_CAPPRESENT))
                return (0);

        switch (cfg->hdrtype & PCIM_HDRTYPE) {
        case 0:
        case 1:
                ptr = PCIR_CAP_PTR;
                break;
        case 2:
                ptr = PCIR_CAP_PTR_2;
                break;
        default:
                return (0);
        }
        ptr = pci_read_config(dev, ptr, 1);

        while (ptr != 0) {
                if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
                        return (ptr);
                ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
        }

        return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_save(dev, dinfo, 0);
        return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_restore(dev, dinfo);
        return (0);
}

/**
 *      t3_os_link_changed - handle link status changes
 *      @adapter: the adapter associated with the link change
 *      @port_id: the port index whose link status has changed
 *      @link_status: the new status of the link
 *      @speed: the new speed setting
 *      @duplex: the new duplex setting
 *      @fc: the new flow-control setting
 *      @mac_was_reset: whether the MAC was reset as part of the change
 *
 *      This is the OS-dependent handler for link status changes.  The OS
 *      neutral handler takes care of most of the processing for these events,
 *      then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc, int mac_was_reset)
{
        struct port_info *pi = &adapter->port[port_id];
        struct ifnet *ifp = pi->ifp;

        /* no race with detach, so ifp should always be good */
        KASSERT(ifp, ("%s: if detached.", __func__));

        /* Reapply mac settings if they were lost due to a reset */
        if (mac_was_reset) {
                PORT_LOCK(pi);
                cxgb_update_mac_settings(pi);
                PORT_UNLOCK(pi);
        }

        if (link_status) {
                ifp->if_baudrate = IF_Mbps(speed);
                if_link_state_change(ifp, LINK_STATE_UP);
        } else
                if_link_state_change(ifp, LINK_STATE_DOWN);
}

/**
 *      t3_os_phymod_changed - handle PHY module changes
 *      @adap: the adapter whose PHY reported the module change
 *      @port_id: the port index of the PHY reporting the change
 *
 *      This is the OS-dependent handler for PHY module changes.  It is
 *      invoked when a PHY module is removed or inserted for any OS-specific
 *      processing.
 */
void t3_os_phymod_changed(struct adapter *adap, int port_id)
{
        static const char *mod_str[] = {
                NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
        };
        struct port_info *pi = &adap->port[port_id];
        int mod = pi->phy.modtype;

        if (mod != pi->media.ifm_cur->ifm_data)
                cxgb_build_medialist(pi);

        if (mod == phy_modtype_none)
                if_printf(pi->ifp, "PHY module unplugged\n");
        else {
                KASSERT(mod < ARRAY_SIZE(mod_str),
                        ("invalid PHY module type %d", mod));
                if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
        }
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

        /*
         * The ifnet might not be allocated before this gets called, as this
         * is called early on in attach by t3_prep_adapter, so just save the
         * address off in the port structure.
         */
        if (cxgb_debug)
                printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
        bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/*
 * Programs the XGMAC based on the settings in the ifnet.  These settings
 * include MTU, MAC address, mcast addresses, etc.
 */
static void
cxgb_update_mac_settings(struct port_info *p)
{
        struct ifnet *ifp = p->ifp;
        struct t3_rx_mode rm;
        struct cmac *mac = &p->mac;
        int mtu, hwtagging;

        PORT_LOCK_ASSERT_OWNED(p);

        bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);

        mtu = ifp->if_mtu;
        if (ifp->if_capenable & IFCAP_VLAN_MTU)
                mtu += ETHER_VLAN_ENCAP_LEN;

        hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;

        t3_mac_set_mtu(mac, mtu);
        t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
        t3_mac_set_address(mac, 0, p->hw_addr);
        t3_init_rx_mode(&rm, p);
        t3_mac_set_rx_mode(mac, &rm);
}

static int
await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
                              unsigned long n)
{
        int attempts = 5;

        while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
                if (!--attempts)
                        return (ETIMEDOUT);
                t3_os_sleep(10);
        }
        return (0);
}

static int
init_tp_parity(struct adapter *adap)
{
        int i;
        struct mbuf *m;
        struct cpl_set_tcb_field *greq;
        unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;

        t3_tp_set_offload_mode(adap, 1);

        for (i = 0; i < 16; i++) {
                struct cpl_smt_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_smt_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
                req->iff = i;
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_l2t_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_l2t_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
                req->params = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_rte_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_rte_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
                req->l2t_idx = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        m = m_gethdr(M_WAITOK, MT_DATA);
        greq = mtod(m, struct cpl_set_tcb_field *);
        m->m_len = m->m_pkthdr.len = sizeof(*greq);
        memset(greq, 0, sizeof(*greq));
        greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
        greq->mask = htobe64(1);
        t3_mgmt_tx(adap, m);

        i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
        t3_tp_set_offload_mode(adap, 0);
        return (i);
}

/**
 *      setup_rss - configure Receive Side Steering (per-queue connection demux)
 *      @adap: the adapter
 *
 *      Sets up RSS to distribute packets to multiple receive queues.  We
 *      configure the RSS CPU lookup table to distribute to the number of HW
 *      receive queues, and the response queue lookup table to narrow that
 *      down to the response queues actually configured for each port.
 *      We always configure the RSS mapping for two ports since the mapping
 *      table has plenty of entries.
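 *
 *      For example (a hypothetical configuration, chosen for concreteness):
 *      on a two-port adapter with four queue sets per port, nq[0] = nq[1] = 4,
 *      so the first half of rspq_map cycles through response queues 0..3 and
 *      the second half through 4..7, per the modular arithmetic below.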
 */
static void
setup_rss(adapter_t *adap)
{
        int i;
        u_int nq[2];
        uint8_t cpus[SGE_QSETS + 1];
        uint16_t rspq_map[RSS_TABLE_SIZE];

        for (i = 0; i < SGE_QSETS; ++i)
                cpus[i] = i;
        cpus[SGE_QSETS] = 0xff;

        nq[0] = nq[1] = 0;
        for_each_port(adap, i) {
                const struct port_info *pi = adap2pinfo(adap, i);

                nq[pi->tx_chan] += pi->nqsets;
        }
        for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
                rspq_map[i] = nq[0] ? i % nq[0] : 0;
                rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
        }

        /* Calculate the reverse RSS map table */
        for (i = 0; i < SGE_QSETS; ++i)
                adap->rrss_map[i] = 0xff;
        for (i = 0; i < RSS_TABLE_SIZE; ++i)
                if (adap->rrss_map[rspq_map[i]] == 0xff)
                        adap->rrss_map[rspq_map[i]] = i;

        t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
                      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
                      F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
                      cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
        int ret;

        ret = t3_offload_tx(tdev, m);
        return (ret);
}

static int
write_smt_entry(struct adapter *adapter, int idx)
{
        struct port_info *pi = &adapter->port[idx];
        struct cpl_smt_write_req *req;
        struct mbuf *m;

        if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
                return (ENOMEM);

        req = mtod(m, struct cpl_smt_write_req *);
        m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);

        req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
        req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
        req->iff = idx;
        memset(req->src_mac1, 0, sizeof(req->src_mac1));
        memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

        m_set_priority(m, 1);

        offload_tx(&adapter->tdev, m);

        return (0);
}

static int
init_smt(struct adapter *adapter)
{
        int i;

        for_each_port(adapter, i)
                write_smt_entry(adapter, i);
        return (0);
}

static void
init_port_mtus(adapter_t *adapter)
{
        unsigned int mtus = ETHERMTU | (ETHERMTU << 16);

        t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}

static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
                              int hi, int port)
{
        struct mbuf *m;
        struct mngt_pktsched_wr *req;

        m = m_gethdr(M_DONTWAIT, MT_DATA);
        if (m) {
                req = mtod(m, struct mngt_pktsched_wr *);
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
                req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
                req->sched = sched;
                req->idx = qidx;
                req->min = lo;
                req->max = hi;
                req->binding = port;
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                t3_mgmt_tx(adap, m);
        }
}

static void
bind_qsets(adapter_t *sc)
{
        int i, j;

        for (i = 0; i < (sc)->params.nports; ++i) {
                const struct port_info *pi = adap2pinfo(sc, i);

                for (j = 0; j < pi->nqsets; ++j) {
                        send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
                                          -1, pi->tx_chan);
                }
        }
}
1533
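     /*
      * Compare the protocol-SRAM (TP microcode) version recorded in the
      * EEPROM with the version this driver expects, and rewrite the EEPROM
      * copy from the tpeeprom firmware(9) image if they differ.
      */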
1534 static void
1535 update_tpeeprom(struct adapter *adap)
1536 {
1537         const struct firmware *tpeeprom;
1538
1539         uint32_t version;
1540         unsigned int major, minor;
1541         int ret, len;
1542         char rev, name[32];
1543
1544         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1545
1546         major = G_TP_VERSION_MAJOR(version);
1547         minor = G_TP_VERSION_MINOR(version);
1548         if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1549                 return; 
1550
1551         rev = t3rev2char(adap);
1552         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1553
1554         tpeeprom = firmware_get(name);
1555         if (tpeeprom == NULL) {
1556                 device_printf(adap->dev,
1557                               "could not load TP EEPROM: unable to load %s\n",
1558                               name);
1559                 return;
1560         }
1561
1562         len = tpeeprom->datasize - 4;
1563         
1564         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1565         if (ret)
1566                 goto release_tpeeprom;
1567
1568         if (len != TP_SRAM_LEN) {
1569                 device_printf(adap->dev,
1570                               "%s length is wrong len=%d expected=%d\n", name,
1571                               len, TP_SRAM_LEN);
1572                 goto release_tpeeprom;
1573         }
1574         
1575         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1576             TP_SRAM_OFFSET);
1577         
1578         if (!ret) {
1579                 device_printf(adap->dev,
1580                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1581                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1582         } else 
1583                 device_printf(adap->dev,
1584                               "Protocol SRAM image update in EEPROM failed\n");
1585
1586 release_tpeeprom:
1587         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1588         
1589         return;
1590 }
1591
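     /*
      * Load the protocol SRAM (TP microcode) into the chip from a
      * firmware(9) image, refreshing the EEPROM copy first if needed.
      */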
1592 static int
1593 update_tpsram(struct adapter *adap)
1594 {
1595         const struct firmware *tpsram;
1596         int ret;
1597         char rev, name[32];
1598
1599         rev = t3rev2char(adap);
1600         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1601
1602         update_tpeeprom(adap);
1603
1604         tpsram = firmware_get(name);
1605                 if (tpsram == NULL) {
1606                 device_printf(adap->dev, "could not load TP SRAM\n");
1607                 return (EINVAL);
1608         } else
1609                 device_printf(adap->dev, "updating TP SRAM\n");
1610         
1611         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1612         if (ret)
1613                 goto release_tpsram;    
1614
1615         ret = t3_set_proto_sram(adap, tpsram->data);
1616         if (ret)
1617                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1618
1619 release_tpsram:
1620         firmware_put(tpsram, FIRMWARE_UNLOAD);
1621         
1622         return (ret);
1623 }
1624
1625 /**
1626  *      cxgb_up - enable the adapter
1627  *      @adap: adapter being enabled
1628  *
1629  *      Called when the first port is enabled, this function performs the
1630  *      actions necessary to make an adapter operational, such as completing
1631  *      the initialization of HW modules, and enabling interrupts.
1632  */
1633 static int
1634 cxgb_up(struct adapter *sc)
1635 {
1636         int err = 0;
1637         unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1638
1639         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1640                                            __func__, sc->open_device_map));
1641
1642         if ((sc->flags & FULL_INIT_DONE) == 0) {
1643
1644                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1645
1646                 if ((sc->flags & FW_UPTODATE) == 0)
1647                         if ((err = upgrade_fw(sc)))
1648                                 goto out;
1649
1650                 if ((sc->flags & TPS_UPTODATE) == 0)
1651                         if ((err = update_tpsram(sc)))
1652                                 goto out;
1653
1654                 if (is_offload(sc) && nfilters != 0) {
1655                         sc->params.mc5.nservers = 0;
1656
1657                         if (nfilters < 0)
1658                                 sc->params.mc5.nfilters = mxf;
1659                         else
1660                                 sc->params.mc5.nfilters = min(nfilters, mxf);
1661                 }
1662
1663                 err = t3_init_hw(sc, 0);
1664                 if (err)
1665                         goto out;
1666
1667                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1668                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1669
1670                 err = setup_sge_qsets(sc);
1671                 if (err)
1672                         goto out;
1673
1674                 alloc_filters(sc);
1675                 setup_rss(sc);
1676
1677                 t3_intr_clear(sc);
1678                 err = cxgb_setup_interrupts(sc);
1679                 if (err)
1680                         goto out;
1681
1682                 t3_add_configured_sysctls(sc);
1683                 sc->flags |= FULL_INIT_DONE;
1684         }
1685
1686         t3_intr_clear(sc);
1687         t3_sge_start(sc);
1688         t3_intr_enable(sc);
1689
1690         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1691             is_offload(sc) && init_tp_parity(sc) == 0)
1692                 sc->flags |= TP_PARITY_INIT;
1693
1694         if (sc->flags & TP_PARITY_INIT) {
1695                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1696                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1697         }
1698         
1699         if (!(sc->flags & QUEUES_BOUND)) {
1700                 bind_qsets(sc);
1701                 setup_hw_filters(sc);
1702                 sc->flags |= QUEUES_BOUND;              
1703         }
1704
1705         t3_sge_reset_adapter(sc);
1706 out:
1707         return (err);
1708 }
1709
1710 /*
1711  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1712  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1713  * during controller_detach, not here.
1714  */
1715 static void
1716 cxgb_down(struct adapter *sc)
1717 {
1718         t3_sge_stop(sc);
1719         t3_intr_disable(sc);
1720 }
1721
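     /*
      * Mark the offload device open and set up what upper-layer offload
      * clients need: TP offload mode, the port MTU tables, congestion
      * parameters, and SMT entries; then notify registered clients.
      */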
1722 static int
1723 offload_open(struct port_info *pi)
1724 {
1725         struct adapter *sc = pi->adapter;
1726         struct t3cdev *tdev = &sc->tdev;
1727
1728         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1729
1730         t3_tp_set_offload_mode(sc, 1);
1731         tdev->lldev = pi->ifp;
1732         init_port_mtus(sc);
1733         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1734                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1735         init_smt(sc);
1736         cxgb_add_clients(tdev);
1737
1738         return (0);
1739 }
1740
1741 static int
1742 offload_close(struct t3cdev *tdev)
1743 {
1744         struct adapter *adapter = tdev2adap(tdev);
1745
1746         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1747                 return (0);
1748
1749         /* Call back all registered clients */
1750         cxgb_remove_clients(tdev);
1751
1752         tdev->lldev = NULL;
1753         cxgb_set_dummy_ops(tdev);
1754         t3_tp_set_offload_mode(adapter, 0);
1755
1756         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1757
1758         return (0);
1759 }
1760
1761 /*
1762  * if_init for cxgb ports.
1763  */
1764 static void
1765 cxgb_init(void *arg)
1766 {
1767         struct port_info *p = arg;
1768         struct adapter *sc = p->adapter;
1769
1770         ADAPTER_LOCK(sc);
1771         cxgb_init_locked(p); /* releases adapter lock */
1772         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1773 }
1774
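     /*
      * Bring up a port.  Called with the adapter lock held; the lock is
      * dropped (with BUSY set) around the one-time initialization that may
      * sleep, and is always released before returning.  A doomed port or a
      * busy controller causes an early exit.
      */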
1775 static int
1776 cxgb_init_locked(struct port_info *p)
1777 {
1778         struct adapter *sc = p->adapter;
1779         struct ifnet *ifp = p->ifp;
1780         struct cmac *mac = &p->mac;
1781         int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1782
1783         ADAPTER_LOCK_ASSERT_OWNED(sc);
1784
1785         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1786                 gave_up_lock = 1;
1787                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1788                         rc = EINTR;
1789                         goto done;
1790                 }
1791         }
1792         if (IS_DOOMED(p)) {
1793                 rc = ENXIO;
1794                 goto done;
1795         }
1796         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1797
1798         /*
1799          * The code that runs during one-time adapter initialization can sleep
1800          * so it's important not to hold any locks across it.
1801          */
1802         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1803
1804         if (may_sleep) {
1805                 SET_BUSY(sc);
1806                 gave_up_lock = 1;
1807                 ADAPTER_UNLOCK(sc);
1808         }
1809
1810         if (sc->open_device_map == 0) {
1811                 if ((rc = cxgb_up(sc)) != 0)
1812                         goto done;
1813
1814                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1815                         log(LOG_WARNING,
1816                             "Could not initialize offload capabilities\n");
1817         }
1818
1819         PORT_LOCK(p);
1820         if (isset(&sc->open_device_map, p->port_id) &&
1821             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1822                 PORT_UNLOCK(p);
1823                 goto done;
1824         }
1825         t3_port_intr_enable(sc, p->port_id);
1826         if (!mac->multiport) 
1827                 t3_mac_init(mac);
1828         cxgb_update_mac_settings(p);
1829         t3_link_start(&p->phy, mac, &p->link_config);
1830         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1831         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1832         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1833         PORT_UNLOCK(p);
1834
1835         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1836                 struct sge_qset *qs = &sc->sge.qs[i];
1837                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1838
1839                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1840                                  txq->txq_watchdog.c_cpu);
1841         }
1842
1843         /* all ok */
1844         setbit(&sc->open_device_map, p->port_id);
1845         callout_reset(&p->link_check_ch,
1846             p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1847             link_check_callout, p);
1848
1849 done:
1850         if (may_sleep) {
1851                 ADAPTER_LOCK(sc);
1852                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1853                 CLR_BUSY(sc);
1854         }
1855         if (gave_up_lock)
1856                 wakeup_one(&sc->flags);
1857         ADAPTER_UNLOCK(sc);
1858         return (rc);
1859 }
1860
1861 static int
1862 cxgb_uninit_locked(struct port_info *p)
1863 {
1864         struct adapter *sc = p->adapter;
1865         int rc;
1866
1867         ADAPTER_LOCK_ASSERT_OWNED(sc);
1868
1869         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1870                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1871                         rc = EINTR;
1872                         goto done;
1873                 }
1874         }
1875         if (IS_DOOMED(p)) {
1876                 rc = ENXIO;
1877                 goto done;
1878         }
1879         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1880         SET_BUSY(sc);
1881         ADAPTER_UNLOCK(sc);
1882
1883         rc = cxgb_uninit_synchronized(p);
1884
1885         ADAPTER_LOCK(sc);
1886         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1887         CLR_BUSY(sc);
1888         wakeup_one(&sc->flags);
1889 done:
1890         ADAPTER_UNLOCK(sc);
1891         return (rc);
1892 }
1893
1894 /*
1895  * Called on "ifconfig down", and from port_detach
1896  */
1897 static int
1898 cxgb_uninit_synchronized(struct port_info *pi)
1899 {
1900         struct adapter *sc = pi->adapter;
1901         struct ifnet *ifp = pi->ifp;
1902
1903         /*
1904          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1905          */
1906         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1907
1908         /*
1909          * Clear this port's bit from the open device map, and then drain all
1910          * the tasks that can access/manipulate this port's port_info or ifp.
1911          * We disable this port's interrupts here so that the slow/ext
1912          * interrupt tasks won't be enqueued.  The tick task will continue to
1913          * be enqueued every second but the runs after this drain will not see
1914          * this port in the open device map.
1915          *
1916          * A well-behaved task must take open_device_map into account and ignore
1917          * ports that are not open.
1918          */
1919         clrbit(&sc->open_device_map, pi->port_id);
1920         t3_port_intr_disable(sc, pi->port_id);
1921         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1922         taskqueue_drain(sc->tq, &sc->tick_task);
1923
1924         callout_drain(&pi->link_check_ch);
1925         taskqueue_drain(sc->tq, &pi->link_check_task);
1926
1927         PORT_LOCK(pi);
1928         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1929
1930         /* disable pause frames */
1931         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1932
1933         /* Reset RX FIFO HWM */
1934         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1935                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1936
1937         DELAY(100 * 1000);
1938
1939         /* Wait for TXFIFO empty */
1940         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1941                         F_TXFIFO_EMPTY, 1, 20, 5);
1942
1943         DELAY(100 * 1000);
1944         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1945
1947         pi->phy.ops->power_down(&pi->phy, 1);
1948
1949         PORT_UNLOCK(pi);
1950
1951         pi->link_config.link_ok = 0;
1952         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1953
1954         if ((sc->open_device_map & PORT_MASK) == 0)
1955                 offload_close(&sc->tdev);
1956
1957         if (sc->open_device_map == 0)
1958                 cxgb_down(pi->adapter);
1959
1960         return (0);
1961 }
1962
1963 /*
1964  * Mark lro enabled or disabled in all qsets for this port
1965  */
1966 static int
1967 cxgb_set_lro(struct port_info *p, int enabled)
1968 {
1969         int i;
1970         struct adapter *adp = p->adapter;
1971         struct sge_qset *q;
1972
1973         for (i = 0; i < p->nqsets; i++) {
1974                 q = &adp->sge.qs[p->first_qset + i];
1975                 q->lro.enabled = (enabled != 0);
1976         }
1977         return (0);
1978 }
1979
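     /*
      * Ifnet ioctl handler.  The IS_DOOMED/IS_BUSY checks under the adapter
      * lock serialize these operations against port detach and init/uninit.
      */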
1980 static int
1981 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1982 {
1983         struct port_info *p = ifp->if_softc;
1984         struct adapter *sc = p->adapter;
1985         struct ifreq *ifr = (struct ifreq *)data;
1986         int flags, error = 0, mtu;
1987         uint32_t mask;
1988
1989         switch (command) {
1990         case SIOCSIFMTU:
1991                 ADAPTER_LOCK(sc);
1992                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1993                 if (error) {
1994 fail:
1995                         ADAPTER_UNLOCK(sc);
1996                         return (error);
1997                 }
1998
1999                 mtu = ifr->ifr_mtu;
2000                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2001                         error = EINVAL;
2002                 } else {
2003                         ifp->if_mtu = mtu;
2004                         PORT_LOCK(p);
2005                         cxgb_update_mac_settings(p);
2006                         PORT_UNLOCK(p);
2007                 }
2008                 ADAPTER_UNLOCK(sc);
2009                 break;
2010         case SIOCSIFFLAGS:
2011                 ADAPTER_LOCK(sc);
2012                 if (IS_DOOMED(p)) {
2013                         error = ENXIO;
2014                         goto fail;
2015                 }
2016                 if (ifp->if_flags & IFF_UP) {
2017                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2018                                 flags = p->if_flags;
2019                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2020                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2021                                         if (IS_BUSY(sc)) {
2022                                                 error = EBUSY;
2023                                                 goto fail;
2024                                         }
2025                                         PORT_LOCK(p);
2026                                         cxgb_update_mac_settings(p);
2027                                         PORT_UNLOCK(p);
2028                                 }
2029                                 ADAPTER_UNLOCK(sc);
2030                         } else
2031                                 error = cxgb_init_locked(p);
2032                         p->if_flags = ifp->if_flags;
2033                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2034                         error = cxgb_uninit_locked(p);
2035                 else
2036                         ADAPTER_UNLOCK(sc);
2037
2038                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2039                 break;
2040         case SIOCADDMULTI:
2041         case SIOCDELMULTI:
2042                 ADAPTER_LOCK(sc);
2043                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2044                 if (error)
2045                         goto fail;
2046
2047                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2048                         PORT_LOCK(p);
2049                         cxgb_update_mac_settings(p);
2050                         PORT_UNLOCK(p);
2051                 }
2052                 ADAPTER_UNLOCK(sc);
2053
2054                 break;
2055         case SIOCSIFCAP:
2056                 ADAPTER_LOCK(sc);
2057                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2058                 if (error)
2059                         goto fail;
2060
2061                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2062                 if (mask & IFCAP_TXCSUM) {
2063                         ifp->if_capenable ^= IFCAP_TXCSUM;
2064                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2065
2066                         if (IFCAP_TSO & ifp->if_capenable &&
2067                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2068                                 ifp->if_capenable &= ~IFCAP_TSO;
2069                                 ifp->if_hwassist &= ~CSUM_TSO;
2070                                 if_printf(ifp,
2071                                     "tso disabled due to -txcsum.\n");
2072                         }
2073                 }
2074                 if (mask & IFCAP_RXCSUM)
2075                         ifp->if_capenable ^= IFCAP_RXCSUM;
2076                 if (mask & IFCAP_TSO4) {
2077                         ifp->if_capenable ^= IFCAP_TSO4;
2078
2079                         if (IFCAP_TSO & ifp->if_capenable) {
2080                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2081                                         ifp->if_hwassist |= CSUM_TSO;
2082                                 else {
2083                                         ifp->if_capenable &= ~IFCAP_TSO;
2084                                         ifp->if_hwassist &= ~CSUM_TSO;
2085                                         if_printf(ifp,
2086                                             "enable txcsum first.\n");
2087                                         error = EAGAIN;
2088                                 }
2089                         } else
2090                                 ifp->if_hwassist &= ~CSUM_TSO;
2091                 }
2092                 if (mask & IFCAP_LRO) {
2093                         ifp->if_capenable ^= IFCAP_LRO;
2094
2095                         /* Safe to do this even if cxgb_up not called yet */
2096                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2097                 }
2098                 if (mask & IFCAP_VLAN_HWTAGGING) {
2099                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2100                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2101                                 PORT_LOCK(p);
2102                                 cxgb_update_mac_settings(p);
2103                                 PORT_UNLOCK(p);
2104                         }
2105                 }
2106                 if (mask & IFCAP_VLAN_MTU) {
2107                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2108                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2109                                 PORT_LOCK(p);
2110                                 cxgb_update_mac_settings(p);
2111                                 PORT_UNLOCK(p);
2112                         }
2113                 }
2114                 if (mask & IFCAP_VLAN_HWTSO)
2115                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2116                 if (mask & IFCAP_VLAN_HWCSUM)
2117                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2118
2119 #ifdef VLAN_CAPABILITIES
2120                 VLAN_CAPABILITIES(ifp);
2121 #endif
2122                 ADAPTER_UNLOCK(sc);
2123                 break;
2124         case SIOCSIFMEDIA:
2125         case SIOCGIFMEDIA:
2126                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2127                 break;
2128         default:
2129                 error = ether_ioctl(ifp, command, data);
2130         }
2131
2132         return (error);
2133 }
2134
2135 static int
2136 cxgb_media_change(struct ifnet *ifp)
2137 {
2138         return (EOPNOTSUPP);
2139 }
2140
2141 /*
2142  * Translates phy->modtype to the correct Ethernet media subtype.
2143  */
2144 static int
2145 cxgb_ifm_type(int mod)
2146 {
2147         switch (mod) {
2148         case phy_modtype_sr:
2149                 return (IFM_10G_SR);
2150         case phy_modtype_lr:
2151                 return (IFM_10G_LR);
2152         case phy_modtype_lrm:
2153                 return (IFM_10G_LRM);
2154         case phy_modtype_twinax:
2155                 return (IFM_10G_TWINAX);
2156         case phy_modtype_twinax_long:
2157                 return (IFM_10G_TWINAX_LONG);
2158         case phy_modtype_none:
2159                 return (IFM_NONE);
2160         case phy_modtype_unknown:
2161                 return (IFM_UNKNOWN);
2162         }
2163
2164         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2165         return (IFM_UNKNOWN);
2166 }
2167
2168 /*
2169  * Rebuilds the ifmedia list for this port, and sets the current media.
2170  */
2171 static void
2172 cxgb_build_medialist(struct port_info *p)
2173 {
2174         struct cphy *phy = &p->phy;
2175         struct ifmedia *media = &p->media;
2176         int mod = phy->modtype;
2177         int m = IFM_ETHER | IFM_FDX;
2178
2179         PORT_LOCK(p);
2180
2181         ifmedia_removeall(media);
2182         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2183                 /* Copper (RJ45) */
2184
2185                 if (phy->caps & SUPPORTED_10000baseT_Full)
2186                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2187
2188                 if (phy->caps & SUPPORTED_1000baseT_Full)
2189                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2190
2191                 if (phy->caps & SUPPORTED_100baseT_Full)
2192                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2193
2194                 if (phy->caps & SUPPORTED_10baseT_Full)
2195                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2196
2197                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2198                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2199
2200         } else if (phy->caps & SUPPORTED_TP) {
2201                 /* Copper (CX4) */
2202
2203                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2204                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2205
2206                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2207                 ifmedia_set(media, m | IFM_10G_CX4);
2208
2209         } else if (phy->caps & SUPPORTED_FIBRE &&
2210                    phy->caps & SUPPORTED_10000baseT_Full) {
2211                 /* 10G optical (but includes SFP+ twinax) */
2212
2213                 m |= cxgb_ifm_type(mod);
2214                 if (IFM_SUBTYPE(m) == IFM_NONE)
2215                         m &= ~IFM_FDX;
2216
2217                 ifmedia_add(media, m, mod, NULL);
2218                 ifmedia_set(media, m);
2219
2220         } else if (phy->caps & SUPPORTED_FIBRE &&
2221                    phy->caps & SUPPORTED_1000baseT_Full) {
2222                 /* 1G optical */
2223
2224                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2225                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2226                 ifmedia_set(media, m | IFM_1000_SX);
2227
2228         } else {
2229                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2230                             phy->caps));
2231         }
2232
2233         PORT_UNLOCK(p);
2234 }
2235
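     /*
      * Report link and media status.  If the module type has changed since
      * the media list was built, rebuild the list first.
      */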
2236 static void
2237 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2238 {
2239         struct port_info *p = ifp->if_softc;
2240         struct ifmedia_entry *cur = p->media.ifm_cur;
2241         int speed = p->link_config.speed;
2242
2243         if (cur->ifm_data != p->phy.modtype) {
2244                 cxgb_build_medialist(p);
2245                 cur = p->media.ifm_cur;
2246         }
2247
2248         ifmr->ifm_status = IFM_AVALID;
2249         if (!p->link_config.link_ok)
2250                 return;
2251
2252         ifmr->ifm_status |= IFM_ACTIVE;
2253
2254         /*
2255          * active and current will differ iff current media is autoselect.  That
2256          * can happen only for copper RJ45.
2257          */
2258         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2259                 return;
2260         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2261                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2262
2263         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2264         if (speed == SPEED_10000)
2265                 ifmr->ifm_active |= IFM_10G_T;
2266         else if (speed == SPEED_1000)
2267                 ifmr->ifm_active |= IFM_1000_T;
2268         else if (speed == SPEED_100)
2269                 ifmr->ifm_active |= IFM_100_TX;
2270         else if (speed == SPEED_10)
2271                 ifmr->ifm_active |= IFM_10_T;
2272         else
2273                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2274                             speed));
2275 }
2276
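     /*
      * Interrupt handler for asynchronous (slow-path) events: mask further
      * PL interrupts (the read-back presumably flushes the posted write)
      * and defer the real work to the slow_intr task.
      */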
2277 static void
2278 cxgb_async_intr(void *data)
2279 {
2280         adapter_t *sc = data;
2281
2282         t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2283         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2284         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2285 }
2286
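     /*
      * Callout wrapper that defers the actual link check to a task, but
      * only for ports that are currently open.
      */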
2287 static void
2288 link_check_callout(void *arg)
2289 {
2290         struct port_info *pi = arg;
2291         struct adapter *sc = pi->adapter;
2292
2293         if (!isset(&sc->open_device_map, pi->port_id))
2294                 return;
2295
2296         taskqueue_enqueue(sc->tq, &pi->link_check_task);
2297 }
2298
2299 static void
2300 check_link_status(void *arg, int pending)
2301 {
2302         struct port_info *pi = arg;
2303         struct adapter *sc = pi->adapter;
2304
2305         if (!isset(&sc->open_device_map, pi->port_id))
2306                 return;
2307
2308         t3_link_changed(sc, pi->port_id);
2309
2310         if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2311                 callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2312 }
2313
2314 void
2315 t3_os_link_intr(struct port_info *pi)
2316 {
2317         /*
2318          * Schedule a link check in the near future.  If the link is flapping
2319          * rapidly we'll keep resetting the callout and delaying the check until
2320          * things stabilize a bit.
2321          */
2322         callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2323 }
2324
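     /*
      * Periodic MAC watchdog for T3B2 adapters.  Depending on the watchdog
      * task's verdict, either count a Tx toggle or reset the MAC outright:
      * re-apply MAC settings, restart the link, and re-enable interrupts.
      */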
2325 static void
2326 check_t3b2_mac(struct adapter *sc)
2327 {
2328         int i;
2329
2330         if (sc->flags & CXGB_SHUTDOWN)
2331                 return;
2332
2333         for_each_port(sc, i) {
2334                 struct port_info *p = &sc->port[i];
2335                 int status;
2336 #ifdef INVARIANTS
2337                 struct ifnet *ifp = p->ifp;
2338 #endif          
2339
2340                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2341                     !p->link_config.link_ok)
2342                         continue;
2343
2344                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2345                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2346                          __func__, ifp->if_drv_flags, sc->open_device_map));
2347
2348                 PORT_LOCK(p);
2349                 status = t3b2_mac_watchdog_task(&p->mac);
2350                 if (status == 1)
2351                         p->mac.stats.num_toggled++;
2352                 else if (status == 2) {
2353                         struct cmac *mac = &p->mac;
2354
2355                         cxgb_update_mac_settings(p);
2356                         t3_link_start(&p->phy, mac, &p->link_config);
2357                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2358                         t3_port_intr_enable(sc, p->port_id);
2359                         p->mac.stats.num_resets++;
2360                 }
2361                 PORT_UNLOCK(p);
2362         }
2363 }
2364
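     /*
      * Once-a-second callout: defer the real work to the tick task and
      * reschedule, unless the adapter is shutting down.
      */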
2365 static void
2366 cxgb_tick(void *arg)
2367 {
2368         adapter_t *sc = (adapter_t *)arg;
2369
2370         if (sc->flags & CXGB_SHUTDOWN)
2371                 return;
2372
2373         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2374         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2375 }
2376
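     /*
      * The tick task itself: run the T3B2 MAC watchdog when applicable,
      * account SGE response-queue starvation and free-list empty events,
      * refresh per-port ifnet statistics from the MAC counters, and count
      * Rx FIFO overflows.
      */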
2377 static void
2378 cxgb_tick_handler(void *arg, int count)
2379 {
2380         adapter_t *sc = (adapter_t *)arg;
2381         const struct adapter_params *p = &sc->params;
2382         int i;
2383         uint32_t cause, reset;
2384
2385         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2386                 return;
2387
2388         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2389                 check_t3b2_mac(sc);
2390
2391         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2392         if (cause) {
2393                 struct sge_qset *qs = &sc->sge.qs[0];
2394                 uint32_t mask, v;
2395
2396                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2397
2398                 mask = 1;
2399                 for (i = 0; i < SGE_QSETS; i++) {
2400                         if (v & mask)
2401                                 qs[i].rspq.starved++;
2402                         mask <<= 1;
2403                 }
2404
2405                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2406
2407                 for (i = 0; i < SGE_QSETS * 2; i++) {
2408                         if (v & mask) {
2409                                 qs[i / 2].fl[i % 2].empty++;
2410                         }
2411                         mask <<= 1;
2412                 }
2413
2414                 /* clear */
2415                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2416                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2417         }
2418
2419         for (i = 0; i < sc->params.nports; i++) {
2420                 struct port_info *pi = &sc->port[i];
2421                 struct ifnet *ifp = pi->ifp;
2422                 struct cmac *mac = &pi->mac;
2423                 struct mac_stats *mstats = &mac->stats;
2424                 int drops, j;
2425
2426                 if (!isset(&sc->open_device_map, pi->port_id))
2427                         continue;
2428
2429                 PORT_LOCK(pi);
2430                 t3_mac_update_stats(mac);
2431                 PORT_UNLOCK(pi);
2432
2433                 ifp->if_opackets = mstats->tx_frames;
2434                 ifp->if_ipackets = mstats->rx_frames;
2435                 ifp->if_obytes = mstats->tx_octets;
2436                 ifp->if_ibytes = mstats->rx_octets;
2437                 ifp->if_omcasts = mstats->tx_mcast_frames;
2438                 ifp->if_imcasts = mstats->rx_mcast_frames;
2439                 ifp->if_collisions = mstats->tx_total_collisions;
2440                 ifp->if_iqdrops = mstats->rx_cong_drops;
2441
2442                 drops = 0;
2443                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2444                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2445                 ifp->if_snd.ifq_drops = drops;
2446
2447                 ifp->if_oerrors =
2448                     mstats->tx_excess_collisions +
2449                     mstats->tx_underrun +
2450                     mstats->tx_len_errs +
2451                     mstats->tx_mac_internal_errs +
2452                     mstats->tx_excess_deferral +
2453                     mstats->tx_fcs_errs;
2454                 ifp->if_ierrors =
2455                     mstats->rx_jabber +
2456                     mstats->rx_data_errs +
2457                     mstats->rx_sequence_errs +
2458                     mstats->rx_runt + 
2459                     mstats->rx_too_long +
2460                     mstats->rx_mac_internal_errs +
2461                     mstats->rx_short +
2462                     mstats->rx_fcs_errs;
2463
2464                 if (mac->multiport)
2465                         continue;
2466
2467                 /* Count rx fifo overflows, once per second */
2468                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2469                 reset = 0;
2470                 if (cause & F_RXFIFO_OVERFLOW) {
2471                         mac->stats.rx_fifo_ovfl++;
2472                         reset |= F_RXFIFO_OVERFLOW;
2473                 }
2474                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2475         }
2476 }
2477
2478 static void
2479 touch_bars(device_t dev)
2480 {
2481         /*
2482          * Don't enable yet
2483          */
2484 #if !defined(__LP64__) && 0
2485         u32 v;
2486
2487         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2488         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2489         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2490         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2491         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2492         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2493 #endif
2494 }
2495
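     /*
      * Write len bytes at offset into the serial EEPROM.  The EEPROM is
      * accessed in 4-byte words, so a misaligned head or tail is handled by
      * reading the surrounding words first (read-modify-write).  Write
      * protection is lifted only for the duration of the update.
      */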
2496 static int
2497 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2498 {
2499         uint8_t *buf;
2500         int err = 0;
2501         u32 aligned_offset, aligned_len, *p;
2502         struct adapter *adapter = pi->adapter;
2503
2505         aligned_offset = offset & ~3;
2506         aligned_len = (len + (offset & 3) + 3) & ~3;
2507
2508         if (aligned_offset != offset || aligned_len != len) {
2509                 buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);              
2510                 if (!buf)
2511                         return (ENOMEM);
2512                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2513                 if (!err && aligned_len > 4)
2514                         err = t3_seeprom_read(adapter,
2515                                               aligned_offset + aligned_len - 4,
2516                                               (u32 *)&buf[aligned_len - 4]);
2517                 if (err)
2518                         goto out;
2519                 memcpy(buf + (offset & 3), data, len);
2520         } else
2521                 buf = (uint8_t *)(uintptr_t)data;
2522
2523         err = t3_seeprom_wp(adapter, 0);
2524         if (err)
2525                 goto out;
2526
2527         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2528                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2529                 aligned_offset += 4;
2530         }
2531
2532         if (!err)
2533                 err = t3_seeprom_wp(adapter, 1);
2534 out:
2535         if (buf != data)
2536                 free(buf, M_DEVBUF);
2537         return (err);
2538 }
2539
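     /*
      * Range check used by CHELSIO_SET_HW_SCHED below: a negative value
      * means "leave this parameter unchanged" and so passes the check.
      */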
2541 static int
2542 in_range(int val, int lo, int hi)
2543 {
2544         return (val < 0 || (val <= hi && val >= lo));
2545 }
2546
2547 static int
2548 cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2549 {
2550         return (0);
2551 }
2552
2553 static int
2554 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2555 {
2556         return (0);
2557 }
2558
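     /*
      * Handler for the Chelsio-private ioctls on the management cdev; all
      * of them require driver privilege.
      */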
2559 static int
2560 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2561     int fflag, struct thread *td)
2562 {
2563         int mmd, error = 0;
2564         struct port_info *pi = dev->si_drv1;
2565         adapter_t *sc = pi->adapter;
2566
2567 #ifdef PRIV_SUPPORTED   
2568         if (priv_check(td, PRIV_DRIVER)) {
2569                 if (cxgb_debug) 
2570                         printf("user does not have access to privileged ioctls\n");
2571                 return (EPERM);
2572         }
2573 #else
2574         if (suser(td)) {
2575                 if (cxgb_debug)
2576                         printf("user does not have access to privileged ioctls\n");
2577                 return (EPERM);
2578         }
2579 #endif
2580         
2581         switch (cmd) {
2582         case CHELSIO_GET_MIIREG: {
2583                 uint32_t val;
2584                 struct cphy *phy = &pi->phy;
2585                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2586                 
2587                 if (!phy->mdio_read)
2588                         return (EOPNOTSUPP);
2589                 if (is_10G(sc)) {
2590                         mmd = mid->phy_id >> 8;
2591                         if (!mmd)
2592                                 mmd = MDIO_DEV_PCS;
2593                         else if (mmd > MDIO_DEV_VEND2)
2594                                 return (EINVAL);
2595
2596                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2597                                              mid->reg_num, &val);
2598                 } else
2599                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2600                                              mid->reg_num & 0x1f, &val);
2601                 if (error == 0)
2602                         mid->val_out = val;
2603                 break;
2604         }
2605         case CHELSIO_SET_MIIREG: {
2606                 struct cphy *phy = &pi->phy;
2607                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2608
2609                 if (!phy->mdio_write)
2610                         return (EOPNOTSUPP);
2611                 if (is_10G(sc)) {
2612                         mmd = mid->phy_id >> 8;
2613                         if (!mmd)
2614                                 mmd = MDIO_DEV_PCS;
2615                         else if (mmd > MDIO_DEV_VEND2)
2616                                 return (EINVAL);
2617                         
2618                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2619                                               mmd, mid->reg_num, mid->val_in);
2620                 } else
2621                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2622                                               mid->reg_num & 0x1f,
2623                                               mid->val_in);
2624                 break;
2625         }
2626         case CHELSIO_SETREG: {
2627                 struct ch_reg *edata = (struct ch_reg *)data;
2628                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2629                         return (EFAULT);
2630                 t3_write_reg(sc, edata->addr, edata->val);
2631                 break;
2632         }
2633         case CHELSIO_GETREG: {
2634                 struct ch_reg *edata = (struct ch_reg *)data;
2635                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2636                         return (EFAULT);
2637                 edata->val = t3_read_reg(sc, edata->addr);
2638                 break;
2639         }
2640         case CHELSIO_GET_SGE_CONTEXT: {
2641                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2642                 mtx_lock_spin(&sc->sge.reg_lock);
2643                 switch (ecntxt->cntxt_type) {
2644                 case CNTXT_TYPE_EGRESS:
2645                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2646                             ecntxt->data);
2647                         break;
2648                 case CNTXT_TYPE_FL:
2649                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2650                             ecntxt->data);
2651                         break;
2652                 case CNTXT_TYPE_RSP:
2653                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2654                             ecntxt->data);
2655                         break;
2656                 case CNTXT_TYPE_CQ:
2657                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2658                             ecntxt->data);
2659                         break;
2660                 default:
2661                         error = EINVAL;
2662                         break;
2663                 }
2664                 mtx_unlock_spin(&sc->sge.reg_lock);
2665                 break;
2666         }
2667         case CHELSIO_GET_SGE_DESC: {
2668                 struct ch_desc *edesc = (struct ch_desc *)data;
2669                 int ret;
2670                 if (edesc->queue_num >= SGE_QSETS * 6)
2671                         return (EINVAL);
2672                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2673                     edesc->queue_num % 6, edesc->idx, edesc->data);
2674                 if (ret < 0)
2675                         return (EINVAL);
2676                 edesc->size = ret;
2677                 break;
2678         }
2679         case CHELSIO_GET_QSET_PARAMS: {
2680                 struct qset_params *q;
2681                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2682                 int q1 = pi->first_qset;
2683                 int nqsets = pi->nqsets;
2684                 int i;
2685
2686                 if (t->qset_idx >= nqsets)
2687                         return (EINVAL);
2688
2689                 i = q1 + t->qset_idx;
2690                 q = &sc->params.sge.qset[i];
2691                 t->rspq_size   = q->rspq_size;
2692                 t->txq_size[0] = q->txq_size[0];
2693                 t->txq_size[1] = q->txq_size[1];
2694                 t->txq_size[2] = q->txq_size[2];
2695                 t->fl_size[0]  = q->fl_size;
2696                 t->fl_size[1]  = q->jumbo_size;
2697                 t->polling     = q->polling;
2698                 t->lro         = q->lro;
2699                 t->intr_lat    = q->coalesce_usecs;
2700                 t->cong_thres  = q->cong_thres;
2701                 t->qnum        = i;
2702
2703                 if ((sc->flags & FULL_INIT_DONE) == 0)
2704                         t->vector = 0;
2705                 else if (sc->flags & USING_MSIX)
2706                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2707                 else
2708                         t->vector = rman_get_start(sc->irq_res);
2709
2710                 break;
2711         }
2712         case CHELSIO_GET_QSET_NUM: {
2713                 struct ch_reg *edata = (struct ch_reg *)data;
2714                 edata->val = pi->nqsets;
2715                 break;
2716         }
2717         case CHELSIO_LOAD_FW: {
2718                 uint8_t *fw_data;
2719                 uint32_t vers;
2720                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2721
2722                 /*
2723                  * You're allowed to load a firmware only before FULL_INIT_DONE
2724                  *
2725                  * FW_UPTODATE is also set so the rest of the initialization
2726                  * will not overwrite what was loaded here.  This gives you the
2727                  * flexibility to load any firmware (and maybe shoot yourself in
2728                  * the foot).
2729                  */
2730
2731                 ADAPTER_LOCK(sc);
2732                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2733                         ADAPTER_UNLOCK(sc);
2734                         return (EBUSY);
2735                 }
2736
2737                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2738                 if (!fw_data)
2739                         error = ENOMEM;
2740                 else
2741                         error = copyin(t->buf, fw_data, t->len);
2742
2743                 if (!error)
2744                         error = -t3_load_fw(sc, fw_data, t->len);
2745
2746                 if (t3_get_fw_version(sc, &vers) == 0) {
2747                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2748                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2749                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2750                 }
2751
2752                 if (!error)
2753                         sc->flags |= FW_UPTODATE;
2754
2755                 free(fw_data, M_DEVBUF);
2756                 ADAPTER_UNLOCK(sc);
2757                 break;
2758         }
2759         case CHELSIO_LOAD_BOOT: {
2760                 uint8_t *boot_data;
2761                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2762
2763                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2764                 if (!boot_data)
2765                         return (ENOMEM);
2766
2767                 error = copyin(t->buf, boot_data, t->len);
2768                 if (!error)
2769                         error = -t3_load_boot(sc, boot_data, t->len);
2770
2771                 free(boot_data, M_DEVBUF);
2772                 break;
2773         }
2774         case CHELSIO_GET_PM: {
2775                 struct ch_pm *m = (struct ch_pm *)data;
2776                 struct tp_params *p = &sc->params.tp;
2777
2778                 if (!is_offload(sc))
2779                         return (EOPNOTSUPP);
2780
2781                 m->tx_pg_sz = p->tx_pg_size;
2782                 m->tx_num_pg = p->tx_num_pgs;
2783                 m->rx_pg_sz  = p->rx_pg_size;
2784                 m->rx_num_pg = p->rx_num_pgs;
2785                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2786
2787                 break;
2788         }
2789         case CHELSIO_SET_PM: {
2790                 struct ch_pm *m = (struct ch_pm *)data;
2791                 struct tp_params *p = &sc->params.tp;
2792
2793                 if (!is_offload(sc))
2794                         return (EOPNOTSUPP);
2795                 if (sc->flags & FULL_INIT_DONE)
2796                         return (EBUSY);
2797
2798                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2799                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2800                         return (EINVAL);        /* not power of 2 */
2801                 if (!(m->rx_pg_sz & 0x14000))
2802                         return (EINVAL);        /* not 16KB or 64KB */
2803                 if (!(m->tx_pg_sz & 0x1554000))
2804                         return (EINVAL);        /* not 16KB..16MB, in powers of 4 */
2805                 if (m->tx_num_pg == -1)
2806                         m->tx_num_pg = p->tx_num_pgs;
2807                 if (m->rx_num_pg == -1)
2808                         m->rx_num_pg = p->rx_num_pgs;
2809                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2810                         return (EINVAL);
2811                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2812                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2813                         return (EINVAL);
2814
2815                 p->rx_pg_size = m->rx_pg_sz;
2816                 p->tx_pg_size = m->tx_pg_sz;
2817                 p->rx_num_pgs = m->rx_num_pg;
2818                 p->tx_num_pgs = m->tx_num_pg;
2819                 break;
2820         }
2821         case CHELSIO_SETMTUTAB: {
2822                 struct ch_mtus *m = (struct ch_mtus *)data;
2823                 int i;
2824                 
2825                 if (!is_offload(sc))
2826                         return (EOPNOTSUPP);
2827                 if (offload_running(sc))
2828                         return (EBUSY);
2829                 if (m->nmtus != NMTUS)
2830                         return (EINVAL);
2831                 if (m->mtus[0] < 81)         /* accommodate SACK */
2832                         return (EINVAL);
2833                 
2834                 /*
2835                  * MTUs must be in ascending order
2836                  */
2837                 for (i = 1; i < NMTUS; ++i)
2838                         if (m->mtus[i] < m->mtus[i - 1])
2839                                 return (EINVAL);
2840
2841                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2842                 break;
2843         }
2844         case CHELSIO_GETMTUTAB: {
2845                 struct ch_mtus *m = (struct ch_mtus *)data;
2846
2847                 if (!is_offload(sc))
2848                         return (EOPNOTSUPP);
2849
2850                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2851                 m->nmtus = NMTUS;
2852                 break;
2853         }
2854         case CHELSIO_GET_MEM: {
2855                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2856                 struct mc7 *mem;
2857                 uint8_t *useraddr;
2858                 u64 buf[32];
2859
2860                 /*
2861                  * Use these to avoid modifying len/addr in the return
2862                  * struct
2863                  */
2864                 uint32_t len = t->len, addr = t->addr;
2865
2866                 if (!is_offload(sc))
2867                         return (EOPNOTSUPP);
2868                 if (!(sc->flags & FULL_INIT_DONE))
2869                         return (EIO);         /* need the memory controllers */
2870                 if ((addr & 0x7) || (len & 0x7))
2871                         return (EINVAL);
2872                 if (t->mem_id == MEM_CM)
2873                         mem = &sc->cm;
2874                 else if (t->mem_id == MEM_PMRX)
2875                         mem = &sc->pmrx;
2876                 else if (t->mem_id == MEM_PMTX)
2877                         mem = &sc->pmtx;
2878                 else
2879                         return (EINVAL);
2880
2881                 /*
2882                  * Version scheme:
2883                  * bits 0..9: chip version
2884                  * bits 10..15: chip revision
2885                  */
2886                 t->version = 3 | (sc->params.rev << 10);
2887                 
2888                 /*
2889                  * Read 256 bytes at a time as len can be large and we don't
2890                  * want to use huge intermediate buffers.
2891                  */
2892                 useraddr = (uint8_t *)t->buf; 
2893                 while (len) {
2894                         unsigned int chunk = min(len, sizeof(buf));
2895
2896                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2897                         if (error)
2898                                 return (-error);
2899                         if (copyout(buf, useraddr, chunk))
2900                                 return (EFAULT);
2901                         useraddr += chunk;
2902                         addr += chunk;
2903                         len -= chunk;
2904                 }
2905                 break;
2906         }
2907         case CHELSIO_READ_TCAM_WORD: {
2908                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2909
2910                 if (!is_offload(sc))
2911                         return (EOPNOTSUPP);
2912                 if (!(sc->flags & FULL_INIT_DONE))
2913                         return (EIO);         /* need MC5 */            
2914                 return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2916         }
2917         case CHELSIO_SET_TRACE_FILTER: {
2918                 struct ch_trace *t = (struct ch_trace *)data;
2919                 const struct trace_params *tp;
2920
2921                 tp = (const struct trace_params *)&t->sip;
2922                 if (t->config_tx)
2923                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2924                                                t->trace_tx);
2925                 if (t->config_rx)
2926                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2927                                                t->trace_rx);
2928                 break;
2929         }
2930         case CHELSIO_SET_PKTSCHED: {
2931                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2932                 if (sc->open_device_map == 0)
2933                         return (EAGAIN);
2934                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2935                     p->binding);
2936                 break;
2937         }
2938         case CHELSIO_IFCONF_GETREGS: {
2939                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2940                 int reglen = cxgb_get_regs_len();
2941                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2942                 if (buf == NULL) {
2943                         return (ENOMEM);
2944                 }
2945                 if (regs->len > reglen)
2946                         regs->len = reglen;
2947                 else if (regs->len < reglen)
2948                         error = ENOBUFS;
2949
2950                 if (!error) {
2951                         cxgb_get_regs(sc, regs, buf);
2952                         error = copyout(buf, regs->data, reglen);
2953                 }
2954                 free(buf, M_DEVBUF);
2955
2956                 break;
2957         }
2958         case CHELSIO_SET_HW_SCHED: {
2959                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2960                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2961
2962                 if ((sc->flags & FULL_INIT_DONE) == 0)
2963                         return (EAGAIN);       /* need TP to be initialized */
2964                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2965                     !in_range(t->channel, 0, 1) ||
2966                     !in_range(t->kbps, 0, 10000000) ||
2967                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2968                     !in_range(t->flow_ipg, 0,
2969                               dack_ticks_to_usec(sc, 0x7ff)))
2970                         return (EINVAL);
2971
2972                 if (t->kbps >= 0) {
2973                         error = t3_config_sched(sc, t->kbps, t->sched);
2974                         if (error < 0)
2975                                 return (-error);
2976                 }
2977                 if (t->class_ipg >= 0)
2978                         t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2979                 if (t->flow_ipg >= 0) {
2980                         t->flow_ipg *= 1000;     /* us -> ns */
2981                         t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2982                 }
2983                 if (t->mode >= 0) {
2984                         int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2985
2986                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2987                                          bit, t->mode ? bit : 0);
2988                 }
2989                 if (t->channel >= 0)
2990                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2991                                          1 << t->sched, t->channel << t->sched);
2992                 break;
2993         }
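             /*
              * CHELSIO_GET_EEPROM below reads whole 32-bit words: the start
              * offset is rounded down to a word boundary for the SEEPROM
              * reads, and copyout() then returns exactly the byte range the
              * caller asked for.
              */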
2994         case CHELSIO_GET_EEPROM: {
2995                 int i;
2996                 struct ch_eeprom *e = (struct ch_eeprom *)data;
2997                 uint8_t *buf;
2998
2999                 if (e->offset > EEPROMSIZE || e->len > EEPROMSIZE - e->offset)
3000                         return (EINVAL);
3001                 buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3002                 if (buf == NULL)
3003                         return (ENOMEM);
3004                 e->magic = EEPROM_MAGIC;
3005                 for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3006                         error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3007                 if (!error)
3008                         error = copyout(buf + e->offset, e->data, e->len);
3009                 free(buf, M_DEVBUF);
3010                 break;
3011         }
3012         case CHELSIO_CLEAR_STATS: {
3013                 if (!(sc->flags & FULL_INIT_DONE))
3014                         return (EAGAIN);
3015
3016                 PORT_LOCK(pi);
3017                 t3_mac_update_stats(&pi->mac);
3018                 memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3019                 PORT_UNLOCK(pi);
3020                 break;
3021         }
3022         case CHELSIO_GET_UP_LA: {
3023                 struct ch_up_la *la = (struct ch_up_la *)data;
3024                 uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3025                 if (buf == NULL) {
3026                         return (ENOMEM);
3027                 }
3028                 if (la->bufsize < LA_BUFSIZE)
3029                         error = ENOBUFS;
3030
3031                 if (!error)
3032                         error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3033                                               &la->bufsize, buf);
3034                 if (!error)
3035                         error = copyout(buf, la->data, la->bufsize);
3036
3037                 free(buf, M_DEVBUF);
3038                 break;
3039         }
3040         case CHELSIO_GET_UP_IOQS: {
3041                 struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3042                 uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3043                 uint32_t *v;
3044
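                     /*
                      * t3_get_up_ioqs() is expected to place four 32-bit
                      * status words (rx/tx enable, rx/tx status) at the head
                      * of the buffer; the code below peels those off before
                      * the per-queue data is copied out to the caller.
                      */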
3045                 if (buf == NULL) {
3046                         return (ENOMEM);
3047                 }
3048                 if (ioqs->bufsize < IOQS_BUFSIZE)
3049                         error = ENOBUFS;
3050
3051                 if (!error)
3052                         error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3053
3054                 if (!error) {
3055                         v = (uint32_t *)buf;
3056
3057                         ioqs->ioq_rx_enable = *v++;
3058                         ioqs->ioq_tx_enable = *v++;
3059                         ioqs->ioq_rx_status = *v++;
3060                         ioqs->ioq_tx_status = *v++;
3061
3062                         error = copyout(v, ioqs->data, ioqs->bufsize);
3063                 }
3064
3065                 free(buf, M_DEVBUF);
3066                 break;
3067         }
3068         case CHELSIO_SET_FILTER: {
3069                 struct ch_filter *f = (struct ch_filter *)data;
3070                 struct filter_info *p;
3071                 unsigned int nfilters = sc->params.mc5.nfilters;
3072
3073                 if (!is_offload(sc))
3074                         return (EOPNOTSUPP);    /* No TCAM */
3075                 if (!(sc->flags & FULL_INIT_DONE))
3076                         return (EAGAIN);        /* mc5 not setup yet */
3077                 if (nfilters == 0)
3078                         return (EBUSY);         /* TOE will use TCAM */
3079
3080                 /* sanity checks: match values need full masks, indices in range */
3081                 if (f->filter_id >= nfilters ||
3082                     (f->val.dip && f->mask.dip != 0xffffffff) ||
3083                     (f->val.sport && f->mask.sport != 0xffff) ||
3084                     (f->val.dport && f->mask.dport != 0xffff) ||
3085                     (f->val.vlan && f->mask.vlan != 0xfff) ||
3086                     (f->val.vlan_prio &&
3087                         f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3088                     (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3089                     f->qset >= SGE_QSETS ||
3090                     sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3091                         return (EINVAL);
3092
3093                 /* Was allocated with M_WAITOK */
3094                 KASSERT(sc->filters, ("filter table NULL\n"));
3095
3096                 p = &sc->filters[f->filter_id];
3097                 if (p->locked)
3098                         return (EPERM);
3099
3100                 bzero(p, sizeof(*p));
3101                 p->sip = f->val.sip;
3102                 p->sip_mask = f->mask.sip;
3103                 p->dip = f->val.dip;
3104                 p->sport = f->val.sport;
3105                 p->dport = f->val.dport;
3106                 p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3107                 p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3108                     FILTER_NO_VLAN_PRI;
3109                 p->mac_hit = f->mac_hit;
3110                 p->mac_vld = f->mac_addr_idx != 0xffff;
3111                 p->mac_idx = f->mac_addr_idx;
3112                 p->pkt_type = f->proto;
3113                 p->report_filter_id = f->want_filter_id;
3114                 p->pass = f->pass;
3115                 p->rss = f->rss;
3116                 p->qset = f->qset;
3117
3118                 error = set_filter(sc, f->filter_id, p);
3119                 if (error == 0)
3120                         p->valid = 1;
3121                 break;
3122         }
3123         case CHELSIO_DEL_FILTER: {
3124                 struct ch_filter *f = (struct ch_filter *)data;
3125                 struct filter_info *p;
3126                 unsigned int nfilters = sc->params.mc5.nfilters;
3127
3128                 if (!is_offload(sc))
3129                         return (EOPNOTSUPP);
3130                 if (!(sc->flags & FULL_INIT_DONE))
3131                         return (EAGAIN);
3132                 if (nfilters == 0 || sc->filters == NULL)
3133                         return (EINVAL);
3134                 if (f->filter_id >= nfilters)
3135                         return (EINVAL);
3136
3137                 p = &sc->filters[f->filter_id];
3138                 if (p->locked)
3139                         return (EPERM);
3140                 if (!p->valid)
3141                         return (EFAULT); /* Read "Bad address" as "Bad index" */
3142
3143                 bzero(p, sizeof(*p));
3144                 p->sip = p->sip_mask = 0xffffffff;
3145                 p->vlan = 0xfff;
3146                 p->vlan_prio = FILTER_NO_VLAN_PRI;
3147                 p->pkt_type = 1;
3148                 error = set_filter(sc, f->filter_id, p);
3149                 break;
3150         }
3151         case CHELSIO_GET_FILTER: {
3152                 struct ch_filter *f = (struct ch_filter *)data;
3153                 struct filter_info *p;
3154                 unsigned int i, nfilters = sc->params.mc5.nfilters;
3155
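                     /*
                      * This case implements a cursor: userland passes
                      * filter_id == 0xffffffff to fetch the first valid
                      * filter and the id of the previous result to fetch the
                      * next one; filter_id comes back as 0xffffffff once the
                      * table is exhausted.  Sketch of the expected loop, with
                      * fd again a descriptor for the control device:
                      *
                      *	struct ch_filter f;
                      *
                      *	f.filter_id = 0xffffffff;
                      *	while (ioctl(fd, CHELSIO_GET_FILTER, &f) == 0 &&
                      *	    f.filter_id != 0xffffffff)
                      *		printf("filter %u\n", f.filter_id);
                      */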
3156                 if (!is_offload(sc))
3157                         return (EOPNOTSUPP);
3158                 if (!(sc->flags & FULL_INIT_DONE))
3159                         return (EAGAIN);
3160                 if (nfilters == 0 || sc->filters == NULL)
3161                         return (EINVAL);
3162
3163                 i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3164                 for (; i < nfilters; i++) {
3165                         p = &sc->filters[i];
3166                         if (!p->valid)
3167                                 continue;
3168
3169                         bzero(f, sizeof(*f));
3170
3171                         f->filter_id = i;
3172                         f->val.sip = p->sip;
3173                         f->mask.sip = p->sip_mask;
3174                         f->val.dip = p->dip;
3175                         f->mask.dip = p->dip ? 0xffffffff : 0;
3176                         f->val.sport = p->sport;
3177                         f->mask.sport = p->sport ? 0xffff : 0;
3178                         f->val.dport = p->dport;
3179                         f->mask.dport = p->dport ? 0xffff : 0;
3180                         f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3181                         f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3182                         f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3183                             0 : p->vlan_prio;
3184                         f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3185                             0 : FILTER_NO_VLAN_PRI;
3186                         f->mac_hit = p->mac_hit;
3187                         f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3188                         f->proto = p->pkt_type;
3189                         f->want_filter_id = p->report_filter_id;
3190                         f->pass = p->pass;
3191                         f->rss = p->rss;
3192                         f->qset = p->qset;
3193
3194                         break;
3195                 }
3196
3197                 if (i == nfilters)
3198                         f->filter_id = 0xffffffff;
3199                 break;
3200         }
3201         default:
3202                 return (EOPNOTSUPP);
3203
3204         }
3205
3206         return (error);
3207 }
3208
3209 static __inline void
3210 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3211     unsigned int end)
3212 {
3213         uint32_t *p = (uint32_t *)(buf + start);
3214
3215         for ( ; start <= end; start += sizeof(uint32_t))
3216                 *p++ = t3_read_reg(ap, start);
3217 }
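     /*
      * Because the destination pointer starts at buf + start, the value of a
      * register at address A always lands at offset A in the caller's
      * buffer; ranges that cxgb_get_regs() skips simply stay zero.  A
      * consumer of the dump can therefore index it directly, e.g.:
      *
      *	uint32_t v = *(const uint32_t *)(buf + A_SG_CONTROL);
      */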
3218
3219 #define T3_REGMAP_SIZE (3 * 1024)
3220 static int
3221 cxgb_get_regs_len(void)
3222 {
3223         return (T3_REGMAP_SIZE);
3224 }
3225
3226 static void
3227 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3228 {
3229
3230         /*
3231          * Version scheme:
3232          * bits 0..9: chip version
3233          * bits 10..15: chip revision
3234          * bit 31: set for PCIe cards
3235          */
3236         regs->version = 3 | (sc->params.rev << 10) |
3237             ((uint32_t)is_pcie(sc) << 31);
3238         /*
3239          * We skip the MAC statistics registers because they are clear-on-read.
3240          * Also reading multi-register stats would need to synchronize with the
3241          * periodic mac stats accumulation.  Hard to justify the complexity.
3242          */
3243         memset(buf, 0, cxgb_get_regs_len());
3244         reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3245         reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3246         reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3247         reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3248         reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3249         reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3250                        XGM_REG(A_XGM_SERDES_STAT3, 1));
3251         reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3252                        XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3253 }
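     /*
      * Decoding the version word follows directly from the scheme documented
      * above, e.g.:
      *
      *	chip = version & 0x3ff;           (3 for T3)
      *	rev  = (version >> 10) & 0x3f;
      *	pcie = (version >> 31) & 1;
      */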
3254
3255 static int
3256 alloc_filters(struct adapter *sc)
3257 {
3258         struct filter_info *p;
3259         unsigned int nfilters = sc->params.mc5.nfilters;
3260
3261         if (nfilters == 0)
3262                 return (0);
3263
3264         p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3265         sc->filters = p;
3266
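             /*
              * Reserve the last entry as a locked catch-all default: it
              * matches any source IP, VLAN and priority, passes packets to
              * the host with RSS, and cannot be modified or deleted by the
              * CHELSIO_SET_FILTER / CHELSIO_DEL_FILTER ioctls, which refuse
              * locked entries.
              */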
3267         p = &sc->filters[nfilters - 1];
3268         p->vlan = 0xfff;
3269         p->vlan_prio = FILTER_NO_VLAN_PRI;
3270         p->pass = p->rss = p->valid = p->locked = 1;
3271
3272         return (0);
3273 }
3274
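     /*
      * Push the software filter table into the MC5 TCAM.  Only the locked
      * entries (i.e. the catch-all default installed by alloc_filters())
      * need to be written here; user-created filters are programmed by
      * set_filter() as each CHELSIO_SET_FILTER request arrives.
      */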
3275 static int
3276 setup_hw_filters(struct adapter *sc)
3277 {
3278         int i, rc;
3279         unsigned int nfilters = sc->params.mc5.nfilters;
3280
3281         if (!sc->filters)
3282                 return (0);
3283
3284         t3_enable_filters(sc);
3285
3286         for (i = rc = 0; i < nfilters && !rc; i++) {
3287                 if (sc->filters[i].locked)
3288                         rc = set_filter(sc, i, &sc->filters[i]);
3289         }
3290
3291         return (rc);
3292 }
3293
3294 static int
3295 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3296 {
3297         int len;
3298         struct mbuf *m;
3299         struct ulp_txpkt *txpkt;
3300         struct work_request_hdr *wr;
3301         struct cpl_pass_open_req *oreq;
3302         struct cpl_set_tcb_field *sreq;
3303
3304         len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3305         KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3306
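             /*
              * Remap the 0-based filter id to its TCAM index: the filter
              * region sits at the top of the MC5, just below the routing
              * region.  With a hypothetical 32K-entry TCAM, 16 routes and 8
              * filters, filter 0 lands at index 32768 - 16 - 8 = 32744 and
              * filter 7 at 32751.
              */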
3307         id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3308               sc->params.mc5.nfilters;
3309
3310         m = m_gethdr(M_WAITOK, MT_DATA);
3311         m->m_len = m->m_pkthdr.len = len;
3312         bzero(mtod(m, char *), len);
3313
3314         wr = mtod(m, struct work_request_hdr *);
3315         wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3316
3317         oreq = (struct cpl_pass_open_req *)(wr + 1);
3318         txpkt = (struct ulp_txpkt *)oreq;
3319         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3320         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3321         OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3322         oreq->local_port = htons(f->dport);
3323         oreq->peer_port = htons(f->sport);
3324         oreq->local_ip = htonl(f->dip);
3325         oreq->peer_ip = htonl(f->sip);
3326         oreq->peer_netmask = htonl(f->sip_mask);
3327         oreq->opt0h = 0;
3328         oreq->opt0l = htonl(F_NO_OFFLOAD);
3329         oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3330                          V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3331                          V_VLAN_PRI(f->vlan_prio >> 1) |
3332                          V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3333                          V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3334                          V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3335
3336         sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3337         set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3338                           (f->report_filter_id << 15) | (1 << 23) |
3339                           ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3340         set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3341         t3_mgmt_tx(sc, m);
3342
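             /*
              * Filters that pass traffic without RSS steer it to a single
              * queue set; a second TCB write binds the filter to the RSS
              * slot recorded for the requested qset in rrss_map[].
              */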
3343         if (f->pass && !f->rss) {
3344                 len = sizeof(*sreq);
3345                 m = m_gethdr(M_WAITOK, MT_DATA);
3346                 m->m_len = m->m_pkthdr.len = len;
3347                 bzero(mtod(m, char *), len);
3348                 sreq = mtod(m, struct cpl_set_tcb_field *);
3349                 sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3350                 mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3351                                  (u64)sc->rrss_map[f->qset] << 19);
3352                 t3_mgmt_tx(sc, m);
3353         }
3354         return (0);
3355 }
3356
3357 static inline void
3358 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3359     unsigned int word, u64 mask, u64 val)
3360 {
3361         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3362         req->reply = V_NO_REPLY(1);
3363         req->cpu_idx = 0;
3364         req->word = htons(word);
3365         req->mask = htobe64(mask);
3366         req->val = htobe64(val);
3367 }
3368
3369 static inline void
3370 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3371     unsigned int word, u64 mask, u64 val)
3372 {
3373         struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3374
3375         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3376         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3377         mk_set_tcb_field(req, tid, word, mask, val);
3378 }