/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <cxgb_include.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_interrupts(adapter_t *);
static void cxgb_teardown_interrupts(adapter_t *);
static void cxgb_init(void *);
static int cxgb_init_locked(struct port_info *);
static int cxgb_uninit_locked(struct port_info *);
static int cxgb_uninit_synchronized(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgb_media_change(struct ifnet *);
static int cxgb_ifm_type(int);
static void cxgb_build_medialist(struct port_info *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_tick_handler(void *, int);
static void cxgb_tick(void *);
static void link_check_callout(void *);
static void check_link_status(void *, int);
static void setup_rss(adapter_t *sc);
static int alloc_filters(struct adapter *);
static int setup_hw_filters(struct adapter *);
static int set_filter(struct adapter *, int, const struct filter_info *);
static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);
static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_update_mac_settings(struct port_info *p);

static device_method_t cxgb_controller_methods[] = {
        DEVMETHOD(device_probe,         cxgb_controller_probe),
        DEVMETHOD(device_attach,        cxgb_controller_attach),
        DEVMETHOD(device_detach,        cxgb_controller_detach),

        /* bus interface */
        DEVMETHOD(bus_print_child,      bus_generic_print_child),
        DEVMETHOD(bus_driver_added,     bus_generic_driver_added),

        { 0, 0 }
};

static driver_t cxgb_controller_driver = {
        "cxgbc",
        cxgb_controller_methods,
        sizeof(struct adapter)
};

static devclass_t       cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
        DEVMETHOD(device_probe,         cxgb_port_probe),
        DEVMETHOD(device_attach,        cxgb_port_attach),
        DEVMETHOD(device_detach,        cxgb_port_detach),
        { 0, 0 }
};

static driver_t cxgb_port_driver = {
        "cxgb",
        cxgb_port_methods,
        0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

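/*
 * Character-device glue: each port gets a node (e.g. /dev/cxgb0, created
 * in cxgb_makedev() below from the interface name) through which userland
 * management tools can issue the extension ioctls handled by
 * cxgb_extension_ioctl().
 */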
static struct cdevsw cxgb_cdevsw = {
       .d_version =    D_VERSION,
       .d_flags =      0,
       .d_open =       cxgb_extension_open,
       .d_close =      cxgb_extension_close,
       .d_ioctl =      cxgb_extension_ioctl,
       .d_name =       "cxgb",
};

static devclass_t       cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");

/*
 * The driver enables offload by default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver uses an auto-queue algorithm by default.
 * To disable it and force a single queue-set per port, use multiq = 0.
 */
static int multiq = 1;
TUNABLE_INT("hw.cxgb.multiq", &multiq);
SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
    "use min(ncpus/ports, 8) queue-sets per port");

/*
 * By default the driver will not update the firmware unless it was compiled
 * against a newer version.
 */
static int force_fw_update = 0;
TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
    "update firmware even if up to date");

int cxgb_use_16k_clusters = -1;
TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");

/*
 * Tune the size of the output queue.
 */
int cxgb_snd_queue_len = IFQ_MAXLEN;
TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
SYSCTL_INT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
    &cxgb_snd_queue_len, 0, "send queue size");

static int nfilters = -1;
TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
    &nfilters, 0, "max number of entries in the filter table");
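
/*
 * All of the knobs above are loader tunables (and, being CTLFLAG_RDTUN,
 * read-only sysctls once the driver is loaded).  A hypothetical
 * /boot/loader.conf using them might look like the following; the values
 * are illustrative only:
 *
 *   hw.cxgb.msi_allowed="1"        # consider MSI and INTx, but not MSI-X
 *   hw.cxgb.multiq="0"             # force a single queue-set per port
 *   hw.cxgb.force_fw_update="1"    # reflash firmware even if up to date
 */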

enum {
        MAX_TXQ_ENTRIES      = 16384,
        MAX_CTRL_TXQ_ENTRIES = 1024,
        MAX_RSPQ_ENTRIES     = 16384,
        MAX_RX_BUFFERS       = 16384,
        MAX_RX_JUMBO_BUFFERS = 16384,
        MIN_TXQ_ENTRIES      = 4,
        MIN_CTRL_TXQ_ENTRIES = 4,
        MIN_RSPQ_ENTRIES     = 32,
        MIN_FL_ENTRIES       = 32,
        MIN_FL_JUMBO_ENTRIES = 32
};

struct filter_info {
        u32 sip;
        u32 sip_mask;
        u32 dip;
        u16 sport;
        u16 dport;
        u32 vlan:12;
        u32 vlan_prio:3;
        u32 mac_hit:1;
        u32 mac_idx:4;
        u32 mac_vld:1;
        u32 pkt_type:2;
        u32 report_filter_id:1;
        u32 pass:1;
        u32 rss:1;
        u32 qset:3;
        u32 locked:1;
        u32 valid:1;
};

enum { FILTER_NO_VLAN_PRI = 7 };

#define EEPROM_MAGIC 0x38E2F10C

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
        uint16_t        vendor;
        uint16_t        device;
        int             index;
        char            *desc;
} cxgb_identifiers[] = {
        {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
        {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
        {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
        {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
        {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
        {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
        {0, 0, 0, NULL}
};

static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);

static __inline char
t3rev2char(struct adapter *adapter)
{
        char rev = 'z';

        switch (adapter->params.rev) {
        case T3_REV_A:
                rev = 'a';
                break;
        case T3_REV_B:
        case T3_REV_B2:
                rev = 'b';
                break;
        case T3_REV_C:
                rev = 'c';
                break;
        }
        return (rev);
}

static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
        struct cxgb_ident *id;

        for (id = cxgb_identifiers; id->desc != NULL; id++) {
                if ((id->vendor == pci_get_vendor(dev)) &&
                    (id->device == pci_get_device(dev))) {
                        return (id);
                }
        }
        return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
        struct cxgb_ident *id;
        const struct adapter_info *ai;

        id = cxgb_get_ident(dev);
        if (id == NULL)
                return (NULL);

        ai = t3_get_adapter_info(id->index);

        return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
        const struct adapter_info *ai;
        const char *ports;
        char buf[80];
        int nports;

        ai = cxgb_get_adapter_info(dev);
        if (ai == NULL)
                return (ENXIO);

        nports = ai->nports0 + ai->nports1;
        if (nports == 1)
                ports = "port";
        else
                ports = "ports";

        snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
        device_set_desc_copy(dev, buf);
        return (BUS_PROBE_DEFAULT);
}

#define FW_FNAME "cxgb_t3fw"
#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
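/*
 * These names are resolved through firmware(9): firmware_get() expects an
 * image registered under the matching name, typically provided by a kernel
 * firmware module (e.g. cxgb_t3fw) that is preloaded or loadable at the
 * time of the lookup.
 */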

static int
upgrade_fw(adapter_t *sc)
{
        const struct firmware *fw;
        int status;
        u32 vers;

        if ((fw = firmware_get(FW_FNAME)) == NULL) {
                device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
                return (ENOENT);
        } else
                device_printf(sc->dev, "installing firmware on card\n");
        status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

        if (status != 0) {
                device_printf(sc->dev, "failed to install firmware: %d\n",
                    status);
        } else {
                t3_get_fw_version(sc, &vers);
                snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
                    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
                    G_FW_VERSION_MICRO(vers));
        }

        firmware_put(fw, FIRMWARE_UNLOAD);

        return (status);
}

/*
 * The cxgb_controller_attach function is responsible for the initial
 * bringup of the device.  Its responsibilities include:
 *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register.
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call hardware specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Setup the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port).
 * 11. Initialize T3 private state.
 * 12. Trigger the LED.
 * 13. Setup offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls.
 *
 * NOTE: Any modification to, or deviation from, this sequence MUST be
 * reflected in this comment.  Failure to do so will result in problems on
 * various error conditions including link flapping.
 */
static int
cxgb_controller_attach(device_t dev)
{
        device_t child;
        const struct adapter_info *ai;
        struct adapter *sc;
        int i, error = 0;
        uint32_t vers;
        int port_qsets = 1;
        int msi_needed, reg;
        char buf[80];

        sc = device_get_softc(dev);
        sc->dev = dev;
        sc->msi_count = 0;
        ai = cxgb_get_adapter_info(dev);

        /* Find the PCIe link width and set max read request to 4KB. */
        if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
                uint16_t lnk;

                lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
                sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
                if (sc->link_width < 8 &&
                    (ai->caps & SUPPORTED_10000baseT_Full)) {
                        device_printf(sc->dev,
                            "PCIe x%d Link, expect reduced performance\n",
                            sc->link_width);
                }

                pci_set_max_read_req(dev, 4096);
        }

        touch_bars(dev);
        pci_enable_busmaster(dev);
        /*
         * Allocate the registers and make them available to the driver.
         * The registers that we care about for NIC mode are in BAR 0.
         */
        sc->regs_rid = PCIR_BAR(0);
        if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->regs_rid, RF_ACTIVE)) == NULL) {
                device_printf(dev, "Cannot allocate BAR region 0\n");
                return (ENXIO);
        }
        sc->udbs_rid = PCIR_BAR(2);
        sc->udbs_res = NULL;
        if (is_offload(sc) &&
            ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
                   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
                device_printf(dev, "Cannot allocate BAR region 2\n");
                error = ENXIO;
                goto out;
        }

        snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
            device_get_unit(dev));
        ADAPTER_LOCK_INIT(sc, sc->lockbuf);

        snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
            device_get_unit(dev));
        snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
            device_get_unit(dev));
        snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
            device_get_unit(dev));

        MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
        MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
        MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);

        sc->bt = rman_get_bustag(sc->regs_res);
        sc->bh = rman_get_bushandle(sc->regs_res);
        sc->mmio_len = rman_get_size(sc->regs_res);

        for (i = 0; i < MAX_NPORTS; i++)
                sc->port[i].adapter = sc;

        if (t3_prep_adapter(sc, ai, 1) < 0) {
                printf("prep adapter failed\n");
                error = ENODEV;
                goto out;
        }
        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
         * enough messages for the queue sets.  If that fails, try falling
         * back to MSI.  If that fails, then try falling back to the legacy
         * interrupt pin model.
         */
        sc->msix_regs_rid = 0x20;
        if ((msi_allowed >= 2) &&
            (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

                if (multiq)
                        port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
                msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;

                if (pci_msix_count(dev) == 0 ||
                    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
                    sc->msi_count != msi_needed) {
                        device_printf(dev, "alloc msix failed - "
                                      "msi_count=%d, msi_needed=%d, err=%d; "
                                      "will try MSI\n", sc->msi_count,
                                      msi_needed, error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                        bus_release_resource(dev, SYS_RES_MEMORY,
                            sc->msix_regs_rid, sc->msix_regs_res);
                        sc->msix_regs_res = NULL;
                } else {
                        sc->flags |= USING_MSIX;
                        sc->cxgb_intr = cxgb_async_intr;
                        device_printf(dev,
                                      "using MSI-X interrupts (%u vectors)\n",
                                      sc->msi_count);
                }
        }

        if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
                sc->msi_count = 1;
                if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
                        device_printf(dev, "alloc msi failed - "
                                      "err=%d; will try INTx\n", error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                } else {
                        sc->flags |= USING_MSI;
                        sc->cxgb_intr = t3_intr_msi;
                        device_printf(dev, "using MSI interrupts\n");
                }
        }
        if (sc->msi_count == 0) {
                device_printf(dev, "using line interrupts\n");
                sc->cxgb_intr = t3b_intr;
        }

        /* Create a private taskqueue thread for handling driver events */
        sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->tq);
        if (sc->tq == NULL) {
                device_printf(dev, "failed to allocate controller task queue\n");
                error = ENOMEM;
                goto out;
        }

        taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
            device_get_nameunit(dev));
        TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);

        /* Create a periodic callout for checking adapter status */
        callout_init(&sc->cxgb_tick_ch, TRUE);

        if (t3_check_fw_version(sc) < 0 || force_fw_update) {
                /*
                 * Warn user that a firmware update will be attempted in init.
                 */
                device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
                    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
                sc->flags &= ~FW_UPTODATE;
        } else {
                sc->flags |= FW_UPTODATE;
        }

        if (t3_check_tpsram_version(sc) < 0) {
                /*
                 * Warn user that an SRAM update will be attempted in init.
                 */
                device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
                    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
                sc->flags &= ~TPS_UPTODATE;
        } else {
                sc->flags |= TPS_UPTODATE;
        }

        /*
         * Create a child device for each MAC.  The ethernet attachment
         * will be done in these children.
         */
        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi;

                if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
                        device_printf(dev, "failed to add child port\n");
                        error = EINVAL;
                        goto out;
                }
                pi = &sc->port[i];
                pi->adapter = sc;
                pi->nqsets = port_qsets;
                pi->first_qset = i * port_qsets;
                pi->port_id = i;
                pi->tx_chan = i >= ai->nports0;
                pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
                sc->rxpkt_map[pi->txpkt_intf] = i;
                sc->portdev[i] = child;
                device_set_softc(child, pi);
        }
        if ((error = bus_generic_attach(dev)) != 0)
                goto out;

        /* Initialize SGE private state. */
        t3_sge_init_adapter(sc);

        t3_led_ready(sc);

        cxgb_offload_init();
        if (is_offload(sc)) {
                setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_ofld(sc);
        }
        error = t3_get_fw_version(sc, &vers);
        if (error)
                goto out;

        snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
            G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
            G_FW_VERSION_MICRO(vers));

        snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
                 ai->desc, is_offload(sc) ? "R" : "",
                 sc->params.vpd.ec, sc->params.vpd.sn);
        device_set_desc_copy(dev, buf);

        snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
                 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
                 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);

        device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
        callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
        t3_add_attach_sysctls(sc);
out:
        if (error)
                cxgb_free(sc);

        return (error);
}

/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
 */
static int
cxgb_controller_detach(device_t dev)
{
        struct adapter *sc;

        sc = device_get_softc(dev);

        cxgb_free(sc);

        return (0);
}

/*
 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
 * down the structures that were built up in cxgb_controller_attach(), and
 * should be the final piece of work done when fully unloading the driver.
 * Its responsibilities include:
 *
 *  1. Shutting down the threads started by the cxgb_controller_attach()
 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
 *  3. Detaching all of the port devices created during the
 *     cxgb_controller_attach() routine.
 *  4. Removing the device children created via cxgb_controller_attach().
 *  5. Releasing PCI resources associated with the device.
 *  6. Turning off the offload support, iff it was turned on.
 *  7. Destroying the mutexes created in cxgb_controller_attach().
 */
static void
cxgb_free(struct adapter *sc)
{
        int i, nqsets = 0;

        ADAPTER_LOCK(sc);
        sc->flags |= CXGB_SHUTDOWN;
        ADAPTER_UNLOCK(sc);

        /*
         * Make sure all child devices are gone.
         */
        bus_generic_detach(sc->dev);
        for (i = 0; i < (sc)->params.nports; i++) {
                if (sc->portdev[i] &&
                    device_delete_child(sc->dev, sc->portdev[i]) != 0)
                        device_printf(sc->dev, "failed to delete child port\n");
                nqsets += sc->port[i].nqsets;
        }

        /*
         * At this point, it is as if cxgb_port_detach has run on all ports, and
         * cxgb_down has run on the adapter.  All interrupts have been silenced,
         * all open devices have been closed.
         */
        KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
                                           __func__, sc->open_device_map));
        for (i = 0; i < sc->params.nports; i++) {
                KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
                                                  __func__, i));
        }

        /*
         * Finish off the adapter's callouts.
         */
        callout_drain(&sc->cxgb_tick_ch);
        callout_drain(&sc->sge_timer_ch);

        /*
         * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
         * sysctls are cleaned up by the kernel linker.
         */
        if (sc->flags & FULL_INIT_DONE) {
                t3_free_sge_resources(sc, nqsets);
                sc->flags &= ~FULL_INIT_DONE;
        }

        /*
         * Release all interrupt resources.
         */
        cxgb_teardown_interrupts(sc);
        if (sc->flags & (USING_MSI | USING_MSIX)) {
                device_printf(sc->dev, "releasing msi message(s)\n");
                pci_release_msi(sc->dev);
        } else {
                device_printf(sc->dev, "no msi message to release\n");
        }

        if (sc->msix_regs_res != NULL) {
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
                    sc->msix_regs_res);
        }

        /*
         * Free the adapter's taskqueue.
         */
        if (sc->tq != NULL) {
                taskqueue_free(sc->tq);
                sc->tq = NULL;
        }

        if (is_offload(sc)) {
                clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_unofld(sc);
        }

#ifdef notyet
        if (sc->flags & CXGB_OFLD_INIT)
                cxgb_offload_deactivate(sc);
#endif
        free(sc->filters, M_DEVBUF);
        t3_sge_free(sc);

        cxgb_offload_exit();

        if (sc->udbs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
                    sc->udbs_res);

        if (sc->regs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
                    sc->regs_res);

        MTX_DESTROY(&sc->mdio_lock);
        MTX_DESTROY(&sc->sge.reg_lock);
        MTX_DESTROY(&sc->elmer_lock);
        ADAPTER_LOCK_DEINIT(sc);
}

/**
 *      setup_sge_qsets - configure SGE Tx/Rx/response queues
 *      @sc: the controller softc
 *
 *      Determines how many sets of SGE queues to use and initializes them.
 *      We support multiple queue sets per port if we have MSI-X, otherwise
 *      just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
        int i, j, err, irq_idx = 0, qset_idx = 0;
        u_int ntxq = SGE_TXQ_PER_SET;

        if ((err = t3_sge_alloc(sc)) != 0) {
                device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
                return (err);
        }

        if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
                irq_idx = -1;

        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi = &sc->port[i];

                for (j = 0; j < pi->nqsets; j++, qset_idx++) {
                        err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
                            (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
                            &sc->params.sge.qset[qset_idx], ntxq, pi);
                        if (err) {
                                t3_free_sge_resources(sc, qset_idx);
                                device_printf(sc->dev,
                                    "t3_sge_alloc_qset failed with %d\n", err);
                                return (err);
                        }
                }
        }

        return (0);
}

static void
cxgb_teardown_interrupts(adapter_t *sc)
{
        int i;

        for (i = 0; i < SGE_QSETS; i++) {
                if (sc->msix_intr_tag[i] == NULL) {

                        /* Should have been setup fully or not at all */
                        KASSERT(sc->msix_irq_res[i] == NULL &&
                                sc->msix_irq_rid[i] == 0,
                                ("%s: half-done interrupt (%d).", __func__, i));

                        continue;
                }

                bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
                                  sc->msix_intr_tag[i]);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
                                     sc->msix_irq_res[i]);

                sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
                sc->msix_irq_rid[i] = 0;
        }

        if (sc->intr_tag) {
                KASSERT(sc->irq_res != NULL,
                        ("%s: half-done interrupt.", __func__));

                bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                     sc->irq_res);

                sc->irq_res = sc->intr_tag = NULL;
                sc->irq_rid = 0;
        }
}

static int
cxgb_setup_interrupts(adapter_t *sc)
{
        struct resource *res;
        void *tag;
        int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);

        sc->irq_rid = intr_flag ? 1 : 0;
        sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
                                             RF_SHAREABLE | RF_ACTIVE);
        if (sc->irq_res == NULL) {
                device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
                              intr_flag, sc->irq_rid);
                err = EINVAL;
                sc->irq_rid = 0;
        } else {
                err = bus_setup_intr(sc->dev, sc->irq_res,
                    INTR_MPSAFE | INTR_TYPE_NET, NULL,
                    sc->cxgb_intr, sc, &sc->intr_tag);

                if (err) {
                        device_printf(sc->dev,
                                      "Cannot set up interrupt (%x, %u, %d)\n",
                                      intr_flag, sc->irq_rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                             sc->irq_res);
                        sc->irq_res = sc->intr_tag = NULL;
                        sc->irq_rid = 0;
                }
        }

        /* That's all for INTx or MSI */
        if (!(intr_flag & USING_MSIX) || err)
                return (err);

        for (i = 0; i < sc->msi_count - 1; i++) {
                rid = i + 2;
                res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
                                             RF_SHAREABLE | RF_ACTIVE);
                if (res == NULL) {
                        device_printf(sc->dev, "Cannot allocate interrupt "
                                      "for message %d\n", rid);
                        err = EINVAL;
                        break;
                }

                err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
                                     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
                if (err) {
                        device_printf(sc->dev, "Cannot set up interrupt "
                                      "for message %d (%d)\n", rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
                        break;
                }

                sc->msix_irq_rid[i] = rid;
                sc->msix_irq_res[i] = res;
                sc->msix_intr_tag[i] = tag;
        }

        if (err)
                cxgb_teardown_interrupts(sc);

        return (err);
}

static int
cxgb_port_probe(device_t dev)
{
        struct port_info *p;
        char buf[80];
        const char *desc;

        p = device_get_softc(dev);
        desc = p->phy.desc;
        snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
        device_set_desc_copy(dev, buf);
        return (0);
}

static int
cxgb_makedev(struct port_info *pi)
{

        pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
            UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));

        if (pi->port_cdev == NULL)
                return (ENOMEM);

        pi->port_cdev->si_drv1 = (void *)pi;

        return (0);
}

#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
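/*
 * Note: CXGB_CAP_ENABLE masks TSO6 out of the default-enabled set, but
 * TSO6 remains advertised in if_capabilities, so it can still be enabled
 * administratively later (e.g. via ifconfig(8) and the driver's SIOCSIFCAP
 * handling).
 */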

static int
cxgb_port_attach(device_t dev)
{
        struct port_info *p;
        struct ifnet *ifp;
        int err;
        struct adapter *sc;

        p = device_get_softc(dev);
        sc = p->adapter;
        snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
            device_get_unit(device_get_parent(dev)), p->port_id);
        PORT_LOCK_INIT(p, p->lockbuf);

        callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
        TASK_INIT(&p->link_check_task, 0, check_link_status, p);

        /* Allocate an ifnet object and set it up */
        ifp = p->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "Cannot allocate ifnet\n");
                return (ENOMEM);
        }

        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_init = cxgb_init;
        ifp->if_softc = p;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = cxgb_ioctl;
        ifp->if_start = cxgb_start;

        ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
        IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
        IFQ_SET_READY(&ifp->if_snd);

        ifp->if_capabilities = CXGB_CAP;
        ifp->if_capenable = CXGB_CAP_ENABLE;
        ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;

        /*
         * Disable TSO on 4-port - it isn't supported by the firmware.
         */
        if (sc->params.nports > 2) {
                ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_hwassist &= ~CSUM_TSO;
        }

        ether_ifattach(ifp, p->hw_addr);
        ifp->if_transmit = cxgb_transmit;
        ifp->if_qflush = cxgb_qflush;

#ifdef DEFAULT_JUMBO
        if (sc->params.nports <= 2)
                ifp->if_mtu = ETHERMTU_JUMBO;
#endif
        if ((err = cxgb_makedev(p)) != 0) {
                printf("makedev failed %d\n", err);
                return (err);
        }

        /* Create a list of media supported by this port */
        ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
            cxgb_media_status);
        cxgb_build_medialist(p);

        t3_sge_init_port(p);

        return (err);
}

/*
 * cxgb_port_detach() is called via the device_detach method when
 * cxgb_free() calls bus_generic_detach().  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interface lists, etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 */
static int
cxgb_port_detach(device_t dev)
{
        struct port_info *p;
        struct adapter *sc;
        int i;

        p = device_get_softc(dev);
        sc = p->adapter;

        /* Tell cxgb_ioctl and if_init that the port is going away */
        ADAPTER_LOCK(sc);
        SET_DOOMED(p);
        wakeup(&sc->flags);
        while (IS_BUSY(sc))
                mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
        SET_BUSY(sc);
        ADAPTER_UNLOCK(sc);

        if (p->port_cdev != NULL)
                destroy_dev(p->port_cdev);

        cxgb_uninit_synchronized(p);
        ether_ifdetach(p->ifp);

        for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
                struct sge_qset *qs = &sc->sge.qs[i];
                struct sge_txq *txq = &qs->txq[TXQ_ETH];

                callout_drain(&txq->txq_watchdog);
                callout_drain(&txq->txq_timer);
        }

        PORT_LOCK_DEINIT(p);
        if_free(p->ifp);
        p->ifp = NULL;

        ADAPTER_LOCK(sc);
        CLR_BUSY(sc);
        wakeup_one(&sc->flags);
        ADAPTER_UNLOCK(sc);
        return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
        u_int fw_status[4];

        if (sc->flags & FULL_INIT_DONE) {
                t3_sge_stop(sc);
                t3_write_reg(sc, A_XGM_TX_CTRL, 0);
                t3_write_reg(sc, A_XGM_RX_CTRL, 0);
                t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
                t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
                t3_intr_disable(sc);
        }
        device_printf(sc->dev, "encountered fatal error, operation suspended\n");
        if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
                device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
                    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
        device_t dev;
        struct pci_devinfo *dinfo;
        pcicfgregs *cfg;
        uint32_t status;
        uint8_t ptr;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);
        cfg = &dinfo->cfg;

        status = pci_read_config(dev, PCIR_STATUS, 2);
        if (!(status & PCIM_STATUS_CAPPRESENT))
                return (0);

        switch (cfg->hdrtype & PCIM_HDRTYPE) {
        case 0:
        case 1:
                ptr = PCIR_CAP_PTR;
                break;
        case 2:
                ptr = PCIR_CAP_PTR_2;
                break;
        default:
                return (0);
        }
        ptr = pci_read_config(dev, ptr, 1);

        while (ptr != 0) {
                if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
                        return (ptr);
                ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
        }

        return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_save(dev, dinfo, 0);
        return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_restore(dev, dinfo);
        return (0);
}

/**
 *      t3_os_link_changed - handle link status changes
 *      @adapter: the adapter associated with the link change
 *      @port_id: the port index whose link status has changed
 *      @link_status: the new status of the link
 *      @speed: the new speed setting
 *      @duplex: the new duplex setting
 *      @fc: the new flow-control setting
 *      @mac_was_reset: non-zero if the MAC settings were lost to a MAC reset
 *
 *      This is the OS-dependent handler for link status changes.  The OS
 *      neutral handler takes care of most of the processing for these events,
 *      then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc, int mac_was_reset)
{
        struct port_info *pi = &adapter->port[port_id];
        struct ifnet *ifp = pi->ifp;

        /* no race with detach, so ifp should always be good */
        KASSERT(ifp, ("%s: if detached.", __func__));

        /* Reapply mac settings if they were lost due to a reset */
        if (mac_was_reset) {
                PORT_LOCK(pi);
                cxgb_update_mac_settings(pi);
                PORT_UNLOCK(pi);
        }

        if (link_status) {
                ifp->if_baudrate = IF_Mbps(speed);
                if_link_state_change(ifp, LINK_STATE_UP);
        } else
                if_link_state_change(ifp, LINK_STATE_DOWN);
}

/**
 *      t3_os_phymod_changed - handle PHY module changes
 *      @adap: the adapter
 *      @port_id: the port index of the PHY reporting the module change
 *
 *      This is the OS-dependent handler for PHY module changes.  It is
 *      invoked when a PHY module is removed or inserted for any OS-specific
 *      processing.
 */
void t3_os_phymod_changed(struct adapter *adap, int port_id)
{
        static const char *mod_str[] = {
                NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
        };
        struct port_info *pi = &adap->port[port_id];
        int mod = pi->phy.modtype;

        if (mod != pi->media.ifm_cur->ifm_data)
                cxgb_build_medialist(pi);

        if (mod == phy_modtype_none)
                if_printf(pi->ifp, "PHY module unplugged\n");
        else {
                KASSERT(mod < ARRAY_SIZE(mod_str),
                        ("invalid PHY module type %d", mod));
                if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
        }
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

        /*
         * The ifnet might not be allocated before this gets called, as
         * this is called early on in attach by t3_prep_adapter, so just
         * save the address off in the port structure.
         */
        if (cxgb_debug)
                printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
        bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/*
 * Programs the XGMAC based on the settings in the ifnet.  These settings
 * include MTU, MAC address, mcast addresses, etc.
 */
static void
cxgb_update_mac_settings(struct port_info *p)
{
        struct ifnet *ifp = p->ifp;
        struct t3_rx_mode rm;
        struct cmac *mac = &p->mac;
        int mtu, hwtagging;

        PORT_LOCK_ASSERT_OWNED(p);

        bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);

        mtu = ifp->if_mtu;
        if (ifp->if_capenable & IFCAP_VLAN_MTU)
                mtu += ETHER_VLAN_ENCAP_LEN;

        hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;

        t3_mac_set_mtu(mac, mtu);
        t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
        t3_mac_set_address(mac, 0, p->hw_addr);
        t3_init_rx_mode(&rm, p);
        t3_mac_set_rx_mode(mac, &rm);
}

static int
await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
                              unsigned long n)
{
        int attempts = 5;

        while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
                if (!--attempts)
                        return (ETIMEDOUT);
                t3_os_sleep(10);
        }
        return (0);
}

static int
init_tp_parity(struct adapter *adap)
{
        int i;
        struct mbuf *m;
        struct cpl_set_tcb_field *greq;
        unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;

        t3_tp_set_offload_mode(adap, 1);

        for (i = 0; i < 16; i++) {
                struct cpl_smt_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_smt_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
                req->iff = i;
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_l2t_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_l2t_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
                req->params = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_rte_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_rte_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
                req->l2t_idx = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        m = m_gethdr(M_WAITOK, MT_DATA);
        greq = mtod(m, struct cpl_set_tcb_field *);
        m->m_len = m->m_pkthdr.len = sizeof(*greq);
        memset(greq, 0, sizeof(*greq));
        greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
        greq->mask = htobe64(1);
        t3_mgmt_tx(adap, m);

        i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
        t3_tp_set_offload_mode(adap, 0);
        return (i);
}

/**
 *      setup_rss - configure Receive Side Steering (per-queue connection demux)
 *      @adap: the adapter
 *
 *      Sets up RSS to distribute packets to multiple receive queues.  We
 *      configure the RSS CPU lookup table to distribute to the number of HW
 *      receive queues, and the response queue lookup table to narrow that
 *      down to the response queues actually configured for each port.
 *      We always configure the RSS mapping for two ports since the mapping
 *      table has plenty of entries.
 */
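/*
 * Worked example (hypothetical configuration): with two ports of four
 * queue-sets each, one port per channel, nq[0] = nq[1] = 4.  The first
 * half of rspq_map then cycles through qsets 0-3, the second half through
 * qsets 4-7, and rrss_map records the first table index that points at
 * each qset.
 */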
static void
setup_rss(adapter_t *adap)
{
        int i;
        u_int nq[2];
        uint8_t cpus[SGE_QSETS + 1];
        uint16_t rspq_map[RSS_TABLE_SIZE];

        for (i = 0; i < SGE_QSETS; ++i)
                cpus[i] = i;
        cpus[SGE_QSETS] = 0xff;

        nq[0] = nq[1] = 0;
        for_each_port(adap, i) {
                const struct port_info *pi = adap2pinfo(adap, i);

                nq[pi->tx_chan] += pi->nqsets;
        }
        for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
                rspq_map[i] = nq[0] ? i % nq[0] : 0;
                rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
        }

        /* Calculate the reverse RSS map table */
        for (i = 0; i < SGE_QSETS; ++i)
                adap->rrss_map[i] = 0xff;
        for (i = 0; i < RSS_TABLE_SIZE; ++i)
                if (adap->rrss_map[rspq_map[i]] == 0xff)
                        adap->rrss_map[rspq_map[i]] = i;

        t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
                      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
                      F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
                      cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver
 * after dealing with any active network taps.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
        int ret;

        ret = t3_offload_tx(tdev, m);
        return (ret);
}

static int
write_smt_entry(struct adapter *adapter, int idx)
{
        struct port_info *pi = &adapter->port[idx];
        struct cpl_smt_write_req *req;
        struct mbuf *m;

        if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
                return (ENOMEM);

        req = mtod(m, struct cpl_smt_write_req *);
        m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);

        req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
        req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
        req->iff = idx;
        memset(req->src_mac1, 0, sizeof(req->src_mac1));
        memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

        m_set_priority(m, 1);

        offload_tx(&adapter->tdev, m);

        return (0);
}

static int
init_smt(struct adapter *adapter)
{
        int i;

        for_each_port(adapter, i)
                write_smt_entry(adapter, i);
        return (0);
}
1488
1489 static void
1490 init_port_mtus(adapter_t *adapter)
1491 {
1492         unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1493
1494         t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1495 }
1496
1497 static void
1498 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1499                               int hi, int port)
1500 {
1501         struct mbuf *m;
1502         struct mngt_pktsched_wr *req;
1503
1504         m = m_gethdr(M_DONTWAIT, MT_DATA);
1505         if (m) {        
1506                 req = mtod(m, struct mngt_pktsched_wr *);
1507                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1508                 req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1509                 req->sched = sched;
1510                 req->idx = qidx;
1511                 req->min = lo;
1512                 req->max = hi;
1513                 req->binding = port;
1514                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1515                 t3_mgmt_tx(adap, m);
1516         }
1517 }
1518
1519 static void
1520 bind_qsets(adapter_t *sc)
1521 {
1522         int i, j;
1523
1524         for (i = 0; i < (sc)->params.nports; ++i) {
1525                 const struct port_info *pi = adap2pinfo(sc, i);
1526
1527                 for (j = 0; j < pi->nqsets; ++j) {
1528                         send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1529                                           -1, pi->tx_chan);
1530
1531                 }
1532         }
1533 }
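
/*
 * Each send_pktsched_cmd() call above binds one queue set to its port's TX
 * channel (sched 1, queue index pi->first_qset + j).  The -1 min/max
 * arguments presumably leave the scheduler's rate bounds unchanged, matching
 * the "-1 means leave as is" convention the ioctl paths in this file use.
 */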
1534
1535 static void
1536 update_tpeeprom(struct adapter *adap)
1537 {
1538         const struct firmware *tpeeprom;
1539
1540         uint32_t version;
1541         unsigned int major, minor;
1542         int ret, len;
1543         char rev, name[32];
1544
1545         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1546
1547         major = G_TP_VERSION_MAJOR(version);
1548         minor = G_TP_VERSION_MINOR(version);
1549         if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
1550                 return;
1551
1552         rev = t3rev2char(adap);
1553         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1554
1555         tpeeprom = firmware_get(name);
1556         if (tpeeprom == NULL) {
1557                 device_printf(adap->dev,
1558                               "could not load TP EEPROM: unable to load %s\n",
1559                               name);
1560                 return;
1561         }
1562
1563         len = tpeeprom->datasize - 4;
1564         
1565         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1566         if (ret)
1567                 goto release_tpeeprom;
1568
1569         if (len != TP_SRAM_LEN) {
1570                 device_printf(adap->dev,
1571                               "%s length is wrong len=%d expected=%d\n", name,
1572                               len, TP_SRAM_LEN);
1573                 goto release_tpeeprom;
1574         }
1575         
1576         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1577             TP_SRAM_OFFSET);
1578         
1579         if (!ret) {
1580                 device_printf(adap->dev,
1581                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1582                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1583         } else 
1584                 device_printf(adap->dev,
1585                               "Protocol SRAM image update in EEPROM failed\n");
1586
1587 release_tpeeprom:
1588         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1589         
1590         return;
1591 }
1592
1593 static int
1594 update_tpsram(struct adapter *adap)
1595 {
1596         const struct firmware *tpsram;
1597         int ret;
1598         char rev, name[32];
1599
1600         rev = t3rev2char(adap);
1601         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1602
1603         update_tpeeprom(adap);
1604
1605         tpsram = firmware_get(name);
1606         if (tpsram == NULL) {
1607                 device_printf(adap->dev, "could not load TP SRAM: %s\n", name);
1608                 return (EINVAL);
1609         } else
1610                 device_printf(adap->dev, "updating TP SRAM\n");
1611         
1612         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1613         if (ret)
1614                 goto release_tpsram;    
1615
1616         ret = t3_set_proto_sram(adap, tpsram->data);
1617         if (ret)
1618                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1619
1620 release_tpsram:
1621         firmware_put(tpsram, FIRMWARE_UNLOAD);
1622         
1623         return (ret);
1624 }
1625
1626 /**
1627  *      cxgb_up - enable the adapter
1628  *      @adap: adapter being enabled
1629  *
1630  *      Called when the first port is enabled, this function performs the
1631  *      actions necessary to make an adapter operational, such as completing
1632  *      the initialization of HW modules, and enabling interrupts.
1633  */
1634 static int
1635 cxgb_up(struct adapter *sc)
1636 {
1637         int err = 0;
1638         unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1639
1640         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1641                                            __func__, sc->open_device_map));
1642
1643         if ((sc->flags & FULL_INIT_DONE) == 0) {
1644
1645                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1646
1647                 if ((sc->flags & FW_UPTODATE) == 0)
1648                         if ((err = upgrade_fw(sc)))
1649                                 goto out;
1650
1651                 if ((sc->flags & TPS_UPTODATE) == 0)
1652                         if ((err = update_tpsram(sc)))
1653                                 goto out;
1654
1655                 if (is_offload(sc) && nfilters != 0) {
1656                         sc->params.mc5.nservers = 0;
1657
1658                         if (nfilters < 0)
1659                                 sc->params.mc5.nfilters = mxf;
1660                         else
1661                                 sc->params.mc5.nfilters = min(nfilters, mxf);
1662                 }
1663
1664                 err = t3_init_hw(sc, 0);
1665                 if (err)
1666                         goto out;
1667
1668                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1669                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1670
1671                 err = setup_sge_qsets(sc);
1672                 if (err)
1673                         goto out;
1674
1675                 alloc_filters(sc);
1676                 setup_rss(sc);
1677
1678                 t3_intr_clear(sc);
1679                 err = cxgb_setup_interrupts(sc);
1680                 if (err)
1681                         goto out;
1682
1683                 t3_add_configured_sysctls(sc);
1684                 sc->flags |= FULL_INIT_DONE;
1685         }
1686
1687         t3_intr_clear(sc);
1688         t3_sge_start(sc);
1689         t3_intr_enable(sc);
1690
1691         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1692             is_offload(sc) && init_tp_parity(sc) == 0)
1693                 sc->flags |= TP_PARITY_INIT;
1694
1695         if (sc->flags & TP_PARITY_INIT) {
1696                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1697                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1698         }
1699         
1700         if (!(sc->flags & QUEUES_BOUND)) {
1701                 bind_qsets(sc);
1702                 setup_hw_filters(sc);
1703                 sc->flags |= QUEUES_BOUND;              
1704         }
1705
1706         t3_sge_reset_adapter(sc);
1707 out:
1708         return (err);
1709 }
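
/*
 * The FULL_INIT_DONE block above is the one-time part of bring-up: firmware
 * and TP SRAM upgrades, MC5 filter sizing, t3_init_hw(), SGE queue set
 * allocation, RSS programming and interrupt setup.  The rest runs on every
 * call, i.e. whenever the first port is opened, except for the queue
 * binding, which QUEUES_BOUND limits to once.
 */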
1710
1711 /*
1712  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1713  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1714  * during controller_detach, not here.
1715  */
1716 static void
1717 cxgb_down(struct adapter *sc)
1718 {
1719         t3_sge_stop(sc);
1720         t3_intr_disable(sc);
1721 }
1722
1723 static int
1724 offload_open(struct port_info *pi)
1725 {
1726         struct adapter *sc = pi->adapter;
1727         struct t3cdev *tdev = &sc->tdev;
1728
1729         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1730
1731         t3_tp_set_offload_mode(sc, 1);
1732         tdev->lldev = pi->ifp;
1733         init_port_mtus(sc);
1734         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1735                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1736         init_smt(sc);
1737         cxgb_add_clients(tdev);
1738
1739         return (0);
1740 }
1741
1742 static int
1743 offload_close(struct t3cdev *tdev)
1744 {
1745         struct adapter *adapter = tdev2adap(tdev);
1746
1747         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1748                 return (0);
1749
1750         /* Call back all registered clients */
1751         cxgb_remove_clients(tdev);
1752
1753         tdev->lldev = NULL;
1754         cxgb_set_dummy_ops(tdev);
1755         t3_tp_set_offload_mode(adapter, 0);
1756
1757         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1758
1759         return (0);
1760 }
1761
1762 /*
1763  * if_init for cxgb ports.
1764  */
1765 static void
1766 cxgb_init(void *arg)
1767 {
1768         struct port_info *p = arg;
1769         struct adapter *sc = p->adapter;
1770
1771         ADAPTER_LOCK(sc);
1772         cxgb_init_locked(p); /* releases adapter lock */
1773         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1774 }
1775
1776 static int
1777 cxgb_init_locked(struct port_info *p)
1778 {
1779         struct adapter *sc = p->adapter;
1780         struct ifnet *ifp = p->ifp;
1781         struct cmac *mac = &p->mac;
1782         int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1783
1784         ADAPTER_LOCK_ASSERT_OWNED(sc);
1785
1786         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1787                 gave_up_lock = 1;
1788                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1789                         rc = EINTR;
1790                         goto done;
1791                 }
1792         }
1793         if (IS_DOOMED(p)) {
1794                 rc = ENXIO;
1795                 goto done;
1796         }
1797         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1798
1799         /*
1800          * The code that runs during one-time adapter initialization can sleep
1801          * so it's important not to hold any locks across it.
1802          */
1803         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1804
1805         if (may_sleep) {
1806                 SET_BUSY(sc);
1807                 gave_up_lock = 1;
1808                 ADAPTER_UNLOCK(sc);
1809         }
1810
1811         if (sc->open_device_map == 0) {
1812                 if ((rc = cxgb_up(sc)) != 0)
1813                         goto done;
1814
1815                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1816                         log(LOG_WARNING,
1817                             "Could not initialize offload capabilities\n");
1818         }
1819
1820         PORT_LOCK(p);
1821         if (isset(&sc->open_device_map, p->port_id) &&
1822             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1823                 PORT_UNLOCK(p);
1824                 goto done;
1825         }
1826         t3_port_intr_enable(sc, p->port_id);
1827         if (!mac->multiport) 
1828                 t3_mac_init(mac);
1829         cxgb_update_mac_settings(p);
1830         t3_link_start(&p->phy, mac, &p->link_config);
1831         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1832         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1833         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1834         PORT_UNLOCK(p);
1835
1836         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1837                 struct sge_qset *qs = &sc->sge.qs[i];
1838                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1839
1840                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1841                                  txq->txq_watchdog.c_cpu);
1842         }
1843
1844         /* all ok */
1845         setbit(&sc->open_device_map, p->port_id);
1846         callout_reset(&p->link_check_ch,
1847             p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1848             link_check_callout, p);
1849
1850 done:
1851         if (may_sleep) {
1852                 ADAPTER_LOCK(sc);
1853                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1854                 CLR_BUSY(sc);
1855         }
1856         if (gave_up_lock)
1857                 wakeup_one(&sc->flags);
1858         ADAPTER_UNLOCK(sc);
1859         return (rc);
1860 }
1861
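/*
 * cxgb_init_locked() and cxgb_uninit_locked() serialize against each other
 * with the IS_BUSY/SET_BUSY flag rather than by holding the adapter lock
 * across their (potentially sleeping) work: a caller that finds the
 * controller busy sleeps on &sc->flags and is woken by the wakeup_one() the
 * current owner issues after clearing BUSY.
 */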
1862 static int
1863 cxgb_uninit_locked(struct port_info *p)
1864 {
1865         struct adapter *sc = p->adapter;
1866         int rc;
1867
1868         ADAPTER_LOCK_ASSERT_OWNED(sc);
1869
1870         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1871                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1872                         rc = EINTR;
1873                         goto done;
1874                 }
1875         }
1876         if (IS_DOOMED(p)) {
1877                 rc = ENXIO;
1878                 goto done;
1879         }
1880         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1881         SET_BUSY(sc);
1882         ADAPTER_UNLOCK(sc);
1883
1884         rc = cxgb_uninit_synchronized(p);
1885
1886         ADAPTER_LOCK(sc);
1887         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1888         CLR_BUSY(sc);
1889         wakeup_one(&sc->flags);
1890 done:
1891         ADAPTER_UNLOCK(sc);
1892         return (rc);
1893 }
1894
1895 /*
1896  * Called on "ifconfig down", and from port_detach
1897  */
1898 static int
1899 cxgb_uninit_synchronized(struct port_info *pi)
1900 {
1901         struct adapter *sc = pi->adapter;
1902         struct ifnet *ifp = pi->ifp;
1903
1904         /*
1905          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1906          */
1907         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1908
1909         /*
1910          * Clear this port's bit from the open device map, and then drain all
1911          * the tasks that can access/manipulate this port's port_info or ifp.
1912          * We disable this port's interrupts here and so the slow/ext
1913          * interrupt tasks won't be enqueued.  The tick task will continue to
1914          * be enqueued every second but the runs after this drain will not see
1915          * this port in the open device map.
1916          *
1917          * A well behaved task must take open_device_map into account and ignore
1918          * ports that are not open.
1919          */
1920         clrbit(&sc->open_device_map, pi->port_id);
1921         t3_port_intr_disable(sc, pi->port_id);
1922         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1923         taskqueue_drain(sc->tq, &sc->tick_task);
1924
1925         callout_drain(&pi->link_check_ch);
1926         taskqueue_drain(sc->tq, &pi->link_check_task);
1927
1928         PORT_LOCK(pi);
1929         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1930
1931         /* disable pause frames */
1932         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1933
1934         /* Reset RX FIFO HWM */
1935         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1936                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1937
1938         DELAY(100 * 1000);
1939
1940         /* Wait for TXFIFO empty */
1941         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1942                         F_TXFIFO_EMPTY, 1, 20, 5);
1943
1944         DELAY(100 * 1000);
1945         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1946
1948         pi->phy.ops->power_down(&pi->phy, 1);
1949
1950         PORT_UNLOCK(pi);
1951
1952         pi->link_config.link_ok = 0;
1953         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1954
1955         if ((sc->open_device_map & PORT_MASK) == 0)
1956                 offload_close(&sc->tdev);
1957
1958         if (sc->open_device_map == 0)
1959                 cxgb_down(pi->adapter);
1960
1961         return (0);
1962 }
1963
1964 /*
1965  * Mark lro enabled or disabled in all qsets for this port
1966  */
1967 static int
1968 cxgb_set_lro(struct port_info *p, int enabled)
1969 {
1970         int i;
1971         struct adapter *adp = p->adapter;
1972         struct sge_qset *q;
1973
1974         for (i = 0; i < p->nqsets; i++) {
1975                 q = &adp->sge.qs[p->first_qset + i];
1976                 q->lro.enabled = (enabled != 0);
1977         }
1978         return (0);
1979 }
1980
1981 static int
1982 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1983 {
1984         struct port_info *p = ifp->if_softc;
1985         struct adapter *sc = p->adapter;
1986         struct ifreq *ifr = (struct ifreq *)data;
1987         int flags, error = 0, mtu;
1988         uint32_t mask;
1989
1990         switch (command) {
1991         case SIOCSIFMTU:
1992                 ADAPTER_LOCK(sc);
1993                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1994                 if (error) {
1995 fail:
1996                         ADAPTER_UNLOCK(sc);
1997                         return (error);
1998                 }
1999
2000                 mtu = ifr->ifr_mtu;
2001                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2002                         error = EINVAL;
2003                 } else {
2004                         ifp->if_mtu = mtu;
2005                         PORT_LOCK(p);
2006                         cxgb_update_mac_settings(p);
2007                         PORT_UNLOCK(p);
2008                 }
2009                 ADAPTER_UNLOCK(sc);
2010                 break;
2011         case SIOCSIFFLAGS:
2012                 ADAPTER_LOCK(sc);
2013                 if (IS_DOOMED(p)) {
2014                         error = ENXIO;
2015                         goto fail;
2016                 }
2017                 if (ifp->if_flags & IFF_UP) {
2018                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2019                                 flags = p->if_flags;
2020                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2021                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2022                                         if (IS_BUSY(sc)) {
2023                                                 error = EBUSY;
2024                                                 goto fail;
2025                                         }
2026                                         PORT_LOCK(p);
2027                                         cxgb_update_mac_settings(p);
2028                                         PORT_UNLOCK(p);
2029                                 }
2030                                 ADAPTER_UNLOCK(sc);
2031                         } else
2032                                 error = cxgb_init_locked(p);
2033                         p->if_flags = ifp->if_flags;
2034                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2035                         error = cxgb_uninit_locked(p);
2036                 else
2037                         ADAPTER_UNLOCK(sc);
2038
2039                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2040                 break;
2041         case SIOCADDMULTI:
2042         case SIOCDELMULTI:
2043                 ADAPTER_LOCK(sc);
2044                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2045                 if (error)
2046                         goto fail;
2047
2048                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2049                         PORT_LOCK(p);
2050                         cxgb_update_mac_settings(p);
2051                         PORT_UNLOCK(p);
2052                 }
2053                 ADAPTER_UNLOCK(sc);
2054
2055                 break;
2056         case SIOCSIFCAP:
2057                 ADAPTER_LOCK(sc);
2058                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2059                 if (error)
2060                         goto fail;
2061
2062                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2063                 if (mask & IFCAP_TXCSUM) {
2064                         ifp->if_capenable ^= IFCAP_TXCSUM;
2065                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2066
2067                         if (IFCAP_TSO & ifp->if_capenable &&
2068                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2069                                 ifp->if_capenable &= ~IFCAP_TSO;
2070                                 ifp->if_hwassist &= ~CSUM_TSO;
2071                                 if_printf(ifp,
2072                                     "tso disabled due to -txcsum.\n");
2073                         }
2074                 }
2075                 if (mask & IFCAP_RXCSUM)
2076                         ifp->if_capenable ^= IFCAP_RXCSUM;
2077                 if (mask & IFCAP_TSO4) {
2078                         ifp->if_capenable ^= IFCAP_TSO4;
2079
2080                         if (IFCAP_TSO & ifp->if_capenable) {
2081                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2082                                         ifp->if_hwassist |= CSUM_TSO;
2083                                 else {
2084                                         ifp->if_capenable &= ~IFCAP_TSO;
2085                                         ifp->if_hwassist &= ~CSUM_TSO;
2086                                         if_printf(ifp,
2087                                             "enable txcsum first.\n");
2088                                         error = EAGAIN;
2089                                 }
2090                         } else
2091                                 ifp->if_hwassist &= ~CSUM_TSO;
2092                 }
2093                 if (mask & IFCAP_LRO) {
2094                         ifp->if_capenable ^= IFCAP_LRO;
2095
2096                         /* Safe to do this even if cxgb_up not called yet */
2097                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2098                 }
2099                 if (mask & IFCAP_VLAN_HWTAGGING) {
2100                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2101                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2102                                 PORT_LOCK(p);
2103                                 cxgb_update_mac_settings(p);
2104                                 PORT_UNLOCK(p);
2105                         }
2106                 }
2107                 if (mask & IFCAP_VLAN_MTU) {
2108                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2109                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2110                                 PORT_LOCK(p);
2111                                 cxgb_update_mac_settings(p);
2112                                 PORT_UNLOCK(p);
2113                         }
2114                 }
2115                 if (mask & IFCAP_VLAN_HWTSO)
2116                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2117                 if (mask & IFCAP_VLAN_HWCSUM)
2118                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2119
2120 #ifdef VLAN_CAPABILITIES
2121                 VLAN_CAPABILITIES(ifp);
2122 #endif
2123                 ADAPTER_UNLOCK(sc);
2124                 break;
2125         case SIOCSIFMEDIA:
2126         case SIOCGIFMEDIA:
2127                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2128                 break;
2129         default:
2130                 error = ether_ioctl(ifp, command, data);
2131         }
2132
2133         return (error);
2134 }
2135
2136 static int
2137 cxgb_media_change(struct ifnet *ifp)
2138 {
2139         return (EOPNOTSUPP);
2140 }
2141
2142 /*
2143  * Translates phy->modtype to the correct Ethernet media subtype.
2144  */
2145 static int
2146 cxgb_ifm_type(int mod)
2147 {
2148         switch (mod) {
2149         case phy_modtype_sr:
2150                 return (IFM_10G_SR);
2151         case phy_modtype_lr:
2152                 return (IFM_10G_LR);
2153         case phy_modtype_lrm:
2154                 return (IFM_10G_LRM);
2155         case phy_modtype_twinax:
2156                 return (IFM_10G_TWINAX);
2157         case phy_modtype_twinax_long:
2158                 return (IFM_10G_TWINAX_LONG);
2159         case phy_modtype_none:
2160                 return (IFM_NONE);
2161         case phy_modtype_unknown:
2162                 return (IFM_UNKNOWN);
2163         }
2164
2165         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2166         return (IFM_UNKNOWN);
2167 }
2168
2169 /*
2170  * Rebuilds the ifmedia list for this port, and sets the current media.
2171  */
2172 static void
2173 cxgb_build_medialist(struct port_info *p)
2174 {
2175         struct cphy *phy = &p->phy;
2176         struct ifmedia *media = &p->media;
2177         int mod = phy->modtype;
2178         int m = IFM_ETHER | IFM_FDX;
2179
2180         PORT_LOCK(p);
2181
2182         ifmedia_removeall(media);
2183         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2184                 /* Copper (RJ45) */
2185
2186                 if (phy->caps & SUPPORTED_10000baseT_Full)
2187                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2188
2189                 if (phy->caps & SUPPORTED_1000baseT_Full)
2190                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2191
2192                 if (phy->caps & SUPPORTED_100baseT_Full)
2193                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2194
2195                 if (phy->caps & SUPPORTED_10baseT_Full)
2196                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2197
2198                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2199                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2200
2201         } else if (phy->caps & SUPPORTED_TP) {
2202                 /* Copper (CX4) */
2203
2204                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2205                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2206
2207                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2208                 ifmedia_set(media, m | IFM_10G_CX4);
2209
2210         } else if (phy->caps & SUPPORTED_FIBRE &&
2211                    phy->caps & SUPPORTED_10000baseT_Full) {
2212                 /* 10G optical (but includes SFP+ twinax) */
2213
2214                 m |= cxgb_ifm_type(mod);
2215                 if (IFM_SUBTYPE(m) == IFM_NONE)
2216                         m &= ~IFM_FDX;
2217
2218                 ifmedia_add(media, m, mod, NULL);
2219                 ifmedia_set(media, m);
2220
2221         } else if (phy->caps & SUPPORTED_FIBRE &&
2222                    phy->caps & SUPPORTED_1000baseT_Full) {
2223                 /* 1G optical */
2224
2225                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2226                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2227                 ifmedia_set(media, m | IFM_1000_SX);
2228
2229         } else {
2230                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2231                             phy->caps));
2232         }
2233
2234         PORT_UNLOCK(p);
2235 }
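
/*
 * Example of the list built above (hypothetical SFP+ port with an SR module
 * plugged in): phy->caps advertises FIBRE and 10000baseT_Full, so the third
 * branch runs and the list collapses to the single entry
 * IFM_ETHER | IFM_FDX | IFM_10G_SR, which also becomes the current media.
 */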
2236
2237 static void
2238 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2239 {
2240         struct port_info *p = ifp->if_softc;
2241         struct ifmedia_entry *cur = p->media.ifm_cur;
2242         int speed = p->link_config.speed;
2243
2244         if (cur->ifm_data != p->phy.modtype) {
2245                 cxgb_build_medialist(p);
2246                 cur = p->media.ifm_cur;
2247         }
2248
2249         ifmr->ifm_status = IFM_AVALID;
2250         if (!p->link_config.link_ok)
2251                 return;
2252
2253         ifmr->ifm_status |= IFM_ACTIVE;
2254
2255         /*
2256          * active and current will differ iff current media is autoselect.  That
2257          * can happen only for copper RJ45.
2258          */
2259         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2260                 return;
2261         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2262                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2263
2264         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2265         if (speed == SPEED_10000)
2266                 ifmr->ifm_active |= IFM_10G_T;
2267         else if (speed == SPEED_1000)
2268                 ifmr->ifm_active |= IFM_1000_T;
2269         else if (speed == SPEED_100)
2270                 ifmr->ifm_active |= IFM_100_TX;
2271         else if (speed == SPEED_10)
2272                 ifmr->ifm_active |= IFM_10_T;
2273         else
2274                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2275                             speed));
2276 }
2277
2278 static void
2279 cxgb_async_intr(void *data)
2280 {
2281         adapter_t *sc = data;
2282
2283         t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2284         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
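        /*
         * The read-back above flushes the posted write so interrupts are
         * really masked before the slow task is scheduled; they presumably
         * stay masked until that task re-enables them.
         */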
2285         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2286 }
2287
2288 static void
2289 link_check_callout(void *arg)
2290 {
2291         struct port_info *pi = arg;
2292         struct adapter *sc = pi->adapter;
2293
2294         if (!isset(&sc->open_device_map, pi->port_id))
2295                 return;
2296
2297         taskqueue_enqueue(sc->tq, &pi->link_check_task);
2298 }
2299
2300 static void
2301 check_link_status(void *arg, int pending)
2302 {
2303         struct port_info *pi = arg;
2304         struct adapter *sc = pi->adapter;
2305
2306         if (!isset(&sc->open_device_map, pi->port_id))
2307                 return;
2308
2309         t3_link_changed(sc, pi->port_id);
2310
2311         if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2312                 callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2313 }
2314
2315 void
2316 t3_os_link_intr(struct port_info *pi)
2317 {
2318         /*
2319          * Schedule a link check in the near future.  If the link is flapping
2320          * rapidly we'll keep resetting the callout and delaying the check until
2321          * things stabilize a bit.
2322          */
2323         callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2324 }
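
/*
 * E.g. a link bouncing every 100ms keeps pushing the callout out before its
 * hz / 4 (quarter-second) timeout can fire, so check_link_status() runs only
 * once the interrupts have stopped for 250ms.
 */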
2325
2326 static void
2327 check_t3b2_mac(struct adapter *sc)
2328 {
2329         int i;
2330
2331         if (sc->flags & CXGB_SHUTDOWN)
2332                 return;
2333
2334         for_each_port(sc, i) {
2335                 struct port_info *p = &sc->port[i];
2336                 int status;
2337 #ifdef INVARIANTS
2338                 struct ifnet *ifp = p->ifp;
2339 #endif          
2340
2341                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2342                     !p->link_config.link_ok)
2343                         continue;
2344
2345                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2346                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2347                          __func__, ifp->if_drv_flags, sc->open_device_map));
2348
2349                 PORT_LOCK(p);
2350                 status = t3b2_mac_watchdog_task(&p->mac);
2351                 if (status == 1)
2352                         p->mac.stats.num_toggled++;
2353                 else if (status == 2) {
2354                         struct cmac *mac = &p->mac;
2355
2356                         cxgb_update_mac_settings(p);
2357                         t3_link_start(&p->phy, mac, &p->link_config);
2358                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2359                         t3_port_intr_enable(sc, p->port_id);
2360                         p->mac.stats.num_resets++;
2361                 }
2362                 PORT_UNLOCK(p);
2363         }
2364 }
2365
2366 static void
2367 cxgb_tick(void *arg)
2368 {
2369         adapter_t *sc = (adapter_t *)arg;
2370
2371         if (sc->flags & CXGB_SHUTDOWN)
2372                 return;
2373
2374         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2375         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2376 }
2377
2378 static void
2379 cxgb_tick_handler(void *arg, int count)
2380 {
2381         adapter_t *sc = (adapter_t *)arg;
2382         const struct adapter_params *p = &sc->params;
2383         int i;
2384         uint32_t cause, reset;
2385
2386         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2387                 return;
2388
2389         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2390                 check_t3b2_mac(sc);
2391
2392         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2393         if (cause) {
2394                 struct sge_qset *qs = &sc->sge.qs[0];
2395                 uint32_t mask, v;
2396
2397                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
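                /*
                 * Bit layout as decoded below: the low SGE_QSETS bits flag a
                 * starved response queue, the next byte (RSPQXDISABLED,
                 * cleared by the ~0xff00 mask above) is skipped, and the
                 * 2 * SGE_QSETS bits after that flag an empty free list, two
                 * per queue set.
                 */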
2398
2399                 mask = 1;
2400                 for (i = 0; i < SGE_QSETS; i++) {
2401                         if (v & mask)
2402                                 qs[i].rspq.starved++;
2403                         mask <<= 1;
2404                 }
2405
2406                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2407
2408                 for (i = 0; i < SGE_QSETS * 2; i++) {
2409                         if (v & mask) {
2410                                 qs[i / 2].fl[i % 2].empty++;
2411                         }
2412                         mask <<= 1;
2413                 }
2414
2415                 /* clear */
2416                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2417                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2418         }
2419
2420         for (i = 0; i < sc->params.nports; i++) {
2421                 struct port_info *pi = &sc->port[i];
2422                 struct ifnet *ifp = pi->ifp;
2423                 struct cmac *mac = &pi->mac;
2424                 struct mac_stats *mstats = &mac->stats;
2425                 int drops, j;
2426
2427                 if (!isset(&sc->open_device_map, pi->port_id))
2428                         continue;
2429
2430                 PORT_LOCK(pi);
2431                 t3_mac_update_stats(mac);
2432                 PORT_UNLOCK(pi);
2433
2434                 ifp->if_opackets = mstats->tx_frames;
2435                 ifp->if_ipackets = mstats->rx_frames;
2436                 ifp->if_obytes = mstats->tx_octets;
2437                 ifp->if_ibytes = mstats->rx_octets;
2438                 ifp->if_omcasts = mstats->tx_mcast_frames;
2439                 ifp->if_imcasts = mstats->rx_mcast_frames;
2440                 ifp->if_collisions = mstats->tx_total_collisions;
2441                 ifp->if_iqdrops = mstats->rx_cong_drops;
2442
2443                 drops = 0;
2444                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2445                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2446                 ifp->if_snd.ifq_drops = drops;
2447
2448                 ifp->if_oerrors =
2449                     mstats->tx_excess_collisions +
2450                     mstats->tx_underrun +
2451                     mstats->tx_len_errs +
2452                     mstats->tx_mac_internal_errs +
2453                     mstats->tx_excess_deferral +
2454                     mstats->tx_fcs_errs;
2455                 ifp->if_ierrors =
2456                     mstats->rx_jabber +
2457                     mstats->rx_data_errs +
2458                     mstats->rx_sequence_errs +
2459                     mstats->rx_runt + 
2460                     mstats->rx_too_long +
2461                     mstats->rx_mac_internal_errs +
2462                     mstats->rx_short +
2463                     mstats->rx_fcs_errs;
2464
2465                 if (mac->multiport)
2466                         continue;
2467
2468                 /* Count rx fifo overflows, once per second */
2469                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2470                 reset = 0;
2471                 if (cause & F_RXFIFO_OVERFLOW) {
2472                         mac->stats.rx_fifo_ovfl++;
2473                         reset |= F_RXFIFO_OVERFLOW;
2474                 }
2475                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2476         }
2477 }
2478
2479 static void
2480 touch_bars(device_t dev)
2481 {
2482         /*
2483          * Don't enable yet
2484          */
2485 #if !defined(__LP64__) && 0
2486         u32 v;
2487
2488         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2489         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2490         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2491         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2492         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2493         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2494 #endif
2495 }
2496
2497 static int
2498 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2499 {
2500         uint8_t *buf;
2501         int err = 0;
2502         u32 aligned_offset, aligned_len, *p;
2503         struct adapter *adapter = pi->adapter;
2504
2506         aligned_offset = offset & ~3;
2507         aligned_len = (len + (offset & 3) + 3) & ~3;
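        /*
         * Worked example: offset = 5, len = 6 (bytes 5..10) yields
         * aligned_offset = 4 and aligned_len = 8, i.e. the word-aligned span
         * 4..11.  The partial first and last words are read back below,
         * turning the EEPROM update into a read-modify-write of whole 32-bit
         * words.
         */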
2508
2509         if (aligned_offset != offset || aligned_len != len) {
2510                 buf = malloc(aligned_len, M_DEVBUF, M_WAITOK | M_ZERO);
2511                 /* M_WAITOK allocations never fail, so no NULL check. */
2513                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2514                 if (!err && aligned_len > 4)
2515                         err = t3_seeprom_read(adapter,
2516                                               aligned_offset + aligned_len - 4,
2517                                               (u32 *)&buf[aligned_len - 4]);
2518                 if (err)
2519                         goto out;
2520                 memcpy(buf + (offset & 3), data, len);
2521         } else
2522                 buf = (uint8_t *)(uintptr_t)data;
2523
2524         err = t3_seeprom_wp(adapter, 0);
2525         if (err)
2526                 goto out;
2527
2528         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2529                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2530                 aligned_offset += 4;
2531         }
2532
2533         if (!err)
2534                 err = t3_seeprom_wp(adapter, 1);
2535 out:
2536         if (buf != data)
2537                 free(buf, M_DEVBUF);
2538         return (err);
2539 }
2540
2541
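/*
 * Range check shared by the ioctl handlers below.  Note the sentinel
 * convention: a negative value means "parameter not supplied" and always
 * passes, matching the "t->foo >= 0" guards at the call sites.
 */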
2542 static int
2543 in_range(int val, int lo, int hi)
2544 {
2545         return (val < 0 || (val <= hi && val >= lo));
2546 }
2547
2548 static int
2549 cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2550 {
2551         return (0);
2552 }
2553
2554 static int
2555 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2556 {
2557         return (0);
2558 }
2559
2560 static int
2561 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2562     int fflag, struct thread *td)
2563 {
2564         int mmd, error = 0;
2565         struct port_info *pi = dev->si_drv1;
2566         adapter_t *sc = pi->adapter;
2567
2568 #ifdef PRIV_SUPPORTED   
2569         if (priv_check(td, PRIV_DRIVER)) {
2570                 if (cxgb_debug) 
2571                         printf("user does not have access to privileged ioctls\n");
2572                 return (EPERM);
2573         }
2574 #else
2575         if (suser(td)) {
2576                 if (cxgb_debug)
2577                         printf("user does not have access to privileged ioctls\n");
2578                 return (EPERM);
2579         }
2580 #endif
2581         
2582         switch (cmd) {
2583         case CHELSIO_GET_MIIREG: {
2584                 uint32_t val;
2585                 struct cphy *phy = &pi->phy;
2586                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2587                 
2588                 if (!phy->mdio_read)
2589                         return (EOPNOTSUPP);
2590                 if (is_10G(sc)) {
2591                         mmd = mid->phy_id >> 8;
2592                         if (!mmd)
2593                                 mmd = MDIO_DEV_PCS;
2594                         else if (mmd > MDIO_DEV_VEND2)
2595                                 return (EINVAL);
2596
2597                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2598                                              mid->reg_num, &val);
2599                 } else
2600                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2601                                              mid->reg_num & 0x1f, &val);
2602                 if (error == 0)
2603                         mid->val_out = val;
2604                 break;
2605         }
2606         case CHELSIO_SET_MIIREG: {
2607                 struct cphy *phy = &pi->phy;
2608                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2609
2610                 if (!phy->mdio_write)
2611                         return (EOPNOTSUPP);
2612                 if (is_10G(sc)) {
2613                         mmd = mid->phy_id >> 8;
2614                         if (!mmd)
2615                                 mmd = MDIO_DEV_PCS;
2616                         else if (mmd > MDIO_DEV_VEND2)
2617                                 return (EINVAL);
2618                         
2619                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2620                                               mmd, mid->reg_num, mid->val_in);
2621                 } else
2622                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2623                                               mid->reg_num & 0x1f,
2624                                               mid->val_in);
2625                 break;
2626         }
2627         case CHELSIO_SETREG: {
2628                 struct ch_reg *edata = (struct ch_reg *)data;
2629                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2630                         return (EFAULT);
2631                 t3_write_reg(sc, edata->addr, edata->val);
2632                 break;
2633         }
2634         case CHELSIO_GETREG: {
2635                 struct ch_reg *edata = (struct ch_reg *)data;
2636                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2637                         return (EFAULT);
2638                 edata->val = t3_read_reg(sc, edata->addr);
2639                 break;
2640         }
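        /*
         * Userland sketch for the two register ioctls above (hypothetical:
         * assumes the port's control node is /dev/cxgb0 and the
         * ch_reg/CHELSIO_GETREG definitions are in scope):
         *
         *      struct ch_reg r = { .addr = 0x6c };
         *      int fd = open("/dev/cxgb0", O_RDWR);
         *      if (fd >= 0 && ioctl(fd, CHELSIO_GETREG, &r) == 0)
         *              printf("0x%x = 0x%x\n", r.addr, r.val);
         *
         * (0x6c is an arbitrary 4-byte-aligned offset below mmio_len.)
         */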
2641         case CHELSIO_GET_SGE_CONTEXT: {
2642                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2643                 mtx_lock_spin(&sc->sge.reg_lock);
2644                 switch (ecntxt->cntxt_type) {
2645                 case CNTXT_TYPE_EGRESS:
2646                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2647                             ecntxt->data);
2648                         break;
2649                 case CNTXT_TYPE_FL:
2650                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2651                             ecntxt->data);
2652                         break;
2653                 case CNTXT_TYPE_RSP:
2654                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2655                             ecntxt->data);
2656                         break;
2657                 case CNTXT_TYPE_CQ:
2658                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2659                             ecntxt->data);
2660                         break;
2661                 default:
2662                         error = EINVAL;
2663                         break;
2664                 }
2665                 mtx_unlock_spin(&sc->sge.reg_lock);
2666                 break;
2667         }
2668         case CHELSIO_GET_SGE_DESC: {
2669                 struct ch_desc *edesc = (struct ch_desc *)data;
2670                 int ret;
2671                 if (edesc->queue_num >= SGE_QSETS * 6)
2672                         return (EINVAL);
2673                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2674                     edesc->queue_num % 6, edesc->idx, edesc->data);
2675                 if (ret < 0)
2676                         return (EINVAL);
2677                 edesc->size = ret;
2678                 break;
2679         }
2680         case CHELSIO_GET_QSET_PARAMS: {
2681                 struct qset_params *q;
2682                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2683                 int q1 = pi->first_qset;
2684                 int nqsets = pi->nqsets;
2685                 int i;
2686
2687                 if (t->qset_idx >= nqsets)
2688                         return (EINVAL);
2689
2690                 i = q1 + t->qset_idx;
2691                 q = &sc->params.sge.qset[i];
2692                 t->rspq_size   = q->rspq_size;
2693                 t->txq_size[0] = q->txq_size[0];
2694                 t->txq_size[1] = q->txq_size[1];
2695                 t->txq_size[2] = q->txq_size[2];
2696                 t->fl_size[0]  = q->fl_size;
2697                 t->fl_size[1]  = q->jumbo_size;
2698                 t->polling     = q->polling;
2699                 t->lro         = q->lro;
2700                 t->intr_lat    = q->coalesce_usecs;
2701                 t->cong_thres  = q->cong_thres;
2702                 t->qnum        = i;
2703
2704                 if ((sc->flags & FULL_INIT_DONE) == 0)
2705                         t->vector = 0;
2706                 else if (sc->flags & USING_MSIX)
2707                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2708                 else
2709                         t->vector = rman_get_start(sc->irq_res);
2710
2711                 break;
2712         }
2713         case CHELSIO_GET_QSET_NUM: {
2714                 struct ch_reg *edata = (struct ch_reg *)data;
2715                 edata->val = pi->nqsets;
2716                 break;
2717         }
2718         case CHELSIO_LOAD_FW: {
2719                 uint8_t *fw_data;
2720                 uint32_t vers;
2721                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2722
2723                 /*
2724                  * You're allowed to load a firmware only before FULL_INIT_DONE
2725                  *
2726                  * FW_UPTODATE is also set so the rest of the initialization
2727                  * will not overwrite what was loaded here.  This gives you the
2728                  * flexibility to load any firmware (and maybe shoot yourself in
2729                  * the foot).
2730                  */
2731
2732                 ADAPTER_LOCK(sc);
2733                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2734                         ADAPTER_UNLOCK(sc);
2735                         return (EBUSY);
2736                 }
2737
2738                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2739                 if (!fw_data)
2740                         error = ENOMEM;
2741                 else
2742                         error = copyin(t->buf, fw_data, t->len);
2743
2744                 if (!error)
2745                         error = -t3_load_fw(sc, fw_data, t->len);
2746
2747                 if (t3_get_fw_version(sc, &vers) == 0) {
2748                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2749                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2750                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2751                 }
2752
2753                 if (!error)
2754                         sc->flags |= FW_UPTODATE;
2755
2756                 free(fw_data, M_DEVBUF);
2757                 ADAPTER_UNLOCK(sc);
2758                 break;
2759         }
2760         case CHELSIO_LOAD_BOOT: {
2761                 uint8_t *boot_data;
2762                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2763
2764                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2765                 if (!boot_data)
2766                         return (ENOMEM);
2767
2768                 error = copyin(t->buf, boot_data, t->len);
2769                 if (!error)
2770                         error = -t3_load_boot(sc, boot_data, t->len);
2771
2772                 free(boot_data, M_DEVBUF);
2773                 break;
2774         }
2775         case CHELSIO_GET_PM: {
2776                 struct ch_pm *m = (struct ch_pm *)data;
2777                 struct tp_params *p = &sc->params.tp;
2778
2779                 if (!is_offload(sc))
2780                         return (EOPNOTSUPP);
2781
2782                 m->tx_pg_sz = p->tx_pg_size;
2783                 m->tx_num_pg = p->tx_num_pgs;
2784                 m->rx_pg_sz  = p->rx_pg_size;
2785                 m->rx_num_pg = p->rx_num_pgs;
2786                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2787
2788                 break;
2789         }
2790         case CHELSIO_SET_PM: {
2791                 struct ch_pm *m = (struct ch_pm *)data;
2792                 struct tp_params *p = &sc->params.tp;
2793
2794                 if (!is_offload(sc))
2795                         return (EOPNOTSUPP);
2796                 if (sc->flags & FULL_INIT_DONE)
2797                         return (EBUSY);
2798
2799                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2800                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2801                         return (EINVAL);        /* not power of 2 */
2802                 if (!(m->rx_pg_sz & 0x14000))
2803                         return (EINVAL);        /* not 16KB or 64KB */
2804                 if (!(m->tx_pg_sz & 0x1554000))
2805                         return (EINVAL);        /* not 16KB/64KB/256KB/1MB/4MB/16MB */
2806                 if (m->tx_num_pg == -1)
2807                         m->tx_num_pg = p->tx_num_pgs;
2808                 if (m->rx_num_pg == -1)
2809                         m->rx_num_pg = p->rx_num_pgs;
2810                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2811                         return (EINVAL);
2812                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2813                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2814                         return (EINVAL);
2815
2816                 p->rx_pg_size = m->rx_pg_sz;
2817                 p->tx_pg_size = m->tx_pg_sz;
2818                 p->rx_num_pgs = m->rx_num_pg;
2819                 p->tx_num_pgs = m->tx_num_pg;
2820                 break;
2821         }
2822         case CHELSIO_SETMTUTAB: {
2823                 struct ch_mtus *m = (struct ch_mtus *)data;
2824                 int i;
2825                 
2826                 if (!is_offload(sc))
2827                         return (EOPNOTSUPP);
2828                 if (offload_running(sc))
2829                         return (EBUSY);
2830                 if (m->nmtus != NMTUS)
2831                         return (EINVAL);
2832                 if (m->mtus[0] < 81)         /* accommodate SACK */
2833                         return (EINVAL);
2834                 
2835                 /*
2836                  * MTUs must be in ascending order
2837                  */
2838                 for (i = 1; i < NMTUS; ++i)
2839                         if (m->mtus[i] < m->mtus[i - 1])
2840                                 return (EINVAL);
2841
2842                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2843                 break;
2844         }
2845         case CHELSIO_GETMTUTAB: {
2846                 struct ch_mtus *m = (struct ch_mtus *)data;
2847
2848                 if (!is_offload(sc))
2849                         return (EOPNOTSUPP);
2850
2851                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2852                 m->nmtus = NMTUS;
2853                 break;
2854         }
2855         case CHELSIO_GET_MEM: {
2856                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2857                 struct mc7 *mem;
2858                 uint8_t *useraddr;
2859                 u64 buf[32];
2860
2861                 /*
2862                  * Use these to avoid modifying len/addr in the return
2863                  * struct
2864                  */
2865                 uint32_t len = t->len, addr = t->addr;
2866
2867                 if (!is_offload(sc))
2868                         return (EOPNOTSUPP);
2869                 if (!(sc->flags & FULL_INIT_DONE))
2870                         return (EIO);         /* need the memory controllers */
2871                 if ((addr & 0x7) || (len & 0x7))
2872                         return (EINVAL);
2873                 if (t->mem_id == MEM_CM)
2874                         mem = &sc->cm;
2875                 else if (t->mem_id == MEM_PMRX)
2876                         mem = &sc->pmrx;
2877                 else if (t->mem_id == MEM_PMTX)
2878                         mem = &sc->pmtx;
2879                 else
2880                         return (EINVAL);
2881
2882                 /*
2883                  * Version scheme:
2884                  * bits 0..9: chip version
2885                  * bits 10..15: chip revision
2886                  */
2887                 t->version = 3 | (sc->params.rev << 10);
2888                 
2889                 /*
2890                  * Read 256 bytes at a time as len can be large and we don't
2891                  * want to use huge intermediate buffers.
2892                  */
2893                 useraddr = (uint8_t *)t->buf; 
2894                 while (len) {
2895                         unsigned int chunk = min(len, sizeof(buf));
2896
2897                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2898                         if (error)
2899                                 return (-error);
2900                         if (copyout(buf, useraddr, chunk))
2901                                 return (EFAULT);
2902                         useraddr += chunk;
2903                         addr += chunk;
2904                         len -= chunk;
2905                 }
2906                 break;
2907         }
2908         case CHELSIO_READ_TCAM_WORD: {
2909                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2910
2911                 if (!is_offload(sc))
2912                         return (EOPNOTSUPP);
2913                 if (!(sc->flags & FULL_INIT_DONE))
2914                         return (EIO);         /* need MC5 */
2915                 return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2917         }
2918         case CHELSIO_SET_TRACE_FILTER: {
2919                 struct ch_trace *t = (struct ch_trace *)data;
2920                 const struct trace_params *tp;
2921
2922                 tp = (const struct trace_params *)&t->sip;
2923                 if (t->config_tx)
2924                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2925                                                t->trace_tx);
2926                 if (t->config_rx)
2927                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2928                                                t->trace_rx);
2929                 break;
2930         }
2931         case CHELSIO_SET_PKTSCHED: {
2932                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2933                 if (sc->open_device_map == 0)
2934                         return (EAGAIN);
2935                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2936                     p->binding);
2937                 break;
2938         }
2939         case CHELSIO_IFCONF_GETREGS: {
2940                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2941                 int reglen = cxgb_get_regs_len();
2942                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2943                 if (buf == NULL) {
2944                         return (ENOMEM);
2945                 }
2946                 if (regs->len > reglen)
2947                         regs->len = reglen;
2948                 else if (regs->len < reglen)
2949                         error = ENOBUFS;
2950
2951                 if (!error) {
2952                         cxgb_get_regs(sc, regs, buf);
2953                         error = copyout(buf, regs->data, reglen);
2954                 }
2955                 free(buf, M_DEVBUF);
2956
2957                 break;
2958         }
2959         case CHELSIO_SET_HW_SCHED: {
2960                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2961                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2962
2963                 if ((sc->flags & FULL_INIT_DONE) == 0)
2964                         return (EAGAIN);       /* need TP to be initialized */
2965                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2966                     !in_range(t->channel, 0, 1) ||
2967                     !in_range(t->kbps, 0, 10000000) ||
2968                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2969                     !in_range(t->flow_ipg, 0,
2970                               dack_ticks_to_usec(sc, 0x7ff)))
2971                         return (EINVAL);
2972
                if (t->kbps >= 0) {
                        error = t3_config_sched(sc, t->kbps, t->sched);
                        if (error < 0)
                                return (-error);
                }
                if (t->class_ipg >= 0)
                        t3_set_sched_ipg(sc, t->sched, t->class_ipg);
                if (t->flow_ipg >= 0) {
                        t->flow_ipg *= 1000;     /* us -> ns */
                        t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
                }
                if (t->mode >= 0) {
                        int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);

                        t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
                                         bit, t->mode ? bit : 0);
                }
                if (t->channel >= 0)
                        t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
                                         1 << t->sched, t->channel << t->sched);
                break;
        }
        case CHELSIO_GET_EEPROM: {
                int i;
                struct ch_eeprom *e = (struct ch_eeprom *)data;
                uint8_t *buf;

                /* Keep the word-sized reads below inside buf. */
                if (e->offset > EEPROMSIZE || e->len > EEPROMSIZE - e->offset)
                        return (EINVAL);

                buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
                if (buf == NULL)
                        return (ENOMEM);
                e->magic = EEPROM_MAGIC;
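                /*
                 * The SEEPROM is read a 32-bit word at a time, so start at
                 * the enclosing 4-byte boundary and copy out only the
                 * window the caller asked for.
                 */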
                for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
                        error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);

                if (!error)
                        error = copyout(buf + e->offset, e->data, e->len);

                free(buf, M_DEVBUF);
                break;
        }
        case CHELSIO_CLEAR_STATS: {
                if (!(sc->flags & FULL_INIT_DONE))
                        return (EAGAIN);

                PORT_LOCK(pi);
                t3_mac_update_stats(&pi->mac);
                memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
                PORT_UNLOCK(pi);
                break;
        }
        case CHELSIO_GET_UP_LA: {
                struct ch_up_la *la = (struct ch_up_la *)data;
                uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);

                if (buf == NULL)
                        return (ENOMEM);
                if (la->bufsize < LA_BUFSIZE)
                        error = ENOBUFS;

                if (!error)
                        error = -t3_get_up_la(sc, &la->stopped, &la->idx,
                                              &la->bufsize, buf);
                if (!error)
                        error = copyout(buf, la->data, la->bufsize);

                free(buf, M_DEVBUF);
                break;
        }
        case CHELSIO_GET_UP_IOQS: {
                struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
                uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
                uint32_t *v;

                if (buf == NULL)
                        return (ENOMEM);
                if (ioqs->bufsize < IOQS_BUFSIZE)
                        error = ENOBUFS;

                if (!error)
                        error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);

                if (!error) {
                        v = (uint32_t *)buf;

                        ioqs->ioq_rx_enable = *v++;
                        ioqs->ioq_tx_enable = *v++;
                        ioqs->ioq_rx_status = *v++;
                        ioqs->ioq_tx_status = *v++;

                        error = copyout(v, ioqs->data, ioqs->bufsize);
                }

                free(buf, M_DEVBUF);
                break;
        }
        case CHELSIO_SET_FILTER: {
                struct ch_filter *f = (struct ch_filter *)data;
                struct filter_info *p;
                unsigned int nfilters = sc->params.mc5.nfilters;

                if (!is_offload(sc))
                        return (EOPNOTSUPP);    /* No TCAM */
                if (!(sc->flags & FULL_INIT_DONE))
                        return (EAGAIN);        /* mc5 not setup yet */
                if (nfilters == 0)
                        return (EBUSY);         /* TOE will use TCAM */

                /* sanity checks */
                if (f->filter_id >= nfilters ||
                    (f->val.dip && f->mask.dip != 0xffffffff) ||
                    (f->val.sport && f->mask.sport != 0xffff) ||
                    (f->val.dport && f->mask.dport != 0xffff) ||
                    (f->val.vlan && f->mask.vlan != 0xfff) ||
                    (f->val.vlan_prio &&
                        f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
                    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
                    f->qset >= SGE_QSETS ||
                    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
                        return (EINVAL);

                /* Was allocated with M_WAITOK */
                KASSERT(sc->filters, ("filter table NULL\n"));

                p = &sc->filters[f->filter_id];
                if (p->locked)
                        return (EPERM);

                bzero(p, sizeof(*p));
                p->sip = f->val.sip;
                p->sip_mask = f->mask.sip;
                p->dip = f->val.dip;
                p->sport = f->val.sport;
                p->dport = f->val.dport;
                p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
                p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
                    FILTER_NO_VLAN_PRI;
                p->mac_hit = f->mac_hit;
                p->mac_vld = f->mac_addr_idx != 0xffff;
                p->mac_idx = f->mac_addr_idx;
                p->pkt_type = f->proto;
                p->report_filter_id = f->want_filter_id;
                p->pass = f->pass;
                p->rss = f->rss;
                p->qset = f->qset;

                error = set_filter(sc, f->filter_id, p);
                if (error == 0)
                        p->valid = 1;
                break;
        }
        case CHELSIO_DEL_FILTER: {
                struct ch_filter *f = (struct ch_filter *)data;
                struct filter_info *p;
                unsigned int nfilters = sc->params.mc5.nfilters;

                if (!is_offload(sc))
                        return (EOPNOTSUPP);
                if (!(sc->flags & FULL_INIT_DONE))
                        return (EAGAIN);
                if (nfilters == 0 || sc->filters == NULL)
                        return (EINVAL);
                if (f->filter_id >= nfilters)
                        return (EINVAL);

                p = &sc->filters[f->filter_id];
                if (p->locked)
                        return (EPERM);
                if (!p->valid)
                        return (EFAULT); /* Read "Bad address" as "Bad index" */

                bzero(p, sizeof(*p));
                p->sip = p->sip_mask = 0xffffffff;
                p->vlan = 0xfff;
                p->vlan_prio = FILTER_NO_VLAN_PRI;
                p->pkt_type = 1;
                error = set_filter(sc, f->filter_id, p);
                break;
        }
        case CHELSIO_GET_FILTER: {
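                /*
                 * Cursor-style lookup: a filter_id of 0xffffffff starts the
                 * scan at entry 0, anything else resumes just past that id.
                 * The first valid filter found is copied back to the caller;
                 * when none remain, filter_id is set to 0xffffffff.
                 */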
                struct ch_filter *f = (struct ch_filter *)data;
                struct filter_info *p;
                unsigned int i, nfilters = sc->params.mc5.nfilters;

                if (!is_offload(sc))
                        return (EOPNOTSUPP);
                if (!(sc->flags & FULL_INIT_DONE))
                        return (EAGAIN);
                if (nfilters == 0 || sc->filters == NULL)
                        return (EINVAL);

                i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
                for (; i < nfilters; i++) {
                        p = &sc->filters[i];
                        if (!p->valid)
                                continue;

                        bzero(f, sizeof(*f));

                        f->filter_id = i;
                        f->val.sip = p->sip;
                        f->mask.sip = p->sip_mask;
                        f->val.dip = p->dip;
                        f->mask.dip = p->dip ? 0xffffffff : 0;
                        f->val.sport = p->sport;
                        f->mask.sport = p->sport ? 0xffff : 0;
                        f->val.dport = p->dport;
                        f->mask.dport = p->dport ? 0xffff : 0;
                        f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
                        f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
                        f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
                            0 : p->vlan_prio;
                        f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
                            0 : FILTER_NO_VLAN_PRI;
                        f->mac_hit = p->mac_hit;
                        f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
                        f->proto = p->pkt_type;
                        f->want_filter_id = p->report_filter_id;
                        f->pass = p->pass;
                        f->rss = p->rss;
                        f->qset = p->qset;

                        break;
                }

                if (i == nfilters)
                        f->filter_id = 0xffffffff;
                break;
        }
        default:
                return (EOPNOTSUPP);
        }

        return (error);
}
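
/*
 * Example (not part of the driver): walking the filter table from userland
 * with the CHELSIO_GET_FILTER cursor convention above.  "fd" is an open
 * cxgb control device and print_filter() is a hypothetical helper.
 *
 *      struct ch_filter f;
 *
 *      memset(&f, 0, sizeof(f));
 *      f.filter_id = 0xffffffff;               // start the scan at entry 0
 *      for (;;) {
 *              if (ioctl(fd, CHELSIO_GET_FILTER, &f) != 0)
 *                      err(1, "CHELSIO_GET_FILTER");
 *              if (f.filter_id == 0xffffffff)
 *                      break;                  // no more valid filters
 *              print_filter(&f);
 *      }
 */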
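/*
 * Copy the 32-bit registers in [start, end] into the dump buffer at the
 * offsets matching their register addresses.
 */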
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
        uint32_t *p = (uint32_t *)(buf + start);

        for ( ; start <= end; start += sizeof(uint32_t))
                *p++ = t3_read_reg(ap, start);
}

#define T3_REGMAP_SIZE (3 * 1024)
static int
cxgb_get_regs_len(void)
{
        return (T3_REGMAP_SIZE);
}

static void
cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
{

        /*
         * Version scheme:
         * bits 0..9: chip version
         * bits 10..15: chip revision
         * bit 31: set for PCIe cards
         */
        regs->version = 3 | (sc->params.rev << 10) |
            ((uint32_t)is_pcie(sc) << 31);

        /*
         * We skip the MAC statistics registers because they are clear-on-read.
         * Also reading multi-register stats would need to synchronize with the
         * periodic mac stats accumulation.  Hard to justify the complexity.
         */
        memset(buf, 0, cxgb_get_regs_len());
        reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
        reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
        reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
        reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
        reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
        reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
                       XGM_REG(A_XGM_SERDES_STAT3, 1));
        reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
                       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
}
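
/*
 * Example (not part of the driver): decoding the version word above in a
 * consumer of the register dump, per the field layout documented in
 * cxgb_get_regs().
 *
 *      uint32_t v = regs.version;
 *      unsigned int chip = v & 0x3ff;          // bits 0..9 (3 for T3)
 *      unsigned int rev = (v >> 10) & 0x3f;    // bits 10..15
 *      int pcie = (v >> 31) & 1;               // bit 31
 */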

static int
alloc_filters(struct adapter *sc)
{
        struct filter_info *p;
        unsigned int nfilters = sc->params.mc5.nfilters;

        if (nfilters == 0)
                return (0);

        p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
        sc->filters = p;

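        /*
         * Reserve the last entry as a locked, always-valid default filter
         * that passes everything up via RSS.  Locked entries are programmed
         * into the hardware by setup_hw_filters() and cannot be modified or
         * deleted through the filter ioctls.
         */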
        p = &sc->filters[nfilters - 1];
        p->vlan = 0xfff;
        p->vlan_prio = FILTER_NO_VLAN_PRI;
        p->pass = p->rss = p->valid = p->locked = 1;

        return (0);
}

static int
setup_hw_filters(struct adapter *sc)
{
        int i, rc;
        unsigned int nfilters = sc->params.mc5.nfilters;

        if (sc->filters == NULL)
                return (0);

        t3_enable_filters(sc);

        for (i = rc = 0; i < nfilters && !rc; i++) {
                if (sc->filters[i].locked)
                        rc = set_filter(sc, i, &sc->filters[i]);
        }

        return (rc);
}

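/*
 * Program one TCAM filter.  A single management mbuf carries an atomic
 * BYPASS work request holding a CPL_PASS_OPEN_REQ (the match criteria)
 * followed by two CPL_SET_TCB_FIELD commands (the filter's action and
 * control bits), each wrapped in a ULP_TXPKT header.
 */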
static int
set_filter(struct adapter *sc, int id, const struct filter_info *f)
{
        int len;
        struct mbuf *m;
        struct ulp_txpkt *txpkt;
        struct work_request_hdr *wr;
        struct cpl_pass_open_req *oreq;
        struct cpl_set_tcb_field *sreq;

        len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
        KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));

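        /*
         * Filters occupy the top of the TCAM, just below the routing
         * region; turn the filter index into an absolute MC5 tid.
         */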
        id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
              sc->params.mc5.nfilters;

        m = m_gethdr(M_WAITOK, MT_DATA);
        m->m_len = m->m_pkthdr.len = len;
        bzero(mtod(m, char *), len);

        wr = mtod(m, struct work_request_hdr *);
        wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);

        oreq = (struct cpl_pass_open_req *)(wr + 1);
        txpkt = (struct ulp_txpkt *)oreq;
        txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
        txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
        OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
        oreq->local_port = htons(f->dport);
        oreq->peer_port = htons(f->sport);
        oreq->local_ip = htonl(f->dip);
        oreq->peer_ip = htonl(f->sip);
        oreq->peer_netmask = htonl(f->sip_mask);
        oreq->opt0h = 0;
        oreq->opt0l = htonl(F_NO_OFFLOAD);
        oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
                         V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
                         V_VLAN_PRI(f->vlan_prio >> 1) |
                         V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
                         V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
                         V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));

        sreq = (struct cpl_set_tcb_field *)(oreq + 1);
        set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
                          (f->report_filter_id << 15) | (1 << 23) |
                          ((u64)f->pass << 35) | ((u64)!f->rss << 36));
        set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
        t3_mgmt_tx(sc, m);

        if (f->pass && !f->rss) {
                /* Pass filters that bypass RSS steer to a fixed qset. */
                len = sizeof(*sreq);
                m = m_gethdr(M_WAITOK, MT_DATA);
                m->m_len = m->m_pkthdr.len = len;
                bzero(mtod(m, char *), len);
                sreq = mtod(m, struct cpl_set_tcb_field *);
                sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                mk_set_tcb_field(sreq, id, 25, 0x3f80000,
                                 (u64)sc->rrss_map[f->qset] << 19);
                t3_mgmt_tx(sc, m);
        }
        return (0);
}

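/*
 * mk_set_tcb_field() fills in a CPL_SET_TCB_FIELD request that updates the
 * bits selected by mask in the given TCB word with val, requesting no
 * reply.  set_tcb_field_ulp() additionally fills in the leading ULP_TXPKT
 * header used when the request is embedded in a BYPASS work request, as in
 * set_filter() above.
 */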
static inline void
mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
    unsigned int word, u64 mask, u64 val)
{
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
        req->reply = V_NO_REPLY(1);
        req->cpu_idx = 0;
        req->word = htons(word);
        req->mask = htobe64(mask);
        req->val = htobe64(val);
}

static inline void
set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
    unsigned int word, u64 mask, u64 val)
{
        struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;

        txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
        txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
        mk_set_tcb_field(req, tid, word, mask, val);
}