/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ktr.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <cxgb_include.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_interrupts(adapter_t *);
static void cxgb_teardown_interrupts(adapter_t *);
static void cxgb_init(void *);
static int cxgb_init_locked(struct port_info *);
static int cxgb_uninit_locked(struct port_info *);
static int cxgb_uninit_synchronized(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgb_media_change(struct ifnet *);
static int cxgb_ifm_type(int);
static void cxgb_build_medialist(struct port_info *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_ext_intr_handler(void *, int);
static void cxgb_tick_handler(void *, int);
static void cxgb_tick(void *);
static void setup_rss(adapter_t *sc);
static int alloc_filters(struct adapter *);
static int setup_hw_filters(struct adapter *);
static int set_filter(struct adapter *, int, const struct filter_info *);
static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);
static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);

/* Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_update_mac_settings(struct port_info *p);
static device_method_t cxgb_controller_methods[] = {
        DEVMETHOD(device_probe,         cxgb_controller_probe),
        DEVMETHOD(device_attach,        cxgb_controller_attach),
        DEVMETHOD(device_detach,        cxgb_controller_detach),

        /* bus interface */
        DEVMETHOD(bus_print_child,      bus_generic_print_child),
        DEVMETHOD(bus_driver_added,     bus_generic_driver_added),

        { 0, 0 }
};

static driver_t cxgb_controller_driver = {
        "cxgbc",
        cxgb_controller_methods,
        sizeof(struct adapter)
};

static devclass_t       cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
        DEVMETHOD(device_probe,         cxgb_port_probe),
        DEVMETHOD(device_attach,        cxgb_port_attach),
        DEVMETHOD(device_detach,        cxgb_port_detach),
        { 0, 0 }
};

static driver_t cxgb_port_driver = {
        "cxgb",
        cxgb_port_methods,
        0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

static struct cdevsw cxgb_cdevsw = {
       .d_version =    D_VERSION,
       .d_flags =      0,
       .d_open =       cxgb_extension_open,
       .d_close =      cxgb_extension_close,
       .d_ioctl =      cxgb_extension_ioctl,
       .d_name =       "cxgb",
};

static devclass_t       cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");
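
/*
 * For example, a line such as the following in /boot/loader.conf selects
 * the interrupt scheme at boot (the knob is a read-only tunable once the
 * driver is loaded):
 *
 *     hw.cxgb.msi_allowed="1"
 */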

/*
 * The driver enables offload as a default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver uses an auto-queue algorithm by default.
 * To disable it and force a single queue-set per port, use multiq = 0.
 */
static int multiq = 1;
TUNABLE_INT("hw.cxgb.multiq", &multiq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
    "use min(ncpus/ports, 8) queue-sets per port");

/*
 * By default the driver will not update the firmware unless
 * it was compiled against a newer version.
 */
static int force_fw_update = 0;
TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
    "update firmware even if up to date");

int cxgb_use_16k_clusters = -1;
TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");

/*
 * Tune the size of the output queue.
 */
int cxgb_snd_queue_len = IFQ_MAXLEN;
TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
    &cxgb_snd_queue_len, 0, "send queue size");


enum {
        MAX_TXQ_ENTRIES      = 16384,
        MAX_CTRL_TXQ_ENTRIES = 1024,
        MAX_RSPQ_ENTRIES     = 16384,
        MAX_RX_BUFFERS       = 16384,
        MAX_RX_JUMBO_BUFFERS = 16384,
        MIN_TXQ_ENTRIES      = 4,
        MIN_CTRL_TXQ_ENTRIES = 4,
        MIN_RSPQ_ENTRIES     = 32,
        MIN_FL_ENTRIES       = 32,
        MIN_FL_JUMBO_ENTRIES = 32
};
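
/*
 * Software shadow of one hardware packet filter: the addresses, ports, and
 * VLAN fields to match, plus control bits selecting the action (pass/drop,
 * RSS or an explicit queue set) and flags tracking the entry's state.
 */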
struct filter_info {
        u32 sip;
        u32 sip_mask;
        u32 dip;
        u16 sport;
        u16 dport;
        u32 vlan:12;
        u32 vlan_prio:3;
        u32 mac_hit:1;
        u32 mac_idx:4;
        u32 mac_vld:1;
        u32 pkt_type:2;
        u32 report_filter_id:1;
        u32 pass:1;
        u32 rss:1;
        u32 qset:3;
        u32 locked:1;
        u32 valid:1;
};

enum { FILTER_NO_VLAN_PRI = 7 };

#define EEPROM_MAGIC 0x38E2F10C

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
        uint16_t        vendor;
        uint16_t        device;
        int             index;
        char            *desc;
} cxgb_identifiers[] = {
        {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
        {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
        {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
        {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
        {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
        {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
        {0, 0, 0, NULL}
};

static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);


static __inline char
t3rev2char(struct adapter *adapter)
{
        char rev = 'z';

        switch (adapter->params.rev) {
        case T3_REV_A:
                rev = 'a';
                break;
        case T3_REV_B:
        case T3_REV_B2:
                rev = 'b';
                break;
        case T3_REV_C:
                rev = 'c';
                break;
        }
        return rev;
}
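
/*
 * Return the cxgb_identifiers[] entry matching the PCI vendor and device
 * IDs of the given device, or NULL if the card is not recognized.
 */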
static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
        struct cxgb_ident *id;

        for (id = cxgb_identifiers; id->desc != NULL; id++) {
                if ((id->vendor == pci_get_vendor(dev)) &&
                    (id->device == pci_get_device(dev))) {
                        return (id);
                }
        }
        return (NULL);
}

static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
        struct cxgb_ident *id;
        const struct adapter_info *ai;

        id = cxgb_get_ident(dev);
        if (id == NULL)
                return (NULL);

        ai = t3_get_adapter_info(id->index);

        return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
        const struct adapter_info *ai;
        char *ports, buf[80];
        int nports;

        ai = cxgb_get_adapter_info(dev);
        if (ai == NULL)
                return (ENXIO);

        nports = ai->nports0 + ai->nports1;
        if (nports == 1)
                ports = "port";
        else
                ports = "ports";

        snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
        device_set_desc_copy(dev, buf);
        return (BUS_PROBE_DEFAULT);
}

#define FW_FNAME "cxgb_t3fw"
#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
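
/*
 * Load the cxgb_t3fw firmware module and write its image to the card,
 * refreshing the cached firmware version string on success.
 */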
static int
upgrade_fw(adapter_t *sc)
{
        const struct firmware *fw;
        int status;
        u32 vers;

        if ((fw = firmware_get(FW_FNAME)) == NULL) {
                device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
                return (ENOENT);
        } else
                device_printf(sc->dev, "installing firmware on card\n");
        status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

        if (status != 0) {
                device_printf(sc->dev, "failed to install firmware: %d\n",
                    status);
        } else {
                t3_get_fw_version(sc, &vers);
                snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
                    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
                    G_FW_VERSION_MICRO(vers));
        }

        firmware_put(fw, FIRMWARE_UNLOAD);

        return (status);
}
/*
 * The cxgb_controller_attach function is responsible for the initial
 * bringup of the device.  Its responsibilities include:
 *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register.
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call hardware specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Setup the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port).
 * 11. Initialize T3 private state.
 * 12. Trigger the LED.
 * 13. Setup offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls.
 *
 * NOTE: Any modification or deviation from this list MUST be reflected in
 * the above comment.  Failure to do so will result in problems on various
 * error conditions including link flapping.
 */
static int
cxgb_controller_attach(device_t dev)
{
        device_t child;
        const struct adapter_info *ai;
        struct adapter *sc;
        int i, error = 0;
        uint32_t vers;
        int port_qsets = 1;
        int msi_needed, reg;
        char buf[80];

        sc = device_get_softc(dev);
        sc->dev = dev;
        sc->msi_count = 0;
        ai = cxgb_get_adapter_info(dev);

        /* find the PCIe link width and set max read request to 4KB */
        if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
                uint16_t lnk;

                lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
                sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
                if (sc->link_width < 8 &&
                    (ai->caps & SUPPORTED_10000baseT_Full)) {
                        device_printf(sc->dev,
                            "PCIe x%d Link, expect reduced performance\n",
                            sc->link_width);
                }

                pci_set_max_read_req(dev, 4096);
        }

        touch_bars(dev);
        pci_enable_busmaster(dev);
        /*
         * Allocate the registers and make them available to the driver.
         * The registers that we care about for NIC mode are in BAR 0
         */
        sc->regs_rid = PCIR_BAR(0);
        if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->regs_rid, RF_ACTIVE)) == NULL) {
                device_printf(dev, "Cannot allocate BAR region 0\n");
                return (ENXIO);
        }
        sc->udbs_rid = PCIR_BAR(2);
        sc->udbs_res = NULL;
        if (is_offload(sc) &&
            ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
                   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
                device_printf(dev, "Cannot allocate BAR region 2\n");
                error = ENXIO;
                goto out;
        }

        snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
            device_get_unit(dev));
        ADAPTER_LOCK_INIT(sc, sc->lockbuf);

        snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
            device_get_unit(dev));
        snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
            device_get_unit(dev));
        snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
            device_get_unit(dev));

        MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
        MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
        MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);

        sc->bt = rman_get_bustag(sc->regs_res);
        sc->bh = rman_get_bushandle(sc->regs_res);
        sc->mmio_len = rman_get_size(sc->regs_res);

        for (i = 0; i < MAX_NPORTS; i++)
                sc->port[i].adapter = sc;

        if (t3_prep_adapter(sc, ai, 1) < 0) {
                printf("prep adapter failed\n");
                error = ENODEV;
                goto out;
        }
        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
         * enough messages for the queue sets.  If that fails, try falling
         * back to MSI.  If that fails, then try falling back to the legacy
         * interrupt pin model.
         */
        sc->msix_regs_rid = 0x20;
        if ((msi_allowed >= 2) &&
            (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

                if (multiq)
                        port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
                msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;

                if (pci_msix_count(dev) == 0 ||
                    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
                    sc->msi_count != msi_needed) {
                        device_printf(dev, "alloc msix failed - "
                                      "msi_count=%d, msi_needed=%d, err=%d; "
                                      "will try MSI\n", sc->msi_count,
                                      msi_needed, error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                        bus_release_resource(dev, SYS_RES_MEMORY,
                            sc->msix_regs_rid, sc->msix_regs_res);
                        sc->msix_regs_res = NULL;
                } else {
                        sc->flags |= USING_MSIX;
                        sc->cxgb_intr = cxgb_async_intr;
                        device_printf(dev,
                                      "using MSI-X interrupts (%u vectors)\n",
                                      sc->msi_count);
                }
        }

        if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
                sc->msi_count = 1;
                if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
                        device_printf(dev, "alloc msi failed - "
                                      "err=%d; will try INTx\n", error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                } else {
                        sc->flags |= USING_MSI;
                        sc->cxgb_intr = t3_intr_msi;
                        device_printf(dev, "using MSI interrupts\n");
                }
        }
        if (sc->msi_count == 0) {
                device_printf(dev, "using line interrupts\n");
                sc->cxgb_intr = t3b_intr;
        }

        /* Create a private taskqueue thread for handling driver events */
        sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->tq);
        if (sc->tq == NULL) {
                device_printf(dev, "failed to allocate controller task queue\n");
                error = ENOMEM;
                goto out;
        }

        taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
            device_get_nameunit(dev));
        TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
        TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);

        /* Create a periodic callout for checking adapter status */
        callout_init(&sc->cxgb_tick_ch, TRUE);

        if (t3_check_fw_version(sc) < 0 || force_fw_update) {
                /*
                 * Warn user that a firmware update will be attempted in init.
                 */
                device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
                    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
                sc->flags &= ~FW_UPTODATE;
        } else {
                sc->flags |= FW_UPTODATE;
        }

        if (t3_check_tpsram_version(sc) < 0) {
                /*
                 * Warn user that an SRAM update will be attempted in init.
                 */
                device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
                    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
                sc->flags &= ~TPS_UPTODATE;
        } else {
                sc->flags |= TPS_UPTODATE;
        }

        /*
         * Create a child device for each MAC.  The ethernet attachment
         * will be done in these children.
         */
        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi;

                if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
                        device_printf(dev, "failed to add child port\n");
                        error = EINVAL;
                        goto out;
                }
                pi = &sc->port[i];
                pi->adapter = sc;
                pi->nqsets = port_qsets;
                pi->first_qset = i*port_qsets;
                pi->port_id = i;
                pi->tx_chan = i >= ai->nports0;
                pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
                sc->rxpkt_map[pi->txpkt_intf] = i;
                sc->portdev[i] = child;
                device_set_softc(child, pi);
        }
        if ((error = bus_generic_attach(dev)) != 0)
                goto out;

        /* initialize sge private state */
        t3_sge_init_adapter(sc);

        t3_led_ready(sc);

        cxgb_offload_init();
        if (is_offload(sc)) {
                setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_ofld(sc);
        }
        error = t3_get_fw_version(sc, &vers);
        if (error)
                goto out;

        snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
            G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
            G_FW_VERSION_MICRO(vers));

        snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
                 ai->desc, is_offload(sc) ? "R" : "",
                 sc->params.vpd.ec, sc->params.vpd.sn);
        device_set_desc_copy(dev, buf);

        snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
                 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
                 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);

        device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
        callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
        t3_add_attach_sysctls(sc);
out:
        if (error)
                cxgb_free(sc);

        return (error);
}
/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
 */
static int
cxgb_controller_detach(device_t dev)
{
        struct adapter *sc;

        sc = device_get_softc(dev);

        cxgb_free(sc);

        return (0);
}

/*
 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
 * down the structures that were built up in cxgb_controller_attach(), and
 * should be the final piece of work done when fully unloading the driver.
 * Its tasks include:
 *
 *  1. Shutting down the threads started by the cxgb_controller_attach()
 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
 *  3. Detaching all of the port devices created during the
 *     cxgb_controller_attach() routine.
 *  4. Removing the device children created via cxgb_controller_attach().
 *  5. Releasing PCI resources associated with the device.
 *  6. Turning off the offload support, iff it was turned on.
 *  7. Destroying the mutexes created in cxgb_controller_attach().
 */
static void
cxgb_free(struct adapter *sc)
{
        int i;

        ADAPTER_LOCK(sc);
        sc->flags |= CXGB_SHUTDOWN;
        ADAPTER_UNLOCK(sc);

        /*
         * Make sure all child devices are gone.
         */
        bus_generic_detach(sc->dev);
        for (i = 0; i < (sc)->params.nports; i++) {
                if (sc->portdev[i] &&
                    device_delete_child(sc->dev, sc->portdev[i]) != 0)
                        device_printf(sc->dev, "failed to delete child port\n");
        }

        /*
         * At this point, it is as if cxgb_port_detach has run on all ports, and
         * cxgb_down has run on the adapter.  All interrupts have been silenced,
         * all open devices have been closed.
         */
        KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
                                           __func__, sc->open_device_map));
        for (i = 0; i < sc->params.nports; i++) {
                KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
                                                  __func__, i));
        }

        /*
         * Finish off the adapter's callouts.
         */
        callout_drain(&sc->cxgb_tick_ch);
        callout_drain(&sc->sge_timer_ch);

        /*
         * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
         * sysctls are cleaned up by the kernel linker.
         */
        if (sc->flags & FULL_INIT_DONE) {
                t3_free_sge_resources(sc);
                sc->flags &= ~FULL_INIT_DONE;
        }

        /*
         * Release all interrupt resources.
         */
        cxgb_teardown_interrupts(sc);
        if (sc->flags & (USING_MSI | USING_MSIX)) {
                device_printf(sc->dev, "releasing msi message(s)\n");
                pci_release_msi(sc->dev);
        } else {
                device_printf(sc->dev, "no msi message to release\n");
        }

        if (sc->msix_regs_res != NULL) {
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
                    sc->msix_regs_res);
        }

        /*
         * Free the adapter's taskqueue.
         */
        if (sc->tq != NULL) {
                taskqueue_free(sc->tq);
                sc->tq = NULL;
        }

        if (is_offload(sc)) {
                clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_unofld(sc);
        }

#ifdef notyet
        if (sc->flags & CXGB_OFLD_INIT)
                cxgb_offload_deactivate(sc);
#endif
        free(sc->filters, M_DEVBUF);
        t3_sge_free(sc);

        cxgb_offload_exit();

        if (sc->udbs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
                    sc->udbs_res);

        if (sc->regs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
                    sc->regs_res);

        MTX_DESTROY(&sc->mdio_lock);
        MTX_DESTROY(&sc->sge.reg_lock);
        MTX_DESTROY(&sc->elmer_lock);
        ADAPTER_LOCK_DEINIT(sc);
}

/**
 *      setup_sge_qsets - configure SGE Tx/Rx/response queues
 *      @sc: the controller softc
 *
 *      Determines how many sets of SGE queues to use and initializes them.
 *      We support multiple queue sets per port if we have MSI-X, otherwise
 *      just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
        int i, j, err, irq_idx = 0, qset_idx = 0;
        u_int ntxq = SGE_TXQ_PER_SET;

        if ((err = t3_sge_alloc(sc)) != 0) {
                device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
                return (err);
        }

        if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
                irq_idx = -1;

        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi = &sc->port[i];

                for (j = 0; j < pi->nqsets; j++, qset_idx++) {
                        err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
                            (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
                            &sc->params.sge.qset[qset_idx], ntxq, pi);
                        if (err) {
                                t3_free_sge_resources(sc);
                                device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
                                    err);
                                return (err);
                        }
                }
        }

        return (0);
}
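
/*
 * Release every interrupt handler and IRQ resource set up by
 * cxgb_setup_interrupts; each vector must be fully set up or not at all.
 */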
static void
cxgb_teardown_interrupts(adapter_t *sc)
{
        int i;

        for (i = 0; i < SGE_QSETS; i++) {
                if (sc->msix_intr_tag[i] == NULL) {
                        /* Should have been setup fully or not at all */
                        KASSERT(sc->msix_irq_res[i] == NULL &&
                                sc->msix_irq_rid[i] == 0,
                                ("%s: half-done interrupt (%d).", __func__, i));

                        continue;
                }

                bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
                                  sc->msix_intr_tag[i]);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
                                     sc->msix_irq_res[i]);

                sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
                sc->msix_irq_rid[i] = 0;
        }

        if (sc->intr_tag) {
                KASSERT(sc->irq_res != NULL,
                        ("%s: half-done interrupt.", __func__));

                bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                     sc->irq_res);

                sc->irq_res = sc->intr_tag = NULL;
                sc->irq_rid = 0;
        }
}
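
/*
 * Set up the main interrupt (INTx, MSI, or the first MSI-X vector) and,
 * when running with MSI-X, one additional vector per queue set.  Anything
 * set up before a failure is torn down before returning the error.
 */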
static int
cxgb_setup_interrupts(adapter_t *sc)
{
        struct resource *res;
        void *tag;
        int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);

        sc->irq_rid = intr_flag ? 1 : 0;
        sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
                                             RF_SHAREABLE | RF_ACTIVE);
        if (sc->irq_res == NULL) {
                device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
                              intr_flag, sc->irq_rid);
                err = EINVAL;
                sc->irq_rid = 0;
        } else {
                err = bus_setup_intr(sc->dev, sc->irq_res,
                    INTR_MPSAFE | INTR_TYPE_NET, NULL,
                    sc->cxgb_intr, sc, &sc->intr_tag);

                if (err) {
                        device_printf(sc->dev,
                                      "Cannot set up interrupt (%x, %u, %d)\n",
                                      intr_flag, sc->irq_rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                             sc->irq_res);
                        sc->irq_res = sc->intr_tag = NULL;
                        sc->irq_rid = 0;
                }
        }

        /* That's all for INTx or MSI */
        if (!(intr_flag & USING_MSIX) || err)
                return (err);

        for (i = 0; i < sc->msi_count - 1; i++) {
                rid = i + 2;
                res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
                                             RF_SHAREABLE | RF_ACTIVE);
                if (res == NULL) {
                        device_printf(sc->dev, "Cannot allocate interrupt "
                                      "for message %d\n", rid);
                        err = EINVAL;
                        break;
                }

                err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
                                     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
                if (err) {
                        device_printf(sc->dev, "Cannot set up interrupt "
                                      "for message %d (%d)\n", rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
                        break;
                }

                sc->msix_irq_rid[i] = rid;
                sc->msix_irq_res[i] = res;
                sc->msix_intr_tag[i] = tag;
        }

        if (err)
                cxgb_teardown_interrupts(sc);

        return (err);
}
static int
cxgb_port_probe(device_t dev)
{
        struct port_info *p;
        char buf[80];
        const char *desc;

        p = device_get_softc(dev);
        desc = p->phy.desc;
        snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
        device_set_desc_copy(dev, buf);
        return (0);
}
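
/*
 * Create the per-port character device (e.g. /dev/cxgb0) that backs the
 * extension ioctl interface.
 */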
static int
cxgb_makedev(struct port_info *pi)
{

        pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
            UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));

        if (pi->port_cdev == NULL)
                return (ENOMEM);

        pi->port_cdev->si_drv1 = (void *)pi;

        return (0);
}

#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
static int
cxgb_port_attach(device_t dev)
{
        struct port_info *p;
        struct ifnet *ifp;
        int err;
        struct adapter *sc;

        p = device_get_softc(dev);
        sc = p->adapter;
        snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
            device_get_unit(device_get_parent(dev)), p->port_id);
        PORT_LOCK_INIT(p, p->lockbuf);

        /* Allocate an ifnet object and set it up */
        ifp = p->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "Cannot allocate ifnet\n");
                return (ENOMEM);
        }

        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_init = cxgb_init;
        ifp->if_softc = p;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = cxgb_ioctl;
        ifp->if_start = cxgb_start;

        ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
        IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
        IFQ_SET_READY(&ifp->if_snd);

        ifp->if_capabilities = CXGB_CAP;
        ifp->if_capenable = CXGB_CAP_ENABLE;
        ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;

        /*
         * Disable TSO on 4-port - it isn't supported by the firmware.
         */
        if (sc->params.nports > 2) {
                ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_hwassist &= ~CSUM_TSO;
        }

        ether_ifattach(ifp, p->hw_addr);
        ifp->if_transmit = cxgb_transmit;
        ifp->if_qflush = cxgb_qflush;

#ifdef DEFAULT_JUMBO
        if (sc->params.nports <= 2)
                ifp->if_mtu = ETHERMTU_JUMBO;
#endif
        if ((err = cxgb_makedev(p)) != 0) {
                printf("makedev failed %d\n", err);
                return (err);
        }

        /* Create a list of media supported by this port */
        ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
            cxgb_media_status);
        cxgb_build_medialist(p);

        t3_sge_init_port(p);

        return (err);
}

/*
 * cxgb_port_detach() is called via the device_detach methods when
 * cxgb_free() calls the bus_generic_detach.  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interface lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 */
static int
cxgb_port_detach(device_t dev)
{
        struct port_info *p;
        struct adapter *sc;
        int i;

        p = device_get_softc(dev);
        sc = p->adapter;

        /* Tell cxgb_ioctl and if_init that the port is going away */
        ADAPTER_LOCK(sc);
        SET_DOOMED(p);
        wakeup(&sc->flags);
        while (IS_BUSY(sc))
                mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
        SET_BUSY(sc);
        ADAPTER_UNLOCK(sc);

        if (p->port_cdev != NULL)
                destroy_dev(p->port_cdev);

        cxgb_uninit_synchronized(p);
        ether_ifdetach(p->ifp);

        for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
                struct sge_qset *qs = &sc->sge.qs[i];
                struct sge_txq *txq = &qs->txq[TXQ_ETH];

                callout_drain(&txq->txq_watchdog);
                callout_drain(&txq->txq_timer);
        }

        PORT_LOCK_DEINIT(p);
        if_free(p->ifp);
        p->ifp = NULL;

        ADAPTER_LOCK(sc);
        CLR_BUSY(sc);
        wakeup_one(&sc->flags);
        ADAPTER_UNLOCK(sc);
        return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
        u_int fw_status[4];

        if (sc->flags & FULL_INIT_DONE) {
                t3_sge_stop(sc);
                t3_write_reg(sc, A_XGM_TX_CTRL, 0);
                t3_write_reg(sc, A_XGM_RX_CTRL, 0);
                t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
                t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
                t3_intr_disable(sc);
        }
        device_printf(sc->dev, "encountered fatal error, operation suspended\n");
        if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
                device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
                    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}
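
/*
 * Walk the device's PCI capability list and return the configuration-space
 * offset of the requested capability, or 0 if it is not present.
 */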
int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
        device_t dev;
        struct pci_devinfo *dinfo;
        pcicfgregs *cfg;
        uint32_t status;
        uint8_t ptr;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);
        cfg = &dinfo->cfg;

        status = pci_read_config(dev, PCIR_STATUS, 2);
        if (!(status & PCIM_STATUS_CAPPRESENT))
                return (0);

        switch (cfg->hdrtype & PCIM_HDRTYPE) {
        case 0:
        case 1:
                ptr = PCIR_CAP_PTR;
                break;
        case 2:
                ptr = PCIR_CAP_PTR_2;
                break;
        default:
                return (0);
        }
        ptr = pci_read_config(dev, ptr, 1);

        while (ptr != 0) {
                if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
                        return (ptr);
                ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
        }

        return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_save(dev, dinfo, 0);
        return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_restore(dev, dinfo);
        return (0);
}
/**
 *      t3_os_link_changed - handle link status changes
 *      @adapter: the adapter associated with the link change
 *      @port_id: the port index whose link status has changed
 *      @link_status: the new status of the link
 *      @speed: the new speed setting
 *      @duplex: the new duplex setting
 *      @fc: the new flow-control setting
 *      @mac_was_reset: whether the MAC was reset as part of the link change
 *
 *      This is the OS-dependent handler for link status changes.  The OS
 *      neutral handler takes care of most of the processing for these events,
 *      then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc, int mac_was_reset)
{
        struct port_info *pi = &adapter->port[port_id];
        struct ifnet *ifp = pi->ifp;

        /* no race with detach, so ifp should always be good */
        KASSERT(ifp, ("%s: if detached.", __func__));

        /* Reapply mac settings if they were lost due to a reset */
        if (mac_was_reset) {
                PORT_LOCK(pi);
                cxgb_update_mac_settings(pi);
                PORT_UNLOCK(pi);
        }

        if (link_status) {
                ifp->if_baudrate = IF_Mbps(speed);
                if_link_state_change(ifp, LINK_STATE_UP);
        } else
                if_link_state_change(ifp, LINK_STATE_DOWN);
}

/**
 *      t3_os_phymod_changed - handle PHY module changes
 *      @adap: the adapter reporting the module change
 *      @port_id: the port index whose PHY module changed
 *
 *      This is the OS-dependent handler for PHY module changes.  It is
 *      invoked when a PHY module is removed or inserted for any OS-specific
 *      processing.
 */
void t3_os_phymod_changed(struct adapter *adap, int port_id)
{
        static const char *mod_str[] = {
                NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
        };
        struct port_info *pi = &adap->port[port_id];
        int mod = pi->phy.modtype;

        if (mod != pi->media.ifm_cur->ifm_data)
                cxgb_build_medialist(pi);

        if (mod == phy_modtype_none)
                if_printf(pi->ifp, "PHY module unplugged\n");
        else {
                KASSERT(mod < ARRAY_SIZE(mod_str),
                        ("invalid PHY module type %d", mod));
                if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
        }
}

/*
 * Interrupt-context handler for external (PHY) interrupts.
 */
void
t3_os_ext_intr_handler(adapter_t *sc)
{
        if (cxgb_debug)
                printf("t3_os_ext_intr_handler\n");
        /*
         * Schedule a task to handle external interrupts as they may be slow
         * and we use a mutex to protect MDIO registers.  We disable PHY
         * interrupts in the meantime and let the task reenable them when
         * it's done.
         */
        if (sc->slow_intr_mask) {
                ADAPTER_LOCK(sc);
                sc->slow_intr_mask &= ~F_T3DBG;
                t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
                taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
                ADAPTER_UNLOCK(sc);
        }
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

        /*
         * The ifnet might not be allocated before this gets called, as this
         * is called early on in attach by t3_prep_adapter, so save the
         * address off in the port structure.
         */
        if (cxgb_debug)
                printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
        bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/*
 * Programs the XGMAC based on the settings in the ifnet.  These settings
 * include MTU, MAC address, mcast addresses, etc.
 */
static void
cxgb_update_mac_settings(struct port_info *p)
{
        struct ifnet *ifp = p->ifp;
        struct t3_rx_mode rm;
        struct cmac *mac = &p->mac;
        int mtu, hwtagging;

        PORT_LOCK_ASSERT_OWNED(p);

        bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);

        mtu = ifp->if_mtu;
        if (ifp->if_capenable & IFCAP_VLAN_MTU)
                mtu += ETHER_VLAN_ENCAP_LEN;

        hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;

        t3_mac_set_mtu(mac, mtu);
        t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
        t3_mac_set_address(mac, 0, p->hw_addr);
        t3_init_rx_mode(&rm, p);
        t3_mac_set_rx_mode(mac, &rm);
}

static int
await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
                              unsigned long n)
{
        int attempts = 5;

        while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
                if (!--attempts)
                        return (ETIMEDOUT);
                t3_os_sleep(10);
        }
        return 0;
}
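
/*
 * Initialize the parity state of the TP's SMT, L2T, and routing tables
 * (plus one TCB field) with benign writes, then wait for the corresponding
 * management replies.
 */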
static int
init_tp_parity(struct adapter *adap)
{
        int i;
        struct mbuf *m;
        struct cpl_set_tcb_field *greq;
        unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;

        t3_tp_set_offload_mode(adap, 1);

        for (i = 0; i < 16; i++) {
                struct cpl_smt_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_smt_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
                req->iff = i;
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_l2t_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_l2t_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
                req->params = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_rte_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_rte_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
                req->l2t_idx = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        m = m_gethdr(M_WAITOK, MT_DATA);
        greq = mtod(m, struct cpl_set_tcb_field *);
        m->m_len = m->m_pkthdr.len = sizeof(*greq);
        memset(greq, 0, sizeof(*greq));
        greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
        greq->mask = htobe64(1);
        t3_mgmt_tx(adap, m);

        i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
        t3_tp_set_offload_mode(adap, 0);
        return (i);
}

/**
 *      setup_rss - configure Receive Side Steering (per-queue connection demux)
 *      @adap: the adapter
 *
 *      Sets up RSS to distribute packets to multiple receive queues.  We
 *      configure the RSS CPU lookup table to distribute to the number of HW
 *      receive queues, and the response queue lookup table to narrow that
 *      down to the response queues actually configured for each port.
 *      We always configure the RSS mapping for two ports since the mapping
 *      table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
        int i;
        u_int nq[2];
        uint8_t cpus[SGE_QSETS + 1];
        uint16_t rspq_map[RSS_TABLE_SIZE];

        for (i = 0; i < SGE_QSETS; ++i)
                cpus[i] = i;
        cpus[SGE_QSETS] = 0xff;

        nq[0] = nq[1] = 0;
        for_each_port(adap, i) {
                const struct port_info *pi = adap2pinfo(adap, i);

                nq[pi->tx_chan] += pi->nqsets;
        }
        for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
                rspq_map[i] = nq[0] ? i % nq[0] : 0;
                rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
        }

        /* Calculate the reverse RSS map table */
        for (i = 0; i < SGE_QSETS; ++i)
                adap->rrss_map[i] = 0xff;
        for (i = 0; i < RSS_TABLE_SIZE; ++i)
                if (adap->rrss_map[rspq_map[i]] == 0xff)
                        adap->rrss_map[rspq_map[i]] = i;

        t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
                      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
                      F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
                      cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
        int ret;

        ret = t3_offload_tx(tdev, m);
        return (ret);
}
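
/*
 * Program the SMT (source MAC table) entry for the given port with the
 * port's current hardware address.
 */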
static int
write_smt_entry(struct adapter *adapter, int idx)
{
        struct port_info *pi = &adapter->port[idx];
        struct cpl_smt_write_req *req;
        struct mbuf *m;

        if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
                return (ENOMEM);

        req = mtod(m, struct cpl_smt_write_req *);
        m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);

        req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
        req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
        req->iff = idx;
        memset(req->src_mac1, 0, sizeof(req->src_mac1));
        memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

        m_set_priority(m, 1);

        offload_tx(&adapter->tdev, m);

        return (0);
}

static int
init_smt(struct adapter *adapter)
{
        int i;

        for_each_port(adapter, i)
                write_smt_entry(adapter, i);
        return 0;
}

static void
init_port_mtus(adapter_t *adapter)
{
        unsigned int mtus = ETHERMTU | (ETHERMTU << 16);

        t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}
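
/*
 * Send a firmware management work request binding the given queue set to a
 * packet scheduler and port with the supplied min/max parameters.
 */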
1512 static void
1513 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1514                               int hi, int port)
1515 {
1516         struct mbuf *m;
1517         struct mngt_pktsched_wr *req;
1518
1519         m = m_gethdr(M_DONTWAIT, MT_DATA);
1520         if (m) {        
1521                 req = mtod(m, struct mngt_pktsched_wr *);
1522                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1523                 req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1524                 req->sched = sched;
1525                 req->idx = qidx;
1526                 req->min = lo;
1527                 req->max = hi;
1528                 req->binding = port;
1529                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1530                 t3_mgmt_tx(adap, m);
1531         }
1532 }
1533
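/*
 * Bind every queue set of every port to that port's TX channel.
 */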
1534 static void
1535 bind_qsets(adapter_t *sc)
1536 {
1537         int i, j;
1538
1539         for (i = 0; i < sc->params.nports; ++i) {
1540                 const struct port_info *pi = adap2pinfo(sc, i);
1541
1542                 for (j = 0; j < pi->nqsets; ++j) {
1543                         send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1544                                           -1, pi->tx_chan);
1545                 }
1547         }
1548 }
1549
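/*
 * If the protocol SRAM image stored in the EEPROM does not match the
 * version compiled into the driver, load the matching image with
 * firmware(9) and write it to the EEPROM.
 */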
1550 static void
1551 update_tpeeprom(struct adapter *adap)
1552 {
1553         const struct firmware *tpeeprom;
1554
1555         uint32_t version;
1556         unsigned int major, minor;
1557         int ret, len;
1558         char rev, name[32];
1559
1560         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1561
1562         major = G_TP_VERSION_MAJOR(version);
1563         minor = G_TP_VERSION_MINOR(version);
1564         if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
1565                 return; 
1566
1567         rev = t3rev2char(adap);
1568         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1569
1570         tpeeprom = firmware_get(name);
1571         if (tpeeprom == NULL) {
1572                 device_printf(adap->dev,
1573                               "could not load TP EEPROM: unable to load %s\n",
1574                               name);
1575                 return;
1576         }
1577
1578         len = tpeeprom->datasize - 4;
1579         
1580         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1581         if (ret)
1582                 goto release_tpeeprom;
1583
1584         if (len != TP_SRAM_LEN) {
1585                 device_printf(adap->dev,
1586                               "%s length is wrong len=%d expected=%d\n", name,
1587                               len, TP_SRAM_LEN);
1588                 goto release_tpeeprom;  /* don't leak the firmware ref */
1589         }
1590         
1591         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1592             TP_SRAM_OFFSET);
1593         
1594         if (!ret) {
1595                 device_printf(adap->dev,
1596                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1597                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1598         } else 
1599                 device_printf(adap->dev,
1600                               "Protocol SRAM image update in EEPROM failed\n");
1601
1602 release_tpeeprom:
1603         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1604         
1605         return;
1606 }
1607
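/*
 * Refresh the protocol SRAM image in the EEPROM if necessary, then load
 * the protocol SRAM itself from the tpsram firmware(9) image.
 */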
1608 static int
1609 update_tpsram(struct adapter *adap)
1610 {
1611         const struct firmware *tpsram;
1612         int ret;
1613         char rev, name[32];
1614
1615         rev = t3rev2char(adap);
1616         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1617
1618         update_tpeeprom(adap);
1619
1620         tpsram = firmware_get(name);
1621         if (tpsram == NULL) {
1622                 device_printf(adap->dev, "could not load TP SRAM\n");
1623                 return (EINVAL);
1624         } else
1625                 device_printf(adap->dev, "updating TP SRAM\n");
1626         
1627         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1628         if (ret)
1629                 goto release_tpsram;    
1630
1631         ret = t3_set_proto_sram(adap, tpsram->data);
1632         if (ret)
1633                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1634
1635 release_tpsram:
1636         firmware_put(tpsram, FIRMWARE_UNLOAD);
1637         
1638         return (ret);
1639 }
1640
1641 /**
1642  *      cxgb_up - enable the adapter
1643  *      @adap: adapter being enabled
1644  *
1645  *      Called when the first port is enabled, this function performs the
1646  *      actions necessary to make an adapter operational, such as completing
1647  *      the initialization of HW modules, and enabling interrupts.
1648  */
1649 static int
1650 cxgb_up(struct adapter *sc)
1651 {
1652         int err = 0;
1653
1654         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1655                                            __func__, sc->open_device_map));
1656
1657         if ((sc->flags & FULL_INIT_DONE) == 0) {
1658
1659                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1660
1661                 if ((sc->flags & FW_UPTODATE) == 0)
1662                         if ((err = upgrade_fw(sc)))
1663                                 goto out;
1664
1665                 if ((sc->flags & TPS_UPTODATE) == 0)
1666                         if ((err = update_tpsram(sc)))
1667                                 goto out;
1668
1669                 if (is_offload(sc)) {
1670                         sc->params.mc5.nservers = 0;
1671                         sc->params.mc5.nroutes = 0;
1672                         sc->params.mc5.nfilters = t3_mc5_size(&sc->mc5) -
1673                             MC5_MIN_TIDS;
1674                 }
1675
1676                 err = t3_init_hw(sc, 0);
1677                 if (err)
1678                         goto out;
1679
1680                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1681                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1682
1683                 err = setup_sge_qsets(sc);
1684                 if (err)
1685                         goto out;
1686
1687                 alloc_filters(sc);
1688                 setup_rss(sc);
1689
1690                 t3_intr_clear(sc);
1691                 err = cxgb_setup_interrupts(sc);
1692                 if (err)
1693                         goto out;
1694
1695                 t3_add_configured_sysctls(sc);
1696                 sc->flags |= FULL_INIT_DONE;
1697         }
1698
1699         t3_intr_clear(sc);
1700         t3_sge_start(sc);
1701         t3_intr_enable(sc);
1702
1703         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1704             is_offload(sc) && init_tp_parity(sc) == 0)
1705                 sc->flags |= TP_PARITY_INIT;
1706
1707         if (sc->flags & TP_PARITY_INIT) {
1708                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1709                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1710         }
1711         
1712         if (!(sc->flags & QUEUES_BOUND)) {
1713                 bind_qsets(sc);
1714                 setup_hw_filters(sc);
1715                 sc->flags |= QUEUES_BOUND;              
1716         }
1717
1718         t3_sge_reset_adapter(sc);
1719 out:
1720         return (err);
1721 }
1722
1723 /*
1724  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1725  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1726  * during controller_detach, not here.
1727  */
1728 static void
1729 cxgb_down(struct adapter *sc)
1730 {
1731         t3_sge_stop(sc);
1732         t3_intr_disable(sc);
1733 }
1734
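/*
 * Mark the offload device open, put the TP in offload mode, program the
 * MTU table and SMT entries, and notify all registered ULP clients.
 */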
1735 static int
1736 offload_open(struct port_info *pi)
1737 {
1738         struct adapter *sc = pi->adapter;
1739         struct t3cdev *tdev = &sc->tdev;
1740
1741         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1742
1743         t3_tp_set_offload_mode(sc, 1);
1744         tdev->lldev = pi->ifp;
1745         init_port_mtus(sc);
1746         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1747                      sc->params.rev == 0 ? sc->port[0].ifp->if_mtu : 0xffff);
1748         init_smt(sc);
1749         cxgb_add_clients(tdev);
1750
1751         return (0);
1752 }
1753
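/*
 * Undo offload_open: call back the ULP clients, install dummy offload
 * operations, and take the TP out of offload mode.
 */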
1754 static int
1755 offload_close(struct t3cdev *tdev)
1756 {
1757         struct adapter *adapter = tdev2adap(tdev);
1758
1759         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1760                 return (0);
1761
1762         /* Call back all registered clients */
1763         cxgb_remove_clients(tdev);
1764
1765         tdev->lldev = NULL;
1766         cxgb_set_dummy_ops(tdev);
1767         t3_tp_set_offload_mode(adapter, 0);
1768
1769         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1770
1771         return (0);
1772 }
1773
1774 /*
1775  * if_init for cxgb ports.
1776  */
1777 static void
1778 cxgb_init(void *arg)
1779 {
1780         struct port_info *p = arg;
1781         struct adapter *sc = p->adapter;
1782
1783         ADAPTER_LOCK(sc);
1784         cxgb_init_locked(p); /* releases adapter lock */
1785         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1786 }
1787
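/*
 * Bring a port up.  Performs the one-time adapter initialization (cxgb_up)
 * if this is the first device to be opened, then enables the port's MAC
 * and link.  Called with the adapter lock held; the lock is always
 * released before this returns.
 */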
1788 static int
1789 cxgb_init_locked(struct port_info *p)
1790 {
1791         struct adapter *sc = p->adapter;
1792         struct ifnet *ifp = p->ifp;
1793         struct cmac *mac = &p->mac;
1794         int i, rc = 0, may_sleep = 0;
1795
1796         ADAPTER_LOCK_ASSERT_OWNED(sc);
1797
1798         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1799                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1800                         rc = EINTR;
1801                         goto done;
1802                 }
1803         }
1804         if (IS_DOOMED(p)) {
1805                 rc = ENXIO;
1806                 goto done;
1807         }
1808         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1809
1810         /*
1811          * The code that runs during one-time adapter initialization can sleep
1812          * so it's important not to hold any locks across it.
1813          */
1814         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1815
1816         if (may_sleep) {
1817                 SET_BUSY(sc);
1818                 ADAPTER_UNLOCK(sc);
1819         }
1820
1821         if (sc->open_device_map == 0) {
1822                 if ((rc = cxgb_up(sc)) != 0)
1823                         goto done;
1824
1825                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1826                         log(LOG_WARNING,
1827                             "Could not initialize offload capabilities\n");
1828         }
1829
1830         PORT_LOCK(p);
1831         if (isset(&sc->open_device_map, p->port_id) &&
1832             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1833                 PORT_UNLOCK(p);
1834                 goto done;
1835         }
1836         t3_port_intr_enable(sc, p->port_id);
1837         if (!mac->multiport) 
1838                 t3_mac_init(mac);
1839         cxgb_update_mac_settings(p);
1840         t3_link_start(&p->phy, mac, &p->link_config);
1841         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1842         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1843         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1844         PORT_UNLOCK(p);
1845
1846         t3_link_changed(sc, p->port_id);
1847
1848         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1849                 struct sge_qset *qs = &sc->sge.qs[i];
1850                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1851
1852                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1853                                  txq->txq_watchdog.c_cpu);
1854         }
1855
1856         /* all ok */
1857         setbit(&sc->open_device_map, p->port_id);
1858
1859 done:
1860         if (may_sleep) {
1861                 ADAPTER_LOCK(sc);
1862                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1863                 CLR_BUSY(sc);
1864                 wakeup_one(&sc->flags);
1865         }
1866         ADAPTER_UNLOCK(sc);
1867         return (rc);
1868 }
1869
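/*
 * Locked wrapper around cxgb_uninit_synchronized.  Marks the adapter busy
 * and drops the adapter lock around the sleepable teardown work.  The lock
 * is released before returning.
 */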
1870 static int
1871 cxgb_uninit_locked(struct port_info *p)
1872 {
1873         struct adapter *sc = p->adapter;
1874         int rc;
1875
1876         ADAPTER_LOCK_ASSERT_OWNED(sc);
1877
1878         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1879                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1880                         rc = EINTR;
1881                         goto done;
1882                 }
1883         }
1884         if (IS_DOOMED(p)) {
1885                 rc = ENXIO;
1886                 goto done;
1887         }
1888         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1889         SET_BUSY(sc);
1890         ADAPTER_UNLOCK(sc);
1891
1892         rc = cxgb_uninit_synchronized(p);
1893
1894         ADAPTER_LOCK(sc);
1895         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1896         CLR_BUSY(sc);
1897         wakeup_one(&sc->flags);
1898 done:
1899         ADAPTER_UNLOCK(sc);
1900         return (rc);
1901 }
1902
1903 /*
1904  * Called on "ifconfig down", and from port_detach
1905  */
1906 static int
1907 cxgb_uninit_synchronized(struct port_info *pi)
1908 {
1909         struct adapter *sc = pi->adapter;
1910         struct ifnet *ifp = pi->ifp;
1911
1912         /*
1913          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1914          */
1915         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1916
1917         /*
1918          * Clear this port's bit from the open device map, and then drain all
1919          * the tasks that can access/manipulate this port's port_info or ifp.
1920          * We disable this port's interrupts here, and so the slow/ext
1921          * interrupt tasks won't be enqueued.  The tick task will continue to
1922          * be enqueued every second but the runs after this drain will not see
1923          * this port in the open device map.
1924          *
1925          * A well-behaved task must take open_device_map into account and ignore
1926          * ports that are not open.
1927          */
1928         clrbit(&sc->open_device_map, pi->port_id);
1929         t3_port_intr_disable(sc, pi->port_id);
1930         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1931         taskqueue_drain(sc->tq, &sc->ext_intr_task);
1932         taskqueue_drain(sc->tq, &sc->tick_task);
1933
1934         PORT_LOCK(pi);
1935         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1936
1937         /* disable pause frames */
1938         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1939
1940         /* Reset RX FIFO HWM */
1941         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1942                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1943
1944         DELAY(100 * 1000);
1945
1946         /* Wait for TXFIFO empty */
1947         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1948                         F_TXFIFO_EMPTY, 1, 20, 5);
1949
1950         DELAY(100 * 1000);
1951         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1952
1954         pi->phy.ops->power_down(&pi->phy, 1);
1955
1956         PORT_UNLOCK(pi);
1957
1958         pi->link_config.link_ok = 0;
1959         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1960
1961         if ((sc->open_device_map & PORT_MASK) == 0)
1962                 offload_close(&sc->tdev);
1963
1964         if (sc->open_device_map == 0)
1965                 cxgb_down(pi->adapter);
1966
1967         return (0);
1968 }
1969
1970 /*
1971  * Mark lro enabled or disabled in all qsets for this port
1972  */
1973 static int
1974 cxgb_set_lro(struct port_info *p, int enabled)
1975 {
1976         int i;
1977         struct adapter *adp = p->adapter;
1978         struct sge_qset *q;
1979
1980         for (i = 0; i < p->nqsets; i++) {
1981                 q = &adp->sge.qs[p->first_qset + i];
1982                 q->lro.enabled = (enabled != 0);
1983         }
1984         return (0);
1985 }
1986
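/*
 * if_ioctl for cxgb ports.
 */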
1987 static int
1988 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1989 {
1990         struct port_info *p = ifp->if_softc;
1991         struct adapter *sc = p->adapter;
1992         struct ifreq *ifr = (struct ifreq *)data;
1993         int flags, error = 0, mtu;
1994         uint32_t mask;
1995
1996         switch (command) {
1997         case SIOCSIFMTU:
1998                 ADAPTER_LOCK(sc);
1999                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2000                 if (error) {
2001 fail:
2002                         ADAPTER_UNLOCK(sc);
2003                         return (error);
2004                 }
2005
2006                 mtu = ifr->ifr_mtu;
2007                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2008                         error = EINVAL;
2009                 } else {
2010                         ifp->if_mtu = mtu;
2011                         PORT_LOCK(p);
2012                         cxgb_update_mac_settings(p);
2013                         PORT_UNLOCK(p);
2014                 }
2015                 ADAPTER_UNLOCK(sc);
2016                 break;
2017         case SIOCSIFFLAGS:
2018                 ADAPTER_LOCK(sc);
2019                 if (IS_DOOMED(p)) {
2020                         error = ENXIO;
2021                         goto fail;
2022                 }
2023                 if (ifp->if_flags & IFF_UP) {
2024                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2025                                 flags = p->if_flags;
2026                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2027                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2028                                         if (IS_BUSY(sc)) {
2029                                                 error = EBUSY;
2030                                                 goto fail;
2031                                         }
2032                                         PORT_LOCK(p);
2033                                         cxgb_update_mac_settings(p);
2034                                         PORT_UNLOCK(p);
2035                                 }
2036                                 ADAPTER_UNLOCK(sc);
2037                         } else
2038                                 error = cxgb_init_locked(p);
2039                         p->if_flags = ifp->if_flags;
2040                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2041                         error = cxgb_uninit_locked(p);
2042                 else
2043                         ADAPTER_UNLOCK(sc);
2044
2045                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2046                 break;
2047         case SIOCADDMULTI:
2048         case SIOCDELMULTI:
2049                 ADAPTER_LOCK(sc);
2050                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2051                 if (error)
2052                         goto fail;
2053
2054                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2055                         PORT_LOCK(p);
2056                         cxgb_update_mac_settings(p);
2057                         PORT_UNLOCK(p);
2058                 }
2059                 ADAPTER_UNLOCK(sc);
2060
2061                 break;
2062         case SIOCSIFCAP:
2063                 ADAPTER_LOCK(sc);
2064                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2065                 if (error)
2066                         goto fail;
2067
2068                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2069                 if (mask & IFCAP_TXCSUM) {
2070                         ifp->if_capenable ^= IFCAP_TXCSUM;
2071                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2072
2073                         if (IFCAP_TSO & ifp->if_capenable &&
2074                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2075                                 ifp->if_capenable &= ~IFCAP_TSO;
2076                                 ifp->if_hwassist &= ~CSUM_TSO;
2077                                 if_printf(ifp,
2078                                     "tso disabled due to -txcsum.\n");
2079                         }
2080                 }
2081                 if (mask & IFCAP_RXCSUM)
2082                         ifp->if_capenable ^= IFCAP_RXCSUM;
2083                 if (mask & IFCAP_TSO4) {
2084                         ifp->if_capenable ^= IFCAP_TSO4;
2085
2086                         if (IFCAP_TSO & ifp->if_capenable) {
2087                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2088                                         ifp->if_hwassist |= CSUM_TSO;
2089                                 else {
2090                                         ifp->if_capenable &= ~IFCAP_TSO;
2091                                         ifp->if_hwassist &= ~CSUM_TSO;
2092                                         if_printf(ifp,
2093                                             "enable txcsum first.\n");
2094                                         error = EAGAIN;
2095                                 }
2096                         } else
2097                                 ifp->if_hwassist &= ~CSUM_TSO;
2098                 }
2099                 if (mask & IFCAP_LRO) {
2100                         ifp->if_capenable ^= IFCAP_LRO;
2101
2102                         /* Safe to do this even if cxgb_up not called yet */
2103                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2104                 }
2105                 if (mask & IFCAP_VLAN_HWTAGGING) {
2106                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2107                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2108                                 PORT_LOCK(p);
2109                                 cxgb_update_mac_settings(p);
2110                                 PORT_UNLOCK(p);
2111                         }
2112                 }
2113                 if (mask & IFCAP_VLAN_MTU) {
2114                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2115                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2116                                 PORT_LOCK(p);
2117                                 cxgb_update_mac_settings(p);
2118                                 PORT_UNLOCK(p);
2119                         }
2120                 }
2121                 if (mask & IFCAP_VLAN_HWTSO)
2122                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2123                 if (mask & IFCAP_VLAN_HWCSUM)
2124                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2125
2126 #ifdef VLAN_CAPABILITIES
2127                 VLAN_CAPABILITIES(ifp);
2128 #endif
2129                 ADAPTER_UNLOCK(sc);
2130                 break;
2131         case SIOCSIFMEDIA:
2132         case SIOCGIFMEDIA:
2133                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2134                 break;
2135         default:
2136                 error = ether_ioctl(ifp, command, data);
2137         }
2138
2139         return (error);
2140 }
2141
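/*
 * if_media change callback.  Manual media selection is not supported.
 */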
2142 static int
2143 cxgb_media_change(struct ifnet *ifp)
2144 {
2145         return (EOPNOTSUPP);
2146 }
2147
2148 /*
2149  * Translates phy->modtype to the correct Ethernet media subtype.
2150  */
2151 static int
2152 cxgb_ifm_type(int mod)
2153 {
2154         switch (mod) {
2155         case phy_modtype_sr:
2156                 return (IFM_10G_SR);
2157         case phy_modtype_lr:
2158                 return (IFM_10G_LR);
2159         case phy_modtype_lrm:
2160                 return (IFM_10G_LRM);
2161         case phy_modtype_twinax:
2162                 return (IFM_10G_TWINAX);
2163         case phy_modtype_twinax_long:
2164                 return (IFM_10G_TWINAX_LONG);
2165         case phy_modtype_none:
2166                 return (IFM_NONE);
2167         case phy_modtype_unknown:
2168                 return (IFM_UNKNOWN);
2169         }
2170
2171         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2172         return (IFM_UNKNOWN);
2173 }
2174
2175 /*
2176  * Rebuilds the ifmedia list for this port, and sets the current media.
2177  */
2178 static void
2179 cxgb_build_medialist(struct port_info *p)
2180 {
2181         struct cphy *phy = &p->phy;
2182         struct ifmedia *media = &p->media;
2183         int mod = phy->modtype;
2184         int m = IFM_ETHER | IFM_FDX;
2185
2186         PORT_LOCK(p);
2187
2188         ifmedia_removeall(media);
2189         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2190                 /* Copper (RJ45) */
2191
2192                 if (phy->caps & SUPPORTED_10000baseT_Full)
2193                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2194
2195                 if (phy->caps & SUPPORTED_1000baseT_Full)
2196                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2197
2198                 if (phy->caps & SUPPORTED_100baseT_Full)
2199                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2200
2201                 if (phy->caps & SUPPORTED_10baseT_Full)
2202                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2203
2204                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2205                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2206
2207         } else if (phy->caps & SUPPORTED_TP) {
2208                 /* Copper (CX4) */
2209
2210                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2211                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2212
2213                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2214                 ifmedia_set(media, m | IFM_10G_CX4);
2215
2216         } else if (phy->caps & SUPPORTED_FIBRE &&
2217                    phy->caps & SUPPORTED_10000baseT_Full) {
2218                 /* 10G optical (but includes SFP+ twinax) */
2219
2220                 m |= cxgb_ifm_type(mod);
2221                 if (IFM_SUBTYPE(m) == IFM_NONE)
2222                         m &= ~IFM_FDX;
2223
2224                 ifmedia_add(media, m, mod, NULL);
2225                 ifmedia_set(media, m);
2226
2227         } else if (phy->caps & SUPPORTED_FIBRE &&
2228                    phy->caps & SUPPORTED_1000baseT_Full) {
2229                 /* 1G optical */
2230
2231                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2232                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2233                 ifmedia_set(media, m | IFM_1000_SX);
2234
2235         } else {
2236                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2237                             phy->caps));
2238         }
2239
2240         PORT_UNLOCK(p);
2241 }
2242
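/*
 * if_media status callback.  Rebuilds the media list if the module type
 * has changed, then reports link state and the active media.
 */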
2243 static void
2244 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2245 {
2246         struct port_info *p = ifp->if_softc;
2247         struct ifmedia_entry *cur = p->media.ifm_cur;
2248         int speed = p->link_config.speed;
2249
2250         if (cur->ifm_data != p->phy.modtype) {
2251                 cxgb_build_medialist(p);
2252                 cur = p->media.ifm_cur;
2253         }
2254
2255         ifmr->ifm_status = IFM_AVALID;
2256         if (!p->link_config.link_ok)
2257                 return;
2258
2259         ifmr->ifm_status |= IFM_ACTIVE;
2260
2261         /*
2262          * active and current will differ iff current media is autoselect.  That
2263          * can happen only for copper RJ45.
2264          */
2265         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2266                 return;
2267         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2268                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2269
2270         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2271         if (speed == SPEED_10000)
2272                 ifmr->ifm_active |= IFM_10G_T;
2273         else if (speed == SPEED_1000)
2274                 ifmr->ifm_active |= IFM_1000_T;
2275         else if (speed == SPEED_100)
2276                 ifmr->ifm_active |= IFM_100_TX;
2277         else if (speed == SPEED_10)
2278                 ifmr->ifm_active |= IFM_10_T;
2279         else
2280                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2281                             speed));
2282 }
2283
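/*
 * Interrupt handler for the adapter's slow (non data-path) interrupts.
 */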
2284 static void
2285 cxgb_async_intr(void *data)
2286 {
2287         adapter_t *sc = data;
2288
2289         if (cxgb_debug)
2290                 device_printf(sc->dev, "cxgb_async_intr\n");
2291         /*
2292          * May need to sleep - defer to taskqueue
2293          */
2294         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2295 }
2296
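/*
 * Task that services external (PHY) interrupts and then re-enables the
 * T3DBG interrupt.
 */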
2297 static void
2298 cxgb_ext_intr_handler(void *arg, int count)
2299 {
2300         adapter_t *sc = (adapter_t *)arg;
2301
2302         if (cxgb_debug)
2303                 printf("cxgb_ext_intr_handler\n");
2304
2305         t3_phy_intr_handler(sc);
2306
2307         /* Now reenable external interrupts */
2308         ADAPTER_LOCK(sc);
2309         if (sc->slow_intr_mask) {
2310                 sc->slow_intr_mask |= F_T3DBG;
2311                 t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2312                 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2313         }
2314         ADAPTER_UNLOCK(sc);
2315 }
2316
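/*
 * Returns nonzero if the port's link state must be polled: the PHY lacks
 * a usable link interrupt, a link fault is pending, or this is the first
 * check since the port was opened.
 */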
2317 static inline int
2318 link_poll_needed(struct port_info *p)
2319 {
2320         struct cphy *phy = &p->phy;
2321
2322         if (phy->caps & POLL_LINK_1ST_TIME) {
2323                 p->phy.caps &= ~POLL_LINK_1ST_TIME;
2324                 return (1);
2325         }
2326
2327         return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2328 }
2329
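/*
 * Poll the link state of every open port that needs it.
 */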
2330 static void
2331 check_link_status(adapter_t *sc)
2332 {
2333         int i;
2334
2335         for (i = 0; i < sc->params.nports; ++i) {
2336                 struct port_info *p = &sc->port[i];
2337
2338                 if (!isset(&sc->open_device_map, p->port_id))
2339                         continue;
2340
2341                 if (link_poll_needed(p))
2342                         t3_link_changed(sc, i);
2343         }
2344 }
2345
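/*
 * Run the T3B2 MAC watchdog on every open port with a healthy link and
 * toggle or reset the MAC as the watchdog requests.
 */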
2346 static void
2347 check_t3b2_mac(struct adapter *sc)
2348 {
2349         int i;
2350
2351         if (sc->flags & CXGB_SHUTDOWN)
2352                 return;
2353
2354         for_each_port(sc, i) {
2355                 struct port_info *p = &sc->port[i];
2356                 int status;
2357 #ifdef INVARIANTS
2358                 struct ifnet *ifp = p->ifp;
2359 #endif          
2360
2361                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2362                     !p->link_config.link_ok)
2363                         continue;
2364
2365                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2366                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2367                          __func__, ifp->if_drv_flags, sc->open_device_map));
2368
2369                 PORT_LOCK(p);
2370                 status = t3b2_mac_watchdog_task(&p->mac);
2371                 if (status == 1)
2372                         p->mac.stats.num_toggled++;
2373                 else if (status == 2) {
2374                         struct cmac *mac = &p->mac;
2375
2376                         cxgb_update_mac_settings(p);
2377                         t3_link_start(&p->phy, mac, &p->link_config);
2378                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2379                         t3_port_intr_enable(sc, p->port_id);
2380                         p->mac.stats.num_resets++;
2381                 }
2382                 PORT_UNLOCK(p);
2383         }
2384 }
2385
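/*
 * Periodic callout: queues the tick task and reschedules itself.
 */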
2386 static void
2387 cxgb_tick(void *arg)
2388 {
2389         adapter_t *sc = (adapter_t *)arg;
2390
2391         if (sc->flags & CXGB_SHUTDOWN)
2392                 return;
2393
2394         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2395         callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2396 }
2397
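/*
 * Once-a-second housekeeping task: polls link state, runs the T3B2 MAC
 * watchdog where applicable, accounts for starved response queues and
 * empty free lists, and refreshes the interface statistics from the MAC
 * counters.
 */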
2398 static void
2399 cxgb_tick_handler(void *arg, int count)
2400 {
2401         adapter_t *sc = (adapter_t *)arg;
2402         const struct adapter_params *p = &sc->params;
2403         int i;
2404         uint32_t cause, reset;
2405
2406         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2407                 return;
2408
2409         check_link_status(sc);
2410
2411         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2412                 check_t3b2_mac(sc);
2413
2414         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2415         if (cause) {
2416                 struct sge_qset *qs = &sc->sge.qs[0];
2417                 uint32_t mask, v;
2418
2419                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2420
2421                 mask = 1;
2422                 for (i = 0; i < SGE_QSETS; i++) {
2423                         if (v & mask)
2424                                 qs[i].rspq.starved++;
2425                         mask <<= 1;
2426                 }
2427
2428                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2429
2430                 for (i = 0; i < SGE_QSETS * 2; i++) {
2431                         if (v & mask) {
2432                                 qs[i / 2].fl[i % 2].empty++;
2433                         }
2434                         mask <<= 1;
2435                 }
2436
2437                 /* clear */
2438                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2439                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2440         }
2441
2442         for (i = 0; i < sc->params.nports; i++) {
2443                 struct port_info *pi = &sc->port[i];
2444                 struct ifnet *ifp = pi->ifp;
2445                 struct cmac *mac = &pi->mac;
2446                 struct mac_stats *mstats = &mac->stats;
2447                 int drops, j;
2448
2449                 if (!isset(&sc->open_device_map, pi->port_id))
2450                         continue;
2451
2452                 PORT_LOCK(pi);
2453                 t3_mac_update_stats(mac);
2454                 PORT_UNLOCK(pi);
2455
2456                 ifp->if_opackets = mstats->tx_frames;
2457                 ifp->if_ipackets = mstats->rx_frames;
2458                 ifp->if_obytes = mstats->tx_octets;
2459                 ifp->if_ibytes = mstats->rx_octets;
2460                 ifp->if_omcasts = mstats->tx_mcast_frames;
2461                 ifp->if_imcasts = mstats->rx_mcast_frames;
2462                 ifp->if_collisions = mstats->tx_total_collisions;
2463                 ifp->if_iqdrops = mstats->rx_cong_drops;
2464
2465                 drops = 0;
2466                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2467                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2468                 ifp->if_snd.ifq_drops = drops;
2469
2470                 ifp->if_oerrors =
2471                     mstats->tx_excess_collisions +
2472                     mstats->tx_underrun +
2473                     mstats->tx_len_errs +
2474                     mstats->tx_mac_internal_errs +
2475                     mstats->tx_excess_deferral +
2476                     mstats->tx_fcs_errs;
2477                 ifp->if_ierrors =
2478                     mstats->rx_jabber +
2479                     mstats->rx_data_errs +
2480                     mstats->rx_sequence_errs +
2481                     mstats->rx_runt + 
2482                     mstats->rx_too_long +
2483                     mstats->rx_mac_internal_errs +
2484                     mstats->rx_short +
2485                     mstats->rx_fcs_errs;
2486
2487                 if (mac->multiport)
2488                         continue;
2489
2490                 /* Count rx fifo overflows, once per second */
2491                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2492                 reset = 0;
2493                 if (cause & F_RXFIFO_OVERFLOW) {
2494                         mac->stats.rx_fifo_ovfl++;
2495                         reset |= F_RXFIFO_OVERFLOW;
2496                 }
2497                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2498         }
2499 }
2500
2501 static void
2502 touch_bars(device_t dev)
2503 {
2504         /*
2505          * Don't enable yet
2506          */
2507 #if !defined(__LP64__) && 0
2508         u32 v;
2509
2510         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2511         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2512         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2513         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2514         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2515         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2516 #endif
2517 }
2518
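/*
 * Write "len" bytes at "offset" into the adapter's serial EEPROM.  The
 * EEPROM is accessed in 4-byte words, so unaligned writes are handled
 * with a read-modify-write of the boundary words.
 */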
2519 static int
2520 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2521 {
2522         uint8_t *buf;
2523         int err = 0;
2524         u32 aligned_offset, aligned_len, *p;
2525         struct adapter *adapter = pi->adapter;
2526
2528         aligned_offset = offset & ~3;
2529         aligned_len = (len + (offset & 3) + 3) & ~3;
2530
2531         if (aligned_offset != offset || aligned_len != len) {
2532                 /* M_WAITOK allocations sleep until they succeed. */
2533                 buf = malloc(aligned_len, M_DEVBUF, M_WAITOK | M_ZERO);
2535                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2536                 if (!err && aligned_len > 4)
2537                         err = t3_seeprom_read(adapter,
2538                                               aligned_offset + aligned_len - 4,
2539                                               (u32 *)&buf[aligned_len - 4]);
2540                 if (err)
2541                         goto out;
2542                 memcpy(buf + (offset & 3), data, len);
2543         } else
2544                 buf = (uint8_t *)(uintptr_t)data;
2545
2546         err = t3_seeprom_wp(adapter, 0);
2547         if (err)
2548                 goto out;
2549
2550         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2551                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2552                 aligned_offset += 4;
2553         }
2554
2555         if (!err)
2556                 err = t3_seeprom_wp(adapter, 1);
2557 out:
2558         if (buf != data)
2559                 free(buf, M_DEVBUF);
2560         return (err);
2561 }
2562
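/*
 * Returns true if val lies within [lo, hi].  Negative values mean "leave
 * this parameter unchanged" and are always accepted.
 */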
2564 static int
2565 in_range(int val, int lo, int hi)
2566 {
2567         return (val < 0 || (val <= hi && val >= lo));
2568 }
2569
2570 static int
2571 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2572 {
2573         return (0);
2574 }
2575
2576 static int
2577 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2578 {
2579         return (0);
2580 }
2581
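/*
 * ioctl handler for the adapter's control character device.  All of these
 * operations require driver privileges.
 */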
2582 static int
2583 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2584     int fflag, struct thread *td)
2585 {
2586         int mmd, error = 0;
2587         struct port_info *pi = dev->si_drv1;
2588         adapter_t *sc = pi->adapter;
2589
2590 #ifdef PRIV_SUPPORTED   
2591         if (priv_check(td, PRIV_DRIVER)) {
2592                 if (cxgb_debug) 
2593                         printf("user does not have access to privileged ioctls\n");
2594                 return (EPERM);
2595         }
2596 #else
2597         if (suser(td)) {
2598                 if (cxgb_debug)
2599                         printf("user does not have access to privileged ioctls\n");
2600                 return (EPERM);
2601         }
2602 #endif
2603         
2604         switch (cmd) {
2605         case CHELSIO_GET_MIIREG: {
2606                 uint32_t val;
2607                 struct cphy *phy = &pi->phy;
2608                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2609                 
2610                 if (!phy->mdio_read)
2611                         return (EOPNOTSUPP);
2612                 if (is_10G(sc)) {
2613                         mmd = mid->phy_id >> 8;
2614                         if (!mmd)
2615                                 mmd = MDIO_DEV_PCS;
2616                         else if (mmd > MDIO_DEV_VEND2)
2617                                 return (EINVAL);
2618
2619                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2620                                              mid->reg_num, &val);
2621                 } else
2622                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2623                                              mid->reg_num & 0x1f, &val);
2624                 if (error == 0)
2625                         mid->val_out = val;
2626                 break;
2627         }
2628         case CHELSIO_SET_MIIREG: {
2629                 struct cphy *phy = &pi->phy;
2630                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2631
2632                 if (!phy->mdio_write)
2633                         return (EOPNOTSUPP);
2634                 if (is_10G(sc)) {
2635                         mmd = mid->phy_id >> 8;
2636                         if (!mmd)
2637                                 mmd = MDIO_DEV_PCS;
2638                         else if (mmd > MDIO_DEV_VEND2)
2639                                 return (EINVAL);
2640                         
2641                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2642                                               mmd, mid->reg_num, mid->val_in);
2643                 } else
2644                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2645                                               mid->reg_num & 0x1f,
2646                                               mid->val_in);
2647                 break;
2648         }
2649         case CHELSIO_SETREG: {
2650                 struct ch_reg *edata = (struct ch_reg *)data;
2651                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2652                         return (EFAULT);
2653                 t3_write_reg(sc, edata->addr, edata->val);
2654                 break;
2655         }
2656         case CHELSIO_GETREG: {
2657                 struct ch_reg *edata = (struct ch_reg *)data;
2658                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2659                         return (EFAULT);
2660                 edata->val = t3_read_reg(sc, edata->addr);
2661                 break;
2662         }
2663         case CHELSIO_GET_SGE_CONTEXT: {
2664                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2665                 mtx_lock_spin(&sc->sge.reg_lock);
2666                 switch (ecntxt->cntxt_type) {
2667                 case CNTXT_TYPE_EGRESS:
2668                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2669                             ecntxt->data);
2670                         break;
2671                 case CNTXT_TYPE_FL:
2672                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2673                             ecntxt->data);
2674                         break;
2675                 case CNTXT_TYPE_RSP:
2676                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2677                             ecntxt->data);
2678                         break;
2679                 case CNTXT_TYPE_CQ:
2680                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2681                             ecntxt->data);
2682                         break;
2683                 default:
2684                         error = EINVAL;
2685                         break;
2686                 }
2687                 mtx_unlock_spin(&sc->sge.reg_lock);
2688                 break;
2689         }
2690         case CHELSIO_GET_SGE_DESC: {
2691                 struct ch_desc *edesc = (struct ch_desc *)data;
2692                 int ret;
2693                 if (edesc->queue_num >= SGE_QSETS * 6)
2694                         return (EINVAL);
2695                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2696                     edesc->queue_num % 6, edesc->idx, edesc->data);
2697                 if (ret < 0)
2698                         return (EINVAL);
2699                 edesc->size = ret;
2700                 break;
2701         }
2702         case CHELSIO_GET_QSET_PARAMS: {
2703                 struct qset_params *q;
2704                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2705                 int q1 = pi->first_qset;
2706                 int nqsets = pi->nqsets;
2707                 int i;
2708
2709                 if (t->qset_idx >= nqsets)
2710                         return (EINVAL);
2711
2712                 i = q1 + t->qset_idx;
2713                 q = &sc->params.sge.qset[i];
2714                 t->rspq_size   = q->rspq_size;
2715                 t->txq_size[0] = q->txq_size[0];
2716                 t->txq_size[1] = q->txq_size[1];
2717                 t->txq_size[2] = q->txq_size[2];
2718                 t->fl_size[0]  = q->fl_size;
2719                 t->fl_size[1]  = q->jumbo_size;
2720                 t->polling     = q->polling;
2721                 t->lro         = q->lro;
2722                 t->intr_lat    = q->coalesce_usecs;
2723                 t->cong_thres  = q->cong_thres;
2724                 t->qnum        = i;
2725
2726                 if ((sc->flags & FULL_INIT_DONE) == 0)
2727                         t->vector = 0;
2728                 else if (sc->flags & USING_MSIX)
2729                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2730                 else
2731                         t->vector = rman_get_start(sc->irq_res);
2732
2733                 break;
2734         }
2735         case CHELSIO_GET_QSET_NUM: {
2736                 struct ch_reg *edata = (struct ch_reg *)data;
2737                 edata->val = pi->nqsets;
2738                 break;
2739         }
2740         case CHELSIO_LOAD_FW: {
2741                 uint8_t *fw_data;
2742                 uint32_t vers;
2743                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2744
2745                 /*
2746                  * You're allowed to load a firmware only before FULL_INIT_DONE
2747                  *
2748                  * FW_UPTODATE is also set so the rest of the initialization
2749                  * will not overwrite what was loaded here.  This gives you the
2750                  * flexibility to load any firmware (and maybe shoot yourself in
2751                  * the foot).
2752                  */
2753
2754                 ADAPTER_LOCK(sc);
2755                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2756                         ADAPTER_UNLOCK(sc);
2757                         return (EBUSY);
2758                 }
2759
2760                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2761                 if (!fw_data)
2762                         error = ENOMEM;
2763                 else
2764                         error = copyin(t->buf, fw_data, t->len);
2765
2766                 if (!error)
2767                         error = -t3_load_fw(sc, fw_data, t->len);
2768
2769                 if (t3_get_fw_version(sc, &vers) == 0) {
2770                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2771                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2772                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2773                 }
2774
2775                 if (!error)
2776                         sc->flags |= FW_UPTODATE;
2777
2778                 free(fw_data, M_DEVBUF);
2779                 ADAPTER_UNLOCK(sc);
2780                 break;
2781         }
2782         case CHELSIO_LOAD_BOOT: {
2783                 uint8_t *boot_data;
2784                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2785
2786                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2787                 if (!boot_data)
2788                         return (ENOMEM);
2789
2790                 error = copyin(t->buf, boot_data, t->len);
2791                 if (!error)
2792                         error = -t3_load_boot(sc, boot_data, t->len);
2793
2794                 free(boot_data, M_DEVBUF);
2795                 break;
2796         }
2797         case CHELSIO_GET_PM: {
2798                 struct ch_pm *m = (struct ch_pm *)data;
2799                 struct tp_params *p = &sc->params.tp;
2800
2801                 if (!is_offload(sc))
2802                         return (EOPNOTSUPP);
2803
2804                 m->tx_pg_sz = p->tx_pg_size;
2805                 m->tx_num_pg = p->tx_num_pgs;
2806                 m->rx_pg_sz  = p->rx_pg_size;
2807                 m->rx_num_pg = p->rx_num_pgs;
2808                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2809
2810                 break;
2811         }
2812         case CHELSIO_SET_PM: {
2813                 struct ch_pm *m = (struct ch_pm *)data;
2814                 struct tp_params *p = &sc->params.tp;
2815
2816                 if (!is_offload(sc))
2817                         return (EOPNOTSUPP);
2818                 if (sc->flags & FULL_INIT_DONE)
2819                         return (EBUSY);
2820
2821                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2822                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2823                         return (EINVAL);        /* not power of 2 */
2824                 if (!(m->rx_pg_sz & 0x14000))
2825                         return (EINVAL);        /* not 16KB or 64KB */
2826                 if (!(m->tx_pg_sz & 0x1554000))
2827                         return (EINVAL);        /* not a power of 4 in [16KB, 16MB] */
2828                 if (m->tx_num_pg == -1)
2829                         m->tx_num_pg = p->tx_num_pgs;
2830                 if (m->rx_num_pg == -1)
2831                         m->rx_num_pg = p->rx_num_pgs;
2832                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2833                         return (EINVAL);
2834                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2835                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2836                         return (EINVAL);
2837
2838                 p->rx_pg_size = m->rx_pg_sz;
2839                 p->tx_pg_size = m->tx_pg_sz;
2840                 p->rx_num_pgs = m->rx_num_pg;
2841                 p->tx_num_pgs = m->tx_num_pg;
2842                 break;
2843         }
2844         case CHELSIO_SETMTUTAB: {
2845                 struct ch_mtus *m = (struct ch_mtus *)data;
2846                 int i;
2847                 
2848                 if (!is_offload(sc))
2849                         return (EOPNOTSUPP);
2850                 if (offload_running(sc))
2851                         return (EBUSY);
2852                 if (m->nmtus != NMTUS)
2853                         return (EINVAL);
2854                 if (m->mtus[0] < 81)         /* accommodate SACK */
2855                         return (EINVAL);
2856                 
2857                 /*
2858                  * MTUs must be in ascending order
2859                  */
2860                 for (i = 1; i < NMTUS; ++i)
2861                         if (m->mtus[i] < m->mtus[i - 1])
2862                                 return (EINVAL);
2863
2864                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2865                 break;
2866         }
2867         case CHELSIO_GETMTUTAB: {
2868                 struct ch_mtus *m = (struct ch_mtus *)data;
2869
2870                 if (!is_offload(sc))
2871                         return (EOPNOTSUPP);
2872
2873                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2874                 m->nmtus = NMTUS;
2875                 break;
2876         }
2877         case CHELSIO_GET_MEM: {
2878                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2879                 struct mc7 *mem;
2880                 uint8_t *useraddr;
2881                 u64 buf[32];
2882
2883                 /*
2884                  * Use these to avoid modifying len/addr in the return
2885                  * struct
2886                  */
2887                 uint32_t len = t->len, addr = t->addr;
2888
2889                 if (!is_offload(sc))
2890                         return (EOPNOTSUPP);
2891                 if (!(sc->flags & FULL_INIT_DONE))
2892                         return (EIO);         /* need the memory controllers */
2893                 if ((addr & 0x7) || (len & 0x7))
2894                         return (EINVAL);
2895                 if (t->mem_id == MEM_CM)
2896                         mem = &sc->cm;
2897                 else if (t->mem_id == MEM_PMRX)
2898                         mem = &sc->pmrx;
2899                 else if (t->mem_id == MEM_PMTX)
2900                         mem = &sc->pmtx;
2901                 else
2902                         return (EINVAL);
2903
2904                 /*
2905                  * Version scheme:
2906                  * bits 0..9: chip version
2907                  * bits 10..15: chip revision
2908                  */
2909                 t->version = 3 | (sc->params.rev << 10);
2910                 
2911                 /*
2912                  * Read 256 bytes at a time as len can be large and we don't
2913                  * want to use huge intermediate buffers.
2914                  */
2915                 useraddr = (uint8_t *)t->buf; 
2916                 while (len) {
2917                         unsigned int chunk = min(len, sizeof(buf));
2918
2919                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2920                         if (error)
2921                                 return (-error);
2922                         if (copyout(buf, useraddr, chunk))
2923                                 return (EFAULT);
2924                         useraddr += chunk;
2925                         addr += chunk;
2926                         len -= chunk;
2927                 }
2928                 break;
2929         }
2930         case CHELSIO_READ_TCAM_WORD: {
2931                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2932
2933                 if (!is_offload(sc))
2934                         return (EOPNOTSUPP);
2935                 if (!(sc->flags & FULL_INIT_DONE))
2936                         return (EIO);         /* need MC5 */            
2937                 return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2939         }
2940         case CHELSIO_SET_TRACE_FILTER: {
2941                 struct ch_trace *t = (struct ch_trace *)data;
2942                 const struct trace_params *tp;
2943
2944                 tp = (const struct trace_params *)&t->sip;
2945                 if (t->config_tx)
2946                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2947                                                t->trace_tx);
2948                 if (t->config_rx)
2949                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2950                                                t->trace_rx);
2951                 break;
2952         }
2953         case CHELSIO_SET_PKTSCHED: {
2954                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2955                 if (sc->open_device_map == 0)
2956                         return (EAGAIN);
2957                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2958                     p->binding);
2959                 break;
2960         }
2961         case CHELSIO_IFCONF_GETREGS: {
2962                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2963                 int reglen = cxgb_get_regs_len();
2964                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2965                 if (buf == NULL) {
2966                         return (ENOMEM);
2967                 }
2968                 if (regs->len > reglen)
2969                         regs->len = reglen;
2970                 else if (regs->len < reglen)
2971                         error = ENOBUFS;
2972
2973                 if (!error) {
2974                         cxgb_get_regs(sc, regs, buf);
2975                         error = copyout(buf, regs->data, reglen);
2976                 }
2977                 free(buf, M_DEVBUF);
2978
2979                 break;
2980         }
2981         case CHELSIO_SET_HW_SCHED: {
2982                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2983                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2984
2985                 if ((sc->flags & FULL_INIT_DONE) == 0)
2986                         return (EAGAIN);       /* need TP to be initialized */
2987                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2988                     !in_range(t->channel, 0, 1) ||
2989                     !in_range(t->kbps, 0, 10000000) ||
2990                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2991                     !in_range(t->flow_ipg, 0,
2992                               dack_ticks_to_usec(sc, 0x7ff)))
2993                         return (EINVAL);
2994
2995                 if (t->kbps >= 0) {
2996                         error = t3_config_sched(sc, t->kbps, t->sched);
2997                         if (error < 0)
2998                                 return (-error);
2999                 }
3000                 if (t->class_ipg >= 0)
3001                         t3_set_sched_ipg(sc, t->sched, t->class_ipg);
3002                 if (t->flow_ipg >= 0) {
3003                         t->flow_ipg *= 1000;     /* us -> ns */
3004                         t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
3005                 }
3006                 if (t->mode >= 0) {
3007                         int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
3008
3009                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3010                                          bit, t->mode ? bit : 0);
3011                 }
3012                 if (t->channel >= 0)
3013                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3014                                          1 << t->sched, t->channel << t->sched);
3015                 break;
3016         }
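        /*
         * Note on CHELSIO_SET_HW_SCHED above: in_range() treats a negative
         * value as "leave unchanged", which is why each field is re-tested
         * with >= 0 before being applied.  A hypothetical userland sketch
         * (assuming fd is a descriptor for the nexus device node) that only
         * retunes the rate of scheduler 2 to 500 Mbps:
         *
         *      struct ch_hw_sched t;
         *
         *      memset(&t, 0, sizeof(t));
         *      t.sched = 2;
         *      t.kbps = 500000;
         *      t.class_ipg = t.flow_ipg = t.mode = t.channel = -1;
         *      ioctl(fd, CHELSIO_SET_HW_SCHED, &t);
         */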
3017         case CHELSIO_GET_EEPROM: {
3018                 int i;
3019                 struct ch_eeprom *e = (struct ch_eeprom *)data;
3020                 uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3021
3022                 if (buf == NULL) {
3023                         return (ENOMEM);
3024                 }
3025                 e->magic = EEPROM_MAGIC;
3026                 for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3027                         error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3028
3029                 if (!error)
3030                         error = copyout(buf + e->offset, e->data, e->len);
3031
3032                 free(buf, M_DEVBUF);
3033                 break;
3034         }
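        /*
         * The EEPROM is accessed in 4-byte words, so the loop above rounds
         * e->offset down to a word boundary and reads whole words; copyout()
         * then returns only the bytes requested.  For example, offset = 6,
         * len = 5 reads the words at 4 and 8 into buf and copies out
         * buf[6..10].
         */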
3035         case CHELSIO_CLEAR_STATS: {
3036                 if (!(sc->flags & FULL_INIT_DONE))
3037                 return (EAGAIN);
3038
3039                 PORT_LOCK(pi);
3040                 t3_mac_update_stats(&pi->mac);
3041                 memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3042                 PORT_UNLOCK(pi);
3043                 break;
3044         }
3045         case CHELSIO_GET_UP_LA: {
3046                 struct ch_up_la *la = (struct ch_up_la *)data;
3047                 uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3048                 if (buf == NULL) {
3049                         return (ENOMEM);
3050                 }
3051                 if (la->bufsize < LA_BUFSIZE)
3052                         error = ENOBUFS;
3053
3054                 if (!error)
3055                         error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3056                                               &la->bufsize, buf);
3057                 if (!error)
3058                         error = copyout(buf, la->data, la->bufsize);
3059
3060                 free(buf, M_DEVBUF);
3061                 break;
3062         }
3063         case CHELSIO_GET_UP_IOQS: {
3064                 struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3065                 uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3066                 uint32_t *v;
3067
3068                 if (buf == NULL) {
3069                         return (ENOMEM);
3070                 }
3071                 if (ioqs->bufsize < IOQS_BUFSIZE)
3072                         error = ENOBUFS;
3073
3074                 if (!error)
3075                         error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3076
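                /*
                 * t3_get_up_ioqs() is expected to fill buf with four 32-bit
                 * header words (rx/tx enable, rx/tx status) followed by the
                 * IOQ entries themselves; the block below peels off the
                 * header and copies out only the entries.
                 */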
3077                 if (!error) {
3078                         v = (uint32_t *)buf;
3079
3080                         ioqs->bufsize -= 4 * sizeof(uint32_t);
3081                         ioqs->ioq_rx_enable = *v++;
3082                         ioqs->ioq_tx_enable = *v++;
3083                         ioqs->ioq_rx_status = *v++;
3084                         ioqs->ioq_tx_status = *v++;
3085
3086                         error = copyout(v, ioqs->data, ioqs->bufsize);
3087                 }
3088
3089                 free(buf, M_DEVBUF);
3090                 break;
3091         }
3092         case CHELSIO_SET_FILTER: {
3093                 struct ch_filter *f = (struct ch_filter *)data;
3094                 struct filter_info *p;
3095                 unsigned int nfilters = sc->params.mc5.nfilters;
3096
3097                 if (!is_offload(sc))
3098                         return (EOPNOTSUPP);    /* No TCAM */
3099                 if (!(sc->flags & FULL_INIT_DONE))
3100                         return (EAGAIN);        /* mc5 not setup yet */
3101                 if (nfilters == 0)
3102                         return (EBUSY);         /* TOE will use TCAM */
3103
3104                 /* sanity checks */
3105                 if (f->filter_id >= nfilters ||
3106                     (f->val.dip && f->mask.dip != 0xffffffff) ||
3107                     (f->val.sport && f->mask.sport != 0xffff) ||
3108                     (f->val.dport && f->mask.dport != 0xffff) ||
3109                     (f->val.vlan && f->mask.vlan != 0xfff) ||
3110                     (f->val.vlan_prio &&
3111                         f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3112                     (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3113                     f->qset >= SGE_QSETS ||
3114                     sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3115                         return (EINVAL);
3116
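                /*
                 * Note the asymmetry in the checks above: only the source IP
                 * takes an arbitrary netmask; every other field, if matched
                 * at all, must be matched exactly (all-ones mask).
                 */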
3117                 /* Was allocated with M_WAITOK */
3118                 KASSERT(sc->filters != NULL, ("filter table NULL"));
3119
3120                 p = &sc->filters[f->filter_id];
3121                 if (p->locked)
3122                         return (EPERM);
3123
3124                 bzero(p, sizeof(*p));
3125                 p->sip = f->val.sip;
3126                 p->sip_mask = f->mask.sip;
3127                 p->dip = f->val.dip;
3128                 p->sport = f->val.sport;
3129                 p->dport = f->val.dport;
3130                 p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3131                 p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3132                     FILTER_NO_VLAN_PRI;
3133                 p->mac_hit = f->mac_hit;
3134                 p->mac_vld = f->mac_addr_idx != 0xffff;
3135                 p->mac_idx = f->mac_addr_idx;
3136                 p->pkt_type = f->proto;
3137                 p->report_filter_id = f->want_filter_id;
3138                 p->pass = f->pass;
3139                 p->rss = f->rss;
3140                 p->qset = f->qset;
3141
3142                 error = set_filter(sc, f->filter_id, p);
3143                 if (error == 0)
3144                         p->valid = 1;
3145                 break;
3146         }
3147         case CHELSIO_DEL_FILTER: {
3148                 struct ch_filter *f = (struct ch_filter *)data;
3149                 struct filter_info *p;
3150                 unsigned int nfilters = sc->params.mc5.nfilters;
3151
3152                 if (!is_offload(sc))
3153                         return (EOPNOTSUPP);
3154                 if (!(sc->flags & FULL_INIT_DONE))
3155                         return (EAGAIN);
3156                 if (nfilters == 0 || sc->filters == NULL)
3157                         return (EINVAL);
3158                 if (f->filter_id >= nfilters)
3159                         return (EINVAL);
3160
3161                 p = &sc->filters[f->filter_id];
3162                 if (p->locked)
3163                         return (EPERM);
3164                 if (!p->valid)
3165                         return (EFAULT); /* Read "Bad address" as "Bad index" */
3166
3167                 bzero(p, sizeof(*p));
3168                 p->sip = p->sip_mask = 0xffffffff;
3169                 p->vlan = 0xfff;
3170                 p->vlan_prio = FILTER_NO_VLAN_PRI;
3171                 p->pkt_type = 1;
3172                 error = set_filter(sc, f->filter_id, p);
3173                 break;
3174         }
3175         case CHELSIO_GET_FILTER: {
3176                 struct ch_filter *f = (struct ch_filter *)data;
3177                 struct filter_info *p;
3178                 unsigned int i, nfilters = sc->params.mc5.nfilters;
3179
3180                 if (!is_offload(sc))
3181                         return (EOPNOTSUPP);
3182                 if (!(sc->flags & FULL_INIT_DONE))
3183                         return (EAGAIN);
3184                 if (nfilters == 0 || sc->filters == NULL)
3185                         return (EINVAL);
3186
3187                 i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3188                 for (; i < nfilters; i++) {
3189                         p = &sc->filters[i];
3190                         if (!p->valid)
3191                                 continue;
3192
3193                         bzero(f, sizeof(*f));
3194
3195                         f->filter_id = i;
3196                         f->val.sip = p->sip;
3197                         f->mask.sip = p->sip_mask;
3198                         f->val.dip = p->dip;
3199                         f->mask.dip = p->dip ? 0xffffffff : 0;
3200                         f->val.sport = p->sport;
3201                         f->mask.sport = p->sport ? 0xffff : 0;
3202                         f->val.dport = p->dport;
3203                         f->mask.dport = p->dport ? 0xffff : 0;
3204                         f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3205                         f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3206                         f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3207                             0 : p->vlan_prio;
3208                         f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3209                             0 : FILTER_NO_VLAN_PRI;
3210                         f->mac_hit = p->mac_hit;
3211                         f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3212                         f->proto = p->pkt_type;
3213                         f->want_filter_id = p->report_filter_id;
3214                         f->pass = p->pass;
3215                         f->rss = p->rss;
3216                         f->qset = p->qset;
3217
3218                         break;
3219                 }
3220
3221                 if (i == nfilters)
3222                         f->filter_id = 0xffffffff;
3223                 break;
3224         }
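        /*
         * CHELSIO_GET_FILTER above implements a cursor: filter_id =
         * 0xffffffff starts a scan, each call returns the next valid filter,
         * and filter_id reads back as 0xffffffff once the table is
         * exhausted.  A hypothetical userland walk:
         *
         *      struct ch_filter f;
         *
         *      memset(&f, 0, sizeof(f));
         *      f.filter_id = 0xffffffff;
         *      for (;;) {
         *              if (ioctl(fd, CHELSIO_GET_FILTER, &f) != 0 ||
         *                  f.filter_id == 0xffffffff)
         *                      break;
         *              ... consume f ...
         *      }
         */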
3225         default:
3226                 return (EOPNOTSUPP);
3227                 break;
3228         }
3229
3230         return (error);
3231 }
3232
3233 static __inline void
3234 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3235     unsigned int end)
3236 {
3237         uint32_t *p = (uint32_t *)(buf + start);
3238
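        /*
         * Each register value lands in buf at an offset equal to its own
         * register address, so address ranges the callers skip stay zeroed
         * (cxgb_get_regs clears buf up front).
         */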
3239         for ( ; start <= end; start += sizeof(uint32_t))
3240                 *p++ = t3_read_reg(ap, start);
3241 }
3242
3243 #define T3_REGMAP_SIZE (3 * 1024)
3244 static int
3245 cxgb_get_regs_len(void)
3246 {
3247         return T3_REGMAP_SIZE;
3248 }
3249
3250 static void
3251 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3252 {
3253
3254         /*
3255          * Version scheme:
3256          * bits 0..9: chip version
3257          * bits 10..15: chip revision
3258          * bit 31: set for PCIe cards
3259          */
3260         regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3261
3262         /*
3263          * We skip the MAC statistics registers because they are clear-on-read.
3264          * Also, reading the multi-register stats would have to synchronize with
3265          * the periodic MAC stats accumulation; that complexity is hard to justify.
3266          */
3267         memset(buf, 0, cxgb_get_regs_len());
3268         reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3269         reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3270         reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3271         reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3272         reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3273         reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3274                        XGM_REG(A_XGM_SERDES_STAT3, 1));
3275         reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3276                        XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3277 }
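
/*
 * Decoding the version word on the far side of the ioctl, per the bit
 * layout documented in cxgb_get_regs() (a hypothetical userland sketch):
 *
 *      unsigned int chip = regs.version & 0x3ff;          bits 0..9
 *      unsigned int rev  = (regs.version >> 10) & 0x3f;   bits 10..15
 *      unsigned int pcie = (regs.version >> 31) & 1;      bit 31
 */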
3278
3279 static int
3280 alloc_filters(struct adapter *sc)
3281 {
3282         struct filter_info *p;
3283         unsigned int nfilters = sc->params.mc5.nfilters;
3284
3285         if (nfilters == 0)
3286                 return (0);
3287
3288         p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3289         sc->filters = p;
3290
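        /*
         * Reserve the last entry as a locked catch-all default: it matches
         * any VLAN and priority and passes traffic with RSS.  Because it is
         * locked, CHELSIO_SET_FILTER and CHELSIO_DEL_FILTER refuse to touch
         * it (EPERM).
         */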
3291         p = &sc->filters[nfilters - 1];
3292         p->vlan = 0xfff;
3293         p->vlan_prio = FILTER_NO_VLAN_PRI;
3294         p->pass = p->rss = p->valid = p->locked = 1;
3295
3296         return (0);
3297 }
3298
3299 static int
3300 setup_hw_filters(struct adapter *sc)
3301 {
3302         int i, rc;
3303         unsigned int nfilters = sc->params.mc5.nfilters;
3304
3305         if (!sc->filters)
3306                 return (0);
3307
3308         t3_enable_filters(sc);
3309
3310         for (i = rc = 0; i < nfilters && !rc; i++) {
3311                 if (sc->filters[i].locked)
3312                         rc = set_filter(sc, i, &sc->filters[i]);
3313         }
3314
3315         return (rc);
3316 }
3317
3318 static int
3319 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3320 {
3321         int len;
3322         struct mbuf *m;
3323         struct ulp_txpkt *txpkt;
3324         struct work_request_hdr *wr;
3325         struct cpl_pass_open_req *oreq;
3326         struct cpl_set_tcb_field *sreq;
3327
3328         len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3329         KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3330
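        /*
         * Filters occupy the top of the MC5 TCAM just below the routing
         * region, so rebase the zero-based filter index to an absolute TCAM
         * index.  E.g. with a 2048-entry TCAM, 32 routes and 64 filters
         * (hypothetical sizes), filter 0 maps to 2048 - 32 - 64 = 1952.
         */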
3331         id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3332               sc->params.mc5.nfilters;
3333
3334         m = m_gethdr(M_WAITOK, MT_DATA);
3335         m->m_len = m->m_pkthdr.len = len;
3336         bzero(mtod(m, char *), len);
3337
3338         wr = mtod(m, struct work_request_hdr *);
3339         wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3340
3341         oreq = (struct cpl_pass_open_req *)(wr + 1);
3342         txpkt = (struct ulp_txpkt *)oreq;
3343         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3344         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3345         OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3346         oreq->local_port = htons(f->dport);
3347         oreq->peer_port = htons(f->sport);
3348         oreq->local_ip = htonl(f->dip);
3349         oreq->peer_ip = htonl(f->sip);
3350         oreq->peer_netmask = htonl(f->sip_mask);
3351         oreq->opt0h = 0;
3352         oreq->opt0l = htonl(F_NO_OFFLOAD);
3353         oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3354                          V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3355                          V_VLAN_PRI(f->vlan_prio >> 1) |
3356                          V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3357                          V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3358                          V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3359
3360         sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3361         set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3362                           (f->report_filter_id << 15) | (1 << 23) |
3363                           ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3364         set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3365         t3_mgmt_tx(sc, m);
3366
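        /*
         * A pass filter that bypasses RSS steers matches to one specific
         * queue set, so send a second request that writes the RSS lookup
         * index for f->qset into the filter's TCB.
         */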
3367         if (f->pass && !f->rss) {
3368                 len = sizeof(*sreq);
3369                 m = m_gethdr(M_WAITOK, MT_DATA);
3370                 m->m_len = m->m_pkthdr.len = len;
3371                 bzero(mtod(m, char *), len);
3372                 sreq = mtod(m, struct cpl_set_tcb_field *);
3373                 sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3374                 mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3375                                  (u64)sc->rrss_map[f->qset] << 19);
3376                 t3_mgmt_tx(sc, m);
3377         }
3378         return (0);
3379 }
3380
3381 static inline void
3382 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3383     unsigned int word, u64 mask, u64 val)
3384 {
3385         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3386         req->reply = V_NO_REPLY(1);
3387         req->cpu_idx = 0;
3388         req->word = htons(word);
3389         req->mask = htobe64(mask);
3390         req->val = htobe64(val);
3391 }
3392
3393 static inline void
3394 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3395     unsigned int word, u64 mask, u64 val)
3396 {
3397         struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3398
3399         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3400         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3401         mk_set_tcb_field(req, tid, word, mask, val);
3402 }