1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
#include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78
79 #include <cxgb_include.h>
80
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_ext_intr_handler(void *, int);
99 static void cxgb_tick_handler(void *, int);
100 static void cxgb_tick(void *);
101 static void setup_rss(adapter_t *sc);
102 static int alloc_filters(struct adapter *);
103 static int setup_hw_filters(struct adapter *);
104 static int set_filter(struct adapter *, int, const struct filter_info *);
105 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
106     unsigned int, u64, u64);
107 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
108     unsigned int, u64, u64);
109
110 /* Attachment glue for the PCI controller end of the device.  Each port of
111  * the device is attached separately, as defined later.
112  */
113 static int cxgb_controller_probe(device_t);
114 static int cxgb_controller_attach(device_t);
115 static int cxgb_controller_detach(device_t);
116 static void cxgb_free(struct adapter *);
117 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
118     unsigned int end);
119 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
120 static int cxgb_get_regs_len(void);
121 static int offload_open(struct port_info *pi);
122 static void touch_bars(device_t dev);
123 static int offload_close(struct t3cdev *tdev);
124 static void cxgb_update_mac_settings(struct port_info *p);
125
126 static device_method_t cxgb_controller_methods[] = {
127         DEVMETHOD(device_probe,         cxgb_controller_probe),
128         DEVMETHOD(device_attach,        cxgb_controller_attach),
129         DEVMETHOD(device_detach,        cxgb_controller_detach),
130
131         /* bus interface */
132         DEVMETHOD(bus_print_child,      bus_generic_print_child),
133         DEVMETHOD(bus_driver_added,     bus_generic_driver_added),
134
135         { 0, 0 }
136 };
137
138 static driver_t cxgb_controller_driver = {
139         "cxgbc",
140         cxgb_controller_methods,
141         sizeof(struct adapter)
142 };
143
144 static devclass_t       cxgb_controller_devclass;
145 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
146
147 /*
148  * Attachment glue for the ports.  Attachment is done directly to the
149  * controller device.
150  */
151 static int cxgb_port_probe(device_t);
152 static int cxgb_port_attach(device_t);
153 static int cxgb_port_detach(device_t);
154
155 static device_method_t cxgb_port_methods[] = {
156         DEVMETHOD(device_probe,         cxgb_port_probe),
157         DEVMETHOD(device_attach,        cxgb_port_attach),
158         DEVMETHOD(device_detach,        cxgb_port_detach),
159         { 0, 0 }
160 };
161
162 static driver_t cxgb_port_driver = {
163         "cxgb",
164         cxgb_port_methods,
165         0
166 };
167
168 static d_ioctl_t cxgb_extension_ioctl;
169 static d_open_t cxgb_extension_open;
170 static d_close_t cxgb_extension_close;
171
172 static struct cdevsw cxgb_cdevsw = {
173        .d_version =    D_VERSION,
174        .d_flags =      0,
175        .d_open =       cxgb_extension_open,
176        .d_close =      cxgb_extension_close,
177        .d_ioctl =      cxgb_extension_ioctl,
178        .d_name =       "cxgb",
179 };
180
181 static devclass_t       cxgb_port_devclass;
182 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
183
184 /*
185  * The driver uses the best interrupt scheme available on a platform in the
186  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
187  * of these schemes the driver may consider as follows:
188  *
189  * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
191  * msi = 0: force pin interrupts
192  */
193 static int msi_allowed = 2;
194
195 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
196 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
197 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
198     "MSI-X, MSI, INTx selector");
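
/*
 * Example (illustrative): the tunables in this file are boot-time tunables
 * (TUNABLE_INT + CTLFLAG_RDTUN) and can be set in /boot/loader.conf, e.g.:
 *
 *   hw.cxgb.msi_allowed=1
 */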
199
200 /*
 * The driver enables offload by default.
 * To disable it, set ofld_disable = 1.
203  */
204 static int ofld_disable = 0;
205 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
206 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
207     "disable ULP offload");
208
209 /*
210  * The driver uses an auto-queue algorithm by default.
211  * To disable it and force a single queue-set per port, use multiq = 0
212  */
213 static int multiq = 1;
214 TUNABLE_INT("hw.cxgb.multiq", &multiq);
215 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
216     "use min(ncpus/ports, 8) queue-sets per port");
217
218 /*
219  * By default the driver will not update the firmware unless
 * it was compiled against a newer version.
222  */
223 static int force_fw_update = 0;
224 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
225 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
226     "update firmware even if up to date");
227
228 int cxgb_use_16k_clusters = -1;
229 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
230 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
232
233 /*
234  * Tune the size of the output queue.
235  */
236 int cxgb_snd_queue_len = IFQ_MAXLEN;
237 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
238 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
    &cxgb_snd_queue_len, 0, "send queue size");
240
241
242 enum {
243         MAX_TXQ_ENTRIES      = 16384,
244         MAX_CTRL_TXQ_ENTRIES = 1024,
245         MAX_RSPQ_ENTRIES     = 16384,
246         MAX_RX_BUFFERS       = 16384,
247         MAX_RX_JUMBO_BUFFERS = 16384,
248         MIN_TXQ_ENTRIES      = 4,
249         MIN_CTRL_TXQ_ENTRIES = 4,
250         MIN_RSPQ_ENTRIES     = 32,
251         MIN_FL_ENTRIES       = 32,
252         MIN_FL_JUMBO_ENTRIES = 32
253 };
254
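/*
 * Software shadow of one hardware packet filter: the match fields
 * (addresses, ports, VLAN, MAC index) and the action to take on a match
 * (pass/drop, steer via RSS or to a fixed qset).
 */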
255 struct filter_info {
256         u32 sip;
257         u32 sip_mask;
258         u32 dip;
259         u16 sport;
260         u16 dport;
261         u32 vlan:12;
262         u32 vlan_prio:3;
263         u32 mac_hit:1;
264         u32 mac_idx:4;
265         u32 mac_vld:1;
266         u32 pkt_type:2;
267         u32 report_filter_id:1;
268         u32 pass:1;
269         u32 rss:1;
270         u32 qset:3;
271         u32 locked:1;
272         u32 valid:1;
273 };
274
275 enum { FILTER_NO_VLAN_PRI = 7 };
276
277 #define EEPROM_MAGIC 0x38E2F10C
278
279 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
280
/* Table for probing the cards.  The desc field isn't actually used. */
282 struct cxgb_ident {
283         uint16_t        vendor;
284         uint16_t        device;
285         int             index;
286         char            *desc;
287 } cxgb_identifiers[] = {
288         {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
289         {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
290         {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
291         {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
292         {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
293         {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
294         {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
295         {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
296         {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
297         {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
298         {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
299         {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
300         {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
301         {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
302         {0, 0, 0, NULL}
303 };
304
305 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
306
307
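/*
 * Map the T3 silicon revision to the character used in firmware image
 * names (see TPEEPROM_NAME and TPSRAM_NAME below); 'z' means unknown.
 */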
308 static __inline char
309 t3rev2char(struct adapter *adapter)
310 {
311         char rev = 'z';
312
313         switch(adapter->params.rev) {
314         case T3_REV_A:
315                 rev = 'a';
316                 break;
317         case T3_REV_B:
318         case T3_REV_B2:
319                 rev = 'b';
320                 break;
321         case T3_REV_C:
322                 rev = 'c';
323                 break;
324         }
325         return rev;
326 }
327
328 static struct cxgb_ident *
329 cxgb_get_ident(device_t dev)
330 {
331         struct cxgb_ident *id;
332
333         for (id = cxgb_identifiers; id->desc != NULL; id++) {
334                 if ((id->vendor == pci_get_vendor(dev)) &&
335                     (id->device == pci_get_device(dev))) {
336                         return (id);
337                 }
338         }
339         return (NULL);
340 }
341
342 static const struct adapter_info *
343 cxgb_get_adapter_info(device_t dev)
344 {
345         struct cxgb_ident *id;
346         const struct adapter_info *ai;
347
348         id = cxgb_get_ident(dev);
349         if (id == NULL)
350                 return (NULL);
351
352         ai = t3_get_adapter_info(id->index);
353
354         return (ai);
355 }
356
357 static int
358 cxgb_controller_probe(device_t dev)
359 {
360         const struct adapter_info *ai;
361         char *ports, buf[80];
362         int nports;
363
364         ai = cxgb_get_adapter_info(dev);
365         if (ai == NULL)
366                 return (ENXIO);
367
368         nports = ai->nports0 + ai->nports1;
369         if (nports == 1)
370                 ports = "port";
371         else
372                 ports = "ports";
373
374         snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
375         device_set_desc_copy(dev, buf);
376         return (BUS_PROBE_DEFAULT);
377 }
378
379 #define FW_FNAME "cxgb_t3fw"
380 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
381 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
382
383 static int
384 upgrade_fw(adapter_t *sc)
385 {
386         const struct firmware *fw;
387         int status;
388         u32 vers;
389         
390         if ((fw = firmware_get(FW_FNAME)) == NULL)  {
391                 device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
392                 return (ENOENT);
393         } else
394                 device_printf(sc->dev, "installing firmware on card\n");
395         status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
396
397         if (status != 0) {
398                 device_printf(sc->dev, "failed to install firmware: %d\n",
399                     status);
400         } else {
401                 t3_get_fw_version(sc, &vers);
402                 snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
403                     G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
404                     G_FW_VERSION_MICRO(vers));
405         }
406
407         firmware_put(fw, FIRMWARE_UNLOAD);
408
409         return (status);        
410 }
411
412 /*
413  * The cxgb_controller_attach function is responsible for the initial
414  * bringup of the device.  Its responsibilities include:
415  *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Registers.
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call the hardware-specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Setup the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port).
 * 11. Initialize T3 private state.
 * 12. Trigger the LED.
 * 13. Setup offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls.
433  *
434  * NOTE: Any modification or deviation from this list MUST be reflected in
435  * the above comment.  Failure to do so will result in problems on various
436  * error conditions including link flapping.
437  */
438 static int
439 cxgb_controller_attach(device_t dev)
440 {
441         device_t child;
442         const struct adapter_info *ai;
443         struct adapter *sc;
444         int i, error = 0;
445         uint32_t vers;
446         int port_qsets = 1;
447         int msi_needed, reg;
448         char buf[80];
449
450         sc = device_get_softc(dev);
451         sc->dev = dev;
452         sc->msi_count = 0;
453         ai = cxgb_get_adapter_info(dev);
454
        /* Find the PCIe link width and set the max read request to 4KB. */
456         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
457                 uint16_t lnk, pectl;
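                /*
                 * Link Status register (PCIe capability + 0x12): bits 9:4
                 * hold the negotiated link width.
                 */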
458                 lnk = pci_read_config(dev, reg + 0x12, 2);
459                 sc->link_width = (lnk >> 4) & 0x3f;
460                 
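                /*
                 * Device Control register (PCIe capability + 0x8): bits
                 * 14:12 encode the max read request size, 5 => 4096 bytes.
                 */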
461                 pectl = pci_read_config(dev, reg + 0x8, 2);
462                 pectl = (pectl & ~0x7000) | (5 << 12);
463                 pci_write_config(dev, reg + 0x8, pectl, 2);
464         }
465
466         if (sc->link_width != 0 && sc->link_width <= 4 &&
467             (ai->nports0 + ai->nports1) <= 2) {
468                 device_printf(sc->dev,
469                     "PCIe x%d Link, expect reduced performance\n",
470                     sc->link_width);
471         }
472
473         touch_bars(dev);
474         pci_enable_busmaster(dev);
475         /*
476          * Allocate the registers and make them available to the driver.
         * The registers that we care about for NIC mode are in BAR 0.
478          */
479         sc->regs_rid = PCIR_BAR(0);
480         if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
481             &sc->regs_rid, RF_ACTIVE)) == NULL) {
482                 device_printf(dev, "Cannot allocate BAR region 0\n");
483                 return (ENXIO);
484         }
485         sc->udbs_rid = PCIR_BAR(2);
486         sc->udbs_res = NULL;
487         if (is_offload(sc) &&
488             ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
489                    &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
                device_printf(dev, "Cannot allocate BAR region 2\n");
491                 error = ENXIO;
492                 goto out;
493         }
494
495         snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
496             device_get_unit(dev));
497         ADAPTER_LOCK_INIT(sc, sc->lockbuf);
498
499         snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
500             device_get_unit(dev));
501         snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
502             device_get_unit(dev));
503         snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
504             device_get_unit(dev));
505         
506         MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
507         MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
508         MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
509         
510         sc->bt = rman_get_bustag(sc->regs_res);
511         sc->bh = rman_get_bushandle(sc->regs_res);
512         sc->mmio_len = rman_get_size(sc->regs_res);
513
514         for (i = 0; i < MAX_NPORTS; i++)
515                 sc->port[i].adapter = sc;
516
517         if (t3_prep_adapter(sc, ai, 1) < 0) {
518                 printf("prep adapter failed\n");
519                 error = ENODEV;
520                 goto out;
521         }
522         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
523          * enough messages for the queue sets.  If that fails, try falling
524          * back to MSI.  If that fails, then try falling back to the legacy
525          * interrupt pin model.
526          */
527         sc->msix_regs_rid = 0x20;
528         if ((msi_allowed >= 2) &&
529             (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
530             &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
531
532                 if (multiq)
533                         port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
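                /* One vector per queue set, plus one for slow-path events. */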
534                 msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
535
536                 if (pci_msix_count(dev) == 0 ||
537                     (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
538                     sc->msi_count != msi_needed) {
539                         device_printf(dev, "alloc msix failed - "
540                                       "msi_count=%d, msi_needed=%d, err=%d; "
541                                       "will try MSI\n", sc->msi_count,
542                                       msi_needed, error);
543                         sc->msi_count = 0;
544                         port_qsets = 1;
545                         pci_release_msi(dev);
546                         bus_release_resource(dev, SYS_RES_MEMORY,
547                             sc->msix_regs_rid, sc->msix_regs_res);
548                         sc->msix_regs_res = NULL;
549                 } else {
550                         sc->flags |= USING_MSIX;
551                         sc->cxgb_intr = cxgb_async_intr;
552                         device_printf(dev,
553                                       "using MSI-X interrupts (%u vectors)\n",
554                                       sc->msi_count);
555                 }
556         }
557
558         if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
559                 sc->msi_count = 1;
560                 if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
561                         device_printf(dev, "alloc msi failed - "
562                                       "err=%d; will try INTx\n", error);
563                         sc->msi_count = 0;
564                         port_qsets = 1;
565                         pci_release_msi(dev);
566                 } else {
567                         sc->flags |= USING_MSI;
568                         sc->cxgb_intr = t3_intr_msi;
569                         device_printf(dev, "using MSI interrupts\n");
570                 }
571         }
572         if (sc->msi_count == 0) {
573                 device_printf(dev, "using line interrupts\n");
574                 sc->cxgb_intr = t3b_intr;
575         }
576
577         /* Create a private taskqueue thread for handling driver events */
578         sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
579             taskqueue_thread_enqueue, &sc->tq);
        if (sc->tq == NULL) {
                device_printf(dev, "failed to allocate controller task queue\n");
                error = ENOMEM;
                goto out;
        }
584
585         taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
586             device_get_nameunit(dev));
587         TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
588         TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
589
590         
591         /* Create a periodic callout for checking adapter status */
592         callout_init(&sc->cxgb_tick_ch, TRUE);
593         
594         if (t3_check_fw_version(sc) < 0 || force_fw_update) {
595                 /*
596                  * Warn user that a firmware update will be attempted in init.
597                  */
598                 device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
599                     FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
600                 sc->flags &= ~FW_UPTODATE;
601         } else {
602                 sc->flags |= FW_UPTODATE;
603         }
604
605         if (t3_check_tpsram_version(sc) < 0) {
606                 /*
607                  * Warn user that a firmware update will be attempted in init.
608                  */
609                 device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
610                     t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
611                 sc->flags &= ~TPS_UPTODATE;
612         } else {
613                 sc->flags |= TPS_UPTODATE;
614         }
615         
616         /*
617          * Create a child device for each MAC.  The ethernet attachment
618          * will be done in these children.
619          */     
620         for (i = 0; i < (sc)->params.nports; i++) {
621                 struct port_info *pi;
622                 
623                 if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
624                         device_printf(dev, "failed to add child port\n");
625                         error = EINVAL;
626                         goto out;
627                 }
628                 pi = &sc->port[i];
629                 pi->adapter = sc;
630                 pi->nqsets = port_qsets;
631                 pi->first_qset = i*port_qsets;
632                 pi->port_id = i;
633                 pi->tx_chan = i >= ai->nports0;
634                 pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
635                 sc->rxpkt_map[pi->txpkt_intf] = i;
637                 sc->portdev[i] = child;
638                 device_set_softc(child, pi);
639         }
640         if ((error = bus_generic_attach(dev)) != 0)
641                 goto out;
642
643         /* initialize sge private state */
644         t3_sge_init_adapter(sc);
645
646         t3_led_ready(sc);
647         
648         cxgb_offload_init();
649         if (is_offload(sc)) {
650                 setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
651                 cxgb_adapter_ofld(sc);
652         }
653         error = t3_get_fw_version(sc, &vers);
654         if (error)
655                 goto out;
656
657         snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
658             G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
659             G_FW_VERSION_MICRO(vers));
660
661         snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
662                  ai->desc, is_offload(sc) ? "R" : "",
663                  sc->params.vpd.ec, sc->params.vpd.sn);
664         device_set_desc_copy(dev, buf);
665
666         snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
667                  sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
668                  sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
669
670         device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
671         callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
672         t3_add_attach_sysctls(sc);
673 out:
674         if (error)
675                 cxgb_free(sc);
676
677         return (error);
678 }
679
680 /*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
683  */
684
685 static int
686 cxgb_controller_detach(device_t dev)
687 {
688         struct adapter *sc;
689
690         sc = device_get_softc(dev);
691
692         cxgb_free(sc);
693
694         return (0);
695 }
696
697 /*
 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
 * down the structures that were built up in cxgb_controller_attach(), and
 * should be the final piece of work done when fully unloading the driver.
 * It is responsible for:
 *
704  *  1. Shutting down the threads started by the cxgb_controller_attach()
705  *     routine.
706  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
707  *  3. Detaching all of the port devices created during the
708  *     cxgb_controller_attach() routine.
709  *  4. Removing the device children created via cxgb_controller_attach().
710  *  5. Releasing PCI resources associated with the device.
711  *  6. Turning off the offload support, iff it was turned on.
712  *  7. Destroying the mutexes created in cxgb_controller_attach().
713  *
714  */
715 static void
716 cxgb_free(struct adapter *sc)
717 {
718         int i;
719
720         ADAPTER_LOCK(sc);
721         sc->flags |= CXGB_SHUTDOWN;
722         ADAPTER_UNLOCK(sc);
723
724         /*
725          * Make sure all child devices are gone.
726          */
727         bus_generic_detach(sc->dev);
728         for (i = 0; i < (sc)->params.nports; i++) {
729                 if (sc->portdev[i] &&
730                     device_delete_child(sc->dev, sc->portdev[i]) != 0)
731                         device_printf(sc->dev, "failed to delete child port\n");
732         }
733
734         /*
735          * At this point, it is as if cxgb_port_detach has run on all ports, and
736          * cxgb_down has run on the adapter.  All interrupts have been silenced,
737          * all open devices have been closed.
738          */
739         KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
740                                            __func__, sc->open_device_map));
741         for (i = 0; i < sc->params.nports; i++) {
742                 KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
743                                                   __func__, i));
744         }
745
746         /*
747          * Finish off the adapter's callouts.
748          */
749         callout_drain(&sc->cxgb_tick_ch);
750         callout_drain(&sc->sge_timer_ch);
751
752         /*
753          * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
754          * sysctls are cleaned up by the kernel linker.
755          */
756         if (sc->flags & FULL_INIT_DONE) {
757                 t3_free_sge_resources(sc);
758                 sc->flags &= ~FULL_INIT_DONE;
759         }
760
761         /*
762          * Release all interrupt resources.
763          */
764         cxgb_teardown_interrupts(sc);
765         if (sc->flags & (USING_MSI | USING_MSIX)) {
766                 device_printf(sc->dev, "releasing msi message(s)\n");
767                 pci_release_msi(sc->dev);
768         } else {
769                 device_printf(sc->dev, "no msi message to release\n");
770         }
771
772         if (sc->msix_regs_res != NULL) {
773                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
774                     sc->msix_regs_res);
775         }
776
777         /*
778          * Free the adapter's taskqueue.
779          */
780         if (sc->tq != NULL) {
781                 taskqueue_free(sc->tq);
782                 sc->tq = NULL;
783         }
784         
785         if (is_offload(sc)) {
786                 clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
787                 cxgb_adapter_unofld(sc);
788         }
789
790 #ifdef notyet
791         if (sc->flags & CXGB_OFLD_INIT)
792                 cxgb_offload_deactivate(sc);
793 #endif
794         free(sc->filters, M_DEVBUF);
795         t3_sge_free(sc);
796
797         cxgb_offload_exit();
798
799         if (sc->udbs_res != NULL)
800                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
801                     sc->udbs_res);
802
803         if (sc->regs_res != NULL)
804                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
805                     sc->regs_res);
806
807         MTX_DESTROY(&sc->mdio_lock);
808         MTX_DESTROY(&sc->sge.reg_lock);
809         MTX_DESTROY(&sc->elmer_lock);
810         ADAPTER_LOCK_DEINIT(sc);
811 }
812
813 /**
814  *      setup_sge_qsets - configure SGE Tx/Rx/response queues
815  *      @sc: the controller softc
816  *
817  *      Determines how many sets of SGE queues to use and initializes them.
818  *      We support multiple queue sets per port if we have MSI-X, otherwise
819  *      just one queue set per port.
820  */
821 static int
822 setup_sge_qsets(adapter_t *sc)
823 {
824         int i, j, err, irq_idx = 0, qset_idx = 0;
825         u_int ntxq = SGE_TXQ_PER_SET;
826
827         if ((err = t3_sge_alloc(sc)) != 0) {
828                 device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
829                 return (err);
830         }
831
832         if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
833                 irq_idx = -1;
834
835         for (i = 0; i < (sc)->params.nports; i++) {
836                 struct port_info *pi = &sc->port[i];
837
838                 for (j = 0; j < pi->nqsets; j++, qset_idx++) {
839                         err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
840                             (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
841                             &sc->params.sge.qset[qset_idx], ntxq, pi);
842                         if (err) {
843                                 t3_free_sge_resources(sc);
844                                 device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
845                                     err);
846                                 return (err);
847                         }
848                 }
849         }
850
851         return (0);
852 }
853
854 static void
855 cxgb_teardown_interrupts(adapter_t *sc)
856 {
857         int i;
858
859         for (i = 0; i < SGE_QSETS; i++) {
860                 if (sc->msix_intr_tag[i] == NULL) {
861
862                         /* Should have been setup fully or not at all */
863                         KASSERT(sc->msix_irq_res[i] == NULL &&
864                                 sc->msix_irq_rid[i] == 0,
865                                 ("%s: half-done interrupt (%d).", __func__, i));
866
867                         continue;
868                 }
869
870                 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
871                                   sc->msix_intr_tag[i]);
872                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
873                                      sc->msix_irq_res[i]);
874
875                 sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
876                 sc->msix_irq_rid[i] = 0;
877         }
878
879         if (sc->intr_tag) {
880                 KASSERT(sc->irq_res != NULL,
881                         ("%s: half-done interrupt.", __func__));
882
883                 bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
884                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
885                                      sc->irq_res);
886
887                 sc->irq_res = sc->intr_tag = NULL;
888                 sc->irq_rid = 0;
889         }
890 }
891
892 static int
893 cxgb_setup_interrupts(adapter_t *sc)
894 {
895         struct resource *res;
896         void *tag;
897         int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
898
899         sc->irq_rid = intr_flag ? 1 : 0;
900         sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
901                                              RF_SHAREABLE | RF_ACTIVE);
902         if (sc->irq_res == NULL) {
903                 device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
904                               intr_flag, sc->irq_rid);
905                 err = EINVAL;
906                 sc->irq_rid = 0;
907         } else {
908                 err = bus_setup_intr(sc->dev, sc->irq_res,
909                     INTR_MPSAFE | INTR_TYPE_NET, NULL,
910                     sc->cxgb_intr, sc, &sc->intr_tag);
911
912                 if (err) {
913                         device_printf(sc->dev,
914                                       "Cannot set up interrupt (%x, %u, %d)\n",
915                                       intr_flag, sc->irq_rid, err);
916                         bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
917                                              sc->irq_res);
918                         sc->irq_res = sc->intr_tag = NULL;
919                         sc->irq_rid = 0;
920                 }
921         }
922
923         /* That's all for INTx or MSI */
924         if (!(intr_flag & USING_MSIX) || err)
925                 return (err);
926
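        /*
         * MSI-X: rid 1 was used above for the slow-path interrupt; queue
         * set i gets rid i + 2 with t3_intr_msix as its handler.
         */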
927         for (i = 0; i < sc->msi_count - 1; i++) {
928                 rid = i + 2;
929                 res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
930                                              RF_SHAREABLE | RF_ACTIVE);
931                 if (res == NULL) {
932                         device_printf(sc->dev, "Cannot allocate interrupt "
933                                       "for message %d\n", rid);
934                         err = EINVAL;
935                         break;
936                 }
937
938                 err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
939                                      NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
940                 if (err) {
941                         device_printf(sc->dev, "Cannot set up interrupt "
942                                       "for message %d (%d)\n", rid, err);
943                         bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
944                         break;
945                 }
946
947                 sc->msix_irq_rid[i] = rid;
948                 sc->msix_irq_res[i] = res;
949                 sc->msix_intr_tag[i] = tag;
950         }
951
952         if (err)
953                 cxgb_teardown_interrupts(sc);
954
955         return (err);
956 }
957
958
959 static int
960 cxgb_port_probe(device_t dev)
961 {
962         struct port_info *p;
963         char buf[80];
964         const char *desc;
965         
966         p = device_get_softc(dev);
967         desc = p->phy.desc;
968         snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
969         device_set_desc_copy(dev, buf);
970         return (0);
971 }
972
973
974 static int
975 cxgb_makedev(struct port_info *pi)
976 {
977         
978         pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
979             UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
980         
981         if (pi->port_cdev == NULL)
982                 return (ENOMEM);
983
984         pi->port_cdev->si_drv1 = (void *)pi;
985         
986         return (0);
987 }
988
989 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
990     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
991     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
992 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
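
/*
 * Note: IFCAP_TSO covers both TSO4 and TSO6, so CXGB_CAP advertises TSO
 * over IPv6 as a capability while CXGB_CAP_ENABLE leaves it off by default.
 */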
993
994 static int
995 cxgb_port_attach(device_t dev)
996 {
997         struct port_info *p;
998         struct ifnet *ifp;
999         int err;
1000         struct adapter *sc;
1001
1002         p = device_get_softc(dev);
1003         sc = p->adapter;
1004         snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1005             device_get_unit(device_get_parent(dev)), p->port_id);
1006         PORT_LOCK_INIT(p, p->lockbuf);
1007
1008         /* Allocate an ifnet object and set it up */
1009         ifp = p->ifp = if_alloc(IFT_ETHER);
1010         if (ifp == NULL) {
1011                 device_printf(dev, "Cannot allocate ifnet\n");
1012                 return (ENOMEM);
1013         }
1014         
1015         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1016         ifp->if_init = cxgb_init;
1017         ifp->if_softc = p;
1018         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1019         ifp->if_ioctl = cxgb_ioctl;
1020         ifp->if_start = cxgb_start;
1021
1022
1023         ifp->if_timer = 0;      /* Disable ifnet watchdog */
1024         ifp->if_watchdog = NULL;
1025
1026         ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
1027         IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1028         IFQ_SET_READY(&ifp->if_snd);
1029
1030         ifp->if_capabilities = CXGB_CAP;
1031         ifp->if_capenable = CXGB_CAP_ENABLE;
1032         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1033
1034         /*
1035          * Disable TSO on 4-port - it isn't supported by the firmware.
1036          */     
1037         if (sc->params.nports > 2) {
1038                 ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1039                 ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040                 ifp->if_hwassist &= ~CSUM_TSO;
1041         }
1042
1043         ether_ifattach(ifp, p->hw_addr);
1044         ifp->if_transmit = cxgb_transmit;
1045         ifp->if_qflush = cxgb_qflush;
1046
1047 #ifdef DEFAULT_JUMBO
1048         if (sc->params.nports <= 2)
1049                 ifp->if_mtu = ETHERMTU_JUMBO;
1050 #endif
1051         if ((err = cxgb_makedev(p)) != 0) {
1052                 printf("makedev failed %d\n", err);
1053                 return (err);
1054         }
1055
1056         /* Create a list of media supported by this port */
1057         ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1058             cxgb_media_status);
1059         cxgb_build_medialist(p);
1060       
1061         t3_sge_init_port(p);
1062
1063         return (err);
1064 }
1065
1066 /*
 * cxgb_port_detach() is called via the device_detach method when
 * cxgb_free() calls bus_generic_detach().  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interface lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
1072  */
1073 static int
1074 cxgb_port_detach(device_t dev)
1075 {
1076         struct port_info *p;
1077         struct adapter *sc;
1078         int i;
1079
1080         p = device_get_softc(dev);
1081         sc = p->adapter;
1082
1083         /* Tell cxgb_ioctl and if_init that the port is going away */
1084         ADAPTER_LOCK(sc);
1085         SET_DOOMED(p);
1086         wakeup(&sc->flags);
1087         while (IS_BUSY(sc))
1088                 mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1089         SET_BUSY(sc);
1090         ADAPTER_UNLOCK(sc);
1091
1092         if (p->port_cdev != NULL)
1093                 destroy_dev(p->port_cdev);
1094
1095         cxgb_uninit_synchronized(p);
1096         ether_ifdetach(p->ifp);
1097
1098         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1099                 struct sge_qset *qs = &sc->sge.qs[i];
1100                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1101
1102                 callout_drain(&txq->txq_watchdog);
1103                 callout_drain(&txq->txq_timer);
1104         }
1105
1106         PORT_LOCK_DEINIT(p);
1107         if_free(p->ifp);
1108         p->ifp = NULL;
1109
1110         ADAPTER_LOCK(sc);
1111         CLR_BUSY(sc);
1112         wakeup_one(&sc->flags);
1113         ADAPTER_UNLOCK(sc);
1114         return (0);
1115 }
1116
1117 void
1118 t3_fatal_err(struct adapter *sc)
1119 {
1120         u_int fw_status[4];
1121
1122         if (sc->flags & FULL_INIT_DONE) {
1123                 t3_sge_stop(sc);
1124                 t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1125                 t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1126                 t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1127                 t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1128                 t3_intr_disable(sc);
1129         }
        device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1131         if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
                device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1133                     fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1134 }
1135
1136 int
1137 t3_os_find_pci_capability(adapter_t *sc, int cap)
1138 {
1139         device_t dev;
1140         struct pci_devinfo *dinfo;
1141         pcicfgregs *cfg;
1142         uint32_t status;
1143         uint8_t ptr;
1144
1145         dev = sc->dev;
1146         dinfo = device_get_ivars(dev);
1147         cfg = &dinfo->cfg;
1148
1149         status = pci_read_config(dev, PCIR_STATUS, 2);
1150         if (!(status & PCIM_STATUS_CAPPRESENT))
1151                 return (0);
1152
1153         switch (cfg->hdrtype & PCIM_HDRTYPE) {
1154         case 0:
1155         case 1:
1156                 ptr = PCIR_CAP_PTR;
1157                 break;
1158         case 2:
1159                 ptr = PCIR_CAP_PTR_2;
1160                 break;
1161         default:
1162                 return (0);
1164         }
1165         ptr = pci_read_config(dev, ptr, 1);
1166
1167         while (ptr != 0) {
1168                 if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1169                         return (ptr);
1170                 ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1171         }
1172
1173         return (0);
1174 }
1175
1176 int
1177 t3_os_pci_save_state(struct adapter *sc)
1178 {
1179         device_t dev;
1180         struct pci_devinfo *dinfo;
1181
1182         dev = sc->dev;
1183         dinfo = device_get_ivars(dev);
1184
1185         pci_cfg_save(dev, dinfo, 0);
1186         return (0);
1187 }
1188
1189 int
1190 t3_os_pci_restore_state(struct adapter *sc)
1191 {
1192         device_t dev;
1193         struct pci_devinfo *dinfo;
1194
1195         dev = sc->dev;
1196         dinfo = device_get_ivars(dev);
1197
1198         pci_cfg_restore(dev, dinfo);
1199         return (0);
1200 }
1201
1202 /**
1203  *      t3_os_link_changed - handle link status changes
1204  *      @sc: the adapter associated with the link change
1205  *      @port_id: the port index whose link status has changed
1206  *      @link_status: the new status of the link
1207  *      @speed: the new speed setting
1208  *      @duplex: the new duplex setting
 *      @fc: the new flow-control setting
 *      @mac_was_reset: non-zero if the MAC was reset during the link change
1210  *
1211  *      This is the OS-dependent handler for link status changes.  The OS
1212  *      neutral handler takes care of most of the processing for these events,
1213  *      then calls this handler for any OS-specific processing.
1214  */
1215 void
1216 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1217      int duplex, int fc, int mac_was_reset)
1218 {
1219         struct port_info *pi = &adapter->port[port_id];
1220         struct ifnet *ifp = pi->ifp;
1221
1222         /* no race with detach, so ifp should always be good */
1223         KASSERT(ifp, ("%s: if detached.", __func__));
1224
1225         /* Reapply mac settings if they were lost due to a reset */
1226         if (mac_was_reset) {
1227                 PORT_LOCK(pi);
1228                 cxgb_update_mac_settings(pi);
1229                 PORT_UNLOCK(pi);
1230         }
1231
1232         if (link_status) {
1233                 ifp->if_baudrate = IF_Mbps(speed);
1234                 if_link_state_change(ifp, LINK_STATE_UP);
1235         } else
1236                 if_link_state_change(ifp, LINK_STATE_DOWN);
1237 }
1238
1239 /**
1240  *      t3_os_phymod_changed - handle PHY module changes
 *      @adap: the adapter whose PHY module changed
 *      @port_id: the index of the port whose PHY module changed
1243  *
1244  *      This is the OS-dependent handler for PHY module changes.  It is
1245  *      invoked when a PHY module is removed or inserted for any OS-specific
1246  *      processing.
1247  */
void
t3_os_phymod_changed(struct adapter *adap, int port_id)
1249 {
1250         static const char *mod_str[] = {
1251                 NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1252         };
1253         struct port_info *pi = &adap->port[port_id];
1254         int mod = pi->phy.modtype;
1255
1256         if (mod != pi->media.ifm_cur->ifm_data)
1257                 cxgb_build_medialist(pi);
1258
1259         if (mod == phy_modtype_none)
1260                 if_printf(pi->ifp, "PHY module unplugged\n");
1261         else {
1262                 KASSERT(mod < ARRAY_SIZE(mod_str),
1263                         ("invalid PHY module type %d", mod));
1264                 if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1265         }
1266 }
1267
1268 /*
1269  * Interrupt-context handler for external (PHY) interrupts.
1270  */
1271 void
1272 t3_os_ext_intr_handler(adapter_t *sc)
1273 {
1274         if (cxgb_debug)
1275                 printf("t3_os_ext_intr_handler\n");
1276         /*
1277          * Schedule a task to handle external interrupts as they may be slow
1278          * and we use a mutex to protect MDIO registers.  We disable PHY
1279          * interrupts in the meantime and let the task reenable them when
1280          * it's done.
1281          */
1282         if (sc->slow_intr_mask) {
1283                 ADAPTER_LOCK(sc);
1284                 sc->slow_intr_mask &= ~F_T3DBG;
1285                 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1286                 taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1287                 ADAPTER_UNLOCK(sc);
1288         }
1289 }
1290
1291 void
1292 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1293 {
1294
1295         /*
         * The ifnet might not be allocated before this gets called,
         * as this is called early on in attach by t3_prep_adapter, so
         * save the address off in the port structure.
1299          */
1300         if (cxgb_debug)
1301                 printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1302         bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1303 }
1304
1305 /*
1306  * Programs the XGMAC based on the settings in the ifnet.  These settings
1307  * include MTU, MAC address, mcast addresses, etc.
1308  */
1309 static void
1310 cxgb_update_mac_settings(struct port_info *p)
1311 {
1312         struct ifnet *ifp = p->ifp;
1313         struct t3_rx_mode rm;
1314         struct cmac *mac = &p->mac;
1315         int mtu, hwtagging;
1316
1317         PORT_LOCK_ASSERT_OWNED(p);
1318
1319         bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1320
1321         mtu = ifp->if_mtu;
1322         if (ifp->if_capenable & IFCAP_VLAN_MTU)
1323                 mtu += ETHER_VLAN_ENCAP_LEN;
1324
1325         hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1326
1327         t3_mac_set_mtu(mac, mtu);
1328         t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1329         t3_mac_set_address(mac, 0, p->hw_addr);
1330         t3_init_rx_mode(&rm, p);
1331         t3_mac_set_rx_mode(mac, &rm);
1332 }
1333
1334
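/*
 * Wait for the expected number of management-queue replies by polling the
 * qset-0 response queue's offload_pkts counter, up to 5 x 10 ms.
 */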
1335 static int
1336 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1337                               unsigned long n)
1338 {
1339         int attempts = 5;
1340
1341         while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1342                 if (!--attempts)
1343                         return (ETIMEDOUT);
1344                 t3_os_sleep(10);
1345         }
        return (0);
1347 }
1348
1349 static int
1350 init_tp_parity(struct adapter *adap)
1351 {
1352         int i;
1353         struct mbuf *m;
1354         struct cpl_set_tcb_field *greq;
1355         unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1356
1357         t3_tp_set_offload_mode(adap, 1);
1358
1359         for (i = 0; i < 16; i++) {
1360                 struct cpl_smt_write_req *req;
1361
1362                 m = m_gethdr(M_WAITOK, MT_DATA);
1363                 req = mtod(m, struct cpl_smt_write_req *);
1364                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1365                 memset(req, 0, sizeof(*req));
1366                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1367                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1368                 req->iff = i;
1369                 t3_mgmt_tx(adap, m);
1370         }
1371
1372         for (i = 0; i < 2048; i++) {
1373                 struct cpl_l2t_write_req *req;
1374
1375                 m = m_gethdr(M_WAITOK, MT_DATA);
1376                 req = mtod(m, struct cpl_l2t_write_req *);
1377                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1378                 memset(req, 0, sizeof(*req));
1379                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1380                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1381                 req->params = htonl(V_L2T_W_IDX(i));
1382                 t3_mgmt_tx(adap, m);
1383         }
1384
1385         for (i = 0; i < 2048; i++) {
1386                 struct cpl_rte_write_req *req;
1387
1388                 m = m_gethdr(M_WAITOK, MT_DATA);
1389                 req = mtod(m, struct cpl_rte_write_req *);
1390                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1391                 memset(req, 0, sizeof(*req));
1392                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1393                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1394                 req->l2t_idx = htonl(V_L2T_W_IDX(i));
1395                 t3_mgmt_tx(adap, m);
1396         }
1397
1398         m = m_gethdr(M_WAITOK, MT_DATA);
1399         greq = mtod(m, struct cpl_set_tcb_field *);
1400         m->m_len = m->m_pkthdr.len = sizeof(*greq);
1401         memset(greq, 0, sizeof(*greq));
1402         greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1403         OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1404         greq->mask = htobe64(1);
1405         t3_mgmt_tx(adap, m);
1406
1407         i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1408         t3_tp_set_offload_mode(adap, 0);
1409         return (i);
1410 }
1411
1412 /**
1413  *      setup_rss - configure Receive Side Steering (per-queue connection demux) 
1414  *      @adap: the adapter
1415  *
1416  *      Sets up RSS to distribute packets to multiple receive queues.  We
1417  *      configure the RSS CPU lookup table to distribute to the number of HW
1418  *      receive queues, and the response queue lookup table to narrow that
1419  *      down to the response queues actually configured for each port.
1420  *      We always configure the RSS mapping for two ports since the mapping
1421  *      table has plenty of entries.
1422  */
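/*
 * Worked example (illustrative): with two ports of two qsets each,
 * nq[0] = nq[1] = 2, so the first half of rspq_map cycles 0,1,0,1,...
 * and the second half cycles 2,3,2,3,..., steering channel-0 traffic to
 * qsets 0-1 and channel-1 traffic to qsets 2-3.
 */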
1423 static void
1424 setup_rss(adapter_t *adap)
1425 {
1426         int i;
1427         u_int nq[2]; 
1428         uint8_t cpus[SGE_QSETS + 1];
1429         uint16_t rspq_map[RSS_TABLE_SIZE];
1430         
1431         for (i = 0; i < SGE_QSETS; ++i)
1432                 cpus[i] = i;
1433         cpus[SGE_QSETS] = 0xff;
1434
1435         nq[0] = nq[1] = 0;
1436         for_each_port(adap, i) {
1437                 const struct port_info *pi = adap2pinfo(adap, i);
1438
1439                 nq[pi->tx_chan] += pi->nqsets;
1440         }
1441         for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1442                 rspq_map[i] = nq[0] ? i % nq[0] : 0;
1443                 rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1444         }
1445
1446         /* Calculate the reverse RSS map table */
1447         for (i = 0; i < SGE_QSETS; ++i)
1448                 adap->rrss_map[i] = 0xff;
1449         for (i = 0; i < RSS_TABLE_SIZE; ++i)
1450                 if (adap->rrss_map[rspq_map[i]] == 0xff)
1451                         adap->rrss_map[rspq_map[i]] = i;
1452
1453         t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1454                       F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1455                       F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1456                       cpus, rspq_map);
1457
1458 }
1459
1460 /*
1461  * Sends an mbuf to an offload queue driver
1462  * after dealing with any active network taps.
1463  */
1464 static inline int
1465 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1466 {
1467         int ret;
1468
1469         ret = t3_offload_tx(tdev, m);
1470         return (ret);
1471 }
1472
1473 static int
1474 write_smt_entry(struct adapter *adapter, int idx)
1475 {
1476         struct port_info *pi = &adapter->port[idx];
1477         struct cpl_smt_write_req *req;
1478         struct mbuf *m;
1479
1480         if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1481                 return (ENOMEM);
1482
1483         req = mtod(m, struct cpl_smt_write_req *);
1484         m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1485         
1486         req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1487         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1488         req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1489         req->iff = idx;
1490         memset(req->src_mac1, 0, sizeof(req->src_mac1));
1491         memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1492
1493         m_set_priority(m, 1);
1494
1495         offload_tx(&adapter->tdev, m);
1496
1497         return (0);
1498 }
1499
1500 static int
1501 init_smt(struct adapter *adapter)
1502 {
1503         int i;
1504
1505         for_each_port(adapter, i)
1506                 write_smt_entry(adapter, i);
        return (0);
1508 }
1509
1510 static void
1511 init_port_mtus(adapter_t *adapter)
1512 {
1513         unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1514
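        /*
         * A_TP_MTU_PORT_TABLE packs two 16-bit per-channel MTUs, presumably
         * port 0 in the low half; both are initialized to ETHERMTU here.
         */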
1515         t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1516 }
1517
1518 static void
1519 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1520                               int hi, int port)
1521 {
1522         struct mbuf *m;
1523         struct mngt_pktsched_wr *req;
1524
	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m) {
1527                 req = mtod(m, struct mngt_pktsched_wr *);
1528                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1529                 req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1530                 req->sched = sched;
1531                 req->idx = qidx;
1532                 req->min = lo;
1533                 req->max = hi;
1534                 req->binding = port;
1535                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1536                 t3_mgmt_tx(adap, m);
1537         }
1538 }
1539
1540 static void
1541 bind_qsets(adapter_t *sc)
1542 {
1543         int i, j;
1544
	for (i = 0; i < sc->params.nports; ++i) {
1546                 const struct port_info *pi = adap2pinfo(sc, i);
1547
1548                 for (j = 0; j < pi->nqsets; ++j) {
1549                         send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1550                                           -1, pi->tx_chan);
1551
1552                 }
1553         }
1554 }
1555
1556 static void
1557 update_tpeeprom(struct adapter *adap)
1558 {
1559         const struct firmware *tpeeprom;
1561         uint32_t version;
1562         unsigned int major, minor;
1563         int ret, len;
1564         char rev, name[32];
1565
1566         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1567
1568         major = G_TP_VERSION_MAJOR(version);
1569         minor = G_TP_VERSION_MINOR(version);
	if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
		return;
1572
1573         rev = t3rev2char(adap);
1574         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1575
1576         tpeeprom = firmware_get(name);
1577         if (tpeeprom == NULL) {
1578                 device_printf(adap->dev,
1579                               "could not load TP EEPROM: unable to load %s\n",
1580                               name);
1581                 return;
1582         }
1583
1584         len = tpeeprom->datasize - 4;
1585         
1586         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1587         if (ret)
1588                 goto release_tpeeprom;
1589
1590         if (len != TP_SRAM_LEN) {
1591                 device_printf(adap->dev,
1592                               "%s length is wrong len=%d expected=%d\n", name,
1593                               len, TP_SRAM_LEN);
		goto release_tpeeprom;
1595         }
1596         
1597         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1598             TP_SRAM_OFFSET);
1599         
1600         if (!ret) {
1601                 device_printf(adap->dev,
1602                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1603                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1604         } else 
1605                 device_printf(adap->dev,
1606                               "Protocol SRAM image update in EEPROM failed\n");
1607
1608 release_tpeeprom:
1609         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1610         
1611         return;
1612 }
1613
1614 static int
1615 update_tpsram(struct adapter *adap)
1616 {
1617         const struct firmware *tpsram;
1618         int ret;
1619         char rev, name[32];
1620
1621         rev = t3rev2char(adap);
1622         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1623
1624         update_tpeeprom(adap);
1625
1626         tpsram = firmware_get(name);
	if (tpsram == NULL) {
1628                 device_printf(adap->dev, "could not load TP SRAM\n");
1629                 return (EINVAL);
1630         } else
1631                 device_printf(adap->dev, "updating TP SRAM\n");
1632         
1633         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1634         if (ret)
1635                 goto release_tpsram;    
1636
1637         ret = t3_set_proto_sram(adap, tpsram->data);
1638         if (ret)
1639                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1640
1641 release_tpsram:
1642         firmware_put(tpsram, FIRMWARE_UNLOAD);
1643         
	return (ret);
1645 }
1646
1647 /**
1648  *      cxgb_up - enable the adapter
1649  *      @adap: adapter being enabled
1650  *
1651  *      Called when the first port is enabled, this function performs the
1652  *      actions necessary to make an adapter operational, such as completing
1653  *      the initialization of HW modules, and enabling interrupts.
1654  */
1655 static int
1656 cxgb_up(struct adapter *sc)
1657 {
1658         int err = 0;
1659
1660         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1661                                            __func__, sc->open_device_map));
1662
1663         if ((sc->flags & FULL_INIT_DONE) == 0) {
1664
1665                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1666
1667                 if ((sc->flags & FW_UPTODATE) == 0)
1668                         if ((err = upgrade_fw(sc)))
1669                                 goto out;
1670
1671                 if ((sc->flags & TPS_UPTODATE) == 0)
1672                         if ((err = update_tpsram(sc)))
1673                                 goto out;
1674
1675                 if (is_offload(sc)) {
1676                         sc->params.mc5.nservers = 0;
1677                         sc->params.mc5.nroutes = 0;
1678                         sc->params.mc5.nfilters = t3_mc5_size(&sc->mc5) -
1679                             MC5_MIN_TIDS;
1680                 }
1681
1682                 err = t3_init_hw(sc, 0);
1683                 if (err)
1684                         goto out;
1685
1686                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1687                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1688
1689                 err = setup_sge_qsets(sc);
1690                 if (err)
1691                         goto out;
1692
1693                 alloc_filters(sc);
1694                 setup_rss(sc);
1695
1696                 t3_intr_clear(sc);
1697                 err = cxgb_setup_interrupts(sc);
1698                 if (err)
1699                         goto out;
1700
1701                 t3_add_configured_sysctls(sc);
1702                 sc->flags |= FULL_INIT_DONE;
1703         }
1704
1705         t3_intr_clear(sc);
1706         t3_sge_start(sc);
1707         t3_intr_enable(sc);
1708
1709         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1710             is_offload(sc) && init_tp_parity(sc) == 0)
1711                 sc->flags |= TP_PARITY_INIT;
1712
1713         if (sc->flags & TP_PARITY_INIT) {
1714                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1715                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1716         }
1717         
1718         if (!(sc->flags & QUEUES_BOUND)) {
1719                 bind_qsets(sc);
1720                 setup_hw_filters(sc);
1721                 sc->flags |= QUEUES_BOUND;              
1722         }
1723
1724         t3_sge_reset_adapter(sc);
1725 out:
1726         return (err);
1727 }
1728
1729 /*
1730  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1731  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1732  * during controller_detach, not here.
1733  */
1734 static void
1735 cxgb_down(struct adapter *sc)
1736 {
1737         t3_sge_stop(sc);
1738         t3_intr_disable(sc);
1739 }
1740
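/*
 * open_device_map is shared between the ports and the offload device:
 * each port owns bit 'port_id' and the offload device owns
 * OFFLOAD_DEVMAP_BIT, so "last close" is simply the map reaching zero
 * (see cxgb_uninit_synchronized below).
 */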
1741 static int
1742 offload_open(struct port_info *pi)
1743 {
1744         struct adapter *sc = pi->adapter;
1745         struct t3cdev *tdev = &sc->tdev;
1746
1747         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1748
1749         t3_tp_set_offload_mode(sc, 1);
1750         tdev->lldev = pi->ifp;
1751         init_port_mtus(sc);
1752         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1753                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1754         init_smt(sc);
1755         cxgb_add_clients(tdev);
1756
1757         return (0);
1758 }
1759
1760 static int
1761 offload_close(struct t3cdev *tdev)
1762 {
1763         struct adapter *adapter = tdev2adap(tdev);
1764
1765         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1766                 return (0);
1767
1768         /* Call back all registered clients */
1769         cxgb_remove_clients(tdev);
1770
1771         tdev->lldev = NULL;
1772         cxgb_set_dummy_ops(tdev);
1773         t3_tp_set_offload_mode(adapter, 0);
1774
1775         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1776
1777         return (0);
1778 }
1779
1780 /*
1781  * if_init for cxgb ports.
1782  */
1783 static void
1784 cxgb_init(void *arg)
1785 {
1786         struct port_info *p = arg;
1787         struct adapter *sc = p->adapter;
1788
1789         ADAPTER_LOCK(sc);
1790         cxgb_init_locked(p); /* releases adapter lock */
1791         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1792 }
1793
1794 static int
1795 cxgb_init_locked(struct port_info *p)
1796 {
1797         struct adapter *sc = p->adapter;
1798         struct ifnet *ifp = p->ifp;
1799         struct cmac *mac = &p->mac;
1800         int i, rc = 0, may_sleep = 0;
1801
1802         ADAPTER_LOCK_ASSERT_OWNED(sc);
1803
1804         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1805                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1806                         rc = EINTR;
1807                         goto done;
1808                 }
1809         }
1810         if (IS_DOOMED(p)) {
1811                 rc = ENXIO;
1812                 goto done;
1813         }
1814         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1815
1816         /*
1817          * The code that runs during one-time adapter initialization can sleep
1818          * so it's important not to hold any locks across it.
1819          */
1820         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1821
1822         if (may_sleep) {
1823                 SET_BUSY(sc);
1824                 ADAPTER_UNLOCK(sc);
1825         }
1826
1827         if (sc->open_device_map == 0) {
1828                 if ((rc = cxgb_up(sc)) != 0)
1829                         goto done;
1830
1831                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1832                         log(LOG_WARNING,
1833                             "Could not initialize offload capabilities\n");
1834         }
1835
1836         PORT_LOCK(p);
1837         if (isset(&sc->open_device_map, p->port_id) &&
1838             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1839                 PORT_UNLOCK(p);
1840                 goto done;
1841         }
1842         t3_port_intr_enable(sc, p->port_id);
1843         if (!mac->multiport) 
1844                 t3_mac_init(mac);
1845         cxgb_update_mac_settings(p);
1846         t3_link_start(&p->phy, mac, &p->link_config);
1847         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1848         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1849         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1850         PORT_UNLOCK(p);
1851
1852         t3_link_changed(sc, p->port_id);
1853
1854         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1855                 struct sge_qset *qs = &sc->sge.qs[i];
1856                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1857
1858                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1859                                  txq->txq_watchdog.c_cpu);
1860         }
1861
1862         /* all ok */
1863         setbit(&sc->open_device_map, p->port_id);
1864
1865 done:
1866         if (may_sleep) {
1867                 ADAPTER_LOCK(sc);
1868                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1869                 CLR_BUSY(sc);
1870                 wakeup_one(&sc->flags);
1871         }
1872         ADAPTER_UNLOCK(sc);
1873         return (rc);
1874 }
1875
1876 static int
1877 cxgb_uninit_locked(struct port_info *p)
1878 {
1879         struct adapter *sc = p->adapter;
1880         int rc;
1881
1882         ADAPTER_LOCK_ASSERT_OWNED(sc);
1883
1884         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1885                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1886                         rc = EINTR;
1887                         goto done;
1888                 }
1889         }
1890         if (IS_DOOMED(p)) {
1891                 rc = ENXIO;
1892                 goto done;
1893         }
1894         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1895         SET_BUSY(sc);
1896         ADAPTER_UNLOCK(sc);
1897
1898         rc = cxgb_uninit_synchronized(p);
1899
1900         ADAPTER_LOCK(sc);
1901         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1902         CLR_BUSY(sc);
1903         wakeup_one(&sc->flags);
1904 done:
1905         ADAPTER_UNLOCK(sc);
1906         return (rc);
1907 }
1908
1909 /*
1910  * Called on "ifconfig down", and from port_detach
1911  */
1912 static int
1913 cxgb_uninit_synchronized(struct port_info *pi)
1914 {
1915         struct adapter *sc = pi->adapter;
1916         struct ifnet *ifp = pi->ifp;
1917
1918         /*
1919          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1920          */
1921         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1922
	/*
	 * Clear this port's bit from the open device map, and then drain all
	 * the tasks that can access/manipulate this port's port_info or ifp.
	 * We disable this port's interrupts here, so the slow/ext interrupt
	 * tasks won't be enqueued.  The tick task will continue to be enqueued
	 * every second, but runs after this drain will not see this port in
	 * the open device map.
	 *
	 * A well-behaved task must take open_device_map into account and
	 * ignore ports that are not open.
	 */
1934         clrbit(&sc->open_device_map, pi->port_id);
1935         t3_port_intr_disable(sc, pi->port_id);
1936         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1937         taskqueue_drain(sc->tq, &sc->ext_intr_task);
1938         taskqueue_drain(sc->tq, &sc->tick_task);
1939
1940         PORT_LOCK(pi);
1941         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1942
1943         /* disable pause frames */
1944         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1945
1946         /* Reset RX FIFO HWM */
1947         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1948                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1949
1950         DELAY(100 * 1000);
1951
1952         /* Wait for TXFIFO empty */
1953         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1954                         F_TXFIFO_EMPTY, 1, 20, 5);
1955
1956         DELAY(100 * 1000);
1957         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1960         pi->phy.ops->power_down(&pi->phy, 1);
1961
1962         PORT_UNLOCK(pi);
1963
1964         pi->link_config.link_ok = 0;
1965         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1966
1967         if ((sc->open_device_map & PORT_MASK) == 0)
1968                 offload_close(&sc->tdev);
1969
1970         if (sc->open_device_map == 0)
1971                 cxgb_down(pi->adapter);
1972
1973         return (0);
1974 }
1975
1976 /*
1977  * Mark lro enabled or disabled in all qsets for this port
1978  */
1979 static int
1980 cxgb_set_lro(struct port_info *p, int enabled)
1981 {
1982         int i;
1983         struct adapter *adp = p->adapter;
1984         struct sge_qset *q;
1985
1986         for (i = 0; i < p->nqsets; i++) {
1987                 q = &adp->sge.qs[p->first_qset + i];
1988                 q->lro.enabled = (enabled != 0);
1989         }
1990         return (0);
1991 }
1992
1993 static int
1994 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1995 {
1996         struct port_info *p = ifp->if_softc;
1997         struct adapter *sc = p->adapter;
1998         struct ifreq *ifr = (struct ifreq *)data;
1999         int flags, error = 0, mtu;
2000         uint32_t mask;
2001
2002         switch (command) {
2003         case SIOCSIFMTU:
2004                 ADAPTER_LOCK(sc);
2005                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2006                 if (error) {
2007 fail:
2008                         ADAPTER_UNLOCK(sc);
2009                         return (error);
2010                 }
2011
2012                 mtu = ifr->ifr_mtu;
2013                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2014                         error = EINVAL;
2015                 } else {
2016                         ifp->if_mtu = mtu;
2017                         PORT_LOCK(p);
2018                         cxgb_update_mac_settings(p);
2019                         PORT_UNLOCK(p);
2020                 }
2021                 ADAPTER_UNLOCK(sc);
2022                 break;
2023         case SIOCSIFFLAGS:
2024                 ADAPTER_LOCK(sc);
2025                 if (IS_DOOMED(p)) {
2026                         error = ENXIO;
2027                         goto fail;
2028                 }
2029                 if (ifp->if_flags & IFF_UP) {
2030                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2031                                 flags = p->if_flags;
2032                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2033                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2034                                         if (IS_BUSY(sc)) {
2035                                                 error = EBUSY;
2036                                                 goto fail;
2037                                         }
2038                                         PORT_LOCK(p);
2039                                         cxgb_update_mac_settings(p);
2040                                         PORT_UNLOCK(p);
2041                                 }
2042                                 ADAPTER_UNLOCK(sc);
2043                         } else
2044                                 error = cxgb_init_locked(p);
2045                         p->if_flags = ifp->if_flags;
2046                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2047                         error = cxgb_uninit_locked(p);
2048                 else
2049                         ADAPTER_UNLOCK(sc);
2050
2051                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2052                 break;
2053         case SIOCADDMULTI:
2054         case SIOCDELMULTI:
2055                 ADAPTER_LOCK(sc);
2056                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2057                 if (error)
2058                         goto fail;
2059
2060                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2061                         PORT_LOCK(p);
2062                         cxgb_update_mac_settings(p);
2063                         PORT_UNLOCK(p);
2064                 }
2065                 ADAPTER_UNLOCK(sc);
2066
2067                 break;
2068         case SIOCSIFCAP:
2069                 ADAPTER_LOCK(sc);
2070                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2071                 if (error)
2072                         goto fail;
2073
2074                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2075                 if (mask & IFCAP_TXCSUM) {
2076                         ifp->if_capenable ^= IFCAP_TXCSUM;
2077                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2078
2079                         if (IFCAP_TSO & ifp->if_capenable &&
2080                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2081                                 ifp->if_capenable &= ~IFCAP_TSO;
2082                                 ifp->if_hwassist &= ~CSUM_TSO;
2083                                 if_printf(ifp,
2084                                     "tso disabled due to -txcsum.\n");
2085                         }
2086                 }
2087                 if (mask & IFCAP_RXCSUM)
2088                         ifp->if_capenable ^= IFCAP_RXCSUM;
2089                 if (mask & IFCAP_TSO4) {
2090                         ifp->if_capenable ^= IFCAP_TSO4;
2091
2092                         if (IFCAP_TSO & ifp->if_capenable) {
2093                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2094                                         ifp->if_hwassist |= CSUM_TSO;
2095                                 else {
2096                                         ifp->if_capenable &= ~IFCAP_TSO;
2097                                         ifp->if_hwassist &= ~CSUM_TSO;
2098                                         if_printf(ifp,
2099                                             "enable txcsum first.\n");
2100                                         error = EAGAIN;
2101                                 }
2102                         } else
2103                                 ifp->if_hwassist &= ~CSUM_TSO;
2104                 }
2105                 if (mask & IFCAP_LRO) {
2106                         ifp->if_capenable ^= IFCAP_LRO;
2107
2108                         /* Safe to do this even if cxgb_up not called yet */
2109                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2110                 }
2111                 if (mask & IFCAP_VLAN_HWTAGGING) {
2112                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2113                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2114                                 PORT_LOCK(p);
2115                                 cxgb_update_mac_settings(p);
2116                                 PORT_UNLOCK(p);
2117                         }
2118                 }
2119                 if (mask & IFCAP_VLAN_MTU) {
2120                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2121                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2122                                 PORT_LOCK(p);
2123                                 cxgb_update_mac_settings(p);
2124                                 PORT_UNLOCK(p);
2125                         }
2126                 }
2127                 if (mask & IFCAP_VLAN_HWTSO)
2128                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2129                 if (mask & IFCAP_VLAN_HWCSUM)
2130                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2131
2132 #ifdef VLAN_CAPABILITIES
2133                 VLAN_CAPABILITIES(ifp);
2134 #endif
2135                 ADAPTER_UNLOCK(sc);
2136                 break;
2137         case SIOCSIFMEDIA:
2138         case SIOCGIFMEDIA:
2139                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2140                 break;
2141         default:
2142                 error = ether_ioctl(ifp, command, data);
2143         }
2144
2145         return (error);
2146 }
2147
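/*
 * The SIOCSIFCAP handler above uses the standard XOR idiom: 'mask' has a
 * bit set for every capability the request would change, and each handled
 * bit is toggled in if_capenable.  A minimal sketch of the pattern, using
 * a hypothetical IFCAP_FOO bit (example only, not driver code):
 */
#if 0
	mask = ifr->ifr_reqcap ^ ifp->if_capenable;
	if (mask & IFCAP_FOO) {
		ifp->if_capenable ^= IFCAP_FOO;	/* flip to the requested state */
		/* reprogram the hardware to match the new setting here */
	}
#endif
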
2148 static int
2149 cxgb_media_change(struct ifnet *ifp)
2150 {
2151         return (EOPNOTSUPP);
2152 }
2153
2154 /*
2155  * Translates phy->modtype to the correct Ethernet media subtype.
2156  */
2157 static int
2158 cxgb_ifm_type(int mod)
2159 {
2160         switch (mod) {
2161         case phy_modtype_sr:
2162                 return (IFM_10G_SR);
2163         case phy_modtype_lr:
2164                 return (IFM_10G_LR);
2165         case phy_modtype_lrm:
2166                 return (IFM_10G_LRM);
2167         case phy_modtype_twinax:
2168                 return (IFM_10G_TWINAX);
2169         case phy_modtype_twinax_long:
2170                 return (IFM_10G_TWINAX_LONG);
2171         case phy_modtype_none:
2172                 return (IFM_NONE);
2173         case phy_modtype_unknown:
2174                 return (IFM_UNKNOWN);
2175         }
2176
2177         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2178         return (IFM_UNKNOWN);
2179 }
2180
2181 /*
2182  * Rebuilds the ifmedia list for this port, and sets the current media.
2183  */
2184 static void
2185 cxgb_build_medialist(struct port_info *p)
2186 {
2187         struct cphy *phy = &p->phy;
2188         struct ifmedia *media = &p->media;
2189         int mod = phy->modtype;
2190         int m = IFM_ETHER | IFM_FDX;
2191
2192         PORT_LOCK(p);
2193
2194         ifmedia_removeall(media);
2195         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2196                 /* Copper (RJ45) */
2197
2198                 if (phy->caps & SUPPORTED_10000baseT_Full)
2199                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2200
2201                 if (phy->caps & SUPPORTED_1000baseT_Full)
2202                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2203
2204                 if (phy->caps & SUPPORTED_100baseT_Full)
2205                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2206
2207                 if (phy->caps & SUPPORTED_10baseT_Full)
2208                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2209
2210                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2211                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2212
2213         } else if (phy->caps & SUPPORTED_TP) {
2214                 /* Copper (CX4) */
2215
2216                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2217                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2218
2219                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2220                 ifmedia_set(media, m | IFM_10G_CX4);
2221
2222         } else if (phy->caps & SUPPORTED_FIBRE &&
2223                    phy->caps & SUPPORTED_10000baseT_Full) {
2224                 /* 10G optical (but includes SFP+ twinax) */
2225
2226                 m |= cxgb_ifm_type(mod);
2227                 if (IFM_SUBTYPE(m) == IFM_NONE)
2228                         m &= ~IFM_FDX;
2229
2230                 ifmedia_add(media, m, mod, NULL);
2231                 ifmedia_set(media, m);
2232
2233         } else if (phy->caps & SUPPORTED_FIBRE &&
2234                    phy->caps & SUPPORTED_1000baseT_Full) {
2235                 /* 1G optical */
2236
2237                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2238                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2239                 ifmedia_set(media, m | IFM_1000_SX);
2240
2241         } else {
2242                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2243                             phy->caps));
2244         }
2245
2246         PORT_UNLOCK(p);
2247 }
2248
2249 static void
2250 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2251 {
2252         struct port_info *p = ifp->if_softc;
2253         struct ifmedia_entry *cur = p->media.ifm_cur;
2254         int speed = p->link_config.speed;
2255
2256         if (cur->ifm_data != p->phy.modtype) {
2257                 cxgb_build_medialist(p);
2258                 cur = p->media.ifm_cur;
2259         }
2260
2261         ifmr->ifm_status = IFM_AVALID;
2262         if (!p->link_config.link_ok)
2263                 return;
2264
2265         ifmr->ifm_status |= IFM_ACTIVE;
2266
2267         /*
2268          * active and current will differ iff current media is autoselect.  That
2269          * can happen only for copper RJ45.
2270          */
2271         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2272                 return;
2273         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2274                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2275
2276         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2277         if (speed == SPEED_10000)
2278                 ifmr->ifm_active |= IFM_10G_T;
2279         else if (speed == SPEED_1000)
2280                 ifmr->ifm_active |= IFM_1000_T;
2281         else if (speed == SPEED_100)
2282                 ifmr->ifm_active |= IFM_100_TX;
2283         else if (speed == SPEED_10)
2284                 ifmr->ifm_active |= IFM_10_T;
2285         else
2286                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2287                             speed));
2288 }
2289
2290 static void
2291 cxgb_async_intr(void *data)
2292 {
2293         adapter_t *sc = data;
2294
2295         if (cxgb_debug)
2296                 device_printf(sc->dev, "cxgb_async_intr\n");
2297         /*
2298          * May need to sleep - defer to taskqueue
2299          */
2300         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2301 }
2302
2303 static void
2304 cxgb_ext_intr_handler(void *arg, int count)
2305 {
2306         adapter_t *sc = (adapter_t *)arg;
2307
	if (cxgb_debug)
		device_printf(sc->dev, "cxgb_ext_intr_handler\n");
2310
2311         t3_phy_intr_handler(sc);
2312
2313         /* Now reenable external interrupts */
2314         ADAPTER_LOCK(sc);
2315         if (sc->slow_intr_mask) {
2316                 sc->slow_intr_mask |= F_T3DBG;
2317                 t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2318                 t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2319         }
2320         ADAPTER_UNLOCK(sc);
2321 }
2322
2323 static inline int
2324 link_poll_needed(struct port_info *p)
2325 {
2326         struct cphy *phy = &p->phy;
2327
2328         if (phy->caps & POLL_LINK_1ST_TIME) {
2329                 p->phy.caps &= ~POLL_LINK_1ST_TIME;
2330                 return (1);
2331         }
2332
2333         return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2334 }
2335
2336 static void
2337 check_link_status(adapter_t *sc)
2338 {
2339         int i;
2340
	for (i = 0; i < sc->params.nports; ++i) {
2342                 struct port_info *p = &sc->port[i];
2343
2344                 if (!isset(&sc->open_device_map, p->port_id))
2345                         continue;
2346
2347                 if (link_poll_needed(p))
2348                         t3_link_changed(sc, i);
2349         }
2350 }
2351
2352 static void
2353 check_t3b2_mac(struct adapter *sc)
2354 {
2355         int i;
2356
2357         if (sc->flags & CXGB_SHUTDOWN)
2358                 return;
2359
2360         for_each_port(sc, i) {
2361                 struct port_info *p = &sc->port[i];
2362                 int status;
2363 #ifdef INVARIANTS
2364                 struct ifnet *ifp = p->ifp;
2365 #endif          
2366
2367                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2368                     !p->link_config.link_ok)
2369                         continue;
2370
2371                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2372                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2373                          __func__, ifp->if_drv_flags, sc->open_device_map));
2374
2375                 PORT_LOCK(p);
2376                 status = t3b2_mac_watchdog_task(&p->mac);
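		/* status 1: the watchdog toggled the MAC; 2: it needs a restart */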
2377                 if (status == 1)
2378                         p->mac.stats.num_toggled++;
2379                 else if (status == 2) {
2380                         struct cmac *mac = &p->mac;
2381
2382                         cxgb_update_mac_settings(p);
2383                         t3_link_start(&p->phy, mac, &p->link_config);
2384                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2385                         t3_port_intr_enable(sc, p->port_id);
2386                         p->mac.stats.num_resets++;
2387                 }
2388                 PORT_UNLOCK(p);
2389         }
2390 }
2391
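/*
 * Per-second callout.  It only enqueues the tick task: the real work runs
 * from the adapter taskqueue in cxgb_tick_handler() below, where it is
 * safe to take locks and sleep if necessary.
 */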
2392 static void
2393 cxgb_tick(void *arg)
2394 {
2395         adapter_t *sc = (adapter_t *)arg;
2396
2397         if (sc->flags & CXGB_SHUTDOWN)
2398                 return;
2399
2400         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2401         callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2402 }
2403
2404 static void
2405 cxgb_tick_handler(void *arg, int count)
2406 {
2407         adapter_t *sc = (adapter_t *)arg;
2408         const struct adapter_params *p = &sc->params;
2409         int i;
2410         uint32_t cause, reset;
2411
2412         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2413                 return;
2414
2415         check_link_status(sc);
2416
2417         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2418                 check_t3b2_mac(sc);
2419
2420         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2421         if (cause) {
2422                 struct sge_qset *qs = &sc->sge.qs[0];
2423                 uint32_t mask, v;
2424
2425                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2426
2427                 mask = 1;
2428                 for (i = 0; i < SGE_QSETS; i++) {
2429                         if (v & mask)
2430                                 qs[i].rspq.starved++;
2431                         mask <<= 1;
2432                 }
2433
2434                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2435
2436                 for (i = 0; i < SGE_QSETS * 2; i++) {
2437                         if (v & mask) {
2438                                 qs[i / 2].fl[i % 2].empty++;
2439                         }
2440                         mask <<= 1;
2441                 }
2442
2443                 /* clear */
2444                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2445                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2446         }
2447
2448         for (i = 0; i < sc->params.nports; i++) {
2449                 struct port_info *pi = &sc->port[i];
2450                 struct ifnet *ifp = pi->ifp;
2451                 struct cmac *mac = &pi->mac;
2452                 struct mac_stats *mstats = &mac->stats;
2453                 int drops, j;
2454
2455                 if (!isset(&sc->open_device_map, pi->port_id))
2456                         continue;
2457
2458                 PORT_LOCK(pi);
2459                 t3_mac_update_stats(mac);
2460                 PORT_UNLOCK(pi);
2461
2462                 ifp->if_opackets = mstats->tx_frames;
2463                 ifp->if_ipackets = mstats->rx_frames;
2464                 ifp->if_obytes = mstats->tx_octets;
2465                 ifp->if_ibytes = mstats->rx_octets;
2466                 ifp->if_omcasts = mstats->tx_mcast_frames;
2467                 ifp->if_imcasts = mstats->rx_mcast_frames;
2468                 ifp->if_collisions = mstats->tx_total_collisions;
2469                 ifp->if_iqdrops = mstats->rx_cong_drops;
2470
2471                 drops = 0;
2472                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2473                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2474                 ifp->if_snd.ifq_drops = drops;
2475
2476                 ifp->if_oerrors =
2477                     mstats->tx_excess_collisions +
2478                     mstats->tx_underrun +
2479                     mstats->tx_len_errs +
2480                     mstats->tx_mac_internal_errs +
2481                     mstats->tx_excess_deferral +
2482                     mstats->tx_fcs_errs;
2483                 ifp->if_ierrors =
2484                     mstats->rx_jabber +
2485                     mstats->rx_data_errs +
2486                     mstats->rx_sequence_errs +
2487                     mstats->rx_runt + 
2488                     mstats->rx_too_long +
2489                     mstats->rx_mac_internal_errs +
2490                     mstats->rx_short +
2491                     mstats->rx_fcs_errs;
2492
2493                 if (mac->multiport)
2494                         continue;
2495
2496                 /* Count rx fifo overflows, once per second */
2497                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2498                 reset = 0;
2499                 if (cause & F_RXFIFO_OVERFLOW) {
2500                         mac->stats.rx_fifo_ovfl++;
2501                         reset |= F_RXFIFO_OVERFLOW;
2502                 }
2503                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2504         }
2505 }
2506
2507 static void
2508 touch_bars(device_t dev)
2509 {
	/*
	 * Don't enable yet: this disabled block is a placeholder that still
	 * uses Linux-style PCI config accessors.
	 */
2513 #if !defined(__LP64__) && 0
2514         u32 v;
2515
2516         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2517         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2518         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2519         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2520         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2521         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2522 #endif
2523 }
2524
2525 static int
2526 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2527 {
2528         uint8_t *buf;
2529         int err = 0;
2530         u32 aligned_offset, aligned_len, *p;
2531         struct adapter *adapter = pi->adapter;
2534         aligned_offset = offset & ~3;
2535         aligned_len = (len + (offset & 3) + 3) & ~3;
2536
2537         if (aligned_offset != offset || aligned_len != len) {
		/* M_WAITOK allocations sleep rather than fail; no NULL check needed. */
		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK | M_ZERO);
2541                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2542                 if (!err && aligned_len > 4)
2543                         err = t3_seeprom_read(adapter,
2544                                               aligned_offset + aligned_len - 4,
2545                                               (u32 *)&buf[aligned_len - 4]);
2546                 if (err)
2547                         goto out;
2548                 memcpy(buf + (offset & 3), data, len);
2549         } else
2550                 buf = (uint8_t *)(uintptr_t)data;
2551
2552         err = t3_seeprom_wp(adapter, 0);
2553         if (err)
2554                 goto out;
2555
2556         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2557                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2558                 aligned_offset += 4;
2559         }
2560
2561         if (!err)
2562                 err = t3_seeprom_wp(adapter, 1);
2563 out:
2564         if (buf != data)
2565                 free(buf, M_DEVBUF);
	return (err);
2567 }
2568
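/*
 * Worked example for the alignment logic in set_eeprom() above: a 5-byte
 * write at offset 6 yields aligned_offset = 4 and aligned_len =
 * (5 + 2 + 3) & ~3 = 8, so the two 32-bit words covering bytes 4..11 are
 * read back, the payload is patched in at buf + 2, and both words are
 * rewritten with the write protect disabled.
 */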
2569
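/*
 * Range check used by CHELSIO_SET_HW_SCHED below: a negative value means
 * "parameter not supplied" and is always accepted.
 */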
2570 static int
2571 in_range(int val, int lo, int hi)
2572 {
	return (val < 0 || (val <= hi && val >= lo));
2574 }
2575
2576 static int
cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	return (0);
2580 }
2581
2582 static int
2583 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2584 {
	return (0);
2586 }
2587
2588 static int
2589 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2590     int fflag, struct thread *td)
2591 {
2592         int mmd, error = 0;
2593         struct port_info *pi = dev->si_drv1;
2594         adapter_t *sc = pi->adapter;
2595
2596 #ifdef PRIV_SUPPORTED   
2597         if (priv_check(td, PRIV_DRIVER)) {
2598                 if (cxgb_debug) 
2599                         printf("user does not have access to privileged ioctls\n");
2600                 return (EPERM);
2601         }
2602 #else
2603         if (suser(td)) {
2604                 if (cxgb_debug)
2605                         printf("user does not have access to privileged ioctls\n");
2606                 return (EPERM);
2607         }
2608 #endif
2609         
2610         switch (cmd) {
2611         case CHELSIO_GET_MIIREG: {
2612                 uint32_t val;
2613                 struct cphy *phy = &pi->phy;
2614                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2615                 
2616                 if (!phy->mdio_read)
2617                         return (EOPNOTSUPP);
2618                 if (is_10G(sc)) {
2619                         mmd = mid->phy_id >> 8;
2620                         if (!mmd)
2621                                 mmd = MDIO_DEV_PCS;
2622                         else if (mmd > MDIO_DEV_VEND2)
2623                                 return (EINVAL);
2624
2625                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2626                                              mid->reg_num, &val);
2627                 } else
2628                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2629                                              mid->reg_num & 0x1f, &val);
2630                 if (error == 0)
2631                         mid->val_out = val;
2632                 break;
2633         }
2634         case CHELSIO_SET_MIIREG: {
2635                 struct cphy *phy = &pi->phy;
2636                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2637
2638                 if (!phy->mdio_write)
2639                         return (EOPNOTSUPP);
2640                 if (is_10G(sc)) {
2641                         mmd = mid->phy_id >> 8;
2642                         if (!mmd)
2643                                 mmd = MDIO_DEV_PCS;
2644                         else if (mmd > MDIO_DEV_VEND2)
2645                                 return (EINVAL);
2646                         
2647                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2648                                               mmd, mid->reg_num, mid->val_in);
2649                 } else
2650                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2651                                               mid->reg_num & 0x1f,
2652                                               mid->val_in);
2653                 break;
2654         }
2655         case CHELSIO_SETREG: {
2656                 struct ch_reg *edata = (struct ch_reg *)data;
2657                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2658                         return (EFAULT);
2659                 t3_write_reg(sc, edata->addr, edata->val);
2660                 break;
2661         }
2662         case CHELSIO_GETREG: {
2663                 struct ch_reg *edata = (struct ch_reg *)data;
2664                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2665                         return (EFAULT);
2666                 edata->val = t3_read_reg(sc, edata->addr);
2667                 break;
2668         }
2669         case CHELSIO_GET_SGE_CONTEXT: {
2670                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2671                 mtx_lock_spin(&sc->sge.reg_lock);
2672                 switch (ecntxt->cntxt_type) {
2673                 case CNTXT_TYPE_EGRESS:
2674                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2675                             ecntxt->data);
2676                         break;
2677                 case CNTXT_TYPE_FL:
2678                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2679                             ecntxt->data);
2680                         break;
2681                 case CNTXT_TYPE_RSP:
2682                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2683                             ecntxt->data);
2684                         break;
2685                 case CNTXT_TYPE_CQ:
2686                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2687                             ecntxt->data);
2688                         break;
2689                 default:
2690                         error = EINVAL;
2691                         break;
2692                 }
2693                 mtx_unlock_spin(&sc->sge.reg_lock);
2694                 break;
2695         }
2696         case CHELSIO_GET_SGE_DESC: {
2697                 struct ch_desc *edesc = (struct ch_desc *)data;
2698                 int ret;
2699                 if (edesc->queue_num >= SGE_QSETS * 6)
2700                         return (EINVAL);
2701                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2702                     edesc->queue_num % 6, edesc->idx, edesc->data);
2703                 if (ret < 0)
2704                         return (EINVAL);
2705                 edesc->size = ret;
2706                 break;
2707         }
2708         case CHELSIO_GET_QSET_PARAMS: {
2709                 struct qset_params *q;
2710                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2711                 int q1 = pi->first_qset;
2712                 int nqsets = pi->nqsets;
2713                 int i;
2714
2715                 if (t->qset_idx >= nqsets)
			return (EINVAL);
2717
2718                 i = q1 + t->qset_idx;
2719                 q = &sc->params.sge.qset[i];
2720                 t->rspq_size   = q->rspq_size;
2721                 t->txq_size[0] = q->txq_size[0];
2722                 t->txq_size[1] = q->txq_size[1];
2723                 t->txq_size[2] = q->txq_size[2];
2724                 t->fl_size[0]  = q->fl_size;
2725                 t->fl_size[1]  = q->jumbo_size;
2726                 t->polling     = q->polling;
2727                 t->lro         = q->lro;
2728                 t->intr_lat    = q->coalesce_usecs;
2729                 t->cong_thres  = q->cong_thres;
2730                 t->qnum        = i;
2731
2732                 if ((sc->flags & FULL_INIT_DONE) == 0)
2733                         t->vector = 0;
2734                 else if (sc->flags & USING_MSIX)
2735                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2736                 else
2737                         t->vector = rman_get_start(sc->irq_res);
2738
2739                 break;
2740         }
2741         case CHELSIO_GET_QSET_NUM: {
2742                 struct ch_reg *edata = (struct ch_reg *)data;
2743                 edata->val = pi->nqsets;
2744                 break;
2745         }
2746         case CHELSIO_LOAD_FW: {
2747                 uint8_t *fw_data;
2748                 uint32_t vers;
2749                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2750
2751                 /*
		 * You're allowed to load firmware only before FULL_INIT_DONE.
2753                  *
2754                  * FW_UPTODATE is also set so the rest of the initialization
2755                  * will not overwrite what was loaded here.  This gives you the
2756                  * flexibility to load any firmware (and maybe shoot yourself in
2757                  * the foot).
2758                  */
2759
2760                 ADAPTER_LOCK(sc);
2761                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2762                         ADAPTER_UNLOCK(sc);
2763                         return (EBUSY);
2764                 }
2765
2766                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2767                 if (!fw_data)
2768                         error = ENOMEM;
2769                 else
2770                         error = copyin(t->buf, fw_data, t->len);
2771
2772                 if (!error)
2773                         error = -t3_load_fw(sc, fw_data, t->len);
2774
2775                 if (t3_get_fw_version(sc, &vers) == 0) {
2776                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2777                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2778                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2779                 }
2780
2781                 if (!error)
2782                         sc->flags |= FW_UPTODATE;
2783
2784                 free(fw_data, M_DEVBUF);
2785                 ADAPTER_UNLOCK(sc);
2786                 break;
2787         }
2788         case CHELSIO_LOAD_BOOT: {
2789                 uint8_t *boot_data;
2790                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2791
2792                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2793                 if (!boot_data)
			return (ENOMEM);
2795
2796                 error = copyin(t->buf, boot_data, t->len);
2797                 if (!error)
2798                         error = -t3_load_boot(sc, boot_data, t->len);
2799
2800                 free(boot_data, M_DEVBUF);
2801                 break;
2802         }
2803         case CHELSIO_GET_PM: {
2804                 struct ch_pm *m = (struct ch_pm *)data;
2805                 struct tp_params *p = &sc->params.tp;
2806
2807                 if (!is_offload(sc))
2808                         return (EOPNOTSUPP);
2809
2810                 m->tx_pg_sz = p->tx_pg_size;
2811                 m->tx_num_pg = p->tx_num_pgs;
2812                 m->rx_pg_sz  = p->rx_pg_size;
2813                 m->rx_num_pg = p->rx_num_pgs;
2814                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2815
2816                 break;
2817         }
2818         case CHELSIO_SET_PM: {
2819                 struct ch_pm *m = (struct ch_pm *)data;
2820                 struct tp_params *p = &sc->params.tp;
2821
2822                 if (!is_offload(sc))
2823                         return (EOPNOTSUPP);
2824                 if (sc->flags & FULL_INIT_DONE)
2825                         return (EBUSY);
2826
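		/* (x & (x - 1)) == 0 exactly when x != 0 is a power of 2. */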
2827                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2828                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2829                         return (EINVAL);        /* not power of 2 */
2830                 if (!(m->rx_pg_sz & 0x14000))
2831                         return (EINVAL);        /* not 16KB or 64KB */
		if (!(m->tx_pg_sz & 0x1554000))
			return (EINVAL);	/* not a power of 4 in [16KB, 16MB] */
2834                 if (m->tx_num_pg == -1)
2835                         m->tx_num_pg = p->tx_num_pgs;
2836                 if (m->rx_num_pg == -1)
2837                         m->rx_num_pg = p->rx_num_pgs;
2838                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2839                         return (EINVAL);
2840                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2841                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2842                         return (EINVAL);
2843
2844                 p->rx_pg_size = m->rx_pg_sz;
2845                 p->tx_pg_size = m->tx_pg_sz;
2846                 p->rx_num_pgs = m->rx_num_pg;
2847                 p->tx_num_pgs = m->tx_num_pg;
2848                 break;
2849         }
2850         case CHELSIO_SETMTUTAB: {
2851                 struct ch_mtus *m = (struct ch_mtus *)data;
2852                 int i;
2853                 
2854                 if (!is_offload(sc))
2855                         return (EOPNOTSUPP);
2856                 if (offload_running(sc))
2857                         return (EBUSY);
2858                 if (m->nmtus != NMTUS)
2859                         return (EINVAL);
2860                 if (m->mtus[0] < 81)         /* accommodate SACK */
2861                         return (EINVAL);
2862                 
2863                 /*
2864                  * MTUs must be in ascending order
2865                  */
2866                 for (i = 1; i < NMTUS; ++i)
2867                         if (m->mtus[i] < m->mtus[i - 1])
2868                                 return (EINVAL);
2869
2870                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2871                 break;
2872         }
2873         case CHELSIO_GETMTUTAB: {
2874                 struct ch_mtus *m = (struct ch_mtus *)data;
2875
2876                 if (!is_offload(sc))
2877                         return (EOPNOTSUPP);
2878
2879                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2880                 m->nmtus = NMTUS;
2881                 break;
2882         }
2883         case CHELSIO_GET_MEM: {
2884                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2885                 struct mc7 *mem;
2886                 uint8_t *useraddr;
2887                 u64 buf[32];
2888
2889                 /*
		 * Use these to avoid modifying len/addr in the return
		 * struct.
2892                  */
2893                 uint32_t len = t->len, addr = t->addr;
2894
2895                 if (!is_offload(sc))
2896                         return (EOPNOTSUPP);
2897                 if (!(sc->flags & FULL_INIT_DONE))
2898                         return (EIO);         /* need the memory controllers */
2899                 if ((addr & 0x7) || (len & 0x7))
2900                         return (EINVAL);
2901                 if (t->mem_id == MEM_CM)
2902                         mem = &sc->cm;
2903                 else if (t->mem_id == MEM_PMRX)
2904                         mem = &sc->pmrx;
2905                 else if (t->mem_id == MEM_PMTX)
2906                         mem = &sc->pmtx;
2907                 else
2908                         return (EINVAL);
2909
2910                 /*
2911                  * Version scheme:
2912                  * bits 0..9: chip version
2913                  * bits 10..15: chip revision
2914                  */
2915                 t->version = 3 | (sc->params.rev << 10);
2916                 
2917                 /*
2918                  * Read 256 bytes at a time as len can be large and we don't
2919                  * want to use huge intermediate buffers.
2920                  */
2921                 useraddr = (uint8_t *)t->buf; 
2922                 while (len) {
2923                         unsigned int chunk = min(len, sizeof(buf));
2924
2925                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2926                         if (error)
2927                                 return (-error);
2928                         if (copyout(buf, useraddr, chunk))
2929                                 return (EFAULT);
2930                         useraddr += chunk;
2931                         addr += chunk;
2932                         len -= chunk;
2933                 }
2934                 break;
2935         }
2936         case CHELSIO_READ_TCAM_WORD: {
2937                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2938
2939                 if (!is_offload(sc))
2940                         return (EOPNOTSUPP);
2941                 if (!(sc->flags & FULL_INIT_DONE))
2942                         return (EIO);         /* need MC5 */            
		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2945         }
2946         case CHELSIO_SET_TRACE_FILTER: {
2947                 struct ch_trace *t = (struct ch_trace *)data;
2948                 const struct trace_params *tp;
2949
2950                 tp = (const struct trace_params *)&t->sip;
2951                 if (t->config_tx)
2952                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2953                                                t->trace_tx);
2954                 if (t->config_rx)
2955                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2956                                                t->trace_rx);
2957                 break;
2958         }
2959         case CHELSIO_SET_PKTSCHED: {
2960                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2961                 if (sc->open_device_map == 0)
2962                         return (EAGAIN);
2963                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2964                     p->binding);
2965                 break;
2966         }
2967         case CHELSIO_IFCONF_GETREGS: {
2968                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2969                 int reglen = cxgb_get_regs_len();
2970                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2971                 if (buf == NULL) {
2972                         return (ENOMEM);
2973                 }
2974                 if (regs->len > reglen)
2975                         regs->len = reglen;
2976                 else if (regs->len < reglen)
2977                         error = ENOBUFS;
2978
2979                 if (!error) {
2980                         cxgb_get_regs(sc, regs, buf);
2981                         error = copyout(buf, regs->data, reglen);
2982                 }
2983                 free(buf, M_DEVBUF);
2984
2985                 break;
2986         }
2987         case CHELSIO_SET_HW_SCHED: {
2988                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2989                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2990
2991                 if ((sc->flags & FULL_INIT_DONE) == 0)
2992                         return (EAGAIN);       /* need TP to be initialized */
2993                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2994                     !in_range(t->channel, 0, 1) ||
2995                     !in_range(t->kbps, 0, 10000000) ||
2996                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2997                     !in_range(t->flow_ipg, 0,
2998                               dack_ticks_to_usec(sc, 0x7ff)))
2999                         return (EINVAL);
3000
3001                 if (t->kbps >= 0) {
3002                         error = t3_config_sched(sc, t->kbps, t->sched);
3003                         if (error < 0)
3004                                 return (-error);
3005                 }
3006                 if (t->class_ipg >= 0)
3007                         t3_set_sched_ipg(sc, t->sched, t->class_ipg);
3008                 if (t->flow_ipg >= 0) {
3009                         t->flow_ipg *= 1000;     /* us -> ns */
3010                         t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
3011                 }
3012                 if (t->mode >= 0) {
3013                         int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
3014
3015                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3016                                          bit, t->mode ? bit : 0);
3017                 }
3018                 if (t->channel >= 0)
3019                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3020                                          1 << t->sched, t->channel << t->sched);
3021                 break;
3022         }
3023         case CHELSIO_GET_EEPROM: {
3024                 int i;
3025                 struct ch_eeprom *e = (struct ch_eeprom *)data;
3026                 uint8_t *buf;
3027
                     /*
                      * Sanity check the caller-supplied range before it is
                      * used to index buf below.
                      */
                     if (e->offset > EEPROMSIZE || e->len > EEPROMSIZE - e->offset)
                             return (EINVAL);

                     buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3028                 if (buf == NULL) {
3029                         return (ENOMEM);
3030                 }
3031                 e->magic = EEPROM_MAGIC;
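                     /*
                      * The SEEPROM is read a full 32-bit word at a time, so
                      * start at the offset rounded down to a word boundary;
                      * the copyout below begins at the exact byte requested.
                      */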
3032                 for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3033                         error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3034
3035                 if (!error)
3036                         error = copyout(buf + e->offset, e->data, e->len);
3037
3038                 free(buf, M_DEVBUF);
3039                 break;
3040         }
3041         case CHELSIO_CLEAR_STATS: {
3042                 if (!(sc->flags & FULL_INIT_DONE))
3043                         return (EAGAIN);
3044
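                     /*
                      * The MAC counters are clear-on-read, so pull them into
                      * the soft copy one last time and then zero that copy.
                      */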
3045                 PORT_LOCK(pi);
3046                 t3_mac_update_stats(&pi->mac);
3047                 memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3048                 PORT_UNLOCK(pi);
3049                 break;
3050         }
3051         case CHELSIO_GET_UP_LA: {
3052                 struct ch_up_la *la = (struct ch_up_la *)data;
3053                 uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3054                 if (buf == NULL) {
3055                         return (ENOMEM);
3056                 }
3057                 if (la->bufsize < LA_BUFSIZE)
3058                         error = ENOBUFS;
3059
3060                 if (!error)
3061                         error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3062                                               &la->bufsize, buf);
3063                 if (!error)
3064                         error = copyout(buf, la->data, la->bufsize);
3065
3066                 free(buf, M_DEVBUF);
3067                 break;
3068         }
3069         case CHELSIO_GET_UP_IOQS: {
3070                 struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3071                 uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3072                 uint32_t *v;
3073
3074                 if (buf == NULL) {
3075                         return (ENOMEM);
3076                 }
3077                 if (ioqs->bufsize < IOQS_BUFSIZE)
3078                         error = ENOBUFS;
3079
3080                 if (!error)
3081                         error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3082
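                     /*
                      * The first four words of the snapshot are the IOQ
                      * rx/tx enable and status registers; the remainder is
                      * what gets copied out to the caller.
                      */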
3083                 if (!error) {
3084                         v = (uint32_t *)buf;
3085
3086                         ioqs->bufsize -= 4 * sizeof(uint32_t);
3087                         ioqs->ioq_rx_enable = *v++;
3088                         ioqs->ioq_tx_enable = *v++;
3089                         ioqs->ioq_rx_status = *v++;
3090                         ioqs->ioq_tx_status = *v++;
3091
3092                         error = copyout(v, ioqs->data, ioqs->bufsize);
3093                 }
3094
3095                 free(buf, M_DEVBUF);
3096                 break;
3097         }
3098         case CHELSIO_SET_FILTER: {
3099                 struct ch_filter *f = (struct ch_filter *)data;
3100                 struct filter_info *p;
3101                 unsigned int nfilters = sc->params.mc5.nfilters;
3102
3103                 if (!is_offload(sc))
3104                         return (EOPNOTSUPP);    /* No TCAM */
3105                 if (!(sc->flags & FULL_INIT_DONE))
3106                         return (EAGAIN);        /* mc5 not setup yet */
3107                 if (nfilters == 0)
3108                         return (EBUSY);         /* TOE will use TCAM */
3109
3110                 /*
                      * Sanity checks: a field participates in the match only
                      * with its full-width mask, and mac_addr_idx == 0xffff
                      * means "do not match on the exact-MAC index".
                      */
3111                 if (f->filter_id >= nfilters ||
3112                     (f->val.dip && f->mask.dip != 0xffffffff) ||
3113                     (f->val.sport && f->mask.sport != 0xffff) ||
3114                     (f->val.dport && f->mask.dport != 0xffff) ||
3115                     (f->val.vlan && f->mask.vlan != 0xfff) ||
3116                     (f->val.vlan_prio &&
3117                         f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3118                     (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3119                     f->qset >= SGE_QSETS ||
3120                     sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3121                         return (EINVAL);
3122
3123                 /* Was allocated with M_WAITOK */
3124                 KASSERT(sc->filters, ("filter table NULL\n"));
3125
3126                 p = &sc->filters[f->filter_id];
3127                 if (p->locked)
3128                         return (EPERM);
3129
3130                 bzero(p, sizeof(*p));
3131                 p->sip = f->val.sip;
3132                 p->sip_mask = f->mask.sip;
3133                 p->dip = f->val.dip;
3134                 p->sport = f->val.sport;
3135                 p->dport = f->val.dport;
3136                 p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3137                 p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3138                     FILTER_NO_VLAN_PRI;
3139                 p->mac_hit = f->mac_hit;
3140                 p->mac_vld = f->mac_addr_idx != 0xffff;
3141                 p->mac_idx = f->mac_addr_idx;
3142                 p->pkt_type = f->proto;
3143                 p->report_filter_id = f->want_filter_id;
3144                 p->pass = f->pass;
3145                 p->rss = f->rss;
3146                 p->qset = f->qset;
3147
3148                 error = set_filter(sc, f->filter_id, p);
3149                 if (error == 0)
3150                         p->valid = 1;
3151                 break;
3152         }
3153         case CHELSIO_DEL_FILTER: {
3154                 struct ch_filter *f = (struct ch_filter *)data;
3155                 struct filter_info *p;
3156                 unsigned int nfilters = sc->params.mc5.nfilters;
3157
3158                 if (!is_offload(sc))
3159                         return (EOPNOTSUPP);
3160                 if (!(sc->flags & FULL_INIT_DONE))
3161                         return (EAGAIN);
3162                 if (nfilters == 0 || sc->filters == NULL)
3163                         return (EINVAL);
3164                 if (f->filter_id >= nfilters)
3165                         return (EINVAL);
3166
3167                 p = &sc->filters[f->filter_id];
3168                 if (p->locked)
3169                         return (EPERM);
3170                 if (!p->valid)
3171                         return (EFAULT); /* Read "Bad address" as "Bad index" */
3172
3173                 bzero(p, sizeof(*p));
3174                 p->sip = p->sip_mask = 0xffffffff;
3175                 p->vlan = 0xfff;
3176                 p->vlan_prio = FILTER_NO_VLAN_PRI;
3177                 p->pkt_type = 1;
3178                 error = set_filter(sc, f->filter_id, p);
3179                 break;
3180         }
3181         case CHELSIO_GET_FILTER: {
3182                 struct ch_filter *f = (struct ch_filter *)data;
3183                 struct filter_info *p;
3184                 unsigned int i, nfilters = sc->params.mc5.nfilters;
3185
3186                 if (!is_offload(sc))
3187                         return (EOPNOTSUPP);
3188                 if (!(sc->flags & FULL_INIT_DONE))
3189                         return (EAGAIN);
3190                 if (nfilters == 0 || sc->filters == NULL)
3191                         return (EINVAL);
3192
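                     /*
                      * Cursor-style enumeration: the caller seeds filter_id
                      * with 0xffffffff to fetch the first valid filter, feeds
                      * each returned id back in to get the next one, and is
                      * done when filter_id reads 0xffffffff again.  A minimal
                      * userland sketch (hypothetical descriptor for the
                      * adapter's control device, error handling omitted):
                      *
                      *      struct ch_filter f;
                      *
                      *      memset(&f, 0, sizeof(f));
                      *      f.filter_id = 0xffffffff;
                      *      for (;;) {
                      *              if (ioctl(fd, CHELSIO_GET_FILTER, &f) < 0 ||
                      *                  f.filter_id == 0xffffffff)
                      *                      break;
                      *              printf("filter %u qset %u\n",
                      *                  f.filter_id, f.qset);
                      *      }
                      */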
3193                 i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3194                 for (; i < nfilters; i++) {
3195                         p = &sc->filters[i];
3196                         if (!p->valid)
3197                                 continue;
3198
3199                         bzero(f, sizeof(*f));
3200
3201                         f->filter_id = i;
3202                         f->val.sip = p->sip;
3203                         f->mask.sip = p->sip_mask;
3204                         f->val.dip = p->dip;
3205                         f->mask.dip = p->dip ? 0xffffffff : 0;
3206                         f->val.sport = p->sport;
3207                         f->mask.sport = p->sport ? 0xffff : 0;
3208                         f->val.dport = p->dport;
3209                         f->mask.dport = p->dport ? 0xffff : 0;
3210                         f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3211                         f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3212                         f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3213                             0 : p->vlan_prio;
3214                         f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3215                             0 : FILTER_NO_VLAN_PRI;
3216                         f->mac_hit = p->mac_hit;
3217                         f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3218                         f->proto = p->pkt_type;
3219                         f->want_filter_id = p->report_filter_id;
3220                         f->pass = p->pass;
3221                         f->rss = p->rss;
3222                         f->qset = p->qset;
3223
3224                         break;
3225                 }
3226                 
3227                 if (i == nfilters)
3228                         f->filter_id = 0xffffffff;
3229                 break;
3230         }
3231         default:
3232                 return (EOPNOTSUPP);
3234         }
3235
3236         return (error);
3237 }
3238
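     /*
      * Read registers start through end, inclusive, into buf at byte
      * offset start.  The buffer must extend to offset end + 4 bytes.
      */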
3239 static __inline void
3240 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3241     unsigned int end)
3242 {
3243         uint32_t *p = (uint32_t *)(buf + start);
3244
3245         for ( ; start <= end; start += sizeof(uint32_t))
3246                 *p++ = t3_read_reg(ap, start);
3247 }
3248
3249 #define T3_REGMAP_SIZE (3 * 1024)
3250 static int
3251 cxgb_get_regs_len(void)
3252 {
3253         return (T3_REGMAP_SIZE);
3254 }
3255
3256 static void
3257 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3258 {           
3259         
3260         /*
3261          * Version scheme:
3262          * bits 0..9: chip version
3263          * bits 10..15: chip revision
3264          * bit 31: set for PCIe cards
3265          */
3266         regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
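             /*
              * A consumer of the dump can recover these fields with, e.g.
              * (illustrative only):
              *
              *      chip_version = version & 0x3ff;
              *      chip_rev     = (version >> 10) & 0x3f;
              *      pcie         = (version >> 31) & 1;
              */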
3267
3268         /*
3269          * We skip the MAC statistics registers because they are clear-on-read.
3270          * Also, reading multi-register stats would need to synchronize with
3271          * the periodic MAC stats accumulation; hard to justify the complexity.
3272          */
3273         memset(buf, 0, cxgb_get_regs_len());
3274         reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3275         reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3276         reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3277         reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3278         reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3279         reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3280                        XGM_REG(A_XGM_SERDES_STAT3, 1));
3281         reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3282                        XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3283 }
3284
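     /*
      * Allocate the filter table.  The last entry is reserved as a locked,
      * always-valid default filter (pass + RSS); setup_hw_filters() pushes
      * it to the hardware.
      */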
3285 static int
3286 alloc_filters(struct adapter *sc)
3287 {
3288         struct filter_info *p;
3289         unsigned int nfilters = sc->params.mc5.nfilters;
3290
3291         if (nfilters == 0)
3292                 return (0);
3293
3294         p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3295         sc->filters = p;
3296
3297         p = &sc->filters[nfilters - 1];
3298         p->vlan = 0xfff;
3299         p->vlan_prio = FILTER_NO_VLAN_PRI;
3300         p->pass = p->rss = p->valid = p->locked = 1;
3301
3302         return (0);
3303 }
3304
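     /*
      * Enable filtering and (re)program all locked entries, which at this
      * point is just the default filter created by alloc_filters().
      */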
3305 static int
3306 setup_hw_filters(struct adapter *sc)
3307 {
3308         int i, rc;
3309         unsigned int nfilters = sc->params.mc5.nfilters;
3310
3311         if (!sc->filters)
3312                 return (0);
3313
3314         t3_enable_filters(sc);
3315
3316         for (i = rc = 0; i < nfilters && !rc; i++) {
3317                 if (sc->filters[i].locked)
3318                         rc = set_filter(sc, i, &sc->filters[i]);
3319         }
3320
3321         return (rc);
3322 }
3323
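     /*
      * Program one hardware filter.  Filters occupy the TCAM entries just
      * below the routing region, hence the index adjustment.  One atomic
      * bypass work request carries a CPL_PASS_OPEN_REQ describing the match
      * and two CPL_SET_TCB_FIELD commands setting the disposition; filters
      * that pass to a specific queue (pass && !rss) get a follow-up request
      * that records the destination queue set.
      */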
3324 static int
3325 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3326 {
3327         int len;
3328         struct mbuf *m;
3329         struct ulp_txpkt *txpkt;
3330         struct work_request_hdr *wr;
3331         struct cpl_pass_open_req *oreq;
3332         struct cpl_set_tcb_field *sreq;
3333
3334         len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3335         KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3336
3337         id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3338               sc->params.mc5.nfilters;
3339
3340         m = m_gethdr(M_WAITOK, MT_DATA);
3341         m->m_len = m->m_pkthdr.len = len;
3342         bzero(mtod(m, char *), len);
3343
3344         wr = mtod(m, struct work_request_hdr *);
3345         wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3346
3347         oreq = (struct cpl_pass_open_req *)(wr + 1);
3348         txpkt = (struct ulp_txpkt *)oreq;
3349         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3350         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3351         OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3352         oreq->local_port = htons(f->dport);
3353         oreq->peer_port = htons(f->sport);
3354         oreq->local_ip = htonl(f->dip);
3355         oreq->peer_ip = htonl(f->sip);
3356         oreq->peer_netmask = htonl(f->sip_mask);
3357         oreq->opt0h = 0;
3358         oreq->opt0l = htonl(F_NO_OFFLOAD);
3359         oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3360                          V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3361                          V_VLAN_PRI(f->vlan_prio >> 1) |
3362                          V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3363                          V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3364                          V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3365
3366         sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3367         set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3368                           (f->report_filter_id << 15) | (1 << 23) |
3369                           ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3370         set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3371         t3_mgmt_tx(sc, m);
3372
3373         if (f->pass && !f->rss) {
3374                 len = sizeof(*sreq);
3375                 m = m_gethdr(M_WAITOK, MT_DATA);
3376                 m->m_len = m->m_pkthdr.len = len;
3377                 bzero(mtod(m, char *), len);
3378                 sreq = mtod(m, struct cpl_set_tcb_field *);
3379                 sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3380                 mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3381                                  (u64)sc->rrss_map[f->qset] << 19);
3382                 t3_mgmt_tx(sc, m);
3383         }
3384         return (0);
3385 }
3386
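     /*
      * Fill in a CPL_SET_TCB_FIELD command: update the bits selected by
      * mask in TCB word 'word' of connection/filter 'tid' to 'val', with
      * no reply requested.
      */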
3387 static inline void
3388 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3389     unsigned int word, u64 mask, u64 val)
3390 {
3391         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3392         req->reply = V_NO_REPLY(1);
3393         req->cpu_idx = 0;
3394         req->word = htons(word);
3395         req->mask = htobe64(mask);
3396         req->val = htobe64(val);
3397 }
3398
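     /*
      * Same as mk_set_tcb_field() but with the ULP_TXPKT header filled in,
      * so the command can be embedded in a bypass work request.
      */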
3399 static inline void
3400 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3401     unsigned int word, u64 mask, u64 val)
3402 {
3403         struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3404
3405         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3406         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3407         mk_set_tcb_field(req, tid, word, mask, val);
3408 }