/**************************************************************************

Copyright (c) 2007-2009, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/ktr.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>

#include <cxgb_include.h>

#ifdef PRIV_SUPPORTED
#include <sys/priv.h>
#endif

static int cxgb_setup_interrupts(adapter_t *);
static void cxgb_teardown_interrupts(adapter_t *);
static void cxgb_init(void *);
static int cxgb_init_locked(struct port_info *);
static int cxgb_uninit_locked(struct port_info *);
static int cxgb_uninit_synchronized(struct port_info *);
static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgb_media_change(struct ifnet *);
static int cxgb_ifm_type(int);
static void cxgb_build_medialist(struct port_info *);
static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
static int setup_sge_qsets(adapter_t *);
static void cxgb_async_intr(void *);
static void cxgb_tick_handler(void *, int);
static void cxgb_tick(void *);
static void link_check_callout(void *);
static void check_link_status(void *, int);
static void setup_rss(adapter_t *sc);
static int alloc_filters(struct adapter *);
static int setup_hw_filters(struct adapter *);
static int set_filter(struct adapter *, int, const struct filter_info *);
static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);
static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
    unsigned int, u64, u64);

/*
 * Attachment glue for the PCI controller end of the device.  Each port of
 * the device is attached separately, as defined later.
 */
static int cxgb_controller_probe(device_t);
static int cxgb_controller_attach(device_t);
static int cxgb_controller_detach(device_t);
static void cxgb_free(struct adapter *);
static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end);
static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
static void touch_bars(device_t dev);
static int offload_close(struct t3cdev *tdev);
static void cxgb_update_mac_settings(struct port_info *p);

static device_method_t cxgb_controller_methods[] = {
        DEVMETHOD(device_probe,         cxgb_controller_probe),
        DEVMETHOD(device_attach,        cxgb_controller_attach),
        DEVMETHOD(device_detach,        cxgb_controller_detach),

        DEVMETHOD_END
};

static driver_t cxgb_controller_driver = {
        "cxgbc",
        cxgb_controller_methods,
        sizeof(struct adapter)
};

static devclass_t       cxgb_controller_devclass;
DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);

/*
 * Attachment glue for the ports.  Attachment is done directly to the
 * controller device.
 */
static int cxgb_port_probe(device_t);
static int cxgb_port_attach(device_t);
static int cxgb_port_detach(device_t);

static device_method_t cxgb_port_methods[] = {
        DEVMETHOD(device_probe,         cxgb_port_probe),
        DEVMETHOD(device_attach,        cxgb_port_attach),
        DEVMETHOD(device_detach,        cxgb_port_detach),
        { 0, 0 }
};

static driver_t cxgb_port_driver = {
        "cxgb",
        cxgb_port_methods,
        0
};

static d_ioctl_t cxgb_extension_ioctl;
static d_open_t cxgb_extension_open;
static d_close_t cxgb_extension_close;

static struct cdevsw cxgb_cdevsw = {
        .d_version =    D_VERSION,
        .d_flags =      0,
        .d_open =       cxgb_extension_open,
        .d_close =      cxgb_extension_close,
        .d_ioctl =      cxgb_extension_ioctl,
        .d_name =       "cxgb",
};

static devclass_t       cxgb_port_devclass;
DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
 * of these schemes the driver may consider as follows:
 *
 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
 * msi = 0: force pin interrupts
 */
static int msi_allowed = 2;

TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
    "MSI-X, MSI, INTx selector");

/*
 * The driver enables offload as a default.
 * To disable it, use ofld_disable = 1.
 */
static int ofld_disable = 0;
TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
    "disable ULP offload");

/*
 * The driver uses an auto-queue algorithm by default.
 * To disable it and force a single queue-set per port, use multiq = 0
 */
static int multiq = 1;
TUNABLE_INT("hw.cxgb.multiq", &multiq);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
    "use min(ncpus/ports, 8) queue-sets per port");

/*
 * By default the driver will not update the firmware unless
 * it was compiled against a newer version.
 */
static int force_fw_update = 0;
TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
    "update firmware even if up to date");

int cxgb_use_16k_clusters = -1;
TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");

/*
 * Tune the size of the output queue.
 */
int cxgb_snd_queue_len = IFQ_MAXLEN;
TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
    &cxgb_snd_queue_len, 0, "send queue size");

static int nfilters = -1;
TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
    &nfilters, 0, "max number of entries in the filter table");

enum {
        MAX_TXQ_ENTRIES      = 16384,
        MAX_CTRL_TXQ_ENTRIES = 1024,
        MAX_RSPQ_ENTRIES     = 16384,
        MAX_RX_BUFFERS       = 16384,
        MAX_RX_JUMBO_BUFFERS = 16384,
        MIN_TXQ_ENTRIES      = 4,
        MIN_CTRL_TXQ_ENTRIES = 4,
        MIN_RSPQ_ENTRIES     = 32,
        MIN_FL_ENTRIES       = 32,
        MIN_FL_JUMBO_ENTRIES = 32
};

struct filter_info {
        u32 sip;
        u32 sip_mask;
        u32 dip;
        u16 sport;
        u16 dport;
        u32 vlan:12;
        u32 vlan_prio:3;
        u32 mac_hit:1;
        u32 mac_idx:4;
        u32 mac_vld:1;
        u32 pkt_type:2;
        u32 report_filter_id:1;
        u32 pass:1;
        u32 rss:1;
        u32 qset:3;
        u32 locked:1;
        u32 valid:1;
};

enum { FILTER_NO_VLAN_PRI = 7 };

#define EEPROM_MAGIC 0x38E2F10C

#define PORT_MASK ((1 << MAX_NPORTS) - 1)

/* Table for probing the cards.  The desc field isn't actually used */
struct cxgb_ident {
        uint16_t        vendor;
        uint16_t        device;
        int             index;
        char            *desc;
} cxgb_identifiers[] = {
        {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
        {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
        {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
        {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
        {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
        {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
        {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
        {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
        {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
        {0, 0, 0, NULL}
};

static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);

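/*
 * Map the T3 hardware revision to the character used in the firmware and
 * protocol SRAM file names ('a', 'b' or 'c').
 */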
static __inline char
t3rev2char(struct adapter *adapter)
{
        char rev = 'z';

        switch (adapter->params.rev) {
        case T3_REV_A:
                rev = 'a';
                break;
        case T3_REV_B:
        case T3_REV_B2:
                rev = 'b';
                break;
        case T3_REV_C:
                rev = 'c';
                break;
        }
        return (rev);
}

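/*
 * Match the card's PCI vendor/device ID against the identifier table
 * above; returns NULL if the card is not one we know about.
 */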
static struct cxgb_ident *
cxgb_get_ident(device_t dev)
{
        struct cxgb_ident *id;

        for (id = cxgb_identifiers; id->desc != NULL; id++) {
                if ((id->vendor == pci_get_vendor(dev)) &&
                    (id->device == pci_get_device(dev))) {
                        return (id);
                }
        }
        return (NULL);
}

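/*
 * Translate the card's identifier table entry into the common adapter_info
 * description used by the shared T3 code.
 */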
static const struct adapter_info *
cxgb_get_adapter_info(device_t dev)
{
        struct cxgb_ident *id;
        const struct adapter_info *ai;

        id = cxgb_get_ident(dev);
        if (id == NULL)
                return (NULL);

        ai = t3_get_adapter_info(id->index);

        return (ai);
}

static int
cxgb_controller_probe(device_t dev)
{
        const struct adapter_info *ai;
        char *ports, buf[80];
        int nports;

        ai = cxgb_get_adapter_info(dev);
        if (ai == NULL)
                return (ENXIO);

        nports = ai->nports0 + ai->nports1;
        if (nports == 1)
                ports = "port";
        else
                ports = "ports";

        snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
        device_set_desc_copy(dev, buf);
        return (BUS_PROBE_DEFAULT);
}

#define FW_FNAME "cxgb_t3fw"
#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"

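/*
 * Fetch the firmware image via firmware(9), load it onto the card, and
 * record the version that is now running.
 */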
static int
upgrade_fw(adapter_t *sc)
{
        const struct firmware *fw;
        int status;
        u32 vers;

        if ((fw = firmware_get(FW_FNAME)) == NULL) {
                device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
                return (ENOENT);
        } else
                device_printf(sc->dev, "installing firmware on card\n");
        status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);

        if (status != 0) {
                device_printf(sc->dev, "failed to install firmware: %d\n",
                    status);
        } else {
                t3_get_fw_version(sc, &vers);
                snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
                    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
                    G_FW_VERSION_MICRO(vers));
        }

        firmware_put(fw, FIRMWARE_UNLOAD);

        return (status);
}

/*
 * The cxgb_controller_attach function is responsible for the initial
 * bringup of the device.  Its responsibilities include:
 *
 *  1. Determine if the device supports MSI or MSI-X.
 *  2. Allocate bus resources so that we can access the Base Address Register
 *  3. Create and initialize mutexes for the controller and its control
 *     logic such as SGE and MDIO.
 *  4. Call hardware specific setup routine for the adapter as a whole.
 *  5. Allocate the BAR for doing MSI-X.
 *  6. Setup the line interrupt iff MSI-X is not supported.
 *  7. Create the driver's taskq.
 *  8. Start one task queue service thread.
 *  9. Check if the firmware and SRAM are up-to-date.  They will be
 *     auto-updated later (before FULL_INIT_DONE), if required.
 * 10. Create a child device for each MAC (port)
 * 11. Initialize T3 private state.
 * 12. Trigger the LED
 * 13. Setup offload iff supported.
 * 14. Reset/restart the tick callout.
 * 15. Attach sysctls
 *
 * NOTE: Any modification or deviation from this list MUST be reflected in
 * the above comment.  Failure to do so will result in problems on various
 * error conditions including link flapping.
 */
static int
cxgb_controller_attach(device_t dev)
{
        device_t child;
        const struct adapter_info *ai;
        struct adapter *sc;
        int i, error = 0;
        uint32_t vers;
        int port_qsets = 1;
        int msi_needed, reg;
        char buf[80];

        sc = device_get_softc(dev);
        sc->dev = dev;
        sc->msi_count = 0;
        ai = cxgb_get_adapter_info(dev);

        /* find the PCIe link width and set max read request to 4KB */
        if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
                uint16_t lnk;

                lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
                sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
                if (sc->link_width < 8 &&
                    (ai->caps & SUPPORTED_10000baseT_Full)) {
                        device_printf(sc->dev,
                            "PCIe x%d Link, expect reduced performance\n",
                            sc->link_width);
                }

                pci_set_max_read_req(dev, 4096);
        }

        touch_bars(dev);
        pci_enable_busmaster(dev);
        /*
         * Allocate the registers and make them available to the driver.
         * The registers that we care about for NIC mode are in BAR 0
         */
        sc->regs_rid = PCIR_BAR(0);
        if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->regs_rid, RF_ACTIVE)) == NULL) {
                device_printf(dev, "Cannot allocate BAR region 0\n");
                return (ENXIO);
        }
        sc->udbs_rid = PCIR_BAR(2);
        sc->udbs_res = NULL;
        if (is_offload(sc) &&
            ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
                   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
                device_printf(dev, "Cannot allocate BAR region 2\n");
                error = ENXIO;
                goto out;
        }

        snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
            device_get_unit(dev));
        ADAPTER_LOCK_INIT(sc, sc->lockbuf);

        snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
            device_get_unit(dev));
        snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
            device_get_unit(dev));
        snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
            device_get_unit(dev));

        MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
        MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
        MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);

        sc->bt = rman_get_bustag(sc->regs_res);
        sc->bh = rman_get_bushandle(sc->regs_res);
        sc->mmio_len = rman_get_size(sc->regs_res);

        for (i = 0; i < MAX_NPORTS; i++)
                sc->port[i].adapter = sc;

        if (t3_prep_adapter(sc, ai, 1) < 0) {
                printf("prep adapter failed\n");
                error = ENODEV;
                goto out;
        }
        /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
         * enough messages for the queue sets.  If that fails, try falling
         * back to MSI.  If that fails, then try falling back to the legacy
         * interrupt pin model.
         */
        sc->msix_regs_rid = 0x20;
        if ((msi_allowed >= 2) &&
            (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
            &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {

                if (multiq)
                        port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
                msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;

                if (pci_msix_count(dev) == 0 ||
                    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
                    sc->msi_count != msi_needed) {
                        device_printf(dev, "alloc msix failed - "
                                      "msi_count=%d, msi_needed=%d, err=%d; "
                                      "will try MSI\n", sc->msi_count,
                                      msi_needed, error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                        bus_release_resource(dev, SYS_RES_MEMORY,
                            sc->msix_regs_rid, sc->msix_regs_res);
                        sc->msix_regs_res = NULL;
                } else {
                        sc->flags |= USING_MSIX;
                        sc->cxgb_intr = cxgb_async_intr;
                        device_printf(dev,
                                      "using MSI-X interrupts (%u vectors)\n",
                                      sc->msi_count);
                }
        }

        if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
                sc->msi_count = 1;
                if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
                        device_printf(dev, "alloc msi failed - "
                                      "err=%d; will try INTx\n", error);
                        sc->msi_count = 0;
                        port_qsets = 1;
                        pci_release_msi(dev);
                } else {
                        sc->flags |= USING_MSI;
                        sc->cxgb_intr = t3_intr_msi;
                        device_printf(dev, "using MSI interrupts\n");
                }
        }
        if (sc->msi_count == 0) {
                device_printf(dev, "using line interrupts\n");
                sc->cxgb_intr = t3b_intr;
        }

        /* Create a private taskqueue thread for handling driver events */
        sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &sc->tq);
        if (sc->tq == NULL) {
                device_printf(dev, "failed to allocate controller task queue\n");
                goto out;
        }

        taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
            device_get_nameunit(dev));
        TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);

        /* Create a periodic callout for checking adapter status */
        callout_init(&sc->cxgb_tick_ch, TRUE);

        if (t3_check_fw_version(sc) < 0 || force_fw_update) {
                /*
                 * Warn user that a firmware update will be attempted in init.
                 */
                device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
                    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
                sc->flags &= ~FW_UPTODATE;
        } else {
                sc->flags |= FW_UPTODATE;
        }

        if (t3_check_tpsram_version(sc) < 0) {
                /*
                 * Warn user that an SRAM update will be attempted in init.
                 */
                device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
                    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
                sc->flags &= ~TPS_UPTODATE;
        } else {
                sc->flags |= TPS_UPTODATE;
        }

        /*
         * Create a child device for each MAC.  The ethernet attachment
         * will be done in these children.
         */
        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi;

                if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
                        device_printf(dev, "failed to add child port\n");
                        error = EINVAL;
                        goto out;
                }
                pi = &sc->port[i];
                pi->adapter = sc;
                pi->nqsets = port_qsets;
                pi->first_qset = i*port_qsets;
                pi->port_id = i;
                pi->tx_chan = i >= ai->nports0;
                pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
                sc->rxpkt_map[pi->txpkt_intf] = i;
                sc->portdev[i] = child;
                device_set_softc(child, pi);
        }
        if ((error = bus_generic_attach(dev)) != 0)
                goto out;

        /* initialize sge private state */
        t3_sge_init_adapter(sc);

        t3_led_ready(sc);

        cxgb_offload_init();
        if (is_offload(sc)) {
                setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_ofld(sc);
        }
        error = t3_get_fw_version(sc, &vers);
        if (error)
                goto out;

        snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
            G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
            G_FW_VERSION_MICRO(vers));

        snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
                 ai->desc, is_offload(sc) ? "R" : "",
                 sc->params.vpd.ec, sc->params.vpd.sn);
        device_set_desc_copy(dev, buf);

        snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
                 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
                 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);

        device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
        callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
        t3_add_attach_sysctls(sc);
out:
        if (error)
                cxgb_free(sc);

        return (error);
}

/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
 */

static int
cxgb_controller_detach(device_t dev)
{
        struct adapter *sc;

        sc = device_get_softc(dev);

        cxgb_free(sc);

        return (0);
}

/*
 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
 * down the structures that were built up in cxgb_controller_attach(),
 * and should be the final piece of work done when fully unloading the
 * driver.  Its work includes:
 *
 *  1. Shutting down the threads started by the cxgb_controller_attach()
 *     routine.
 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
 *  3. Detaching all of the port devices created during the
 *     cxgb_controller_attach() routine.
 *  4. Removing the device children created via cxgb_controller_attach().
 *  5. Releasing PCI resources associated with the device.
 *  6. Turning off the offload support, iff it was turned on.
 *  7. Destroying the mutexes created in cxgb_controller_attach().
 */
static void
cxgb_free(struct adapter *sc)
{
        int i, nqsets = 0;

        ADAPTER_LOCK(sc);
        sc->flags |= CXGB_SHUTDOWN;
        ADAPTER_UNLOCK(sc);

        /*
         * Make sure all child devices are gone.
         */
        bus_generic_detach(sc->dev);
        for (i = 0; i < (sc)->params.nports; i++) {
                if (sc->portdev[i] &&
                    device_delete_child(sc->dev, sc->portdev[i]) != 0)
                        device_printf(sc->dev, "failed to delete child port\n");
                nqsets += sc->port[i].nqsets;
        }

        /*
         * At this point, it is as if cxgb_port_detach has run on all ports, and
         * cxgb_down has run on the adapter.  All interrupts have been silenced,
         * all open devices have been closed.
         */
        KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
                                           __func__, sc->open_device_map));
        for (i = 0; i < sc->params.nports; i++) {
                KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
                                                  __func__, i));
        }

        /*
         * Finish off the adapter's callouts.
         */
        callout_drain(&sc->cxgb_tick_ch);
        callout_drain(&sc->sge_timer_ch);

        /*
         * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
         * sysctls are cleaned up by the kernel linker.
         */
        if (sc->flags & FULL_INIT_DONE) {
                t3_free_sge_resources(sc, nqsets);
                sc->flags &= ~FULL_INIT_DONE;
        }

        /*
         * Release all interrupt resources.
         */
        cxgb_teardown_interrupts(sc);
        if (sc->flags & (USING_MSI | USING_MSIX)) {
                device_printf(sc->dev, "releasing msi message(s)\n");
                pci_release_msi(sc->dev);
        } else {
                device_printf(sc->dev, "no msi message to release\n");
        }

        if (sc->msix_regs_res != NULL) {
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
                    sc->msix_regs_res);
        }

        /*
         * Free the adapter's taskqueue.
         */
        if (sc->tq != NULL) {
                taskqueue_free(sc->tq);
                sc->tq = NULL;
        }

        if (is_offload(sc)) {
                clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
                cxgb_adapter_unofld(sc);
        }

#ifdef notyet
        if (sc->flags & CXGB_OFLD_INIT)
                cxgb_offload_deactivate(sc);
#endif
        free(sc->filters, M_DEVBUF);
        t3_sge_free(sc);

        cxgb_offload_exit();

        if (sc->udbs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
                    sc->udbs_res);

        if (sc->regs_res != NULL)
                bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
                    sc->regs_res);

        MTX_DESTROY(&sc->mdio_lock);
        MTX_DESTROY(&sc->sge.reg_lock);
        MTX_DESTROY(&sc->elmer_lock);
        ADAPTER_LOCK_DEINIT(sc);
}

/**
 *      setup_sge_qsets - configure SGE Tx/Rx/response queues
 *      @sc: the controller softc
 *
 *      Determines how many sets of SGE queues to use and initializes them.
 *      We support multiple queue sets per port if we have MSI-X, otherwise
 *      just one queue set per port.
 */
static int
setup_sge_qsets(adapter_t *sc)
{
        int i, j, err, irq_idx = 0, qset_idx = 0;
        u_int ntxq = SGE_TXQ_PER_SET;

        if ((err = t3_sge_alloc(sc)) != 0) {
                device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
                return (err);
        }

        if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
                irq_idx = -1;

        for (i = 0; i < (sc)->params.nports; i++) {
                struct port_info *pi = &sc->port[i];

                for (j = 0; j < pi->nqsets; j++, qset_idx++) {
                        err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
                            (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
                            &sc->params.sge.qset[qset_idx], ntxq, pi);
                        if (err) {
                                t3_free_sge_resources(sc, qset_idx);
                                device_printf(sc->dev,
                                    "t3_sge_alloc_qset failed with %d\n", err);
                                return (err);
                        }
                }
        }

        return (0);
}

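/*
 * Undo whatever cxgb_setup_interrupts managed to set up, for both the
 * MSI-X and the MSI/INTx cases.  Safe to call on a partially wired-up
 * adapter.
 */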
static void
cxgb_teardown_interrupts(adapter_t *sc)
{
        int i;

        for (i = 0; i < SGE_QSETS; i++) {
                if (sc->msix_intr_tag[i] == NULL) {

                        /* Should have been setup fully or not at all */
                        KASSERT(sc->msix_irq_res[i] == NULL &&
                                sc->msix_irq_rid[i] == 0,
                                ("%s: half-done interrupt (%d).", __func__, i));

                        continue;
                }

                bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
                                  sc->msix_intr_tag[i]);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
                                     sc->msix_irq_res[i]);

                sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
                sc->msix_irq_rid[i] = 0;
        }

        if (sc->intr_tag) {
                KASSERT(sc->irq_res != NULL,
                        ("%s: half-done interrupt.", __func__));

                bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
                bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                     sc->irq_res);

                sc->irq_res = sc->intr_tag = NULL;
                sc->irq_rid = 0;
        }
}

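/*
 * Allocate and wire up the adapter's interrupts.  rid 0 is used for INTx
 * and rid 1 for the first (or only) message.  With MSI-X, that first
 * vector handles slow-path events and each remaining vector (rid 2 and
 * up) services one queue set.
 */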
static int
cxgb_setup_interrupts(adapter_t *sc)
{
        struct resource *res;
        void *tag;
        int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);

        sc->irq_rid = intr_flag ? 1 : 0;
        sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
                                             RF_SHAREABLE | RF_ACTIVE);
        if (sc->irq_res == NULL) {
                device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
                              intr_flag, sc->irq_rid);
                err = EINVAL;
                sc->irq_rid = 0;
        } else {
                err = bus_setup_intr(sc->dev, sc->irq_res,
                    INTR_MPSAFE | INTR_TYPE_NET, NULL,
                    sc->cxgb_intr, sc, &sc->intr_tag);

                if (err) {
                        device_printf(sc->dev,
                                      "Cannot set up interrupt (%x, %u, %d)\n",
                                      intr_flag, sc->irq_rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
                                             sc->irq_res);
                        sc->irq_res = sc->intr_tag = NULL;
                        sc->irq_rid = 0;
                }
        }

        /* That's all for INTx or MSI */
        if (!(intr_flag & USING_MSIX) || err)
                return (err);

        for (i = 0; i < sc->msi_count - 1; i++) {
                rid = i + 2;
                res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
                                             RF_SHAREABLE | RF_ACTIVE);
                if (res == NULL) {
                        device_printf(sc->dev, "Cannot allocate interrupt "
                                      "for message %d\n", rid);
                        err = EINVAL;
                        break;
                }

                err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
                                     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
                if (err) {
                        device_printf(sc->dev, "Cannot set up interrupt "
                                      "for message %d (%d)\n", rid, err);
                        bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
                        break;
                }

                sc->msix_irq_rid[i] = rid;
                sc->msix_irq_res[i] = res;
                sc->msix_intr_tag[i] = tag;
        }

        if (err)
                cxgb_teardown_interrupts(sc);

        return (err);
}

static int
cxgb_port_probe(device_t dev)
{
        struct port_info *p;
        char buf[80];
        const char *desc;

        p = device_get_softc(dev);
        desc = p->phy.desc;
        snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
        device_set_desc_copy(dev, buf);
        return (0);
}

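/*
 * Create the character device through which userland issues this
 * driver's extension ioctls (see cxgb_extension_ioctl) for the port.
 */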
static int
cxgb_makedev(struct port_info *pi)
{

        pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
            UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));

        if (pi->port_cdev == NULL)
                return (ENOMEM);

        pi->port_cdev->si_drv1 = (void *)pi;

        return (0);
}

#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)

static int
cxgb_port_attach(device_t dev)
{
        struct port_info *p;
        struct ifnet *ifp;
        int err;
        struct adapter *sc;

        p = device_get_softc(dev);
        sc = p->adapter;
        snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
            device_get_unit(device_get_parent(dev)), p->port_id);
        PORT_LOCK_INIT(p, p->lockbuf);

        callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
        TASK_INIT(&p->link_check_task, 0, check_link_status, p);

        /* Allocate an ifnet object and set it up */
        ifp = p->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "Cannot allocate ifnet\n");
                return (ENOMEM);
        }

        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_init = cxgb_init;
        ifp->if_softc = p;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = cxgb_ioctl;
        ifp->if_start = cxgb_start;

        ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
        IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
        IFQ_SET_READY(&ifp->if_snd);

        ifp->if_capabilities = CXGB_CAP;
        ifp->if_capenable = CXGB_CAP_ENABLE;
        ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;

        /*
         * Disable TSO on 4-port - it isn't supported by the firmware.
         */
        if (sc->params.nports > 2) {
                ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
                ifp->if_hwassist &= ~CSUM_TSO;
        }

        ether_ifattach(ifp, p->hw_addr);
        ifp->if_transmit = cxgb_transmit;
        ifp->if_qflush = cxgb_qflush;

#ifdef DEFAULT_JUMBO
        if (sc->params.nports <= 2)
                ifp->if_mtu = ETHERMTU_JUMBO;
#endif
        if ((err = cxgb_makedev(p)) != 0) {
                printf("makedev failed %d\n", err);
                return (err);
        }

        /* Create a list of media supported by this port */
        ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
            cxgb_media_status);
        cxgb_build_medialist(p);

        t3_sge_init_port(p);

        return (err);
}

/*
 * cxgb_port_detach() is called via the device_detach methods when
 * cxgb_free() calls bus_generic_detach().  It is responsible for
 * removing the device from the view of the kernel, i.e. from all
 * interfaces lists etc.  This routine is only called when the driver is
 * being unloaded, not when the link goes down.
 */
static int
cxgb_port_detach(device_t dev)
{
        struct port_info *p;
        struct adapter *sc;
        int i;

        p = device_get_softc(dev);
        sc = p->adapter;

        /* Tell cxgb_ioctl and if_init that the port is going away */
        ADAPTER_LOCK(sc);
        SET_DOOMED(p);
        wakeup(&sc->flags);
        while (IS_BUSY(sc))
                mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
        SET_BUSY(sc);
        ADAPTER_UNLOCK(sc);

        if (p->port_cdev != NULL)
                destroy_dev(p->port_cdev);

        cxgb_uninit_synchronized(p);
        ether_ifdetach(p->ifp);

        for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
                struct sge_qset *qs = &sc->sge.qs[i];
                struct sge_txq *txq = &qs->txq[TXQ_ETH];

                callout_drain(&txq->txq_watchdog);
                callout_drain(&txq->txq_timer);
        }

        PORT_LOCK_DEINIT(p);
        if_free(p->ifp);
        p->ifp = NULL;

        ADAPTER_LOCK(sc);
        CLR_BUSY(sc);
        wakeup_one(&sc->flags);
        ADAPTER_UNLOCK(sc);
        return (0);
}

void
t3_fatal_err(struct adapter *sc)
{
        u_int fw_status[4];

        if (sc->flags & FULL_INIT_DONE) {
                t3_sge_stop(sc);
                t3_write_reg(sc, A_XGM_TX_CTRL, 0);
                t3_write_reg(sc, A_XGM_RX_CTRL, 0);
                t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
                t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
                t3_intr_disable(sc);
        }
        device_printf(sc->dev, "encountered fatal error, operation suspended\n");
        if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
                device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
                    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
}

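/*
 * Walk the PCI capability list by hand and return the offset of the
 * requested capability, or 0 if it is not present.
 */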
int
t3_os_find_pci_capability(adapter_t *sc, int cap)
{
        device_t dev;
        struct pci_devinfo *dinfo;
        pcicfgregs *cfg;
        uint32_t status;
        uint8_t ptr;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);
        cfg = &dinfo->cfg;

        status = pci_read_config(dev, PCIR_STATUS, 2);
        if (!(status & PCIM_STATUS_CAPPRESENT))
                return (0);

        switch (cfg->hdrtype & PCIM_HDRTYPE) {
        case 0:
        case 1:
                ptr = PCIR_CAP_PTR;
                break;
        case 2:
                ptr = PCIR_CAP_PTR_2;
                break;
        default:
                return (0);
        }
        ptr = pci_read_config(dev, ptr, 1);

        while (ptr != 0) {
                if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
                        return (ptr);
                ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
        }

        return (0);
}

int
t3_os_pci_save_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_save(dev, dinfo, 0);
        return (0);
}

int
t3_os_pci_restore_state(struct adapter *sc)
{
        device_t dev;
        struct pci_devinfo *dinfo;

        dev = sc->dev;
        dinfo = device_get_ivars(dev);

        pci_cfg_restore(dev, dinfo);
        return (0);
}

/**
 *      t3_os_link_changed - handle link status changes
 *      @adapter: the adapter associated with the link change
 *      @port_id: the port index whose link status has changed
 *      @link_status: the new status of the link
 *      @speed: the new speed setting
 *      @duplex: the new duplex setting
 *      @fc: the new flow-control setting
 *      @mac_was_reset: whether the MAC was just reset (settings must be reapplied)
 *
 *      This is the OS-dependent handler for link status changes.  The OS
 *      neutral handler takes care of most of the processing for these events,
 *      then calls this handler for any OS-specific processing.
 */
void
t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
     int duplex, int fc, int mac_was_reset)
{
        struct port_info *pi = &adapter->port[port_id];
        struct ifnet *ifp = pi->ifp;

        /* no race with detach, so ifp should always be good */
        KASSERT(ifp, ("%s: if detached.", __func__));

        /* Reapply mac settings if they were lost due to a reset */
        if (mac_was_reset) {
                PORT_LOCK(pi);
                cxgb_update_mac_settings(pi);
                PORT_UNLOCK(pi);
        }

        if (link_status) {
                ifp->if_baudrate = IF_Mbps(speed);
                if_link_state_change(ifp, LINK_STATE_UP);
        } else
                if_link_state_change(ifp, LINK_STATE_DOWN);
}

/**
 *      t3_os_phymod_changed - handle PHY module changes
 *      @adap: the adapter whose PHY reported the module change
 *      @port_id: the port index of the PHY reporting the module change
 *
 *      This is the OS-dependent handler for PHY module changes.  It is
 *      invoked when a PHY module is removed or inserted for any OS-specific
 *      processing.
 */
void
t3_os_phymod_changed(struct adapter *adap, int port_id)
{
        static const char *mod_str[] = {
                NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
        };
        struct port_info *pi = &adap->port[port_id];
        int mod = pi->phy.modtype;

        if (mod != pi->media.ifm_cur->ifm_data)
                cxgb_build_medialist(pi);

        if (mod == phy_modtype_none)
                if_printf(pi->ifp, "PHY module unplugged\n");
        else {
                KASSERT(mod < ARRAY_SIZE(mod_str),
                        ("invalid PHY module type %d", mod));
                if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
        }
}

void
t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
{

        /*
         * The ifnet might not be allocated before this gets called, as this
         * is called early on in attach by t3_prep_adapter, so save the
         * address off in the port structure.
         */
        if (cxgb_debug)
                printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
        bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
}

/*
 * Programs the XGMAC based on the settings in the ifnet.  These settings
 * include MTU, MAC address, mcast addresses, etc.
 */
static void
cxgb_update_mac_settings(struct port_info *p)
{
        struct ifnet *ifp = p->ifp;
        struct t3_rx_mode rm;
        struct cmac *mac = &p->mac;
        int mtu, hwtagging;

        PORT_LOCK_ASSERT_OWNED(p);

        bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);

        mtu = ifp->if_mtu;
        if (ifp->if_capenable & IFCAP_VLAN_MTU)
                mtu += ETHER_VLAN_ENCAP_LEN;

        hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;

        t3_mac_set_mtu(mac, mtu);
        t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
        t3_mac_set_address(mac, 0, p->hw_addr);
        t3_init_rx_mode(&rm, p);
        t3_mac_set_rx_mode(mac, &rm);
}

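/*
 * Wait for the first response queue to accumulate @n offload packets
 * beyond @init_cnt, i.e. for the replies to a batch of management
 * requests to arrive.
 */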
static int
await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
                              unsigned long n)
{
        int attempts = 5;

        while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
                if (!--attempts)
                        return (ETIMEDOUT);
                t3_os_sleep(10);
        }
        return (0);
}

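/*
 * Establish known parity in the TP's memories by writing benign entries
 * to every SMT, L2T and routing table slot (and one TCB field), then
 * waiting for all of the replies.
 */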
static int
init_tp_parity(struct adapter *adap)
{
        int i;
        struct mbuf *m;
        struct cpl_set_tcb_field *greq;
        unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;

        t3_tp_set_offload_mode(adap, 1);

        for (i = 0; i < 16; i++) {
                struct cpl_smt_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_smt_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
                req->iff = i;
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_l2t_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_l2t_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
                req->params = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        for (i = 0; i < 2048; i++) {
                struct cpl_rte_write_req *req;

                m = m_gethdr(M_WAITOK, MT_DATA);
                req = mtod(m, struct cpl_rte_write_req *);
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                memset(req, 0, sizeof(*req));
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
                OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
                req->l2t_idx = htonl(V_L2T_W_IDX(i));
                t3_mgmt_tx(adap, m);
        }

        m = m_gethdr(M_WAITOK, MT_DATA);
        greq = mtod(m, struct cpl_set_tcb_field *);
        m->m_len = m->m_pkthdr.len = sizeof(*greq);
        memset(greq, 0, sizeof(*greq));
        greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
        greq->mask = htobe64(1);
        t3_mgmt_tx(adap, m);

        i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
        t3_tp_set_offload_mode(adap, 0);
        return (i);
}

/**
 *      setup_rss - configure Receive Side Steering (per-queue connection demux)
 *      @adap: the adapter
 *
 *      Sets up RSS to distribute packets to multiple receive queues.  We
 *      configure the RSS CPU lookup table to distribute to the number of HW
 *      receive queues, and the response queue lookup table to narrow that
 *      down to the response queues actually configured for each port.
 *      We always configure the RSS mapping for two ports since the mapping
 *      table has plenty of entries.
 */
static void
setup_rss(adapter_t *adap)
{
        int i;
        u_int nq[2];
        uint8_t cpus[SGE_QSETS + 1];
        uint16_t rspq_map[RSS_TABLE_SIZE];

        for (i = 0; i < SGE_QSETS; ++i)
                cpus[i] = i;
        cpus[SGE_QSETS] = 0xff;

        nq[0] = nq[1] = 0;
        for_each_port(adap, i) {
                const struct port_info *pi = adap2pinfo(adap, i);

                nq[pi->tx_chan] += pi->nqsets;
        }
        for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
                rspq_map[i] = nq[0] ? i % nq[0] : 0;
                rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
        }

        /* Calculate the reverse RSS map table */
        for (i = 0; i < SGE_QSETS; ++i)
                adap->rrss_map[i] = 0xff;
        for (i = 0; i < RSS_TABLE_SIZE; ++i)
                if (adap->rrss_map[rspq_map[i]] == 0xff)
                        adap->rrss_map[rspq_map[i]] = i;

        t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
                      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
                      F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
                      cpus, rspq_map);
}

/*
 * Sends an mbuf to an offload queue driver
 * after dealing with any active network taps.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
        int ret;

        ret = t3_offload_tx(tdev, m);
        return (ret);
}

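/*
 * Program one source MAC table entry with the corresponding port's
 * Ethernet address.
 */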
static int
write_smt_entry(struct adapter *adapter, int idx)
{
        struct port_info *pi = &adapter->port[idx];
        struct cpl_smt_write_req *req;
        struct mbuf *m;

        if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
                return (ENOMEM);

        req = mtod(m, struct cpl_smt_write_req *);
        m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);

        req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
        req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
        req->iff = idx;
        memset(req->src_mac1, 0, sizeof(req->src_mac1));
        memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);

        m_set_priority(m, 1);

        offload_tx(&adapter->tdev, m);

        return (0);
}

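/* Write an SMT entry for each port. */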
static int
init_smt(struct adapter *adapter)
{
        int i;

        for_each_port(adapter, i)
                write_smt_entry(adapter, i);
        return (0);
}

static void
init_port_mtus(adapter_t *adapter)
{
        unsigned int mtus = ETHERMTU | (ETHERMTU << 16);

        t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
}

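/*
 * Send a management work request that programs one entry of the Tx
 * packet scheduler: scheduler @sched, index @qidx, min/max @lo/@hi,
 * bound to channel @port.
 */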
static void
send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
                              int hi, int port)
{
        struct mbuf *m;
        struct mngt_pktsched_wr *req;

        m = m_gethdr(M_DONTWAIT, MT_DATA);
        if (m) {
                req = mtod(m, struct mngt_pktsched_wr *);
                req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
                req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
                req->sched = sched;
                req->idx = qidx;
                req->min = lo;
                req->max = hi;
                req->binding = port;
                m->m_len = m->m_pkthdr.len = sizeof(*req);
                t3_mgmt_tx(adap, m);
        }
}

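/*
 * Bind each queue set to its port's Tx channel via the firmware's
 * packet scheduler.
 */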
static void
bind_qsets(adapter_t *sc)
{
        int i, j;

        for (i = 0; i < (sc)->params.nports; ++i) {
                const struct port_info *pi = adap2pinfo(sc, i);

                for (j = 0; j < pi->nqsets; ++j) {
                        send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
                                          -1, pi->tx_chan);
                }
        }
}

1531 static void
1532 update_tpeeprom(struct adapter *adap)
1533 {
1534         const struct firmware *tpeeprom;
1535
1536         uint32_t version;
1537         unsigned int major, minor;
1538         int ret, len;
1539         char rev, name[32];
1540
1541         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1542
1543         major = G_TP_VERSION_MAJOR(version);
1544         minor = G_TP_VERSION_MINOR(version);
        if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
                return;
1547
1548         rev = t3rev2char(adap);
1549         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1550
1551         tpeeprom = firmware_get(name);
1552         if (tpeeprom == NULL) {
1553                 device_printf(adap->dev,
1554                               "could not load TP EEPROM: unable to load %s\n",
1555                               name);
1556                 return;
1557         }
1558
1559         len = tpeeprom->datasize - 4;
1560         
1561         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1562         if (ret)
1563                 goto release_tpeeprom;
1564
        if (len != TP_SRAM_LEN) {
                device_printf(adap->dev,
                              "%s length is wrong len=%d expected=%d\n", name,
                              len, TP_SRAM_LEN);
                goto release_tpeeprom;
        }
1571         
1572         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1573             TP_SRAM_OFFSET);
1574         
1575         if (!ret) {
1576                 device_printf(adap->dev,
1577                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1578                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1579         } else 
1580                 device_printf(adap->dev,
1581                               "Protocol SRAM image update in EEPROM failed\n");
1582
1583 release_tpeeprom:
1584         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1585         
1586         return;
1587 }
1588
1589 static int
1590 update_tpsram(struct adapter *adap)
1591 {
1592         const struct firmware *tpsram;
1593         int ret;
1594         char rev, name[32];
1595
1596         rev = t3rev2char(adap);
1597         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1598
1599         update_tpeeprom(adap);
1600
1601         tpsram = firmware_get(name);
        if (tpsram == NULL) {
                device_printf(adap->dev,
                    "could not load TP SRAM: unable to load %s\n", name);
                return (EINVAL);
        } else
                device_printf(adap->dev, "updating TP SRAM\n");
1607         
1608         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1609         if (ret)
1610                 goto release_tpsram;    
1611
1612         ret = t3_set_proto_sram(adap, tpsram->data);
1613         if (ret)
1614                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1615
1616 release_tpsram:
1617         firmware_put(tpsram, FIRMWARE_UNLOAD);
1618         
        return (ret);
1620 }
1621
1622 /**
1623  *      cxgb_up - enable the adapter
1624  *      @adap: adapter being enabled
1625  *
1626  *      Called when the first port is enabled, this function performs the
1627  *      actions necessary to make an adapter operational, such as completing
1628  *      the initialization of HW modules, and enabling interrupts.
1629  */
1630 static int
1631 cxgb_up(struct adapter *sc)
1632 {
1633         int err = 0;
1634         unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1635
1636         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1637                                            __func__, sc->open_device_map));
1638
1639         if ((sc->flags & FULL_INIT_DONE) == 0) {
1640
1641                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1642
1643                 if ((sc->flags & FW_UPTODATE) == 0)
1644                         if ((err = upgrade_fw(sc)))
1645                                 goto out;
1646
1647                 if ((sc->flags & TPS_UPTODATE) == 0)
1648                         if ((err = update_tpsram(sc)))
1649                                 goto out;
1650
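                /*
                 * When filters are requested, give them the MC5 space that
                 * would otherwise hold server entries; nfilters < 0 means
                 * use all available filter TIDs.
                 */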
1651                 if (is_offload(sc) && nfilters != 0) {
1652                         sc->params.mc5.nservers = 0;
1653
1654                         if (nfilters < 0)
1655                                 sc->params.mc5.nfilters = mxf;
1656                         else
1657                                 sc->params.mc5.nfilters = min(nfilters, mxf);
1658                 }
1659
1660                 err = t3_init_hw(sc, 0);
1661                 if (err)
1662                         goto out;
1663
1664                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1665                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1666
1667                 err = setup_sge_qsets(sc);
1668                 if (err)
1669                         goto out;
1670
1671                 alloc_filters(sc);
1672                 setup_rss(sc);
1673
1674                 t3_intr_clear(sc);
1675                 err = cxgb_setup_interrupts(sc);
1676                 if (err)
1677                         goto out;
1678
1679                 t3_add_configured_sysctls(sc);
1680                 sc->flags |= FULL_INIT_DONE;
1681         }
1682
1683         t3_intr_clear(sc);
1684         t3_sge_start(sc);
1685         t3_intr_enable(sc);
1686
1687         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1688             is_offload(sc) && init_tp_parity(sc) == 0)
1689                 sc->flags |= TP_PARITY_INIT;
1690
1691         if (sc->flags & TP_PARITY_INIT) {
1692                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1693                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1694         }
1695         
1696         if (!(sc->flags & QUEUES_BOUND)) {
1697                 bind_qsets(sc);
1698                 setup_hw_filters(sc);
1699                 sc->flags |= QUEUES_BOUND;              
1700         }
1701
1702         t3_sge_reset_adapter(sc);
1703 out:
1704         return (err);
1705 }
1706
1707 /*
1708  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1709  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1710  * during controller_detach, not here.
1711  */
1712 static void
1713 cxgb_down(struct adapter *sc)
1714 {
1715         t3_sge_stop(sc);
1716         t3_intr_disable(sc);
1717 }
1718
1719 static int
1720 offload_open(struct port_info *pi)
1721 {
1722         struct adapter *sc = pi->adapter;
1723         struct t3cdev *tdev = &sc->tdev;
1724
1725         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1726
1727         t3_tp_set_offload_mode(sc, 1);
1728         tdev->lldev = pi->ifp;
1729         init_port_mtus(sc);
1730         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1731                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1732         init_smt(sc);
1733         cxgb_add_clients(tdev);
1734
1735         return (0);
1736 }
1737
1738 static int
1739 offload_close(struct t3cdev *tdev)
1740 {
1741         struct adapter *adapter = tdev2adap(tdev);
1742
1743         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1744                 return (0);
1745
1746         /* Call back all registered clients */
1747         cxgb_remove_clients(tdev);
1748
1749         tdev->lldev = NULL;
1750         cxgb_set_dummy_ops(tdev);
1751         t3_tp_set_offload_mode(adapter, 0);
1752
1753         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1754
1755         return (0);
1756 }
1757
1758 /*
1759  * if_init for cxgb ports.
1760  */
1761 static void
1762 cxgb_init(void *arg)
1763 {
1764         struct port_info *p = arg;
1765         struct adapter *sc = p->adapter;
1766
1767         ADAPTER_LOCK(sc);
1768         cxgb_init_locked(p); /* releases adapter lock */
1769         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1770 }
1771
1772 static int
1773 cxgb_init_locked(struct port_info *p)
1774 {
1775         struct adapter *sc = p->adapter;
1776         struct ifnet *ifp = p->ifp;
1777         struct cmac *mac = &p->mac;
1778         int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1779
1780         ADAPTER_LOCK_ASSERT_OWNED(sc);
1781
1782         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1783                 gave_up_lock = 1;
1784                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1785                         rc = EINTR;
1786                         goto done;
1787                 }
1788         }
1789         if (IS_DOOMED(p)) {
1790                 rc = ENXIO;
1791                 goto done;
1792         }
1793         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1794
1795         /*
1796          * The code that runs during one-time adapter initialization can sleep
1797          * so it's important not to hold any locks across it.
1798          */
1799         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1800
1801         if (may_sleep) {
1802                 SET_BUSY(sc);
1803                 gave_up_lock = 1;
1804                 ADAPTER_UNLOCK(sc);
1805         }
1806
1807         if (sc->open_device_map == 0) {
1808                 if ((rc = cxgb_up(sc)) != 0)
1809                         goto done;
1810
1811                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1812                         log(LOG_WARNING,
1813                             "Could not initialize offload capabilities\n");
1814         }
1815
1816         PORT_LOCK(p);
1817         if (isset(&sc->open_device_map, p->port_id) &&
1818             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1819                 PORT_UNLOCK(p);
1820                 goto done;
1821         }
1822         t3_port_intr_enable(sc, p->port_id);
1823         if (!mac->multiport) 
1824                 t3_mac_init(mac);
1825         cxgb_update_mac_settings(p);
1826         t3_link_start(&p->phy, mac, &p->link_config);
1827         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1828         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1829         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1830         PORT_UNLOCK(p);
1831
1832         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1833                 struct sge_qset *qs = &sc->sge.qs[i];
1834                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1835
1836                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1837                                  txq->txq_watchdog.c_cpu);
1838         }
1839
1840         /* all ok */
1841         setbit(&sc->open_device_map, p->port_id);
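        /*
         * PHYs with a link interrupt only need an occasional sanity check;
         * the rest are polled four times a second.
         */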
1842         callout_reset(&p->link_check_ch,
1843             p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1844             link_check_callout, p);
1845
1846 done:
1847         if (may_sleep) {
1848                 ADAPTER_LOCK(sc);
1849                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1850                 CLR_BUSY(sc);
1851         }
1852         if (gave_up_lock)
1853                 wakeup_one(&sc->flags);
1854         ADAPTER_UNLOCK(sc);
1855         return (rc);
1856 }
1857
1858 static int
1859 cxgb_uninit_locked(struct port_info *p)
1860 {
1861         struct adapter *sc = p->adapter;
1862         int rc;
1863
1864         ADAPTER_LOCK_ASSERT_OWNED(sc);
1865
1866         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1867                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1868                         rc = EINTR;
1869                         goto done;
1870                 }
1871         }
1872         if (IS_DOOMED(p)) {
1873                 rc = ENXIO;
1874                 goto done;
1875         }
1876         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1877         SET_BUSY(sc);
1878         ADAPTER_UNLOCK(sc);
1879
1880         rc = cxgb_uninit_synchronized(p);
1881
1882         ADAPTER_LOCK(sc);
1883         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1884         CLR_BUSY(sc);
1885         wakeup_one(&sc->flags);
1886 done:
1887         ADAPTER_UNLOCK(sc);
1888         return (rc);
1889 }
1890
1891 /*
1892  * Called on "ifconfig down", and from port_detach
1893  */
1894 static int
1895 cxgb_uninit_synchronized(struct port_info *pi)
1896 {
1897         struct adapter *sc = pi->adapter;
1898         struct ifnet *ifp = pi->ifp;
1899
1900         /*
1901          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1902          */
1903         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1904
        /*
         * Clear this port's bit from the open device map, and then drain all
         * the tasks that can access/manipulate this port's port_info or ifp.
         * We disable this port's interrupts here, so the slow/ext interrupt
         * tasks won't be enqueued anymore.  The tick task will continue to
         * be enqueued every second, but runs after this drain will not see
         * this port in the open device map.
         *
         * A well-behaved task must take open_device_map into account and
         * ignore ports that are not open.
         */
1916         clrbit(&sc->open_device_map, pi->port_id);
1917         t3_port_intr_disable(sc, pi->port_id);
1918         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1919         taskqueue_drain(sc->tq, &sc->tick_task);
1920
1921         callout_drain(&pi->link_check_ch);
1922         taskqueue_drain(sc->tq, &pi->link_check_task);
1923
1924         PORT_LOCK(pi);
1925         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1926
1927         /* disable pause frames */
1928         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1929
1930         /* Reset RX FIFO HWM */
1931         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1932                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1933
1934         DELAY(100 * 1000);
1935
1936         /* Wait for TXFIFO empty */
1937         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1938                         F_TXFIFO_EMPTY, 1, 20, 5);
1939
1940         DELAY(100 * 1000);
        t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);

        pi->phy.ops->power_down(&pi->phy, 1);
1945
1946         PORT_UNLOCK(pi);
1947
1948         pi->link_config.link_ok = 0;
1949         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1950
1951         if ((sc->open_device_map & PORT_MASK) == 0)
1952                 offload_close(&sc->tdev);
1953
1954         if (sc->open_device_map == 0)
1955                 cxgb_down(pi->adapter);
1956
1957         return (0);
1958 }
1959
1960 /*
1961  * Mark lro enabled or disabled in all qsets for this port
1962  */
1963 static int
1964 cxgb_set_lro(struct port_info *p, int enabled)
1965 {
1966         int i;
1967         struct adapter *adp = p->adapter;
1968         struct sge_qset *q;
1969
1970         for (i = 0; i < p->nqsets; i++) {
1971                 q = &adp->sge.qs[p->first_qset + i];
1972                 q->lro.enabled = (enabled != 0);
1973         }
1974         return (0);
1975 }
1976
1977 static int
1978 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1979 {
1980         struct port_info *p = ifp->if_softc;
1981         struct adapter *sc = p->adapter;
1982         struct ifreq *ifr = (struct ifreq *)data;
1983         int flags, error = 0, mtu;
1984         uint32_t mask;
1985
1986         switch (command) {
1987         case SIOCSIFMTU:
1988                 ADAPTER_LOCK(sc);
1989                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1990                 if (error) {
1991 fail:
1992                         ADAPTER_UNLOCK(sc);
1993                         return (error);
1994                 }
1995
1996                 mtu = ifr->ifr_mtu;
1997                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1998                         error = EINVAL;
1999                 } else {
2000                         ifp->if_mtu = mtu;
2001                         PORT_LOCK(p);
2002                         cxgb_update_mac_settings(p);
2003                         PORT_UNLOCK(p);
2004                 }
2005                 ADAPTER_UNLOCK(sc);
2006                 break;
2007         case SIOCSIFFLAGS:
2008                 ADAPTER_LOCK(sc);
2009                 if (IS_DOOMED(p)) {
2010                         error = ENXIO;
2011                         goto fail;
2012                 }
2013                 if (ifp->if_flags & IFF_UP) {
2014                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2015                                 flags = p->if_flags;
2016                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2017                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2018                                         if (IS_BUSY(sc)) {
2019                                                 error = EBUSY;
2020                                                 goto fail;
2021                                         }
2022                                         PORT_LOCK(p);
2023                                         cxgb_update_mac_settings(p);
2024                                         PORT_UNLOCK(p);
2025                                 }
2026                                 ADAPTER_UNLOCK(sc);
2027                         } else
2028                                 error = cxgb_init_locked(p);
2029                         p->if_flags = ifp->if_flags;
2030                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2031                         error = cxgb_uninit_locked(p);
2032                 else
2033                         ADAPTER_UNLOCK(sc);
2034
2035                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2036                 break;
2037         case SIOCADDMULTI:
2038         case SIOCDELMULTI:
2039                 ADAPTER_LOCK(sc);
2040                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2041                 if (error)
2042                         goto fail;
2043
2044                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2045                         PORT_LOCK(p);
2046                         cxgb_update_mac_settings(p);
2047                         PORT_UNLOCK(p);
2048                 }
2049                 ADAPTER_UNLOCK(sc);
2050
2051                 break;
2052         case SIOCSIFCAP:
2053                 ADAPTER_LOCK(sc);
2054                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2055                 if (error)
2056                         goto fail;
2057
2058                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2059                 if (mask & IFCAP_TXCSUM) {
2060                         ifp->if_capenable ^= IFCAP_TXCSUM;
2061                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2062
2063                         if (IFCAP_TSO & ifp->if_capenable &&
2064                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2065                                 ifp->if_capenable &= ~IFCAP_TSO;
2066                                 ifp->if_hwassist &= ~CSUM_TSO;
2067                                 if_printf(ifp,
2068                                     "tso disabled due to -txcsum.\n");
2069                         }
2070                 }
2071                 if (mask & IFCAP_RXCSUM)
2072                         ifp->if_capenable ^= IFCAP_RXCSUM;
2073                 if (mask & IFCAP_TSO4) {
2074                         ifp->if_capenable ^= IFCAP_TSO4;
2075
2076                         if (IFCAP_TSO & ifp->if_capenable) {
2077                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2078                                         ifp->if_hwassist |= CSUM_TSO;
2079                                 else {
2080                                         ifp->if_capenable &= ~IFCAP_TSO;
2081                                         ifp->if_hwassist &= ~CSUM_TSO;
2082                                         if_printf(ifp,
2083                                             "enable txcsum first.\n");
2084                                         error = EAGAIN;
2085                                 }
2086                         } else
2087                                 ifp->if_hwassist &= ~CSUM_TSO;
2088                 }
2089                 if (mask & IFCAP_LRO) {
2090                         ifp->if_capenable ^= IFCAP_LRO;
2091
2092                         /* Safe to do this even if cxgb_up not called yet */
2093                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2094                 }
2095                 if (mask & IFCAP_VLAN_HWTAGGING) {
2096                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2097                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2098                                 PORT_LOCK(p);
2099                                 cxgb_update_mac_settings(p);
2100                                 PORT_UNLOCK(p);
2101                         }
2102                 }
2103                 if (mask & IFCAP_VLAN_MTU) {
2104                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2105                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2106                                 PORT_LOCK(p);
2107                                 cxgb_update_mac_settings(p);
2108                                 PORT_UNLOCK(p);
2109                         }
2110                 }
2111                 if (mask & IFCAP_VLAN_HWTSO)
2112                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2113                 if (mask & IFCAP_VLAN_HWCSUM)
2114                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2115
2116 #ifdef VLAN_CAPABILITIES
2117                 VLAN_CAPABILITIES(ifp);
2118 #endif
2119                 ADAPTER_UNLOCK(sc);
2120                 break;
2121         case SIOCSIFMEDIA:
2122         case SIOCGIFMEDIA:
2123                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2124                 break;
2125         default:
2126                 error = ether_ioctl(ifp, command, data);
2127         }
2128
2129         return (error);
2130 }
2131
2132 static int
2133 cxgb_media_change(struct ifnet *ifp)
2134 {
2135         return (EOPNOTSUPP);
2136 }
2137
2138 /*
2139  * Translates phy->modtype to the correct Ethernet media subtype.
2140  */
2141 static int
2142 cxgb_ifm_type(int mod)
2143 {
2144         switch (mod) {
2145         case phy_modtype_sr:
2146                 return (IFM_10G_SR);
2147         case phy_modtype_lr:
2148                 return (IFM_10G_LR);
2149         case phy_modtype_lrm:
2150                 return (IFM_10G_LRM);
2151         case phy_modtype_twinax:
2152                 return (IFM_10G_TWINAX);
2153         case phy_modtype_twinax_long:
2154                 return (IFM_10G_TWINAX_LONG);
2155         case phy_modtype_none:
2156                 return (IFM_NONE);
2157         case phy_modtype_unknown:
2158                 return (IFM_UNKNOWN);
2159         }
2160
2161         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2162         return (IFM_UNKNOWN);
2163 }
2164
2165 /*
2166  * Rebuilds the ifmedia list for this port, and sets the current media.
2167  */
2168 static void
2169 cxgb_build_medialist(struct port_info *p)
2170 {
2171         struct cphy *phy = &p->phy;
2172         struct ifmedia *media = &p->media;
2173         int mod = phy->modtype;
2174         int m = IFM_ETHER | IFM_FDX;
2175
2176         PORT_LOCK(p);
2177
2178         ifmedia_removeall(media);
2179         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2180                 /* Copper (RJ45) */
2181
2182                 if (phy->caps & SUPPORTED_10000baseT_Full)
2183                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2184
2185                 if (phy->caps & SUPPORTED_1000baseT_Full)
2186                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2187
2188                 if (phy->caps & SUPPORTED_100baseT_Full)
2189                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2190
2191                 if (phy->caps & SUPPORTED_10baseT_Full)
2192                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2193
2194                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2195                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2196
2197         } else if (phy->caps & SUPPORTED_TP) {
2198                 /* Copper (CX4) */
2199
2200                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2201                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2202
2203                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2204                 ifmedia_set(media, m | IFM_10G_CX4);
2205
2206         } else if (phy->caps & SUPPORTED_FIBRE &&
2207                    phy->caps & SUPPORTED_10000baseT_Full) {
2208                 /* 10G optical (but includes SFP+ twinax) */
2209
2210                 m |= cxgb_ifm_type(mod);
2211                 if (IFM_SUBTYPE(m) == IFM_NONE)
2212                         m &= ~IFM_FDX;
2213
2214                 ifmedia_add(media, m, mod, NULL);
2215                 ifmedia_set(media, m);
2216
2217         } else if (phy->caps & SUPPORTED_FIBRE &&
2218                    phy->caps & SUPPORTED_1000baseT_Full) {
2219                 /* 1G optical */
2220
2221                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2222                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2223                 ifmedia_set(media, m | IFM_1000_SX);
2224
2225         } else {
2226                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2227                             phy->caps));
2228         }
2229
2230         PORT_UNLOCK(p);
2231 }
2232
2233 static void
2234 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2235 {
2236         struct port_info *p = ifp->if_softc;
2237         struct ifmedia_entry *cur = p->media.ifm_cur;
2238         int speed = p->link_config.speed;
2239
2240         if (cur->ifm_data != p->phy.modtype) {
2241                 cxgb_build_medialist(p);
2242                 cur = p->media.ifm_cur;
2243         }
2244
2245         ifmr->ifm_status = IFM_AVALID;
2246         if (!p->link_config.link_ok)
2247                 return;
2248
2249         ifmr->ifm_status |= IFM_ACTIVE;
2250
2251         /*
2252          * active and current will differ iff current media is autoselect.  That
2253          * can happen only for copper RJ45.
2254          */
2255         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2256                 return;
2257         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2258                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2259
2260         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2261         if (speed == SPEED_10000)
2262                 ifmr->ifm_active |= IFM_10G_T;
2263         else if (speed == SPEED_1000)
2264                 ifmr->ifm_active |= IFM_1000_T;
2265         else if (speed == SPEED_100)
2266                 ifmr->ifm_active |= IFM_100_TX;
2267         else if (speed == SPEED_10)
2268                 ifmr->ifm_active |= IFM_10_T;
2269         else
2270                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2271                             speed));
2272 }
2273
2274 static void
2275 cxgb_async_intr(void *data)
2276 {
2277         adapter_t *sc = data;
2278
2279         t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
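        /* Read back to flush the posted disable before queueing the task. */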
2280         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2281         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2282 }
2283
2284 static void
2285 link_check_callout(void *arg)
2286 {
2287         struct port_info *pi = arg;
2288         struct adapter *sc = pi->adapter;
2289
2290         if (!isset(&sc->open_device_map, pi->port_id))
2291                 return;
2292
2293         taskqueue_enqueue(sc->tq, &pi->link_check_task);
2294 }
2295
2296 static void
2297 check_link_status(void *arg, int pending)
2298 {
2299         struct port_info *pi = arg;
2300         struct adapter *sc = pi->adapter;
2301
2302         if (!isset(&sc->open_device_map, pi->port_id))
2303                 return;
2304
2305         t3_link_changed(sc, pi->port_id);
2306
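        /*
         * Reschedule ourselves if a link fault is being worked around or if
         * the PHY cannot interrupt on link state changes.
         */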
2307         if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2308                 callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2309 }
2310
2311 void
2312 t3_os_link_intr(struct port_info *pi)
2313 {
2314         /*
2315          * Schedule a link check in the near future.  If the link is flapping
2316          * rapidly we'll keep resetting the callout and delaying the check until
2317          * things stabilize a bit.
2318          */
2319         callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2320 }
2321
2322 static void
2323 check_t3b2_mac(struct adapter *sc)
2324 {
2325         int i;
2326
2327         if (sc->flags & CXGB_SHUTDOWN)
2328                 return;
2329
2330         for_each_port(sc, i) {
2331                 struct port_info *p = &sc->port[i];
2332                 int status;
2333 #ifdef INVARIANTS
2334                 struct ifnet *ifp = p->ifp;
2335 #endif          
2336
2337                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2338                     !p->link_config.link_ok)
2339                         continue;
2340
2341                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2342                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2343                          __func__, ifp->if_drv_flags, sc->open_device_map));
2344
2345                 PORT_LOCK(p);
2346                 status = t3b2_mac_watchdog_task(&p->mac);
2347                 if (status == 1)
2348                         p->mac.stats.num_toggled++;
2349                 else if (status == 2) {
2350                         struct cmac *mac = &p->mac;
2351
2352                         cxgb_update_mac_settings(p);
2353                         t3_link_start(&p->phy, mac, &p->link_config);
2354                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2355                         t3_port_intr_enable(sc, p->port_id);
2356                         p->mac.stats.num_resets++;
2357                 }
2358                 PORT_UNLOCK(p);
2359         }
2360 }
2361
2362 static void
2363 cxgb_tick(void *arg)
2364 {
2365         adapter_t *sc = (adapter_t *)arg;
2366
2367         if (sc->flags & CXGB_SHUTDOWN)
2368                 return;
2369
2370         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2371         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2372 }
2373
2374 static void
2375 cxgb_tick_handler(void *arg, int count)
2376 {
2377         adapter_t *sc = (adapter_t *)arg;
2378         const struct adapter_params *p = &sc->params;
2379         int i;
2380         uint32_t cause, reset;
2381
2382         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2383                 return;
2384
2385         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2386                 check_t3b2_mac(sc);
2387
2388         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2389         if (cause) {
2390                 struct sge_qset *qs = &sc->sge.qs[0];
2391                 uint32_t mask, v;
2392
2393                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2394
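                /*
                 * v: bits 0..SGE_QSETS-1 flag starved response queues, the
                 * next byte (RSPQXDISABLED, masked off above) is skipped,
                 * and the two bits per qset above that flag an empty free
                 * list.
                 */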
2395                 mask = 1;
2396                 for (i = 0; i < SGE_QSETS; i++) {
2397                         if (v & mask)
2398                                 qs[i].rspq.starved++;
2399                         mask <<= 1;
2400                 }
2401
2402                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2403
2404                 for (i = 0; i < SGE_QSETS * 2; i++) {
2405                         if (v & mask) {
2406                                 qs[i / 2].fl[i % 2].empty++;
2407                         }
2408                         mask <<= 1;
2409                 }
2410
2411                 /* clear */
2412                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2413                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2414         }
2415
2416         for (i = 0; i < sc->params.nports; i++) {
2417                 struct port_info *pi = &sc->port[i];
2418                 struct ifnet *ifp = pi->ifp;
2419                 struct cmac *mac = &pi->mac;
2420                 struct mac_stats *mstats = &mac->stats;
2421                 int drops, j;
2422
2423                 if (!isset(&sc->open_device_map, pi->port_id))
2424                         continue;
2425
2426                 PORT_LOCK(pi);
2427                 t3_mac_update_stats(mac);
2428                 PORT_UNLOCK(pi);
2429
2430                 ifp->if_opackets = mstats->tx_frames;
2431                 ifp->if_ipackets = mstats->rx_frames;
2432                 ifp->if_obytes = mstats->tx_octets;
2433                 ifp->if_ibytes = mstats->rx_octets;
2434                 ifp->if_omcasts = mstats->tx_mcast_frames;
2435                 ifp->if_imcasts = mstats->rx_mcast_frames;
2436                 ifp->if_collisions = mstats->tx_total_collisions;
2437                 ifp->if_iqdrops = mstats->rx_cong_drops;
2438
2439                 drops = 0;
2440                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2441                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2442                 ifp->if_snd.ifq_drops = drops;
2443
2444                 ifp->if_oerrors =
2445                     mstats->tx_excess_collisions +
2446                     mstats->tx_underrun +
2447                     mstats->tx_len_errs +
2448                     mstats->tx_mac_internal_errs +
2449                     mstats->tx_excess_deferral +
2450                     mstats->tx_fcs_errs;
2451                 ifp->if_ierrors =
2452                     mstats->rx_jabber +
2453                     mstats->rx_data_errs +
2454                     mstats->rx_sequence_errs +
2455                     mstats->rx_runt + 
2456                     mstats->rx_too_long +
2457                     mstats->rx_mac_internal_errs +
2458                     mstats->rx_short +
2459                     mstats->rx_fcs_errs;
2460
2461                 if (mac->multiport)
2462                         continue;
2463
2464                 /* Count rx fifo overflows, once per second */
2465                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2466                 reset = 0;
2467                 if (cause & F_RXFIFO_OVERFLOW) {
2468                         mac->stats.rx_fifo_ovfl++;
2469                         reset |= F_RXFIFO_OVERFLOW;
2470                 }
2471                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2472         }
2473 }
2474
2475 static void
2476 touch_bars(device_t dev)
2477 {
        /*
         * Don't enable yet: the body below is still written against the
         * Linux PCI API (pci_read_config_dword/pdev) and would need to be
         * ported before the "&& 0" guard is removed.
         */
2481 #if !defined(__LP64__) && 0
2482         u32 v;
2483
2484         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2485         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2486         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2487         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2488         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2489         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2490 #endif
2491 }
2492
2493 static int
2494 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2495 {
2496         uint8_t *buf;
2497         int err = 0;
2498         u32 aligned_offset, aligned_len, *p;
        struct adapter *adapter = pi->adapter;

        aligned_offset = offset & ~3;
2503         aligned_len = (len + (offset & 3) + 3) & ~3;
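        /*
         * Round to whole 4-byte EEPROM words, e.g. offset 6 and len 5
         * become aligned_offset 4 and aligned_len 8; the partial leading
         * and trailing words are read back first so the write below
         * doesn't clobber neighbouring bytes.
         */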
2504
2505         if (aligned_offset != offset || aligned_len != len) {
                /* M_WAITOK guarantees the allocation succeeds. */
                buf = malloc(aligned_len, M_DEVBUF, M_WAITOK | M_ZERO);
2509                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2510                 if (!err && aligned_len > 4)
2511                         err = t3_seeprom_read(adapter,
2512                                               aligned_offset + aligned_len - 4,
2513                                               (u32 *)&buf[aligned_len - 4]);
2514                 if (err)
2515                         goto out;
2516                 memcpy(buf + (offset & 3), data, len);
2517         } else
2518                 buf = (uint8_t *)(uintptr_t)data;
2519
2520         err = t3_seeprom_wp(adapter, 0);
2521         if (err)
2522                 goto out;
2523
2524         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2525                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2526                 aligned_offset += 4;
2527         }
2528
2529         if (!err)
2530                 err = t3_seeprom_wp(adapter, 1);
2531 out:
2532         if (buf != data)
2533                 free(buf, M_DEVBUF);
        return (err);
2535 }
2536
2537
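/*
 * Range check for ioctl parameters where a negative value means
 * "leave unchanged" and therefore always passes.
 */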
2538 static int
2539 in_range(int val, int lo, int hi)
2540 {
        return (val < 0 || (val <= hi && val >= lo));
2542 }
2543
2544 static int
cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{
        return (0);
}
2549
2550 static int
2551 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2552 {
        return (0);
2554 }
2555
2556 static int
2557 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2558     int fflag, struct thread *td)
2559 {
2560         int mmd, error = 0;
2561         struct port_info *pi = dev->si_drv1;
2562         adapter_t *sc = pi->adapter;
2563
2564 #ifdef PRIV_SUPPORTED   
2565         if (priv_check(td, PRIV_DRIVER)) {
2566                 if (cxgb_debug) 
2567                         printf("user does not have access to privileged ioctls\n");
2568                 return (EPERM);
2569         }
2570 #else
2571         if (suser(td)) {
2572                 if (cxgb_debug)
2573                         printf("user does not have access to privileged ioctls\n");
2574                 return (EPERM);
2575         }
2576 #endif
2577         
2578         switch (cmd) {
2579         case CHELSIO_GET_MIIREG: {
2580                 uint32_t val;
2581                 struct cphy *phy = &pi->phy;
2582                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2583                 
2584                 if (!phy->mdio_read)
2585                         return (EOPNOTSUPP);
2586                 if (is_10G(sc)) {
2587                         mmd = mid->phy_id >> 8;
2588                         if (!mmd)
2589                                 mmd = MDIO_DEV_PCS;
2590                         else if (mmd > MDIO_DEV_VEND2)
2591                                 return (EINVAL);
2592
2593                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2594                                              mid->reg_num, &val);
2595                 } else
2596                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2597                                              mid->reg_num & 0x1f, &val);
2598                 if (error == 0)
2599                         mid->val_out = val;
2600                 break;
2601         }
2602         case CHELSIO_SET_MIIREG: {
2603                 struct cphy *phy = &pi->phy;
2604                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2605
2606                 if (!phy->mdio_write)
2607                         return (EOPNOTSUPP);
2608                 if (is_10G(sc)) {
2609                         mmd = mid->phy_id >> 8;
2610                         if (!mmd)
2611                                 mmd = MDIO_DEV_PCS;
2612                         else if (mmd > MDIO_DEV_VEND2)
2613                                 return (EINVAL);
2614                         
2615                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2616                                               mmd, mid->reg_num, mid->val_in);
2617                 } else
2618                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2619                                               mid->reg_num & 0x1f,
2620                                               mid->val_in);
2621                 break;
2622         }
2623         case CHELSIO_SETREG: {
2624                 struct ch_reg *edata = (struct ch_reg *)data;
2625                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2626                         return (EFAULT);
2627                 t3_write_reg(sc, edata->addr, edata->val);
2628                 break;
2629         }
2630         case CHELSIO_GETREG: {
2631                 struct ch_reg *edata = (struct ch_reg *)data;
2632                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2633                         return (EFAULT);
2634                 edata->val = t3_read_reg(sc, edata->addr);
2635                 break;
2636         }
2637         case CHELSIO_GET_SGE_CONTEXT: {
2638                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2639                 mtx_lock_spin(&sc->sge.reg_lock);
2640                 switch (ecntxt->cntxt_type) {
2641                 case CNTXT_TYPE_EGRESS:
2642                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2643                             ecntxt->data);
2644                         break;
2645                 case CNTXT_TYPE_FL:
2646                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2647                             ecntxt->data);
2648                         break;
2649                 case CNTXT_TYPE_RSP:
2650                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2651                             ecntxt->data);
2652                         break;
2653                 case CNTXT_TYPE_CQ:
2654                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2655                             ecntxt->data);
2656                         break;
2657                 default:
2658                         error = EINVAL;
2659                         break;
2660                 }
2661                 mtx_unlock_spin(&sc->sge.reg_lock);
2662                 break;
2663         }
2664         case CHELSIO_GET_SGE_DESC: {
2665                 struct ch_desc *edesc = (struct ch_desc *)data;
2666                 int ret;
2667                 if (edesc->queue_num >= SGE_QSETS * 6)
2668                         return (EINVAL);
2669                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2670                     edesc->queue_num % 6, edesc->idx, edesc->data);
2671                 if (ret < 0)
2672                         return (EINVAL);
2673                 edesc->size = ret;
2674                 break;
2675         }
2676         case CHELSIO_GET_QSET_PARAMS: {
2677                 struct qset_params *q;
2678                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2679                 int q1 = pi->first_qset;
2680                 int nqsets = pi->nqsets;
2681                 int i;
2682
2683                 if (t->qset_idx >= nqsets)
                        return (EINVAL);
2685
2686                 i = q1 + t->qset_idx;
2687                 q = &sc->params.sge.qset[i];
2688                 t->rspq_size   = q->rspq_size;
2689                 t->txq_size[0] = q->txq_size[0];
2690                 t->txq_size[1] = q->txq_size[1];
2691                 t->txq_size[2] = q->txq_size[2];
2692                 t->fl_size[0]  = q->fl_size;
2693                 t->fl_size[1]  = q->jumbo_size;
2694                 t->polling     = q->polling;
2695                 t->lro         = q->lro;
2696                 t->intr_lat    = q->coalesce_usecs;
2697                 t->cong_thres  = q->cong_thres;
2698                 t->qnum        = i;
2699
2700                 if ((sc->flags & FULL_INIT_DONE) == 0)
2701                         t->vector = 0;
2702                 else if (sc->flags & USING_MSIX)
2703                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2704                 else
2705                         t->vector = rman_get_start(sc->irq_res);
2706
2707                 break;
2708         }
2709         case CHELSIO_GET_QSET_NUM: {
2710                 struct ch_reg *edata = (struct ch_reg *)data;
2711                 edata->val = pi->nqsets;
2712                 break;
2713         }
2714         case CHELSIO_LOAD_FW: {
2715                 uint8_t *fw_data;
2716                 uint32_t vers;
2717                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2718
                /*
                 * Firmware may be loaded only before FULL_INIT_DONE.
                 *
                 * FW_UPTODATE is also set so that the rest of the
                 * initialization will not overwrite what was loaded here.
                 * This gives you the flexibility to load any firmware (and
                 * maybe shoot yourself in the foot).
                 */
2727
2728                 ADAPTER_LOCK(sc);
2729                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2730                         ADAPTER_UNLOCK(sc);
2731                         return (EBUSY);
2732                 }
2733
2734                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2735                 if (!fw_data)
2736                         error = ENOMEM;
2737                 else
2738                         error = copyin(t->buf, fw_data, t->len);
2739
2740                 if (!error)
2741                         error = -t3_load_fw(sc, fw_data, t->len);
2742
2743                 if (t3_get_fw_version(sc, &vers) == 0) {
2744                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2745                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2746                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2747                 }
2748
2749                 if (!error)
2750                         sc->flags |= FW_UPTODATE;
2751
2752                 free(fw_data, M_DEVBUF);
2753                 ADAPTER_UNLOCK(sc);
2754                 break;
2755         }
2756         case CHELSIO_LOAD_BOOT: {
2757                 uint8_t *boot_data;
2758                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2759
2760                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2761                 if (!boot_data)
                        return (ENOMEM);
2763
2764                 error = copyin(t->buf, boot_data, t->len);
2765                 if (!error)
2766                         error = -t3_load_boot(sc, boot_data, t->len);
2767
2768                 free(boot_data, M_DEVBUF);
2769                 break;
2770         }
2771         case CHELSIO_GET_PM: {
2772                 struct ch_pm *m = (struct ch_pm *)data;
2773                 struct tp_params *p = &sc->params.tp;
2774
2775                 if (!is_offload(sc))
2776                         return (EOPNOTSUPP);
2777
2778                 m->tx_pg_sz = p->tx_pg_size;
2779                 m->tx_num_pg = p->tx_num_pgs;
2780                 m->rx_pg_sz  = p->rx_pg_size;
2781                 m->rx_num_pg = p->rx_num_pgs;
2782                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2783
2784                 break;
2785         }
2786         case CHELSIO_SET_PM: {
2787                 struct ch_pm *m = (struct ch_pm *)data;
2788                 struct tp_params *p = &sc->params.tp;
2789
2790                 if (!is_offload(sc))
2791                         return (EOPNOTSUPP);
2792                 if (sc->flags & FULL_INIT_DONE)
2793                         return (EBUSY);
2794
2795                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2796                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2797                         return (EINVAL);        /* not power of 2 */
2798                 if (!(m->rx_pg_sz & 0x14000))
2799                         return (EINVAL);        /* not 16KB or 64KB */
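                /* tx page size: a power of 4 from 16KB up to 16MB */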
2800                 if (!(m->tx_pg_sz & 0x1554000))
2801                         return (EINVAL);
2802                 if (m->tx_num_pg == -1)
2803                         m->tx_num_pg = p->tx_num_pgs;
2804                 if (m->rx_num_pg == -1)
2805                         m->rx_num_pg = p->rx_num_pgs;
2806                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2807                         return (EINVAL);
2808                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2809                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2810                         return (EINVAL);
2811
2812                 p->rx_pg_size = m->rx_pg_sz;
2813                 p->tx_pg_size = m->tx_pg_sz;
2814                 p->rx_num_pgs = m->rx_num_pg;
2815                 p->tx_num_pgs = m->tx_num_pg;
2816                 break;
2817         }
2818         case CHELSIO_SETMTUTAB: {
2819                 struct ch_mtus *m = (struct ch_mtus *)data;
2820                 int i;
2821                 
2822                 if (!is_offload(sc))
2823                         return (EOPNOTSUPP);
2824                 if (offload_running(sc))
2825                         return (EBUSY);
2826                 if (m->nmtus != NMTUS)
2827                         return (EINVAL);
2828                 if (m->mtus[0] < 81)         /* accommodate SACK */
2829                         return (EINVAL);
2830                 
2831                 /*
2832                  * MTUs must be in ascending order
2833                  */
2834                 for (i = 1; i < NMTUS; ++i)
2835                         if (m->mtus[i] < m->mtus[i - 1])
2836                                 return (EINVAL);
2837
2838                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2839                 break;
2840         }
2841         case CHELSIO_GETMTUTAB: {
2842                 struct ch_mtus *m = (struct ch_mtus *)data;
2843
2844                 if (!is_offload(sc))
2845                         return (EOPNOTSUPP);
2846
2847                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2848                 m->nmtus = NMTUS;
2849                 break;
2850         }
2851         case CHELSIO_GET_MEM: {
2852                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2853                 struct mc7 *mem;
2854                 uint8_t *useraddr;
2855                 u64 buf[32];
2856
2857                 /*
2858                  * Use these to avoid modifying len/addr in the return
2859                  * struct
2860                  */
2861                 uint32_t len = t->len, addr = t->addr;
2862
2863                 if (!is_offload(sc))
2864                         return (EOPNOTSUPP);
2865                 if (!(sc->flags & FULL_INIT_DONE))
2866                         return (EIO);         /* need the memory controllers */
2867                 if ((addr & 0x7) || (len & 0x7))
2868                         return (EINVAL);
2869                 if (t->mem_id == MEM_CM)
2870                         mem = &sc->cm;
2871                 else if (t->mem_id == MEM_PMRX)
2872                         mem = &sc->pmrx;
2873                 else if (t->mem_id == MEM_PMTX)
2874                         mem = &sc->pmtx;
2875                 else
2876                         return (EINVAL);
2877
2878                 /*
2879                  * Version scheme:
2880                  * bits 0..9: chip version
2881                  * bits 10..15: chip revision
2882                  */
2883                 t->version = 3 | (sc->params.rev << 10);
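                /* e.g. a rev-2 part reports 3 | (2 << 10) == 0x803 */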
2884                 
2885                 /*
2886                  * Read 256 bytes at a time as len can be large and we don't
2887                  * want to use huge intermediate buffers.
2888                  */
2889                 useraddr = (uint8_t *)t->buf; 
2890                 while (len) {
2891                         unsigned int chunk = min(len, sizeof(buf));
2892
2893                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2894                         if (error)
2895                                 return (-error);
2896                         if (copyout(buf, useraddr, chunk))
2897                                 return (EFAULT);
2898                         useraddr += chunk;
2899                         addr += chunk;
2900                         len -= chunk;
2901                 }
2902                 break;
2903         }
2904         case CHELSIO_READ_TCAM_WORD: {
2905                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2906
2907                 if (!is_offload(sc))
2908                         return (EOPNOTSUPP);
2909                 if (!(sc->flags & FULL_INIT_DONE))
2910                         return (EIO);         /* need MC5 */            
                return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2913         }
2914         case CHELSIO_SET_TRACE_FILTER: {
2915                 struct ch_trace *t = (struct ch_trace *)data;
2916                 const struct trace_params *tp;
2917
2918                 tp = (const struct trace_params *)&t->sip;
2919                 if (t->config_tx)
2920                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2921                                                t->trace_tx);
2922                 if (t->config_rx)
2923                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2924                                                t->trace_rx);
2925                 break;
2926         }
2927         case CHELSIO_SET_PKTSCHED: {
2928                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2929                 if (sc->open_device_map == 0)
2930                         return (EAGAIN);
2931                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2932                     p->binding);
2933                 break;
2934         }
2935         case CHELSIO_IFCONF_GETREGS: {
2936                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2937                 int reglen = cxgb_get_regs_len();
2938                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2939                 if (buf == NULL) {
2940                         return (ENOMEM);
2941                 }
2942                 if (regs->len > reglen)
2943                         regs->len = reglen;
2944                 else if (regs->len < reglen)
2945                         error = ENOBUFS;
2946
2947                 if (!error) {
2948                         cxgb_get_regs(sc, regs, buf);
2949                         error = copyout(buf, regs->data, reglen);
2950                 }
2951                 free(buf, M_DEVBUF);
2952
2953                 break;
2954         }
2955         case CHELSIO_SET_HW_SCHED: {
2956                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2957                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2958
2959                 if ((sc->flags & FULL_INIT_DONE) == 0)
2960                         return (EAGAIN);       /* need TP to be initialized */
2961                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2962                     !in_range(t->channel, 0, 1) ||
2963                     !in_range(t->kbps, 0, 10000000) ||
2964                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2965                     !in_range(t->flow_ipg, 0,
2966                               dack_ticks_to_usec(sc, 0x7ff)))
2967                         return (EINVAL);
2968
2969                 if (t->kbps >= 0) {
2970                         error = t3_config_sched(sc, t->kbps, t->sched);
2971                         if (error < 0)
2972                                 return (-error);
2973                 }
2974                 if (t->class_ipg >= 0)
2975                         t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2976                 if (t->flow_ipg >= 0) {
2977                         t->flow_ipg *= 1000;     /* us -> ns */
2978                         t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2979                 }
2980                 if (t->mode >= 0) {
2981                         int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2982
2983                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2984                                          bit, t->mode ? bit : 0);
2985                 }
2986                 if (t->channel >= 0)
2987                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2988                                          1 << t->sched, t->channel << t->sched);
2989                 break;
2990         }
2991         case CHELSIO_GET_EEPROM: {
2992                 int i;
2993                 struct ch_eeprom *e = (struct ch_eeprom *)data;
2994                 uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2995
2996                 if (buf == NULL) {
2997                         return (ENOMEM);
2998                 }
2999                 e->magic = EEPROM_MAGIC;
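                /*
                 * The SEEPROM is accessed 32 bits at a time: round the
                 * start down to a word boundary, read whole words spanning
                 * the requested range, then copy out only the bytes the
                 * caller asked for.
                 */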
3000                 for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3001                         error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3002
3003                 if (!error)
3004                         error = copyout(buf + e->offset, e->data, e->len);
3005
3006                 free(buf, M_DEVBUF);
3007                 break;
3008         }
3009         case CHELSIO_CLEAR_STATS: {
3010                 if (!(sc->flags & FULL_INIT_DONE))
3011                         return (EAGAIN);
3012
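                /*
                 * Drain the MAC's hardware counters into the software
                 * copy first, so counts accumulated before this point
                 * don't reappear at the next periodic stats update, then
                 * zero the software copy.
                 */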
3013                 PORT_LOCK(pi);
3014                 t3_mac_update_stats(&pi->mac);
3015                 memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3016                 PORT_UNLOCK(pi);
3017                 break;
3018         }
3019         case CHELSIO_GET_UP_LA: {
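                /* Dump the trace buffer of the on-chip microprocessor's
                 * logic analyzer. */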
3020                 struct ch_up_la *la = (struct ch_up_la *)data;
3021                 uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3022                 if (buf == NULL) {
3023                         return (ENOMEM);
3024                 }
3025                 if (la->bufsize < LA_BUFSIZE)
3026                         error = ENOBUFS;
3027
3028                 if (!error)
3029                         error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3030                                               &la->bufsize, buf);
3031                 if (!error)
3032                         error = copyout(buf, la->data, la->bufsize);
3033
3034                 free(buf, M_DEVBUF);
3035                 break;
3036         }
3037         case CHELSIO_GET_UP_IOQS: {
3038                 struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3039                 uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3040                 uint32_t *v;
3041
3042                 if (buf == NULL) {
3043                         return (ENOMEM);
3044                 }
3045                 if (ioqs->bufsize < IOQS_BUFSIZE)
3046                         error = ENOBUFS;
3047
3048                 if (!error)
3049                         error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3050
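                /*
                 * The first four 32-bit words of the dump are the IOQ
                 * enable and status registers; unpack those into the
                 * reply and copy the remaining data out after them.
                 */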
3051                 if (!error) {
3052                         v = (uint32_t *)buf;
3053
3054                         ioqs->ioq_rx_enable = *v++;
3055                         ioqs->ioq_tx_enable = *v++;
3056                         ioqs->ioq_rx_status = *v++;
3057                         ioqs->ioq_tx_status = *v++;
3058
3059                         error = copyout(v, ioqs->data, ioqs->bufsize);
3060                 }
3061
3062                 free(buf, M_DEVBUF);
3063                 break;
3064         }
3065         case CHELSIO_SET_FILTER: {
3066                 struct ch_filter *f = (struct ch_filter *)data;
3067                 struct filter_info *p;
3068                 unsigned int nfilters = sc->params.mc5.nfilters;
3069
3070                 if (!is_offload(sc))
3071                         return (EOPNOTSUPP);    /* No TCAM */
3072                 if (!(sc->flags & FULL_INIT_DONE))
3073                         return (EAGAIN);        /* mc5 not setup yet */
3074                 if (nfilters == 0)
3075                         return (EBUSY);         /* TOE will use TCAM */
3076
3077                 /* sanity checks: match each field exactly (full mask) or not at all */
3078                 if (f->filter_id >= nfilters ||
3079                     (f->val.dip && f->mask.dip != 0xffffffff) ||
3080                     (f->val.sport && f->mask.sport != 0xffff) ||
3081                     (f->val.dport && f->mask.dport != 0xffff) ||
3082                     (f->val.vlan && f->mask.vlan != 0xfff) ||
3083                     (f->val.vlan_prio &&
3084                         f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3085                     (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3086                     f->qset >= SGE_QSETS ||
3087                     sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3088                         return (EINVAL);
3089
3090                 /* Was allocated with M_WAITOK */
3091                 KASSERT(sc->filters, ("filter table NULL\n"));
3092
3093                 p = &sc->filters[f->filter_id];
3094                 if (p->locked)
3095                         return (EPERM);
3096
3097                 bzero(p, sizeof(*p));
3098                 p->sip = f->val.sip;
3099                 p->sip_mask = f->mask.sip;
3100                 p->dip = f->val.dip;
3101                 p->sport = f->val.sport;
3102                 p->dport = f->val.dport;
3103                 p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3104                 p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3105                     FILTER_NO_VLAN_PRI;
3106                 p->mac_hit = f->mac_hit;
3107                 p->mac_vld = f->mac_addr_idx != 0xffff;
3108                 p->mac_idx = f->mac_addr_idx;
3109                 p->pkt_type = f->proto;
3110                 p->report_filter_id = f->want_filter_id;
3111                 p->pass = f->pass;
3112                 p->rss = f->rss;
3113                 p->qset = f->qset;
3114
3115                 error = set_filter(sc, f->filter_id, p);
3116                 if (error == 0)
3117                         p->valid = 1;
3118                 break;
3119         }
3120         case CHELSIO_DEL_FILTER: {
3121                 struct ch_filter *f = (struct ch_filter *)data;
3122                 struct filter_info *p;
3123                 unsigned int nfilters = sc->params.mc5.nfilters;
3124
3125                 if (!is_offload(sc))
3126                         return (EOPNOTSUPP);
3127                 if (!(sc->flags & FULL_INIT_DONE))
3128                         return (EAGAIN);
3129                 if (nfilters == 0 || sc->filters == NULL)
3130                         return (EINVAL);
3131                 if (f->filter_id >= nfilters)
3132                         return (EINVAL);
3133
3134                 p = &sc->filters[f->filter_id];
3135                 if (p->locked)
3136                         return (EPERM);
3137                 if (!p->valid)
3138                         return (EFAULT); /* Read "Bad address" as "Bad index" */
3139
3140                 bzero(p, sizeof(*p));
3141                 p->sip = p->sip_mask = 0xffffffff;
3142                 p->vlan = 0xfff;
3143                 p->vlan_prio = FILTER_NO_VLAN_PRI;
3144                 p->pkt_type = 1;
3145                 error = set_filter(sc, f->filter_id, p);
3146                 break;
3147         }
3148         case CHELSIO_GET_FILTER: {
3149                 struct ch_filter *f = (struct ch_filter *)data;
3150                 struct filter_info *p;
3151                 unsigned int i, nfilters = sc->params.mc5.nfilters;
3152
3153                 if (!is_offload(sc))
3154                         return (EOPNOTSUPP);
3155                 if (!(sc->flags & FULL_INIT_DONE))
3156                         return (EAGAIN);
3157                 if (nfilters == 0 || sc->filters == NULL)
3158                         return (EINVAL);
3159
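                /*
                 * Userland iterates over the table: a filter_id of
                 * 0xffffffff fetches the first valid filter, any other id
                 * fetches the next valid one after it, and 0xffffffff in
                 * the reply means the table is exhausted.
                 */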
3160                 i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3161                 for (; i < nfilters; i++) {
3162                         p = &sc->filters[i];
3163                         if (!p->valid)
3164                                 continue;
3165
3166                         bzero(f, sizeof(*f));
3167
3168                         f->filter_id = i;
3169                         f->val.sip = p->sip;
3170                         f->mask.sip = p->sip_mask;
3171                         f->val.dip = p->dip;
3172                         f->mask.dip = p->dip ? 0xffffffff : 0;
3173                         f->val.sport = p->sport;
3174                         f->mask.sport = p->sport ? 0xffff : 0;
3175                         f->val.dport = p->dport;
3176                         f->mask.dport = p->dport ? 0xffff : 0;
3177                         f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3178                         f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3179                         f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3180                             0 : p->vlan_prio;
3181                         f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3182                             0 : FILTER_NO_VLAN_PRI;
3183                         f->mac_hit = p->mac_hit;
3184                         f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3185                         f->proto = p->pkt_type;
3186                         f->want_filter_id = p->report_filter_id;
3187                         f->pass = p->pass;
3188                         f->rss = p->rss;
3189                         f->qset = p->qset;
3190
3191                         break;
3192                 }
3193
3194                 if (i == nfilters)
3195                         f->filter_id = 0xffffffff;
3196                 break;
3197         }
3198         default:
3199                 return (EOPNOTSUPP);
3200                 break;
3201         }
3202
3203         return (error);
3204 }
3205
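/*
 * Snapshot the 32-bit registers in [start, end] into buf, each at an
 * offset equal to its register address.
 */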
3206 static __inline void
3207 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3208     unsigned int end)
3209 {
3210         uint32_t *p = (uint32_t *)(buf + start);
3211
3212         for ( ; start <= end; start += sizeof(uint32_t))
3213                 *p++ = t3_read_reg(ap, start);
3214 }
3215
3216 #define T3_REGMAP_SIZE (3 * 1024)
3217 static int
3218 cxgb_get_regs_len(void)
3219 {
3220         return T3_REGMAP_SIZE;
3221 }
3222
3223 static void
3224 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3225 {
3226
3227         /*
3228          * Version scheme:
3229          * bits 0..9: chip version
3230          * bits 10..15: chip revision
3231          * bit 31: set for PCIe cards
3232          */
3233         regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3234
3235         /*
3236          * We skip the MAC statistics registers because they are clear-on-read.
3237          * Also reading multi-register stats would need to synchronize with the
3238          * periodic mac stats accumulation.  Hard to justify the complexity.
3239          */
3240         memset(buf, 0, cxgb_get_regs_len());
3241         reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3242         reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3243         reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3244         reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3245         reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3246         reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3247                        XGM_REG(A_XGM_SERDES_STAT3, 1));
3248         reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3249                        XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3250 }
3251
3252 static int
3253 alloc_filters(struct adapter *sc)
3254 {
3255         struct filter_info *p;
3256         unsigned int nfilters = sc->params.mc5.nfilters;
3257
3258         if (nfilters == 0)
3259                 return (0);
3260
3261         p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3262         sc->filters = p;
3263
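        /*
         * The last entry is reserved as a locked, catch-all default:
         * every field is a wildcard and matching packets are passed up
         * through RSS.  Locked entries cannot be changed or removed via
         * the filter ioctls.
         */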
3264         p = &sc->filters[nfilters - 1];
3265         p->vlan = 0xfff;
3266         p->vlan_prio = FILTER_NO_VLAN_PRI;
3267         p->pass = p->rss = p->valid = p->locked = 1;
3268
3269         return (0);
3270 }
3271
3272 static int
3273 setup_hw_filters(struct adapter *sc)
3274 {
3275         int i, rc;
3276         unsigned int nfilters = sc->params.mc5.nfilters;
3277
3278         if (!sc->filters)
3279                 return (0);
3280
3281         t3_enable_filters(sc);
3282
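        /*
         * Only locked entries (the reserved default filter) are written
         * to the TCAM here; user filters are programmed individually
         * through CHELSIO_SET_FILTER.
         */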
3283         for (i = rc = 0; i < nfilters && !rc; i++) {
3284                 if (sc->filters[i].locked)
3285                         rc = set_filter(sc, i, &sc->filters[i]);
3286         }
3287
3288         return (rc);
3289 }
3290
3291 static int
3292 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3293 {
3294         int len;
3295         struct mbuf *m;
3296         struct ulp_txpkt *txpkt;
3297         struct work_request_hdr *wr;
3298         struct cpl_pass_open_req *oreq;
3299         struct cpl_set_tcb_field *sreq;
3300
3301         len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3302         KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3303
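        /*
         * Filters occupy the MC5 TCAM entries directly below the routing
         * region at the top, so convert the zero-based filter index into
         * an absolute TCAM index.
         */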
3304         id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3305               sc->params.mc5.nfilters;
3306
3307         m = m_gethdr(M_WAITOK, MT_DATA);
3308         m->m_len = m->m_pkthdr.len = len;
3309         bzero(mtod(m, char *), len);
3310
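        /*
         * Build one atomic bypass work request carrying a
         * CPL_PASS_OPEN_REQ that describes the filter, followed by two
         * CPL_SET_TCB_FIELD commands, each wrapped in a ULP_TXPKT header.
         */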
3311         wr = mtod(m, struct work_request_hdr *);
3312         wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3313
3314         oreq = (struct cpl_pass_open_req *)(wr + 1);
3315         txpkt = (struct ulp_txpkt *)oreq;
3316         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3317         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3318         OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3319         oreq->local_port = htons(f->dport);
3320         oreq->peer_port = htons(f->sport);
3321         oreq->local_ip = htonl(f->dip);
3322         oreq->peer_ip = htonl(f->sip);
3323         oreq->peer_netmask = htonl(f->sip_mask);
3324         oreq->opt0h = 0;
3325         oreq->opt0l = htonl(F_NO_OFFLOAD);
3326         oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3327                          V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3328                          V_VLAN_PRI(f->vlan_prio >> 1) |
3329                          V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3330                          V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3331                          V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3332
3333         sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3334         set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3335                           (f->report_filter_id << 15) | (1 << 23) |
3336                           ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3337         set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3338         t3_mgmt_tx(sc, m);
3339
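        /*
         * A filter that passes packets without RSS steers them to a
         * specific queue set: send one more CPL_SET_TCB_FIELD installing
         * the reverse-RSS index for the requested qset.
         */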
3340         if (f->pass && !f->rss) {
3341                 len = sizeof(*sreq);
3342                 m = m_gethdr(M_WAITOK, MT_DATA);
3343                 m->m_len = m->m_pkthdr.len = len;
3344                 bzero(mtod(m, char *), len);
3345                 sreq = mtod(m, struct cpl_set_tcb_field *);
3346                 sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3347                 mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3348                                  (u64)sc->rrss_map[f->qset] << 19);
3349                 t3_mgmt_tx(sc, m);
3350         }
3351         return (0);
3352 }
3353
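/*
 * Populate a CPL_SET_TCB_FIELD request that updates the masked bits of
 * the given TCB word, with no reply requested.
 */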
3354 static inline void
3355 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3356     unsigned int word, u64 mask, u64 val)
3357 {
3358         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3359         req->reply = V_NO_REPLY(1);
3360         req->cpu_idx = 0;
3361         req->word = htons(word);
3362         req->mask = htobe64(mask);
3363         req->val = htobe64(val);
3364 }
3365
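/*
 * As mk_set_tcb_field(), but prefixed with a ULP_TXPKT header so the
 * command can be embedded in a bypass work request.
 */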
3366 static inline void
3367 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3368     unsigned int word, u64 mask, u64 val)
3369 {
3370         struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3371
3372         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3373         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3374         mk_set_tcb_field(req, tid, word, mask, val);
3375 }