]> CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - sys/dev/cxgb/cxgb_main.c
Merge r240680 from head:
[FreeBSD/stable/8.git] / sys / dev / cxgb / cxgb_main.c
1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78
79 #include <cxgb_include.h>
80
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126
127 static device_method_t cxgb_controller_methods[] = {
128         DEVMETHOD(device_probe,         cxgb_controller_probe),
129         DEVMETHOD(device_attach,        cxgb_controller_attach),
130         DEVMETHOD(device_detach,        cxgb_controller_detach),
131
132         DEVMETHOD_END
133 };
134
135 static driver_t cxgb_controller_driver = {
136         "cxgbc",
137         cxgb_controller_methods,
138         sizeof(struct adapter)
139 };
140
141 static devclass_t       cxgb_controller_devclass;
142 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
143
144 /*
145  * Attachment glue for the ports.  Attachment is done directly to the
146  * controller device.
147  */
148 static int cxgb_port_probe(device_t);
149 static int cxgb_port_attach(device_t);
150 static int cxgb_port_detach(device_t);
151
152 static device_method_t cxgb_port_methods[] = {
153         DEVMETHOD(device_probe,         cxgb_port_probe),
154         DEVMETHOD(device_attach,        cxgb_port_attach),
155         DEVMETHOD(device_detach,        cxgb_port_detach),
156         { 0, 0 }
157 };
158
159 static driver_t cxgb_port_driver = {
160         "cxgb",
161         cxgb_port_methods,
162         0
163 };
164
165 static d_ioctl_t cxgb_extension_ioctl;
166 static d_open_t cxgb_extension_open;
167 static d_close_t cxgb_extension_close;
168
169 static struct cdevsw cxgb_cdevsw = {
170        .d_version =    D_VERSION,
171        .d_flags =      0,
172        .d_open =       cxgb_extension_open,
173        .d_close =      cxgb_extension_close,
174        .d_ioctl =      cxgb_extension_ioctl,
175        .d_name =       "cxgb",
176 };
177
178 static devclass_t       cxgb_port_devclass;
179 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
180
181 /*
182  * The driver uses the best interrupt scheme available on a platform in the
183  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
184  * of these schemes the driver may consider as follows:
185  *
186  * msi = 2: choose from among all three options
187  * msi = 1 : only consider MSI and pin interrupts
188  * msi = 0: force pin interrupts
189  */
190 static int msi_allowed = 2;
191
192 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
193 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
194 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
195     "MSI-X, MSI, INTx selector");
196
197 /*
198  * The driver enables offload as a default.
199  * To disable it, use ofld_disable = 1.
200  */
201 static int ofld_disable = 0;
202 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
203 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
204     "disable ULP offload");
205
206 /*
207  * The driver uses an auto-queue algorithm by default.
208  * To disable it and force a single queue-set per port, use multiq = 0
209  */
210 static int multiq = 1;
211 TUNABLE_INT("hw.cxgb.multiq", &multiq);
212 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
213     "use min(ncpus/ports, 8) queue-sets per port");
214
215 /*
216  * By default the driver will not update the firmware unless
217  * it was compiled against a newer version
218  * 
219  */
220 static int force_fw_update = 0;
221 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
222 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
223     "update firmware even if up to date");
224
225 int cxgb_use_16k_clusters = -1;
226 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
227 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
228     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
229
230 static int nfilters = -1;
231 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
232 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
233     &nfilters, 0, "max number of entries in the filter table");
234
/*
 * Lower and upper bounds on queue and buffer counts, listed as
 * min/max pairs where both exist.
 */
enum {
        /* Tx queues */
        MIN_TXQ_ENTRIES      = 4,
        MAX_TXQ_ENTRIES      = 16384,

        /* Control Tx queues */
        MIN_CTRL_TXQ_ENTRIES = 4,
        MAX_CTRL_TXQ_ENTRIES = 1024,

        /* Response queues */
        MIN_RSPQ_ENTRIES     = 32,
        MAX_RSPQ_ENTRIES     = 16384,

        /* Free lists / Rx buffers, regular and jumbo */
        MIN_FL_ENTRIES       = 32,
        MAX_RX_BUFFERS       = 16384,
        MIN_FL_JUMBO_ENTRIES = 32,
        MAX_RX_JUMBO_BUFFERS = 16384
};
247
248 struct filter_info {
249         u32 sip;
250         u32 sip_mask;
251         u32 dip;
252         u16 sport;
253         u16 dport;
254         u32 vlan:12;
255         u32 vlan_prio:3;
256         u32 mac_hit:1;
257         u32 mac_idx:4;
258         u32 mac_vld:1;
259         u32 pkt_type:2;
260         u32 report_filter_id:1;
261         u32 pass:1;
262         u32 rss:1;
263         u32 qset:3;
264         u32 locked:1;
265         u32 valid:1;
266 };
267
268 enum { FILTER_NO_VLAN_PRI = 7 };
269
270 #define EEPROM_MAGIC 0x38E2F10C
271
272 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
273
274 /* Table for probing the cards.  The desc field isn't actually used */
275 struct cxgb_ident {
276         uint16_t        vendor;
277         uint16_t        device;
278         int             index;
279         char            *desc;
280 } cxgb_identifiers[] = {
281         {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282         {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283         {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284         {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285         {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286         {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287         {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288         {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289         {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290         {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291         {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292         {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293         {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294         {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295         {0, 0, 0, NULL}
296 };
297
298 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
299
300
301 static __inline char
302 t3rev2char(struct adapter *adapter)
303 {
304         char rev = 'z';
305
306         switch(adapter->params.rev) {
307         case T3_REV_A:
308                 rev = 'a';
309                 break;
310         case T3_REV_B:
311         case T3_REV_B2:
312                 rev = 'b';
313                 break;
314         case T3_REV_C:
315                 rev = 'c';
316                 break;
317         }
318         return rev;
319 }
320
321 static struct cxgb_ident *
322 cxgb_get_ident(device_t dev)
323 {
324         struct cxgb_ident *id;
325
326         for (id = cxgb_identifiers; id->desc != NULL; id++) {
327                 if ((id->vendor == pci_get_vendor(dev)) &&
328                     (id->device == pci_get_device(dev))) {
329                         return (id);
330                 }
331         }
332         return (NULL);
333 }
334
335 static const struct adapter_info *
336 cxgb_get_adapter_info(device_t dev)
337 {
338         struct cxgb_ident *id;
339         const struct adapter_info *ai;
340
341         id = cxgb_get_ident(dev);
342         if (id == NULL)
343                 return (NULL);
344
345         ai = t3_get_adapter_info(id->index);
346
347         return (ai);
348 }
349
350 static int
351 cxgb_controller_probe(device_t dev)
352 {
353         const struct adapter_info *ai;
354         char *ports, buf[80];
355         int nports;
356
357         ai = cxgb_get_adapter_info(dev);
358         if (ai == NULL)
359                 return (ENXIO);
360
361         nports = ai->nports0 + ai->nports1;
362         if (nports == 1)
363                 ports = "port";
364         else
365                 ports = "ports";
366
367         snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368         device_set_desc_copy(dev, buf);
369         return (BUS_PROBE_DEFAULT);
370 }
371
372 #define FW_FNAME "cxgb_t3fw"
373 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
374 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
375
376 static int
377 upgrade_fw(adapter_t *sc)
378 {
379         const struct firmware *fw;
380         int status;
381         u32 vers;
382         
383         if ((fw = firmware_get(FW_FNAME)) == NULL)  {
384                 device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
385                 return (ENOENT);
386         } else
387                 device_printf(sc->dev, "installing firmware on card\n");
388         status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
389
390         if (status != 0) {
391                 device_printf(sc->dev, "failed to install firmware: %d\n",
392                     status);
393         } else {
394                 t3_get_fw_version(sc, &vers);
395                 snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
396                     G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
397                     G_FW_VERSION_MICRO(vers));
398         }
399
400         firmware_put(fw, FIRMWARE_UNLOAD);
401
402         return (status);        
403 }
404
405 /*
406  * The cxgb_controller_attach function is responsible for the initial
407  * bringup of the device.  Its responsibilities include:
408  *
409  *  1. Determine if the device supports MSI or MSI-X.
410  *  2. Allocate bus resources so that we can access the Base Address Register
411  *  3. Create and initialize mutexes for the controller and its control
412  *     logic such as SGE and MDIO.
413  *  4. Call hardware specific setup routine for the adapter as a whole.
414  *  5. Allocate the BAR for doing MSI-X.
415  *  6. Setup the line interrupt iff MSI-X is not supported.
416  *  7. Create the driver's taskq.
417  *  8. Start one task queue service thread.
418  *  9. Check if the firmware and SRAM are up-to-date.  They will be
419  *     auto-updated later (before FULL_INIT_DONE), if required.
420  * 10. Create a child device for each MAC (port)
421  * 11. Initialize T3 private state.
422  * 12. Trigger the LED
423  * 13. Setup offload iff supported.
424  * 14. Reset/restart the tick callout.
425  * 15. Attach sysctls
426  *
427  * NOTE: Any modification or deviation from this list MUST be reflected in
428  * the above comment.  Failure to do so will result in problems on various
429  * error conditions including link flapping.
430  */
431 static int
432 cxgb_controller_attach(device_t dev)
433 {
434         device_t child;
435         const struct adapter_info *ai;
436         struct adapter *sc;
437         int i, error = 0;
438         uint32_t vers;
439         int port_qsets = 1;
440         int msi_needed, reg;
441         char buf[80];
442
443         sc = device_get_softc(dev);
444         sc->dev = dev;
445         sc->msi_count = 0;
446         ai = cxgb_get_adapter_info(dev);
447
448         /* find the PCIe link width and set max read request to 4KB*/
449         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
450                 uint16_t lnk;
451
452                 lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
453                 sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
454                 if (sc->link_width < 8 &&
455                     (ai->caps & SUPPORTED_10000baseT_Full)) {
456                         device_printf(sc->dev,
457                             "PCIe x%d Link, expect reduced performance\n",
458                             sc->link_width);
459                 }
460
461                 pci_set_max_read_req(dev, 4096);
462         }
463
464         touch_bars(dev);
465         pci_enable_busmaster(dev);
466         /*
467          * Allocate the registers and make them available to the driver.
468          * The registers that we care about for NIC mode are in BAR 0
469          */
470         sc->regs_rid = PCIR_BAR(0);
471         if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
472             &sc->regs_rid, RF_ACTIVE)) == NULL) {
473                 device_printf(dev, "Cannot allocate BAR region 0\n");
474                 return (ENXIO);
475         }
476
477         snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
478             device_get_unit(dev));
479         ADAPTER_LOCK_INIT(sc, sc->lockbuf);
480
481         snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
482             device_get_unit(dev));
483         snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
484             device_get_unit(dev));
485         snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
486             device_get_unit(dev));
487         
488         MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
489         MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
490         MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
491         
492         sc->bt = rman_get_bustag(sc->regs_res);
493         sc->bh = rman_get_bushandle(sc->regs_res);
494         sc->mmio_len = rman_get_size(sc->regs_res);
495
496         for (i = 0; i < MAX_NPORTS; i++)
497                 sc->port[i].adapter = sc;
498
499         if (t3_prep_adapter(sc, ai, 1) < 0) {
500                 printf("prep adapter failed\n");
501                 error = ENODEV;
502                 goto out;
503         }
504
505         sc->udbs_rid = PCIR_BAR(2);
506         sc->udbs_res = NULL;
507         if (is_offload(sc) &&
508             ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
509                    &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
510                 device_printf(dev, "Cannot allocate BAR region 1\n");
511                 error = ENXIO;
512                 goto out;
513         }
514
515         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516          * enough messages for the queue sets.  If that fails, try falling
517          * back to MSI.  If that fails, then try falling back to the legacy
518          * interrupt pin model.
519          */
520         sc->msix_regs_rid = 0x20;
521         if ((msi_allowed >= 2) &&
522             (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523             &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
524
525                 if (multiq)
526                         port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
527                 msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
528
529                 if (pci_msix_count(dev) == 0 ||
530                     (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
531                     sc->msi_count != msi_needed) {
532                         device_printf(dev, "alloc msix failed - "
533                                       "msi_count=%d, msi_needed=%d, err=%d; "
534                                       "will try MSI\n", sc->msi_count,
535                                       msi_needed, error);
536                         sc->msi_count = 0;
537                         port_qsets = 1;
538                         pci_release_msi(dev);
539                         bus_release_resource(dev, SYS_RES_MEMORY,
540                             sc->msix_regs_rid, sc->msix_regs_res);
541                         sc->msix_regs_res = NULL;
542                 } else {
543                         sc->flags |= USING_MSIX;
544                         sc->cxgb_intr = cxgb_async_intr;
545                         device_printf(dev,
546                                       "using MSI-X interrupts (%u vectors)\n",
547                                       sc->msi_count);
548                 }
549         }
550
551         if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
552                 sc->msi_count = 1;
553                 if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
554                         device_printf(dev, "alloc msi failed - "
555                                       "err=%d; will try INTx\n", error);
556                         sc->msi_count = 0;
557                         port_qsets = 1;
558                         pci_release_msi(dev);
559                 } else {
560                         sc->flags |= USING_MSI;
561                         sc->cxgb_intr = t3_intr_msi;
562                         device_printf(dev, "using MSI interrupts\n");
563                 }
564         }
565         if (sc->msi_count == 0) {
566                 device_printf(dev, "using line interrupts\n");
567                 sc->cxgb_intr = t3b_intr;
568         }
569
570         /* Create a private taskqueue thread for handling driver events */
571         sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
572             taskqueue_thread_enqueue, &sc->tq);
573         if (sc->tq == NULL) {
574                 device_printf(dev, "failed to allocate controller task queue\n");
575                 goto out;
576         }
577
578         taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
579             device_get_nameunit(dev));
580         TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
581
582         
583         /* Create a periodic callout for checking adapter status */
584         callout_init(&sc->cxgb_tick_ch, TRUE);
585         
586         if (t3_check_fw_version(sc) < 0 || force_fw_update) {
587                 /*
588                  * Warn user that a firmware update will be attempted in init.
589                  */
590                 device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
591                     FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
592                 sc->flags &= ~FW_UPTODATE;
593         } else {
594                 sc->flags |= FW_UPTODATE;
595         }
596
597         if (t3_check_tpsram_version(sc) < 0) {
598                 /*
599                  * Warn user that a firmware update will be attempted in init.
600                  */
601                 device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
602                     t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
603                 sc->flags &= ~TPS_UPTODATE;
604         } else {
605                 sc->flags |= TPS_UPTODATE;
606         }
607         
608         /*
609          * Create a child device for each MAC.  The ethernet attachment
610          * will be done in these children.
611          */     
612         for (i = 0; i < (sc)->params.nports; i++) {
613                 struct port_info *pi;
614                 
615                 if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
616                         device_printf(dev, "failed to add child port\n");
617                         error = EINVAL;
618                         goto out;
619                 }
620                 pi = &sc->port[i];
621                 pi->adapter = sc;
622                 pi->nqsets = port_qsets;
623                 pi->first_qset = i*port_qsets;
624                 pi->port_id = i;
625                 pi->tx_chan = i >= ai->nports0;
626                 pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
627                 sc->rxpkt_map[pi->txpkt_intf] = i;
628                 sc->port[i].tx_chan = i >= ai->nports0;
629                 sc->portdev[i] = child;
630                 device_set_softc(child, pi);
631         }
632         if ((error = bus_generic_attach(dev)) != 0)
633                 goto out;
634
635         /* initialize sge private state */
636         t3_sge_init_adapter(sc);
637
638         t3_led_ready(sc);
639         
640         cxgb_offload_init();
641         if (is_offload(sc)) {
642                 setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
643                 cxgb_adapter_ofld(sc);
644         }
645         error = t3_get_fw_version(sc, &vers);
646         if (error)
647                 goto out;
648
649         snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
650             G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
651             G_FW_VERSION_MICRO(vers));
652
653         snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
654                  ai->desc, is_offload(sc) ? "R" : "",
655                  sc->params.vpd.ec, sc->params.vpd.sn);
656         device_set_desc_copy(dev, buf);
657
658         snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
659                  sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
660                  sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
661
662         device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
663         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
664         t3_add_attach_sysctls(sc);
665
666         t3_intr_clear(sc);
667         error = cxgb_setup_interrupts(sc);
668 out:
669         if (error)
670                 cxgb_free(sc);
671
672         return (error);
673 }
674
675 /*
676  * The cxgb_controller_detach routine is called with the device is
677  * unloaded from the system.
678  */
679
680 static int
681 cxgb_controller_detach(device_t dev)
682 {
683         struct adapter *sc;
684
685         sc = device_get_softc(dev);
686
687         cxgb_free(sc);
688
689         return (0);
690 }
691
692 /*
693  * The cxgb_free() is called by the cxgb_controller_detach() routine
694  * to tear down the structures that were built up in
695  * cxgb_controller_attach(), and should be the final piece of work
696  * done when fully unloading the driver.
697  * 
698  *
699  *  1. Shutting down the threads started by the cxgb_controller_attach()
700  *     routine.
701  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
702  *  3. Detaching all of the port devices created during the
703  *     cxgb_controller_attach() routine.
704  *  4. Removing the device children created via cxgb_controller_attach().
705  *  5. Releasing PCI resources associated with the device.
706  *  6. Turning off the offload support, iff it was turned on.
707  *  7. Destroying the mutexes created in cxgb_controller_attach().
708  *
709  */
710 static void
711 cxgb_free(struct adapter *sc)
712 {
713         int i, nqsets = 0;
714
715         ADAPTER_LOCK(sc);
716         sc->flags |= CXGB_SHUTDOWN;
717         ADAPTER_UNLOCK(sc);
718
719         /*
720          * Make sure all child devices are gone.
721          */
722         bus_generic_detach(sc->dev);
723         for (i = 0; i < (sc)->params.nports; i++) {
724                 if (sc->portdev[i] &&
725                     device_delete_child(sc->dev, sc->portdev[i]) != 0)
726                         device_printf(sc->dev, "failed to delete child port\n");
727                 nqsets += sc->port[i].nqsets;
728         }
729
730         /*
731          * At this point, it is as if cxgb_port_detach has run on all ports, and
732          * cxgb_down has run on the adapter.  All interrupts have been silenced,
733          * all open devices have been closed.
734          */
735         KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
736                                            __func__, sc->open_device_map));
737         for (i = 0; i < sc->params.nports; i++) {
738                 KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
739                                                   __func__, i));
740         }
741
742         /*
743          * Finish off the adapter's callouts.
744          */
745         callout_drain(&sc->cxgb_tick_ch);
746         callout_drain(&sc->sge_timer_ch);
747
748         /*
749          * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
750          * sysctls are cleaned up by the kernel linker.
751          */
752         if (sc->flags & FULL_INIT_DONE) {
753                 t3_free_sge_resources(sc, nqsets);
754                 sc->flags &= ~FULL_INIT_DONE;
755         }
756
757         /*
758          * Release all interrupt resources.
759          */
760         cxgb_teardown_interrupts(sc);
761         if (sc->flags & (USING_MSI | USING_MSIX)) {
762                 device_printf(sc->dev, "releasing msi message(s)\n");
763                 pci_release_msi(sc->dev);
764         } else {
765                 device_printf(sc->dev, "no msi message to release\n");
766         }
767
768         if (sc->msix_regs_res != NULL) {
769                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
770                     sc->msix_regs_res);
771         }
772
773         /*
774          * Free the adapter's taskqueue.
775          */
776         if (sc->tq != NULL) {
777                 taskqueue_free(sc->tq);
778                 sc->tq = NULL;
779         }
780         
781         if (is_offload(sc)) {
782                 clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
783                 cxgb_adapter_unofld(sc);
784         }
785
786 #ifdef notyet
787         if (sc->flags & CXGB_OFLD_INIT)
788                 cxgb_offload_deactivate(sc);
789 #endif
790         free(sc->filters, M_DEVBUF);
791         t3_sge_free(sc);
792
793         cxgb_offload_exit();
794
795         if (sc->udbs_res != NULL)
796                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
797                     sc->udbs_res);
798
799         if (sc->regs_res != NULL)
800                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
801                     sc->regs_res);
802
803         MTX_DESTROY(&sc->mdio_lock);
804         MTX_DESTROY(&sc->sge.reg_lock);
805         MTX_DESTROY(&sc->elmer_lock);
806         ADAPTER_LOCK_DEINIT(sc);
807 }
808
809 /**
810  *      setup_sge_qsets - configure SGE Tx/Rx/response queues
811  *      @sc: the controller softc
812  *
813  *      Determines how many sets of SGE queues to use and initializes them.
814  *      We support multiple queue sets per port if we have MSI-X, otherwise
815  *      just one queue set per port.
816  */
817 static int
818 setup_sge_qsets(adapter_t *sc)
819 {
820         int i, j, err, irq_idx = 0, qset_idx = 0;
821         u_int ntxq = SGE_TXQ_PER_SET;
822
823         if ((err = t3_sge_alloc(sc)) != 0) {
824                 device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
825                 return (err);
826         }
827
828         if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
829                 irq_idx = -1;
830
831         for (i = 0; i < (sc)->params.nports; i++) {
832                 struct port_info *pi = &sc->port[i];
833
834                 for (j = 0; j < pi->nqsets; j++, qset_idx++) {
835                         err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
836                             (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
837                             &sc->params.sge.qset[qset_idx], ntxq, pi);
838                         if (err) {
839                                 t3_free_sge_resources(sc, qset_idx);
840                                 device_printf(sc->dev,
841                                     "t3_sge_alloc_qset failed with %d\n", err);
842                                 return (err);
843                         }
844                 }
845         }
846
847         return (0);
848 }
849
850 static void
851 cxgb_teardown_interrupts(adapter_t *sc)
852 {
853         int i;
854
855         for (i = 0; i < SGE_QSETS; i++) {
856                 if (sc->msix_intr_tag[i] == NULL) {
857
858                         /* Should have been setup fully or not at all */
859                         KASSERT(sc->msix_irq_res[i] == NULL &&
860                                 sc->msix_irq_rid[i] == 0,
861                                 ("%s: half-done interrupt (%d).", __func__, i));
862
863                         continue;
864                 }
865
866                 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
867                                   sc->msix_intr_tag[i]);
868                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
869                                      sc->msix_irq_res[i]);
870
871                 sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
872                 sc->msix_irq_rid[i] = 0;
873         }
874
875         if (sc->intr_tag) {
876                 KASSERT(sc->irq_res != NULL,
877                         ("%s: half-done interrupt.", __func__));
878
879                 bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
880                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
881                                      sc->irq_res);
882
883                 sc->irq_res = sc->intr_tag = NULL;
884                 sc->irq_rid = 0;
885         }
886 }
887
888 static int
889 cxgb_setup_interrupts(adapter_t *sc)
890 {
891         struct resource *res;
892         void *tag;
893         int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
894
895         sc->irq_rid = intr_flag ? 1 : 0;
896         sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
897                                              RF_SHAREABLE | RF_ACTIVE);
898         if (sc->irq_res == NULL) {
899                 device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
900                               intr_flag, sc->irq_rid);
901                 err = EINVAL;
902                 sc->irq_rid = 0;
903         } else {
904                 err = bus_setup_intr(sc->dev, sc->irq_res,
905                     INTR_MPSAFE | INTR_TYPE_NET, NULL,
906                     sc->cxgb_intr, sc, &sc->intr_tag);
907
908                 if (err) {
909                         device_printf(sc->dev,
910                                       "Cannot set up interrupt (%x, %u, %d)\n",
911                                       intr_flag, sc->irq_rid, err);
912                         bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
913                                              sc->irq_res);
914                         sc->irq_res = sc->intr_tag = NULL;
915                         sc->irq_rid = 0;
916                 }
917         }
918
919         /* That's all for INTx or MSI */
920         if (!(intr_flag & USING_MSIX) || err)
921                 return (err);
922
923         bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
924         for (i = 0; i < sc->msi_count - 1; i++) {
925                 rid = i + 2;
926                 res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
927                                              RF_SHAREABLE | RF_ACTIVE);
928                 if (res == NULL) {
929                         device_printf(sc->dev, "Cannot allocate interrupt "
930                                       "for message %d\n", rid);
931                         err = EINVAL;
932                         break;
933                 }
934
935                 err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
936                                      NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
937                 if (err) {
938                         device_printf(sc->dev, "Cannot set up interrupt "
939                                       "for message %d (%d)\n", rid, err);
940                         bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
941                         break;
942                 }
943
944                 sc->msix_irq_rid[i] = rid;
945                 sc->msix_irq_res[i] = res;
946                 sc->msix_intr_tag[i] = tag;
947                 bus_describe_intr(sc->dev, res, tag, "qs%d", i);
948         }
949
950         if (err)
951                 cxgb_teardown_interrupts(sc);
952
953         return (err);
954 }
955
956
957 static int
958 cxgb_port_probe(device_t dev)
959 {
960         struct port_info *p;
961         char buf[80];
962         const char *desc;
963         
964         p = device_get_softc(dev);
965         desc = p->phy.desc;
966         snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
967         device_set_desc_copy(dev, buf);
968         return (0);
969 }
970
971
972 static int
973 cxgb_makedev(struct port_info *pi)
974 {
975         
976         pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
977             UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
978         
979         if (pi->port_cdev == NULL)
980                 return (ENOMEM);
981
982         pi->port_cdev->si_drv1 = (void *)pi;
983         
984         return (0);
985 }
986
987 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
988     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
989     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
990 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
991
992 static int
993 cxgb_port_attach(device_t dev)
994 {
995         struct port_info *p;
996         struct ifnet *ifp;
997         int err;
998         struct adapter *sc;
999
1000         p = device_get_softc(dev);
1001         sc = p->adapter;
1002         snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1003             device_get_unit(device_get_parent(dev)), p->port_id);
1004         PORT_LOCK_INIT(p, p->lockbuf);
1005
1006         callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1007         TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1008
1009         /* Allocate an ifnet object and set it up */
1010         ifp = p->ifp = if_alloc(IFT_ETHER);
1011         if (ifp == NULL) {
1012                 device_printf(dev, "Cannot allocate ifnet\n");
1013                 return (ENOMEM);
1014         }
1015         
1016         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1017         ifp->if_init = cxgb_init;
1018         ifp->if_softc = p;
1019         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1020         ifp->if_ioctl = cxgb_ioctl;
1021         ifp->if_transmit = cxgb_transmit;
1022         ifp->if_qflush = cxgb_qflush;
1023
1024         ifp->if_capabilities = CXGB_CAP;
1025         ifp->if_capenable = CXGB_CAP_ENABLE;
1026         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1027
1028         /*
1029          * Disable TSO on 4-port - it isn't supported by the firmware.
1030          */     
1031         if (sc->params.nports > 2) {
1032                 ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1033                 ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1034                 ifp->if_hwassist &= ~CSUM_TSO;
1035         }
1036
1037         ether_ifattach(ifp, p->hw_addr);
1038
1039 #ifdef DEFAULT_JUMBO
1040         if (sc->params.nports <= 2)
1041                 ifp->if_mtu = ETHERMTU_JUMBO;
1042 #endif
1043         if ((err = cxgb_makedev(p)) != 0) {
1044                 printf("makedev failed %d\n", err);
1045                 return (err);
1046         }
1047
1048         /* Create a list of media supported by this port */
1049         ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1050             cxgb_media_status);
1051         cxgb_build_medialist(p);
1052       
1053         t3_sge_init_port(p);
1054
1055         return (err);
1056 }
1057
1058 /*
1059  * cxgb_port_detach() is called via the device_detach methods when
1060  * cxgb_free() calls the bus_generic_detach.  It is responsible for 
1061  * removing the device from the view of the kernel, i.e. from all 
1062  * interfaces lists etc.  This routine is only called when the driver is 
1063  * being unloaded, not when the link goes down.
1064  */
1065 static int
1066 cxgb_port_detach(device_t dev)
1067 {
1068         struct port_info *p;
1069         struct adapter *sc;
1070         int i;
1071
1072         p = device_get_softc(dev);
1073         sc = p->adapter;
1074
1075         /* Tell cxgb_ioctl and if_init that the port is going away */
1076         ADAPTER_LOCK(sc);
1077         SET_DOOMED(p);
1078         wakeup(&sc->flags);
1079         while (IS_BUSY(sc))
1080                 mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1081         SET_BUSY(sc);
1082         ADAPTER_UNLOCK(sc);
1083
1084         if (p->port_cdev != NULL)
1085                 destroy_dev(p->port_cdev);
1086
1087         cxgb_uninit_synchronized(p);
1088         ether_ifdetach(p->ifp);
1089
1090         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1091                 struct sge_qset *qs = &sc->sge.qs[i];
1092                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1093
1094                 callout_drain(&txq->txq_watchdog);
1095                 callout_drain(&txq->txq_timer);
1096         }
1097
1098         PORT_LOCK_DEINIT(p);
1099         if_free(p->ifp);
1100         p->ifp = NULL;
1101
1102         ADAPTER_LOCK(sc);
1103         CLR_BUSY(sc);
1104         wakeup_one(&sc->flags);
1105         ADAPTER_UNLOCK(sc);
1106         return (0);
1107 }
1108
1109 void
1110 t3_fatal_err(struct adapter *sc)
1111 {
1112         u_int fw_status[4];
1113
1114         if (sc->flags & FULL_INIT_DONE) {
1115                 t3_sge_stop(sc);
1116                 t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1117                 t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1118                 t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1119                 t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1120                 t3_intr_disable(sc);
1121         }
1122         device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1123         if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1124                 device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1125                     fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1126 }
1127
1128 int
1129 t3_os_find_pci_capability(adapter_t *sc, int cap)
1130 {
1131         device_t dev;
1132         struct pci_devinfo *dinfo;
1133         pcicfgregs *cfg;
1134         uint32_t status;
1135         uint8_t ptr;
1136
1137         dev = sc->dev;
1138         dinfo = device_get_ivars(dev);
1139         cfg = &dinfo->cfg;
1140
1141         status = pci_read_config(dev, PCIR_STATUS, 2);
1142         if (!(status & PCIM_STATUS_CAPPRESENT))
1143                 return (0);
1144
1145         switch (cfg->hdrtype & PCIM_HDRTYPE) {
1146         case 0:
1147         case 1:
1148                 ptr = PCIR_CAP_PTR;
1149                 break;
1150         case 2:
1151                 ptr = PCIR_CAP_PTR_2;
1152                 break;
1153         default:
1154                 return (0);
1155                 break;
1156         }
1157         ptr = pci_read_config(dev, ptr, 1);
1158
1159         while (ptr != 0) {
1160                 if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1161                         return (ptr);
1162                 ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1163         }
1164
1165         return (0);
1166 }
1167
1168 int
1169 t3_os_pci_save_state(struct adapter *sc)
1170 {
1171         device_t dev;
1172         struct pci_devinfo *dinfo;
1173
1174         dev = sc->dev;
1175         dinfo = device_get_ivars(dev);
1176
1177         pci_cfg_save(dev, dinfo, 0);
1178         return (0);
1179 }
1180
1181 int
1182 t3_os_pci_restore_state(struct adapter *sc)
1183 {
1184         device_t dev;
1185         struct pci_devinfo *dinfo;
1186
1187         dev = sc->dev;
1188         dinfo = device_get_ivars(dev);
1189
1190         pci_cfg_restore(dev, dinfo);
1191         return (0);
1192 }
1193
1194 /**
1195  *      t3_os_link_changed - handle link status changes
1196  *      @sc: the adapter associated with the link change
1197  *      @port_id: the port index whose link status has changed
1198  *      @link_status: the new status of the link
1199  *      @speed: the new speed setting
1200  *      @duplex: the new duplex setting
1201  *      @fc: the new flow-control setting
1202  *
1203  *      This is the OS-dependent handler for link status changes.  The OS
1204  *      neutral handler takes care of most of the processing for these events,
1205  *      then calls this handler for any OS-specific processing.
1206  */
1207 void
1208 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1209      int duplex, int fc, int mac_was_reset)
1210 {
1211         struct port_info *pi = &adapter->port[port_id];
1212         struct ifnet *ifp = pi->ifp;
1213
1214         /* no race with detach, so ifp should always be good */
1215         KASSERT(ifp, ("%s: if detached.", __func__));
1216
1217         /* Reapply mac settings if they were lost due to a reset */
1218         if (mac_was_reset) {
1219                 PORT_LOCK(pi);
1220                 cxgb_update_mac_settings(pi);
1221                 PORT_UNLOCK(pi);
1222         }
1223
1224         if (link_status) {
1225                 ifp->if_baudrate = IF_Mbps(speed);
1226                 if_link_state_change(ifp, LINK_STATE_UP);
1227         } else
1228                 if_link_state_change(ifp, LINK_STATE_DOWN);
1229 }
1230
1231 /**
1232  *      t3_os_phymod_changed - handle PHY module changes
1233  *      @phy: the PHY reporting the module change
1234  *      @mod_type: new module type
1235  *
1236  *      This is the OS-dependent handler for PHY module changes.  It is
1237  *      invoked when a PHY module is removed or inserted for any OS-specific
1238  *      processing.
1239  */
1240 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1241 {
1242         static const char *mod_str[] = {
1243                 NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1244         };
1245         struct port_info *pi = &adap->port[port_id];
1246         int mod = pi->phy.modtype;
1247
1248         if (mod != pi->media.ifm_cur->ifm_data)
1249                 cxgb_build_medialist(pi);
1250
1251         if (mod == phy_modtype_none)
1252                 if_printf(pi->ifp, "PHY module unplugged\n");
1253         else {
1254                 KASSERT(mod < ARRAY_SIZE(mod_str),
1255                         ("invalid PHY module type %d", mod));
1256                 if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1257         }
1258 }
1259
1260 void
1261 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1262 {
1263
1264         /*
1265          * The ifnet might not be allocated before this gets called,
1266          * as this is called early on in attach by t3_prep_adapter
1267          * save the address off in the port structure
1268          */
1269         if (cxgb_debug)
1270                 printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1271         bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1272 }
1273
1274 /*
1275  * Programs the XGMAC based on the settings in the ifnet.  These settings
1276  * include MTU, MAC address, mcast addresses, etc.
1277  */
1278 static void
1279 cxgb_update_mac_settings(struct port_info *p)
1280 {
1281         struct ifnet *ifp = p->ifp;
1282         struct t3_rx_mode rm;
1283         struct cmac *mac = &p->mac;
1284         int mtu, hwtagging;
1285
1286         PORT_LOCK_ASSERT_OWNED(p);
1287
1288         bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1289
1290         mtu = ifp->if_mtu;
1291         if (ifp->if_capenable & IFCAP_VLAN_MTU)
1292                 mtu += ETHER_VLAN_ENCAP_LEN;
1293
1294         hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1295
1296         t3_mac_set_mtu(mac, mtu);
1297         t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1298         t3_mac_set_address(mac, 0, p->hw_addr);
1299         t3_init_rx_mode(&rm, p);
1300         t3_mac_set_rx_mode(mac, &rm);
1301 }
1302
1303
1304 static int
1305 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1306                               unsigned long n)
1307 {
1308         int attempts = 5;
1309
1310         while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1311                 if (!--attempts)
1312                         return (ETIMEDOUT);
1313                 t3_os_sleep(10);
1314         }
1315         return 0;
1316 }
1317
1318 static int
1319 init_tp_parity(struct adapter *adap)
1320 {
1321         int i;
1322         struct mbuf *m;
1323         struct cpl_set_tcb_field *greq;
1324         unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1325
1326         t3_tp_set_offload_mode(adap, 1);
1327
1328         for (i = 0; i < 16; i++) {
1329                 struct cpl_smt_write_req *req;
1330
1331                 m = m_gethdr(M_WAITOK, MT_DATA);
1332                 req = mtod(m, struct cpl_smt_write_req *);
1333                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1334                 memset(req, 0, sizeof(*req));
1335                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1336                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1337                 req->iff = i;
1338                 t3_mgmt_tx(adap, m);
1339         }
1340
1341         for (i = 0; i < 2048; i++) {
1342                 struct cpl_l2t_write_req *req;
1343
1344                 m = m_gethdr(M_WAITOK, MT_DATA);
1345                 req = mtod(m, struct cpl_l2t_write_req *);
1346                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1347                 memset(req, 0, sizeof(*req));
1348                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1349                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1350                 req->params = htonl(V_L2T_W_IDX(i));
1351                 t3_mgmt_tx(adap, m);
1352         }
1353
1354         for (i = 0; i < 2048; i++) {
1355                 struct cpl_rte_write_req *req;
1356
1357                 m = m_gethdr(M_WAITOK, MT_DATA);
1358                 req = mtod(m, struct cpl_rte_write_req *);
1359                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1360                 memset(req, 0, sizeof(*req));
1361                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1362                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1363                 req->l2t_idx = htonl(V_L2T_W_IDX(i));
1364                 t3_mgmt_tx(adap, m);
1365         }
1366
1367         m = m_gethdr(M_WAITOK, MT_DATA);
1368         greq = mtod(m, struct cpl_set_tcb_field *);
1369         m->m_len = m->m_pkthdr.len = sizeof(*greq);
1370         memset(greq, 0, sizeof(*greq));
1371         greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1372         OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1373         greq->mask = htobe64(1);
1374         t3_mgmt_tx(adap, m);
1375
1376         i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1377         t3_tp_set_offload_mode(adap, 0);
1378         return (i);
1379 }
1380
1381 /**
1382  *      setup_rss - configure Receive Side Steering (per-queue connection demux) 
1383  *      @adap: the adapter
1384  *
1385  *      Sets up RSS to distribute packets to multiple receive queues.  We
1386  *      configure the RSS CPU lookup table to distribute to the number of HW
1387  *      receive queues, and the response queue lookup table to narrow that
1388  *      down to the response queues actually configured for each port.
1389  *      We always configure the RSS mapping for two ports since the mapping
1390  *      table has plenty of entries.
1391  */
1392 static void
1393 setup_rss(adapter_t *adap)
1394 {
1395         int i;
1396         u_int nq[2]; 
1397         uint8_t cpus[SGE_QSETS + 1];
1398         uint16_t rspq_map[RSS_TABLE_SIZE];
1399         
1400         for (i = 0; i < SGE_QSETS; ++i)
1401                 cpus[i] = i;
1402         cpus[SGE_QSETS] = 0xff;
1403
1404         nq[0] = nq[1] = 0;
1405         for_each_port(adap, i) {
1406                 const struct port_info *pi = adap2pinfo(adap, i);
1407
1408                 nq[pi->tx_chan] += pi->nqsets;
1409         }
1410         for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1411                 rspq_map[i] = nq[0] ? i % nq[0] : 0;
1412                 rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1413         }
1414
1415         /* Calculate the reverse RSS map table */
1416         for (i = 0; i < SGE_QSETS; ++i)
1417                 adap->rrss_map[i] = 0xff;
1418         for (i = 0; i < RSS_TABLE_SIZE; ++i)
1419                 if (adap->rrss_map[rspq_map[i]] == 0xff)
1420                         adap->rrss_map[rspq_map[i]] = i;
1421
1422         t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1423                       F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1424                       F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1425                       cpus, rspq_map);
1426
1427 }
1428
/*
 * Hand an mbuf to the offload device for transmission.  This is a thin
 * wrapper that returns whatever t3_offload_tx() returns.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{

        return (t3_offload_tx(tdev, m));
}
1441
1442 static int
1443 write_smt_entry(struct adapter *adapter, int idx)
1444 {
1445         struct port_info *pi = &adapter->port[idx];
1446         struct cpl_smt_write_req *req;
1447         struct mbuf *m;
1448
1449         if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1450                 return (ENOMEM);
1451
1452         req = mtod(m, struct cpl_smt_write_req *);
1453         m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1454         
1455         req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1456         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1457         req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1458         req->iff = idx;
1459         memset(req->src_mac1, 0, sizeof(req->src_mac1));
1460         memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1461
1462         m_set_priority(m, 1);
1463
1464         offload_tx(&adapter->tdev, m);
1465
1466         return (0);
1467 }
1468
1469 static int
1470 init_smt(struct adapter *adapter)
1471 {
1472         int i;
1473
1474         for_each_port(adapter, i)
1475                 write_smt_entry(adapter, i);
1476         return 0;
1477 }
1478
1479 static void
1480 init_port_mtus(adapter_t *adapter)
1481 {
1482         unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1483
1484         t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1485 }
1486
1487 static void
1488 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1489                               int hi, int port)
1490 {
1491         struct mbuf *m;
1492         struct mngt_pktsched_wr *req;
1493
1494         m = m_gethdr(M_DONTWAIT, MT_DATA);
1495         if (m) {        
1496                 req = mtod(m, struct mngt_pktsched_wr *);
1497                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1498                 req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1499                 req->sched = sched;
1500                 req->idx = qidx;
1501                 req->min = lo;
1502                 req->max = hi;
1503                 req->binding = port;
1504                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1505                 t3_mgmt_tx(adap, m);
1506         }
1507 }
1508
1509 static void
1510 bind_qsets(adapter_t *sc)
1511 {
1512         int i, j;
1513
1514         for (i = 0; i < (sc)->params.nports; ++i) {
1515                 const struct port_info *pi = adap2pinfo(sc, i);
1516
1517                 for (j = 0; j < pi->nqsets; ++j) {
1518                         send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1519                                           -1, pi->tx_chan);
1520
1521                 }
1522         }
1523 }
1524
1525 static void
1526 update_tpeeprom(struct adapter *adap)
1527 {
1528         const struct firmware *tpeeprom;
1529
1530         uint32_t version;
1531         unsigned int major, minor;
1532         int ret, len;
1533         char rev, name[32];
1534
1535         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1536
1537         major = G_TP_VERSION_MAJOR(version);
1538         minor = G_TP_VERSION_MINOR(version);
1539         if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1540                 return; 
1541
1542         rev = t3rev2char(adap);
1543         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1544
1545         tpeeprom = firmware_get(name);
1546         if (tpeeprom == NULL) {
1547                 device_printf(adap->dev,
1548                               "could not load TP EEPROM: unable to load %s\n",
1549                               name);
1550                 return;
1551         }
1552
1553         len = tpeeprom->datasize - 4;
1554         
1555         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1556         if (ret)
1557                 goto release_tpeeprom;
1558
1559         if (len != TP_SRAM_LEN) {
1560                 device_printf(adap->dev,
1561                               "%s length is wrong len=%d expected=%d\n", name,
1562                               len, TP_SRAM_LEN);
1563                 return;
1564         }
1565         
1566         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1567             TP_SRAM_OFFSET);
1568         
1569         if (!ret) {
1570                 device_printf(adap->dev,
1571                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1572                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1573         } else 
1574                 device_printf(adap->dev,
1575                               "Protocol SRAM image update in EEPROM failed\n");
1576
1577 release_tpeeprom:
1578         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1579         
1580         return;
1581 }
1582
1583 static int
1584 update_tpsram(struct adapter *adap)
1585 {
1586         const struct firmware *tpsram;
1587         int ret;
1588         char rev, name[32];
1589
1590         rev = t3rev2char(adap);
1591         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1592
1593         update_tpeeprom(adap);
1594
1595         tpsram = firmware_get(name);
1596         if (tpsram == NULL){
1597                 device_printf(adap->dev, "could not load TP SRAM\n");
1598                 return (EINVAL);
1599         } else
1600                 device_printf(adap->dev, "updating TP SRAM\n");
1601         
1602         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1603         if (ret)
1604                 goto release_tpsram;    
1605
1606         ret = t3_set_proto_sram(adap, tpsram->data);
1607         if (ret)
1608                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1609
1610 release_tpsram:
1611         firmware_put(tpsram, FIRMWARE_UNLOAD);
1612         
1613         return ret;
1614 }
1615
1616 /**
1617  *      cxgb_up - enable the adapter
1618  *      @adap: adapter being enabled
1619  *
1620  *      Called when the first port is enabled, this function performs the
1621  *      actions necessary to make an adapter operational, such as completing
1622  *      the initialization of HW modules, and enabling interrupts.
1623  */
1624 static int
1625 cxgb_up(struct adapter *sc)
1626 {
1627         int err = 0;
1628         unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1629
1630         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1631                                            __func__, sc->open_device_map));
1632
1633         if ((sc->flags & FULL_INIT_DONE) == 0) {
1634
1635                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1636
1637                 if ((sc->flags & FW_UPTODATE) == 0)
1638                         if ((err = upgrade_fw(sc)))
1639                                 goto out;
1640
1641                 if ((sc->flags & TPS_UPTODATE) == 0)
1642                         if ((err = update_tpsram(sc)))
1643                                 goto out;
1644
1645                 if (is_offload(sc) && nfilters != 0) {
1646                         sc->params.mc5.nservers = 0;
1647
1648                         if (nfilters < 0)
1649                                 sc->params.mc5.nfilters = mxf;
1650                         else
1651                                 sc->params.mc5.nfilters = min(nfilters, mxf);
1652                 }
1653
1654                 err = t3_init_hw(sc, 0);
1655                 if (err)
1656                         goto out;
1657
1658                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1659                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1660
1661                 err = setup_sge_qsets(sc);
1662                 if (err)
1663                         goto out;
1664
1665                 alloc_filters(sc);
1666                 setup_rss(sc);
1667
1668                 t3_add_configured_sysctls(sc);
1669                 sc->flags |= FULL_INIT_DONE;
1670         }
1671
1672         t3_intr_clear(sc);
1673         t3_sge_start(sc);
1674         t3_intr_enable(sc);
1675
1676         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1677             is_offload(sc) && init_tp_parity(sc) == 0)
1678                 sc->flags |= TP_PARITY_INIT;
1679
1680         if (sc->flags & TP_PARITY_INIT) {
1681                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1682                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1683         }
1684         
1685         if (!(sc->flags & QUEUES_BOUND)) {
1686                 bind_qsets(sc);
1687                 setup_hw_filters(sc);
1688                 sc->flags |= QUEUES_BOUND;              
1689         }
1690
1691         t3_sge_reset_adapter(sc);
1692 out:
1693         return (err);
1694 }
1695
/*
 * Counterpart of cxgb_up, called when the last open device is closed.  It
 * only stops the SGE and disables interrupts; it does NOT undo the rest of
 * cxgb_up's work.  In particular, the resources acquired under
 * FULL_INIT_DONE are released during controller_detach, not here.
 */
static void
cxgb_down(struct adapter *sc)
{

        t3_sge_stop(sc);
        t3_intr_disable(sc);
}
1707
1708 static int
1709 offload_open(struct port_info *pi)
1710 {
1711         struct adapter *sc = pi->adapter;
1712         struct t3cdev *tdev = &sc->tdev;
1713
1714         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1715
1716         t3_tp_set_offload_mode(sc, 1);
1717         tdev->lldev = pi->ifp;
1718         init_port_mtus(sc);
1719         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1720                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1721         init_smt(sc);
1722         cxgb_add_clients(tdev);
1723
1724         return (0);
1725 }
1726
1727 static int
1728 offload_close(struct t3cdev *tdev)
1729 {
1730         struct adapter *adapter = tdev2adap(tdev);
1731
1732         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1733                 return (0);
1734
1735         /* Call back all registered clients */
1736         cxgb_remove_clients(tdev);
1737
1738         tdev->lldev = NULL;
1739         cxgb_set_dummy_ops(tdev);
1740         t3_tp_set_offload_mode(adapter, 0);
1741
1742         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1743
1744         return (0);
1745 }
1746
1747 /*
1748  * if_init for cxgb ports.
1749  */
1750 static void
1751 cxgb_init(void *arg)
1752 {
1753         struct port_info *p = arg;
1754         struct adapter *sc = p->adapter;
1755
1756         ADAPTER_LOCK(sc);
1757         cxgb_init_locked(p); /* releases adapter lock */
1758         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1759 }
1760
1761 static int
1762 cxgb_init_locked(struct port_info *p)
1763 {
1764         struct adapter *sc = p->adapter;
1765         struct ifnet *ifp = p->ifp;
1766         struct cmac *mac = &p->mac;
1767         int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1768
1769         ADAPTER_LOCK_ASSERT_OWNED(sc);
1770
1771         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1772                 gave_up_lock = 1;
1773                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1774                         rc = EINTR;
1775                         goto done;
1776                 }
1777         }
1778         if (IS_DOOMED(p)) {
1779                 rc = ENXIO;
1780                 goto done;
1781         }
1782         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1783
1784         /*
1785          * The code that runs during one-time adapter initialization can sleep
1786          * so it's important not to hold any locks across it.
1787          */
1788         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1789
1790         if (may_sleep) {
1791                 SET_BUSY(sc);
1792                 gave_up_lock = 1;
1793                 ADAPTER_UNLOCK(sc);
1794         }
1795
1796         if (sc->open_device_map == 0) {
1797                 if ((rc = cxgb_up(sc)) != 0)
1798                         goto done;
1799
1800                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1801                         log(LOG_WARNING,
1802                             "Could not initialize offload capabilities\n");
1803         }
1804
1805         PORT_LOCK(p);
1806         if (isset(&sc->open_device_map, p->port_id) &&
1807             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1808                 PORT_UNLOCK(p);
1809                 goto done;
1810         }
1811         t3_port_intr_enable(sc, p->port_id);
1812         if (!mac->multiport) 
1813                 t3_mac_init(mac);
1814         cxgb_update_mac_settings(p);
1815         t3_link_start(&p->phy, mac, &p->link_config);
1816         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1817         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1818         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1819         PORT_UNLOCK(p);
1820
1821         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1822                 struct sge_qset *qs = &sc->sge.qs[i];
1823                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1824
1825                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1826                                  txq->txq_watchdog.c_cpu);
1827         }
1828
1829         /* all ok */
1830         setbit(&sc->open_device_map, p->port_id);
1831         callout_reset(&p->link_check_ch,
1832             p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1833             link_check_callout, p);
1834
1835 done:
1836         if (may_sleep) {
1837                 ADAPTER_LOCK(sc);
1838                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1839                 CLR_BUSY(sc);
1840         }
1841         if (gave_up_lock)
1842                 wakeup_one(&sc->flags);
1843         ADAPTER_UNLOCK(sc);
1844         return (rc);
1845 }
1846
1847 static int
1848 cxgb_uninit_locked(struct port_info *p)
1849 {
1850         struct adapter *sc = p->adapter;
1851         int rc;
1852
1853         ADAPTER_LOCK_ASSERT_OWNED(sc);
1854
1855         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1856                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1857                         rc = EINTR;
1858                         goto done;
1859                 }
1860         }
1861         if (IS_DOOMED(p)) {
1862                 rc = ENXIO;
1863                 goto done;
1864         }
1865         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1866         SET_BUSY(sc);
1867         ADAPTER_UNLOCK(sc);
1868
1869         rc = cxgb_uninit_synchronized(p);
1870
1871         ADAPTER_LOCK(sc);
1872         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1873         CLR_BUSY(sc);
1874         wakeup_one(&sc->flags);
1875 done:
1876         ADAPTER_UNLOCK(sc);
1877         return (rc);
1878 }
1879
1880 /*
1881  * Called on "ifconfig down", and from port_detach
1882  */
1883 static int
1884 cxgb_uninit_synchronized(struct port_info *pi)
1885 {
1886         struct adapter *sc = pi->adapter;
1887         struct ifnet *ifp = pi->ifp;
1888
1889         /*
1890          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1891          */
1892         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1893
1894         /*
1895          * Clear this port's bit from the open device map, and then drain all
1896          * the tasks that can access/manipulate this port's port_info or ifp.
1897          * We disable this port's interrupts here and so the slow/ext
1898          * interrupt tasks won't be enqueued.  The tick task will continue to
1899          * be enqueued every second but the runs after this drain will not see
1900          * this port in the open device map.
1901          *
1902          * A well behaved task must take open_device_map into account and ignore
1903          * ports that are not open.
1904          */
1905         clrbit(&sc->open_device_map, pi->port_id);
1906         t3_port_intr_disable(sc, pi->port_id);
1907         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1908         taskqueue_drain(sc->tq, &sc->tick_task);
1909
1910         callout_drain(&pi->link_check_ch);
1911         taskqueue_drain(sc->tq, &pi->link_check_task);
1912
1913         PORT_LOCK(pi);
1914         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1915
1916         /* disable pause frames */
1917         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1918
1919         /* Reset RX FIFO HWM */
1920         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1921                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1922
1923         DELAY(100 * 1000);
1924
1925         /* Wait for TXFIFO empty */
1926         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1927                         F_TXFIFO_EMPTY, 1, 20, 5);
1928
1929         DELAY(100 * 1000);
1930         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1931
1932
1933         pi->phy.ops->power_down(&pi->phy, 1);
1934
1935         PORT_UNLOCK(pi);
1936
1937         pi->link_config.link_ok = 0;
1938         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1939
1940         if ((sc->open_device_map & PORT_MASK) == 0)
1941                 offload_close(&sc->tdev);
1942
1943         if (sc->open_device_map == 0)
1944                 cxgb_down(pi->adapter);
1945
1946         return (0);
1947 }
1948
1949 /*
1950  * Mark lro enabled or disabled in all qsets for this port
1951  */
1952 static int
1953 cxgb_set_lro(struct port_info *p, int enabled)
1954 {
1955         int i;
1956         struct adapter *adp = p->adapter;
1957         struct sge_qset *q;
1958
1959         for (i = 0; i < p->nqsets; i++) {
1960                 q = &adp->sge.qs[p->first_qset + i];
1961                 q->lro.enabled = (enabled != 0);
1962         }
1963         return (0);
1964 }
1965
1966 static int
1967 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1968 {
1969         struct port_info *p = ifp->if_softc;
1970         struct adapter *sc = p->adapter;
1971         struct ifreq *ifr = (struct ifreq *)data;
1972         int flags, error = 0, mtu;
1973         uint32_t mask;
1974
1975         switch (command) {
1976         case SIOCSIFMTU:
1977                 ADAPTER_LOCK(sc);
1978                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1979                 if (error) {
1980 fail:
1981                         ADAPTER_UNLOCK(sc);
1982                         return (error);
1983                 }
1984
1985                 mtu = ifr->ifr_mtu;
1986                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1987                         error = EINVAL;
1988                 } else {
1989                         ifp->if_mtu = mtu;
1990                         PORT_LOCK(p);
1991                         cxgb_update_mac_settings(p);
1992                         PORT_UNLOCK(p);
1993                 }
1994                 ADAPTER_UNLOCK(sc);
1995                 break;
1996         case SIOCSIFFLAGS:
1997                 ADAPTER_LOCK(sc);
1998                 if (IS_DOOMED(p)) {
1999                         error = ENXIO;
2000                         goto fail;
2001                 }
2002                 if (ifp->if_flags & IFF_UP) {
2003                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2004                                 flags = p->if_flags;
2005                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2006                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2007                                         if (IS_BUSY(sc)) {
2008                                                 error = EBUSY;
2009                                                 goto fail;
2010                                         }
2011                                         PORT_LOCK(p);
2012                                         cxgb_update_mac_settings(p);
2013                                         PORT_UNLOCK(p);
2014                                 }
2015                                 ADAPTER_UNLOCK(sc);
2016                         } else
2017                                 error = cxgb_init_locked(p);
2018                         p->if_flags = ifp->if_flags;
2019                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2020                         error = cxgb_uninit_locked(p);
2021                 else
2022                         ADAPTER_UNLOCK(sc);
2023
2024                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2025                 break;
2026         case SIOCADDMULTI:
2027         case SIOCDELMULTI:
2028                 ADAPTER_LOCK(sc);
2029                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2030                 if (error)
2031                         goto fail;
2032
2033                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2034                         PORT_LOCK(p);
2035                         cxgb_update_mac_settings(p);
2036                         PORT_UNLOCK(p);
2037                 }
2038                 ADAPTER_UNLOCK(sc);
2039
2040                 break;
2041         case SIOCSIFCAP:
2042                 ADAPTER_LOCK(sc);
2043                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2044                 if (error)
2045                         goto fail;
2046
2047                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2048                 if (mask & IFCAP_TXCSUM) {
2049                         ifp->if_capenable ^= IFCAP_TXCSUM;
2050                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2051
2052                         if (IFCAP_TSO & ifp->if_capenable &&
2053                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2054                                 ifp->if_capenable &= ~IFCAP_TSO;
2055                                 ifp->if_hwassist &= ~CSUM_TSO;
2056                                 if_printf(ifp,
2057                                     "tso disabled due to -txcsum.\n");
2058                         }
2059                 }
2060                 if (mask & IFCAP_RXCSUM)
2061                         ifp->if_capenable ^= IFCAP_RXCSUM;
2062                 if (mask & IFCAP_TSO4) {
2063                         ifp->if_capenable ^= IFCAP_TSO4;
2064
2065                         if (IFCAP_TSO & ifp->if_capenable) {
2066                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2067                                         ifp->if_hwassist |= CSUM_TSO;
2068                                 else {
2069                                         ifp->if_capenable &= ~IFCAP_TSO;
2070                                         ifp->if_hwassist &= ~CSUM_TSO;
2071                                         if_printf(ifp,
2072                                             "enable txcsum first.\n");
2073                                         error = EAGAIN;
2074                                 }
2075                         } else
2076                                 ifp->if_hwassist &= ~CSUM_TSO;
2077                 }
2078                 if (mask & IFCAP_LRO) {
2079                         ifp->if_capenable ^= IFCAP_LRO;
2080
2081                         /* Safe to do this even if cxgb_up not called yet */
2082                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2083                 }
2084                 if (mask & IFCAP_VLAN_HWTAGGING) {
2085                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2086                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2087                                 PORT_LOCK(p);
2088                                 cxgb_update_mac_settings(p);
2089                                 PORT_UNLOCK(p);
2090                         }
2091                 }
2092                 if (mask & IFCAP_VLAN_MTU) {
2093                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2094                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2095                                 PORT_LOCK(p);
2096                                 cxgb_update_mac_settings(p);
2097                                 PORT_UNLOCK(p);
2098                         }
2099                 }
2100                 if (mask & IFCAP_VLAN_HWTSO)
2101                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2102                 if (mask & IFCAP_VLAN_HWCSUM)
2103                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2104
2105 #ifdef VLAN_CAPABILITIES
2106                 VLAN_CAPABILITIES(ifp);
2107 #endif
2108                 ADAPTER_UNLOCK(sc);
2109                 break;
2110         case SIOCSIFMEDIA:
2111         case SIOCGIFMEDIA:
2112                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2113                 break;
2114         default:
2115                 error = ether_ioctl(ifp, command, data);
2116         }
2117
2118         return (error);
2119 }
2120
/*
 * Media change callback.  Changing the media is not supported: the request
 * is always rejected with EOPNOTSUPP and `ifp' is not examined.
 */
static int
cxgb_media_change(struct ifnet *ifp)
{
	const int rc = EOPNOTSUPP;

	return (rc);
}
2126
2127 /*
2128  * Translates phy->modtype to the correct Ethernet media subtype.
2129  */
2130 static int
2131 cxgb_ifm_type(int mod)
2132 {
2133         switch (mod) {
2134         case phy_modtype_sr:
2135                 return (IFM_10G_SR);
2136         case phy_modtype_lr:
2137                 return (IFM_10G_LR);
2138         case phy_modtype_lrm:
2139                 return (IFM_10G_LRM);
2140         case phy_modtype_twinax:
2141                 return (IFM_10G_TWINAX);
2142         case phy_modtype_twinax_long:
2143                 return (IFM_10G_TWINAX_LONG);
2144         case phy_modtype_none:
2145                 return (IFM_NONE);
2146         case phy_modtype_unknown:
2147                 return (IFM_UNKNOWN);
2148         }
2149
2150         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2151         return (IFM_UNKNOWN);
2152 }
2153
2154 /*
2155  * Rebuilds the ifmedia list for this port, and sets the current media.
2156  */
2157 static void
2158 cxgb_build_medialist(struct port_info *p)
2159 {
2160         struct cphy *phy = &p->phy;
2161         struct ifmedia *media = &p->media;
2162         int mod = phy->modtype;
2163         int m = IFM_ETHER | IFM_FDX;
2164
2165         PORT_LOCK(p);
2166
2167         ifmedia_removeall(media);
2168         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2169                 /* Copper (RJ45) */
2170
2171                 if (phy->caps & SUPPORTED_10000baseT_Full)
2172                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2173
2174                 if (phy->caps & SUPPORTED_1000baseT_Full)
2175                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2176
2177                 if (phy->caps & SUPPORTED_100baseT_Full)
2178                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2179
2180                 if (phy->caps & SUPPORTED_10baseT_Full)
2181                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2182
2183                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2184                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2185
2186         } else if (phy->caps & SUPPORTED_TP) {
2187                 /* Copper (CX4) */
2188
2189                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2190                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2191
2192                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2193                 ifmedia_set(media, m | IFM_10G_CX4);
2194
2195         } else if (phy->caps & SUPPORTED_FIBRE &&
2196                    phy->caps & SUPPORTED_10000baseT_Full) {
2197                 /* 10G optical (but includes SFP+ twinax) */
2198
2199                 m |= cxgb_ifm_type(mod);
2200                 if (IFM_SUBTYPE(m) == IFM_NONE)
2201                         m &= ~IFM_FDX;
2202
2203                 ifmedia_add(media, m, mod, NULL);
2204                 ifmedia_set(media, m);
2205
2206         } else if (phy->caps & SUPPORTED_FIBRE &&
2207                    phy->caps & SUPPORTED_1000baseT_Full) {
2208                 /* 1G optical */
2209
2210                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2211                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2212                 ifmedia_set(media, m | IFM_1000_SX);
2213
2214         } else {
2215                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2216                             phy->caps));
2217         }
2218
2219         PORT_UNLOCK(p);
2220 }
2221
2222 static void
2223 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2224 {
2225         struct port_info *p = ifp->if_softc;
2226         struct ifmedia_entry *cur = p->media.ifm_cur;
2227         int speed = p->link_config.speed;
2228
2229         if (cur->ifm_data != p->phy.modtype) {
2230                 cxgb_build_medialist(p);
2231                 cur = p->media.ifm_cur;
2232         }
2233
2234         ifmr->ifm_status = IFM_AVALID;
2235         if (!p->link_config.link_ok)
2236                 return;
2237
2238         ifmr->ifm_status |= IFM_ACTIVE;
2239
2240         /*
2241          * active and current will differ iff current media is autoselect.  That
2242          * can happen only for copper RJ45.
2243          */
2244         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2245                 return;
2246         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2247                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2248
2249         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2250         if (speed == SPEED_10000)
2251                 ifmr->ifm_active |= IFM_10G_T;
2252         else if (speed == SPEED_1000)
2253                 ifmr->ifm_active |= IFM_1000_T;
2254         else if (speed == SPEED_100)
2255                 ifmr->ifm_active |= IFM_100_TX;
2256         else if (speed == SPEED_10)
2257                 ifmr->ifm_active |= IFM_10_T;
2258         else
2259                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2260                             speed));
2261 }
2262
2263 static void
2264 cxgb_async_intr(void *data)
2265 {
2266         adapter_t *sc = data;
2267
2268         t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2269         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2270         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2271 }
2272
2273 static void
2274 link_check_callout(void *arg)
2275 {
2276         struct port_info *pi = arg;
2277         struct adapter *sc = pi->adapter;
2278
2279         if (!isset(&sc->open_device_map, pi->port_id))
2280                 return;
2281
2282         taskqueue_enqueue(sc->tq, &pi->link_check_task);
2283 }
2284
2285 static void
2286 check_link_status(void *arg, int pending)
2287 {
2288         struct port_info *pi = arg;
2289         struct adapter *sc = pi->adapter;
2290
2291         if (!isset(&sc->open_device_map, pi->port_id))
2292                 return;
2293
2294         t3_link_changed(sc, pi->port_id);
2295
2296         if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2297                 callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2298 }
2299
2300 void
2301 t3_os_link_intr(struct port_info *pi)
2302 {
2303         /*
2304          * Schedule a link check in the near future.  If the link is flapping
2305          * rapidly we'll keep resetting the callout and delaying the check until
2306          * things stabilize a bit.
2307          */
2308         callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2309 }
2310
2311 static void
2312 check_t3b2_mac(struct adapter *sc)
2313 {
2314         int i;
2315
2316         if (sc->flags & CXGB_SHUTDOWN)
2317                 return;
2318
2319         for_each_port(sc, i) {
2320                 struct port_info *p = &sc->port[i];
2321                 int status;
2322 #ifdef INVARIANTS
2323                 struct ifnet *ifp = p->ifp;
2324 #endif          
2325
2326                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2327                     !p->link_config.link_ok)
2328                         continue;
2329
2330                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2331                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2332                          __func__, ifp->if_drv_flags, sc->open_device_map));
2333
2334                 PORT_LOCK(p);
2335                 status = t3b2_mac_watchdog_task(&p->mac);
2336                 if (status == 1)
2337                         p->mac.stats.num_toggled++;
2338                 else if (status == 2) {
2339                         struct cmac *mac = &p->mac;
2340
2341                         cxgb_update_mac_settings(p);
2342                         t3_link_start(&p->phy, mac, &p->link_config);
2343                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2344                         t3_port_intr_enable(sc, p->port_id);
2345                         p->mac.stats.num_resets++;
2346                 }
2347                 PORT_UNLOCK(p);
2348         }
2349 }
2350
2351 static void
2352 cxgb_tick(void *arg)
2353 {
2354         adapter_t *sc = (adapter_t *)arg;
2355
2356         if (sc->flags & CXGB_SHUTDOWN)
2357                 return;
2358
2359         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2360         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2361 }
2362
2363 static void
2364 cxgb_tick_handler(void *arg, int count)
2365 {
2366         adapter_t *sc = (adapter_t *)arg;
2367         const struct adapter_params *p = &sc->params;
2368         int i;
2369         uint32_t cause, reset;
2370
2371         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2372                 return;
2373
2374         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2375                 check_t3b2_mac(sc);
2376
2377         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2378         if (cause) {
2379                 struct sge_qset *qs = &sc->sge.qs[0];
2380                 uint32_t mask, v;
2381
2382                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2383
2384                 mask = 1;
2385                 for (i = 0; i < SGE_QSETS; i++) {
2386                         if (v & mask)
2387                                 qs[i].rspq.starved++;
2388                         mask <<= 1;
2389                 }
2390
2391                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2392
2393                 for (i = 0; i < SGE_QSETS * 2; i++) {
2394                         if (v & mask) {
2395                                 qs[i / 2].fl[i % 2].empty++;
2396                         }
2397                         mask <<= 1;
2398                 }
2399
2400                 /* clear */
2401                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2402                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2403         }
2404
2405         for (i = 0; i < sc->params.nports; i++) {
2406                 struct port_info *pi = &sc->port[i];
2407                 struct ifnet *ifp = pi->ifp;
2408                 struct cmac *mac = &pi->mac;
2409                 struct mac_stats *mstats = &mac->stats;
2410                 int drops, j;
2411
2412                 if (!isset(&sc->open_device_map, pi->port_id))
2413                         continue;
2414
2415                 PORT_LOCK(pi);
2416                 t3_mac_update_stats(mac);
2417                 PORT_UNLOCK(pi);
2418
2419                 ifp->if_opackets = mstats->tx_frames;
2420                 ifp->if_ipackets = mstats->rx_frames;
2421                 ifp->if_obytes = mstats->tx_octets;
2422                 ifp->if_ibytes = mstats->rx_octets;
2423                 ifp->if_omcasts = mstats->tx_mcast_frames;
2424                 ifp->if_imcasts = mstats->rx_mcast_frames;
2425                 ifp->if_collisions = mstats->tx_total_collisions;
2426                 ifp->if_iqdrops = mstats->rx_cong_drops;
2427
2428                 drops = 0;
2429                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2430                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2431                 ifp->if_snd.ifq_drops = drops;
2432
2433                 ifp->if_oerrors =
2434                     mstats->tx_excess_collisions +
2435                     mstats->tx_underrun +
2436                     mstats->tx_len_errs +
2437                     mstats->tx_mac_internal_errs +
2438                     mstats->tx_excess_deferral +
2439                     mstats->tx_fcs_errs;
2440                 ifp->if_ierrors =
2441                     mstats->rx_jabber +
2442                     mstats->rx_data_errs +
2443                     mstats->rx_sequence_errs +
2444                     mstats->rx_runt + 
2445                     mstats->rx_too_long +
2446                     mstats->rx_mac_internal_errs +
2447                     mstats->rx_short +
2448                     mstats->rx_fcs_errs;
2449
2450                 if (mac->multiport)
2451                         continue;
2452
2453                 /* Count rx fifo overflows, once per second */
2454                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2455                 reset = 0;
2456                 if (cause & F_RXFIFO_OVERFLOW) {
2457                         mac->stats.rx_fifo_ovfl++;
2458                         reset |= F_RXFIFO_OVERFLOW;
2459                 }
2460                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2461         }
2462 }
2463
2464 static void
2465 touch_bars(device_t dev)
2466 {
2467         /*
2468          * Don't enable yet
2469          */
2470 #if !defined(__LP64__) && 0
2471         u32 v;
2472
2473         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2474         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2475         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2476         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2477         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2478         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2479 #endif
2480 }
2481
2482 static int
2483 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2484 {
2485         uint8_t *buf;
2486         int err = 0;
2487         u32 aligned_offset, aligned_len, *p;
2488         struct adapter *adapter = pi->adapter;
2489
2490
2491         aligned_offset = offset & ~3;
2492         aligned_len = (len + (offset & 3) + 3) & ~3;
2493
2494         if (aligned_offset != offset || aligned_len != len) {
2495                 buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);              
2496                 if (!buf)
2497                         return (ENOMEM);
2498                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2499                 if (!err && aligned_len > 4)
2500                         err = t3_seeprom_read(adapter,
2501                                               aligned_offset + aligned_len - 4,
2502                                               (u32 *)&buf[aligned_len - 4]);
2503                 if (err)
2504                         goto out;
2505                 memcpy(buf + (offset & 3), data, len);
2506         } else
2507                 buf = (uint8_t *)(uintptr_t)data;
2508
2509         err = t3_seeprom_wp(adapter, 0);
2510         if (err)
2511                 goto out;
2512
2513         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2514                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2515                 aligned_offset += 4;
2516         }
2517
2518         if (!err)
2519                 err = t3_seeprom_wp(adapter, 1);
2520 out:
2521         if (buf != data)
2522                 free(buf, M_DEVBUF);
2523         return err;
2524 }
2525
2526
/*
 * Returns 1 if `val' is negative or lies within [lo, hi] (both ends
 * inclusive), and 0 otherwise.  Any negative `val' is accepted regardless of
 * the bounds (presumably negative means "not specified" -- confirm against
 * the callers).
 */
static int
in_range(int val, int lo, int hi)
{
	if (val < 0)
		return (1);

	return (lo <= val && val <= hi);
}
2532
/*
 * open handler for the extension character device.  Nothing needs to be set
 * up, so every open succeeds and none of the arguments are examined.
 */
static int
cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
{
	const int rc = 0;

	return (rc);
}
2538
/*
 * close handler for the extension character device.  Nothing needs to be
 * torn down, so every close succeeds and none of the arguments are examined.
 */
static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	const int rc = 0;

	return (rc);
}
2544
2545 static int
2546 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2547     int fflag, struct thread *td)
2548 {
2549         int mmd, error = 0;
2550         struct port_info *pi = dev->si_drv1;
2551         adapter_t *sc = pi->adapter;
2552
2553 #ifdef PRIV_SUPPORTED   
2554         if (priv_check(td, PRIV_DRIVER)) {
2555                 if (cxgb_debug) 
2556                         printf("user does not have access to privileged ioctls\n");
2557                 return (EPERM);
2558         }
2559 #else
2560         if (suser(td)) {
2561                 if (cxgb_debug)
2562                         printf("user does not have access to privileged ioctls\n");
2563                 return (EPERM);
2564         }
2565 #endif
2566         
2567         switch (cmd) {
2568         case CHELSIO_GET_MIIREG: {
2569                 uint32_t val;
2570                 struct cphy *phy = &pi->phy;
2571                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2572                 
2573                 if (!phy->mdio_read)
2574                         return (EOPNOTSUPP);
2575                 if (is_10G(sc)) {
2576                         mmd = mid->phy_id >> 8;
2577                         if (!mmd)
2578                                 mmd = MDIO_DEV_PCS;
2579                         else if (mmd > MDIO_DEV_VEND2)
2580                                 return (EINVAL);
2581
2582                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2583                                              mid->reg_num, &val);
2584                 } else
2585                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2586                                              mid->reg_num & 0x1f, &val);
2587                 if (error == 0)
2588                         mid->val_out = val;
2589                 break;
2590         }
2591         case CHELSIO_SET_MIIREG: {
2592                 struct cphy *phy = &pi->phy;
2593                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2594
2595                 if (!phy->mdio_write)
2596                         return (EOPNOTSUPP);
2597                 if (is_10G(sc)) {
2598                         mmd = mid->phy_id >> 8;
2599                         if (!mmd)
2600                                 mmd = MDIO_DEV_PCS;
2601                         else if (mmd > MDIO_DEV_VEND2)
2602                                 return (EINVAL);
2603                         
2604                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2605                                               mmd, mid->reg_num, mid->val_in);
2606                 } else
2607                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2608                                               mid->reg_num & 0x1f,
2609                                               mid->val_in);
2610                 break;
2611         }
2612         case CHELSIO_SETREG: {
2613                 struct ch_reg *edata = (struct ch_reg *)data;
2614                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2615                         return (EFAULT);
2616                 t3_write_reg(sc, edata->addr, edata->val);
2617                 break;
2618         }
2619         case CHELSIO_GETREG: {
2620                 struct ch_reg *edata = (struct ch_reg *)data;
2621                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2622                         return (EFAULT);
2623                 edata->val = t3_read_reg(sc, edata->addr);
2624                 break;
2625         }
2626         case CHELSIO_GET_SGE_CONTEXT: {
2627                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2628                 mtx_lock_spin(&sc->sge.reg_lock);
2629                 switch (ecntxt->cntxt_type) {
2630                 case CNTXT_TYPE_EGRESS:
2631                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2632                             ecntxt->data);
2633                         break;
2634                 case CNTXT_TYPE_FL:
2635                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2636                             ecntxt->data);
2637                         break;
2638                 case CNTXT_TYPE_RSP:
2639                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2640                             ecntxt->data);
2641                         break;
2642                 case CNTXT_TYPE_CQ:
2643                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2644                             ecntxt->data);
2645                         break;
2646                 default:
2647                         error = EINVAL;
2648                         break;
2649                 }
2650                 mtx_unlock_spin(&sc->sge.reg_lock);
2651                 break;
2652         }
2653         case CHELSIO_GET_SGE_DESC: {
2654                 struct ch_desc *edesc = (struct ch_desc *)data;
2655                 int ret;
2656                 if (edesc->queue_num >= SGE_QSETS * 6)
2657                         return (EINVAL);
2658                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2659                     edesc->queue_num % 6, edesc->idx, edesc->data);
2660                 if (ret < 0)
2661                         return (EINVAL);
2662                 edesc->size = ret;
2663                 break;
2664         }
2665         case CHELSIO_GET_QSET_PARAMS: {
2666                 struct qset_params *q;
2667                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2668                 int q1 = pi->first_qset;
2669                 int nqsets = pi->nqsets;
2670                 int i;
2671
2672                 if (t->qset_idx >= nqsets)
2673                         return EINVAL;
2674
2675                 i = q1 + t->qset_idx;
2676                 q = &sc->params.sge.qset[i];
2677                 t->rspq_size   = q->rspq_size;
2678                 t->txq_size[0] = q->txq_size[0];
2679                 t->txq_size[1] = q->txq_size[1];
2680                 t->txq_size[2] = q->txq_size[2];
2681                 t->fl_size[0]  = q->fl_size;
2682                 t->fl_size[1]  = q->jumbo_size;
2683                 t->polling     = q->polling;
2684                 t->lro         = q->lro;
2685                 t->intr_lat    = q->coalesce_usecs;
2686                 t->cong_thres  = q->cong_thres;
2687                 t->qnum        = i;
2688
2689                 if ((sc->flags & FULL_INIT_DONE) == 0)
2690                         t->vector = 0;
2691                 else if (sc->flags & USING_MSIX)
2692                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2693                 else
2694                         t->vector = rman_get_start(sc->irq_res);
2695
2696                 break;
2697         }
2698         case CHELSIO_GET_QSET_NUM: {
2699                 struct ch_reg *edata = (struct ch_reg *)data;
2700                 edata->val = pi->nqsets;
2701                 break;
2702         }
2703         case CHELSIO_LOAD_FW: {
2704                 uint8_t *fw_data;
2705                 uint32_t vers;
2706                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2707
2708                 /*
2709                  * You're allowed to load a firmware only before FULL_INIT_DONE
2710                  *
2711                  * FW_UPTODATE is also set so the rest of the initialization
2712                  * will not overwrite what was loaded here.  This gives you the
2713                  * flexibility to load any firmware (and maybe shoot yourself in
2714                  * the foot).
2715                  */
2716
2717                 ADAPTER_LOCK(sc);
2718                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2719                         ADAPTER_UNLOCK(sc);
2720                         return (EBUSY);
2721                 }
2722
2723                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2724                 if (!fw_data)
2725                         error = ENOMEM;
2726                 else
2727                         error = copyin(t->buf, fw_data, t->len);
2728
2729                 if (!error)
2730                         error = -t3_load_fw(sc, fw_data, t->len);
2731
2732                 if (t3_get_fw_version(sc, &vers) == 0) {
2733                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2734                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2735                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2736                 }
2737
2738                 if (!error)
2739                         sc->flags |= FW_UPTODATE;
2740
2741                 free(fw_data, M_DEVBUF);
2742                 ADAPTER_UNLOCK(sc);
2743                 break;
2744         }
2745         case CHELSIO_LOAD_BOOT: {
2746                 uint8_t *boot_data;
2747                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2748
2749                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2750                 if (!boot_data)
2751                         return ENOMEM;
2752
2753                 error = copyin(t->buf, boot_data, t->len);
2754                 if (!error)
2755                         error = -t3_load_boot(sc, boot_data, t->len);
2756
2757                 free(boot_data, M_DEVBUF);
2758                 break;
2759         }
2760         case CHELSIO_GET_PM: {
2761                 struct ch_pm *m = (struct ch_pm *)data;
2762                 struct tp_params *p = &sc->params.tp;
2763
2764                 if (!is_offload(sc))
2765                         return (EOPNOTSUPP);
2766
2767                 m->tx_pg_sz = p->tx_pg_size;
2768                 m->tx_num_pg = p->tx_num_pgs;
2769                 m->rx_pg_sz  = p->rx_pg_size;
2770                 m->rx_num_pg = p->rx_num_pgs;
2771                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2772
2773                 break;
2774         }
2775         case CHELSIO_SET_PM: {
2776                 struct ch_pm *m = (struct ch_pm *)data;
2777                 struct tp_params *p = &sc->params.tp;
2778
2779                 if (!is_offload(sc))
2780                         return (EOPNOTSUPP);
2781                 if (sc->flags & FULL_INIT_DONE)
2782                         return (EBUSY);
2783
2784                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2785                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2786                         return (EINVAL);        /* not power of 2 */
2787                 if (!(m->rx_pg_sz & 0x14000))
2788                         return (EINVAL);        /* not 16KB or 64KB */
2789                 if (!(m->tx_pg_sz & 0x1554000))
2790                         return (EINVAL);
2791                 if (m->tx_num_pg == -1)
2792                         m->tx_num_pg = p->tx_num_pgs;
2793                 if (m->rx_num_pg == -1)
2794                         m->rx_num_pg = p->rx_num_pgs;
2795                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2796                         return (EINVAL);
2797                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2798                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2799                         return (EINVAL);
2800
2801                 p->rx_pg_size = m->rx_pg_sz;
2802                 p->tx_pg_size = m->tx_pg_sz;
2803                 p->rx_num_pgs = m->rx_num_pg;
2804                 p->tx_num_pgs = m->tx_num_pg;
2805                 break;
2806         }
2807         case CHELSIO_SETMTUTAB: {
2808                 struct ch_mtus *m = (struct ch_mtus *)data;
2809                 int i;
2810                 
2811                 if (!is_offload(sc))
2812                         return (EOPNOTSUPP);
2813                 if (offload_running(sc))
2814                         return (EBUSY);
2815                 if (m->nmtus != NMTUS)
2816                         return (EINVAL);
2817                 if (m->mtus[0] < 81)         /* accommodate SACK */
2818                         return (EINVAL);
2819                 
2820                 /*
2821                  * MTUs must be in ascending order
2822                  */
2823                 for (i = 1; i < NMTUS; ++i)
2824                         if (m->mtus[i] < m->mtus[i - 1])
2825                                 return (EINVAL);
2826
2827                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2828                 break;
2829         }
2830         case CHELSIO_GETMTUTAB: {
2831                 struct ch_mtus *m = (struct ch_mtus *)data;
2832
2833                 if (!is_offload(sc))
2834                         return (EOPNOTSUPP);
2835
2836                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2837                 m->nmtus = NMTUS;
2838                 break;
2839         }
2840         case CHELSIO_GET_MEM: {
2841                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2842                 struct mc7 *mem;
2843                 uint8_t *useraddr;
2844                 u64 buf[32];
2845
2846                 /*
2847                  * Use these to avoid modifying len/addr in the return
2848                  * struct
2849                  */
2850                 uint32_t len = t->len, addr = t->addr;
2851
2852                 if (!is_offload(sc))
2853                         return (EOPNOTSUPP);
2854                 if (!(sc->flags & FULL_INIT_DONE))
2855                         return (EIO);         /* need the memory controllers */
2856                 if ((addr & 0x7) || (len & 0x7))
2857                         return (EINVAL);
2858                 if (t->mem_id == MEM_CM)
2859                         mem = &sc->cm;
2860                 else if (t->mem_id == MEM_PMRX)
2861                         mem = &sc->pmrx;
2862                 else if (t->mem_id == MEM_PMTX)
2863                         mem = &sc->pmtx;
2864                 else
2865                         return (EINVAL);
2866
2867                 /*
2868                  * Version scheme:
2869                  * bits 0..9: chip version
2870                  * bits 10..15: chip revision
2871                  */
2872                 t->version = 3 | (sc->params.rev << 10);
2873                 
2874                 /*
2875                  * Read 256 bytes at a time as len can be large and we don't
2876                  * want to use huge intermediate buffers.
2877                  */
2878                 useraddr = (uint8_t *)t->buf; 
2879                 while (len) {
2880                         unsigned int chunk = min(len, sizeof(buf));
2881
2882                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2883                         if (error)
2884                                 return (-error);
2885                         if (copyout(buf, useraddr, chunk))
2886                                 return (EFAULT);
2887                         useraddr += chunk;
2888                         addr += chunk;
2889                         len -= chunk;
2890                 }
2891                 break;
2892         }
2893         case CHELSIO_READ_TCAM_WORD: {
2894                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2895
2896                 if (!is_offload(sc))
2897                         return (EOPNOTSUPP);
2898                 if (!(sc->flags & FULL_INIT_DONE))
2899                         return (EIO);         /* need MC5 */            
2900                 return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2901                 break;
2902         }
2903         case CHELSIO_SET_TRACE_FILTER: {
2904                 struct ch_trace *t = (struct ch_trace *)data;
2905                 const struct trace_params *tp;
2906
2907                 tp = (const struct trace_params *)&t->sip;
2908                 if (t->config_tx)
2909                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2910                                                t->trace_tx);
2911                 if (t->config_rx)
2912                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2913                                                t->trace_rx);
2914                 break;
2915         }
2916         case CHELSIO_SET_PKTSCHED: {
2917                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2918                 if (sc->open_device_map == 0)
2919                         return (EAGAIN);
2920                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2921                     p->binding);
2922                 break;
2923         }
2924         case CHELSIO_IFCONF_GETREGS: {
2925                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2926                 int reglen = cxgb_get_regs_len();
2927                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2928                 if (buf == NULL) {
2929                         return (ENOMEM);
2930                 }
2931                 if (regs->len > reglen)
2932                         regs->len = reglen;
2933                 else if (regs->len < reglen)
2934                         error = ENOBUFS;
2935
2936                 if (!error) {
2937                         cxgb_get_regs(sc, regs, buf);
2938                         error = copyout(buf, regs->data, reglen);
2939                 }
2940                 free(buf, M_DEVBUF);
2941
2942                 break;
2943         }
2944         case CHELSIO_SET_HW_SCHED: {
2945                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2946                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2947
2948                 if ((sc->flags & FULL_INIT_DONE) == 0)
2949                         return (EAGAIN);       /* need TP to be initialized */
2950                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2951                     !in_range(t->channel, 0, 1) ||
2952                     !in_range(t->kbps, 0, 10000000) ||
2953                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2954                     !in_range(t->flow_ipg, 0,
2955                               dack_ticks_to_usec(sc, 0x7ff)))
2956                         return (EINVAL);
2957
2958                 if (t->kbps >= 0) {
2959                         error = t3_config_sched(sc, t->kbps, t->sched);
2960                         if (error < 0)
2961                                 return (-error);
2962                 }
2963                 if (t->class_ipg >= 0)
2964                         t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2965                 if (t->flow_ipg >= 0) {
2966                         t->flow_ipg *= 1000;     /* us -> ns */
2967                         t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2968                 }
2969                 if (t->mode >= 0) {
2970                         int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2971
2972                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2973                                          bit, t->mode ? bit : 0);
2974                 }
2975                 if (t->channel >= 0)
2976                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2977                                          1 << t->sched, t->channel << t->sched);
2978                 break;
2979         }
2980         case CHELSIO_GET_EEPROM: {
2981                 int i;
2982                 struct ch_eeprom *e = (struct ch_eeprom *)data;
2983                 uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2984
2985                 if (buf == NULL) {
2986                         return (ENOMEM);
2987                 }
2988                 e->magic = EEPROM_MAGIC;
2989                 for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2990                         error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2991
2992                 if (!error)
2993                         error = copyout(buf + e->offset, e->data, e->len);
2994
2995                 free(buf, M_DEVBUF);
2996                 break;
2997         }
2998         case CHELSIO_CLEAR_STATS: {
2999                 if (!(sc->flags & FULL_INIT_DONE))
3000                         return EAGAIN;
3001
3002                 PORT_LOCK(pi);
3003                 t3_mac_update_stats(&pi->mac);
3004                 memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3005                 PORT_UNLOCK(pi);
3006                 break;
3007         }
3008         case CHELSIO_GET_UP_LA: {
3009                 struct ch_up_la *la = (struct ch_up_la *)data;
3010                 uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3011                 if (buf == NULL) {
3012                         return (ENOMEM);
3013                 }
3014                 if (la->bufsize < LA_BUFSIZE)
3015                         error = ENOBUFS;
3016
3017                 if (!error)
3018                         error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3019                                               &la->bufsize, buf);
3020                 if (!error)
3021                         error = copyout(buf, la->data, la->bufsize);
3022
3023                 free(buf, M_DEVBUF);
3024                 break;
3025         }
3026         case CHELSIO_GET_UP_IOQS: {
3027                 struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3028                 uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3029                 uint32_t *v;
3030
3031                 if (buf == NULL) {
3032                         return (ENOMEM);
3033                 }
3034                 if (ioqs->bufsize < IOQS_BUFSIZE)
3035                         error = ENOBUFS;
3036
3037                 if (!error)
3038                         error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3039
3040                 if (!error) {
3041                         v = (uint32_t *)buf;
3042
3043                         ioqs->ioq_rx_enable = *v++;
3044                         ioqs->ioq_tx_enable = *v++;
3045                         ioqs->ioq_rx_status = *v++;
3046                         ioqs->ioq_tx_status = *v++;
3047
3048                         error = copyout(v, ioqs->data, ioqs->bufsize);
3049                 }
3050
3051                 free(buf, M_DEVBUF);
3052                 break;
3053         }
3054         case CHELSIO_SET_FILTER: {
3055                 struct ch_filter *f = (struct ch_filter *)data;;
3056                 struct filter_info *p;
3057                 unsigned int nfilters = sc->params.mc5.nfilters;
3058
3059                 if (!is_offload(sc))
3060                         return (EOPNOTSUPP);    /* No TCAM */
3061                 if (!(sc->flags & FULL_INIT_DONE))
3062                         return (EAGAIN);        /* mc5 not setup yet */
3063                 if (nfilters == 0)
3064                         return (EBUSY);         /* TOE will use TCAM */
3065
3066                 /* sanity checks */
3067                 if (f->filter_id >= nfilters ||
3068                     (f->val.dip && f->mask.dip != 0xffffffff) ||
3069                     (f->val.sport && f->mask.sport != 0xffff) ||
3070                     (f->val.dport && f->mask.dport != 0xffff) ||
3071                     (f->val.vlan && f->mask.vlan != 0xfff) ||
3072                     (f->val.vlan_prio &&
3073                         f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3074                     (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3075                     f->qset >= SGE_QSETS ||
3076                     sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3077                         return (EINVAL);
3078
3079                 /* Was allocated with M_WAITOK */
3080                 KASSERT(sc->filters, ("filter table NULL\n"));
3081
3082                 p = &sc->filters[f->filter_id];
3083                 if (p->locked)
3084                         return (EPERM);
3085
3086                 bzero(p, sizeof(*p));
3087                 p->sip = f->val.sip;
3088                 p->sip_mask = f->mask.sip;
3089                 p->dip = f->val.dip;
3090                 p->sport = f->val.sport;
3091                 p->dport = f->val.dport;
3092                 p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3093                 p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3094                     FILTER_NO_VLAN_PRI;
3095                 p->mac_hit = f->mac_hit;
3096                 p->mac_vld = f->mac_addr_idx != 0xffff;
3097                 p->mac_idx = f->mac_addr_idx;
3098                 p->pkt_type = f->proto;
3099                 p->report_filter_id = f->want_filter_id;
3100                 p->pass = f->pass;
3101                 p->rss = f->rss;
3102                 p->qset = f->qset;
3103
3104                 error = set_filter(sc, f->filter_id, p);
3105                 if (error == 0)
3106                         p->valid = 1;
3107                 break;
3108         }
3109         case CHELSIO_DEL_FILTER: {
3110                 struct ch_filter *f = (struct ch_filter *)data;
3111                 struct filter_info *p;
3112                 unsigned int nfilters = sc->params.mc5.nfilters;
3113
3114                 if (!is_offload(sc))
3115                         return (EOPNOTSUPP);
3116                 if (!(sc->flags & FULL_INIT_DONE))
3117                         return (EAGAIN);
3118                 if (nfilters == 0 || sc->filters == NULL)
3119                         return (EINVAL);
3120                 if (f->filter_id >= nfilters)
3121                        return (EINVAL);
3122
3123                 p = &sc->filters[f->filter_id];
3124                 if (p->locked)
3125                         return (EPERM);
3126                 if (!p->valid)
3127                         return (EFAULT); /* Read "Bad address" as "Bad index" */
3128
3129                 bzero(p, sizeof(*p));
3130                 p->sip = p->sip_mask = 0xffffffff;
3131                 p->vlan = 0xfff;
3132                 p->vlan_prio = FILTER_NO_VLAN_PRI;
3133                 p->pkt_type = 1;
3134                 error = set_filter(sc, f->filter_id, p);
3135                 break;
3136         }
3137         case CHELSIO_GET_FILTER: {
3138                 struct ch_filter *f = (struct ch_filter *)data;
3139                 struct filter_info *p;
3140                 unsigned int i, nfilters = sc->params.mc5.nfilters;
3141
3142                 if (!is_offload(sc))
3143                         return (EOPNOTSUPP);
3144                 if (!(sc->flags & FULL_INIT_DONE))
3145                         return (EAGAIN);
3146                 if (nfilters == 0 || sc->filters == NULL)
3147                         return (EINVAL);
3148
3149                 i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3150                 for (; i < nfilters; i++) {
3151                         p = &sc->filters[i];
3152                         if (!p->valid)
3153                                 continue;
3154
3155                         bzero(f, sizeof(*f));
3156
3157                         f->filter_id = i;
3158                         f->val.sip = p->sip;
3159                         f->mask.sip = p->sip_mask;
3160                         f->val.dip = p->dip;
3161                         f->mask.dip = p->dip ? 0xffffffff : 0;
3162                         f->val.sport = p->sport;
3163                         f->mask.sport = p->sport ? 0xffff : 0;
3164                         f->val.dport = p->dport;
3165                         f->mask.dport = p->dport ? 0xffff : 0;
3166                         f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3167                         f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3168                         f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3169                             0 : p->vlan_prio;
3170                         f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3171                             0 : FILTER_NO_VLAN_PRI;
3172                         f->mac_hit = p->mac_hit;
3173                         f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3174                         f->proto = p->pkt_type;
3175                         f->want_filter_id = p->report_filter_id;
3176                         f->pass = p->pass;
3177                         f->rss = p->rss;
3178                         f->qset = p->qset;
3179
3180                         break;
3181                 }
3182                 
3183                 if (i == nfilters)
3184                         f->filter_id = 0xffffffff;
3185                 break;
3186         }
3187         default:
3188                 return (EOPNOTSUPP);
3189                 break;
3190         }
3191
3192         return (error);
3193 }
3194
/*
 * Copy the 32-bit registers at addresses start..end (inclusive, stepping
 * by 4) into buf.  Each value is stored at the byte offset in buf that
 * equals its register address.
 */
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	unsigned int reg;
	uint32_t *dst;

	dst = (uint32_t *)(buf + start);
	for (reg = start; reg <= end; reg += sizeof(uint32_t)) {
		*dst = t3_read_reg(ap, reg);
		dst++;
	}
}
3204
#define T3_REGMAP_SIZE (3 * 1024)

/*
 * Size in bytes of the register dump buffer filled by cxgb_get_regs().
 */
static int
cxgb_get_regs_len(void)
{
	const int regmap_len = T3_REGMAP_SIZE;

	return (regmap_len);
}
3211
3212 static void
3213 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3214 {           
3215         
3216         /*
3217          * Version scheme:
3218          * bits 0..9: chip version
3219          * bits 10..15: chip revision
3220          * bit 31: set for PCIe cards
3221          */
3222         regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3223
3224         /*
3225          * We skip the MAC statistics registers because they are clear-on-read.
3226          * Also reading multi-register stats would need to synchronize with the
3227          * periodic mac stats accumulation.  Hard to justify the complexity.
3228          */
3229         memset(buf, 0, cxgb_get_regs_len());
3230         reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3231         reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3232         reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3233         reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3234         reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3235         reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3236                        XGM_REG(A_XGM_SERDES_STAT3, 1));
3237         reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3238                        XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3239 }
3240
3241 static int
3242 alloc_filters(struct adapter *sc)
3243 {
3244         struct filter_info *p;
3245         unsigned int nfilters = sc->params.mc5.nfilters;
3246
3247         if (nfilters == 0)
3248                 return (0);
3249
3250         p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3251         sc->filters = p;
3252
3253         p = &sc->filters[nfilters - 1];
3254         p->vlan = 0xfff;
3255         p->vlan_prio = FILTER_NO_VLAN_PRI;
3256         p->pass = p->rss = p->valid = p->locked = 1;
3257
3258         return (0);
3259 }
3260
3261 static int
3262 setup_hw_filters(struct adapter *sc)
3263 {
3264         int i, rc;
3265         unsigned int nfilters = sc->params.mc5.nfilters;
3266
3267         if (!sc->filters)
3268                 return (0);
3269
3270         t3_enable_filters(sc);
3271
3272         for (i = rc = 0; i < nfilters && !rc; i++) {
3273                 if (sc->filters[i].locked)
3274                         rc = set_filter(sc, i, &sc->filters[i]);
3275         }
3276
3277         return (rc);
3278 }
3279
3280 static int
3281 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3282 {
3283         int len;
3284         struct mbuf *m;
3285         struct ulp_txpkt *txpkt;
3286         struct work_request_hdr *wr;
3287         struct cpl_pass_open_req *oreq;
3288         struct cpl_set_tcb_field *sreq;
3289
3290         len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3291         KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3292
3293         id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3294               sc->params.mc5.nfilters;
3295
3296         m = m_gethdr(M_WAITOK, MT_DATA);
3297         m->m_len = m->m_pkthdr.len = len;
3298         bzero(mtod(m, char *), len);
3299
3300         wr = mtod(m, struct work_request_hdr *);
3301         wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3302
3303         oreq = (struct cpl_pass_open_req *)(wr + 1);
3304         txpkt = (struct ulp_txpkt *)oreq;
3305         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3306         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3307         OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3308         oreq->local_port = htons(f->dport);
3309         oreq->peer_port = htons(f->sport);
3310         oreq->local_ip = htonl(f->dip);
3311         oreq->peer_ip = htonl(f->sip);
3312         oreq->peer_netmask = htonl(f->sip_mask);
3313         oreq->opt0h = 0;
3314         oreq->opt0l = htonl(F_NO_OFFLOAD);
3315         oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3316                          V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3317                          V_VLAN_PRI(f->vlan_prio >> 1) |
3318                          V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3319                          V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3320                          V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3321
3322         sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3323         set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3324                           (f->report_filter_id << 15) | (1 << 23) |
3325                           ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3326         set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3327         t3_mgmt_tx(sc, m);
3328
3329         if (f->pass && !f->rss) {
3330                 len = sizeof(*sreq);
3331                 m = m_gethdr(M_WAITOK, MT_DATA);
3332                 m->m_len = m->m_pkthdr.len = len;
3333                 bzero(mtod(m, char *), len);
3334                 sreq = mtod(m, struct cpl_set_tcb_field *);
3335                 sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3336                 mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3337                                  (u64)sc->rrss_map[f->qset] << 19);
3338                 t3_mgmt_tx(sc, m);
3339         }
3340         return 0;
3341 }
3342
3343 static inline void
3344 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3345     unsigned int word, u64 mask, u64 val)
3346 {
3347         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3348         req->reply = V_NO_REPLY(1);
3349         req->cpu_idx = 0;
3350         req->word = htons(word);
3351         req->mask = htobe64(mask);
3352         req->val = htobe64(val);
3353 }
3354
3355 static inline void
3356 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3357     unsigned int word, u64 mask, u64 val)
3358 {
3359         struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3360
3361         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3362         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3363         mk_set_tcb_field(req, tid, word, mask, val);
3364 }