]> CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - sys/dev/cxgb/cxgb_main.c
MFC r276959:
[FreeBSD/stable/8.git] / sys / dev / cxgb / cxgb_main.c
1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78
79 #include <cxgb_include.h>
80
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126
127 static device_method_t cxgb_controller_methods[] = {
128         DEVMETHOD(device_probe,         cxgb_controller_probe),
129         DEVMETHOD(device_attach,        cxgb_controller_attach),
130         DEVMETHOD(device_detach,        cxgb_controller_detach),
131
132         DEVMETHOD_END
133 };
134
135 static driver_t cxgb_controller_driver = {
136         "cxgbc",
137         cxgb_controller_methods,
138         sizeof(struct adapter)
139 };
140
141 static devclass_t       cxgb_controller_devclass;
142 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
143
144 /*
145  * Attachment glue for the ports.  Attachment is done directly to the
146  * controller device.
147  */
148 static int cxgb_port_probe(device_t);
149 static int cxgb_port_attach(device_t);
150 static int cxgb_port_detach(device_t);
151
152 static device_method_t cxgb_port_methods[] = {
153         DEVMETHOD(device_probe,         cxgb_port_probe),
154         DEVMETHOD(device_attach,        cxgb_port_attach),
155         DEVMETHOD(device_detach,        cxgb_port_detach),
156         { 0, 0 }
157 };
158
159 static driver_t cxgb_port_driver = {
160         "cxgb",
161         cxgb_port_methods,
162         0
163 };
164
165 static d_ioctl_t cxgb_extension_ioctl;
166 static d_open_t cxgb_extension_open;
167 static d_close_t cxgb_extension_close;
168
169 static struct cdevsw cxgb_cdevsw = {
170        .d_version =    D_VERSION,
171        .d_flags =      0,
172        .d_open =       cxgb_extension_open,
173        .d_close =      cxgb_extension_close,
174        .d_ioctl =      cxgb_extension_ioctl,
175        .d_name =       "cxgb",
176 };
177
178 static devclass_t       cxgb_port_devclass;
179 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
180
181 /*
182  * The driver uses the best interrupt scheme available on a platform in the
183  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
184  * of these schemes the driver may consider as follows:
185  *
186  * msi = 2: choose from among all three options
187  * msi = 1 : only consider MSI and pin interrupts
188  * msi = 0: force pin interrupts
189  */
190 static int msi_allowed = 2;
191
192 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
193 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
194 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
195     "MSI-X, MSI, INTx selector");
196
197 /*
198  * The driver enables offload as a default.
199  * To disable it, use ofld_disable = 1.
200  */
201 static int ofld_disable = 0;
202 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
203 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
204     "disable ULP offload");
205
206 /*
207  * The driver uses an auto-queue algorithm by default.
208  * To disable it and force a single queue-set per port, use multiq = 0
209  */
210 static int multiq = 1;
211 TUNABLE_INT("hw.cxgb.multiq", &multiq);
212 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
213     "use min(ncpus/ports, 8) queue-sets per port");
214
215 /*
216  * By default the driver will not update the firmware unless
217  * it was compiled against a newer version
218  * 
219  */
220 static int force_fw_update = 0;
221 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
222 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
223     "update firmware even if up to date");
224
225 int cxgb_use_16k_clusters = -1;
226 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
227 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
228     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
229
230 static int nfilters = -1;
231 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
232 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
233     &nfilters, 0, "max number of entries in the filter table");
234
235 enum {
236         MAX_TXQ_ENTRIES      = 16384,
237         MAX_CTRL_TXQ_ENTRIES = 1024,
238         MAX_RSPQ_ENTRIES     = 16384,
239         MAX_RX_BUFFERS       = 16384,
240         MAX_RX_JUMBO_BUFFERS = 16384,
241         MIN_TXQ_ENTRIES      = 4,
242         MIN_CTRL_TXQ_ENTRIES = 4,
243         MIN_RSPQ_ENTRIES     = 32,
244         MIN_FL_ENTRIES       = 32,
245         MIN_FL_JUMBO_ENTRIES = 32
246 };
247
248 struct filter_info {
249         u32 sip;
250         u32 sip_mask;
251         u32 dip;
252         u16 sport;
253         u16 dport;
254         u32 vlan:12;
255         u32 vlan_prio:3;
256         u32 mac_hit:1;
257         u32 mac_idx:4;
258         u32 mac_vld:1;
259         u32 pkt_type:2;
260         u32 report_filter_id:1;
261         u32 pass:1;
262         u32 rss:1;
263         u32 qset:3;
264         u32 locked:1;
265         u32 valid:1;
266 };
267
268 enum { FILTER_NO_VLAN_PRI = 7 };
269
270 #define EEPROM_MAGIC 0x38E2F10C
271
272 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
273
274 /* Table for probing the cards.  The desc field isn't actually used */
275 struct cxgb_ident {
276         uint16_t        vendor;
277         uint16_t        device;
278         int             index;
279         char            *desc;
280 } cxgb_identifiers[] = {
281         {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282         {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283         {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284         {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285         {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286         {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287         {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288         {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289         {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290         {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291         {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292         {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293         {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294         {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295         {0, 0, 0, NULL}
296 };
297
298 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
299
300
301 static __inline char
302 t3rev2char(struct adapter *adapter)
303 {
304         char rev = 'z';
305
306         switch(adapter->params.rev) {
307         case T3_REV_A:
308                 rev = 'a';
309                 break;
310         case T3_REV_B:
311         case T3_REV_B2:
312                 rev = 'b';
313                 break;
314         case T3_REV_C:
315                 rev = 'c';
316                 break;
317         }
318         return rev;
319 }
320
321 static struct cxgb_ident *
322 cxgb_get_ident(device_t dev)
323 {
324         struct cxgb_ident *id;
325
326         for (id = cxgb_identifiers; id->desc != NULL; id++) {
327                 if ((id->vendor == pci_get_vendor(dev)) &&
328                     (id->device == pci_get_device(dev))) {
329                         return (id);
330                 }
331         }
332         return (NULL);
333 }
334
335 static const struct adapter_info *
336 cxgb_get_adapter_info(device_t dev)
337 {
338         struct cxgb_ident *id;
339         const struct adapter_info *ai;
340
341         id = cxgb_get_ident(dev);
342         if (id == NULL)
343                 return (NULL);
344
345         ai = t3_get_adapter_info(id->index);
346
347         return (ai);
348 }
349
350 static int
351 cxgb_controller_probe(device_t dev)
352 {
353         const struct adapter_info *ai;
354         char *ports, buf[80];
355         int nports;
356
357         ai = cxgb_get_adapter_info(dev);
358         if (ai == NULL)
359                 return (ENXIO);
360
361         nports = ai->nports0 + ai->nports1;
362         if (nports == 1)
363                 ports = "port";
364         else
365                 ports = "ports";
366
367         snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368         device_set_desc_copy(dev, buf);
369         return (BUS_PROBE_DEFAULT);
370 }
371
372 #define FW_FNAME "cxgb_t3fw"
373 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
374 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
375
376 static int
377 upgrade_fw(adapter_t *sc)
378 {
379         const struct firmware *fw;
380         int status;
381         u32 vers;
382         
383         if ((fw = firmware_get(FW_FNAME)) == NULL)  {
384                 device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
385                 return (ENOENT);
386         } else
387                 device_printf(sc->dev, "installing firmware on card\n");
388         status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
389
390         if (status != 0) {
391                 device_printf(sc->dev, "failed to install firmware: %d\n",
392                     status);
393         } else {
394                 t3_get_fw_version(sc, &vers);
395                 snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
396                     G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
397                     G_FW_VERSION_MICRO(vers));
398         }
399
400         firmware_put(fw, FIRMWARE_UNLOAD);
401
402         return (status);        
403 }
404
405 /*
406  * The cxgb_controller_attach function is responsible for the initial
407  * bringup of the device.  Its responsibilities include:
408  *
409  *  1. Determine if the device supports MSI or MSI-X.
410  *  2. Allocate bus resources so that we can access the Base Address Register
411  *  3. Create and initialize mutexes for the controller and its control
412  *     logic such as SGE and MDIO.
413  *  4. Call hardware specific setup routine for the adapter as a whole.
414  *  5. Allocate the BAR for doing MSI-X.
415  *  6. Setup the line interrupt iff MSI-X is not supported.
416  *  7. Create the driver's taskq.
417  *  8. Start one task queue service thread.
418  *  9. Check if the firmware and SRAM are up-to-date.  They will be
419  *     auto-updated later (before FULL_INIT_DONE), if required.
420  * 10. Create a child device for each MAC (port)
421  * 11. Initialize T3 private state.
422  * 12. Trigger the LED
423  * 13. Setup offload iff supported.
424  * 14. Reset/restart the tick callout.
425  * 15. Attach sysctls
426  *
427  * NOTE: Any modification or deviation from this list MUST be reflected in
428  * the above comment.  Failure to do so will result in problems on various
429  * error conditions including link flapping.
430  */
431 static int
432 cxgb_controller_attach(device_t dev)
433 {
434         device_t child;
435         const struct adapter_info *ai;
436         struct adapter *sc;
437         int i, error = 0;
438         uint32_t vers;
439         int port_qsets = 1;
440         int msi_needed, reg;
441         char buf[80];
442
443         sc = device_get_softc(dev);
444         sc->dev = dev;
445         sc->msi_count = 0;
446         ai = cxgb_get_adapter_info(dev);
447
448         /* find the PCIe link width and set max read request to 4KB*/
449         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
450                 uint16_t lnk;
451
452                 lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
453                 sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
454                 if (sc->link_width < 8 &&
455                     (ai->caps & SUPPORTED_10000baseT_Full)) {
456                         device_printf(sc->dev,
457                             "PCIe x%d Link, expect reduced performance\n",
458                             sc->link_width);
459                 }
460
461                 pci_set_max_read_req(dev, 4096);
462         }
463
464         touch_bars(dev);
465         pci_enable_busmaster(dev);
466         /*
467          * Allocate the registers and make them available to the driver.
468          * The registers that we care about for NIC mode are in BAR 0
469          */
470         sc->regs_rid = PCIR_BAR(0);
471         if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
472             &sc->regs_rid, RF_ACTIVE)) == NULL) {
473                 device_printf(dev, "Cannot allocate BAR region 0\n");
474                 return (ENXIO);
475         }
476
477         snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
478             device_get_unit(dev));
479         ADAPTER_LOCK_INIT(sc, sc->lockbuf);
480
481         snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
482             device_get_unit(dev));
483         snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
484             device_get_unit(dev));
485         snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
486             device_get_unit(dev));
487         
488         MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
489         MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
490         MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
491         
492         sc->bt = rman_get_bustag(sc->regs_res);
493         sc->bh = rman_get_bushandle(sc->regs_res);
494         sc->mmio_len = rman_get_size(sc->regs_res);
495
496         for (i = 0; i < MAX_NPORTS; i++)
497                 sc->port[i].adapter = sc;
498
499         if (t3_prep_adapter(sc, ai, 1) < 0) {
500                 printf("prep adapter failed\n");
501                 error = ENODEV;
502                 goto out;
503         }
504
505         sc->udbs_rid = PCIR_BAR(2);
506         sc->udbs_res = NULL;
507         if (is_offload(sc) &&
508             ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
509                    &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
510                 device_printf(dev, "Cannot allocate BAR region 1\n");
511                 error = ENXIO;
512                 goto out;
513         }
514
515         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516          * enough messages for the queue sets.  If that fails, try falling
517          * back to MSI.  If that fails, then try falling back to the legacy
518          * interrupt pin model.
519          */
520         sc->msix_regs_rid = 0x20;
521         if ((msi_allowed >= 2) &&
522             (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523             &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
524
525                 if (multiq)
526                         port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
527                 msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
528
529                 if (pci_msix_count(dev) == 0 ||
530                     (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
531                     sc->msi_count != msi_needed) {
532                         device_printf(dev, "alloc msix failed - "
533                                       "msi_count=%d, msi_needed=%d, err=%d; "
534                                       "will try MSI\n", sc->msi_count,
535                                       msi_needed, error);
536                         sc->msi_count = 0;
537                         port_qsets = 1;
538                         pci_release_msi(dev);
539                         bus_release_resource(dev, SYS_RES_MEMORY,
540                             sc->msix_regs_rid, sc->msix_regs_res);
541                         sc->msix_regs_res = NULL;
542                 } else {
543                         sc->flags |= USING_MSIX;
544                         sc->cxgb_intr = cxgb_async_intr;
545                         device_printf(dev,
546                                       "using MSI-X interrupts (%u vectors)\n",
547                                       sc->msi_count);
548                 }
549         }
550
551         if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
552                 sc->msi_count = 1;
553                 if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
554                         device_printf(dev, "alloc msi failed - "
555                                       "err=%d; will try INTx\n", error);
556                         sc->msi_count = 0;
557                         port_qsets = 1;
558                         pci_release_msi(dev);
559                 } else {
560                         sc->flags |= USING_MSI;
561                         sc->cxgb_intr = t3_intr_msi;
562                         device_printf(dev, "using MSI interrupts\n");
563                 }
564         }
565         if (sc->msi_count == 0) {
566                 device_printf(dev, "using line interrupts\n");
567                 sc->cxgb_intr = t3b_intr;
568         }
569
570         /* Create a private taskqueue thread for handling driver events */
571         sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
572             taskqueue_thread_enqueue, &sc->tq);
573         if (sc->tq == NULL) {
574                 device_printf(dev, "failed to allocate controller task queue\n");
575                 goto out;
576         }
577
578         taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
579             device_get_nameunit(dev));
580         TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
581
582         
583         /* Create a periodic callout for checking adapter status */
584         callout_init(&sc->cxgb_tick_ch, TRUE);
585         
586         if (t3_check_fw_version(sc) < 0 || force_fw_update) {
587                 /*
588                  * Warn user that a firmware update will be attempted in init.
589                  */
590                 device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
591                     FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
592                 sc->flags &= ~FW_UPTODATE;
593         } else {
594                 sc->flags |= FW_UPTODATE;
595         }
596
597         if (t3_check_tpsram_version(sc) < 0) {
598                 /*
599                  * Warn user that a firmware update will be attempted in init.
600                  */
601                 device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
602                     t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
603                 sc->flags &= ~TPS_UPTODATE;
604         } else {
605                 sc->flags |= TPS_UPTODATE;
606         }
607         
608         /*
609          * Create a child device for each MAC.  The ethernet attachment
610          * will be done in these children.
611          */     
612         for (i = 0; i < (sc)->params.nports; i++) {
613                 struct port_info *pi;
614                 
615                 if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
616                         device_printf(dev, "failed to add child port\n");
617                         error = EINVAL;
618                         goto out;
619                 }
620                 pi = &sc->port[i];
621                 pi->adapter = sc;
622                 pi->nqsets = port_qsets;
623                 pi->first_qset = i*port_qsets;
624                 pi->port_id = i;
625                 pi->tx_chan = i >= ai->nports0;
626                 pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
627                 sc->rxpkt_map[pi->txpkt_intf] = i;
628                 sc->port[i].tx_chan = i >= ai->nports0;
629                 sc->portdev[i] = child;
630                 device_set_softc(child, pi);
631         }
632         if ((error = bus_generic_attach(dev)) != 0)
633                 goto out;
634
635         /* initialize sge private state */
636         t3_sge_init_adapter(sc);
637
638         t3_led_ready(sc);
639         
640         cxgb_offload_init();
641         if (is_offload(sc)) {
642                 setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
643                 cxgb_adapter_ofld(sc);
644         }
645         error = t3_get_fw_version(sc, &vers);
646         if (error)
647                 goto out;
648
649         snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
650             G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
651             G_FW_VERSION_MICRO(vers));
652
653         snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
654                  ai->desc, is_offload(sc) ? "R" : "",
655                  sc->params.vpd.ec, sc->params.vpd.sn);
656         device_set_desc_copy(dev, buf);
657
658         snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
659                  sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
660                  sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
661
662         device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
663         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
664         t3_add_attach_sysctls(sc);
665
666         t3_intr_clear(sc);
667         error = cxgb_setup_interrupts(sc);
668 out:
669         if (error)
670                 cxgb_free(sc);
671
672         return (error);
673 }
674
675 /*
676  * The cxgb_controller_detach routine is called with the device is
677  * unloaded from the system.
678  */
679
680 static int
681 cxgb_controller_detach(device_t dev)
682 {
683         struct adapter *sc;
684
685         sc = device_get_softc(dev);
686
687         cxgb_free(sc);
688
689         return (0);
690 }
691
692 /*
693  * The cxgb_free() is called by the cxgb_controller_detach() routine
694  * to tear down the structures that were built up in
695  * cxgb_controller_attach(), and should be the final piece of work
696  * done when fully unloading the driver.
697  * 
698  *
699  *  1. Shutting down the threads started by the cxgb_controller_attach()
700  *     routine.
701  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
702  *  3. Detaching all of the port devices created during the
703  *     cxgb_controller_attach() routine.
704  *  4. Removing the device children created via cxgb_controller_attach().
705  *  5. Releasing PCI resources associated with the device.
706  *  6. Turning off the offload support, iff it was turned on.
707  *  7. Destroying the mutexes created in cxgb_controller_attach().
708  *
709  */
710 static void
711 cxgb_free(struct adapter *sc)
712 {
713         int i, nqsets = 0;
714
715         ADAPTER_LOCK(sc);
716         sc->flags |= CXGB_SHUTDOWN;
717         ADAPTER_UNLOCK(sc);
718
719         /*
720          * Make sure all child devices are gone.
721          */
722         bus_generic_detach(sc->dev);
723         for (i = 0; i < (sc)->params.nports; i++) {
724                 if (sc->portdev[i] &&
725                     device_delete_child(sc->dev, sc->portdev[i]) != 0)
726                         device_printf(sc->dev, "failed to delete child port\n");
727                 nqsets += sc->port[i].nqsets;
728         }
729
730         /*
731          * At this point, it is as if cxgb_port_detach has run on all ports, and
732          * cxgb_down has run on the adapter.  All interrupts have been silenced,
733          * all open devices have been closed.
734          */
735         KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
736                                            __func__, sc->open_device_map));
737         for (i = 0; i < sc->params.nports; i++) {
738                 KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
739                                                   __func__, i));
740         }
741
742         /*
743          * Finish off the adapter's callouts.
744          */
745         callout_drain(&sc->cxgb_tick_ch);
746         callout_drain(&sc->sge_timer_ch);
747
748         /*
749          * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
750          * sysctls are cleaned up by the kernel linker.
751          */
752         if (sc->flags & FULL_INIT_DONE) {
753                 t3_free_sge_resources(sc, nqsets);
754                 sc->flags &= ~FULL_INIT_DONE;
755         }
756
757         /*
758          * Release all interrupt resources.
759          */
760         cxgb_teardown_interrupts(sc);
761         if (sc->flags & (USING_MSI | USING_MSIX)) {
762                 device_printf(sc->dev, "releasing msi message(s)\n");
763                 pci_release_msi(sc->dev);
764         } else {
765                 device_printf(sc->dev, "no msi message to release\n");
766         }
767
768         if (sc->msix_regs_res != NULL) {
769                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
770                     sc->msix_regs_res);
771         }
772
773         /*
774          * Free the adapter's taskqueue.
775          */
776         if (sc->tq != NULL) {
777                 taskqueue_free(sc->tq);
778                 sc->tq = NULL;
779         }
780         
781         if (is_offload(sc)) {
782                 clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
783                 cxgb_adapter_unofld(sc);
784         }
785
786 #ifdef notyet
787         if (sc->flags & CXGB_OFLD_INIT)
788                 cxgb_offload_deactivate(sc);
789 #endif
790         free(sc->filters, M_DEVBUF);
791         t3_sge_free(sc);
792
793         cxgb_offload_exit();
794
795         if (sc->udbs_res != NULL)
796                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
797                     sc->udbs_res);
798
799         if (sc->regs_res != NULL)
800                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
801                     sc->regs_res);
802
803         MTX_DESTROY(&sc->mdio_lock);
804         MTX_DESTROY(&sc->sge.reg_lock);
805         MTX_DESTROY(&sc->elmer_lock);
806         ADAPTER_LOCK_DEINIT(sc);
807 }
808
809 /**
810  *      setup_sge_qsets - configure SGE Tx/Rx/response queues
811  *      @sc: the controller softc
812  *
813  *      Determines how many sets of SGE queues to use and initializes them.
814  *      We support multiple queue sets per port if we have MSI-X, otherwise
815  *      just one queue set per port.
816  */
817 static int
818 setup_sge_qsets(adapter_t *sc)
819 {
820         int i, j, err, irq_idx = 0, qset_idx = 0;
821         u_int ntxq = SGE_TXQ_PER_SET;
822
823         if ((err = t3_sge_alloc(sc)) != 0) {
824                 device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
825                 return (err);
826         }
827
828         if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
829                 irq_idx = -1;
830
831         for (i = 0; i < (sc)->params.nports; i++) {
832                 struct port_info *pi = &sc->port[i];
833
834                 for (j = 0; j < pi->nqsets; j++, qset_idx++) {
835                         err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
836                             (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
837                             &sc->params.sge.qset[qset_idx], ntxq, pi);
838                         if (err) {
839                                 t3_free_sge_resources(sc, qset_idx);
840                                 device_printf(sc->dev,
841                                     "t3_sge_alloc_qset failed with %d\n", err);
842                                 return (err);
843                         }
844                 }
845         }
846
847         return (0);
848 }
849
850 static void
851 cxgb_teardown_interrupts(adapter_t *sc)
852 {
853         int i;
854
855         for (i = 0; i < SGE_QSETS; i++) {
856                 if (sc->msix_intr_tag[i] == NULL) {
857
858                         /* Should have been setup fully or not at all */
859                         KASSERT(sc->msix_irq_res[i] == NULL &&
860                                 sc->msix_irq_rid[i] == 0,
861                                 ("%s: half-done interrupt (%d).", __func__, i));
862
863                         continue;
864                 }
865
866                 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
867                                   sc->msix_intr_tag[i]);
868                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
869                                      sc->msix_irq_res[i]);
870
871                 sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
872                 sc->msix_irq_rid[i] = 0;
873         }
874
875         if (sc->intr_tag) {
876                 KASSERT(sc->irq_res != NULL,
877                         ("%s: half-done interrupt.", __func__));
878
879                 bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
880                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
881                                      sc->irq_res);
882
883                 sc->irq_res = sc->intr_tag = NULL;
884                 sc->irq_rid = 0;
885         }
886 }
887
888 static int
889 cxgb_setup_interrupts(adapter_t *sc)
890 {
891         struct resource *res;
892         void *tag;
893         int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
894
895         sc->irq_rid = intr_flag ? 1 : 0;
896         sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
897                                              RF_SHAREABLE | RF_ACTIVE);
898         if (sc->irq_res == NULL) {
899                 device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
900                               intr_flag, sc->irq_rid);
901                 err = EINVAL;
902                 sc->irq_rid = 0;
903         } else {
904                 err = bus_setup_intr(sc->dev, sc->irq_res,
905                     INTR_MPSAFE | INTR_TYPE_NET, NULL,
906                     sc->cxgb_intr, sc, &sc->intr_tag);
907
908                 if (err) {
909                         device_printf(sc->dev,
910                                       "Cannot set up interrupt (%x, %u, %d)\n",
911                                       intr_flag, sc->irq_rid, err);
912                         bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
913                                              sc->irq_res);
914                         sc->irq_res = sc->intr_tag = NULL;
915                         sc->irq_rid = 0;
916                 }
917         }
918
919         /* That's all for INTx or MSI */
920         if (!(intr_flag & USING_MSIX) || err)
921                 return (err);
922
923         bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
924         for (i = 0; i < sc->msi_count - 1; i++) {
925                 rid = i + 2;
926                 res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
927                                              RF_SHAREABLE | RF_ACTIVE);
928                 if (res == NULL) {
929                         device_printf(sc->dev, "Cannot allocate interrupt "
930                                       "for message %d\n", rid);
931                         err = EINVAL;
932                         break;
933                 }
934
935                 err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
936                                      NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
937                 if (err) {
938                         device_printf(sc->dev, "Cannot set up interrupt "
939                                       "for message %d (%d)\n", rid, err);
940                         bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
941                         break;
942                 }
943
944                 sc->msix_irq_rid[i] = rid;
945                 sc->msix_irq_res[i] = res;
946                 sc->msix_intr_tag[i] = tag;
947                 bus_describe_intr(sc->dev, res, tag, "qs%d", i);
948         }
949
950         if (err)
951                 cxgb_teardown_interrupts(sc);
952
953         return (err);
954 }
955
956
957 static int
958 cxgb_port_probe(device_t dev)
959 {
960         struct port_info *p;
961         char buf[80];
962         const char *desc;
963         
964         p = device_get_softc(dev);
965         desc = p->phy.desc;
966         snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
967         device_set_desc_copy(dev, buf);
968         return (0);
969 }
970
971
972 static int
973 cxgb_makedev(struct port_info *pi)
974 {
975         
976         pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
977             UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
978         
979         if (pi->port_cdev == NULL)
980                 return (ENOMEM);
981
982         pi->port_cdev->si_drv1 = (void *)pi;
983         
984         return (0);
985 }
986
/*
 * Interface capability masks.  CXGB_CAP is the full set of IFCAP_* flags;
 * CXGB_CAP_ENABLE is the same set with IFCAP_TSO6 removed.
 */
#define CXGB_CAP                                                        \
    (IFCAP_VLAN_HWTAGGING |                                             \
     IFCAP_VLAN_MTU |                                                   \
     IFCAP_HWCSUM |                                                     \
     IFCAP_VLAN_HWCSUM |                                                \
     IFCAP_TSO |                                                        \
     IFCAP_JUMBO_MTU |                                                  \
     IFCAP_LRO |                                                        \
     IFCAP_VLAN_HWTSO |                                                 \
     IFCAP_LINKSTATE)
#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
991
992 static int
993 cxgb_port_attach(device_t dev)
994 {
995         struct port_info *p;
996         struct ifnet *ifp;
997         int err;
998         struct adapter *sc;
999
1000         p = device_get_softc(dev);
1001         sc = p->adapter;
1002         snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1003             device_get_unit(device_get_parent(dev)), p->port_id);
1004         PORT_LOCK_INIT(p, p->lockbuf);
1005
1006         callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1007         TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1008
1009         /* Allocate an ifnet object and set it up */
1010         ifp = p->ifp = if_alloc(IFT_ETHER);
1011         if (ifp == NULL) {
1012                 device_printf(dev, "Cannot allocate ifnet\n");
1013                 return (ENOMEM);
1014         }
1015         
1016         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1017         ifp->if_init = cxgb_init;
1018         ifp->if_softc = p;
1019         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1020         ifp->if_ioctl = cxgb_ioctl;
1021         ifp->if_transmit = cxgb_transmit;
1022         ifp->if_qflush = cxgb_qflush;
1023
1024         ifp->if_capabilities = CXGB_CAP;
1025         ifp->if_capenable = CXGB_CAP_ENABLE;
1026         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1027
1028         /*
1029          * Disable TSO on 4-port - it isn't supported by the firmware.
1030          */     
1031         if (sc->params.nports > 2) {
1032                 ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1033                 ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1034                 ifp->if_hwassist &= ~CSUM_TSO;
1035         }
1036
1037         ether_ifattach(ifp, p->hw_addr);
1038
1039 #ifdef DEFAULT_JUMBO
1040         if (sc->params.nports <= 2)
1041                 ifp->if_mtu = ETHERMTU_JUMBO;
1042 #endif
1043         if ((err = cxgb_makedev(p)) != 0) {
1044                 printf("makedev failed %d\n", err);
1045                 return (err);
1046         }
1047
1048         /* Create a list of media supported by this port */
1049         ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1050             cxgb_media_status);
1051         cxgb_build_medialist(p);
1052       
1053         t3_sge_init_port(p);
1054
1055         return (err);
1056 }
1057
1058 /*
1059  * cxgb_port_detach() is called via the device_detach methods when
1060  * cxgb_free() calls the bus_generic_detach.  It is responsible for 
1061  * removing the device from the view of the kernel, i.e. from all 
1062  * interfaces lists etc.  This routine is only called when the driver is 
1063  * being unloaded, not when the link goes down.
1064  */
1065 static int
1066 cxgb_port_detach(device_t dev)
1067 {
1068         struct port_info *p;
1069         struct adapter *sc;
1070         int i;
1071
1072         p = device_get_softc(dev);
1073         sc = p->adapter;
1074
1075         /* Tell cxgb_ioctl and if_init that the port is going away */
1076         ADAPTER_LOCK(sc);
1077         SET_DOOMED(p);
1078         wakeup(&sc->flags);
1079         while (IS_BUSY(sc))
1080                 mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1081         SET_BUSY(sc);
1082         ADAPTER_UNLOCK(sc);
1083
1084         if (p->port_cdev != NULL)
1085                 destroy_dev(p->port_cdev);
1086
1087         cxgb_uninit_synchronized(p);
1088         ether_ifdetach(p->ifp);
1089
1090         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1091                 struct sge_qset *qs = &sc->sge.qs[i];
1092                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1093
1094                 callout_drain(&txq->txq_watchdog);
1095                 callout_drain(&txq->txq_timer);
1096         }
1097
1098         PORT_LOCK_DEINIT(p);
1099         if_free(p->ifp);
1100         p->ifp = NULL;
1101
1102         ADAPTER_LOCK(sc);
1103         CLR_BUSY(sc);
1104         wakeup_one(&sc->flags);
1105         ADAPTER_UNLOCK(sc);
1106         return (0);
1107 }
1108
1109 void
1110 t3_fatal_err(struct adapter *sc)
1111 {
1112         u_int fw_status[4];
1113
1114         if (sc->flags & FULL_INIT_DONE) {
1115                 t3_sge_stop(sc);
1116                 t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1117                 t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1118                 t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1119                 t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1120                 t3_intr_disable(sc);
1121         }
1122         device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1123         if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1124                 device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1125                     fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1126 }
1127
1128 int
1129 t3_os_find_pci_capability(adapter_t *sc, int cap)
1130 {
1131         device_t dev;
1132         struct pci_devinfo *dinfo;
1133         pcicfgregs *cfg;
1134         uint32_t status;
1135         uint8_t ptr;
1136
1137         dev = sc->dev;
1138         dinfo = device_get_ivars(dev);
1139         cfg = &dinfo->cfg;
1140
1141         status = pci_read_config(dev, PCIR_STATUS, 2);
1142         if (!(status & PCIM_STATUS_CAPPRESENT))
1143                 return (0);
1144
1145         switch (cfg->hdrtype & PCIM_HDRTYPE) {
1146         case 0:
1147         case 1:
1148                 ptr = PCIR_CAP_PTR;
1149                 break;
1150         case 2:
1151                 ptr = PCIR_CAP_PTR_2;
1152                 break;
1153         default:
1154                 return (0);
1155                 break;
1156         }
1157         ptr = pci_read_config(dev, ptr, 1);
1158
1159         while (ptr != 0) {
1160                 if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1161                         return (ptr);
1162                 ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1163         }
1164
1165         return (0);
1166 }
1167
1168 int
1169 t3_os_pci_save_state(struct adapter *sc)
1170 {
1171         device_t dev;
1172         struct pci_devinfo *dinfo;
1173
1174         dev = sc->dev;
1175         dinfo = device_get_ivars(dev);
1176
1177         pci_cfg_save(dev, dinfo, 0);
1178         return (0);
1179 }
1180
1181 int
1182 t3_os_pci_restore_state(struct adapter *sc)
1183 {
1184         device_t dev;
1185         struct pci_devinfo *dinfo;
1186
1187         dev = sc->dev;
1188         dinfo = device_get_ivars(dev);
1189
1190         pci_cfg_restore(dev, dinfo);
1191         return (0);
1192 }
1193
1194 /**
1195  *      t3_os_link_changed - handle link status changes
1196  *      @sc: the adapter associated with the link change
1197  *      @port_id: the port index whose link status has changed
1198  *      @link_status: the new status of the link
1199  *      @speed: the new speed setting
1200  *      @duplex: the new duplex setting
1201  *      @fc: the new flow-control setting
1202  *
1203  *      This is the OS-dependent handler for link status changes.  The OS
1204  *      neutral handler takes care of most of the processing for these events,
1205  *      then calls this handler for any OS-specific processing.
1206  */
1207 void
1208 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1209      int duplex, int fc, int mac_was_reset)
1210 {
1211         struct port_info *pi = &adapter->port[port_id];
1212         struct ifnet *ifp = pi->ifp;
1213
1214         /* no race with detach, so ifp should always be good */
1215         KASSERT(ifp, ("%s: if detached.", __func__));
1216
1217         /* Reapply mac settings if they were lost due to a reset */
1218         if (mac_was_reset) {
1219                 PORT_LOCK(pi);
1220                 cxgb_update_mac_settings(pi);
1221                 PORT_UNLOCK(pi);
1222         }
1223
1224         if (link_status) {
1225                 ifp->if_baudrate = IF_Mbps(speed);
1226                 if_link_state_change(ifp, LINK_STATE_UP);
1227         } else
1228                 if_link_state_change(ifp, LINK_STATE_DOWN);
1229 }
1230
1231 /**
1232  *      t3_os_phymod_changed - handle PHY module changes
1233  *      @phy: the PHY reporting the module change
1234  *      @mod_type: new module type
1235  *
1236  *      This is the OS-dependent handler for PHY module changes.  It is
1237  *      invoked when a PHY module is removed or inserted for any OS-specific
1238  *      processing.
1239  */
1240 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1241 {
1242         static const char *mod_str[] = {
1243                 NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1244         };
1245         struct port_info *pi = &adap->port[port_id];
1246         int mod = pi->phy.modtype;
1247
1248         if (mod != pi->media.ifm_cur->ifm_data)
1249                 cxgb_build_medialist(pi);
1250
1251         if (mod == phy_modtype_none)
1252                 if_printf(pi->ifp, "PHY module unplugged\n");
1253         else {
1254                 KASSERT(mod < ARRAY_SIZE(mod_str),
1255                         ("invalid PHY module type %d", mod));
1256                 if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1257         }
1258 }
1259
1260 void
1261 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1262 {
1263
1264         /*
1265          * The ifnet might not be allocated before this gets called,
1266          * as this is called early on in attach by t3_prep_adapter
1267          * save the address off in the port structure
1268          */
1269         if (cxgb_debug)
1270                 printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1271         bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1272 }
1273
1274 /*
1275  * Programs the XGMAC based on the settings in the ifnet.  These settings
1276  * include MTU, MAC address, mcast addresses, etc.
1277  */
1278 static void
1279 cxgb_update_mac_settings(struct port_info *p)
1280 {
1281         struct ifnet *ifp = p->ifp;
1282         struct t3_rx_mode rm;
1283         struct cmac *mac = &p->mac;
1284         int mtu, hwtagging;
1285
1286         PORT_LOCK_ASSERT_OWNED(p);
1287
1288         bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1289
1290         mtu = ifp->if_mtu;
1291         if (ifp->if_capenable & IFCAP_VLAN_MTU)
1292                 mtu += ETHER_VLAN_ENCAP_LEN;
1293
1294         hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1295
1296         t3_mac_set_mtu(mac, mtu);
1297         t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1298         t3_mac_set_address(mac, 0, p->hw_addr);
1299         t3_init_rx_mode(&rm, p);
1300         t3_mac_set_rx_mode(mac, &rm);
1301 }
1302
1303
1304 static int
1305 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1306                               unsigned long n)
1307 {
1308         int attempts = 5;
1309
1310         while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1311                 if (!--attempts)
1312                         return (ETIMEDOUT);
1313                 t3_os_sleep(10);
1314         }
1315         return 0;
1316 }
1317
1318 static int
1319 init_tp_parity(struct adapter *adap)
1320 {
1321         int i;
1322         struct mbuf *m;
1323         struct cpl_set_tcb_field *greq;
1324         unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1325
1326         t3_tp_set_offload_mode(adap, 1);
1327
1328         for (i = 0; i < 16; i++) {
1329                 struct cpl_smt_write_req *req;
1330
1331                 m = m_gethdr(M_WAITOK, MT_DATA);
1332                 req = mtod(m, struct cpl_smt_write_req *);
1333                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1334                 memset(req, 0, sizeof(*req));
1335                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1336                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1337                 req->iff = i;
1338                 t3_mgmt_tx(adap, m);
1339         }
1340
1341         for (i = 0; i < 2048; i++) {
1342                 struct cpl_l2t_write_req *req;
1343
1344                 m = m_gethdr(M_WAITOK, MT_DATA);
1345                 req = mtod(m, struct cpl_l2t_write_req *);
1346                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1347                 memset(req, 0, sizeof(*req));
1348                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1349                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1350                 req->params = htonl(V_L2T_W_IDX(i));
1351                 t3_mgmt_tx(adap, m);
1352         }
1353
1354         for (i = 0; i < 2048; i++) {
1355                 struct cpl_rte_write_req *req;
1356
1357                 m = m_gethdr(M_WAITOK, MT_DATA);
1358                 req = mtod(m, struct cpl_rte_write_req *);
1359                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1360                 memset(req, 0, sizeof(*req));
1361                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1362                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1363                 req->l2t_idx = htonl(V_L2T_W_IDX(i));
1364                 t3_mgmt_tx(adap, m);
1365         }
1366
1367         m = m_gethdr(M_WAITOK, MT_DATA);
1368         greq = mtod(m, struct cpl_set_tcb_field *);
1369         m->m_len = m->m_pkthdr.len = sizeof(*greq);
1370         memset(greq, 0, sizeof(*greq));
1371         greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1372         OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1373         greq->mask = htobe64(1);
1374         t3_mgmt_tx(adap, m);
1375
1376         i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1377         t3_tp_set_offload_mode(adap, 0);
1378         return (i);
1379 }
1380
1381 /**
1382  *      setup_rss - configure Receive Side Steering (per-queue connection demux) 
1383  *      @adap: the adapter
1384  *
1385  *      Sets up RSS to distribute packets to multiple receive queues.  We
1386  *      configure the RSS CPU lookup table to distribute to the number of HW
1387  *      receive queues, and the response queue lookup table to narrow that
1388  *      down to the response queues actually configured for each port.
1389  *      We always configure the RSS mapping for two ports since the mapping
1390  *      table has plenty of entries.
1391  */
1392 static void
1393 setup_rss(adapter_t *adap)
1394 {
1395         int i;
1396         u_int nq[2]; 
1397         uint8_t cpus[SGE_QSETS + 1];
1398         uint16_t rspq_map[RSS_TABLE_SIZE];
1399         
1400         for (i = 0; i < SGE_QSETS; ++i)
1401                 cpus[i] = i;
1402         cpus[SGE_QSETS] = 0xff;
1403
1404         nq[0] = nq[1] = 0;
1405         for_each_port(adap, i) {
1406                 const struct port_info *pi = adap2pinfo(adap, i);
1407
1408                 nq[pi->tx_chan] += pi->nqsets;
1409         }
1410         for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1411                 rspq_map[i] = nq[0] ? i % nq[0] : 0;
1412                 rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1413         }
1414
1415         /* Calculate the reverse RSS map table */
1416         for (i = 0; i < SGE_QSETS; ++i)
1417                 adap->rrss_map[i] = 0xff;
1418         for (i = 0; i < RSS_TABLE_SIZE; ++i)
1419                 if (adap->rrss_map[rspq_map[i]] == 0xff)
1420                         adap->rrss_map[rspq_map[i]] = i;
1421
1422         t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1423                       F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1424                       F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1425                       cpus, rspq_map);
1426
1427 }
1428
/*
 * Sends an mbuf to an offload queue.  Thin wrapper that passes the mbuf to
 * t3_offload_tx() and returns its result.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{
        return (t3_offload_tx(tdev, m));
}
1441
1442 static int
1443 write_smt_entry(struct adapter *adapter, int idx)
1444 {
1445         struct port_info *pi = &adapter->port[idx];
1446         struct cpl_smt_write_req *req;
1447         struct mbuf *m;
1448
1449         if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1450                 return (ENOMEM);
1451
1452         req = mtod(m, struct cpl_smt_write_req *);
1453         m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1454         
1455         req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1456         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1457         req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1458         req->iff = idx;
1459         memset(req->src_mac1, 0, sizeof(req->src_mac1));
1460         memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1461
1462         m_set_priority(m, 1);
1463
1464         offload_tx(&adapter->tdev, m);
1465
1466         return (0);
1467 }
1468
1469 static int
1470 init_smt(struct adapter *adapter)
1471 {
1472         int i;
1473
1474         for_each_port(adapter, i)
1475                 write_smt_entry(adapter, i);
1476         return 0;
1477 }
1478
1479 static void
1480 init_port_mtus(adapter_t *adapter)
1481 {
1482         unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1483
1484         t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1485 }
1486
1487 static void
1488 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1489                               int hi, int port)
1490 {
1491         struct mbuf *m;
1492         struct mngt_pktsched_wr *req;
1493
1494         m = m_gethdr(M_DONTWAIT, MT_DATA);
1495         if (m) {        
1496                 req = mtod(m, struct mngt_pktsched_wr *);
1497                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1498                 req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1499                 req->sched = sched;
1500                 req->idx = qidx;
1501                 req->min = lo;
1502                 req->max = hi;
1503                 req->binding = port;
1504                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1505                 t3_mgmt_tx(adap, m);
1506         }
1507 }
1508
1509 static void
1510 bind_qsets(adapter_t *sc)
1511 {
1512         int i, j;
1513
1514         for (i = 0; i < (sc)->params.nports; ++i) {
1515                 const struct port_info *pi = adap2pinfo(sc, i);
1516
1517                 for (j = 0; j < pi->nqsets; ++j) {
1518                         send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1519                                           -1, pi->tx_chan);
1520
1521                 }
1522         }
1523 }
1524
1525 static void
1526 update_tpeeprom(struct adapter *adap)
1527 {
1528         const struct firmware *tpeeprom;
1529
1530         uint32_t version;
1531         unsigned int major, minor;
1532         int ret, len;
1533         char rev, name[32];
1534
1535         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1536
1537         major = G_TP_VERSION_MAJOR(version);
1538         minor = G_TP_VERSION_MINOR(version);
1539         if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1540                 return; 
1541
1542         rev = t3rev2char(adap);
1543         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1544
1545         tpeeprom = firmware_get(name);
1546         if (tpeeprom == NULL) {
1547                 device_printf(adap->dev,
1548                               "could not load TP EEPROM: unable to load %s\n",
1549                               name);
1550                 return;
1551         }
1552
1553         len = tpeeprom->datasize - 4;
1554         
1555         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1556         if (ret)
1557                 goto release_tpeeprom;
1558
1559         if (len != TP_SRAM_LEN) {
1560                 device_printf(adap->dev,
1561                               "%s length is wrong len=%d expected=%d\n", name,
1562                               len, TP_SRAM_LEN);
1563                 return;
1564         }
1565         
1566         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1567             TP_SRAM_OFFSET);
1568         
1569         if (!ret) {
1570                 device_printf(adap->dev,
1571                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1572                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1573         } else 
1574                 device_printf(adap->dev,
1575                               "Protocol SRAM image update in EEPROM failed\n");
1576
1577 release_tpeeprom:
1578         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1579         
1580         return;
1581 }
1582
1583 static int
1584 update_tpsram(struct adapter *adap)
1585 {
1586         const struct firmware *tpsram;
1587         int ret;
1588         char rev, name[32];
1589
1590         rev = t3rev2char(adap);
1591         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1592
1593         update_tpeeprom(adap);
1594
1595         tpsram = firmware_get(name);
1596         if (tpsram == NULL){
1597                 device_printf(adap->dev, "could not load TP SRAM\n");
1598                 return (EINVAL);
1599         } else
1600                 device_printf(adap->dev, "updating TP SRAM\n");
1601         
1602         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1603         if (ret)
1604                 goto release_tpsram;    
1605
1606         ret = t3_set_proto_sram(adap, tpsram->data);
1607         if (ret)
1608                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1609
1610 release_tpsram:
1611         firmware_put(tpsram, FIRMWARE_UNLOAD);
1612         
1613         return ret;
1614 }
1615
1616 /**
1617  *      cxgb_up - enable the adapter
1618  *      @adap: adapter being enabled
1619  *
1620  *      Called when the first port is enabled, this function performs the
1621  *      actions necessary to make an adapter operational, such as completing
1622  *      the initialization of HW modules, and enabling interrupts.
1623  */
1624 static int
1625 cxgb_up(struct adapter *sc)
1626 {
1627         int err = 0;
1628         unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1629
1630         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1631                                            __func__, sc->open_device_map));
1632
1633         if ((sc->flags & FULL_INIT_DONE) == 0) {
1634
1635                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1636
1637                 if ((sc->flags & FW_UPTODATE) == 0)
1638                         if ((err = upgrade_fw(sc)))
1639                                 goto out;
1640
1641                 if ((sc->flags & TPS_UPTODATE) == 0)
1642                         if ((err = update_tpsram(sc)))
1643                                 goto out;
1644
1645                 if (is_offload(sc) && nfilters != 0) {
1646                         sc->params.mc5.nservers = 0;
1647
1648                         if (nfilters < 0)
1649                                 sc->params.mc5.nfilters = mxf;
1650                         else
1651                                 sc->params.mc5.nfilters = min(nfilters, mxf);
1652                 }
1653
1654                 err = t3_init_hw(sc, 0);
1655                 if (err)
1656                         goto out;
1657
1658                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1659                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1660
1661                 err = setup_sge_qsets(sc);
1662                 if (err)
1663                         goto out;
1664
1665                 alloc_filters(sc);
1666                 setup_rss(sc);
1667
1668                 t3_add_configured_sysctls(sc);
1669                 sc->flags |= FULL_INIT_DONE;
1670         }
1671
1672         t3_intr_clear(sc);
1673         t3_sge_start(sc);
1674         t3_intr_enable(sc);
1675
1676         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1677             is_offload(sc) && init_tp_parity(sc) == 0)
1678                 sc->flags |= TP_PARITY_INIT;
1679
1680         if (sc->flags & TP_PARITY_INIT) {
1681                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1682                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1683         }
1684         
1685         if (!(sc->flags & QUEUES_BOUND)) {
1686                 bind_qsets(sc);
1687                 setup_hw_filters(sc);
1688                 sc->flags |= QUEUES_BOUND;              
1689         }
1690
1691         t3_sge_reset_adapter(sc);
1692 out:
1693         return (err);
1694 }
1695
/*
 * Called when the last open device is closed: stops the SGE and disables
 * adapter interrupts.  This deliberately does NOT undo all of cxgb_up's
 * work; the resources grabbed under FULL_INIT_DONE are released during
 * controller_detach, not here.
 */
static void
cxgb_down(struct adapter *sc)
{

        t3_sge_stop(sc);
        t3_intr_disable(sc);
}
1707
1708 static int
1709 offload_open(struct port_info *pi)
1710 {
1711         struct adapter *sc = pi->adapter;
1712         struct t3cdev *tdev = &sc->tdev;
1713
1714         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1715
1716         t3_tp_set_offload_mode(sc, 1);
1717         tdev->lldev = pi->ifp;
1718         init_port_mtus(sc);
1719         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1720                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1721         init_smt(sc);
1722         cxgb_add_clients(tdev);
1723
1724         return (0);
1725 }
1726
1727 static int
1728 offload_close(struct t3cdev *tdev)
1729 {
1730         struct adapter *adapter = tdev2adap(tdev);
1731
1732         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1733                 return (0);
1734
1735         /* Call back all registered clients */
1736         cxgb_remove_clients(tdev);
1737
1738         tdev->lldev = NULL;
1739         cxgb_set_dummy_ops(tdev);
1740         t3_tp_set_offload_mode(adapter, 0);
1741
1742         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1743
1744         return (0);
1745 }
1746
1747 /*
1748  * if_init for cxgb ports.
1749  */
1750 static void
1751 cxgb_init(void *arg)
1752 {
1753         struct port_info *p = arg;
1754         struct adapter *sc = p->adapter;
1755
1756         ADAPTER_LOCK(sc);
1757         cxgb_init_locked(p); /* releases adapter lock */
1758         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1759 }
1760
1761 static int
1762 cxgb_init_locked(struct port_info *p)
1763 {
1764         struct adapter *sc = p->adapter;
1765         struct ifnet *ifp = p->ifp;
1766         struct cmac *mac = &p->mac;
1767         int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1768
1769         ADAPTER_LOCK_ASSERT_OWNED(sc);
1770
1771         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1772                 gave_up_lock = 1;
1773                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1774                         rc = EINTR;
1775                         goto done;
1776                 }
1777         }
1778         if (IS_DOOMED(p)) {
1779                 rc = ENXIO;
1780                 goto done;
1781         }
1782         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1783
1784         /*
1785          * The code that runs during one-time adapter initialization can sleep
1786          * so it's important not to hold any locks across it.
1787          */
1788         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1789
1790         if (may_sleep) {
1791                 SET_BUSY(sc);
1792                 gave_up_lock = 1;
1793                 ADAPTER_UNLOCK(sc);
1794         }
1795
1796         if (sc->open_device_map == 0) {
1797                 if ((rc = cxgb_up(sc)) != 0)
1798                         goto done;
1799
1800                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1801                         log(LOG_WARNING,
1802                             "Could not initialize offload capabilities\n");
1803         }
1804
1805         PORT_LOCK(p);
1806         if (isset(&sc->open_device_map, p->port_id) &&
1807             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1808                 PORT_UNLOCK(p);
1809                 goto done;
1810         }
1811         t3_port_intr_enable(sc, p->port_id);
1812         if (!mac->multiport) 
1813                 t3_mac_init(mac);
1814         cxgb_update_mac_settings(p);
1815         t3_link_start(&p->phy, mac, &p->link_config);
1816         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1817         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1818         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1819         PORT_UNLOCK(p);
1820
1821         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1822                 struct sge_qset *qs = &sc->sge.qs[i];
1823                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1824
1825                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1826                                  txq->txq_watchdog.c_cpu);
1827         }
1828
1829         /* all ok */
1830         setbit(&sc->open_device_map, p->port_id);
1831         callout_reset(&p->link_check_ch,
1832             p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1833             link_check_callout, p);
1834
1835 done:
1836         if (may_sleep) {
1837                 ADAPTER_LOCK(sc);
1838                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1839                 CLR_BUSY(sc);
1840         }
1841         if (gave_up_lock)
1842                 wakeup_one(&sc->flags);
1843         ADAPTER_UNLOCK(sc);
1844         return (rc);
1845 }
1846
1847 static int
1848 cxgb_uninit_locked(struct port_info *p)
1849 {
1850         struct adapter *sc = p->adapter;
1851         int rc;
1852
1853         ADAPTER_LOCK_ASSERT_OWNED(sc);
1854
1855         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1856                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1857                         rc = EINTR;
1858                         goto done;
1859                 }
1860         }
1861         if (IS_DOOMED(p)) {
1862                 rc = ENXIO;
1863                 goto done;
1864         }
1865         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1866         SET_BUSY(sc);
1867         ADAPTER_UNLOCK(sc);
1868
1869         rc = cxgb_uninit_synchronized(p);
1870
1871         ADAPTER_LOCK(sc);
1872         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1873         CLR_BUSY(sc);
1874         wakeup_one(&sc->flags);
1875 done:
1876         ADAPTER_UNLOCK(sc);
1877         return (rc);
1878 }
1879
1880 /*
1881  * Called on "ifconfig down", and from port_detach
1882  */
1883 static int
1884 cxgb_uninit_synchronized(struct port_info *pi)
1885 {
1886         struct adapter *sc = pi->adapter;
1887         struct ifnet *ifp = pi->ifp;
1888
1889         /*
1890          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1891          */
1892         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1893
1894         /*
1895          * Clear this port's bit from the open device map, and then drain all
1896          * the tasks that can access/manipulate this port's port_info or ifp.
1897          * We disable this port's interrupts here and so the slow/ext
1898          * interrupt tasks won't be enqueued.  The tick task will continue to
1899          * be enqueued every second but the runs after this drain will not see
1900          * this port in the open device map.
1901          *
1902          * A well behaved task must take open_device_map into account and ignore
1903          * ports that are not open.
1904          */
1905         clrbit(&sc->open_device_map, pi->port_id);
1906         t3_port_intr_disable(sc, pi->port_id);
1907         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1908         taskqueue_drain(sc->tq, &sc->tick_task);
1909
1910         callout_drain(&pi->link_check_ch);
1911         taskqueue_drain(sc->tq, &pi->link_check_task);
1912
1913         PORT_LOCK(pi);
1914         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1915
1916         /* disable pause frames */
1917         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1918
1919         /* Reset RX FIFO HWM */
1920         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1921                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1922
1923         DELAY(100 * 1000);
1924
1925         /* Wait for TXFIFO empty */
1926         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1927                         F_TXFIFO_EMPTY, 1, 20, 5);
1928
1929         DELAY(100 * 1000);
1930         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1931
1932
1933         pi->phy.ops->power_down(&pi->phy, 1);
1934
1935         PORT_UNLOCK(pi);
1936
1937         pi->link_config.link_ok = 0;
1938         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1939
1940         if ((sc->open_device_map & PORT_MASK) == 0)
1941                 offload_close(&sc->tdev);
1942
1943         if (sc->open_device_map == 0)
1944                 cxgb_down(pi->adapter);
1945
1946         return (0);
1947 }
1948
1949 /*
1950  * Mark lro enabled or disabled in all qsets for this port
1951  */
1952 static int
1953 cxgb_set_lro(struct port_info *p, int enabled)
1954 {
1955         int i;
1956         struct adapter *adp = p->adapter;
1957         struct sge_qset *q;
1958
1959         for (i = 0; i < p->nqsets; i++) {
1960                 q = &adp->sge.qs[p->first_qset + i];
1961                 q->lro.enabled = (enabled != 0);
1962         }
1963         return (0);
1964 }
1965
1966 static int
1967 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1968 {
1969         struct port_info *p = ifp->if_softc;
1970         struct adapter *sc = p->adapter;
1971         struct ifreq *ifr = (struct ifreq *)data;
1972         int flags, error = 0, mtu;
1973         uint32_t mask;
1974
1975         switch (command) {
1976         case SIOCSIFMTU:
1977                 ADAPTER_LOCK(sc);
1978                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1979                 if (error) {
1980 fail:
1981                         ADAPTER_UNLOCK(sc);
1982                         return (error);
1983                 }
1984
1985                 mtu = ifr->ifr_mtu;
1986                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1987                         error = EINVAL;
1988                 } else {
1989                         ifp->if_mtu = mtu;
1990                         PORT_LOCK(p);
1991                         cxgb_update_mac_settings(p);
1992                         PORT_UNLOCK(p);
1993                 }
1994                 ADAPTER_UNLOCK(sc);
1995                 break;
1996         case SIOCSIFFLAGS:
1997                 ADAPTER_LOCK(sc);
1998                 if (IS_DOOMED(p)) {
1999                         error = ENXIO;
2000                         goto fail;
2001                 }
2002                 if (ifp->if_flags & IFF_UP) {
2003                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2004                                 flags = p->if_flags;
2005                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2006                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2007                                         if (IS_BUSY(sc)) {
2008                                                 error = EBUSY;
2009                                                 goto fail;
2010                                         }
2011                                         PORT_LOCK(p);
2012                                         cxgb_update_mac_settings(p);
2013                                         PORT_UNLOCK(p);
2014                                 }
2015                                 ADAPTER_UNLOCK(sc);
2016                         } else
2017                                 error = cxgb_init_locked(p);
2018                         p->if_flags = ifp->if_flags;
2019                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2020                         error = cxgb_uninit_locked(p);
2021                 else
2022                         ADAPTER_UNLOCK(sc);
2023
2024                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2025                 break;
2026         case SIOCADDMULTI:
2027         case SIOCDELMULTI:
2028                 ADAPTER_LOCK(sc);
2029                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2030                 if (error)
2031                         goto fail;
2032
2033                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2034                         PORT_LOCK(p);
2035                         cxgb_update_mac_settings(p);
2036                         PORT_UNLOCK(p);
2037                 }
2038                 ADAPTER_UNLOCK(sc);
2039
2040                 break;
2041         case SIOCSIFCAP:
2042                 ADAPTER_LOCK(sc);
2043                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2044                 if (error)
2045                         goto fail;
2046
2047                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2048                 if (mask & IFCAP_TXCSUM) {
2049                         ifp->if_capenable ^= IFCAP_TXCSUM;
2050                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2051
2052                         if (IFCAP_TSO & ifp->if_capenable &&
2053                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2054                                 ifp->if_capenable &= ~IFCAP_TSO;
2055                                 ifp->if_hwassist &= ~CSUM_TSO;
2056                                 if_printf(ifp,
2057                                     "tso disabled due to -txcsum.\n");
2058                         }
2059                 }
2060                 if (mask & IFCAP_RXCSUM)
2061                         ifp->if_capenable ^= IFCAP_RXCSUM;
2062                 if (mask & IFCAP_TSO4) {
2063                         ifp->if_capenable ^= IFCAP_TSO4;
2064
2065                         if (IFCAP_TSO & ifp->if_capenable) {
2066                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2067                                         ifp->if_hwassist |= CSUM_TSO;
2068                                 else {
2069                                         ifp->if_capenable &= ~IFCAP_TSO;
2070                                         ifp->if_hwassist &= ~CSUM_TSO;
2071                                         if_printf(ifp,
2072                                             "enable txcsum first.\n");
2073                                         error = EAGAIN;
2074                                 }
2075                         } else
2076                                 ifp->if_hwassist &= ~CSUM_TSO;
2077                 }
2078                 if (mask & IFCAP_LRO) {
2079                         ifp->if_capenable ^= IFCAP_LRO;
2080
2081                         /* Safe to do this even if cxgb_up not called yet */
2082                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2083                 }
2084                 if (mask & IFCAP_VLAN_HWTAGGING) {
2085                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2086                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2087                                 PORT_LOCK(p);
2088                                 cxgb_update_mac_settings(p);
2089                                 PORT_UNLOCK(p);
2090                         }
2091                 }
2092                 if (mask & IFCAP_VLAN_MTU) {
2093                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2094                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2095                                 PORT_LOCK(p);
2096                                 cxgb_update_mac_settings(p);
2097                                 PORT_UNLOCK(p);
2098                         }
2099                 }
2100                 if (mask & IFCAP_VLAN_HWTSO)
2101                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2102                 if (mask & IFCAP_VLAN_HWCSUM)
2103                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2104
2105 #ifdef VLAN_CAPABILITIES
2106                 VLAN_CAPABILITIES(ifp);
2107 #endif
2108                 ADAPTER_UNLOCK(sc);
2109                 break;
2110         case SIOCSIFMEDIA:
2111         case SIOCGIFMEDIA:
2112                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2113                 break;
2114         default:
2115                 error = ether_ioctl(ifp, command, data);
2116         }
2117
2118         return (error);
2119 }
2120
/*
 * Media change handler.  Changing media is not supported; this always
 * returns EOPNOTSUPP.
 */
static int
cxgb_media_change(struct ifnet *ifp)
{

	(void)ifp;
	return (EOPNOTSUPP);
}
2126
2127 /*
2128  * Translates phy->modtype to the correct Ethernet media subtype.
2129  */
2130 static int
2131 cxgb_ifm_type(int mod)
2132 {
2133         switch (mod) {
2134         case phy_modtype_sr:
2135                 return (IFM_10G_SR);
2136         case phy_modtype_lr:
2137                 return (IFM_10G_LR);
2138         case phy_modtype_lrm:
2139                 return (IFM_10G_LRM);
2140         case phy_modtype_twinax:
2141                 return (IFM_10G_TWINAX);
2142         case phy_modtype_twinax_long:
2143                 return (IFM_10G_TWINAX_LONG);
2144         case phy_modtype_none:
2145                 return (IFM_NONE);
2146         case phy_modtype_unknown:
2147                 return (IFM_UNKNOWN);
2148         }
2149
2150         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2151         return (IFM_UNKNOWN);
2152 }
2153
2154 /*
2155  * Rebuilds the ifmedia list for this port, and sets the current media.
2156  */
2157 static void
2158 cxgb_build_medialist(struct port_info *p)
2159 {
2160         struct cphy *phy = &p->phy;
2161         struct ifmedia *media = &p->media;
2162         int mod = phy->modtype;
2163         int m = IFM_ETHER | IFM_FDX;
2164
2165         PORT_LOCK(p);
2166
2167         ifmedia_removeall(media);
2168         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2169                 /* Copper (RJ45) */
2170
2171                 if (phy->caps & SUPPORTED_10000baseT_Full)
2172                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2173
2174                 if (phy->caps & SUPPORTED_1000baseT_Full)
2175                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2176
2177                 if (phy->caps & SUPPORTED_100baseT_Full)
2178                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2179
2180                 if (phy->caps & SUPPORTED_10baseT_Full)
2181                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2182
2183                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2184                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2185
2186         } else if (phy->caps & SUPPORTED_TP) {
2187                 /* Copper (CX4) */
2188
2189                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2190                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2191
2192                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2193                 ifmedia_set(media, m | IFM_10G_CX4);
2194
2195         } else if (phy->caps & SUPPORTED_FIBRE &&
2196                    phy->caps & SUPPORTED_10000baseT_Full) {
2197                 /* 10G optical (but includes SFP+ twinax) */
2198
2199                 m |= cxgb_ifm_type(mod);
2200                 if (IFM_SUBTYPE(m) == IFM_NONE)
2201                         m &= ~IFM_FDX;
2202
2203                 ifmedia_add(media, m, mod, NULL);
2204                 ifmedia_set(media, m);
2205
2206         } else if (phy->caps & SUPPORTED_FIBRE &&
2207                    phy->caps & SUPPORTED_1000baseT_Full) {
2208                 /* 1G optical */
2209
2210                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2211                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2212                 ifmedia_set(media, m | IFM_1000_SX);
2213
2214         } else {
2215                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2216                             phy->caps));
2217         }
2218
2219         PORT_UNLOCK(p);
2220 }
2221
2222 static void
2223 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2224 {
2225         struct port_info *p = ifp->if_softc;
2226         struct ifmedia_entry *cur = p->media.ifm_cur;
2227         int speed = p->link_config.speed;
2228
2229         if (cur->ifm_data != p->phy.modtype) {
2230                 cxgb_build_medialist(p);
2231                 cur = p->media.ifm_cur;
2232         }
2233
2234         ifmr->ifm_status = IFM_AVALID;
2235         if (!p->link_config.link_ok)
2236                 return;
2237
2238         ifmr->ifm_status |= IFM_ACTIVE;
2239
2240         /*
2241          * active and current will differ iff current media is autoselect.  That
2242          * can happen only for copper RJ45.
2243          */
2244         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2245                 return;
2246         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2247                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2248
2249         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2250         if (speed == SPEED_10000)
2251                 ifmr->ifm_active |= IFM_10G_T;
2252         else if (speed == SPEED_1000)
2253                 ifmr->ifm_active |= IFM_1000_T;
2254         else if (speed == SPEED_100)
2255                 ifmr->ifm_active |= IFM_100_TX;
2256         else if (speed == SPEED_10)
2257                 ifmr->ifm_active |= IFM_10_T;
2258         else
2259                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2260                             speed));
2261 }
2262
2263 static void
2264 cxgb_async_intr(void *data)
2265 {
2266         adapter_t *sc = data;
2267
2268         t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2269         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2270         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2271 }
2272
2273 static void
2274 link_check_callout(void *arg)
2275 {
2276         struct port_info *pi = arg;
2277         struct adapter *sc = pi->adapter;
2278
2279         if (!isset(&sc->open_device_map, pi->port_id))
2280                 return;
2281
2282         taskqueue_enqueue(sc->tq, &pi->link_check_task);
2283 }
2284
2285 static void
2286 check_link_status(void *arg, int pending)
2287 {
2288         struct port_info *pi = arg;
2289         struct adapter *sc = pi->adapter;
2290
2291         if (!isset(&sc->open_device_map, pi->port_id))
2292                 return;
2293
2294         t3_link_changed(sc, pi->port_id);
2295
2296         if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2297             pi->link_config.link_ok == 0)
2298                 callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2299 }
2300
2301 void
2302 t3_os_link_intr(struct port_info *pi)
2303 {
2304         /*
2305          * Schedule a link check in the near future.  If the link is flapping
2306          * rapidly we'll keep resetting the callout and delaying the check until
2307          * things stabilize a bit.
2308          */
2309         callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2310 }
2311
2312 static void
2313 check_t3b2_mac(struct adapter *sc)
2314 {
2315         int i;
2316
2317         if (sc->flags & CXGB_SHUTDOWN)
2318                 return;
2319
2320         for_each_port(sc, i) {
2321                 struct port_info *p = &sc->port[i];
2322                 int status;
2323 #ifdef INVARIANTS
2324                 struct ifnet *ifp = p->ifp;
2325 #endif          
2326
2327                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2328                     !p->link_config.link_ok)
2329                         continue;
2330
2331                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2332                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2333                          __func__, ifp->if_drv_flags, sc->open_device_map));
2334
2335                 PORT_LOCK(p);
2336                 status = t3b2_mac_watchdog_task(&p->mac);
2337                 if (status == 1)
2338                         p->mac.stats.num_toggled++;
2339                 else if (status == 2) {
2340                         struct cmac *mac = &p->mac;
2341
2342                         cxgb_update_mac_settings(p);
2343                         t3_link_start(&p->phy, mac, &p->link_config);
2344                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2345                         t3_port_intr_enable(sc, p->port_id);
2346                         p->mac.stats.num_resets++;
2347                 }
2348                 PORT_UNLOCK(p);
2349         }
2350 }
2351
2352 static void
2353 cxgb_tick(void *arg)
2354 {
2355         adapter_t *sc = (adapter_t *)arg;
2356
2357         if (sc->flags & CXGB_SHUTDOWN)
2358                 return;
2359
2360         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2361         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2362 }
2363
2364 static void
2365 cxgb_tick_handler(void *arg, int count)
2366 {
2367         adapter_t *sc = (adapter_t *)arg;
2368         const struct adapter_params *p = &sc->params;
2369         int i;
2370         uint32_t cause, reset;
2371
2372         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2373                 return;
2374
2375         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2376                 check_t3b2_mac(sc);
2377
2378         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2379         if (cause) {
2380                 struct sge_qset *qs = &sc->sge.qs[0];
2381                 uint32_t mask, v;
2382
2383                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2384
2385                 mask = 1;
2386                 for (i = 0; i < SGE_QSETS; i++) {
2387                         if (v & mask)
2388                                 qs[i].rspq.starved++;
2389                         mask <<= 1;
2390                 }
2391
2392                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2393
2394                 for (i = 0; i < SGE_QSETS * 2; i++) {
2395                         if (v & mask) {
2396                                 qs[i / 2].fl[i % 2].empty++;
2397                         }
2398                         mask <<= 1;
2399                 }
2400
2401                 /* clear */
2402                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2403                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2404         }
2405
2406         for (i = 0; i < sc->params.nports; i++) {
2407                 struct port_info *pi = &sc->port[i];
2408                 struct ifnet *ifp = pi->ifp;
2409                 struct cmac *mac = &pi->mac;
2410                 struct mac_stats *mstats = &mac->stats;
2411                 int drops, j;
2412
2413                 if (!isset(&sc->open_device_map, pi->port_id))
2414                         continue;
2415
2416                 PORT_LOCK(pi);
2417                 t3_mac_update_stats(mac);
2418                 PORT_UNLOCK(pi);
2419
2420                 ifp->if_opackets = mstats->tx_frames;
2421                 ifp->if_ipackets = mstats->rx_frames;
2422                 ifp->if_obytes = mstats->tx_octets;
2423                 ifp->if_ibytes = mstats->rx_octets;
2424                 ifp->if_omcasts = mstats->tx_mcast_frames;
2425                 ifp->if_imcasts = mstats->rx_mcast_frames;
2426                 ifp->if_collisions = mstats->tx_total_collisions;
2427                 ifp->if_iqdrops = mstats->rx_cong_drops;
2428
2429                 drops = 0;
2430                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2431                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2432                 ifp->if_snd.ifq_drops = drops;
2433
2434                 ifp->if_oerrors =
2435                     mstats->tx_excess_collisions +
2436                     mstats->tx_underrun +
2437                     mstats->tx_len_errs +
2438                     mstats->tx_mac_internal_errs +
2439                     mstats->tx_excess_deferral +
2440                     mstats->tx_fcs_errs;
2441                 ifp->if_ierrors =
2442                     mstats->rx_jabber +
2443                     mstats->rx_data_errs +
2444                     mstats->rx_sequence_errs +
2445                     mstats->rx_runt + 
2446                     mstats->rx_too_long +
2447                     mstats->rx_mac_internal_errs +
2448                     mstats->rx_short +
2449                     mstats->rx_fcs_errs;
2450
2451                 if (mac->multiport)
2452                         continue;
2453
2454                 /* Count rx fifo overflows, once per second */
2455                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2456                 reset = 0;
2457                 if (cause & F_RXFIFO_OVERFLOW) {
2458                         mac->stats.rx_fifo_ovfl++;
2459                         reset |= F_RXFIFO_OVERFLOW;
2460                 }
2461                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2462         }
2463 }
2464
2465 static void
2466 touch_bars(device_t dev)
2467 {
2468         /*
2469          * Don't enable yet
2470          */
2471 #if !defined(__LP64__) && 0
2472         u32 v;
2473
2474         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2475         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2476         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2477         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2478         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2479         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2480 #endif
2481 }
2482
2483 static int
2484 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2485 {
2486         uint8_t *buf;
2487         int err = 0;
2488         u32 aligned_offset, aligned_len, *p;
2489         struct adapter *adapter = pi->adapter;
2490
2491
2492         aligned_offset = offset & ~3;
2493         aligned_len = (len + (offset & 3) + 3) & ~3;
2494
2495         if (aligned_offset != offset || aligned_len != len) {
2496                 buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);              
2497                 if (!buf)
2498                         return (ENOMEM);
2499                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2500                 if (!err && aligned_len > 4)
2501                         err = t3_seeprom_read(adapter,
2502                                               aligned_offset + aligned_len - 4,
2503                                               (u32 *)&buf[aligned_len - 4]);
2504                 if (err)
2505                         goto out;
2506                 memcpy(buf + (offset & 3), data, len);
2507         } else
2508                 buf = (uint8_t *)(uintptr_t)data;
2509
2510         err = t3_seeprom_wp(adapter, 0);
2511         if (err)
2512                 goto out;
2513
2514         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2515                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2516                 aligned_offset += 4;
2517         }
2518
2519         if (!err)
2520                 err = t3_seeprom_wp(adapter, 1);
2521 out:
2522         if (buf != data)
2523                 free(buf, M_DEVBUF);
2524         return err;
2525 }
2526
2527
/*
 * Returns 1 if val is negative or lies within [lo, hi] (both bounds
 * inclusive), and 0 otherwise.  A negative val is accepted regardless of
 * the bounds.
 */
static int
in_range(int val, int lo, int hi)
{

	if (val < 0)
		return (1);
	return (lo <= val && val <= hi);
}
2533
/*
 * Open handler for the extension character device.  There is nothing to set
 * up; it always succeeds and returns 0.
 */
static int
cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
{

	return (0);
}
2539
/*
 * Close handler for the extension character device.  There is nothing to
 * tear down; it always succeeds and returns 0.
 */
static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
2545
2546 static int
2547 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2548     int fflag, struct thread *td)
2549 {
2550         int mmd, error = 0;
2551         struct port_info *pi = dev->si_drv1;
2552         adapter_t *sc = pi->adapter;
2553
2554 #ifdef PRIV_SUPPORTED   
2555         if (priv_check(td, PRIV_DRIVER)) {
2556                 if (cxgb_debug) 
2557                         printf("user does not have access to privileged ioctls\n");
2558                 return (EPERM);
2559         }
2560 #else
2561         if (suser(td)) {
2562                 if (cxgb_debug)
2563                         printf("user does not have access to privileged ioctls\n");
2564                 return (EPERM);
2565         }
2566 #endif
2567         
2568         switch (cmd) {
2569         case CHELSIO_GET_MIIREG: {
2570                 uint32_t val;
2571                 struct cphy *phy = &pi->phy;
2572                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2573                 
2574                 if (!phy->mdio_read)
2575                         return (EOPNOTSUPP);
2576                 if (is_10G(sc)) {
2577                         mmd = mid->phy_id >> 8;
2578                         if (!mmd)
2579                                 mmd = MDIO_DEV_PCS;
2580                         else if (mmd > MDIO_DEV_VEND2)
2581                                 return (EINVAL);
2582
2583                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2584                                              mid->reg_num, &val);
2585                 } else
2586                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2587                                              mid->reg_num & 0x1f, &val);
2588                 if (error == 0)
2589                         mid->val_out = val;
2590                 break;
2591         }
2592         case CHELSIO_SET_MIIREG: {
2593                 struct cphy *phy = &pi->phy;
2594                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2595
2596                 if (!phy->mdio_write)
2597                         return (EOPNOTSUPP);
2598                 if (is_10G(sc)) {
2599                         mmd = mid->phy_id >> 8;
2600                         if (!mmd)
2601                                 mmd = MDIO_DEV_PCS;
2602                         else if (mmd > MDIO_DEV_VEND2)
2603                                 return (EINVAL);
2604                         
2605                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2606                                               mmd, mid->reg_num, mid->val_in);
2607                 } else
2608                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2609                                               mid->reg_num & 0x1f,
2610                                               mid->val_in);
2611                 break;
2612         }
2613         case CHELSIO_SETREG: {
2614                 struct ch_reg *edata = (struct ch_reg *)data;
2615                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2616                         return (EFAULT);
2617                 t3_write_reg(sc, edata->addr, edata->val);
2618                 break;
2619         }
2620         case CHELSIO_GETREG: {
2621                 struct ch_reg *edata = (struct ch_reg *)data;
2622                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2623                         return (EFAULT);
2624                 edata->val = t3_read_reg(sc, edata->addr);
2625                 break;
2626         }
2627         case CHELSIO_GET_SGE_CONTEXT: {
2628                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2629                 mtx_lock_spin(&sc->sge.reg_lock);
2630                 switch (ecntxt->cntxt_type) {
2631                 case CNTXT_TYPE_EGRESS:
2632                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2633                             ecntxt->data);
2634                         break;
2635                 case CNTXT_TYPE_FL:
2636                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2637                             ecntxt->data);
2638                         break;
2639                 case CNTXT_TYPE_RSP:
2640                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2641                             ecntxt->data);
2642                         break;
2643                 case CNTXT_TYPE_CQ:
2644                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2645                             ecntxt->data);
2646                         break;
2647                 default:
2648                         error = EINVAL;
2649                         break;
2650                 }
2651                 mtx_unlock_spin(&sc->sge.reg_lock);
2652                 break;
2653         }
2654         case CHELSIO_GET_SGE_DESC: {
2655                 struct ch_desc *edesc = (struct ch_desc *)data;
2656                 int ret;
2657                 if (edesc->queue_num >= SGE_QSETS * 6)
2658                         return (EINVAL);
2659                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2660                     edesc->queue_num % 6, edesc->idx, edesc->data);
2661                 if (ret < 0)
2662                         return (EINVAL);
2663                 edesc->size = ret;
2664                 break;
2665         }
2666         case CHELSIO_GET_QSET_PARAMS: {
2667                 struct qset_params *q;
2668                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2669                 int q1 = pi->first_qset;
2670                 int nqsets = pi->nqsets;
2671                 int i;
2672
2673                 if (t->qset_idx >= nqsets)
2674                         return EINVAL;
2675
2676                 i = q1 + t->qset_idx;
2677                 q = &sc->params.sge.qset[i];
2678                 t->rspq_size   = q->rspq_size;
2679                 t->txq_size[0] = q->txq_size[0];
2680                 t->txq_size[1] = q->txq_size[1];
2681                 t->txq_size[2] = q->txq_size[2];
2682                 t->fl_size[0]  = q->fl_size;
2683                 t->fl_size[1]  = q->jumbo_size;
2684                 t->polling     = q->polling;
2685                 t->lro         = q->lro;
2686                 t->intr_lat    = q->coalesce_usecs;
2687                 t->cong_thres  = q->cong_thres;
2688                 t->qnum        = i;
2689
2690                 if ((sc->flags & FULL_INIT_DONE) == 0)
2691                         t->vector = 0;
2692                 else if (sc->flags & USING_MSIX)
2693                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2694                 else
2695                         t->vector = rman_get_start(sc->irq_res);
2696
2697                 break;
2698         }
2699         case CHELSIO_GET_QSET_NUM: {
2700                 struct ch_reg *edata = (struct ch_reg *)data;
2701                 edata->val = pi->nqsets;
2702                 break;
2703         }
2704         case CHELSIO_LOAD_FW: {
2705                 uint8_t *fw_data;
2706                 uint32_t vers;
2707                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2708
2709                 /*
2710                  * You're allowed to load a firmware only before FULL_INIT_DONE
2711                  *
2712                  * FW_UPTODATE is also set so the rest of the initialization
2713                  * will not overwrite what was loaded here.  This gives you the
2714                  * flexibility to load any firmware (and maybe shoot yourself in
2715                  * the foot).
2716                  */
2717
2718                 ADAPTER_LOCK(sc);
2719                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2720                         ADAPTER_UNLOCK(sc);
2721                         return (EBUSY);
2722                 }
2723
2724                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2725                 if (!fw_data)
2726                         error = ENOMEM;
2727                 else
2728                         error = copyin(t->buf, fw_data, t->len);
2729
2730                 if (!error)
2731                         error = -t3_load_fw(sc, fw_data, t->len);
2732
2733                 if (t3_get_fw_version(sc, &vers) == 0) {
2734                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2735                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2736                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2737                 }
2738
2739                 if (!error)
2740                         sc->flags |= FW_UPTODATE;
2741
2742                 free(fw_data, M_DEVBUF);
2743                 ADAPTER_UNLOCK(sc);
2744                 break;
2745         }
2746         case CHELSIO_LOAD_BOOT: {
2747                 uint8_t *boot_data;
2748                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2749
2750                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2751                 if (!boot_data)
2752                         return ENOMEM;
2753
2754                 error = copyin(t->buf, boot_data, t->len);
2755                 if (!error)
2756                         error = -t3_load_boot(sc, boot_data, t->len);
2757
2758                 free(boot_data, M_DEVBUF);
2759                 break;
2760         }
2761         case CHELSIO_GET_PM: {
2762                 struct ch_pm *m = (struct ch_pm *)data;
2763                 struct tp_params *p = &sc->params.tp;
2764
2765                 if (!is_offload(sc))
2766                         return (EOPNOTSUPP);
2767
2768                 m->tx_pg_sz = p->tx_pg_size;
2769                 m->tx_num_pg = p->tx_num_pgs;
2770                 m->rx_pg_sz  = p->rx_pg_size;
2771                 m->rx_num_pg = p->rx_num_pgs;
2772                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2773
2774                 break;
2775         }
2776         case CHELSIO_SET_PM: {
2777                 struct ch_pm *m = (struct ch_pm *)data;
2778                 struct tp_params *p = &sc->params.tp;
2779
2780                 if (!is_offload(sc))
2781                         return (EOPNOTSUPP);
2782                 if (sc->flags & FULL_INIT_DONE)
2783                         return (EBUSY);
2784
2785                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2786                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2787                         return (EINVAL);        /* not power of 2 */
2788                 if (!(m->rx_pg_sz & 0x14000))
2789                         return (EINVAL);        /* not 16KB or 64KB */
2790                 if (!(m->tx_pg_sz & 0x1554000))
2791                         return (EINVAL);
2792                 if (m->tx_num_pg == -1)
2793                         m->tx_num_pg = p->tx_num_pgs;
2794                 if (m->rx_num_pg == -1)
2795                         m->rx_num_pg = p->rx_num_pgs;
2796                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2797                         return (EINVAL);
2798                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2799                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2800                         return (EINVAL);
2801
2802                 p->rx_pg_size = m->rx_pg_sz;
2803                 p->tx_pg_size = m->tx_pg_sz;
2804                 p->rx_num_pgs = m->rx_num_pg;
2805                 p->tx_num_pgs = m->tx_num_pg;
2806                 break;
2807         }
2808         case CHELSIO_SETMTUTAB: {
2809                 struct ch_mtus *m = (struct ch_mtus *)data;
2810                 int i;
2811                 
2812                 if (!is_offload(sc))
2813                         return (EOPNOTSUPP);
2814                 if (offload_running(sc))
2815                         return (EBUSY);
2816                 if (m->nmtus != NMTUS)
2817                         return (EINVAL);
2818                 if (m->mtus[0] < 81)         /* accommodate SACK */
2819                         return (EINVAL);
2820                 
2821                 /*
2822                  * MTUs must be in ascending order
2823                  */
2824                 for (i = 1; i < NMTUS; ++i)
2825                         if (m->mtus[i] < m->mtus[i - 1])
2826                                 return (EINVAL);
2827
2828                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2829                 break;
2830         }
2831         case CHELSIO_GETMTUTAB: {
2832                 struct ch_mtus *m = (struct ch_mtus *)data;
2833
2834                 if (!is_offload(sc))
2835                         return (EOPNOTSUPP);
2836
2837                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2838                 m->nmtus = NMTUS;
2839                 break;
2840         }
2841         case CHELSIO_GET_MEM: {
2842                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2843                 struct mc7 *mem;
2844                 uint8_t *useraddr;
2845                 u64 buf[32];
2846
2847                 /*
2848                  * Use these to avoid modifying len/addr in the return
2849                  * struct
2850                  */
2851                 uint32_t len = t->len, addr = t->addr;
2852
2853                 if (!is_offload(sc))
2854                         return (EOPNOTSUPP);
2855                 if (!(sc->flags & FULL_INIT_DONE))
2856                         return (EIO);         /* need the memory controllers */
2857                 if ((addr & 0x7) || (len & 0x7))
2858                         return (EINVAL);
2859                 if (t->mem_id == MEM_CM)
2860                         mem = &sc->cm;
2861                 else if (t->mem_id == MEM_PMRX)
2862                         mem = &sc->pmrx;
2863                 else if (t->mem_id == MEM_PMTX)
2864                         mem = &sc->pmtx;
2865                 else
2866                         return (EINVAL);
2867
2868                 /*
2869                  * Version scheme:
2870                  * bits 0..9: chip version
2871                  * bits 10..15: chip revision
2872                  */
2873                 t->version = 3 | (sc->params.rev << 10);
2874                 
2875                 /*
2876                  * Read 256 bytes at a time as len can be large and we don't
2877                  * want to use huge intermediate buffers.
2878                  */
2879                 useraddr = (uint8_t *)t->buf; 
2880                 while (len) {
2881                         unsigned int chunk = min(len, sizeof(buf));
2882
2883                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2884                         if (error)
2885                                 return (-error);
2886                         if (copyout(buf, useraddr, chunk))
2887                                 return (EFAULT);
2888                         useraddr += chunk;
2889                         addr += chunk;
2890                         len -= chunk;
2891                 }
2892                 break;
2893         }
2894         case CHELSIO_READ_TCAM_WORD: {
2895                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2896
2897                 if (!is_offload(sc))
2898                         return (EOPNOTSUPP);
2899                 if (!(sc->flags & FULL_INIT_DONE))
2900                         return (EIO);         /* need MC5 */            
2901                 return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2902                 break;
2903         }
2904         case CHELSIO_SET_TRACE_FILTER: {
2905                 struct ch_trace *t = (struct ch_trace *)data;
2906                 const struct trace_params *tp;
2907
2908                 tp = (const struct trace_params *)&t->sip;
2909                 if (t->config_tx)
2910                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2911                                                t->trace_tx);
2912                 if (t->config_rx)
2913                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2914                                                t->trace_rx);
2915                 break;
2916         }
2917         case CHELSIO_SET_PKTSCHED: {
2918                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2919                 if (sc->open_device_map == 0)
2920                         return (EAGAIN);
2921                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2922                     p->binding);
2923                 break;
2924         }
2925         case CHELSIO_IFCONF_GETREGS: {
2926                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2927                 int reglen = cxgb_get_regs_len();
2928                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2929                 if (buf == NULL) {
2930                         return (ENOMEM);
2931                 }
2932                 if (regs->len > reglen)
2933                         regs->len = reglen;
2934                 else if (regs->len < reglen)
2935                         error = ENOBUFS;
2936
2937                 if (!error) {
2938                         cxgb_get_regs(sc, regs, buf);
2939                         error = copyout(buf, regs->data, reglen);
2940                 }
2941                 free(buf, M_DEVBUF);
2942
2943                 break;
2944         }
2945         case CHELSIO_SET_HW_SCHED: {
2946                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2947                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2948
2949                 if ((sc->flags & FULL_INIT_DONE) == 0)
2950                         return (EAGAIN);       /* need TP to be initialized */
2951                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2952                     !in_range(t->channel, 0, 1) ||
2953                     !in_range(t->kbps, 0, 10000000) ||
2954                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2955                     !in_range(t->flow_ipg, 0,
2956                               dack_ticks_to_usec(sc, 0x7ff)))
2957                         return (EINVAL);
2958
2959                 if (t->kbps >= 0) {
2960                         error = t3_config_sched(sc, t->kbps, t->sched);
2961                         if (error < 0)
2962                                 return (-error);
2963                 }
2964                 if (t->class_ipg >= 0)
2965                         t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2966                 if (t->flow_ipg >= 0) {
2967                         t->flow_ipg *= 1000;     /* us -> ns */
2968                         t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2969                 }
2970                 if (t->mode >= 0) {
2971                         int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2972
2973                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2974                                          bit, t->mode ? bit : 0);
2975                 }
2976                 if (t->channel >= 0)
2977                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2978                                          1 << t->sched, t->channel << t->sched);
2979                 break;
2980         }
2981         case CHELSIO_GET_EEPROM: {
2982                 int i;
2983                 struct ch_eeprom *e = (struct ch_eeprom *)data;
2984                 uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2985
2986                 if (buf == NULL) {
2987                         return (ENOMEM);
2988                 }
2989                 e->magic = EEPROM_MAGIC;
2990                 for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2991                         error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2992
2993                 if (!error)
2994                         error = copyout(buf + e->offset, e->data, e->len);
2995
2996                 free(buf, M_DEVBUF);
2997                 break;
2998         }
2999         case CHELSIO_CLEAR_STATS: {
3000                 if (!(sc->flags & FULL_INIT_DONE))
3001                         return EAGAIN;
3002
3003                 PORT_LOCK(pi);
3004                 t3_mac_update_stats(&pi->mac);
3005                 memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3006                 PORT_UNLOCK(pi);
3007                 break;
3008         }
3009         case CHELSIO_GET_UP_LA: {
3010                 struct ch_up_la *la = (struct ch_up_la *)data;
3011                 uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3012                 if (buf == NULL) {
3013                         return (ENOMEM);
3014                 }
3015                 if (la->bufsize < LA_BUFSIZE)
3016                         error = ENOBUFS;
3017
3018                 if (!error)
3019                         error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3020                                               &la->bufsize, buf);
3021                 if (!error)
3022                         error = copyout(buf, la->data, la->bufsize);
3023
3024                 free(buf, M_DEVBUF);
3025                 break;
3026         }
3027         case CHELSIO_GET_UP_IOQS: {
3028                 struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3029                 uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3030                 uint32_t *v;
3031
3032                 if (buf == NULL) {
3033                         return (ENOMEM);
3034                 }
3035                 if (ioqs->bufsize < IOQS_BUFSIZE)
3036                         error = ENOBUFS;
3037
3038                 if (!error)
3039                         error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3040
3041                 if (!error) {
3042                         v = (uint32_t *)buf;
3043
3044                         ioqs->ioq_rx_enable = *v++;
3045                         ioqs->ioq_tx_enable = *v++;
3046                         ioqs->ioq_rx_status = *v++;
3047                         ioqs->ioq_tx_status = *v++;
3048
3049                         error = copyout(v, ioqs->data, ioqs->bufsize);
3050                 }
3051
3052                 free(buf, M_DEVBUF);
3053                 break;
3054         }
3055         case CHELSIO_SET_FILTER: {
3056                 struct ch_filter *f = (struct ch_filter *)data;
3057                 struct filter_info *p;
3058                 unsigned int nfilters = sc->params.mc5.nfilters;
3059
3060                 if (!is_offload(sc))
3061                         return (EOPNOTSUPP);    /* No TCAM */
3062                 if (!(sc->flags & FULL_INIT_DONE))
3063                         return (EAGAIN);        /* mc5 not setup yet */
3064                 if (nfilters == 0)
3065                         return (EBUSY);         /* TOE will use TCAM */
3066
3067                 /* sanity checks */
3068                 if (f->filter_id >= nfilters ||
3069                     (f->val.dip && f->mask.dip != 0xffffffff) ||
3070                     (f->val.sport && f->mask.sport != 0xffff) ||
3071                     (f->val.dport && f->mask.dport != 0xffff) ||
3072                     (f->val.vlan && f->mask.vlan != 0xfff) ||
3073                     (f->val.vlan_prio &&
3074                         f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3075                     (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3076                     f->qset >= SGE_QSETS ||
3077                     sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3078                         return (EINVAL);
3079
3080                 /* Was allocated with M_WAITOK */
3081                 KASSERT(sc->filters, ("filter table NULL\n"));
3082
3083                 p = &sc->filters[f->filter_id];
3084                 if (p->locked)
3085                         return (EPERM);
3086
3087                 bzero(p, sizeof(*p));
3088                 p->sip = f->val.sip;
3089                 p->sip_mask = f->mask.sip;
3090                 p->dip = f->val.dip;
3091                 p->sport = f->val.sport;
3092                 p->dport = f->val.dport;
3093                 p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3094                 p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3095                     FILTER_NO_VLAN_PRI;
3096                 p->mac_hit = f->mac_hit;
3097                 p->mac_vld = f->mac_addr_idx != 0xffff;
3098                 p->mac_idx = f->mac_addr_idx;
3099                 p->pkt_type = f->proto;
3100                 p->report_filter_id = f->want_filter_id;
3101                 p->pass = f->pass;
3102                 p->rss = f->rss;
3103                 p->qset = f->qset;
3104
3105                 error = set_filter(sc, f->filter_id, p);
3106                 if (error == 0)
3107                         p->valid = 1;
3108                 break;
3109         }
3110         case CHELSIO_DEL_FILTER: {
3111                 struct ch_filter *f = (struct ch_filter *)data;
3112                 struct filter_info *p;
3113                 unsigned int nfilters = sc->params.mc5.nfilters;
3114
3115                 if (!is_offload(sc))
3116                         return (EOPNOTSUPP);
3117                 if (!(sc->flags & FULL_INIT_DONE))
3118                         return (EAGAIN);
3119                 if (nfilters == 0 || sc->filters == NULL)
3120                         return (EINVAL);
3121                 if (f->filter_id >= nfilters)
3122                        return (EINVAL);
3123
3124                 p = &sc->filters[f->filter_id];
3125                 if (p->locked)
3126                         return (EPERM);
3127                 if (!p->valid)
3128                         return (EFAULT); /* Read "Bad address" as "Bad index" */
3129
3130                 bzero(p, sizeof(*p));
3131                 p->sip = p->sip_mask = 0xffffffff;
3132                 p->vlan = 0xfff;
3133                 p->vlan_prio = FILTER_NO_VLAN_PRI;
3134                 p->pkt_type = 1;
3135                 error = set_filter(sc, f->filter_id, p);
3136                 break;
3137         }
3138         case CHELSIO_GET_FILTER: {
3139                 struct ch_filter *f = (struct ch_filter *)data;
3140                 struct filter_info *p;
3141                 unsigned int i, nfilters = sc->params.mc5.nfilters;
3142
3143                 if (!is_offload(sc))
3144                         return (EOPNOTSUPP);
3145                 if (!(sc->flags & FULL_INIT_DONE))
3146                         return (EAGAIN);
3147                 if (nfilters == 0 || sc->filters == NULL)
3148                         return (EINVAL);
3149
3150                 i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3151                 for (; i < nfilters; i++) {
3152                         p = &sc->filters[i];
3153                         if (!p->valid)
3154                                 continue;
3155
3156                         bzero(f, sizeof(*f));
3157
3158                         f->filter_id = i;
3159                         f->val.sip = p->sip;
3160                         f->mask.sip = p->sip_mask;
3161                         f->val.dip = p->dip;
3162                         f->mask.dip = p->dip ? 0xffffffff : 0;
3163                         f->val.sport = p->sport;
3164                         f->mask.sport = p->sport ? 0xffff : 0;
3165                         f->val.dport = p->dport;
3166                         f->mask.dport = p->dport ? 0xffff : 0;
3167                         f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3168                         f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3169                         f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3170                             0 : p->vlan_prio;
3171                         f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3172                             0 : FILTER_NO_VLAN_PRI;
3173                         f->mac_hit = p->mac_hit;
3174                         f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3175                         f->proto = p->pkt_type;
3176                         f->want_filter_id = p->report_filter_id;
3177                         f->pass = p->pass;
3178                         f->rss = p->rss;
3179                         f->qset = p->qset;
3180
3181                         break;
3182                 }
3183                 
3184                 if (i == nfilters)
3185                         f->filter_id = 0xffffffff;
3186                 break;
3187         }
3188         default:
3189                 return (EOPNOTSUPP);
3190                 break;
3191         }
3192
3193         return (error);
3194 }
3195
/*
 * Copy the 32-bit registers at offsets start..end (inclusive, stepping by
 * four bytes) into buf.  Each value is stored at the same byte offset in
 * buf as the register's own offset, so buf must extend at least to
 * end + 4 bytes.
 */
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	unsigned int reg;
	uint32_t *dst;

	dst = (uint32_t *)(buf + start);
	reg = start;
	while (reg <= end) {
		*dst = t3_read_reg(ap, reg);
		dst++;
		reg += sizeof(uint32_t);
	}
}
3205
/* Size in bytes of the register dump buffer filled by cxgb_get_regs(). */
#define T3_REGMAP_SIZE (3 * 1024)

/* Return the number of bytes a caller must provide for a register dump. */
static int
cxgb_get_regs_len(void)
{
	int nbytes = T3_REGMAP_SIZE;

	return (nbytes);
}
3212
3213 static void
3214 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3215 {           
3216         
3217         /*
3218          * Version scheme:
3219          * bits 0..9: chip version
3220          * bits 10..15: chip revision
3221          * bit 31: set for PCIe cards
3222          */
3223         regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3224
3225         /*
3226          * We skip the MAC statistics registers because they are clear-on-read.
3227          * Also reading multi-register stats would need to synchronize with the
3228          * periodic mac stats accumulation.  Hard to justify the complexity.
3229          */
3230         memset(buf, 0, cxgb_get_regs_len());
3231         reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3232         reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3233         reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3234         reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3235         reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3236         reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3237                        XGM_REG(A_XGM_SERDES_STAT3, 1));
3238         reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3239                        XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3240 }
3241
3242 static int
3243 alloc_filters(struct adapter *sc)
3244 {
3245         struct filter_info *p;
3246         unsigned int nfilters = sc->params.mc5.nfilters;
3247
3248         if (nfilters == 0)
3249                 return (0);
3250
3251         p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3252         sc->filters = p;
3253
3254         p = &sc->filters[nfilters - 1];
3255         p->vlan = 0xfff;
3256         p->vlan_prio = FILTER_NO_VLAN_PRI;
3257         p->pass = p->rss = p->valid = p->locked = 1;
3258
3259         return (0);
3260 }
3261
3262 static int
3263 setup_hw_filters(struct adapter *sc)
3264 {
3265         int i, rc;
3266         unsigned int nfilters = sc->params.mc5.nfilters;
3267
3268         if (!sc->filters)
3269                 return (0);
3270
3271         t3_enable_filters(sc);
3272
3273         for (i = rc = 0; i < nfilters && !rc; i++) {
3274                 if (sc->filters[i].locked)
3275                         rc = set_filter(sc, i, &sc->filters[i]);
3276         }
3277
3278         return (rc);
3279 }
3280
3281 static int
3282 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3283 {
3284         int len;
3285         struct mbuf *m;
3286         struct ulp_txpkt *txpkt;
3287         struct work_request_hdr *wr;
3288         struct cpl_pass_open_req *oreq;
3289         struct cpl_set_tcb_field *sreq;
3290
3291         len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3292         KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3293
3294         id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3295               sc->params.mc5.nfilters;
3296
3297         m = m_gethdr(M_WAITOK, MT_DATA);
3298         m->m_len = m->m_pkthdr.len = len;
3299         bzero(mtod(m, char *), len);
3300
3301         wr = mtod(m, struct work_request_hdr *);
3302         wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3303
3304         oreq = (struct cpl_pass_open_req *)(wr + 1);
3305         txpkt = (struct ulp_txpkt *)oreq;
3306         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3307         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3308         OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3309         oreq->local_port = htons(f->dport);
3310         oreq->peer_port = htons(f->sport);
3311         oreq->local_ip = htonl(f->dip);
3312         oreq->peer_ip = htonl(f->sip);
3313         oreq->peer_netmask = htonl(f->sip_mask);
3314         oreq->opt0h = 0;
3315         oreq->opt0l = htonl(F_NO_OFFLOAD);
3316         oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3317                          V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3318                          V_VLAN_PRI(f->vlan_prio >> 1) |
3319                          V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3320                          V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3321                          V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3322
3323         sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3324         set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3325                           (f->report_filter_id << 15) | (1 << 23) |
3326                           ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3327         set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3328         t3_mgmt_tx(sc, m);
3329
3330         if (f->pass && !f->rss) {
3331                 len = sizeof(*sreq);
3332                 m = m_gethdr(M_WAITOK, MT_DATA);
3333                 m->m_len = m->m_pkthdr.len = len;
3334                 bzero(mtod(m, char *), len);
3335                 sreq = mtod(m, struct cpl_set_tcb_field *);
3336                 sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3337                 mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3338                                  (u64)sc->rrss_map[f->qset] << 19);
3339                 t3_mgmt_tx(sc, m);
3340         }
3341         return 0;
3342 }
3343
3344 static inline void
3345 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3346     unsigned int word, u64 mask, u64 val)
3347 {
3348         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3349         req->reply = V_NO_REPLY(1);
3350         req->cpu_idx = 0;
3351         req->word = htons(word);
3352         req->mask = htobe64(mask);
3353         req->val = htobe64(val);
3354 }
3355
3356 static inline void
3357 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3358     unsigned int word, u64 mask, u64 val)
3359 {
3360         struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3361
3362         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3363         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3364         mk_set_tcb_field(req, tid, word, mask, val);
3365 }