]> CyberLeo.Net >> Repos - FreeBSD/stable/8.git/blob - sys/dev/cxgb/cxgb_main.c
MFC r231175:
[FreeBSD/stable/8.git] / sys / dev / cxgb / cxgb_main.c
1 /**************************************************************************
2
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78
79 #include <cxgb_include.h>
80
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126
127 static device_method_t cxgb_controller_methods[] = {
128         DEVMETHOD(device_probe,         cxgb_controller_probe),
129         DEVMETHOD(device_attach,        cxgb_controller_attach),
130         DEVMETHOD(device_detach,        cxgb_controller_detach),
131
132         DEVMETHOD_END
133 };
134
135 static driver_t cxgb_controller_driver = {
136         "cxgbc",
137         cxgb_controller_methods,
138         sizeof(struct adapter)
139 };
140
141 static devclass_t       cxgb_controller_devclass;
142 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
143
144 /*
145  * Attachment glue for the ports.  Attachment is done directly to the
146  * controller device.
147  */
148 static int cxgb_port_probe(device_t);
149 static int cxgb_port_attach(device_t);
150 static int cxgb_port_detach(device_t);
151
152 static device_method_t cxgb_port_methods[] = {
153         DEVMETHOD(device_probe,         cxgb_port_probe),
154         DEVMETHOD(device_attach,        cxgb_port_attach),
155         DEVMETHOD(device_detach,        cxgb_port_detach),
156         { 0, 0 }
157 };
158
159 static driver_t cxgb_port_driver = {
160         "cxgb",
161         cxgb_port_methods,
162         0
163 };
164
165 static d_ioctl_t cxgb_extension_ioctl;
166 static d_open_t cxgb_extension_open;
167 static d_close_t cxgb_extension_close;
168
169 static struct cdevsw cxgb_cdevsw = {
170        .d_version =    D_VERSION,
171        .d_flags =      0,
172        .d_open =       cxgb_extension_open,
173        .d_close =      cxgb_extension_close,
174        .d_ioctl =      cxgb_extension_ioctl,
175        .d_name =       "cxgb",
176 };
177
178 static devclass_t       cxgb_port_devclass;
179 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
180
181 /*
182  * The driver uses the best interrupt scheme available on a platform in the
183  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
184  * of these schemes the driver may consider as follows:
185  *
186  * msi = 2: choose from among all three options
187  * msi = 1 : only consider MSI and pin interrupts
188  * msi = 0: force pin interrupts
189  */
190 static int msi_allowed = 2;
191
192 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
193 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
194 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
195     "MSI-X, MSI, INTx selector");
196
197 /*
198  * The driver enables offload as a default.
199  * To disable it, use ofld_disable = 1.
200  */
201 static int ofld_disable = 0;
202 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
203 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
204     "disable ULP offload");
205
206 /*
207  * The driver uses an auto-queue algorithm by default.
208  * To disable it and force a single queue-set per port, use multiq = 0
209  */
210 static int multiq = 1;
211 TUNABLE_INT("hw.cxgb.multiq", &multiq);
212 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
213     "use min(ncpus/ports, 8) queue-sets per port");
214
215 /*
216  * By default the driver will not update the firmware unless
217  * it was compiled against a newer version
218  * 
219  */
220 static int force_fw_update = 0;
221 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
222 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
223     "update firmware even if up to date");
224
225 int cxgb_use_16k_clusters = -1;
226 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
227 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
228     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
229
230 static int nfilters = -1;
231 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
232 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
233     &nfilters, 0, "max number of entries in the filter table");
234
/* Upper and lower bounds on the sizes of the various queues. */
enum {
	/* Upper bounds. */
	MAX_TXQ_ENTRIES = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES = 16384,
	MAX_RX_BUFFERS = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,

	/* Lower bounds. */
	MIN_TXQ_ENTRIES = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES = 32,
	MIN_FL_ENTRIES = 32,
	MIN_FL_JUMBO_ENTRIES = 32
};
247
248 struct filter_info {
249         u32 sip;
250         u32 sip_mask;
251         u32 dip;
252         u16 sport;
253         u16 dport;
254         u32 vlan:12;
255         u32 vlan_prio:3;
256         u32 mac_hit:1;
257         u32 mac_idx:4;
258         u32 mac_vld:1;
259         u32 pkt_type:2;
260         u32 report_filter_id:1;
261         u32 pass:1;
262         u32 rss:1;
263         u32 qset:3;
264         u32 locked:1;
265         u32 valid:1;
266 };
267
/* NOTE(review): presumably the vlan_prio value meaning "no priority match". */
enum {
	FILTER_NO_VLAN_PRI = 7
};

#define	EEPROM_MAGIC	0x38E2F10C

/* Bitmask with one bit set for each of the MAX_NPORTS possible ports. */
#define	PORT_MASK	((1 << MAX_NPORTS) - 1)
273
274 /* Table for probing the cards.  The desc field isn't actually used */
275 struct cxgb_ident {
276         uint16_t        vendor;
277         uint16_t        device;
278         int             index;
279         char            *desc;
280 } cxgb_identifiers[] = {
281         {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282         {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283         {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284         {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285         {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286         {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287         {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288         {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289         {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290         {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291         {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292         {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293         {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294         {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295         {0, 0, 0, NULL}
296 };
297
/* Defined later in this file. */
static int set_eeprom(struct port_info *pi, const uint8_t *data, int len,
    int offset);
299
300
301 static __inline char
302 t3rev2char(struct adapter *adapter)
303 {
304         char rev = 'z';
305
306         switch(adapter->params.rev) {
307         case T3_REV_A:
308                 rev = 'a';
309                 break;
310         case T3_REV_B:
311         case T3_REV_B2:
312                 rev = 'b';
313                 break;
314         case T3_REV_C:
315                 rev = 'c';
316                 break;
317         }
318         return rev;
319 }
320
321 static struct cxgb_ident *
322 cxgb_get_ident(device_t dev)
323 {
324         struct cxgb_ident *id;
325
326         for (id = cxgb_identifiers; id->desc != NULL; id++) {
327                 if ((id->vendor == pci_get_vendor(dev)) &&
328                     (id->device == pci_get_device(dev))) {
329                         return (id);
330                 }
331         }
332         return (NULL);
333 }
334
335 static const struct adapter_info *
336 cxgb_get_adapter_info(device_t dev)
337 {
338         struct cxgb_ident *id;
339         const struct adapter_info *ai;
340
341         id = cxgb_get_ident(dev);
342         if (id == NULL)
343                 return (NULL);
344
345         ai = t3_get_adapter_info(id->index);
346
347         return (ai);
348 }
349
350 static int
351 cxgb_controller_probe(device_t dev)
352 {
353         const struct adapter_info *ai;
354         char *ports, buf[80];
355         int nports;
356
357         ai = cxgb_get_adapter_info(dev);
358         if (ai == NULL)
359                 return (ENXIO);
360
361         nports = ai->nports0 + ai->nports1;
362         if (nports == 1)
363                 ports = "port";
364         else
365                 ports = "ports";
366
367         snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368         device_set_desc_copy(dev, buf);
369         return (BUS_PROBE_DEFAULT);
370 }
371
/*
 * Names of the firmware(9) images used by the driver.  The %c in the last
 * two is a format placeholder filled in by the code that uses them.
 */
#define	FW_FNAME	"cxgb_t3fw"
#define	TPEEPROM_NAME	"cxgb_t3%c_tp_eeprom"
#define	TPSRAM_NAME	"cxgb_t3%c_protocol_sram"
375
376 static int
377 upgrade_fw(adapter_t *sc)
378 {
379         const struct firmware *fw;
380         int status;
381         u32 vers;
382         
383         if ((fw = firmware_get(FW_FNAME)) == NULL)  {
384                 device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
385                 return (ENOENT);
386         } else
387                 device_printf(sc->dev, "installing firmware on card\n");
388         status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
389
390         if (status != 0) {
391                 device_printf(sc->dev, "failed to install firmware: %d\n",
392                     status);
393         } else {
394                 t3_get_fw_version(sc, &vers);
395                 snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
396                     G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
397                     G_FW_VERSION_MICRO(vers));
398         }
399
400         firmware_put(fw, FIRMWARE_UNLOAD);
401
402         return (status);        
403 }
404
405 /*
406  * The cxgb_controller_attach function is responsible for the initial
407  * bringup of the device.  Its responsibilities include:
408  *
409  *  1. Determine if the device supports MSI or MSI-X.
410  *  2. Allocate bus resources so that we can access the Base Address Register
411  *  3. Create and initialize mutexes for the controller and its control
412  *     logic such as SGE and MDIO.
413  *  4. Call hardware specific setup routine for the adapter as a whole.
414  *  5. Allocate the BAR for doing MSI-X.
415  *  6. Setup the line interrupt iff MSI-X is not supported.
416  *  7. Create the driver's taskq.
417  *  8. Start one task queue service thread.
418  *  9. Check if the firmware and SRAM are up-to-date.  They will be
419  *     auto-updated later (before FULL_INIT_DONE), if required.
420  * 10. Create a child device for each MAC (port)
421  * 11. Initialize T3 private state.
422  * 12. Trigger the LED
423  * 13. Setup offload iff supported.
424  * 14. Reset/restart the tick callout.
425  * 15. Attach sysctls
426  *
427  * NOTE: Any modification or deviation from this list MUST be reflected in
428  * the above comment.  Failure to do so will result in problems on various
429  * error conditions including link flapping.
430  */
431 static int
432 cxgb_controller_attach(device_t dev)
433 {
434         device_t child;
435         const struct adapter_info *ai;
436         struct adapter *sc;
437         int i, error = 0;
438         uint32_t vers;
439         int port_qsets = 1;
440         int msi_needed, reg;
441         char buf[80];
442
443         sc = device_get_softc(dev);
444         sc->dev = dev;
445         sc->msi_count = 0;
446         ai = cxgb_get_adapter_info(dev);
447
448         /* find the PCIe link width and set max read request to 4KB*/
449         if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
450                 uint16_t lnk;
451
452                 lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
453                 sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
454                 if (sc->link_width < 8 &&
455                     (ai->caps & SUPPORTED_10000baseT_Full)) {
456                         device_printf(sc->dev,
457                             "PCIe x%d Link, expect reduced performance\n",
458                             sc->link_width);
459                 }
460
461                 pci_set_max_read_req(dev, 4096);
462         }
463
464         touch_bars(dev);
465         pci_enable_busmaster(dev);
466         /*
467          * Allocate the registers and make them available to the driver.
468          * The registers that we care about for NIC mode are in BAR 0
469          */
470         sc->regs_rid = PCIR_BAR(0);
471         if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
472             &sc->regs_rid, RF_ACTIVE)) == NULL) {
473                 device_printf(dev, "Cannot allocate BAR region 0\n");
474                 return (ENXIO);
475         }
476
477         snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
478             device_get_unit(dev));
479         ADAPTER_LOCK_INIT(sc, sc->lockbuf);
480
481         snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
482             device_get_unit(dev));
483         snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
484             device_get_unit(dev));
485         snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
486             device_get_unit(dev));
487         
488         MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
489         MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
490         MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
491         
492         sc->bt = rman_get_bustag(sc->regs_res);
493         sc->bh = rman_get_bushandle(sc->regs_res);
494         sc->mmio_len = rman_get_size(sc->regs_res);
495
496         for (i = 0; i < MAX_NPORTS; i++)
497                 sc->port[i].adapter = sc;
498
499         if (t3_prep_adapter(sc, ai, 1) < 0) {
500                 printf("prep adapter failed\n");
501                 error = ENODEV;
502                 goto out;
503         }
504
505         sc->udbs_rid = PCIR_BAR(2);
506         sc->udbs_res = NULL;
507         if (is_offload(sc) &&
508             ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
509                    &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
510                 device_printf(dev, "Cannot allocate BAR region 1\n");
511                 error = ENXIO;
512                 goto out;
513         }
514
515         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516          * enough messages for the queue sets.  If that fails, try falling
517          * back to MSI.  If that fails, then try falling back to the legacy
518          * interrupt pin model.
519          */
520         sc->msix_regs_rid = 0x20;
521         if ((msi_allowed >= 2) &&
522             (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523             &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
524
525                 if (multiq)
526                         port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
527                 msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
528
529                 if (pci_msix_count(dev) == 0 ||
530                     (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
531                     sc->msi_count != msi_needed) {
532                         device_printf(dev, "alloc msix failed - "
533                                       "msi_count=%d, msi_needed=%d, err=%d; "
534                                       "will try MSI\n", sc->msi_count,
535                                       msi_needed, error);
536                         sc->msi_count = 0;
537                         port_qsets = 1;
538                         pci_release_msi(dev);
539                         bus_release_resource(dev, SYS_RES_MEMORY,
540                             sc->msix_regs_rid, sc->msix_regs_res);
541                         sc->msix_regs_res = NULL;
542                 } else {
543                         sc->flags |= USING_MSIX;
544                         sc->cxgb_intr = cxgb_async_intr;
545                         device_printf(dev,
546                                       "using MSI-X interrupts (%u vectors)\n",
547                                       sc->msi_count);
548                 }
549         }
550
551         if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
552                 sc->msi_count = 1;
553                 if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
554                         device_printf(dev, "alloc msi failed - "
555                                       "err=%d; will try INTx\n", error);
556                         sc->msi_count = 0;
557                         port_qsets = 1;
558                         pci_release_msi(dev);
559                 } else {
560                         sc->flags |= USING_MSI;
561                         sc->cxgb_intr = t3_intr_msi;
562                         device_printf(dev, "using MSI interrupts\n");
563                 }
564         }
565         if (sc->msi_count == 0) {
566                 device_printf(dev, "using line interrupts\n");
567                 sc->cxgb_intr = t3b_intr;
568         }
569
570         /* Create a private taskqueue thread for handling driver events */
571         sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
572             taskqueue_thread_enqueue, &sc->tq);
573         if (sc->tq == NULL) {
574                 device_printf(dev, "failed to allocate controller task queue\n");
575                 goto out;
576         }
577
578         taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
579             device_get_nameunit(dev));
580         TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
581
582         
583         /* Create a periodic callout for checking adapter status */
584         callout_init(&sc->cxgb_tick_ch, TRUE);
585         
586         if (t3_check_fw_version(sc) < 0 || force_fw_update) {
587                 /*
588                  * Warn user that a firmware update will be attempted in init.
589                  */
590                 device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
591                     FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
592                 sc->flags &= ~FW_UPTODATE;
593         } else {
594                 sc->flags |= FW_UPTODATE;
595         }
596
597         if (t3_check_tpsram_version(sc) < 0) {
598                 /*
599                  * Warn user that a firmware update will be attempted in init.
600                  */
601                 device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
602                     t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
603                 sc->flags &= ~TPS_UPTODATE;
604         } else {
605                 sc->flags |= TPS_UPTODATE;
606         }
607         
608         /*
609          * Create a child device for each MAC.  The ethernet attachment
610          * will be done in these children.
611          */     
612         for (i = 0; i < (sc)->params.nports; i++) {
613                 struct port_info *pi;
614                 
615                 if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
616                         device_printf(dev, "failed to add child port\n");
617                         error = EINVAL;
618                         goto out;
619                 }
620                 pi = &sc->port[i];
621                 pi->adapter = sc;
622                 pi->nqsets = port_qsets;
623                 pi->first_qset = i*port_qsets;
624                 pi->port_id = i;
625                 pi->tx_chan = i >= ai->nports0;
626                 pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
627                 sc->rxpkt_map[pi->txpkt_intf] = i;
628                 sc->port[i].tx_chan = i >= ai->nports0;
629                 sc->portdev[i] = child;
630                 device_set_softc(child, pi);
631         }
632         if ((error = bus_generic_attach(dev)) != 0)
633                 goto out;
634
635         /* initialize sge private state */
636         t3_sge_init_adapter(sc);
637
638         t3_led_ready(sc);
639         
640         cxgb_offload_init();
641         if (is_offload(sc)) {
642                 setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
643                 cxgb_adapter_ofld(sc);
644         }
645         error = t3_get_fw_version(sc, &vers);
646         if (error)
647                 goto out;
648
649         snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
650             G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
651             G_FW_VERSION_MICRO(vers));
652
653         snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
654                  ai->desc, is_offload(sc) ? "R" : "",
655                  sc->params.vpd.ec, sc->params.vpd.sn);
656         device_set_desc_copy(dev, buf);
657
658         snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
659                  sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
660                  sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
661
662         device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
663         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
664         t3_add_attach_sysctls(sc);
665 out:
666         if (error)
667                 cxgb_free(sc);
668
669         return (error);
670 }
671
672 /*
673  * The cxgb_controller_detach routine is called with the device is
674  * unloaded from the system.
675  */
676
677 static int
678 cxgb_controller_detach(device_t dev)
679 {
680         struct adapter *sc;
681
682         sc = device_get_softc(dev);
683
684         cxgb_free(sc);
685
686         return (0);
687 }
688
689 /*
690  * The cxgb_free() is called by the cxgb_controller_detach() routine
691  * to tear down the structures that were built up in
692  * cxgb_controller_attach(), and should be the final piece of work
693  * done when fully unloading the driver.
694  * 
695  *
696  *  1. Shutting down the threads started by the cxgb_controller_attach()
697  *     routine.
698  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
699  *  3. Detaching all of the port devices created during the
700  *     cxgb_controller_attach() routine.
701  *  4. Removing the device children created via cxgb_controller_attach().
702  *  5. Releasing PCI resources associated with the device.
703  *  6. Turning off the offload support, iff it was turned on.
704  *  7. Destroying the mutexes created in cxgb_controller_attach().
705  *
706  */
707 static void
708 cxgb_free(struct adapter *sc)
709 {
710         int i, nqsets = 0;
711
712         ADAPTER_LOCK(sc);
713         sc->flags |= CXGB_SHUTDOWN;
714         ADAPTER_UNLOCK(sc);
715
716         /*
717          * Make sure all child devices are gone.
718          */
719         bus_generic_detach(sc->dev);
720         for (i = 0; i < (sc)->params.nports; i++) {
721                 if (sc->portdev[i] &&
722                     device_delete_child(sc->dev, sc->portdev[i]) != 0)
723                         device_printf(sc->dev, "failed to delete child port\n");
724                 nqsets += sc->port[i].nqsets;
725         }
726
727         /*
728          * At this point, it is as if cxgb_port_detach has run on all ports, and
729          * cxgb_down has run on the adapter.  All interrupts have been silenced,
730          * all open devices have been closed.
731          */
732         KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
733                                            __func__, sc->open_device_map));
734         for (i = 0; i < sc->params.nports; i++) {
735                 KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
736                                                   __func__, i));
737         }
738
739         /*
740          * Finish off the adapter's callouts.
741          */
742         callout_drain(&sc->cxgb_tick_ch);
743         callout_drain(&sc->sge_timer_ch);
744
745         /*
746          * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
747          * sysctls are cleaned up by the kernel linker.
748          */
749         if (sc->flags & FULL_INIT_DONE) {
750                 t3_free_sge_resources(sc, nqsets);
751                 sc->flags &= ~FULL_INIT_DONE;
752         }
753
754         /*
755          * Release all interrupt resources.
756          */
757         cxgb_teardown_interrupts(sc);
758         if (sc->flags & (USING_MSI | USING_MSIX)) {
759                 device_printf(sc->dev, "releasing msi message(s)\n");
760                 pci_release_msi(sc->dev);
761         } else {
762                 device_printf(sc->dev, "no msi message to release\n");
763         }
764
765         if (sc->msix_regs_res != NULL) {
766                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
767                     sc->msix_regs_res);
768         }
769
770         /*
771          * Free the adapter's taskqueue.
772          */
773         if (sc->tq != NULL) {
774                 taskqueue_free(sc->tq);
775                 sc->tq = NULL;
776         }
777         
778         if (is_offload(sc)) {
779                 clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
780                 cxgb_adapter_unofld(sc);
781         }
782
783 #ifdef notyet
784         if (sc->flags & CXGB_OFLD_INIT)
785                 cxgb_offload_deactivate(sc);
786 #endif
787         free(sc->filters, M_DEVBUF);
788         t3_sge_free(sc);
789
790         cxgb_offload_exit();
791
792         if (sc->udbs_res != NULL)
793                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
794                     sc->udbs_res);
795
796         if (sc->regs_res != NULL)
797                 bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
798                     sc->regs_res);
799
800         MTX_DESTROY(&sc->mdio_lock);
801         MTX_DESTROY(&sc->sge.reg_lock);
802         MTX_DESTROY(&sc->elmer_lock);
803         ADAPTER_LOCK_DEINIT(sc);
804 }
805
806 /**
807  *      setup_sge_qsets - configure SGE Tx/Rx/response queues
808  *      @sc: the controller softc
809  *
810  *      Determines how many sets of SGE queues to use and initializes them.
811  *      We support multiple queue sets per port if we have MSI-X, otherwise
812  *      just one queue set per port.
813  */
814 static int
815 setup_sge_qsets(adapter_t *sc)
816 {
817         int i, j, err, irq_idx = 0, qset_idx = 0;
818         u_int ntxq = SGE_TXQ_PER_SET;
819
820         if ((err = t3_sge_alloc(sc)) != 0) {
821                 device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
822                 return (err);
823         }
824
825         if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
826                 irq_idx = -1;
827
828         for (i = 0; i < (sc)->params.nports; i++) {
829                 struct port_info *pi = &sc->port[i];
830
831                 for (j = 0; j < pi->nqsets; j++, qset_idx++) {
832                         err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
833                             (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
834                             &sc->params.sge.qset[qset_idx], ntxq, pi);
835                         if (err) {
836                                 t3_free_sge_resources(sc, qset_idx);
837                                 device_printf(sc->dev,
838                                     "t3_sge_alloc_qset failed with %d\n", err);
839                                 return (err);
840                         }
841                 }
842         }
843
844         return (0);
845 }
846
847 static void
848 cxgb_teardown_interrupts(adapter_t *sc)
849 {
850         int i;
851
852         for (i = 0; i < SGE_QSETS; i++) {
853                 if (sc->msix_intr_tag[i] == NULL) {
854
855                         /* Should have been setup fully or not at all */
856                         KASSERT(sc->msix_irq_res[i] == NULL &&
857                                 sc->msix_irq_rid[i] == 0,
858                                 ("%s: half-done interrupt (%d).", __func__, i));
859
860                         continue;
861                 }
862
863                 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
864                                   sc->msix_intr_tag[i]);
865                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
866                                      sc->msix_irq_res[i]);
867
868                 sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
869                 sc->msix_irq_rid[i] = 0;
870         }
871
872         if (sc->intr_tag) {
873                 KASSERT(sc->irq_res != NULL,
874                         ("%s: half-done interrupt.", __func__));
875
876                 bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
877                 bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
878                                      sc->irq_res);
879
880                 sc->irq_res = sc->intr_tag = NULL;
881                 sc->irq_rid = 0;
882         }
883 }
884
885 static int
886 cxgb_setup_interrupts(adapter_t *sc)
887 {
888         struct resource *res;
889         void *tag;
890         int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
891
892         sc->irq_rid = intr_flag ? 1 : 0;
893         sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
894                                              RF_SHAREABLE | RF_ACTIVE);
895         if (sc->irq_res == NULL) {
896                 device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
897                               intr_flag, sc->irq_rid);
898                 err = EINVAL;
899                 sc->irq_rid = 0;
900         } else {
901                 err = bus_setup_intr(sc->dev, sc->irq_res,
902                     INTR_MPSAFE | INTR_TYPE_NET, NULL,
903                     sc->cxgb_intr, sc, &sc->intr_tag);
904
905                 if (err) {
906                         device_printf(sc->dev,
907                                       "Cannot set up interrupt (%x, %u, %d)\n",
908                                       intr_flag, sc->irq_rid, err);
909                         bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
910                                              sc->irq_res);
911                         sc->irq_res = sc->intr_tag = NULL;
912                         sc->irq_rid = 0;
913                 }
914         }
915
916         /* That's all for INTx or MSI */
917         if (!(intr_flag & USING_MSIX) || err)
918                 return (err);
919
920         for (i = 0; i < sc->msi_count - 1; i++) {
921                 rid = i + 2;
922                 res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
923                                              RF_SHAREABLE | RF_ACTIVE);
924                 if (res == NULL) {
925                         device_printf(sc->dev, "Cannot allocate interrupt "
926                                       "for message %d\n", rid);
927                         err = EINVAL;
928                         break;
929                 }
930
931                 err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
932                                      NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
933                 if (err) {
934                         device_printf(sc->dev, "Cannot set up interrupt "
935                                       "for message %d (%d)\n", rid, err);
936                         bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
937                         break;
938                 }
939
940                 sc->msix_irq_rid[i] = rid;
941                 sc->msix_irq_res[i] = res;
942                 sc->msix_intr_tag[i] = tag;
943         }
944
945         if (err)
946                 cxgb_teardown_interrupts(sc);
947
948         return (err);
949 }
950
951
952 static int
953 cxgb_port_probe(device_t dev)
954 {
955         struct port_info *p;
956         char buf[80];
957         const char *desc;
958         
959         p = device_get_softc(dev);
960         desc = p->phy.desc;
961         snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
962         device_set_desc_copy(dev, buf);
963         return (0);
964 }
965
966
967 static int
968 cxgb_makedev(struct port_info *pi)
969 {
970         
971         pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
972             UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
973         
974         if (pi->port_cdev == NULL)
975                 return (ENOMEM);
976
977         pi->port_cdev->si_drv1 = (void *)pi;
978         
979         return (0);
980 }
981
/*
 * CXGB_CAP is the interface capability mask that cxgb_port_attach stores in
 * if_capabilities; CXGB_CAP_ENABLE is the subset it stores in if_capenable,
 * i.e. the same mask with the IFCAP_TSO6 bit cleared.
 */
#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
986
987 static int
988 cxgb_port_attach(device_t dev)
989 {
990         struct port_info *p;
991         struct ifnet *ifp;
992         int err;
993         struct adapter *sc;
994
995         p = device_get_softc(dev);
996         sc = p->adapter;
997         snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
998             device_get_unit(device_get_parent(dev)), p->port_id);
999         PORT_LOCK_INIT(p, p->lockbuf);
1000
1001         callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1002         TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1003
1004         /* Allocate an ifnet object and set it up */
1005         ifp = p->ifp = if_alloc(IFT_ETHER);
1006         if (ifp == NULL) {
1007                 device_printf(dev, "Cannot allocate ifnet\n");
1008                 return (ENOMEM);
1009         }
1010         
1011         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1012         ifp->if_init = cxgb_init;
1013         ifp->if_softc = p;
1014         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1015         ifp->if_ioctl = cxgb_ioctl;
1016         ifp->if_transmit = cxgb_transmit;
1017         ifp->if_qflush = cxgb_qflush;
1018
1019         ifp->if_capabilities = CXGB_CAP;
1020         ifp->if_capenable = CXGB_CAP_ENABLE;
1021         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1022
1023         /*
1024          * Disable TSO on 4-port - it isn't supported by the firmware.
1025          */     
1026         if (sc->params.nports > 2) {
1027                 ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1028                 ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1029                 ifp->if_hwassist &= ~CSUM_TSO;
1030         }
1031
1032         ether_ifattach(ifp, p->hw_addr);
1033
1034 #ifdef DEFAULT_JUMBO
1035         if (sc->params.nports <= 2)
1036                 ifp->if_mtu = ETHERMTU_JUMBO;
1037 #endif
1038         if ((err = cxgb_makedev(p)) != 0) {
1039                 printf("makedev failed %d\n", err);
1040                 return (err);
1041         }
1042
1043         /* Create a list of media supported by this port */
1044         ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1045             cxgb_media_status);
1046         cxgb_build_medialist(p);
1047       
1048         t3_sge_init_port(p);
1049
1050         return (err);
1051 }
1052
1053 /*
1054  * cxgb_port_detach() is called via the device_detach methods when
1055  * cxgb_free() calls the bus_generic_detach.  It is responsible for 
1056  * removing the device from the view of the kernel, i.e. from all 
1057  * interfaces lists etc.  This routine is only called when the driver is 
1058  * being unloaded, not when the link goes down.
1059  */
1060 static int
1061 cxgb_port_detach(device_t dev)
1062 {
1063         struct port_info *p;
1064         struct adapter *sc;
1065         int i;
1066
1067         p = device_get_softc(dev);
1068         sc = p->adapter;
1069
1070         /* Tell cxgb_ioctl and if_init that the port is going away */
1071         ADAPTER_LOCK(sc);
1072         SET_DOOMED(p);
1073         wakeup(&sc->flags);
1074         while (IS_BUSY(sc))
1075                 mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1076         SET_BUSY(sc);
1077         ADAPTER_UNLOCK(sc);
1078
1079         if (p->port_cdev != NULL)
1080                 destroy_dev(p->port_cdev);
1081
1082         cxgb_uninit_synchronized(p);
1083         ether_ifdetach(p->ifp);
1084
1085         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1086                 struct sge_qset *qs = &sc->sge.qs[i];
1087                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1088
1089                 callout_drain(&txq->txq_watchdog);
1090                 callout_drain(&txq->txq_timer);
1091         }
1092
1093         PORT_LOCK_DEINIT(p);
1094         if_free(p->ifp);
1095         p->ifp = NULL;
1096
1097         ADAPTER_LOCK(sc);
1098         CLR_BUSY(sc);
1099         wakeup_one(&sc->flags);
1100         ADAPTER_UNLOCK(sc);
1101         return (0);
1102 }
1103
1104 void
1105 t3_fatal_err(struct adapter *sc)
1106 {
1107         u_int fw_status[4];
1108
1109         if (sc->flags & FULL_INIT_DONE) {
1110                 t3_sge_stop(sc);
1111                 t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1112                 t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1113                 t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1114                 t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1115                 t3_intr_disable(sc);
1116         }
1117         device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1118         if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1119                 device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1120                     fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1121 }
1122
1123 int
1124 t3_os_find_pci_capability(adapter_t *sc, int cap)
1125 {
1126         device_t dev;
1127         struct pci_devinfo *dinfo;
1128         pcicfgregs *cfg;
1129         uint32_t status;
1130         uint8_t ptr;
1131
1132         dev = sc->dev;
1133         dinfo = device_get_ivars(dev);
1134         cfg = &dinfo->cfg;
1135
1136         status = pci_read_config(dev, PCIR_STATUS, 2);
1137         if (!(status & PCIM_STATUS_CAPPRESENT))
1138                 return (0);
1139
1140         switch (cfg->hdrtype & PCIM_HDRTYPE) {
1141         case 0:
1142         case 1:
1143                 ptr = PCIR_CAP_PTR;
1144                 break;
1145         case 2:
1146                 ptr = PCIR_CAP_PTR_2;
1147                 break;
1148         default:
1149                 return (0);
1150                 break;
1151         }
1152         ptr = pci_read_config(dev, ptr, 1);
1153
1154         while (ptr != 0) {
1155                 if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1156                         return (ptr);
1157                 ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1158         }
1159
1160         return (0);
1161 }
1162
1163 int
1164 t3_os_pci_save_state(struct adapter *sc)
1165 {
1166         device_t dev;
1167         struct pci_devinfo *dinfo;
1168
1169         dev = sc->dev;
1170         dinfo = device_get_ivars(dev);
1171
1172         pci_cfg_save(dev, dinfo, 0);
1173         return (0);
1174 }
1175
1176 int
1177 t3_os_pci_restore_state(struct adapter *sc)
1178 {
1179         device_t dev;
1180         struct pci_devinfo *dinfo;
1181
1182         dev = sc->dev;
1183         dinfo = device_get_ivars(dev);
1184
1185         pci_cfg_restore(dev, dinfo);
1186         return (0);
1187 }
1188
1189 /**
1190  *      t3_os_link_changed - handle link status changes
1191  *      @sc: the adapter associated with the link change
1192  *      @port_id: the port index whose link status has changed
1193  *      @link_status: the new status of the link
1194  *      @speed: the new speed setting
1195  *      @duplex: the new duplex setting
1196  *      @fc: the new flow-control setting
1197  *
1198  *      This is the OS-dependent handler for link status changes.  The OS
1199  *      neutral handler takes care of most of the processing for these events,
1200  *      then calls this handler for any OS-specific processing.
1201  */
1202 void
1203 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1204      int duplex, int fc, int mac_was_reset)
1205 {
1206         struct port_info *pi = &adapter->port[port_id];
1207         struct ifnet *ifp = pi->ifp;
1208
1209         /* no race with detach, so ifp should always be good */
1210         KASSERT(ifp, ("%s: if detached.", __func__));
1211
1212         /* Reapply mac settings if they were lost due to a reset */
1213         if (mac_was_reset) {
1214                 PORT_LOCK(pi);
1215                 cxgb_update_mac_settings(pi);
1216                 PORT_UNLOCK(pi);
1217         }
1218
1219         if (link_status) {
1220                 ifp->if_baudrate = IF_Mbps(speed);
1221                 if_link_state_change(ifp, LINK_STATE_UP);
1222         } else
1223                 if_link_state_change(ifp, LINK_STATE_DOWN);
1224 }
1225
1226 /**
1227  *      t3_os_phymod_changed - handle PHY module changes
1228  *      @phy: the PHY reporting the module change
1229  *      @mod_type: new module type
1230  *
1231  *      This is the OS-dependent handler for PHY module changes.  It is
1232  *      invoked when a PHY module is removed or inserted for any OS-specific
1233  *      processing.
1234  */
1235 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1236 {
1237         static const char *mod_str[] = {
1238                 NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1239         };
1240         struct port_info *pi = &adap->port[port_id];
1241         int mod = pi->phy.modtype;
1242
1243         if (mod != pi->media.ifm_cur->ifm_data)
1244                 cxgb_build_medialist(pi);
1245
1246         if (mod == phy_modtype_none)
1247                 if_printf(pi->ifp, "PHY module unplugged\n");
1248         else {
1249                 KASSERT(mod < ARRAY_SIZE(mod_str),
1250                         ("invalid PHY module type %d", mod));
1251                 if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1252         }
1253 }
1254
1255 void
1256 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1257 {
1258
1259         /*
1260          * The ifnet might not be allocated before this gets called,
1261          * as this is called early on in attach by t3_prep_adapter
1262          * save the address off in the port structure
1263          */
1264         if (cxgb_debug)
1265                 printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1266         bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1267 }
1268
1269 /*
1270  * Programs the XGMAC based on the settings in the ifnet.  These settings
1271  * include MTU, MAC address, mcast addresses, etc.
1272  */
1273 static void
1274 cxgb_update_mac_settings(struct port_info *p)
1275 {
1276         struct ifnet *ifp = p->ifp;
1277         struct t3_rx_mode rm;
1278         struct cmac *mac = &p->mac;
1279         int mtu, hwtagging;
1280
1281         PORT_LOCK_ASSERT_OWNED(p);
1282
1283         bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1284
1285         mtu = ifp->if_mtu;
1286         if (ifp->if_capenable & IFCAP_VLAN_MTU)
1287                 mtu += ETHER_VLAN_ENCAP_LEN;
1288
1289         hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1290
1291         t3_mac_set_mtu(mac, mtu);
1292         t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1293         t3_mac_set_address(mac, 0, p->hw_addr);
1294         t3_init_rx_mode(&rm, p);
1295         t3_mac_set_rx_mode(mac, &rm);
1296 }
1297
1298
1299 static int
1300 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1301                               unsigned long n)
1302 {
1303         int attempts = 5;
1304
1305         while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1306                 if (!--attempts)
1307                         return (ETIMEDOUT);
1308                 t3_os_sleep(10);
1309         }
1310         return 0;
1311 }
1312
1313 static int
1314 init_tp_parity(struct adapter *adap)
1315 {
1316         int i;
1317         struct mbuf *m;
1318         struct cpl_set_tcb_field *greq;
1319         unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1320
1321         t3_tp_set_offload_mode(adap, 1);
1322
1323         for (i = 0; i < 16; i++) {
1324                 struct cpl_smt_write_req *req;
1325
1326                 m = m_gethdr(M_WAITOK, MT_DATA);
1327                 req = mtod(m, struct cpl_smt_write_req *);
1328                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1329                 memset(req, 0, sizeof(*req));
1330                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1331                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1332                 req->iff = i;
1333                 t3_mgmt_tx(adap, m);
1334         }
1335
1336         for (i = 0; i < 2048; i++) {
1337                 struct cpl_l2t_write_req *req;
1338
1339                 m = m_gethdr(M_WAITOK, MT_DATA);
1340                 req = mtod(m, struct cpl_l2t_write_req *);
1341                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1342                 memset(req, 0, sizeof(*req));
1343                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1344                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1345                 req->params = htonl(V_L2T_W_IDX(i));
1346                 t3_mgmt_tx(adap, m);
1347         }
1348
1349         for (i = 0; i < 2048; i++) {
1350                 struct cpl_rte_write_req *req;
1351
1352                 m = m_gethdr(M_WAITOK, MT_DATA);
1353                 req = mtod(m, struct cpl_rte_write_req *);
1354                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1355                 memset(req, 0, sizeof(*req));
1356                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1357                 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1358                 req->l2t_idx = htonl(V_L2T_W_IDX(i));
1359                 t3_mgmt_tx(adap, m);
1360         }
1361
1362         m = m_gethdr(M_WAITOK, MT_DATA);
1363         greq = mtod(m, struct cpl_set_tcb_field *);
1364         m->m_len = m->m_pkthdr.len = sizeof(*greq);
1365         memset(greq, 0, sizeof(*greq));
1366         greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1367         OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1368         greq->mask = htobe64(1);
1369         t3_mgmt_tx(adap, m);
1370
1371         i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1372         t3_tp_set_offload_mode(adap, 0);
1373         return (i);
1374 }
1375
1376 /**
1377  *      setup_rss - configure Receive Side Steering (per-queue connection demux) 
1378  *      @adap: the adapter
1379  *
1380  *      Sets up RSS to distribute packets to multiple receive queues.  We
1381  *      configure the RSS CPU lookup table to distribute to the number of HW
1382  *      receive queues, and the response queue lookup table to narrow that
1383  *      down to the response queues actually configured for each port.
1384  *      We always configure the RSS mapping for two ports since the mapping
1385  *      table has plenty of entries.
1386  */
1387 static void
1388 setup_rss(adapter_t *adap)
1389 {
1390         int i;
1391         u_int nq[2]; 
1392         uint8_t cpus[SGE_QSETS + 1];
1393         uint16_t rspq_map[RSS_TABLE_SIZE];
1394         
1395         for (i = 0; i < SGE_QSETS; ++i)
1396                 cpus[i] = i;
1397         cpus[SGE_QSETS] = 0xff;
1398
1399         nq[0] = nq[1] = 0;
1400         for_each_port(adap, i) {
1401                 const struct port_info *pi = adap2pinfo(adap, i);
1402
1403                 nq[pi->tx_chan] += pi->nqsets;
1404         }
1405         for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1406                 rspq_map[i] = nq[0] ? i % nq[0] : 0;
1407                 rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1408         }
1409
1410         /* Calculate the reverse RSS map table */
1411         for (i = 0; i < SGE_QSETS; ++i)
1412                 adap->rrss_map[i] = 0xff;
1413         for (i = 0; i < RSS_TABLE_SIZE; ++i)
1414                 if (adap->rrss_map[rspq_map[i]] == 0xff)
1415                         adap->rrss_map[rspq_map[i]] = i;
1416
1417         t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1418                       F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1419                       F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1420                       cpus, rspq_map);
1421
1422 }
1423
/*
 * Hands an mbuf to the offload device's transmit routine and returns that
 * routine's result unchanged.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{

	return (t3_offload_tx(tdev, m));
}
1436
1437 static int
1438 write_smt_entry(struct adapter *adapter, int idx)
1439 {
1440         struct port_info *pi = &adapter->port[idx];
1441         struct cpl_smt_write_req *req;
1442         struct mbuf *m;
1443
1444         if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1445                 return (ENOMEM);
1446
1447         req = mtod(m, struct cpl_smt_write_req *);
1448         m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1449         
1450         req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1451         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1452         req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1453         req->iff = idx;
1454         memset(req->src_mac1, 0, sizeof(req->src_mac1));
1455         memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1456
1457         m_set_priority(m, 1);
1458
1459         offload_tx(&adapter->tdev, m);
1460
1461         return (0);
1462 }
1463
1464 static int
1465 init_smt(struct adapter *adapter)
1466 {
1467         int i;
1468
1469         for_each_port(adapter, i)
1470                 write_smt_entry(adapter, i);
1471         return 0;
1472 }
1473
1474 static void
1475 init_port_mtus(adapter_t *adapter)
1476 {
1477         unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1478
1479         t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1480 }
1481
1482 static void
1483 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1484                               int hi, int port)
1485 {
1486         struct mbuf *m;
1487         struct mngt_pktsched_wr *req;
1488
1489         m = m_gethdr(M_DONTWAIT, MT_DATA);
1490         if (m) {        
1491                 req = mtod(m, struct mngt_pktsched_wr *);
1492                 req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1493                 req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1494                 req->sched = sched;
1495                 req->idx = qidx;
1496                 req->min = lo;
1497                 req->max = hi;
1498                 req->binding = port;
1499                 m->m_len = m->m_pkthdr.len = sizeof(*req);
1500                 t3_mgmt_tx(adap, m);
1501         }
1502 }
1503
1504 static void
1505 bind_qsets(adapter_t *sc)
1506 {
1507         int i, j;
1508
1509         for (i = 0; i < (sc)->params.nports; ++i) {
1510                 const struct port_info *pi = adap2pinfo(sc, i);
1511
1512                 for (j = 0; j < pi->nqsets; ++j) {
1513                         send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1514                                           -1, pi->tx_chan);
1515
1516                 }
1517         }
1518 }
1519
1520 static void
1521 update_tpeeprom(struct adapter *adap)
1522 {
1523         const struct firmware *tpeeprom;
1524
1525         uint32_t version;
1526         unsigned int major, minor;
1527         int ret, len;
1528         char rev, name[32];
1529
1530         t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1531
1532         major = G_TP_VERSION_MAJOR(version);
1533         minor = G_TP_VERSION_MINOR(version);
1534         if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1535                 return; 
1536
1537         rev = t3rev2char(adap);
1538         snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1539
1540         tpeeprom = firmware_get(name);
1541         if (tpeeprom == NULL) {
1542                 device_printf(adap->dev,
1543                               "could not load TP EEPROM: unable to load %s\n",
1544                               name);
1545                 return;
1546         }
1547
1548         len = tpeeprom->datasize - 4;
1549         
1550         ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1551         if (ret)
1552                 goto release_tpeeprom;
1553
1554         if (len != TP_SRAM_LEN) {
1555                 device_printf(adap->dev,
1556                               "%s length is wrong len=%d expected=%d\n", name,
1557                               len, TP_SRAM_LEN);
1558                 return;
1559         }
1560         
1561         ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1562             TP_SRAM_OFFSET);
1563         
1564         if (!ret) {
1565                 device_printf(adap->dev,
1566                         "Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1567                          TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1568         } else 
1569                 device_printf(adap->dev,
1570                               "Protocol SRAM image update in EEPROM failed\n");
1571
1572 release_tpeeprom:
1573         firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1574         
1575         return;
1576 }
1577
1578 static int
1579 update_tpsram(struct adapter *adap)
1580 {
1581         const struct firmware *tpsram;
1582         int ret;
1583         char rev, name[32];
1584
1585         rev = t3rev2char(adap);
1586         snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1587
1588         update_tpeeprom(adap);
1589
1590         tpsram = firmware_get(name);
1591         if (tpsram == NULL){
1592                 device_printf(adap->dev, "could not load TP SRAM\n");
1593                 return (EINVAL);
1594         } else
1595                 device_printf(adap->dev, "updating TP SRAM\n");
1596         
1597         ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1598         if (ret)
1599                 goto release_tpsram;    
1600
1601         ret = t3_set_proto_sram(adap, tpsram->data);
1602         if (ret)
1603                 device_printf(adap->dev, "loading protocol SRAM failed\n");
1604
1605 release_tpsram:
1606         firmware_put(tpsram, FIRMWARE_UNLOAD);
1607         
1608         return ret;
1609 }
1610
1611 /**
1612  *      cxgb_up - enable the adapter
1613  *      @adap: adapter being enabled
1614  *
1615  *      Called when the first port is enabled, this function performs the
1616  *      actions necessary to make an adapter operational, such as completing
1617  *      the initialization of HW modules, and enabling interrupts.
1618  */
1619 static int
1620 cxgb_up(struct adapter *sc)
1621 {
1622         int err = 0;
1623         unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1624
1625         KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1626                                            __func__, sc->open_device_map));
1627
1628         if ((sc->flags & FULL_INIT_DONE) == 0) {
1629
1630                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1631
1632                 if ((sc->flags & FW_UPTODATE) == 0)
1633                         if ((err = upgrade_fw(sc)))
1634                                 goto out;
1635
1636                 if ((sc->flags & TPS_UPTODATE) == 0)
1637                         if ((err = update_tpsram(sc)))
1638                                 goto out;
1639
1640                 if (is_offload(sc) && nfilters != 0) {
1641                         sc->params.mc5.nservers = 0;
1642
1643                         if (nfilters < 0)
1644                                 sc->params.mc5.nfilters = mxf;
1645                         else
1646                                 sc->params.mc5.nfilters = min(nfilters, mxf);
1647                 }
1648
1649                 err = t3_init_hw(sc, 0);
1650                 if (err)
1651                         goto out;
1652
1653                 t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1654                 t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1655
1656                 err = setup_sge_qsets(sc);
1657                 if (err)
1658                         goto out;
1659
1660                 alloc_filters(sc);
1661                 setup_rss(sc);
1662
1663                 t3_intr_clear(sc);
1664                 err = cxgb_setup_interrupts(sc);
1665                 if (err)
1666                         goto out;
1667
1668                 t3_add_configured_sysctls(sc);
1669                 sc->flags |= FULL_INIT_DONE;
1670         }
1671
1672         t3_intr_clear(sc);
1673         t3_sge_start(sc);
1674         t3_intr_enable(sc);
1675
1676         if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1677             is_offload(sc) && init_tp_parity(sc) == 0)
1678                 sc->flags |= TP_PARITY_INIT;
1679
1680         if (sc->flags & TP_PARITY_INIT) {
1681                 t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1682                 t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1683         }
1684         
1685         if (!(sc->flags & QUEUES_BOUND)) {
1686                 bind_qsets(sc);
1687                 setup_hw_filters(sc);
1688                 sc->flags |= QUEUES_BOUND;              
1689         }
1690
1691         t3_sge_reset_adapter(sc);
1692 out:
1693         return (err);
1694 }
1695
/*
 * Counterpart of cxgb_up, called when the last open device is closed: stops
 * the SGE and disables interrupts.  It deliberately does NOT undo the
 * one-time work cxgb_up performs under FULL_INIT_DONE; those resources are
 * released during controller_detach instead.
 */
static void
cxgb_down(struct adapter *sc)
{

	t3_sge_stop(sc);
	t3_intr_disable(sc);
}
1707
1708 static int
1709 offload_open(struct port_info *pi)
1710 {
1711         struct adapter *sc = pi->adapter;
1712         struct t3cdev *tdev = &sc->tdev;
1713
1714         setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1715
1716         t3_tp_set_offload_mode(sc, 1);
1717         tdev->lldev = pi->ifp;
1718         init_port_mtus(sc);
1719         t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1720                      sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1721         init_smt(sc);
1722         cxgb_add_clients(tdev);
1723
1724         return (0);
1725 }
1726
1727 static int
1728 offload_close(struct t3cdev *tdev)
1729 {
1730         struct adapter *adapter = tdev2adap(tdev);
1731
1732         if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1733                 return (0);
1734
1735         /* Call back all registered clients */
1736         cxgb_remove_clients(tdev);
1737
1738         tdev->lldev = NULL;
1739         cxgb_set_dummy_ops(tdev);
1740         t3_tp_set_offload_mode(adapter, 0);
1741
1742         clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1743
1744         return (0);
1745 }
1746
1747 /*
1748  * if_init for cxgb ports.
1749  */
1750 static void
1751 cxgb_init(void *arg)
1752 {
1753         struct port_info *p = arg;
1754         struct adapter *sc = p->adapter;
1755
1756         ADAPTER_LOCK(sc);
1757         cxgb_init_locked(p); /* releases adapter lock */
1758         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1759 }
1760
1761 static int
1762 cxgb_init_locked(struct port_info *p)
1763 {
1764         struct adapter *sc = p->adapter;
1765         struct ifnet *ifp = p->ifp;
1766         struct cmac *mac = &p->mac;
1767         int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1768
1769         ADAPTER_LOCK_ASSERT_OWNED(sc);
1770
1771         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1772                 gave_up_lock = 1;
1773                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1774                         rc = EINTR;
1775                         goto done;
1776                 }
1777         }
1778         if (IS_DOOMED(p)) {
1779                 rc = ENXIO;
1780                 goto done;
1781         }
1782         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1783
1784         /*
1785          * The code that runs during one-time adapter initialization can sleep
1786          * so it's important not to hold any locks across it.
1787          */
1788         may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1789
1790         if (may_sleep) {
1791                 SET_BUSY(sc);
1792                 gave_up_lock = 1;
1793                 ADAPTER_UNLOCK(sc);
1794         }
1795
1796         if (sc->open_device_map == 0) {
1797                 if ((rc = cxgb_up(sc)) != 0)
1798                         goto done;
1799
1800                 if (is_offload(sc) && !ofld_disable && offload_open(p))
1801                         log(LOG_WARNING,
1802                             "Could not initialize offload capabilities\n");
1803         }
1804
1805         PORT_LOCK(p);
1806         if (isset(&sc->open_device_map, p->port_id) &&
1807             (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1808                 PORT_UNLOCK(p);
1809                 goto done;
1810         }
1811         t3_port_intr_enable(sc, p->port_id);
1812         if (!mac->multiport) 
1813                 t3_mac_init(mac);
1814         cxgb_update_mac_settings(p);
1815         t3_link_start(&p->phy, mac, &p->link_config);
1816         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1817         ifp->if_drv_flags |= IFF_DRV_RUNNING;
1818         ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1819         PORT_UNLOCK(p);
1820
1821         for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1822                 struct sge_qset *qs = &sc->sge.qs[i];
1823                 struct sge_txq *txq = &qs->txq[TXQ_ETH];
1824
1825                 callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1826                                  txq->txq_watchdog.c_cpu);
1827         }
1828
1829         /* all ok */
1830         setbit(&sc->open_device_map, p->port_id);
1831         callout_reset(&p->link_check_ch,
1832             p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1833             link_check_callout, p);
1834
1835 done:
1836         if (may_sleep) {
1837                 ADAPTER_LOCK(sc);
1838                 KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1839                 CLR_BUSY(sc);
1840         }
1841         if (gave_up_lock)
1842                 wakeup_one(&sc->flags);
1843         ADAPTER_UNLOCK(sc);
1844         return (rc);
1845 }
1846
1847 static int
1848 cxgb_uninit_locked(struct port_info *p)
1849 {
1850         struct adapter *sc = p->adapter;
1851         int rc;
1852
1853         ADAPTER_LOCK_ASSERT_OWNED(sc);
1854
1855         while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1856                 if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1857                         rc = EINTR;
1858                         goto done;
1859                 }
1860         }
1861         if (IS_DOOMED(p)) {
1862                 rc = ENXIO;
1863                 goto done;
1864         }
1865         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1866         SET_BUSY(sc);
1867         ADAPTER_UNLOCK(sc);
1868
1869         rc = cxgb_uninit_synchronized(p);
1870
1871         ADAPTER_LOCK(sc);
1872         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1873         CLR_BUSY(sc);
1874         wakeup_one(&sc->flags);
1875 done:
1876         ADAPTER_UNLOCK(sc);
1877         return (rc);
1878 }
1879
1880 /*
1881  * Called on "ifconfig down", and from port_detach
1882  */
1883 static int
1884 cxgb_uninit_synchronized(struct port_info *pi)
1885 {
1886         struct adapter *sc = pi->adapter;
1887         struct ifnet *ifp = pi->ifp;
1888
1889         /*
1890          * taskqueue_drain may cause a deadlock if the adapter lock is held.
1891          */
1892         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1893
1894         /*
1895          * Clear this port's bit from the open device map, and then drain all
1896          * the tasks that can access/manipulate this port's port_info or ifp.
1897          * We disable this port's interrupts here and so the slow/ext
1898          * interrupt tasks won't be enqueued.  The tick task will continue to
1899          * be enqueued every second but the runs after this drain will not see
1900          * this port in the open device map.
1901          *
1902          * A well behaved task must take open_device_map into account and ignore
1903          * ports that are not open.
1904          */
1905         clrbit(&sc->open_device_map, pi->port_id);
1906         t3_port_intr_disable(sc, pi->port_id);
1907         taskqueue_drain(sc->tq, &sc->slow_intr_task);
1908         taskqueue_drain(sc->tq, &sc->tick_task);
1909
1910         callout_drain(&pi->link_check_ch);
1911         taskqueue_drain(sc->tq, &pi->link_check_task);
1912
1913         PORT_LOCK(pi);
1914         ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1915
1916         /* disable pause frames */
1917         t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1918
1919         /* Reset RX FIFO HWM */
1920         t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1921                          V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1922
1923         DELAY(100 * 1000);
1924
1925         /* Wait for TXFIFO empty */
1926         t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1927                         F_TXFIFO_EMPTY, 1, 20, 5);
1928
1929         DELAY(100 * 1000);
1930         t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1931
1932
1933         pi->phy.ops->power_down(&pi->phy, 1);
1934
1935         PORT_UNLOCK(pi);
1936
1937         pi->link_config.link_ok = 0;
1938         t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1939
1940         if ((sc->open_device_map & PORT_MASK) == 0)
1941                 offload_close(&sc->tdev);
1942
1943         if (sc->open_device_map == 0)
1944                 cxgb_down(pi->adapter);
1945
1946         return (0);
1947 }
1948
1949 /*
1950  * Mark lro enabled or disabled in all qsets for this port
1951  */
1952 static int
1953 cxgb_set_lro(struct port_info *p, int enabled)
1954 {
1955         int i;
1956         struct adapter *adp = p->adapter;
1957         struct sge_qset *q;
1958
1959         for (i = 0; i < p->nqsets; i++) {
1960                 q = &adp->sge.qs[p->first_qset + i];
1961                 q->lro.enabled = (enabled != 0);
1962         }
1963         return (0);
1964 }
1965
1966 static int
1967 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1968 {
1969         struct port_info *p = ifp->if_softc;
1970         struct adapter *sc = p->adapter;
1971         struct ifreq *ifr = (struct ifreq *)data;
1972         int flags, error = 0, mtu;
1973         uint32_t mask;
1974
1975         switch (command) {
1976         case SIOCSIFMTU:
1977                 ADAPTER_LOCK(sc);
1978                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1979                 if (error) {
1980 fail:
1981                         ADAPTER_UNLOCK(sc);
1982                         return (error);
1983                 }
1984
1985                 mtu = ifr->ifr_mtu;
1986                 if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1987                         error = EINVAL;
1988                 } else {
1989                         ifp->if_mtu = mtu;
1990                         PORT_LOCK(p);
1991                         cxgb_update_mac_settings(p);
1992                         PORT_UNLOCK(p);
1993                 }
1994                 ADAPTER_UNLOCK(sc);
1995                 break;
1996         case SIOCSIFFLAGS:
1997                 ADAPTER_LOCK(sc);
1998                 if (IS_DOOMED(p)) {
1999                         error = ENXIO;
2000                         goto fail;
2001                 }
2002                 if (ifp->if_flags & IFF_UP) {
2003                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2004                                 flags = p->if_flags;
2005                                 if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2006                                     ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2007                                         if (IS_BUSY(sc)) {
2008                                                 error = EBUSY;
2009                                                 goto fail;
2010                                         }
2011                                         PORT_LOCK(p);
2012                                         cxgb_update_mac_settings(p);
2013                                         PORT_UNLOCK(p);
2014                                 }
2015                                 ADAPTER_UNLOCK(sc);
2016                         } else
2017                                 error = cxgb_init_locked(p);
2018                         p->if_flags = ifp->if_flags;
2019                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2020                         error = cxgb_uninit_locked(p);
2021                 else
2022                         ADAPTER_UNLOCK(sc);
2023
2024                 ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2025                 break;
2026         case SIOCADDMULTI:
2027         case SIOCDELMULTI:
2028                 ADAPTER_LOCK(sc);
2029                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2030                 if (error)
2031                         goto fail;
2032
2033                 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2034                         PORT_LOCK(p);
2035                         cxgb_update_mac_settings(p);
2036                         PORT_UNLOCK(p);
2037                 }
2038                 ADAPTER_UNLOCK(sc);
2039
2040                 break;
2041         case SIOCSIFCAP:
2042                 ADAPTER_LOCK(sc);
2043                 error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2044                 if (error)
2045                         goto fail;
2046
2047                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2048                 if (mask & IFCAP_TXCSUM) {
2049                         ifp->if_capenable ^= IFCAP_TXCSUM;
2050                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2051
2052                         if (IFCAP_TSO & ifp->if_capenable &&
2053                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2054                                 ifp->if_capenable &= ~IFCAP_TSO;
2055                                 ifp->if_hwassist &= ~CSUM_TSO;
2056                                 if_printf(ifp,
2057                                     "tso disabled due to -txcsum.\n");
2058                         }
2059                 }
2060                 if (mask & IFCAP_RXCSUM)
2061                         ifp->if_capenable ^= IFCAP_RXCSUM;
2062                 if (mask & IFCAP_TSO4) {
2063                         ifp->if_capenable ^= IFCAP_TSO4;
2064
2065                         if (IFCAP_TSO & ifp->if_capenable) {
2066                                 if (IFCAP_TXCSUM & ifp->if_capenable)
2067                                         ifp->if_hwassist |= CSUM_TSO;
2068                                 else {
2069                                         ifp->if_capenable &= ~IFCAP_TSO;
2070                                         ifp->if_hwassist &= ~CSUM_TSO;
2071                                         if_printf(ifp,
2072                                             "enable txcsum first.\n");
2073                                         error = EAGAIN;
2074                                 }
2075                         } else
2076                                 ifp->if_hwassist &= ~CSUM_TSO;
2077                 }
2078                 if (mask & IFCAP_LRO) {
2079                         ifp->if_capenable ^= IFCAP_LRO;
2080
2081                         /* Safe to do this even if cxgb_up not called yet */
2082                         cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2083                 }
2084                 if (mask & IFCAP_VLAN_HWTAGGING) {
2085                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2086                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2087                                 PORT_LOCK(p);
2088                                 cxgb_update_mac_settings(p);
2089                                 PORT_UNLOCK(p);
2090                         }
2091                 }
2092                 if (mask & IFCAP_VLAN_MTU) {
2093                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2094                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2095                                 PORT_LOCK(p);
2096                                 cxgb_update_mac_settings(p);
2097                                 PORT_UNLOCK(p);
2098                         }
2099                 }
2100                 if (mask & IFCAP_VLAN_HWTSO)
2101                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2102                 if (mask & IFCAP_VLAN_HWCSUM)
2103                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2104
2105 #ifdef VLAN_CAPABILITIES
2106                 VLAN_CAPABILITIES(ifp);
2107 #endif
2108                 ADAPTER_UNLOCK(sc);
2109                 break;
2110         case SIOCSIFMEDIA:
2111         case SIOCGIFMEDIA:
2112                 error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2113                 break;
2114         default:
2115                 error = ether_ioctl(ifp, command, data);
2116         }
2117
2118         return (error);
2119 }
2120
/*
 * ifmedia change callback.  Changing the media is not supported by this
 * driver, so every request is rejected with EOPNOTSUPP.
 */
static int
cxgb_media_change(struct ifnet *ifp)
{

	return (EOPNOTSUPP);
}
2126
2127 /*
2128  * Translates phy->modtype to the correct Ethernet media subtype.
2129  */
2130 static int
2131 cxgb_ifm_type(int mod)
2132 {
2133         switch (mod) {
2134         case phy_modtype_sr:
2135                 return (IFM_10G_SR);
2136         case phy_modtype_lr:
2137                 return (IFM_10G_LR);
2138         case phy_modtype_lrm:
2139                 return (IFM_10G_LRM);
2140         case phy_modtype_twinax:
2141                 return (IFM_10G_TWINAX);
2142         case phy_modtype_twinax_long:
2143                 return (IFM_10G_TWINAX_LONG);
2144         case phy_modtype_none:
2145                 return (IFM_NONE);
2146         case phy_modtype_unknown:
2147                 return (IFM_UNKNOWN);
2148         }
2149
2150         KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2151         return (IFM_UNKNOWN);
2152 }
2153
2154 /*
2155  * Rebuilds the ifmedia list for this port, and sets the current media.
2156  */
2157 static void
2158 cxgb_build_medialist(struct port_info *p)
2159 {
2160         struct cphy *phy = &p->phy;
2161         struct ifmedia *media = &p->media;
2162         int mod = phy->modtype;
2163         int m = IFM_ETHER | IFM_FDX;
2164
2165         PORT_LOCK(p);
2166
2167         ifmedia_removeall(media);
2168         if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2169                 /* Copper (RJ45) */
2170
2171                 if (phy->caps & SUPPORTED_10000baseT_Full)
2172                         ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2173
2174                 if (phy->caps & SUPPORTED_1000baseT_Full)
2175                         ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2176
2177                 if (phy->caps & SUPPORTED_100baseT_Full)
2178                         ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2179
2180                 if (phy->caps & SUPPORTED_10baseT_Full)
2181                         ifmedia_add(media, m | IFM_10_T, mod, NULL);
2182
2183                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2184                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2185
2186         } else if (phy->caps & SUPPORTED_TP) {
2187                 /* Copper (CX4) */
2188
2189                 KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2190                         ("%s: unexpected cap 0x%x", __func__, phy->caps));
2191
2192                 ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2193                 ifmedia_set(media, m | IFM_10G_CX4);
2194
2195         } else if (phy->caps & SUPPORTED_FIBRE &&
2196                    phy->caps & SUPPORTED_10000baseT_Full) {
2197                 /* 10G optical (but includes SFP+ twinax) */
2198
2199                 m |= cxgb_ifm_type(mod);
2200                 if (IFM_SUBTYPE(m) == IFM_NONE)
2201                         m &= ~IFM_FDX;
2202
2203                 ifmedia_add(media, m, mod, NULL);
2204                 ifmedia_set(media, m);
2205
2206         } else if (phy->caps & SUPPORTED_FIBRE &&
2207                    phy->caps & SUPPORTED_1000baseT_Full) {
2208                 /* 1G optical */
2209
2210                 /* XXX: Lie and claim to be SX, could actually be any 1G-X */
2211                 ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2212                 ifmedia_set(media, m | IFM_1000_SX);
2213
2214         } else {
2215                 KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2216                             phy->caps));
2217         }
2218
2219         PORT_UNLOCK(p);
2220 }
2221
2222 static void
2223 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2224 {
2225         struct port_info *p = ifp->if_softc;
2226         struct ifmedia_entry *cur = p->media.ifm_cur;
2227         int speed = p->link_config.speed;
2228
2229         if (cur->ifm_data != p->phy.modtype) {
2230                 cxgb_build_medialist(p);
2231                 cur = p->media.ifm_cur;
2232         }
2233
2234         ifmr->ifm_status = IFM_AVALID;
2235         if (!p->link_config.link_ok)
2236                 return;
2237
2238         ifmr->ifm_status |= IFM_ACTIVE;
2239
2240         /*
2241          * active and current will differ iff current media is autoselect.  That
2242          * can happen only for copper RJ45.
2243          */
2244         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2245                 return;
2246         KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2247                 ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2248
2249         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2250         if (speed == SPEED_10000)
2251                 ifmr->ifm_active |= IFM_10G_T;
2252         else if (speed == SPEED_1000)
2253                 ifmr->ifm_active |= IFM_1000_T;
2254         else if (speed == SPEED_100)
2255                 ifmr->ifm_active |= IFM_100_TX;
2256         else if (speed == SPEED_10)
2257                 ifmr->ifm_active |= IFM_10_T;
2258         else
2259                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2260                             speed));
2261 }
2262
2263 static void
2264 cxgb_async_intr(void *data)
2265 {
2266         adapter_t *sc = data;
2267
2268         t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2269         (void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2270         taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2271 }
2272
2273 static void
2274 link_check_callout(void *arg)
2275 {
2276         struct port_info *pi = arg;
2277         struct adapter *sc = pi->adapter;
2278
2279         if (!isset(&sc->open_device_map, pi->port_id))
2280                 return;
2281
2282         taskqueue_enqueue(sc->tq, &pi->link_check_task);
2283 }
2284
2285 static void
2286 check_link_status(void *arg, int pending)
2287 {
2288         struct port_info *pi = arg;
2289         struct adapter *sc = pi->adapter;
2290
2291         if (!isset(&sc->open_device_map, pi->port_id))
2292                 return;
2293
2294         t3_link_changed(sc, pi->port_id);
2295
2296         if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2297                 callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2298 }
2299
2300 void
2301 t3_os_link_intr(struct port_info *pi)
2302 {
2303         /*
2304          * Schedule a link check in the near future.  If the link is flapping
2305          * rapidly we'll keep resetting the callout and delaying the check until
2306          * things stabilize a bit.
2307          */
2308         callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2309 }
2310
2311 static void
2312 check_t3b2_mac(struct adapter *sc)
2313 {
2314         int i;
2315
2316         if (sc->flags & CXGB_SHUTDOWN)
2317                 return;
2318
2319         for_each_port(sc, i) {
2320                 struct port_info *p = &sc->port[i];
2321                 int status;
2322 #ifdef INVARIANTS
2323                 struct ifnet *ifp = p->ifp;
2324 #endif          
2325
2326                 if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2327                     !p->link_config.link_ok)
2328                         continue;
2329
2330                 KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2331                         ("%s: state mismatch (drv_flags %x, device_map %x)",
2332                          __func__, ifp->if_drv_flags, sc->open_device_map));
2333
2334                 PORT_LOCK(p);
2335                 status = t3b2_mac_watchdog_task(&p->mac);
2336                 if (status == 1)
2337                         p->mac.stats.num_toggled++;
2338                 else if (status == 2) {
2339                         struct cmac *mac = &p->mac;
2340
2341                         cxgb_update_mac_settings(p);
2342                         t3_link_start(&p->phy, mac, &p->link_config);
2343                         t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2344                         t3_port_intr_enable(sc, p->port_id);
2345                         p->mac.stats.num_resets++;
2346                 }
2347                 PORT_UNLOCK(p);
2348         }
2349 }
2350
2351 static void
2352 cxgb_tick(void *arg)
2353 {
2354         adapter_t *sc = (adapter_t *)arg;
2355
2356         if (sc->flags & CXGB_SHUTDOWN)
2357                 return;
2358
2359         taskqueue_enqueue(sc->tq, &sc->tick_task);      
2360         callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2361 }
2362
2363 static void
2364 cxgb_tick_handler(void *arg, int count)
2365 {
2366         adapter_t *sc = (adapter_t *)arg;
2367         const struct adapter_params *p = &sc->params;
2368         int i;
2369         uint32_t cause, reset;
2370
2371         if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2372                 return;
2373
2374         if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) 
2375                 check_t3b2_mac(sc);
2376
2377         cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2378         if (cause) {
2379                 struct sge_qset *qs = &sc->sge.qs[0];
2380                 uint32_t mask, v;
2381
2382                 v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2383
2384                 mask = 1;
2385                 for (i = 0; i < SGE_QSETS; i++) {
2386                         if (v & mask)
2387                                 qs[i].rspq.starved++;
2388                         mask <<= 1;
2389                 }
2390
2391                 mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2392
2393                 for (i = 0; i < SGE_QSETS * 2; i++) {
2394                         if (v & mask) {
2395                                 qs[i / 2].fl[i % 2].empty++;
2396                         }
2397                         mask <<= 1;
2398                 }
2399
2400                 /* clear */
2401                 t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2402                 t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2403         }
2404
2405         for (i = 0; i < sc->params.nports; i++) {
2406                 struct port_info *pi = &sc->port[i];
2407                 struct ifnet *ifp = pi->ifp;
2408                 struct cmac *mac = &pi->mac;
2409                 struct mac_stats *mstats = &mac->stats;
2410                 int drops, j;
2411
2412                 if (!isset(&sc->open_device_map, pi->port_id))
2413                         continue;
2414
2415                 PORT_LOCK(pi);
2416                 t3_mac_update_stats(mac);
2417                 PORT_UNLOCK(pi);
2418
2419                 ifp->if_opackets = mstats->tx_frames;
2420                 ifp->if_ipackets = mstats->rx_frames;
2421                 ifp->if_obytes = mstats->tx_octets;
2422                 ifp->if_ibytes = mstats->rx_octets;
2423                 ifp->if_omcasts = mstats->tx_mcast_frames;
2424                 ifp->if_imcasts = mstats->rx_mcast_frames;
2425                 ifp->if_collisions = mstats->tx_total_collisions;
2426                 ifp->if_iqdrops = mstats->rx_cong_drops;
2427
2428                 drops = 0;
2429                 for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2430                         drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2431                 ifp->if_snd.ifq_drops = drops;
2432
2433                 ifp->if_oerrors =
2434                     mstats->tx_excess_collisions +
2435                     mstats->tx_underrun +
2436                     mstats->tx_len_errs +
2437                     mstats->tx_mac_internal_errs +
2438                     mstats->tx_excess_deferral +
2439                     mstats->tx_fcs_errs;
2440                 ifp->if_ierrors =
2441                     mstats->rx_jabber +
2442                     mstats->rx_data_errs +
2443                     mstats->rx_sequence_errs +
2444                     mstats->rx_runt + 
2445                     mstats->rx_too_long +
2446                     mstats->rx_mac_internal_errs +
2447                     mstats->rx_short +
2448                     mstats->rx_fcs_errs;
2449
2450                 if (mac->multiport)
2451                         continue;
2452
2453                 /* Count rx fifo overflows, once per second */
2454                 cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2455                 reset = 0;
2456                 if (cause & F_RXFIFO_OVERFLOW) {
2457                         mac->stats.rx_fifo_ovfl++;
2458                         reset |= F_RXFIFO_OVERFLOW;
2459                 }
2460                 t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2461         }
2462 }
2463
2464 static void
2465 touch_bars(device_t dev)
2466 {
2467         /*
2468          * Don't enable yet
2469          */
2470 #if !defined(__LP64__) && 0
2471         u32 v;
2472
2473         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2474         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2475         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2476         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2477         pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2478         pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2479 #endif
2480 }
2481
2482 static int
2483 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2484 {
2485         uint8_t *buf;
2486         int err = 0;
2487         u32 aligned_offset, aligned_len, *p;
2488         struct adapter *adapter = pi->adapter;
2489
2490
2491         aligned_offset = offset & ~3;
2492         aligned_len = (len + (offset & 3) + 3) & ~3;
2493
2494         if (aligned_offset != offset || aligned_len != len) {
2495                 buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);              
2496                 if (!buf)
2497                         return (ENOMEM);
2498                 err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2499                 if (!err && aligned_len > 4)
2500                         err = t3_seeprom_read(adapter,
2501                                               aligned_offset + aligned_len - 4,
2502                                               (u32 *)&buf[aligned_len - 4]);
2503                 if (err)
2504                         goto out;
2505                 memcpy(buf + (offset & 3), data, len);
2506         } else
2507                 buf = (uint8_t *)(uintptr_t)data;
2508
2509         err = t3_seeprom_wp(adapter, 0);
2510         if (err)
2511                 goto out;
2512
2513         for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2514                 err = t3_seeprom_write(adapter, aligned_offset, *p);
2515                 aligned_offset += 4;
2516         }
2517
2518         if (!err)
2519                 err = t3_seeprom_wp(adapter, 1);
2520 out:
2521         if (buf != data)
2522                 free(buf, M_DEVBUF);
2523         return err;
2524 }
2525
2526
/*
 * Return 1 if 'val' is negative or lies within [lo, hi] (both ends
 * inclusive), 0 otherwise.  Negative values are accepted unconditionally.
 */
static int
in_range(int val, int lo, int hi)
{

	if (val < 0)
		return (1);
	return (lo <= val && val <= hi);
}
2532
/*
 * open handler for the driver's extension character device.  There is
 * nothing to set up, so it always succeeds.
 */
static int
cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
{

	return (0);
}
2538
/*
 * close handler for the driver's extension character device.  There is
 * nothing to tear down, so it always succeeds.
 */
static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
2544
2545 static int
2546 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2547     int fflag, struct thread *td)
2548 {
2549         int mmd, error = 0;
2550         struct port_info *pi = dev->si_drv1;
2551         adapter_t *sc = pi->adapter;
2552
2553 #ifdef PRIV_SUPPORTED   
2554         if (priv_check(td, PRIV_DRIVER)) {
2555                 if (cxgb_debug) 
2556                         printf("user does not have access to privileged ioctls\n");
2557                 return (EPERM);
2558         }
2559 #else
2560         if (suser(td)) {
2561                 if (cxgb_debug)
2562                         printf("user does not have access to privileged ioctls\n");
2563                 return (EPERM);
2564         }
2565 #endif
2566         
2567         switch (cmd) {
2568         case CHELSIO_GET_MIIREG: {
2569                 uint32_t val;
2570                 struct cphy *phy = &pi->phy;
2571                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2572                 
2573                 if (!phy->mdio_read)
2574                         return (EOPNOTSUPP);
2575                 if (is_10G(sc)) {
2576                         mmd = mid->phy_id >> 8;
2577                         if (!mmd)
2578                                 mmd = MDIO_DEV_PCS;
2579                         else if (mmd > MDIO_DEV_VEND2)
2580                                 return (EINVAL);
2581
2582                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2583                                              mid->reg_num, &val);
2584                 } else
2585                         error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2586                                              mid->reg_num & 0x1f, &val);
2587                 if (error == 0)
2588                         mid->val_out = val;
2589                 break;
2590         }
2591         case CHELSIO_SET_MIIREG: {
2592                 struct cphy *phy = &pi->phy;
2593                 struct ch_mii_data *mid = (struct ch_mii_data *)data;
2594
2595                 if (!phy->mdio_write)
2596                         return (EOPNOTSUPP);
2597                 if (is_10G(sc)) {
2598                         mmd = mid->phy_id >> 8;
2599                         if (!mmd)
2600                                 mmd = MDIO_DEV_PCS;
2601                         else if (mmd > MDIO_DEV_VEND2)
2602                                 return (EINVAL);
2603                         
2604                         error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2605                                               mmd, mid->reg_num, mid->val_in);
2606                 } else
2607                         error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2608                                               mid->reg_num & 0x1f,
2609                                               mid->val_in);
2610                 break;
2611         }
2612         case CHELSIO_SETREG: {
2613                 struct ch_reg *edata = (struct ch_reg *)data;
2614                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2615                         return (EFAULT);
2616                 t3_write_reg(sc, edata->addr, edata->val);
2617                 break;
2618         }
2619         case CHELSIO_GETREG: {
2620                 struct ch_reg *edata = (struct ch_reg *)data;
2621                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2622                         return (EFAULT);
2623                 edata->val = t3_read_reg(sc, edata->addr);
2624                 break;
2625         }
2626         case CHELSIO_GET_SGE_CONTEXT: {
2627                 struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2628                 mtx_lock_spin(&sc->sge.reg_lock);
2629                 switch (ecntxt->cntxt_type) {
2630                 case CNTXT_TYPE_EGRESS:
2631                         error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2632                             ecntxt->data);
2633                         break;
2634                 case CNTXT_TYPE_FL:
2635                         error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2636                             ecntxt->data);
2637                         break;
2638                 case CNTXT_TYPE_RSP:
2639                         error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2640                             ecntxt->data);
2641                         break;
2642                 case CNTXT_TYPE_CQ:
2643                         error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2644                             ecntxt->data);
2645                         break;
2646                 default:
2647                         error = EINVAL;
2648                         break;
2649                 }
2650                 mtx_unlock_spin(&sc->sge.reg_lock);
2651                 break;
2652         }
2653         case CHELSIO_GET_SGE_DESC: {
2654                 struct ch_desc *edesc = (struct ch_desc *)data;
2655                 int ret;
2656                 if (edesc->queue_num >= SGE_QSETS * 6)
2657                         return (EINVAL);
2658                 ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2659                     edesc->queue_num % 6, edesc->idx, edesc->data);
2660                 if (ret < 0)
2661                         return (EINVAL);
2662                 edesc->size = ret;
2663                 break;
2664         }
2665         case CHELSIO_GET_QSET_PARAMS: {
2666                 struct qset_params *q;
2667                 struct ch_qset_params *t = (struct ch_qset_params *)data;
2668                 int q1 = pi->first_qset;
2669                 int nqsets = pi->nqsets;
2670                 int i;
2671
2672                 if (t->qset_idx >= nqsets)
2673                         return EINVAL;
2674
2675                 i = q1 + t->qset_idx;
2676                 q = &sc->params.sge.qset[i];
2677                 t->rspq_size   = q->rspq_size;
2678                 t->txq_size[0] = q->txq_size[0];
2679                 t->txq_size[1] = q->txq_size[1];
2680                 t->txq_size[2] = q->txq_size[2];
2681                 t->fl_size[0]  = q->fl_size;
2682                 t->fl_size[1]  = q->jumbo_size;
2683                 t->polling     = q->polling;
2684                 t->lro         = q->lro;
2685                 t->intr_lat    = q->coalesce_usecs;
2686                 t->cong_thres  = q->cong_thres;
2687                 t->qnum        = i;
2688
2689                 if ((sc->flags & FULL_INIT_DONE) == 0)
2690                         t->vector = 0;
2691                 else if (sc->flags & USING_MSIX)
2692                         t->vector = rman_get_start(sc->msix_irq_res[i]);
2693                 else
2694                         t->vector = rman_get_start(sc->irq_res);
2695
2696                 break;
2697         }
2698         case CHELSIO_GET_QSET_NUM: {
2699                 struct ch_reg *edata = (struct ch_reg *)data;
2700                 edata->val = pi->nqsets;
2701                 break;
2702         }
2703         case CHELSIO_LOAD_FW: {
2704                 uint8_t *fw_data;
2705                 uint32_t vers;
2706                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2707
2708                 /*
2709                  * You're allowed to load a firmware only before FULL_INIT_DONE
2710                  *
2711                  * FW_UPTODATE is also set so the rest of the initialization
2712                  * will not overwrite what was loaded here.  This gives you the
2713                  * flexibility to load any firmware (and maybe shoot yourself in
2714                  * the foot).
2715                  */
2716
2717                 ADAPTER_LOCK(sc);
2718                 if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2719                         ADAPTER_UNLOCK(sc);
2720                         return (EBUSY);
2721                 }
2722
2723                 fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2724                 if (!fw_data)
2725                         error = ENOMEM;
2726                 else
2727                         error = copyin(t->buf, fw_data, t->len);
2728
2729                 if (!error)
2730                         error = -t3_load_fw(sc, fw_data, t->len);
2731
2732                 if (t3_get_fw_version(sc, &vers) == 0) {
2733                         snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2734                             "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2735                             G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2736                 }
2737
2738                 if (!error)
2739                         sc->flags |= FW_UPTODATE;
2740
2741                 free(fw_data, M_DEVBUF);
2742                 ADAPTER_UNLOCK(sc);
2743                 break;
2744         }
2745         case CHELSIO_LOAD_BOOT: {
2746                 uint8_t *boot_data;
2747                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2748
2749                 boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2750                 if (!boot_data)
2751                         return ENOMEM;
2752
2753                 error = copyin(t->buf, boot_data, t->len);
2754                 if (!error)
2755                         error = -t3_load_boot(sc, boot_data, t->len);
2756
2757                 free(boot_data, M_DEVBUF);
2758                 break;
2759         }
2760         case CHELSIO_GET_PM: {
2761                 struct ch_pm *m = (struct ch_pm *)data;
2762                 struct tp_params *p = &sc->params.tp;
2763
2764                 if (!is_offload(sc))
2765                         return (EOPNOTSUPP);
2766
2767                 m->tx_pg_sz = p->tx_pg_size;
2768                 m->tx_num_pg = p->tx_num_pgs;
2769                 m->rx_pg_sz  = p->rx_pg_size;
2770                 m->rx_num_pg = p->rx_num_pgs;
2771                 m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2772
2773                 break;
2774         }
2775         case CHELSIO_SET_PM: {
2776                 struct ch_pm *m = (struct ch_pm *)data;
2777                 struct tp_params *p = &sc->params.tp;
2778
2779                 if (!is_offload(sc))
2780                         return (EOPNOTSUPP);
2781                 if (sc->flags & FULL_INIT_DONE)
2782                         return (EBUSY);
2783
2784                 if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2785                     !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2786                         return (EINVAL);        /* not power of 2 */
2787                 if (!(m->rx_pg_sz & 0x14000))
2788                         return (EINVAL);        /* not 16KB or 64KB */
2789                 if (!(m->tx_pg_sz & 0x1554000))
2790                         return (EINVAL);
2791                 if (m->tx_num_pg == -1)
2792                         m->tx_num_pg = p->tx_num_pgs;
2793                 if (m->rx_num_pg == -1)
2794                         m->rx_num_pg = p->rx_num_pgs;
2795                 if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2796                         return (EINVAL);
2797                 if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2798                     m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2799                         return (EINVAL);
2800
2801                 p->rx_pg_size = m->rx_pg_sz;
2802                 p->tx_pg_size = m->tx_pg_sz;
2803                 p->rx_num_pgs = m->rx_num_pg;
2804                 p->tx_num_pgs = m->tx_num_pg;
2805                 break;
2806         }
2807         case CHELSIO_SETMTUTAB: {
2808                 struct ch_mtus *m = (struct ch_mtus *)data;
2809                 int i;
2810                 
2811                 if (!is_offload(sc))
2812                         return (EOPNOTSUPP);
2813                 if (offload_running(sc))
2814                         return (EBUSY);
2815                 if (m->nmtus != NMTUS)
2816                         return (EINVAL);
2817                 if (m->mtus[0] < 81)         /* accommodate SACK */
2818                         return (EINVAL);
2819                 
2820                 /*
2821                  * MTUs must be in ascending order
2822                  */
2823                 for (i = 1; i < NMTUS; ++i)
2824                         if (m->mtus[i] < m->mtus[i - 1])
2825                                 return (EINVAL);
2826
2827                 memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2828                 break;
2829         }
2830         case CHELSIO_GETMTUTAB: {
2831                 struct ch_mtus *m = (struct ch_mtus *)data;
2832
2833                 if (!is_offload(sc))
2834                         return (EOPNOTSUPP);
2835
2836                 memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2837                 m->nmtus = NMTUS;
2838                 break;
2839         }
2840         case CHELSIO_GET_MEM: {
2841                 struct ch_mem_range *t = (struct ch_mem_range *)data;
2842                 struct mc7 *mem;
2843                 uint8_t *useraddr;
2844                 u64 buf[32];
2845
2846                 /*
2847                  * Use these to avoid modifying len/addr in the return
2848                  * struct
2849                  */
2850                 uint32_t len = t->len, addr = t->addr;
2851
2852                 if (!is_offload(sc))
2853                         return (EOPNOTSUPP);
2854                 if (!(sc->flags & FULL_INIT_DONE))
2855                         return (EIO);         /* need the memory controllers */
2856                 if ((addr & 0x7) || (len & 0x7))
2857                         return (EINVAL);
2858                 if (t->mem_id == MEM_CM)
2859                         mem = &sc->cm;
2860                 else if (t->mem_id == MEM_PMRX)
2861                         mem = &sc->pmrx;
2862                 else if (t->mem_id == MEM_PMTX)
2863                         mem = &sc->pmtx;
2864                 else
2865                         return (EINVAL);
2866
2867                 /*
2868                  * Version scheme:
2869                  * bits 0..9: chip version
2870                  * bits 10..15: chip revision
2871                  */
2872                 t->version = 3 | (sc->params.rev << 10);
2873                 
2874                 /*
2875                  * Read 256 bytes at a time as len can be large and we don't
2876                  * want to use huge intermediate buffers.
2877                  */
2878                 useraddr = (uint8_t *)t->buf; 
2879                 while (len) {
2880                         unsigned int chunk = min(len, sizeof(buf));
2881
2882                         error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2883                         if (error)
2884                                 return (-error);
2885                         if (copyout(buf, useraddr, chunk))
2886                                 return (EFAULT);
2887                         useraddr += chunk;
2888                         addr += chunk;
2889                         len -= chunk;
2890                 }
2891                 break;
2892         }
2893         case CHELSIO_READ_TCAM_WORD: {
2894                 struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2895
2896                 if (!is_offload(sc))
2897                         return (EOPNOTSUPP);
2898                 if (!(sc->flags & FULL_INIT_DONE))
2899                         return (EIO);         /* need MC5 */            
2900                 return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2901                 break;
2902         }
2903         case CHELSIO_SET_TRACE_FILTER: {
2904                 struct ch_trace *t = (struct ch_trace *)data;
2905                 const struct trace_params *tp;
2906
2907                 tp = (const struct trace_params *)&t->sip;
2908                 if (t->config_tx)
2909                         t3_config_trace_filter(sc, tp, 0, t->invert_match,
2910                                                t->trace_tx);
2911                 if (t->config_rx)
2912                         t3_config_trace_filter(sc, tp, 1, t->invert_match,
2913                                                t->trace_rx);
2914                 break;
2915         }
2916         case CHELSIO_SET_PKTSCHED: {
2917                 struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2918                 if (sc->open_device_map == 0)
2919                         return (EAGAIN);
2920                 send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2921                     p->binding);
2922                 break;
2923         }
2924         case CHELSIO_IFCONF_GETREGS: {
2925                 struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2926                 int reglen = cxgb_get_regs_len();
2927                 uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2928                 if (buf == NULL) {
2929                         return (ENOMEM);
2930                 }
2931                 if (regs->len > reglen)
2932                         regs->len = reglen;
2933                 else if (regs->len < reglen)
2934                         error = ENOBUFS;
2935
2936                 if (!error) {
2937                         cxgb_get_regs(sc, regs, buf);
2938                         error = copyout(buf, regs->data, reglen);
2939                 }
2940                 free(buf, M_DEVBUF);
2941
2942                 break;
2943         }
2944         case CHELSIO_SET_HW_SCHED: {
2945                 struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2946                 unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2947
2948                 if ((sc->flags & FULL_INIT_DONE) == 0)
2949                         return (EAGAIN);       /* need TP to be initialized */
2950                 if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2951                     !in_range(t->channel, 0, 1) ||
2952                     !in_range(t->kbps, 0, 10000000) ||
2953                     !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2954                     !in_range(t->flow_ipg, 0,
2955                               dack_ticks_to_usec(sc, 0x7ff)))
2956                         return (EINVAL);
2957
2958                 if (t->kbps >= 0) {
2959                         error = t3_config_sched(sc, t->kbps, t->sched);
2960                         if (error < 0)
2961                                 return (-error);
2962                 }
2963                 if (t->class_ipg >= 0)
2964                         t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2965                 if (t->flow_ipg >= 0) {
2966                         t->flow_ipg *= 1000;     /* us -> ns */
2967                         t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2968                 }
2969                 if (t->mode >= 0) {
2970                         int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2971
2972                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2973                                          bit, t->mode ? bit : 0);
2974                 }
2975                 if (t->channel >= 0)
2976                         t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2977                                          1 << t->sched, t->channel << t->sched);
2978                 break;
2979         }
2980         case CHELSIO_GET_EEPROM: {
2981                 int i;
2982                 struct ch_eeprom *e = (struct ch_eeprom *)data;
2983                 uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2984
2985                 if (buf == NULL) {
2986                         return (ENOMEM);
2987                 }
2988                 e->magic = EEPROM_MAGIC;
2989                 for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2990                         error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2991
2992                 if (!error)
2993                         error = copyout(buf + e->offset, e->data, e->len);
2994
2995                 free(buf, M_DEVBUF);
2996                 break;
2997         }
2998         case CHELSIO_CLEAR_STATS: {
2999                 if (!(sc->flags & FULL_INIT_DONE))
3000                         return EAGAIN;
3001
3002                 PORT_LOCK(pi);
3003                 t3_mac_update_stats(&pi->mac);
3004                 memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3005                 PORT_UNLOCK(pi);
3006                 break;
3007         }
3008         case CHELSIO_GET_UP_LA: {
3009                 struct ch_up_la *la = (struct ch_up_la *)data;
3010                 uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3011                 if (buf == NULL) {
3012                         return (ENOMEM);
3013                 }
3014                 if (la->bufsize < LA_BUFSIZE)
3015                         error = ENOBUFS;
3016
3017                 if (!error)
3018                         error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3019                                               &la->bufsize, buf);
3020                 if (!error)
3021                         error = copyout(buf, la->data, la->bufsize);
3022
3023                 free(buf, M_DEVBUF);
3024                 break;
3025         }
3026         case CHELSIO_GET_UP_IOQS: {
3027                 struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3028                 uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3029                 uint32_t *v;
3030
3031                 if (buf == NULL) {
3032                         return (ENOMEM);
3033                 }
3034                 if (ioqs->bufsize < IOQS_BUFSIZE)
3035                         error = ENOBUFS;
3036
3037                 if (!error)
3038                         error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3039
3040                 if (!error) {
3041                         v = (uint32_t *)buf;
3042
3043                         ioqs->ioq_rx_enable = *v++;
3044                         ioqs->ioq_tx_enable = *v++;
3045                         ioqs->ioq_rx_status = *v++;
3046                         ioqs->ioq_tx_status = *v++;
3047
3048                         error = copyout(v, ioqs->data, ioqs->bufsize);
3049                 }
3050
3051                 free(buf, M_DEVBUF);
3052                 break;
3053         }
3054         case CHELSIO_SET_FILTER: {
3055                 struct ch_filter *f = (struct ch_filter *)data;;
3056                 struct filter_info *p;
3057                 unsigned int nfilters = sc->params.mc5.nfilters;
3058
3059                 if (!is_offload(sc))
3060                         return (EOPNOTSUPP);    /* No TCAM */
3061                 if (!(sc->flags & FULL_INIT_DONE))
3062                         return (EAGAIN);        /* mc5 not setup yet */
3063                 if (nfilters == 0)
3064                         return (EBUSY);         /* TOE will use TCAM */
3065
3066                 /* sanity checks */
3067                 if (f->filter_id >= nfilters ||
3068                     (f->val.dip && f->mask.dip != 0xffffffff) ||
3069                     (f->val.sport && f->mask.sport != 0xffff) ||
3070                     (f->val.dport && f->mask.dport != 0xffff) ||
3071                     (f->val.vlan && f->mask.vlan != 0xfff) ||
3072                     (f->val.vlan_prio &&
3073                         f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3074                     (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3075                     f->qset >= SGE_QSETS ||
3076                     sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3077                         return (EINVAL);
3078
3079                 /* Was allocated with M_WAITOK */
3080                 KASSERT(sc->filters, ("filter table NULL\n"));
3081
3082                 p = &sc->filters[f->filter_id];
3083                 if (p->locked)
3084                         return (EPERM);
3085
3086                 bzero(p, sizeof(*p));
3087                 p->sip = f->val.sip;
3088                 p->sip_mask = f->mask.sip;
3089                 p->dip = f->val.dip;
3090                 p->sport = f->val.sport;
3091                 p->dport = f->val.dport;
3092                 p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3093                 p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3094                     FILTER_NO_VLAN_PRI;
3095                 p->mac_hit = f->mac_hit;
3096                 p->mac_vld = f->mac_addr_idx != 0xffff;
3097                 p->mac_idx = f->mac_addr_idx;
3098                 p->pkt_type = f->proto;
3099                 p->report_filter_id = f->want_filter_id;
3100                 p->pass = f->pass;
3101                 p->rss = f->rss;
3102                 p->qset = f->qset;
3103
3104                 error = set_filter(sc, f->filter_id, p);
3105                 if (error == 0)
3106                         p->valid = 1;
3107                 break;
3108         }
3109         case CHELSIO_DEL_FILTER: {
3110                 struct ch_filter *f = (struct ch_filter *)data;
3111                 struct filter_info *p;
3112                 unsigned int nfilters = sc->params.mc5.nfilters;
3113
3114                 if (!is_offload(sc))
3115                         return (EOPNOTSUPP);
3116                 if (!(sc->flags & FULL_INIT_DONE))
3117                         return (EAGAIN);
3118                 if (nfilters == 0 || sc->filters == NULL)
3119                         return (EINVAL);
3120                 if (f->filter_id >= nfilters)
3121                        return (EINVAL);
3122
3123                 p = &sc->filters[f->filter_id];
3124                 if (p->locked)
3125                         return (EPERM);
3126                 if (!p->valid)
3127                         return (EFAULT); /* Read "Bad address" as "Bad index" */
3128
3129                 bzero(p, sizeof(*p));
3130                 p->sip = p->sip_mask = 0xffffffff;
3131                 p->vlan = 0xfff;
3132                 p->vlan_prio = FILTER_NO_VLAN_PRI;
3133                 p->pkt_type = 1;
3134                 error = set_filter(sc, f->filter_id, p);
3135                 break;
3136         }
3137         case CHELSIO_GET_FILTER: {
3138                 struct ch_filter *f = (struct ch_filter *)data;
3139                 struct filter_info *p;
3140                 unsigned int i, nfilters = sc->params.mc5.nfilters;
3141
3142                 if (!is_offload(sc))
3143                         return (EOPNOTSUPP);
3144                 if (!(sc->flags & FULL_INIT_DONE))
3145                         return (EAGAIN);
3146                 if (nfilters == 0 || sc->filters == NULL)
3147                         return (EINVAL);
3148
3149                 i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3150                 for (; i < nfilters; i++) {
3151                         p = &sc->filters[i];
3152                         if (!p->valid)
3153                                 continue;
3154
3155                         bzero(f, sizeof(*f));
3156
3157                         f->filter_id = i;
3158                         f->val.sip = p->sip;
3159                         f->mask.sip = p->sip_mask;
3160                         f->val.dip = p->dip;
3161                         f->mask.dip = p->dip ? 0xffffffff : 0;
3162                         f->val.sport = p->sport;
3163                         f->mask.sport = p->sport ? 0xffff : 0;
3164                         f->val.dport = p->dport;
3165                         f->mask.dport = p->dport ? 0xffff : 0;
3166                         f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3167                         f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3168                         f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3169                             0 : p->vlan_prio;
3170                         f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3171                             0 : FILTER_NO_VLAN_PRI;
3172                         f->mac_hit = p->mac_hit;
3173                         f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3174                         f->proto = p->pkt_type;
3175                         f->want_filter_id = p->report_filter_id;
3176                         f->pass = p->pass;
3177                         f->rss = p->rss;
3178                         f->qset = p->qset;
3179
3180                         break;
3181                 }
3182                 
3183                 if (i == nfilters)
3184                         f->filter_id = 0xffffffff;
3185                 break;
3186         }
3187         default:
3188                 return (EOPNOTSUPP);
3189                 break;
3190         }
3191
3192         return (error);
3193 }
3194
/*
 * Copy the 32-bit registers at addresses start..end (inclusive, stepping
 * by 4) into buf.  Each register value is stored at the same byte offset
 * in buf as its register address, so the caller must supply a buffer that
 * covers offset 'end'.
 */
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *dst;
	unsigned int reg;

	dst = (uint32_t *)(buf + start);
	reg = start;
	while (reg <= end) {
		*dst = t3_read_reg(ap, reg);
		dst++;
		reg += sizeof(uint32_t);
	}
}
3204
/* Size in bytes of the register dump buffer filled by cxgb_get_regs(). */
#define T3_REGMAP_SIZE (3 * 1024)

/*
 * Return the number of bytes a register dump occupies.
 */
static int
cxgb_get_regs_len(void)
{
	const int regmap_bytes = T3_REGMAP_SIZE;

	return (regmap_bytes);
}
3211
3212 static void
3213 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3214 {           
3215         
3216         /*
3217          * Version scheme:
3218          * bits 0..9: chip version
3219          * bits 10..15: chip revision
3220          * bit 31: set for PCIe cards
3221          */
3222         regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3223
3224         /*
3225          * We skip the MAC statistics registers because they are clear-on-read.
3226          * Also reading multi-register stats would need to synchronize with the
3227          * periodic mac stats accumulation.  Hard to justify the complexity.
3228          */
3229         memset(buf, 0, cxgb_get_regs_len());
3230         reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3231         reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3232         reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3233         reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3234         reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3235         reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3236                        XGM_REG(A_XGM_SERDES_STAT3, 1));
3237         reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3238                        XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3239 }
3240
3241 static int
3242 alloc_filters(struct adapter *sc)
3243 {
3244         struct filter_info *p;
3245         unsigned int nfilters = sc->params.mc5.nfilters;
3246
3247         if (nfilters == 0)
3248                 return (0);
3249
3250         p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3251         sc->filters = p;
3252
3253         p = &sc->filters[nfilters - 1];
3254         p->vlan = 0xfff;
3255         p->vlan_prio = FILTER_NO_VLAN_PRI;
3256         p->pass = p->rss = p->valid = p->locked = 1;
3257
3258         return (0);
3259 }
3260
3261 static int
3262 setup_hw_filters(struct adapter *sc)
3263 {
3264         int i, rc;
3265         unsigned int nfilters = sc->params.mc5.nfilters;
3266
3267         if (!sc->filters)
3268                 return (0);
3269
3270         t3_enable_filters(sc);
3271
3272         for (i = rc = 0; i < nfilters && !rc; i++) {
3273                 if (sc->filters[i].locked)
3274                         rc = set_filter(sc, i, &sc->filters[i]);
3275         }
3276
3277         return (rc);
3278 }
3279
3280 static int
3281 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3282 {
3283         int len;
3284         struct mbuf *m;
3285         struct ulp_txpkt *txpkt;
3286         struct work_request_hdr *wr;
3287         struct cpl_pass_open_req *oreq;
3288         struct cpl_set_tcb_field *sreq;
3289
3290         len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3291         KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3292
3293         id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3294               sc->params.mc5.nfilters;
3295
3296         m = m_gethdr(M_WAITOK, MT_DATA);
3297         m->m_len = m->m_pkthdr.len = len;
3298         bzero(mtod(m, char *), len);
3299
3300         wr = mtod(m, struct work_request_hdr *);
3301         wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3302
3303         oreq = (struct cpl_pass_open_req *)(wr + 1);
3304         txpkt = (struct ulp_txpkt *)oreq;
3305         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3306         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3307         OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3308         oreq->local_port = htons(f->dport);
3309         oreq->peer_port = htons(f->sport);
3310         oreq->local_ip = htonl(f->dip);
3311         oreq->peer_ip = htonl(f->sip);
3312         oreq->peer_netmask = htonl(f->sip_mask);
3313         oreq->opt0h = 0;
3314         oreq->opt0l = htonl(F_NO_OFFLOAD);
3315         oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3316                          V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3317                          V_VLAN_PRI(f->vlan_prio >> 1) |
3318                          V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3319                          V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3320                          V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3321
3322         sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3323         set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3324                           (f->report_filter_id << 15) | (1 << 23) |
3325                           ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3326         set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3327         t3_mgmt_tx(sc, m);
3328
3329         if (f->pass && !f->rss) {
3330                 len = sizeof(*sreq);
3331                 m = m_gethdr(M_WAITOK, MT_DATA);
3332                 m->m_len = m->m_pkthdr.len = len;
3333                 bzero(mtod(m, char *), len);
3334                 sreq = mtod(m, struct cpl_set_tcb_field *);
3335                 sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3336                 mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3337                                  (u64)sc->rrss_map[f->qset] << 19);
3338                 t3_mgmt_tx(sc, m);
3339         }
3340         return 0;
3341 }
3342
3343 static inline void
3344 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3345     unsigned int word, u64 mask, u64 val)
3346 {
3347         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3348         req->reply = V_NO_REPLY(1);
3349         req->cpu_idx = 0;
3350         req->word = htons(word);
3351         req->mask = htobe64(mask);
3352         req->val = htobe64(val);
3353 }
3354
3355 static inline void
3356 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3357     unsigned int word, u64 mask, u64 val)
3358 {
3359         struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3360
3361         txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3362         txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3363         mk_set_tcb_field(req, tid, word, mask, val);
3364 }