]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/cxgbe/t4_main.c
cxgbe(4): Update firmwares from version 1.16.12.0 to 1.16.22.0.
[FreeBSD/FreeBSD.git] / sys / dev / cxgbe / t4_main.c
1 /*-
2  * Copyright (c) 2011 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: Navdeep Parhar <np@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include "opt_ddb.h"
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 #include "opt_rss.h"
35
36 #include <sys/param.h>
37 #include <sys/conf.h>
38 #include <sys/priv.h>
39 #include <sys/kernel.h>
40 #include <sys/bus.h>
41 #include <sys/module.h>
42 #include <sys/malloc.h>
43 #include <sys/queue.h>
44 #include <sys/taskqueue.h>
45 #include <sys/pciio.h>
46 #include <dev/pci/pcireg.h>
47 #include <dev/pci/pcivar.h>
48 #include <dev/pci/pci_private.h>
49 #include <sys/firmware.h>
50 #include <sys/sbuf.h>
51 #include <sys/smp.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_types.h>
58 #include <net/if_dl.h>
59 #include <net/if_vlan_var.h>
60 #ifdef RSS
61 #include <net/rss_config.h>
62 #endif
63 #if defined(__i386__) || defined(__amd64__)
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #endif
67 #ifdef DDB
68 #include <ddb/ddb.h>
69 #include <ddb/db_lex.h>
70 #endif
71
72 #include "common/common.h"
73 #include "common/t4_msg.h"
74 #include "common/t4_regs.h"
75 #include "common/t4_regs_values.h"
76 #include "t4_ioctl.h"
77 #include "t4_l2t.h"
78 #include "t4_mp_ring.h"
79 #include "t4_if.h"
80
81 /* T4 bus driver interface */
82 static int t4_probe(device_t);
83 static int t4_attach(device_t);
84 static int t4_detach(device_t);
85 static int t4_ready(device_t);
86 static int t4_read_port_device(device_t, int, device_t *);
87 static device_method_t t4_methods[] = {
88         DEVMETHOD(device_probe,         t4_probe),
89         DEVMETHOD(device_attach,        t4_attach),
90         DEVMETHOD(device_detach,        t4_detach),
91
92         DEVMETHOD(t4_is_main_ready,     t4_ready),
93         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
94
95         DEVMETHOD_END
96 };
97 static driver_t t4_driver = {
98         "t4nex",
99         t4_methods,
100         sizeof(struct adapter)
101 };
102
103
104 /* T4 port (cxgbe) interface */
105 static int cxgbe_probe(device_t);
106 static int cxgbe_attach(device_t);
107 static int cxgbe_detach(device_t);
108 device_method_t cxgbe_methods[] = {
109         DEVMETHOD(device_probe,         cxgbe_probe),
110         DEVMETHOD(device_attach,        cxgbe_attach),
111         DEVMETHOD(device_detach,        cxgbe_detach),
112         { 0, 0 }
113 };
114 static driver_t cxgbe_driver = {
115         "cxgbe",
116         cxgbe_methods,
117         sizeof(struct port_info)
118 };
119
120 /* T4 VI (vcxgbe) interface */
121 static int vcxgbe_probe(device_t);
122 static int vcxgbe_attach(device_t);
123 static int vcxgbe_detach(device_t);
124 static device_method_t vcxgbe_methods[] = {
125         DEVMETHOD(device_probe,         vcxgbe_probe),
126         DEVMETHOD(device_attach,        vcxgbe_attach),
127         DEVMETHOD(device_detach,        vcxgbe_detach),
128         { 0, 0 }
129 };
130 static driver_t vcxgbe_driver = {
131         "vcxgbe",
132         vcxgbe_methods,
133         sizeof(struct vi_info)
134 };
135
136 static d_ioctl_t t4_ioctl;
137
138 static struct cdevsw t4_cdevsw = {
139        .d_version = D_VERSION,
140        .d_ioctl = t4_ioctl,
141        .d_name = "t4nex",
142 };
143
144 /* T5 bus driver interface */
145 static int t5_probe(device_t);
146 static device_method_t t5_methods[] = {
147         DEVMETHOD(device_probe,         t5_probe),
148         DEVMETHOD(device_attach,        t4_attach),
149         DEVMETHOD(device_detach,        t4_detach),
150
151         DEVMETHOD(t4_is_main_ready,     t4_ready),
152         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
153
154         DEVMETHOD_END
155 };
156 static driver_t t5_driver = {
157         "t5nex",
158         t5_methods,
159         sizeof(struct adapter)
160 };
161
162
163 /* T5 port (cxl) interface */
164 static driver_t cxl_driver = {
165         "cxl",
166         cxgbe_methods,
167         sizeof(struct port_info)
168 };
169
170 /* T5 VI (vcxl) interface */
171 static driver_t vcxl_driver = {
172         "vcxl",
173         vcxgbe_methods,
174         sizeof(struct vi_info)
175 };
176
177 /* T6 bus driver interface */
178 static int t6_probe(device_t);
179 static device_method_t t6_methods[] = {
180         DEVMETHOD(device_probe,         t6_probe),
181         DEVMETHOD(device_attach,        t4_attach),
182         DEVMETHOD(device_detach,        t4_detach),
183
184         DEVMETHOD(t4_is_main_ready,     t4_ready),
185         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
186
187         DEVMETHOD_END
188 };
189 static driver_t t6_driver = {
190         "t6nex",
191         t6_methods,
192         sizeof(struct adapter)
193 };
194
195
196 /* T6 port (cc) interface */
197 static driver_t cc_driver = {
198         "cc",
199         cxgbe_methods,
200         sizeof(struct port_info)
201 };
202
203 /* T6 VI (vcc) interface */
204 static driver_t vcc_driver = {
205         "vcc",
206         vcxgbe_methods,
207         sizeof(struct vi_info)
208 };
209
210 /* ifnet + media interface */
211 static void cxgbe_init(void *);
212 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
213 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
214 static void cxgbe_qflush(struct ifnet *);
215 static int cxgbe_media_change(struct ifnet *);
216 static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);
217
218 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");
219
220 /*
221  * Correct lock order when you need to acquire multiple locks is t4_list_lock,
222  * then ADAPTER_LOCK, then t4_uld_list_lock.
223  */
224 static struct sx t4_list_lock;
225 SLIST_HEAD(, adapter) t4_list;
226 #ifdef TCP_OFFLOAD
227 static struct sx t4_uld_list_lock;
228 SLIST_HEAD(, uld_info) t4_uld_list;
229 #endif
230
231 /*
232  * Tunables.  See tweak_tunables() too.
233  *
234  * Each tunable is set to a default value here if it's known at compile-time.
235  * Otherwise it is set to -1 as an indication to tweak_tunables() that it should
236  * provide a reasonable default when the driver is loaded.
237  *
238  * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
239  * T5 are under hw.cxl.
240  */
241
242 /*
243  * Number of queues for tx and rx, 10G and 1G, NIC and offload.
244  */
245 #define NTXQ_10G 16
246 int t4_ntxq10g = -1;
247 TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g);
248
249 #define NRXQ_10G 8
250 int t4_nrxq10g = -1;
251 TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g);
252
253 #define NTXQ_1G 4
254 int t4_ntxq1g = -1;
255 TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);
256
257 #define NRXQ_1G 2
258 int t4_nrxq1g = -1;
259 TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);
260
261 #define NTXQ_VI 1
262 static int t4_ntxq_vi = -1;
263 TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi);
264
265 #define NRXQ_VI 1
266 static int t4_nrxq_vi = -1;
267 TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi);
268
269 static int t4_rsrv_noflowq = 0;
270 TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq);
271
272 #ifdef TCP_OFFLOAD
273 #define NOFLDTXQ_10G 8
274 static int t4_nofldtxq10g = -1;
275 TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);
276
277 #define NOFLDRXQ_10G 2
278 static int t4_nofldrxq10g = -1;
279 TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g);
280
281 #define NOFLDTXQ_1G 2
282 static int t4_nofldtxq1g = -1;
283 TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g);
284
285 #define NOFLDRXQ_1G 1
286 static int t4_nofldrxq1g = -1;
287 TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g);
288
289 #define NOFLDTXQ_VI 1
290 static int t4_nofldtxq_vi = -1;
291 TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi);
292
293 #define NOFLDRXQ_VI 1
294 static int t4_nofldrxq_vi = -1;
295 TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi);
296 #endif
297
298 #ifdef DEV_NETMAP
299 #define NNMTXQ_VI 2
300 static int t4_nnmtxq_vi = -1;
301 TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi);
302
303 #define NNMRXQ_VI 2
304 static int t4_nnmrxq_vi = -1;
305 TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi);
306 #endif
307
308 /*
309  * Holdoff parameters for 10G and 1G ports.
310  */
311 #define TMR_IDX_10G 1
312 int t4_tmr_idx_10g = TMR_IDX_10G;
313 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);
314
315 #define PKTC_IDX_10G (-1)
316 int t4_pktc_idx_10g = PKTC_IDX_10G;
317 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);
318
319 #define TMR_IDX_1G 1
320 int t4_tmr_idx_1g = TMR_IDX_1G;
321 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);
322
323 #define PKTC_IDX_1G (-1)
324 int t4_pktc_idx_1g = PKTC_IDX_1G;
325 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);
326
327 /*
328  * Size (# of entries) of each tx and rx queue.
329  */
330 unsigned int t4_qsize_txq = TX_EQ_QSIZE;
331 TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);
332
333 unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
334 TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);
335
336 /*
337  * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
338  */
339 int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
340 TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
341
342 /*
343  * Configuration file.
344  */
345 #define DEFAULT_CF      "default"
346 #define FLASH_CF        "flash"
347 #define UWIRE_CF        "uwire"
348 #define FPGA_CF         "fpga"
349 static char t4_cfg_file[32] = DEFAULT_CF;
350 TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));
351
352 /*
353  * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively).
354  * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
355  * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
356  *            mark or when signalled to do so, 0 to never emit PAUSE.
357  */
358 static int t4_pause_settings = PAUSE_TX | PAUSE_RX;
359 TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
360
361 /*
362  * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
363  * encouraged respectively).
364  */
365 static unsigned int t4_fw_install = 1;
366 TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install);
367
368 /*
369  * ASIC features that will be used.  Disable the ones you don't want so that the
370  * chip resources aren't wasted on features that will not be used.
371  */
372 static int t4_nbmcaps_allowed = 0;
373 TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed);
374
375 static int t4_linkcaps_allowed = 0;     /* No DCBX, PPP, etc. by default */
376 TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);
377
378 static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
379     FW_CAPS_CONFIG_SWITCH_EGRESS;
380 TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);
381
382 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
383 TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
384
385 static int t4_toecaps_allowed = -1;
386 TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);
387
388 static int t4_rdmacaps_allowed = -1;
389 TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);
390
391 static int t4_cryptocaps_allowed = 0;
392 TUNABLE_INT("hw.cxgbe.cryptocaps_allowed", &t4_cryptocaps_allowed);
393
394 static int t4_iscsicaps_allowed = -1;
395 TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);
396
397 static int t4_fcoecaps_allowed = 0;
398 TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);
399
400 static int t5_write_combine = 0;
401 TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);
402
403 static int t4_num_vis = 1;
404 TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);
405
406 /* Functions used by extra VIs to obtain unique MAC addresses for each VI. */
407 static int vi_mac_funcs[] = {
408         FW_VI_FUNC_OFLD,
409         FW_VI_FUNC_IWARP,
410         FW_VI_FUNC_OPENISCSI,
411         FW_VI_FUNC_OPENFCOE,
412         FW_VI_FUNC_FOISCSI,
413         FW_VI_FUNC_FOFCOE,
414 };
415
/*
 * Interrupt type plus per-port and per-VI queue counts.  A pointer to one of
 * these is the last argument of cfg_itype_and_nqueues().
 */
struct intrs_and_queues {
        /* Interrupts. */
        uint16_t intr_type;             /* INTx, MSI, or MSI-X */
        uint16_t nirq;                  /* total number of vectors */
        uint16_t intr_flags_10g;        /* interrupt flags, per 10G port */
        uint16_t intr_flags_1g;         /* interrupt flags, per 1G port */

        /* NIC queues of the main interface of each port. */
        uint16_t ntxq10g;               /* NIC txq count, per 10G port */
        uint16_t nrxq10g;               /* NIC rxq count, per 10G port */
        uint16_t ntxq1g;                /* NIC txq count, per 1G port */
        uint16_t nrxq1g;                /* NIC rxq count, per 1G port */
        uint16_t rsrv_noflowq;          /* flag: reserve queue 0 or not */

        /* TOE queues of the main interface of each port. */
        uint16_t nofldtxq10g;           /* TOE txq count, per 10G port */
        uint16_t nofldrxq10g;           /* TOE rxq count, per 10G port */
        uint16_t nofldtxq1g;            /* TOE txq count, per 1G port */
        uint16_t nofldrxq1g;            /* TOE rxq count, per 1G port */

        /*
         * Counts for the vcxgbe/vcxl interfaces, which use the fields below
         * instead of the per-port ones above.
         */
        uint16_t ntxq_vi;               /* NIC txq count */
        uint16_t nrxq_vi;               /* NIC rxq count */
        uint16_t nofldtxq_vi;           /* TOE txq count */
        uint16_t nofldrxq_vi;           /* TOE rxq count */
        uint16_t nnmtxq_vi;             /* netmap txq count */
        uint16_t nnmrxq_vi;             /* netmap rxq count */
};
439
440 struct filter_entry {
441         uint32_t valid:1;       /* filter allocated and valid */
442         uint32_t locked:1;      /* filter is administratively locked */
443         uint32_t pending:1;     /* filter action is pending firmware reply */
444         uint32_t smtidx:8;      /* Source MAC Table index for smac */
445         struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
446
447         struct t4_filter_specification fs;
448 };
449
450 static void setup_memwin(struct adapter *);
451 static void position_memwin(struct adapter *, int, uint32_t);
452 static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int);
453 static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *,
454     int);
455 static inline int write_via_memwin(struct adapter *, int, uint32_t,
456     const uint32_t *, int);
457 static int validate_mem_range(struct adapter *, uint32_t, int);
458 static int fwmtype_to_hwmtype(int);
459 static int validate_mt_off_len(struct adapter *, int, uint32_t, int,
460     uint32_t *);
461 static int fixup_devlog_params(struct adapter *);
462 static int cfg_itype_and_nqueues(struct adapter *, int, int, int,
463     struct intrs_and_queues *);
464 static int prep_firmware(struct adapter *);
465 static int partition_resources(struct adapter *, const struct firmware *,
466     const char *);
467 static int get_params__pre_init(struct adapter *);
468 static int get_params__post_init(struct adapter *);
469 static int set_params__post_init(struct adapter *);
470 static void t4_set_desc(struct adapter *);
471 static void build_medialist(struct port_info *, struct ifmedia *);
472 static int cxgbe_init_synchronized(struct vi_info *);
473 static int cxgbe_uninit_synchronized(struct vi_info *);
474 static void quiesce_txq(struct adapter *, struct sge_txq *);
475 static void quiesce_wrq(struct adapter *, struct sge_wrq *);
476 static void quiesce_iq(struct adapter *, struct sge_iq *);
477 static void quiesce_fl(struct adapter *, struct sge_fl *);
478 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
479     driver_intr_t *, void *, char *);
480 static int t4_free_irq(struct adapter *, struct irq *);
481 static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
482 static void vi_refresh_stats(struct adapter *, struct vi_info *);
483 static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
484 static void cxgbe_tick(void *);
485 static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
486 static void cxgbe_sysctls(struct port_info *);
487 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
488 static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
489 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
490 static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
491 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
492 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
493 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
494 static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
495 static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
496 static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
497 static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
498 #ifdef SBUF_DRAIN
499 static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
500 static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
501 static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
502 static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS);
503 static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
504 static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
505 static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
506 static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
507 static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
508 static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
509 static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
510 static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
511 static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
512 static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
513 static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
514 static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
515 static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
516 static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
517 static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
518 static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
519 static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
520 static int sysctl_tids(SYSCTL_HANDLER_ARGS);
521 static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
522 static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
523 static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
524 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
525 static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
526 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
527 static int sysctl_tc_params(SYSCTL_HANDLER_ARGS);
528 #endif
529 #ifdef TCP_OFFLOAD
530 static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
531 static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
532 static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
533 #endif
534 static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t);
535 static uint32_t mode_to_fconf(uint32_t);
536 static uint32_t mode_to_iconf(uint32_t);
537 static int check_fspec_against_fconf_iconf(struct adapter *,
538     struct t4_filter_specification *);
539 static int get_filter_mode(struct adapter *, uint32_t *);
540 static int set_filter_mode(struct adapter *, uint32_t);
541 static inline uint64_t get_filter_hits(struct adapter *, uint32_t);
542 static int get_filter(struct adapter *, struct t4_filter *);
543 static int set_filter(struct adapter *, struct t4_filter *);
544 static int del_filter(struct adapter *, struct t4_filter *);
545 static void clear_filter(struct filter_entry *);
546 static int set_filter_wr(struct adapter *, int);
547 static int del_filter_wr(struct adapter *, int);
548 static int set_tcb_rpl(struct sge_iq *, const struct rss_header *,
549     struct mbuf *);
550 static int get_sge_context(struct adapter *, struct t4_sge_context *);
551 static int load_fw(struct adapter *, struct t4_data *);
552 static int load_cfg(struct adapter *, struct t4_data *);
553 static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
554 static int read_i2c(struct adapter *, struct t4_i2c_data *);
555 #ifdef TCP_OFFLOAD
556 static int toe_capability(struct vi_info *, int);
557 #endif
558 static int mod_event(module_t, int, void *);
559 static int notify_siblings(device_t, int);
560
/*
 * PCI device ids claimed by the T4, T5 and T6 probe routines, together with
 * the description string each probe routine sets on a match.  The three
 * tables share one anonymous element type.
 *
 * desc is const-qualified because every entry points at a string literal.
 */
struct {
        uint16_t device;
        const char *desc;
} t4_pciids[] = {
        {0xa000, "Chelsio Terminator 4 FPGA"},
        {0x4400, "Chelsio T440-dbg"},
        {0x4401, "Chelsio T420-CR"},
        {0x4402, "Chelsio T422-CR"},
        {0x4403, "Chelsio T440-CR"},
        {0x4404, "Chelsio T420-BCH"},
        {0x4405, "Chelsio T440-BCH"},
        {0x4406, "Chelsio T440-CH"},
        {0x4407, "Chelsio T420-SO"},
        {0x4408, "Chelsio T420-CX"},
        {0x4409, "Chelsio T420-BT"},
        {0x440a, "Chelsio T404-BT"},
        {0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
        {0xb000, "Chelsio Terminator 5 FPGA"},
        {0x5400, "Chelsio T580-dbg"},
        {0x5401, "Chelsio T520-CR"},            /* 2 x 10G */
        {0x5402, "Chelsio T522-CR"},            /* 2 x 10G, 2 X 1G */
        {0x5403, "Chelsio T540-CR"},            /* 4 x 10G */
        {0x5407, "Chelsio T520-SO"},            /* 2 x 10G, nomem */
        {0x5409, "Chelsio T520-BT"},            /* 2 x 10GBaseT */
        {0x540a, "Chelsio T504-BT"},            /* 4 x 1G */
        {0x540d, "Chelsio T580-CR"},            /* 2 x 40G */
        {0x540e, "Chelsio T540-LP-CR"},         /* 4 x 10G */
        {0x5410, "Chelsio T580-LP-CR"},         /* 2 x 40G */
        {0x5411, "Chelsio T520-LL-CR"},         /* 2 x 10G */
        {0x5412, "Chelsio T560-CR"},            /* 1 x 40G, 2 x 10G */
        {0x5414, "Chelsio T580-LP-SO-CR"},      /* 2 x 40G, nomem */
        {0x5415, "Chelsio T502-BT"},            /* 2 x 1G */
#ifdef notyet
        /* Compiled in only when "notyet" is defined. */
        {0x5404, "Chelsio T520-BCH"},
        {0x5405, "Chelsio T540-BCH"},
        {0x5406, "Chelsio T540-CH"},
        {0x5408, "Chelsio T520-CX"},
        {0x540b, "Chelsio B520-SR"},
        {0x540c, "Chelsio B504-BT"},
        {0x540f, "Chelsio Amsterdam"},
        {0x5413, "Chelsio T580-CHR"},
#endif
}, t6_pciids[] = {
        {0xc006, "Chelsio Terminator 6 FPGA"},  /* T6 PE10K6 FPGA (PF0) */
        {0x6401, "Chelsio T6225-CR"},           /* 2 x 10/25G */
        {0x6402, "Chelsio T6225-SO-CR"},        /* 2 x 10/25G, nomem */
        {0x6407, "Chelsio T62100-LP-CR"},       /* 2 x 40/50/100G */
        {0x6408, "Chelsio T62100-SO-CR"},       /* 2 x 40/50/100G, nomem */
        {0x640d, "Chelsio T62100-CR"},          /* 2 x 40/50/100G */
        {0x6410, "Chelsio T62100-DBG"},         /* 2 x 40/50/100G, debug */
};
613
614 #ifdef TCP_OFFLOAD
615 /*
616  * service_iq() has an iq and needs the fl.  Offset of fl from the iq should be
617  * exactly the same for both rxq and ofld_rxq.
618  */
619 CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
620 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
621 #endif
622 CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);
623
624 static int
625 t4_probe(device_t dev)
626 {
627         int i;
628         uint16_t v = pci_get_vendor(dev);
629         uint16_t d = pci_get_device(dev);
630         uint8_t f = pci_get_function(dev);
631
632         if (v != PCI_VENDOR_ID_CHELSIO)
633                 return (ENXIO);
634
635         /* Attach only to PF0 of the FPGA */
636         if (d == 0xa000 && f != 0)
637                 return (ENXIO);
638
639         for (i = 0; i < nitems(t4_pciids); i++) {
640                 if (d == t4_pciids[i].device) {
641                         device_set_desc(dev, t4_pciids[i].desc);
642                         return (BUS_PROBE_DEFAULT);
643                 }
644         }
645
646         return (ENXIO);
647 }
648
649 static int
650 t5_probe(device_t dev)
651 {
652         int i;
653         uint16_t v = pci_get_vendor(dev);
654         uint16_t d = pci_get_device(dev);
655         uint8_t f = pci_get_function(dev);
656
657         if (v != PCI_VENDOR_ID_CHELSIO)
658                 return (ENXIO);
659
660         /* Attach only to PF0 of the FPGA */
661         if (d == 0xb000 && f != 0)
662                 return (ENXIO);
663
664         for (i = 0; i < nitems(t5_pciids); i++) {
665                 if (d == t5_pciids[i].device) {
666                         device_set_desc(dev, t5_pciids[i].desc);
667                         return (BUS_PROBE_DEFAULT);
668                 }
669         }
670
671         return (ENXIO);
672 }
673
674 static int
675 t6_probe(device_t dev)
676 {
677         int i;
678         uint16_t v = pci_get_vendor(dev);
679         uint16_t d = pci_get_device(dev);
680
681         if (v != PCI_VENDOR_ID_CHELSIO)
682                 return (ENXIO);
683
684         for (i = 0; i < nitems(t6_pciids); i++) {
685                 if (d == t6_pciids[i].device) {
686                         device_set_desc(dev, t6_pciids[i].desc);
687                         return (BUS_PROBE_DEFAULT);
688                 }
689         }
690
691         return (ENXIO);
692 }
693
694 static void
695 t5_attribute_workaround(device_t dev)
696 {
697         device_t root_port;
698         uint32_t v;
699
700         /*
701          * The T5 chips do not properly echo the No Snoop and Relaxed
702          * Ordering attributes when replying to a TLP from a Root
703          * Port.  As a workaround, find the parent Root Port and
704          * disable No Snoop and Relaxed Ordering.  Note that this
705          * affects all devices under this root port.
706          */
707         root_port = pci_find_pcie_root_port(dev);
708         if (root_port == NULL) {
709                 device_printf(dev, "Unable to find parent root port\n");
710                 return;
711         }
712
713         v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
714             PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
715         if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
716             0)
717                 device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
718                     device_get_nameunit(root_port));
719 }
720
721 static const struct devnames devnames[] = {
722         {
723                 .nexus_name = "t4nex",
724                 .ifnet_name = "cxgbe",
725                 .vi_ifnet_name = "vcxgbe",
726                 .pf03_drv_name = "t4iov",
727                 .vf_nexus_name = "t4vf",
728                 .vf_ifnet_name = "cxgbev"
729         }, {
730                 .nexus_name = "t5nex",
731                 .ifnet_name = "cxl",
732                 .vi_ifnet_name = "vcxl",
733                 .pf03_drv_name = "t5iov",
734                 .vf_nexus_name = "t5vf",
735                 .vf_ifnet_name = "cxlv"
736         }, {
737                 .nexus_name = "t6nex",
738                 .ifnet_name = "cc",
739                 .vi_ifnet_name = "vcc",
740                 .pf03_drv_name = "t6iov",
741                 .vf_nexus_name = "t6vf",
742                 .vf_ifnet_name = "ccv"
743         }
744 };
745
746 void
747 t4_init_devnames(struct adapter *sc)
748 {
749         int id;
750
751         id = chip_id(sc);
752         if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
753                 sc->names = &devnames[id - CHELSIO_T4];
754         else {
755                 device_printf(sc->dev, "chip id %d is not supported.\n", id);
756                 sc->names = NULL;
757         }
758 }
759
760 static int
761 t4_attach(device_t dev)
762 {
763         struct adapter *sc;
764         int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
765         struct make_dev_args mda;
766         struct intrs_and_queues iaq;
767         struct sge *s;
768         uint8_t *buf;
769 #ifdef TCP_OFFLOAD
770         int ofld_rqidx, ofld_tqidx;
771 #endif
772 #ifdef DEV_NETMAP
773         int nm_rqidx, nm_tqidx;
774 #endif
775         int num_vis;
776
777         sc = device_get_softc(dev);
778         sc->dev = dev;
779         TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);
780
781         if ((pci_get_device(dev) & 0xff00) == 0x5400)
782                 t5_attribute_workaround(dev);
783         pci_enable_busmaster(dev);
784         if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
785                 uint32_t v;
786
787                 pci_set_max_read_req(dev, 4096);
788                 v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
789                 v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
790                 pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
791
792                 sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
793         }
794
795         sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
796         sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
797         sc->traceq = -1;
798         mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
799         snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
800             device_get_nameunit(dev));
801
802         snprintf(sc->lockname, sizeof(sc->lockname), "%s",
803             device_get_nameunit(dev));
804         mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
805         t4_add_adapter(sc);
806
807         mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
808         TAILQ_INIT(&sc->sfl);
809         callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
810
811         mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);
812
813         rc = t4_map_bars_0_and_4(sc);
814         if (rc != 0)
815                 goto done; /* error message displayed already */
816
817         memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
818
819         /* Prepare the adapter for operation. */
820         buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
821         rc = -t4_prep_adapter(sc, buf);
822         free(buf, M_CXGBE);
823         if (rc != 0) {
824                 device_printf(dev, "failed to prepare adapter: %d.\n", rc);
825                 goto done;
826         }
827
828         /*
829          * This is the real PF# to which we're attaching.  Works from within PCI
830          * passthrough environments too, where pci_get_function() could return a
831          * different PF# depending on the passthrough configuration.  We need to
832          * use the real PF# in all our communication with the firmware.
833          */
834         j = t4_read_reg(sc, A_PL_WHOAMI);
835         sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
836         sc->mbox = sc->pf;
837
838         t4_init_devnames(sc);
839         if (sc->names == NULL) {
840                 rc = ENOTSUP;
841                 goto done; /* error message displayed already */
842         }
843
844         /*
845          * Do this really early, with the memory windows set up even before the
846          * character device.  The userland tool's register i/o and mem read
847          * will work even in "recovery mode".
848          */
849         setup_memwin(sc);
850         if (t4_init_devlog_params(sc, 0) == 0)
851                 fixup_devlog_params(sc);
852         make_dev_args_init(&mda);
853         mda.mda_devsw = &t4_cdevsw;
854         mda.mda_uid = UID_ROOT;
855         mda.mda_gid = GID_WHEEL;
856         mda.mda_mode = 0600;
857         mda.mda_si_drv1 = sc;
858         rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
859         if (rc != 0)
860                 device_printf(dev, "failed to create nexus char device: %d.\n",
861                     rc);
862
863         /* Go no further if recovery mode has been requested. */
864         if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
865                 device_printf(dev, "recovery mode.\n");
866                 goto done;
867         }
868
869 #if defined(__i386__)
870         if ((cpu_feature & CPUID_CX8) == 0) {
871                 device_printf(dev, "64 bit atomics not available.\n");
872                 rc = ENOTSUP;
873                 goto done;
874         }
875 #endif
876
877         /* Prepare the firmware for operation */
878         rc = prep_firmware(sc);
879         if (rc != 0)
880                 goto done; /* error message displayed already */
881
882         rc = get_params__post_init(sc);
883         if (rc != 0)
884                 goto done; /* error message displayed already */
885
886         rc = set_params__post_init(sc);
887         if (rc != 0)
888                 goto done; /* error message displayed already */
889
890         rc = t4_map_bar_2(sc);
891         if (rc != 0)
892                 goto done; /* error message displayed already */
893
894         rc = t4_create_dma_tag(sc);
895         if (rc != 0)
896                 goto done; /* error message displayed already */
897
898         /*
899          * Number of VIs to create per-port.  The first VI is the "main" regular
900          * VI for the port.  The rest are additional virtual interfaces on the
901          * same physical port.  Note that the main VI does not have native
902          * netmap support but the extra VIs do.
903          *
904          * Limit the number of VIs per port to the number of available
905          * MAC addresses per port.
906          */
907         if (t4_num_vis >= 1)
908                 num_vis = t4_num_vis;
909         else
910                 num_vis = 1;
911         if (num_vis > nitems(vi_mac_funcs)) {
912                 num_vis = nitems(vi_mac_funcs);
913                 device_printf(dev, "Number of VIs limited to %d\n", num_vis);
914         }
915
916         /*
917          * First pass over all the ports - allocate VIs and initialize some
918          * basic parameters like mac address, port type, etc.  We also figure
919          * out whether a port is 10G or 1G and use that information when
920          * calculating how many interrupts to attempt to allocate.
921          */
922         n10g = n1g = 0;
923         for_each_port(sc, i) {
924                 struct port_info *pi;
925
926                 pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
927                 sc->port[i] = pi;
928
929                 /* These must be set before t4_port_init */
930                 pi->adapter = sc;
931                 pi->port_id = i;
932                 /*
933                  * XXX: vi[0] is special so we can't delay this allocation until
934                  * pi->nvi's final value is known.
935                  */
936                 pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE,
937                     M_ZERO | M_WAITOK);
938
939                 /*
940                  * Allocate the "main" VI and initialize parameters
941                  * like mac addr.
942                  */
943                 rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
944                 if (rc != 0) {
945                         device_printf(dev, "unable to initialize port %d: %d\n",
946                             i, rc);
947                         free(pi->vi, M_CXGBE);
948                         free(pi, M_CXGBE);
949                         sc->port[i] = NULL;
950                         goto done;
951                 }
952
953                 pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX);
954                 pi->link_cfg.requested_fc |= t4_pause_settings;
955                 pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX);
956                 pi->link_cfg.fc |= t4_pause_settings;
957
958                 rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
959                 if (rc != 0) {
960                         device_printf(dev, "port %d l1cfg failed: %d\n", i, rc);
961                         free(pi->vi, M_CXGBE);
962                         free(pi, M_CXGBE);
963                         sc->port[i] = NULL;
964                         goto done;
965                 }
966
967                 snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
968                     device_get_nameunit(dev), i);
969                 mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
970                 sc->chan_map[pi->tx_chan] = i;
971
972                 pi->tc = malloc(sizeof(struct tx_sched_class) *
973                     sc->chip_params->nsched_cls, M_CXGBE, M_ZERO | M_WAITOK);
974
975                 if (port_top_speed(pi) >= 10) {
976                         n10g++;
977                 } else {
978                         n1g++;
979                 }
980
981                 pi->linkdnrc = -1;
982
983                 pi->dev = device_add_child(dev, sc->names->ifnet_name, -1);
984                 if (pi->dev == NULL) {
985                         device_printf(dev,
986                             "failed to add device for port %d.\n", i);
987                         rc = ENXIO;
988                         goto done;
989                 }
990                 pi->vi[0].dev = pi->dev;
991                 device_set_softc(pi->dev, pi);
992         }
993
994         /*
995          * Interrupt type, # of interrupts, # of rx/tx queues, etc.
996          */
997         rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq);
998         if (rc != 0)
999                 goto done; /* error message displayed already */
1000         if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0)
1001                 num_vis = 1;
1002
1003         sc->intr_type = iaq.intr_type;
1004         sc->intr_count = iaq.nirq;
1005
1006         s = &sc->sge;
1007         s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
1008         s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
1009         if (num_vis > 1) {
1010                 s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi;
1011                 s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi;
1012         }
1013         s->neq = s->ntxq + s->nrxq;     /* the free list in an rxq is an eq */
1014         s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
1015         s->niq = s->nrxq + 1;           /* 1 extra for firmware event queue */
1016 #ifdef TCP_OFFLOAD
1017         if (is_offload(sc)) {
1018                 s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
1019                 s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
1020                 if (num_vis > 1) {
1021                         s->nofldrxq += (n10g + n1g) * (num_vis - 1) *
1022                             iaq.nofldrxq_vi;
1023                         s->nofldtxq += (n10g + n1g) * (num_vis - 1) *
1024                             iaq.nofldtxq_vi;
1025                 }
1026                 s->neq += s->nofldtxq + s->nofldrxq;
1027                 s->niq += s->nofldrxq;
1028
1029                 s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
1030                     M_CXGBE, M_ZERO | M_WAITOK);
1031                 s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
1032                     M_CXGBE, M_ZERO | M_WAITOK);
1033         }
1034 #endif
1035 #ifdef DEV_NETMAP
1036         if (num_vis > 1) {
1037                 s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi;
1038                 s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi;
1039         }
1040         s->neq += s->nnmtxq + s->nnmrxq;
1041         s->niq += s->nnmrxq;
1042
1043         s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
1044             M_CXGBE, M_ZERO | M_WAITOK);
1045         s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
1046             M_CXGBE, M_ZERO | M_WAITOK);
1047 #endif
1048
1049         s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE,
1050             M_ZERO | M_WAITOK);
1051         s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
1052             M_ZERO | M_WAITOK);
1053         s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
1054             M_ZERO | M_WAITOK);
1055         s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
1056             M_ZERO | M_WAITOK);
1057         s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
1058             M_ZERO | M_WAITOK);
1059
1060         sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
1061             M_ZERO | M_WAITOK);
1062
1063         t4_init_l2t(sc, M_WAITOK);
1064
1065         /*
1066          * Second pass over the ports.  This time we know the number of rx and
1067          * tx queues that each port should get.
1068          */
1069         rqidx = tqidx = 0;
1070 #ifdef TCP_OFFLOAD
1071         ofld_rqidx = ofld_tqidx = 0;
1072 #endif
1073 #ifdef DEV_NETMAP
1074         nm_rqidx = nm_tqidx = 0;
1075 #endif
1076         for_each_port(sc, i) {
1077                 struct port_info *pi = sc->port[i];
1078                 struct vi_info *vi;
1079
1080                 if (pi == NULL)
1081                         continue;
1082
1083                 pi->nvi = num_vis;
1084                 for_each_vi(pi, j, vi) {
1085                         vi->pi = pi;
1086                         vi->qsize_rxq = t4_qsize_rxq;
1087                         vi->qsize_txq = t4_qsize_txq;
1088
1089                         vi->first_rxq = rqidx;
1090                         vi->first_txq = tqidx;
1091                         if (port_top_speed(pi) >= 10) {
1092                                 vi->tmr_idx = t4_tmr_idx_10g;
1093                                 vi->pktc_idx = t4_pktc_idx_10g;
1094                                 vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
1095                                 vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi;
1096                                 vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi;
1097                         } else {
1098                                 vi->tmr_idx = t4_tmr_idx_1g;
1099                                 vi->pktc_idx = t4_pktc_idx_1g;
1100                                 vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
1101                                 vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi;
1102                                 vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi;
1103                         }
1104                         rqidx += vi->nrxq;
1105                         tqidx += vi->ntxq;
1106
1107                         if (j == 0 && vi->ntxq > 1)
1108                                 vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0;
1109                         else
1110                                 vi->rsrv_noflowq = 0;
1111
1112 #ifdef TCP_OFFLOAD
1113                         vi->first_ofld_rxq = ofld_rqidx;
1114                         vi->first_ofld_txq = ofld_tqidx;
1115                         if (port_top_speed(pi) >= 10) {
1116                                 vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ;
1117                                 vi->nofldrxq = j == 0 ? iaq.nofldrxq10g :
1118                                     iaq.nofldrxq_vi;
1119                                 vi->nofldtxq = j == 0 ? iaq.nofldtxq10g :
1120                                     iaq.nofldtxq_vi;
1121                         } else {
1122                                 vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ;
1123                                 vi->nofldrxq = j == 0 ? iaq.nofldrxq1g :
1124                                     iaq.nofldrxq_vi;
1125                                 vi->nofldtxq = j == 0 ? iaq.nofldtxq1g :
1126                                     iaq.nofldtxq_vi;
1127                         }
1128                         ofld_rqidx += vi->nofldrxq;
1129                         ofld_tqidx += vi->nofldtxq;
1130 #endif
1131 #ifdef DEV_NETMAP
1132                         if (j > 0) {
1133                                 vi->first_nm_rxq = nm_rqidx;
1134                                 vi->first_nm_txq = nm_tqidx;
1135                                 vi->nnmrxq = iaq.nnmrxq_vi;
1136                                 vi->nnmtxq = iaq.nnmtxq_vi;
1137                                 nm_rqidx += vi->nnmrxq;
1138                                 nm_tqidx += vi->nnmtxq;
1139                         }
1140 #endif
1141                 }
1142         }
1143
1144         rc = t4_setup_intr_handlers(sc);
1145         if (rc != 0) {
1146                 device_printf(dev,
1147                     "failed to setup interrupt handlers: %d\n", rc);
1148                 goto done;
1149         }
1150
1151         rc = bus_generic_attach(dev);
1152         if (rc != 0) {
1153                 device_printf(dev,
1154                     "failed to attach all child ports: %d\n", rc);
1155                 goto done;
1156         }
1157
1158         device_printf(dev,
1159             "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
1160             sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
1161             sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
1162             (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
1163             sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
1164
1165         t4_set_desc(sc);
1166
1167         notify_siblings(dev, 0);
1168
1169 done:
1170         if (rc != 0 && sc->cdev) {
1171                 /* cdev was created and so cxgbetool works; recover that way. */
1172                 device_printf(dev,
1173                     "error during attach, adapter is now in recovery mode.\n");
1174                 rc = 0;
1175         }
1176
1177         if (rc != 0)
1178                 t4_detach_common(dev);
1179         else
1180                 t4_sysctls(sc);
1181
1182         return (rc);
1183 }
1184
1185 static int
1186 t4_ready(device_t dev)
1187 {
1188         struct adapter *sc;
1189
1190         sc = device_get_softc(dev);
1191         if (sc->flags & FW_OK)
1192                 return (0);
1193         return (ENXIO);
1194 }
1195
1196 static int
1197 t4_read_port_device(device_t dev, int port, device_t *child)
1198 {
1199         struct adapter *sc;
1200         struct port_info *pi;
1201
1202         sc = device_get_softc(dev);
1203         if (port < 0 || port >= MAX_NPORTS)
1204                 return (EINVAL);
1205         pi = sc->port[port];
1206         if (pi == NULL || pi->dev == NULL)
1207                 return (ENXIO);
1208         *child = pi->dev;
1209         return (0);
1210 }
1211
1212 static int
1213 notify_siblings(device_t dev, int detaching)
1214 {
1215         device_t sibling;
1216         int error, i;
1217
1218         error = 0;
1219         for (i = 0; i < PCI_FUNCMAX; i++) {
1220                 if (i == pci_get_function(dev))
1221                         continue;
1222                 sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
1223                     pci_get_slot(dev), i);
1224                 if (sibling == NULL || !device_is_attached(sibling))
1225                         continue;
1226                 if (detaching)
1227                         error = T4_DETACH_CHILD(sibling);
1228                 else
1229                         (void)T4_ATTACH_CHILD(sibling);
1230                 if (error)
1231                         break;
1232         }
1233         return (error);
1234 }
1235
1236 /*
1237  * Idempotent
1238  */
1239 static int
1240 t4_detach(device_t dev)
1241 {
1242         struct adapter *sc;
1243         int rc;
1244
1245         sc = device_get_softc(dev);
1246
1247         rc = notify_siblings(dev, 1);
1248         if (rc) {
1249                 device_printf(dev,
1250                     "failed to detach sibling devices: %d\n", rc);
1251                 return (rc);
1252         }
1253
1254         return (t4_detach_common(dev));
1255 }
1256
1257 int
1258 t4_detach_common(device_t dev)
1259 {
1260         struct adapter *sc;
1261         struct port_info *pi;
1262         int i, rc;
1263
1264         sc = device_get_softc(dev);
1265
1266         if (sc->flags & FULL_INIT_DONE) {
1267                 if (!(sc->flags & IS_VF))
1268                         t4_intr_disable(sc);
1269         }
1270
1271         if (sc->cdev) {
1272                 destroy_dev(sc->cdev);
1273                 sc->cdev = NULL;
1274         }
1275
1276         if (device_is_attached(dev)) {
1277                 rc = bus_generic_detach(dev);
1278                 if (rc) {
1279                         device_printf(dev,
1280                             "failed to detach child devices: %d\n", rc);
1281                         return (rc);
1282                 }
1283         }
1284
1285         for (i = 0; i < sc->intr_count; i++)
1286                 t4_free_irq(sc, &sc->irq[i]);
1287
1288         for (i = 0; i < MAX_NPORTS; i++) {
1289                 pi = sc->port[i];
1290                 if (pi) {
1291                         t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
1292                         if (pi->dev)
1293                                 device_delete_child(dev, pi->dev);
1294
1295                         mtx_destroy(&pi->pi_lock);
1296                         free(pi->vi, M_CXGBE);
1297                         free(pi->tc, M_CXGBE);
1298                         free(pi, M_CXGBE);
1299                 }
1300         }
1301
1302         if (sc->flags & FULL_INIT_DONE)
1303                 adapter_full_uninit(sc);
1304
1305         if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1306                 t4_fw_bye(sc, sc->mbox);
1307
1308         if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
1309                 pci_release_msi(dev);
1310
1311         if (sc->regs_res)
1312                 bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
1313                     sc->regs_res);
1314
1315         if (sc->udbs_res)
1316                 bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
1317                     sc->udbs_res);
1318
1319         if (sc->msix_res)
1320                 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
1321                     sc->msix_res);
1322
1323         if (sc->l2t)
1324                 t4_free_l2t(sc->l2t);
1325
1326 #ifdef TCP_OFFLOAD
1327         free(sc->sge.ofld_rxq, M_CXGBE);
1328         free(sc->sge.ofld_txq, M_CXGBE);
1329 #endif
1330 #ifdef DEV_NETMAP
1331         free(sc->sge.nm_rxq, M_CXGBE);
1332         free(sc->sge.nm_txq, M_CXGBE);
1333 #endif
1334         free(sc->irq, M_CXGBE);
1335         free(sc->sge.rxq, M_CXGBE);
1336         free(sc->sge.txq, M_CXGBE);
1337         free(sc->sge.ctrlq, M_CXGBE);
1338         free(sc->sge.iqmap, M_CXGBE);
1339         free(sc->sge.eqmap, M_CXGBE);
1340         free(sc->tids.ftid_tab, M_CXGBE);
1341         t4_destroy_dma_tag(sc);
1342         if (mtx_initialized(&sc->sc_lock)) {
1343                 sx_xlock(&t4_list_lock);
1344                 SLIST_REMOVE(&t4_list, sc, adapter, link);
1345                 sx_xunlock(&t4_list_lock);
1346                 mtx_destroy(&sc->sc_lock);
1347         }
1348
1349         callout_drain(&sc->sfl_callout);
1350         if (mtx_initialized(&sc->tids.ftid_lock))
1351                 mtx_destroy(&sc->tids.ftid_lock);
1352         if (mtx_initialized(&sc->sfl_lock))
1353                 mtx_destroy(&sc->sfl_lock);
1354         if (mtx_initialized(&sc->ifp_lock))
1355                 mtx_destroy(&sc->ifp_lock);
1356         if (mtx_initialized(&sc->reg_lock))
1357                 mtx_destroy(&sc->reg_lock);
1358
1359         for (i = 0; i < NUM_MEMWIN; i++) {
1360                 struct memwin *mw = &sc->memwin[i];
1361
1362                 if (rw_initialized(&mw->mw_lock))
1363                         rw_destroy(&mw->mw_lock);
1364         }
1365
1366         bzero(sc, sizeof(*sc));
1367
1368         return (0);
1369 }
1370
1371 static int
1372 cxgbe_probe(device_t dev)
1373 {
1374         char buf[128];
1375         struct port_info *pi = device_get_softc(dev);
1376
1377         snprintf(buf, sizeof(buf), "port %d", pi->port_id);
1378         device_set_desc_copy(dev, buf);
1379
1380         return (BUS_PROBE_DEFAULT);
1381 }
1382
/*
 * Interface capabilities used for each VI's ifnet.  T4_CAP is assigned to
 * if_capabilities and T4_CAP_ENABLE to if_capenable; both name the same set.
 */
#define T4_CAP \
    (IFCAP_VLAN_HWTAGGING | \
    IFCAP_VLAN_MTU | \
    IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | \
    IFCAP_TSO | \
    IFCAP_JUMBO_MTU | \
    IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | \
    IFCAP_LINKSTATE | \
    IFCAP_HWCSUM_IPV6 | \
    IFCAP_HWSTATS)
#define T4_CAP_ENABLE (T4_CAP)
1387
1388 static int
1389 cxgbe_vi_attach(device_t dev, struct vi_info *vi)
1390 {
1391         struct ifnet *ifp;
1392         struct sbuf *sb;
1393
1394         vi->xact_addr_filt = -1;
1395         callout_init(&vi->tick, 1);
1396
1397         /* Allocate an ifnet and set it up */
1398         ifp = if_alloc(IFT_ETHER);
1399         if (ifp == NULL) {
1400                 device_printf(dev, "Cannot allocate ifnet\n");
1401                 return (ENOMEM);
1402         }
1403         vi->ifp = ifp;
1404         ifp->if_softc = vi;
1405
1406         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1407         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1408
1409         ifp->if_init = cxgbe_init;
1410         ifp->if_ioctl = cxgbe_ioctl;
1411         ifp->if_transmit = cxgbe_transmit;
1412         ifp->if_qflush = cxgbe_qflush;
1413         ifp->if_get_counter = cxgbe_get_counter;
1414
1415         ifp->if_capabilities = T4_CAP;
1416 #ifdef TCP_OFFLOAD
1417         if (vi->nofldrxq != 0)
1418                 ifp->if_capabilities |= IFCAP_TOE;
1419 #endif
1420         ifp->if_capenable = T4_CAP_ENABLE;
1421         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1422             CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1423
1424         ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1425         ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS;
1426         ifp->if_hw_tsomaxsegsize = 65536;
1427
1428         /* Initialize ifmedia for this VI */
1429         ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
1430             cxgbe_media_status);
1431         build_medialist(vi->pi, &vi->media);
1432
1433         vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
1434             EVENTHANDLER_PRI_ANY);
1435
1436         ether_ifattach(ifp, vi->hw_addr);
1437 #ifdef DEV_NETMAP
1438         if (vi->nnmrxq != 0)
1439                 cxgbe_nm_attach(vi);
1440 #endif
1441         sb = sbuf_new_auto();
1442         sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
1443 #ifdef TCP_OFFLOAD
1444         if (ifp->if_capabilities & IFCAP_TOE)
1445                 sbuf_printf(sb, "; %d txq, %d rxq (TOE)",
1446                     vi->nofldtxq, vi->nofldrxq);
1447 #endif
1448 #ifdef DEV_NETMAP
1449         if (ifp->if_capabilities & IFCAP_NETMAP)
1450                 sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
1451                     vi->nnmtxq, vi->nnmrxq);
1452 #endif
1453         sbuf_finish(sb);
1454         device_printf(dev, "%s\n", sbuf_data(sb));
1455         sbuf_delete(sb);
1456
1457         vi_sysctls(vi);
1458
1459         return (0);
1460 }
1461
1462 static int
1463 cxgbe_attach(device_t dev)
1464 {
1465         struct port_info *pi = device_get_softc(dev);
1466         struct adapter *sc = pi->adapter;
1467         struct vi_info *vi;
1468         int i, rc;
1469
1470         callout_init_mtx(&pi->tick, &pi->pi_lock, 0);
1471
1472         rc = cxgbe_vi_attach(dev, &pi->vi[0]);
1473         if (rc)
1474                 return (rc);
1475
1476         for_each_vi(pi, i, vi) {
1477                 if (i == 0)
1478                         continue;
1479                 vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
1480                 if (vi->dev == NULL) {
1481                         device_printf(dev, "failed to add VI %d\n", i);
1482                         continue;
1483                 }
1484                 device_set_softc(vi->dev, vi);
1485         }
1486
1487         cxgbe_sysctls(pi);
1488
1489         bus_generic_attach(dev);
1490
1491         return (0);
1492 }
1493
1494 static void
1495 cxgbe_vi_detach(struct vi_info *vi)
1496 {
1497         struct ifnet *ifp = vi->ifp;
1498
1499         ether_ifdetach(ifp);
1500
1501         if (vi->vlan_c)
1502                 EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c);
1503
1504         /* Let detach proceed even if these fail. */
1505 #ifdef DEV_NETMAP
1506         if (ifp->if_capabilities & IFCAP_NETMAP)
1507                 cxgbe_nm_detach(vi);
1508 #endif
1509         cxgbe_uninit_synchronized(vi);
1510         callout_drain(&vi->tick);
1511         vi_full_uninit(vi);
1512
1513         ifmedia_removeall(&vi->media);
1514         if_free(vi->ifp);
1515         vi->ifp = NULL;
1516 }
1517
1518 static int
1519 cxgbe_detach(device_t dev)
1520 {
1521         struct port_info *pi = device_get_softc(dev);
1522         struct adapter *sc = pi->adapter;
1523         int rc;
1524
1525         /* Detach the extra VIs first. */
1526         rc = bus_generic_detach(dev);
1527         if (rc)
1528                 return (rc);
1529         device_delete_children(dev);
1530
1531         doom_vi(sc, &pi->vi[0]);
1532
1533         if (pi->flags & HAS_TRACEQ) {
1534                 sc->traceq = -1;        /* cloner should not create ifnet */
1535                 t4_tracer_port_detach(sc);
1536         }
1537
1538         cxgbe_vi_detach(&pi->vi[0]);
1539         callout_drain(&pi->tick);
1540
1541         end_synchronized_op(sc, 0);
1542
1543         return (0);
1544 }
1545
1546 static void
1547 cxgbe_init(void *arg)
1548 {
1549         struct vi_info *vi = arg;
1550         struct adapter *sc = vi->pi->adapter;
1551
1552         if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
1553                 return;
1554         cxgbe_init_synchronized(vi);
1555         end_synchronized_op(sc, 0);
1556 }
1557
1558 static int
1559 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
1560 {
1561         int rc = 0, mtu, flags, can_sleep;
1562         struct vi_info *vi = ifp->if_softc;
1563         struct adapter *sc = vi->pi->adapter;
1564         struct ifreq *ifr = (struct ifreq *)data;
1565         uint32_t mask;
1566
1567         switch (cmd) {
1568         case SIOCSIFMTU:
1569                 mtu = ifr->ifr_mtu;
1570                 if (mtu < ETHERMIN || mtu > MAX_MTU)
1571                         return (EINVAL);
1572
1573                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
1574                 if (rc)
1575                         return (rc);
1576                 ifp->if_mtu = mtu;
1577                 if (vi->flags & VI_INIT_DONE) {
1578                         t4_update_fl_bufsize(ifp);
1579                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1580                                 rc = update_mac_settings(ifp, XGMAC_MTU);
1581                 }
1582                 end_synchronized_op(sc, 0);
1583                 break;
1584
1585         case SIOCSIFFLAGS:
1586                 can_sleep = 0;
1587 redo_sifflags:
1588                 rc = begin_synchronized_op(sc, vi,
1589                     can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg");
1590                 if (rc)
1591                         return (rc);
1592
1593                 if (ifp->if_flags & IFF_UP) {
1594                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1595                                 flags = vi->if_flags;
1596                                 if ((ifp->if_flags ^ flags) &
1597                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1598                                         if (can_sleep == 1) {
1599                                                 end_synchronized_op(sc, 0);
1600                                                 can_sleep = 0;
1601                                                 goto redo_sifflags;
1602                                         }
1603                                         rc = update_mac_settings(ifp,
1604                                             XGMAC_PROMISC | XGMAC_ALLMULTI);
1605                                 }
1606                         } else {
1607                                 if (can_sleep == 0) {
1608                                         end_synchronized_op(sc, LOCK_HELD);
1609                                         can_sleep = 1;
1610                                         goto redo_sifflags;
1611                                 }
1612                                 rc = cxgbe_init_synchronized(vi);
1613                         }
1614                         vi->if_flags = ifp->if_flags;
1615                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1616                         if (can_sleep == 0) {
1617                                 end_synchronized_op(sc, LOCK_HELD);
1618                                 can_sleep = 1;
1619                                 goto redo_sifflags;
1620                         }
1621                         rc = cxgbe_uninit_synchronized(vi);
1622                 }
1623                 end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD);
1624                 break;
1625
1626         case SIOCADDMULTI:
1627         case SIOCDELMULTI: /* these two are called with a mutex held :-( */
1628                 rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi");
1629                 if (rc)
1630                         return (rc);
1631                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1632                         rc = update_mac_settings(ifp, XGMAC_MCADDRS);
1633                 end_synchronized_op(sc, LOCK_HELD);
1634                 break;
1635
1636         case SIOCSIFCAP:
1637                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
1638                 if (rc)
1639                         return (rc);
1640
1641                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1642                 if (mask & IFCAP_TXCSUM) {
1643                         ifp->if_capenable ^= IFCAP_TXCSUM;
1644                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1645
1646                         if (IFCAP_TSO4 & ifp->if_capenable &&
1647                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1648                                 ifp->if_capenable &= ~IFCAP_TSO4;
1649                                 if_printf(ifp,
1650                                     "tso4 disabled due to -txcsum.\n");
1651                         }
1652                 }
1653                 if (mask & IFCAP_TXCSUM_IPV6) {
1654                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1655                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1656
1657                         if (IFCAP_TSO6 & ifp->if_capenable &&
1658                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1659                                 ifp->if_capenable &= ~IFCAP_TSO6;
1660                                 if_printf(ifp,
1661                                     "tso6 disabled due to -txcsum6.\n");
1662                         }
1663                 }
1664                 if (mask & IFCAP_RXCSUM)
1665                         ifp->if_capenable ^= IFCAP_RXCSUM;
1666                 if (mask & IFCAP_RXCSUM_IPV6)
1667                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1668
1669                 /*
1670                  * Note that we leave CSUM_TSO alone (it is always set).  The
1671                  * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1672                  * sending a TSO request our way, so it's sufficient to toggle
1673                  * IFCAP_TSOx only.
1674                  */
1675                 if (mask & IFCAP_TSO4) {
1676                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1677                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1678                                 if_printf(ifp, "enable txcsum first.\n");
1679                                 rc = EAGAIN;
1680                                 goto fail;
1681                         }
1682                         ifp->if_capenable ^= IFCAP_TSO4;
1683                 }
1684                 if (mask & IFCAP_TSO6) {
1685                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1686                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1687                                 if_printf(ifp, "enable txcsum6 first.\n");
1688                                 rc = EAGAIN;
1689                                 goto fail;
1690                         }
1691                         ifp->if_capenable ^= IFCAP_TSO6;
1692                 }
1693                 if (mask & IFCAP_LRO) {
1694 #if defined(INET) || defined(INET6)
1695                         int i;
1696                         struct sge_rxq *rxq;
1697
1698                         ifp->if_capenable ^= IFCAP_LRO;
1699                         for_each_rxq(vi, i, rxq) {
1700                                 if (ifp->if_capenable & IFCAP_LRO)
1701                                         rxq->iq.flags |= IQ_LRO_ENABLED;
1702                                 else
1703                                         rxq->iq.flags &= ~IQ_LRO_ENABLED;
1704                         }
1705 #endif
1706                 }
1707 #ifdef TCP_OFFLOAD
1708                 if (mask & IFCAP_TOE) {
1709                         int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
1710
1711                         rc = toe_capability(vi, enable);
1712                         if (rc != 0)
1713                                 goto fail;
1714
1715                         ifp->if_capenable ^= mask;
1716                 }
1717 #endif
1718                 if (mask & IFCAP_VLAN_HWTAGGING) {
1719                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1720                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1721                                 rc = update_mac_settings(ifp, XGMAC_VLANEX);
1722                 }
1723                 if (mask & IFCAP_VLAN_MTU) {
1724                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
1725
1726                         /* Need to find out how to disable auto-mtu-inflation */
1727                 }
1728                 if (mask & IFCAP_VLAN_HWTSO)
1729                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1730                 if (mask & IFCAP_VLAN_HWCSUM)
1731                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
1732
1733 #ifdef VLAN_CAPABILITIES
1734                 VLAN_CAPABILITIES(ifp);
1735 #endif
1736 fail:
1737                 end_synchronized_op(sc, 0);
1738                 break;
1739
1740         case SIOCSIFMEDIA:
1741         case SIOCGIFMEDIA:
1742         case SIOCGIFXMEDIA:
1743                 ifmedia_ioctl(ifp, ifr, &vi->media, cmd);
1744                 break;
1745
1746         case SIOCGI2C: {
1747                 struct ifi2creq i2c;
1748
1749                 rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1750                 if (rc != 0)
1751                         break;
1752                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
1753                         rc = EPERM;
1754                         break;
1755                 }
1756                 if (i2c.len > sizeof(i2c.data)) {
1757                         rc = EINVAL;
1758                         break;
1759                 }
1760                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
1761                 if (rc)
1762                         return (rc);
1763                 rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr,
1764                     i2c.offset, i2c.len, &i2c.data[0]);
1765                 end_synchronized_op(sc, 0);
1766                 if (rc == 0)
1767                         rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1768                 break;
1769         }
1770
1771         default:
1772                 rc = ether_ioctl(ifp, cmd, data);
1773         }
1774
1775         return (rc);
1776 }
1777
1778 static int
1779 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
1780 {
1781         struct vi_info *vi = ifp->if_softc;
1782         struct port_info *pi = vi->pi;
1783         struct adapter *sc = pi->adapter;
1784         struct sge_txq *txq;
1785         void *items[1];
1786         int rc;
1787
1788         M_ASSERTPKTHDR(m);
1789         MPASS(m->m_nextpkt == NULL);    /* not quite ready for this yet */
1790
1791         if (__predict_false(pi->link_cfg.link_ok == 0)) {
1792                 m_freem(m);
1793                 return (ENETDOWN);
1794         }
1795
1796         rc = parse_pkt(sc, &m);
1797         if (__predict_false(rc != 0)) {
1798                 MPASS(m == NULL);                       /* was freed already */
1799                 atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
1800                 return (rc);
1801         }
1802
1803         /* Select a txq. */
1804         txq = &sc->sge.txq[vi->first_txq];
1805         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1806                 txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
1807                     vi->rsrv_noflowq);
1808
1809         items[0] = m;
1810         rc = mp_ring_enqueue(txq->r, items, 1, 4096);
1811         if (__predict_false(rc != 0))
1812                 m_freem(m);
1813
1814         return (rc);
1815 }
1816
1817 static void
1818 cxgbe_qflush(struct ifnet *ifp)
1819 {
1820         struct vi_info *vi = ifp->if_softc;
1821         struct sge_txq *txq;
1822         int i;
1823
1824         /* queues do not exist if !VI_INIT_DONE. */
1825         if (vi->flags & VI_INIT_DONE) {
1826                 for_each_txq(vi, i, txq) {
1827                         TXQ_LOCK(txq);
1828                         txq->eq.flags &= ~EQ_ENABLED;
1829                         TXQ_UNLOCK(txq);
1830                         while (!mp_ring_is_idle(txq->r)) {
1831                                 mp_ring_check_drainage(txq->r, 0);
1832                                 pause("qflush", 1);
1833                         }
1834                 }
1835         }
1836         if_qflush(ifp);
1837 }
1838
1839 static uint64_t
1840 vi_get_counter(struct ifnet *ifp, ift_counter c)
1841 {
1842         struct vi_info *vi = ifp->if_softc;
1843         struct fw_vi_stats_vf *s = &vi->stats;
1844
1845         vi_refresh_stats(vi->pi->adapter, vi);
1846
1847         switch (c) {
1848         case IFCOUNTER_IPACKETS:
1849                 return (s->rx_bcast_frames + s->rx_mcast_frames +
1850                     s->rx_ucast_frames);
1851         case IFCOUNTER_IERRORS:
1852                 return (s->rx_err_frames);
1853         case IFCOUNTER_OPACKETS:
1854                 return (s->tx_bcast_frames + s->tx_mcast_frames +
1855                     s->tx_ucast_frames + s->tx_offload_frames);
1856         case IFCOUNTER_OERRORS:
1857                 return (s->tx_drop_frames);
1858         case IFCOUNTER_IBYTES:
1859                 return (s->rx_bcast_bytes + s->rx_mcast_bytes +
1860                     s->rx_ucast_bytes);
1861         case IFCOUNTER_OBYTES:
1862                 return (s->tx_bcast_bytes + s->tx_mcast_bytes +
1863                     s->tx_ucast_bytes + s->tx_offload_bytes);
1864         case IFCOUNTER_IMCASTS:
1865                 return (s->rx_mcast_frames);
1866         case IFCOUNTER_OMCASTS:
1867                 return (s->tx_mcast_frames);
1868         case IFCOUNTER_OQDROPS: {
1869                 uint64_t drops;
1870
1871                 drops = 0;
1872                 if (vi->flags & VI_INIT_DONE) {
1873                         int i;
1874                         struct sge_txq *txq;
1875
1876                         for_each_txq(vi, i, txq)
1877                                 drops += counter_u64_fetch(txq->r->drops);
1878                 }
1879
1880                 return (drops);
1881
1882         }
1883
1884         default:
1885                 return (if_get_counter_default(ifp, c));
1886         }
1887 }
1888
1889 uint64_t
1890 cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
1891 {
1892         struct vi_info *vi = ifp->if_softc;
1893         struct port_info *pi = vi->pi;
1894         struct adapter *sc = pi->adapter;
1895         struct port_stats *s = &pi->stats;
1896
1897         if (pi->nvi > 1 || sc->flags & IS_VF)
1898                 return (vi_get_counter(ifp, c));
1899
1900         cxgbe_refresh_stats(sc, pi);
1901
1902         switch (c) {
1903         case IFCOUNTER_IPACKETS:
1904                 return (s->rx_frames);
1905
1906         case IFCOUNTER_IERRORS:
1907                 return (s->rx_jabber + s->rx_runt + s->rx_too_long +
1908                     s->rx_fcs_err + s->rx_len_err);
1909
1910         case IFCOUNTER_OPACKETS:
1911                 return (s->tx_frames);
1912
1913         case IFCOUNTER_OERRORS:
1914                 return (s->tx_error_frames);
1915
1916         case IFCOUNTER_IBYTES:
1917                 return (s->rx_octets);
1918
1919         case IFCOUNTER_OBYTES:
1920                 return (s->tx_octets);
1921
1922         case IFCOUNTER_IMCASTS:
1923                 return (s->rx_mcast_frames);
1924
1925         case IFCOUNTER_OMCASTS:
1926                 return (s->tx_mcast_frames);
1927
1928         case IFCOUNTER_IQDROPS:
1929                 return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
1930                     s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
1931                     s->rx_trunc3 + pi->tnl_cong_drops);
1932
1933         case IFCOUNTER_OQDROPS: {
1934                 uint64_t drops;
1935
1936                 drops = s->tx_drop;
1937                 if (vi->flags & VI_INIT_DONE) {
1938                         int i;
1939                         struct sge_txq *txq;
1940
1941                         for_each_txq(vi, i, txq)
1942                                 drops += counter_u64_fetch(txq->r->drops);
1943                 }
1944
1945                 return (drops);
1946
1947         }
1948
1949         default:
1950                 return (if_get_counter_default(ifp, c));
1951         }
1952 }
1953
1954 static int
1955 cxgbe_media_change(struct ifnet *ifp)
1956 {
1957         struct vi_info *vi = ifp->if_softc;
1958
1959         device_printf(vi->dev, "%s unimplemented.\n", __func__);
1960
1961         return (EOPNOTSUPP);
1962 }
1963
1964 static void
1965 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1966 {
1967         struct vi_info *vi = ifp->if_softc;
1968         struct port_info *pi = vi->pi;
1969         struct ifmedia_entry *cur;
1970         int speed = pi->link_cfg.speed;
1971
1972         cur = vi->media.ifm_cur;
1973
1974         ifmr->ifm_status = IFM_AVALID;
1975         if (!pi->link_cfg.link_ok)
1976                 return;
1977
1978         ifmr->ifm_status |= IFM_ACTIVE;
1979
1980         /* active and current will differ iff current media is autoselect. */
1981         if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
1982                 return;
1983
1984         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
1985         if (speed == 10000)
1986                 ifmr->ifm_active |= IFM_10G_T;
1987         else if (speed == 1000)
1988                 ifmr->ifm_active |= IFM_1000_T;
1989         else if (speed == 100)
1990                 ifmr->ifm_active |= IFM_100_TX;
1991         else if (speed == 10)
1992                 ifmr->ifm_active |= IFM_10_T;
1993         else
1994                 KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
1995                             speed));
1996 }
1997
1998 static int
1999 vcxgbe_probe(device_t dev)
2000 {
2001         char buf[128];
2002         struct vi_info *vi = device_get_softc(dev);
2003
2004         snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2005             vi - vi->pi->vi);
2006         device_set_desc_copy(dev, buf);
2007
2008         return (BUS_PROBE_DEFAULT);
2009 }
2010
2011 static int
2012 vcxgbe_attach(device_t dev)
2013 {
2014         struct vi_info *vi;
2015         struct port_info *pi;
2016         struct adapter *sc;
2017         int func, index, rc;
2018         u32 param, val;
2019
2020         vi = device_get_softc(dev);
2021         pi = vi->pi;
2022         sc = pi->adapter;
2023
2024         index = vi - pi->vi;
2025         KASSERT(index < nitems(vi_mac_funcs),
2026             ("%s: VI %s doesn't have a MAC func", __func__,
2027             device_get_nameunit(dev)));
2028         func = vi_mac_funcs[index];
2029         rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2030             vi->hw_addr, &vi->rss_size, func, 0);
2031         if (rc < 0) {
2032                 device_printf(dev, "Failed to allocate virtual interface "
2033                     "for port %d: %d\n", pi->port_id, -rc);
2034                 return (-rc);
2035         }
2036         vi->viid = rc;
2037         if (chip_id(sc) <= CHELSIO_T5)
2038                 vi->smt_idx = (rc & 0x7f) << 1;
2039         else
2040                 vi->smt_idx = (rc & 0x7f);
2041
2042         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2043             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2044             V_FW_PARAMS_PARAM_YZ(vi->viid);
2045         rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2046         if (rc)
2047                 vi->rss_base = 0xffff;
2048         else {
2049                 /* MPASS((val >> 16) == rss_size); */
2050                 vi->rss_base = val & 0xffff;
2051         }
2052
2053         rc = cxgbe_vi_attach(dev, vi);
2054         if (rc) {
2055                 t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2056                 return (rc);
2057         }
2058         return (0);
2059 }
2060
2061 static int
2062 vcxgbe_detach(device_t dev)
2063 {
2064         struct vi_info *vi;
2065         struct adapter *sc;
2066
2067         vi = device_get_softc(dev);
2068         sc = vi->pi->adapter;
2069
2070         doom_vi(sc, vi);
2071
2072         cxgbe_vi_detach(vi);
2073         t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2074
2075         end_synchronized_op(sc, 0);
2076
2077         return (0);
2078 }
2079
2080 void
2081 t4_fatal_err(struct adapter *sc)
2082 {
2083         t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
2084         t4_intr_disable(sc);
2085         log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
2086             device_get_nameunit(sc->dev));
2087 }
2088
2089 void
2090 t4_add_adapter(struct adapter *sc)
2091 {
2092         sx_xlock(&t4_list_lock);
2093         SLIST_INSERT_HEAD(&t4_list, sc, link);
2094         sx_xunlock(&t4_list_lock);
2095 }
2096
2097 int
2098 t4_map_bars_0_and_4(struct adapter *sc)
2099 {
2100         sc->regs_rid = PCIR_BAR(0);
2101         sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2102             &sc->regs_rid, RF_ACTIVE);
2103         if (sc->regs_res == NULL) {
2104                 device_printf(sc->dev, "cannot map registers.\n");
2105                 return (ENXIO);
2106         }
2107         sc->bt = rman_get_bustag(sc->regs_res);
2108         sc->bh = rman_get_bushandle(sc->regs_res);
2109         sc->mmio_len = rman_get_size(sc->regs_res);
2110         setbit(&sc->doorbells, DOORBELL_KDB);
2111
2112         sc->msix_rid = PCIR_BAR(4);
2113         sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2114             &sc->msix_rid, RF_ACTIVE);
2115         if (sc->msix_res == NULL) {
2116                 device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2117                 return (ENXIO);
2118         }
2119
2120         return (0);
2121 }
2122
2123 int
2124 t4_map_bar_2(struct adapter *sc)
2125 {
2126
2127         /*
2128          * T4: only iWARP driver uses the userspace doorbells.  There is no need
2129          * to map it if RDMA is disabled.
2130          */
2131         if (is_t4(sc) && sc->rdmacaps == 0)
2132                 return (0);
2133
2134         sc->udbs_rid = PCIR_BAR(2);
2135         sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2136             &sc->udbs_rid, RF_ACTIVE);
2137         if (sc->udbs_res == NULL) {
2138                 device_printf(sc->dev, "cannot map doorbell BAR.\n");
2139                 return (ENXIO);
2140         }
2141         sc->udbs_base = rman_get_virtual(sc->udbs_res);
2142
2143         if (chip_id(sc) >= CHELSIO_T5) {
2144                 setbit(&sc->doorbells, DOORBELL_UDB);
2145 #if defined(__i386__) || defined(__amd64__)
2146                 if (t5_write_combine) {
2147                         int rc, mode;
2148
2149                         /*
2150                          * Enable write combining on BAR2.  This is the
2151                          * userspace doorbell BAR and is split into 128B
2152                          * (UDBS_SEG_SIZE) doorbell regions, each associated
2153                          * with an egress queue.  The first 64B has the doorbell
2154                          * and the second 64B can be used to submit a tx work
2155                          * request with an implicit doorbell.
2156                          */
2157
2158                         rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2159                             rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2160                         if (rc == 0) {
2161                                 clrbit(&sc->doorbells, DOORBELL_UDB);
2162                                 setbit(&sc->doorbells, DOORBELL_WCWR);
2163                                 setbit(&sc->doorbells, DOORBELL_UDBWC);
2164                         } else {
2165                                 device_printf(sc->dev,
2166                                     "couldn't enable write combining: %d\n",
2167                                     rc);
2168                         }
2169
2170                         mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2171                         t4_write_reg(sc, A_SGE_STAT_CFG,
2172                             V_STATSOURCE_T5(7) | mode);
2173                 }
2174 #endif
2175         }
2176
2177         return (0);
2178 }
2179
2180 struct memwin_init {
2181         uint32_t base;
2182         uint32_t aperture;
2183 };
2184
2185 static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2186         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2187         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2188         { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2189 };
2190
2191 static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
2192         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2193         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2194         { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
2195 };
2196
2197 static void
2198 setup_memwin(struct adapter *sc)
2199 {
2200         const struct memwin_init *mw_init;
2201         struct memwin *mw;
2202         int i;
2203         uint32_t bar0;
2204
2205         if (is_t4(sc)) {
2206                 /*
2207                  * Read low 32b of bar0 indirectly via the hardware backdoor
2208                  * mechanism.  Works from within PCI passthrough environments
2209                  * too, where rman_get_start() can return a different value.  We
2210                  * need to program the T4 memory window decoders with the actual
2211                  * addresses that will be coming across the PCIe link.
2212                  */
2213                 bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
2214                 bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
2215
2216                 mw_init = &t4_memwin[0];
2217         } else {
2218                 /* T5+ use the relative offset inside the PCIe BAR */
2219                 bar0 = 0;
2220
2221                 mw_init = &t5_memwin[0];
2222         }
2223
2224         for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
2225                 rw_init(&mw->mw_lock, "memory window access");
2226                 mw->mw_base = mw_init->base;
2227                 mw->mw_aperture = mw_init->aperture;
2228                 mw->mw_curpos = 0;
2229                 t4_write_reg(sc,
2230                     PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
2231                     (mw->mw_base + bar0) | V_BIR(0) |
2232                     V_WINDOW(ilog2(mw->mw_aperture) - 10));
2233                 rw_wlock(&mw->mw_lock);
2234                 position_memwin(sc, i, 0);
2235                 rw_wunlock(&mw->mw_lock);
2236         }
2237
2238         /* flush */
2239         t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
2240 }
2241
2242 /*
2243  * Positions the memory window at the given address in the card's address space.
2244  * There are some alignment requirements and the actual position may be at an
2245  * address prior to the requested address.  mw->mw_curpos always has the actual
2246  * position of the window.
2247  */
2248 static void
2249 position_memwin(struct adapter *sc, int idx, uint32_t addr)
2250 {
2251         struct memwin *mw;
2252         uint32_t pf;
2253         uint32_t reg;
2254
2255         MPASS(idx >= 0 && idx < NUM_MEMWIN);
2256         mw = &sc->memwin[idx];
2257         rw_assert(&mw->mw_lock, RA_WLOCKED);
2258
2259         if (is_t4(sc)) {
2260                 pf = 0;
2261                 mw->mw_curpos = addr & ~0xf;    /* start must be 16B aligned */
2262         } else {
2263                 pf = V_PFNUM(sc->pf);
2264                 mw->mw_curpos = addr & ~0x7f;   /* start must be 128B aligned */
2265         }
2266         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
2267         t4_write_reg(sc, reg, mw->mw_curpos | pf);
2268         t4_read_reg(sc, reg);   /* flush */
2269 }
2270
2271 static int
2272 rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2273     int len, int rw)
2274 {
2275         struct memwin *mw;
2276         uint32_t mw_end, v;
2277
2278         MPASS(idx >= 0 && idx < NUM_MEMWIN);
2279
2280         /* Memory can only be accessed in naturally aligned 4 byte units */
2281         if (addr & 3 || len & 3 || len <= 0)
2282                 return (EINVAL);
2283
2284         mw = &sc->memwin[idx];
2285         while (len > 0) {
2286                 rw_rlock(&mw->mw_lock);
2287                 mw_end = mw->mw_curpos + mw->mw_aperture;
2288                 if (addr >= mw_end || addr < mw->mw_curpos) {
2289                         /* Will need to reposition the window */
2290                         if (!rw_try_upgrade(&mw->mw_lock)) {
2291                                 rw_runlock(&mw->mw_lock);
2292                                 rw_wlock(&mw->mw_lock);
2293                         }
2294                         rw_assert(&mw->mw_lock, RA_WLOCKED);
2295                         position_memwin(sc, idx, addr);
2296                         rw_downgrade(&mw->mw_lock);
2297                         mw_end = mw->mw_curpos + mw->mw_aperture;
2298                 }
2299                 rw_assert(&mw->mw_lock, RA_RLOCKED);
2300                 while (addr < mw_end && len > 0) {
2301                         if (rw == 0) {
2302                                 v = t4_read_reg(sc, mw->mw_base + addr -
2303                                     mw->mw_curpos);
2304                                 *val++ = le32toh(v);
2305                         } else {
2306                                 v = *val++;
2307                                 t4_write_reg(sc, mw->mw_base + addr -
2308                                     mw->mw_curpos, htole32(v));
2309                         }
2310                         addr += 4;
2311                         len -= 4;
2312                 }
2313                 rw_runlock(&mw->mw_lock);
2314         }
2315
2316         return (0);
2317 }
2318
/*
 * Read 'len' bytes of card memory at 'addr' into 'val' through memory window
 * 'idx'.  Thin wrapper around rw_via_memwin() with rw = 0.
 */
static inline int
read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
    int len)
{
	const int rw_read = 0;

	return (rw_via_memwin(sc, idx, addr, val, len, rw_read));
}
2326
/*
 * Write 'len' bytes from 'val' to card memory at 'addr' through memory window
 * 'idx'.  Thin wrapper around rw_via_memwin() with rw = 1.
 */
static inline int
write_via_memwin(struct adapter *sc, int idx, uint32_t addr,
    const uint32_t *val, int len)
{
	/* rw_via_memwin takes a non-const pointer; strip the qualifier. */
	uint32_t *src = (void *)(uintptr_t)val;

	return (rw_via_memwin(sc, idx, addr, src, len, 1));
}
2334
2335 static int
2336 t4_range_cmp(const void *a, const void *b)
2337 {
2338         return ((const struct t4_range *)a)->start -
2339                ((const struct t4_range *)b)->start;
2340 }
2341
2342 /*
2343  * Verify that the memory range specified by the addr/len pair is valid within
2344  * the card's address space.
2345  */
2346 static int
2347 validate_mem_range(struct adapter *sc, uint32_t addr, int len)
2348 {
2349         struct t4_range mem_ranges[4], *r, *next;
2350         uint32_t em, addr_len;
2351         int i, n, remaining;
2352
2353         /* Memory can only be accessed in naturally aligned 4 byte units */
2354         if (addr & 3 || len & 3 || len <= 0)
2355                 return (EINVAL);
2356
2357         /* Enabled memories */
2358         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2359
2360         r = &mem_ranges[0];
2361         n = 0;
2362         bzero(r, sizeof(mem_ranges));
2363         if (em & F_EDRAM0_ENABLE) {
2364                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2365                 r->size = G_EDRAM0_SIZE(addr_len) << 20;
2366                 if (r->size > 0) {
2367                         r->start = G_EDRAM0_BASE(addr_len) << 20;
2368                         if (addr >= r->start &&
2369                             addr + len <= r->start + r->size)
2370                                 return (0);
2371                         r++;
2372                         n++;
2373                 }
2374         }
2375         if (em & F_EDRAM1_ENABLE) {
2376                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2377                 r->size = G_EDRAM1_SIZE(addr_len) << 20;
2378                 if (r->size > 0) {
2379                         r->start = G_EDRAM1_BASE(addr_len) << 20;
2380                         if (addr >= r->start &&
2381                             addr + len <= r->start + r->size)
2382                                 return (0);
2383                         r++;
2384                         n++;
2385                 }
2386         }
2387         if (em & F_EXT_MEM_ENABLE) {
2388                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2389                 r->size = G_EXT_MEM_SIZE(addr_len) << 20;
2390                 if (r->size > 0) {
2391                         r->start = G_EXT_MEM_BASE(addr_len) << 20;
2392                         if (addr >= r->start &&
2393                             addr + len <= r->start + r->size)
2394                                 return (0);
2395                         r++;
2396                         n++;
2397                 }
2398         }
2399         if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
2400                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2401                 r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
2402                 if (r->size > 0) {
2403                         r->start = G_EXT_MEM1_BASE(addr_len) << 20;
2404                         if (addr >= r->start &&
2405                             addr + len <= r->start + r->size)
2406                                 return (0);
2407                         r++;
2408                         n++;
2409                 }
2410         }
2411         MPASS(n <= nitems(mem_ranges));
2412
2413         if (n > 1) {
2414                 /* Sort and merge the ranges. */
2415                 qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
2416
2417                 /* Start from index 0 and examine the next n - 1 entries. */
2418                 r = &mem_ranges[0];
2419                 for (remaining = n - 1; remaining > 0; remaining--, r++) {
2420
2421                         MPASS(r->size > 0);     /* r is a valid entry. */
2422                         next = r + 1;
2423                         MPASS(next->size > 0);  /* and so is the next one. */
2424
2425                         while (r->start + r->size >= next->start) {
2426                                 /* Merge the next one into the current entry. */
2427                                 r->size = max(r->start + r->size,
2428                                     next->start + next->size) - r->start;
2429                                 n--;    /* One fewer entry in total. */
2430                                 if (--remaining == 0)
2431                                         goto done;      /* short circuit */
2432                                 next++;
2433                         }
2434                         if (next != r + 1) {
2435                                 /*
2436                                  * Some entries were merged into r and next
2437                                  * points to the first valid entry that couldn't
2438                                  * be merged.
2439                                  */
2440                                 MPASS(next->size > 0);  /* must be valid */
2441                                 memcpy(r + 1, next, remaining * sizeof(*r));
2442 #ifdef INVARIANTS
2443                                 /*
2444                                  * This so that the foo->size assertion in the
2445                                  * next iteration of the loop do the right
2446                                  * thing for entries that were pulled up and are
2447                                  * no longer valid.
2448                                  */
2449                                 MPASS(n < nitems(mem_ranges));
2450                                 bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
2451                                     sizeof(struct t4_range));
2452 #endif
2453                         }
2454                 }
2455 done:
2456                 /* Done merging the ranges. */
2457                 MPASS(n > 0);
2458                 r = &mem_ranges[0];
2459                 for (i = 0; i < n; i++, r++) {
2460                         if (addr >= r->start &&
2461                             addr + len <= r->start + r->size)
2462                                 return (0);
2463                 }
2464         }
2465
2466         return (EFAULT);
2467 }
2468
2469 static int
2470 fwmtype_to_hwmtype(int mtype)
2471 {
2472
2473         switch (mtype) {
2474         case FW_MEMTYPE_EDC0:
2475                 return (MEM_EDC0);
2476         case FW_MEMTYPE_EDC1:
2477                 return (MEM_EDC1);
2478         case FW_MEMTYPE_EXTMEM:
2479                 return (MEM_MC0);
2480         case FW_MEMTYPE_EXTMEM1:
2481                 return (MEM_MC1);
2482         default:
2483                 panic("%s: cannot translate fw mtype %d.", __func__, mtype);
2484         }
2485 }
2486
2487 /*
2488  * Verify that the memory range specified by the memtype/offset/len pair is
2489  * valid and lies entirely within the memtype specified.  The global address of
2490  * the start of the range is returned in addr.
2491  */
2492 static int
2493 validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
2494     uint32_t *addr)
2495 {
2496         uint32_t em, addr_len, maddr;
2497
2498         /* Memory can only be accessed in naturally aligned 4 byte units */
2499         if (off & 3 || len & 3 || len == 0)
2500                 return (EINVAL);
2501
2502         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2503         switch (fwmtype_to_hwmtype(mtype)) {
2504         case MEM_EDC0:
2505                 if (!(em & F_EDRAM0_ENABLE))
2506                         return (EINVAL);
2507                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2508                 maddr = G_EDRAM0_BASE(addr_len) << 20;
2509                 break;
2510         case MEM_EDC1:
2511                 if (!(em & F_EDRAM1_ENABLE))
2512                         return (EINVAL);
2513                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2514                 maddr = G_EDRAM1_BASE(addr_len) << 20;
2515                 break;
2516         case MEM_MC:
2517                 if (!(em & F_EXT_MEM_ENABLE))
2518                         return (EINVAL);
2519                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2520                 maddr = G_EXT_MEM_BASE(addr_len) << 20;
2521                 break;
2522         case MEM_MC1:
2523                 if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
2524                         return (EINVAL);
2525                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2526                 maddr = G_EXT_MEM1_BASE(addr_len) << 20;
2527                 break;
2528         default:
2529                 return (EINVAL);
2530         }
2531
2532         *addr = maddr + off;    /* global address */
2533         return (validate_mem_range(sc, *addr, len));
2534 }
2535
2536 static int
2537 fixup_devlog_params(struct adapter *sc)
2538 {
2539         struct devlog_params *dparams = &sc->params.devlog;
2540         int rc;
2541
2542         rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
2543             dparams->size, &dparams->addr);
2544
2545         return (rc);
2546 }
2547
2548 static int
2549 cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis,
2550     struct intrs_and_queues *iaq)
2551 {
2552         int rc, itype, navail, nrxq10g, nrxq1g, n;
2553         int nofldrxq10g = 0, nofldrxq1g = 0;
2554
2555         bzero(iaq, sizeof(*iaq));
2556
2557         iaq->ntxq10g = t4_ntxq10g;
2558         iaq->ntxq1g = t4_ntxq1g;
2559         iaq->ntxq_vi = t4_ntxq_vi;
2560         iaq->nrxq10g = nrxq10g = t4_nrxq10g;
2561         iaq->nrxq1g = nrxq1g = t4_nrxq1g;
2562         iaq->nrxq_vi = t4_nrxq_vi;
2563         iaq->rsrv_noflowq = t4_rsrv_noflowq;
2564 #ifdef TCP_OFFLOAD
2565         if (is_offload(sc)) {
2566                 iaq->nofldtxq10g = t4_nofldtxq10g;
2567                 iaq->nofldtxq1g = t4_nofldtxq1g;
2568                 iaq->nofldtxq_vi = t4_nofldtxq_vi;
2569                 iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
2570                 iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g;
2571                 iaq->nofldrxq_vi = t4_nofldrxq_vi;
2572         }
2573 #endif
2574 #ifdef DEV_NETMAP
2575         iaq->nnmtxq_vi = t4_nnmtxq_vi;
2576         iaq->nnmrxq_vi = t4_nnmrxq_vi;
2577 #endif
2578
2579         for (itype = INTR_MSIX; itype; itype >>= 1) {
2580
2581                 if ((itype & t4_intr_types) == 0)
2582                         continue;       /* not allowed */
2583
2584                 if (itype == INTR_MSIX)
2585                         navail = pci_msix_count(sc->dev);
2586                 else if (itype == INTR_MSI)
2587                         navail = pci_msi_count(sc->dev);
2588                 else
2589                         navail = 1;
2590 restart:
2591                 if (navail == 0)
2592                         continue;
2593
2594                 iaq->intr_type = itype;
2595                 iaq->intr_flags_10g = 0;
2596                 iaq->intr_flags_1g = 0;
2597
2598                 /*
2599                  * Best option: an interrupt vector for errors, one for the
2600                  * firmware event queue, and one for every rxq (NIC and TOE) of
2601                  * every VI.  The VIs that support netmap use the same
2602                  * interrupts for the NIC rx queues and the netmap rx queues
2603                  * because only one set of queues is active at a time.
2604                  */
2605                 iaq->nirq = T4_EXTRA_INTR;
2606                 iaq->nirq += n10g * (nrxq10g + nofldrxq10g);
2607                 iaq->nirq += n1g * (nrxq1g + nofldrxq1g);
2608                 iaq->nirq += (n10g + n1g) * (num_vis - 1) *
2609                     max(iaq->nrxq_vi, iaq->nnmrxq_vi);  /* See comment above. */
2610                 iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi;
2611                 if (iaq->nirq <= navail &&
2612                     (itype != INTR_MSI || powerof2(iaq->nirq))) {
2613                         iaq->intr_flags_10g = INTR_ALL;
2614                         iaq->intr_flags_1g = INTR_ALL;
2615                         goto allocate;
2616                 }
2617
2618                 /* Disable the VIs (and netmap) if there aren't enough intrs */
2619                 if (num_vis > 1) {
2620                         device_printf(sc->dev, "virtual interfaces disabled "
2621                             "because num_vis=%u with current settings "
2622                             "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, "
2623                             "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, "
2624                             "nnmrxq_vi=%u) would need %u interrupts but "
2625                             "only %u are available.\n", num_vis, nrxq10g,
2626                             nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi,
2627                             iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq,
2628                             navail);
2629                         num_vis = 1;
2630                         iaq->ntxq_vi = iaq->nrxq_vi = 0;
2631                         iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
2632                         iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
2633                         goto restart;
2634                 }
2635
2636                 /*
2637                  * Second best option: a vector for errors, one for the firmware
2638                  * event queue, and vectors for either all the NIC rx queues or
2639                  * all the TOE rx queues.  The queues that don't get vectors
2640                  * will forward their interrupts to those that do.
2641                  */
2642                 iaq->nirq = T4_EXTRA_INTR;
2643                 if (nrxq10g >= nofldrxq10g) {
2644                         iaq->intr_flags_10g = INTR_RXQ;
2645                         iaq->nirq += n10g * nrxq10g;
2646                 } else {
2647                         iaq->intr_flags_10g = INTR_OFLD_RXQ;
2648                         iaq->nirq += n10g * nofldrxq10g;
2649                 }
2650                 if (nrxq1g >= nofldrxq1g) {
2651                         iaq->intr_flags_1g = INTR_RXQ;
2652                         iaq->nirq += n1g * nrxq1g;
2653                 } else {
2654                         iaq->intr_flags_1g = INTR_OFLD_RXQ;
2655                         iaq->nirq += n1g * nofldrxq1g;
2656                 }
2657                 if (iaq->nirq <= navail &&
2658                     (itype != INTR_MSI || powerof2(iaq->nirq)))
2659                         goto allocate;
2660
2661                 /*
2662                  * Next best option: an interrupt vector for errors, one for the
2663                  * firmware event queue, and at least one per main-VI.  At this
2664                  * point we know we'll have to downsize nrxq and/or nofldrxq to
2665                  * fit what's available to us.
2666                  */
2667                 iaq->nirq = T4_EXTRA_INTR;
2668                 iaq->nirq += n10g + n1g;
2669                 if (iaq->nirq <= navail) {
2670                         int leftover = navail - iaq->nirq;
2671
2672                         if (n10g > 0) {
2673                                 int target = max(nrxq10g, nofldrxq10g);
2674
2675                                 iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ?
2676                                     INTR_RXQ : INTR_OFLD_RXQ;
2677
2678                                 n = 1;
2679                                 while (n < target && leftover >= n10g) {
2680                                         leftover -= n10g;
2681                                         iaq->nirq += n10g;
2682                                         n++;
2683                                 }
2684                                 iaq->nrxq10g = min(n, nrxq10g);
2685 #ifdef TCP_OFFLOAD
2686                                 iaq->nofldrxq10g = min(n, nofldrxq10g);
2687 #endif
2688                         }
2689
2690                         if (n1g > 0) {
2691                                 int target = max(nrxq1g, nofldrxq1g);
2692
2693                                 iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ?
2694                                     INTR_RXQ : INTR_OFLD_RXQ;
2695
2696                                 n = 1;
2697                                 while (n < target && leftover >= n1g) {
2698                                         leftover -= n1g;
2699                                         iaq->nirq += n1g;
2700                                         n++;
2701                                 }
2702                                 iaq->nrxq1g = min(n, nrxq1g);
2703 #ifdef TCP_OFFLOAD
2704                                 iaq->nofldrxq1g = min(n, nofldrxq1g);
2705 #endif
2706                         }
2707
2708                         if (itype != INTR_MSI || powerof2(iaq->nirq))
2709                                 goto allocate;
2710                 }
2711
2712                 /*
2713                  * Least desirable option: one interrupt vector for everything.
2714                  */
2715                 iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
2716                 iaq->intr_flags_10g = iaq->intr_flags_1g = 0;
2717 #ifdef TCP_OFFLOAD
2718                 if (is_offload(sc))
2719                         iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
2720 #endif
2721 allocate:
2722                 navail = iaq->nirq;
2723                 rc = 0;
2724                 if (itype == INTR_MSIX)
2725                         rc = pci_alloc_msix(sc->dev, &navail);
2726                 else if (itype == INTR_MSI)
2727                         rc = pci_alloc_msi(sc->dev, &navail);
2728
2729                 if (rc == 0) {
2730                         if (navail == iaq->nirq)
2731                                 return (0);
2732
2733                         /*
2734                          * Didn't get the number requested.  Use whatever number
2735                          * the kernel is willing to allocate (it's in navail).
2736                          */
2737                         device_printf(sc->dev, "fewer vectors than requested, "
2738                             "type=%d, req=%d, rcvd=%d; will downshift req.\n",
2739                             itype, iaq->nirq, navail);
2740                         pci_release_msi(sc->dev);
2741                         goto restart;
2742                 }
2743
2744                 device_printf(sc->dev,
2745                     "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
2746                     itype, rc, iaq->nirq, navail);
2747         }
2748
2749         device_printf(sc->dev,
2750             "failed to find a usable interrupt type.  "
2751             "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
2752             pci_msix_count(sc->dev), pci_msi_count(sc->dev));
2753
2754         return (ENXIO);
2755 }
2756
2757 #define FW_VERSION(chip) ( \
2758     V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
2759     V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
2760     V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
2761     V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
2762 #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
2763
2764 struct fw_info {
2765         uint8_t chip;
2766         char *kld_name;
2767         char *fw_mod_name;
2768         struct fw_hdr fw_hdr;   /* XXX: waste of space, need a sparse struct */
2769 } fw_info[] = {
2770         {
2771                 .chip = CHELSIO_T4,
2772                 .kld_name = "t4fw_cfg",
2773                 .fw_mod_name = "t4fw",
2774                 .fw_hdr = {
2775                         .chip = FW_HDR_CHIP_T4,
2776                         .fw_ver = htobe32_const(FW_VERSION(T4)),
2777                         .intfver_nic = FW_INTFVER(T4, NIC),
2778                         .intfver_vnic = FW_INTFVER(T4, VNIC),
2779                         .intfver_ofld = FW_INTFVER(T4, OFLD),
2780                         .intfver_ri = FW_INTFVER(T4, RI),
2781                         .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
2782                         .intfver_iscsi = FW_INTFVER(T4, ISCSI),
2783                         .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
2784                         .intfver_fcoe = FW_INTFVER(T4, FCOE),
2785                 },
2786         }, {
2787                 .chip = CHELSIO_T5,
2788                 .kld_name = "t5fw_cfg",
2789                 .fw_mod_name = "t5fw",
2790                 .fw_hdr = {
2791                         .chip = FW_HDR_CHIP_T5,
2792                         .fw_ver = htobe32_const(FW_VERSION(T5)),
2793                         .intfver_nic = FW_INTFVER(T5, NIC),
2794                         .intfver_vnic = FW_INTFVER(T5, VNIC),
2795                         .intfver_ofld = FW_INTFVER(T5, OFLD),
2796                         .intfver_ri = FW_INTFVER(T5, RI),
2797                         .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
2798                         .intfver_iscsi = FW_INTFVER(T5, ISCSI),
2799                         .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
2800                         .intfver_fcoe = FW_INTFVER(T5, FCOE),
2801                 },
2802         }, {
2803                 .chip = CHELSIO_T6,
2804                 .kld_name = "t6fw_cfg",
2805                 .fw_mod_name = "t6fw",
2806                 .fw_hdr = {
2807                         .chip = FW_HDR_CHIP_T6,
2808                         .fw_ver = htobe32_const(FW_VERSION(T6)),
2809                         .intfver_nic = FW_INTFVER(T6, NIC),
2810                         .intfver_vnic = FW_INTFVER(T6, VNIC),
2811                         .intfver_ofld = FW_INTFVER(T6, OFLD),
2812                         .intfver_ri = FW_INTFVER(T6, RI),
2813                         .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
2814                         .intfver_iscsi = FW_INTFVER(T6, ISCSI),
2815                         .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
2816                         .intfver_fcoe = FW_INTFVER(T6, FCOE),
2817                 },
2818         }
2819 };
2820
2821 static struct fw_info *
2822 find_fw_info(int chip)
2823 {
2824         int i;
2825
2826         for (i = 0; i < nitems(fw_info); i++) {
2827                 if (fw_info[i].chip == chip)
2828                         return (&fw_info[i]);
2829         }
2830         return (NULL);
2831 }
2832
2833 /*
2834  * Is the given firmware API compatible with the one the driver was compiled
2835  * with?
2836  */
2837 static int
2838 fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2)
2839 {
2840
2841         /* short circuit if it's the exact same firmware version */
2842         if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
2843                 return (1);
2844
2845         /*
2846          * XXX: Is this too conservative?  Perhaps I should limit this to the
2847          * features that are supported in the driver.
2848          */
2849 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
2850         if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
2851             SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
2852             SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
2853                 return (1);
2854 #undef SAME_INTF
2855
2856         return (0);
2857 }
2858
2859 /*
2860  * The firmware in the KLD is usable, but should it be installed?  This routine
2861  * explains itself in detail if it indicates the KLD firmware should be
2862  * installed.
2863  */
2864 static int
2865 should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c)
2866 {
2867         const char *reason;
2868
2869         if (!card_fw_usable) {
2870                 reason = "incompatible or unusable";
2871                 goto install;
2872         }
2873
2874         if (k > c) {
2875                 reason = "older than the version bundled with this driver";
2876                 goto install;
2877         }
2878
2879         if (t4_fw_install == 2 && k != c) {
2880                 reason = "different than the version bundled with this driver";
2881                 goto install;
2882         }
2883
2884         return (0);
2885
2886 install:
2887         if (t4_fw_install == 0) {
2888                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2889                     "but the driver is prohibited from installing a different "
2890                     "firmware on the card.\n",
2891                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2892                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
2893
2894                 return (0);
2895         }
2896
2897         device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
2898             "installing firmware %u.%u.%u.%u on card.\n",
2899             G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
2900             G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
2901             G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
2902             G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
2903
2904         return (1);
2905 }
2906 /*
2907  * Establish contact with the firmware and determine if we are the master driver
2908  * or not, and whether we are responsible for chip initialization.
2909  */
2910 static int
2911 prep_firmware(struct adapter *sc)
2912 {
2913         const struct firmware *fw = NULL, *default_cfg;
2914         int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1;
2915         enum dev_state state;
2916         struct fw_info *fw_info;
2917         struct fw_hdr *card_fw;         /* fw on the card */
2918         const struct fw_hdr *kld_fw;    /* fw in the KLD */
2919         const struct fw_hdr *drv_fw;    /* fw header the driver was compiled
2920                                            against */
2921
2922         /* Contact firmware. */
2923         rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
2924         if (rc < 0 || state == DEV_STATE_ERR) {
2925                 rc = -rc;
2926                 device_printf(sc->dev,
2927                     "failed to connect to the firmware: %d, %d.\n", rc, state);
2928                 return (rc);
2929         }
2930         pf = rc;
2931         if (pf == sc->mbox)
2932                 sc->flags |= MASTER_PF;
2933         else if (state == DEV_STATE_UNINIT) {
2934                 /*
2935                  * We didn't get to be the master so we definitely won't be
2936                  * configuring the chip.  It's a bug if someone else hasn't
2937                  * configured it already.
2938                  */
2939                 device_printf(sc->dev, "couldn't be master(%d), "
2940                     "device not already initialized either(%d).\n", rc, state);
2941                 return (EDOOFUS);
2942         }
2943
2944         /* This is the firmware whose headers the driver was compiled against */
2945         fw_info = find_fw_info(chip_id(sc));
2946         if (fw_info == NULL) {
2947                 device_printf(sc->dev,
2948                     "unable to look up firmware information for chip %d.\n",
2949                     chip_id(sc));
2950                 return (EINVAL);
2951         }
2952         drv_fw = &fw_info->fw_hdr;
2953
2954         /*
2955          * The firmware KLD contains many modules.  The KLD name is also the
2956          * name of the module that contains the default config file.
2957          */
2958         default_cfg = firmware_get(fw_info->kld_name);
2959
2960         /* Read the header of the firmware on the card */
2961         card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
2962         rc = -t4_read_flash(sc, FLASH_FW_START,
2963             sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1);
2964         if (rc == 0)
2965                 card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw);
2966         else {
2967                 device_printf(sc->dev,
2968                     "Unable to read card's firmware header: %d\n", rc);
2969                 card_fw_usable = 0;
2970         }
2971
2972         /* This is the firmware in the KLD */
2973         fw = firmware_get(fw_info->fw_mod_name);
2974         if (fw != NULL) {
2975                 kld_fw = (const void *)fw->data;
2976                 kld_fw_usable = fw_compatible(drv_fw, kld_fw);
2977         } else {
2978                 kld_fw = NULL;
2979                 kld_fw_usable = 0;
2980         }
2981
2982         if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver &&
2983             (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) {
2984                 /*
2985                  * Common case: the firmware on the card is an exact match and
2986                  * the KLD is an exact match too, or the KLD is
2987                  * absent/incompatible.  Note that t4_fw_install = 2 is ignored
2988                  * here -- use cxgbetool loadfw if you want to reinstall the
2989                  * same firmware as the one on the card.
2990                  */
2991         } else if (kld_fw_usable && state == DEV_STATE_UNINIT &&
2992             should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver),
2993             be32toh(card_fw->fw_ver))) {
2994
2995                 rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
2996                 if (rc != 0) {
2997                         device_printf(sc->dev,
2998                             "failed to install firmware: %d\n", rc);
2999                         goto done;
3000                 }
3001
3002                 /* Installed successfully, update the cached header too. */
3003                 memcpy(card_fw, kld_fw, sizeof(*card_fw));
3004                 card_fw_usable = 1;
3005                 need_fw_reset = 0;      /* already reset as part of load_fw */
3006         }
3007
3008         if (!card_fw_usable) {
3009                 uint32_t d, c, k;
3010
3011                 d = ntohl(drv_fw->fw_ver);
3012                 c = ntohl(card_fw->fw_ver);
3013                 k = kld_fw ? ntohl(kld_fw->fw_ver) : 0;
3014
3015                 device_printf(sc->dev, "Cannot find a usable firmware: "
3016                     "fw_install %d, chip state %d, "
3017                     "driver compiled with %d.%d.%d.%d, "
3018                     "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n",
3019                     t4_fw_install, state,
3020                     G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3021                     G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d),
3022                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3023                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c),
3024                     G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3025                     G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
3026                 rc = EINVAL;
3027                 goto done;
3028         }
3029
3030         /* Reset device */
3031         if (need_fw_reset &&
3032             (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) {
3033                 device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
3034                 if (rc != ETIMEDOUT && rc != EIO)
3035                         t4_fw_bye(sc, sc->mbox);
3036                 goto done;
3037         }
3038         sc->flags |= FW_OK;
3039
3040         rc = get_params__pre_init(sc);
3041         if (rc != 0)
3042                 goto done; /* error message displayed already */
3043
3044         /* Partition adapter resources as specified in the config file. */
3045         if (state == DEV_STATE_UNINIT) {
3046
3047                 KASSERT(sc->flags & MASTER_PF,
3048                     ("%s: trying to change chip settings when not master.",
3049                     __func__));
3050
3051                 rc = partition_resources(sc, default_cfg, fw_info->kld_name);
3052                 if (rc != 0)
3053                         goto done;      /* error message displayed already */
3054
3055                 t4_tweak_chip_settings(sc);
3056
3057                 /* get basic stuff going */
3058                 rc = -t4_fw_initialize(sc, sc->mbox);
3059                 if (rc != 0) {
3060                         device_printf(sc->dev, "fw init failed: %d.\n", rc);
3061                         goto done;
3062                 }
3063         } else {
3064                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf);
3065                 sc->cfcsum = 0;
3066         }
3067
3068 done:
3069         free(card_fw, M_CXGBE);
3070         if (fw != NULL)
3071                 firmware_put(fw, FIRMWARE_UNLOAD);
3072         if (default_cfg != NULL)
3073                 firmware_put(default_cfg, FIRMWARE_UNLOAD);
3074
3075         return (rc);
3076 }
3077
/*
 * Build the identifier of a firmware parameter: FW_PARAM_DEV(X) combines the
 * DEV mnemonic with FW_PARAMS_PARAM_DEV_X, and FW_PARAM_PFVF(X) combines the
 * PFVF mnemonic with FW_PARAMS_PARAM_PFVF_X.
 */
#define FW_PARAM_DEV(param) ( \
    V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
#define FW_PARAM_PFVF(param) ( \
    V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
3084
3085 /*
3086  * Partition chip resources for use between various PFs, VFs, etc.
3087  */
3088 static int
3089 partition_resources(struct adapter *sc, const struct firmware *default_cfg,
3090     const char *name_prefix)
3091 {
3092         const struct firmware *cfg = NULL;
3093         int rc = 0;
3094         struct fw_caps_config_cmd caps;
3095         uint32_t mtype, moff, finicsum, cfcsum;
3096
3097         /*
3098          * Figure out what configuration file to use.  Pick the default config
3099          * file for the card if the user hasn't specified one explicitly.
3100          */
3101         snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file);
3102         if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3103                 /* Card specific overrides go here. */
3104                 if (pci_get_device(sc->dev) == 0x440a)
3105                         snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF);
3106                 if (is_fpga(sc))
3107                         snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF);
3108         }
3109
3110         /*
3111          * We need to load another module if the profile is anything except
3112          * "default" or "flash".
3113          */
3114         if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 &&
3115             strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3116                 char s[32];
3117
3118                 snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file);
3119                 cfg = firmware_get(s);
3120                 if (cfg == NULL) {
3121                         if (default_cfg != NULL) {
3122                                 device_printf(sc->dev,
3123                                     "unable to load module \"%s\" for "
3124                                     "configuration profile \"%s\", will use "
3125                                     "the default config file instead.\n",
3126                                     s, sc->cfg_file);
3127                                 snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3128                                     "%s", DEFAULT_CF);
3129                         } else {
3130                                 device_printf(sc->dev,
3131                                     "unable to load module \"%s\" for "
3132                                     "configuration profile \"%s\", will use "
3133                                     "the config file on the card's flash "
3134                                     "instead.\n", s, sc->cfg_file);
3135                                 snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3136                                     "%s", FLASH_CF);
3137                         }
3138                 }
3139         }
3140
3141         if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 &&
3142             default_cfg == NULL) {
3143                 device_printf(sc->dev,
3144                     "default config file not available, will use the config "
3145                     "file on the card's flash instead.\n");
3146                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF);
3147         }
3148
3149         if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3150                 u_int cflen;
3151                 const uint32_t *cfdata;
3152                 uint32_t param, val, addr;
3153
3154                 KASSERT(cfg != NULL || default_cfg != NULL,
3155                     ("%s: no config to upload", __func__));
3156
3157                 /*
3158                  * Ask the firmware where it wants us to upload the config file.
3159                  */
3160                 param = FW_PARAM_DEV(CF);
3161                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3162                 if (rc != 0) {
3163                         /* No support for config file?  Shouldn't happen. */
3164                         device_printf(sc->dev,
3165                             "failed to query config file location: %d.\n", rc);
3166                         goto done;
3167                 }
3168                 mtype = G_FW_PARAMS_PARAM_Y(val);
3169                 moff = G_FW_PARAMS_PARAM_Z(val) << 16;
3170
3171                 /*
3172                  * XXX: sheer laziness.  We deliberately added 4 bytes of
3173                  * useless stuffing/comments at the end of the config file so
3174                  * it's ok to simply throw away the last remaining bytes when
3175                  * the config file is not an exact multiple of 4.  This also
3176                  * helps with the validate_mt_off_len check.
3177                  */
3178                 if (cfg != NULL) {
3179                         cflen = cfg->datasize & ~3;
3180                         cfdata = cfg->data;
3181                 } else {
3182                         cflen = default_cfg->datasize & ~3;
3183                         cfdata = default_cfg->data;
3184                 }
3185
3186                 if (cflen > FLASH_CFG_MAX_SIZE) {
3187                         device_printf(sc->dev,
3188                             "config file too long (%d, max allowed is %d).  "
3189                             "Will try to use the config on the card, if any.\n",
3190                             cflen, FLASH_CFG_MAX_SIZE);
3191                         goto use_config_on_flash;
3192                 }
3193
3194                 rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
3195                 if (rc != 0) {
3196                         device_printf(sc->dev,
3197                             "%s: addr (%d/0x%x) or len %d is not valid: %d.  "
3198                             "Will try to use the config on the card, if any.\n",
3199                             __func__, mtype, moff, cflen, rc);
3200                         goto use_config_on_flash;
3201                 }
3202                 write_via_memwin(sc, 2, addr, cfdata, cflen);
3203         } else {
3204 use_config_on_flash:
3205                 mtype = FW_MEMTYPE_FLASH;
3206                 moff = t4_flash_cfg_addr(sc);
3207         }
3208
3209         bzero(&caps, sizeof(caps));
3210         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3211             F_FW_CMD_REQUEST | F_FW_CMD_READ);
3212         caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3213             V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3214             V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps));
3215         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3216         if (rc != 0) {
3217                 device_printf(sc->dev,
3218                     "failed to pre-process config file: %d "
3219                     "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
3220                 goto done;
3221         }
3222
3223         finicsum = be32toh(caps.finicsum);
3224         cfcsum = be32toh(caps.cfcsum);
3225         if (finicsum != cfcsum) {
3226                 device_printf(sc->dev,
3227                     "WARNING: config file checksum mismatch: %08x %08x\n",
3228                     finicsum, cfcsum);
3229         }
3230         sc->cfcsum = cfcsum;
3231
3232 #define LIMIT_CAPS(x) do { \
3233         caps.x &= htobe16(t4_##x##_allowed); \
3234 } while (0)
3235
3236         /*
3237          * Let the firmware know what features will (not) be used so it can tune
3238          * things accordingly.
3239          */
3240         LIMIT_CAPS(nbmcaps);
3241         LIMIT_CAPS(linkcaps);
3242         LIMIT_CAPS(switchcaps);
3243         LIMIT_CAPS(niccaps);
3244         LIMIT_CAPS(toecaps);
3245         LIMIT_CAPS(rdmacaps);
3246         LIMIT_CAPS(cryptocaps);
3247         LIMIT_CAPS(iscsicaps);
3248         LIMIT_CAPS(fcoecaps);
3249 #undef LIMIT_CAPS
3250
3251         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3252             F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
3253         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3254         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
3255         if (rc != 0) {
3256                 device_printf(sc->dev,
3257                     "failed to process config file: %d.\n", rc);
3258         }
3259 done:
3260         if (cfg != NULL)
3261                 firmware_put(cfg, FIRMWARE_UNLOAD);
3262         return (rc);
3263 }
3264
3265 /*
3266  * Retrieve parameters that are needed (or nice to have) very early.
3267  */
3268 static int
3269 get_params__pre_init(struct adapter *sc)
3270 {
3271         int rc;
3272         uint32_t param[2], val[2];
3273
3274         t4_get_version_info(sc);
3275
3276         snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
3277             G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
3278             G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
3279             G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
3280             G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
3281
3282         snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
3283             G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
3284             G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
3285             G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
3286             G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
3287
3288         snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
3289             G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
3290             G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
3291             G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
3292             G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
3293
3294         snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
3295             G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
3296             G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
3297             G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
3298             G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
3299
3300         param[0] = FW_PARAM_DEV(PORTVEC);
3301         param[1] = FW_PARAM_DEV(CCLK);
3302         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3303         if (rc != 0) {
3304                 device_printf(sc->dev,
3305                     "failed to query parameters (pre_init): %d.\n", rc);
3306                 return (rc);
3307         }
3308
3309         sc->params.portvec = val[0];
3310         sc->params.nports = bitcount32(val[0]);
3311         sc->params.vpd.cclk = val[1];
3312
3313         /* Read device log parameters. */
3314         rc = -t4_init_devlog_params(sc, 1);
3315         if (rc == 0)
3316                 fixup_devlog_params(sc);
3317         else {
3318                 device_printf(sc->dev,
3319                     "failed to get devlog parameters: %d.\n", rc);
3320                 rc = 0; /* devlog isn't critical for device operation */
3321         }
3322
3323         return (rc);
3324 }
3325
3326 /*
3327  * Retrieve various parameters that are of interest to the driver.  The device
3328  * has been initialized by the firmware at this point.
3329  */
3330 static int
3331 get_params__post_init(struct adapter *sc)
3332 {
3333         int rc;
3334         uint32_t param[7], val[7];
3335         struct fw_caps_config_cmd caps;
3336
3337         param[0] = FW_PARAM_PFVF(IQFLINT_START);
3338         param[1] = FW_PARAM_PFVF(EQ_START);
3339         param[2] = FW_PARAM_PFVF(FILTER_START);
3340         param[3] = FW_PARAM_PFVF(FILTER_END);
3341         param[4] = FW_PARAM_PFVF(L2T_START);
3342         param[5] = FW_PARAM_PFVF(L2T_END);
3343         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3344         if (rc != 0) {
3345                 device_printf(sc->dev,
3346                     "failed to query parameters (post_init): %d.\n", rc);
3347                 return (rc);
3348         }
3349
3350         sc->sge.iq_start = val[0];
3351         sc->sge.eq_start = val[1];
3352         sc->tids.ftid_base = val[2];
3353         sc->tids.nftids = val[3] - val[2] + 1;
3354         sc->params.ftid_min = val[2];
3355         sc->params.ftid_max = val[3];
3356         sc->vres.l2t.start = val[4];
3357         sc->vres.l2t.size = val[5] - val[4] + 1;
3358         KASSERT(sc->vres.l2t.size <= L2T_SIZE,
3359             ("%s: L2 table size (%u) larger than expected (%u)",
3360             __func__, sc->vres.l2t.size, L2T_SIZE));
3361
3362         /* get capabilites */
3363         bzero(&caps, sizeof(caps));
3364         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3365             F_FW_CMD_REQUEST | F_FW_CMD_READ);
3366         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3367         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3368         if (rc != 0) {
3369                 device_printf(sc->dev,
3370                     "failed to get card capabilities: %d.\n", rc);
3371                 return (rc);
3372         }
3373
3374 #define READ_CAPS(x) do { \
3375         sc->x = htobe16(caps.x); \
3376 } while (0)
3377         READ_CAPS(nbmcaps);
3378         READ_CAPS(linkcaps);
3379         READ_CAPS(switchcaps);
3380         READ_CAPS(niccaps);
3381         READ_CAPS(toecaps);
3382         READ_CAPS(rdmacaps);
3383         READ_CAPS(cryptocaps);
3384         READ_CAPS(iscsicaps);
3385         READ_CAPS(fcoecaps);
3386
3387         if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
3388                 param[0] = FW_PARAM_PFVF(ETHOFLD_START);
3389                 param[1] = FW_PARAM_PFVF(ETHOFLD_END);
3390                 param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3391                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
3392                 if (rc != 0) {
3393                         device_printf(sc->dev,
3394                             "failed to query NIC parameters: %d.\n", rc);
3395                         return (rc);
3396                 }
3397                 sc->tids.etid_base = val[0];
3398                 sc->params.etid_min = val[0];
3399                 sc->tids.netids = val[1] - val[0] + 1;
3400                 sc->params.netids = sc->tids.netids;
3401                 sc->params.eo_wr_cred = val[2];
3402                 sc->params.ethoffload = 1;
3403         }
3404
3405         if (sc->toecaps) {
3406                 /* query offload-related parameters */
3407                 param[0] = FW_PARAM_DEV(NTID);
3408                 param[1] = FW_PARAM_PFVF(SERVER_START);
3409                 param[2] = FW_PARAM_PFVF(SERVER_END);
3410                 param[3] = FW_PARAM_PFVF(TDDP_START);
3411                 param[4] = FW_PARAM_PFVF(TDDP_END);
3412                 param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3413                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3414                 if (rc != 0) {
3415                         device_printf(sc->dev,
3416                             "failed to query TOE parameters: %d.\n", rc);
3417                         return (rc);
3418                 }
3419                 sc->tids.ntids = val[0];
3420                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
3421                 sc->tids.stid_base = val[1];
3422                 sc->tids.nstids = val[2] - val[1] + 1;
3423                 sc->vres.ddp.start = val[3];
3424                 sc->vres.ddp.size = val[4] - val[3] + 1;
3425                 sc->params.ofldq_wr_cred = val[5];
3426                 sc->params.offload = 1;
3427         }
3428         if (sc->rdmacaps) {
3429                 param[0] = FW_PARAM_PFVF(STAG_START);
3430                 param[1] = FW_PARAM_PFVF(STAG_END);
3431                 param[2] = FW_PARAM_PFVF(RQ_START);
3432                 param[3] = FW_PARAM_PFVF(RQ_END);
3433                 param[4] = FW_PARAM_PFVF(PBL_START);
3434                 param[5] = FW_PARAM_PFVF(PBL_END);
3435                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3436                 if (rc != 0) {
3437                         device_printf(sc->dev,
3438                             "failed to query RDMA parameters(1): %d.\n", rc);
3439                         return (rc);
3440                 }
3441                 sc->vres.stag.start = val[0];
3442                 sc->vres.stag.size = val[1] - val[0] + 1;
3443                 sc->vres.rq.start = val[2];
3444                 sc->vres.rq.size = val[3] - val[2] + 1;
3445                 sc->vres.pbl.start = val[4];
3446                 sc->vres.pbl.size = val[5] - val[4] + 1;
3447
3448                 param[0] = FW_PARAM_PFVF(SQRQ_START);
3449                 param[1] = FW_PARAM_PFVF(SQRQ_END);
3450                 param[2] = FW_PARAM_PFVF(CQ_START);
3451                 param[3] = FW_PARAM_PFVF(CQ_END);
3452                 param[4] = FW_PARAM_PFVF(OCQ_START);
3453                 param[5] = FW_PARAM_PFVF(OCQ_END);
3454                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3455                 if (rc != 0) {
3456                         device_printf(sc->dev,
3457                             "failed to query RDMA parameters(2): %d.\n", rc);
3458                         return (rc);
3459                 }
3460                 sc->vres.qp.start = val[0];
3461                 sc->vres.qp.size = val[1] - val[0] + 1;
3462                 sc->vres.cq.start = val[2];
3463                 sc->vres.cq.size = val[3] - val[2] + 1;
3464                 sc->vres.ocq.start = val[4];
3465                 sc->vres.ocq.size = val[5] - val[4] + 1;
3466         }
3467         if (sc->iscsicaps) {
3468                 param[0] = FW_PARAM_PFVF(ISCSI_START);
3469                 param[1] = FW_PARAM_PFVF(ISCSI_END);
3470                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3471                 if (rc != 0) {
3472                         device_printf(sc->dev,
3473                             "failed to query iSCSI parameters: %d.\n", rc);
3474                         return (rc);
3475                 }
3476                 sc->vres.iscsi.start = val[0];
3477                 sc->vres.iscsi.size = val[1] - val[0] + 1;
3478         }
3479
3480         t4_init_sge_params(sc);
3481
3482         /*
3483          * We've got the params we wanted to query via the firmware.  Now grab
3484          * some others directly from the chip.
3485          */
3486         rc = t4_read_chip_settings(sc);
3487
3488         return (rc);
3489 }
3490
3491 static int
3492 set_params__post_init(struct adapter *sc)
3493 {
3494         uint32_t param, val;
3495
3496         /* ask for encapsulated CPLs */
3497         param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
3498         val = 1;
3499         (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3500
3501         return (0);
3502 }
3503
3504 #undef FW_PARAM_PFVF
3505 #undef FW_PARAM_DEV
3506
3507 static void
3508 t4_set_desc(struct adapter *sc)
3509 {
3510         char buf[128];
3511         struct adapter_params *p = &sc->params;
3512
3513         snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
3514
3515         device_set_desc_copy(sc->dev, buf);
3516 }
3517
3518 static void
3519 build_medialist(struct port_info *pi, struct ifmedia *media)
3520 {
3521         int m;
3522
3523         PORT_LOCK(pi);
3524
3525         ifmedia_removeall(media);
3526
3527         m = IFM_ETHER | IFM_FDX;
3528
3529         switch(pi->port_type) {
3530         case FW_PORT_TYPE_BT_XFI:
3531         case FW_PORT_TYPE_BT_XAUI:
3532                 ifmedia_add(media, m | IFM_10G_T, 0, NULL);
3533                 /* fall through */
3534
3535         case FW_PORT_TYPE_BT_SGMII:
3536                 ifmedia_add(media, m | IFM_1000_T, 0, NULL);
3537                 ifmedia_add(media, m | IFM_100_TX, 0, NULL);
3538                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3539                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3540                 break;
3541
3542         case FW_PORT_TYPE_CX4:
3543                 ifmedia_add(media, m | IFM_10G_CX4, 0, NULL);
3544                 ifmedia_set(media, m | IFM_10G_CX4);
3545                 break;
3546
3547         case FW_PORT_TYPE_QSFP_10G:
3548         case FW_PORT_TYPE_SFP:
3549         case FW_PORT_TYPE_FIBER_XFI:
3550         case FW_PORT_TYPE_FIBER_XAUI:
3551                 switch (pi->mod_type) {
3552
3553                 case FW_PORT_MOD_TYPE_LR:
3554                         ifmedia_add(media, m | IFM_10G_LR, 0, NULL);
3555                         ifmedia_set(media, m | IFM_10G_LR);
3556                         break;
3557
3558                 case FW_PORT_MOD_TYPE_SR:
3559                         ifmedia_add(media, m | IFM_10G_SR, 0, NULL);
3560                         ifmedia_set(media, m | IFM_10G_SR);
3561                         break;
3562
3563                 case FW_PORT_MOD_TYPE_LRM:
3564                         ifmedia_add(media, m | IFM_10G_LRM, 0, NULL);
3565                         ifmedia_set(media, m | IFM_10G_LRM);
3566                         break;
3567
3568                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3569                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3570                         ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL);
3571                         ifmedia_set(media, m | IFM_10G_TWINAX);
3572                         break;
3573
3574                 case FW_PORT_MOD_TYPE_NONE:
3575                         m &= ~IFM_FDX;
3576                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
3577                         ifmedia_set(media, m | IFM_NONE);
3578                         break;
3579
3580                 case FW_PORT_MOD_TYPE_NA:
3581                 case FW_PORT_MOD_TYPE_ER:
3582                 default:
3583                         device_printf(pi->dev,
3584                             "unknown port_type (%d), mod_type (%d)\n",
3585                             pi->port_type, pi->mod_type);
3586                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3587                         ifmedia_set(media, m | IFM_UNKNOWN);
3588                         break;
3589                 }
3590                 break;
3591
3592         case FW_PORT_TYPE_CR_QSFP:
3593         case FW_PORT_TYPE_SFP28:
3594         case FW_PORT_TYPE_KR_SFP28:
3595                 switch (pi->mod_type) {
3596
3597                 case FW_PORT_MOD_TYPE_SR:
3598                         ifmedia_add(media, m | IFM_25G_SR, 0, NULL);
3599                         ifmedia_set(media, m | IFM_25G_SR);
3600                         break;
3601
3602                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3603                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3604                         ifmedia_add(media, m | IFM_25G_CR, 0, NULL);
3605                         ifmedia_set(media, m | IFM_25G_CR);
3606                         break;
3607
3608                 case FW_PORT_MOD_TYPE_NONE:
3609                         m &= ~IFM_FDX;
3610                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
3611                         ifmedia_set(media, m | IFM_NONE);
3612                         break;
3613
3614                 default:
3615                         device_printf(pi->dev,
3616                             "unknown port_type (%d), mod_type (%d)\n",
3617                             pi->port_type, pi->mod_type);
3618                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3619                         ifmedia_set(media, m | IFM_UNKNOWN);
3620                         break;
3621                 }
3622                 break;
3623
3624         case FW_PORT_TYPE_QSFP:
3625                 switch (pi->mod_type) {
3626
3627                 case FW_PORT_MOD_TYPE_LR:
3628                         ifmedia_add(media, m | IFM_40G_LR4, 0, NULL);
3629                         ifmedia_set(media, m | IFM_40G_LR4);
3630                         break;
3631
3632                 case FW_PORT_MOD_TYPE_SR:
3633                         ifmedia_add(media, m | IFM_40G_SR4, 0, NULL);
3634                         ifmedia_set(media, m | IFM_40G_SR4);
3635                         break;
3636
3637                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3638                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3639                         ifmedia_add(media, m | IFM_40G_CR4, 0, NULL);
3640                         ifmedia_set(media, m | IFM_40G_CR4);
3641                         break;
3642
3643                 case FW_PORT_MOD_TYPE_NONE:
3644                         m &= ~IFM_FDX;
3645                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
3646                         ifmedia_set(media, m | IFM_NONE);
3647                         break;
3648
3649                 default:
3650                         device_printf(pi->dev,
3651                             "unknown port_type (%d), mod_type (%d)\n",
3652                             pi->port_type, pi->mod_type);
3653                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3654                         ifmedia_set(media, m | IFM_UNKNOWN);
3655                         break;
3656                 }
3657                 break;
3658
3659         case FW_PORT_TYPE_KR4_100G:
3660         case FW_PORT_TYPE_CR4_QSFP:
3661                 switch (pi->mod_type) {
3662
3663                 case FW_PORT_MOD_TYPE_LR:
3664                         ifmedia_add(media, m | IFM_100G_LR4, 0, NULL);
3665                         ifmedia_set(media, m | IFM_100G_LR4);
3666                         break;
3667
3668                 case FW_PORT_MOD_TYPE_SR:
3669                         ifmedia_add(media, m | IFM_100G_SR4, 0, NULL);
3670                         ifmedia_set(media, m | IFM_100G_SR4);
3671                         break;
3672
3673                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3674                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3675                         ifmedia_add(media, m | IFM_100G_CR4, 0, NULL);
3676                         ifmedia_set(media, m | IFM_100G_CR4);
3677                         break;
3678
3679                 case FW_PORT_MOD_TYPE_NONE:
3680                         m &= ~IFM_FDX;
3681                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
3682                         ifmedia_set(media, m | IFM_NONE);
3683                         break;
3684
3685                 default:
3686                         device_printf(pi->dev,
3687                             "unknown port_type (%d), mod_type (%d)\n",
3688                             pi->port_type, pi->mod_type);
3689                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3690                         ifmedia_set(media, m | IFM_UNKNOWN);
3691                         break;
3692                 }
3693                 break;
3694
3695         default:
3696                 device_printf(pi->dev,
3697                     "unknown port_type (%d), mod_type (%d)\n", pi->port_type,
3698                     pi->mod_type);
3699                 ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3700                 ifmedia_set(media, m | IFM_UNKNOWN);
3701                 break;
3702         }
3703
3704         PORT_UNLOCK(pi);
3705 }
3706
3707 #define FW_MAC_EXACT_CHUNK      7
3708
3709 /*
3710  * Program the port's XGMAC based on parameters in ifnet.  The caller also
3711  * indicates which parameters should be programmed (the rest are left alone).
3712  */
3713 int
3714 update_mac_settings(struct ifnet *ifp, int flags)
3715 {
3716         int rc = 0;
3717         struct vi_info *vi = ifp->if_softc;
3718         struct port_info *pi = vi->pi;
3719         struct adapter *sc = pi->adapter;
3720         int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
3721
3722         ASSERT_SYNCHRONIZED_OP(sc);
3723         KASSERT(flags, ("%s: not told what to update.", __func__));
3724
3725         if (flags & XGMAC_MTU)
3726                 mtu = ifp->if_mtu;
3727
3728         if (flags & XGMAC_PROMISC)
3729                 promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
3730
3731         if (flags & XGMAC_ALLMULTI)
3732                 allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
3733
3734         if (flags & XGMAC_VLANEX)
3735                 vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
3736
3737         if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
3738                 rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
3739                     allmulti, 1, vlanex, false);
3740                 if (rc) {
3741                         if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
3742                             rc);
3743                         return (rc);
3744                 }
3745         }
3746
3747         if (flags & XGMAC_UCADDR) {
3748                 uint8_t ucaddr[ETHER_ADDR_LEN];
3749
3750                 bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
3751                 rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
3752                     ucaddr, true, true);
3753                 if (rc < 0) {
3754                         rc = -rc;
3755                         if_printf(ifp, "change_mac failed: %d\n", rc);
3756                         return (rc);
3757                 } else {
3758                         vi->xact_addr_filt = rc;
3759                         rc = 0;
3760                 }
3761         }
3762
3763         if (flags & XGMAC_MCADDRS) {
3764                 const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
3765                 int del = 1;
3766                 uint64_t hash = 0;
3767                 struct ifmultiaddr *ifma;
3768                 int i = 0, j;
3769
3770                 if_maddr_rlock(ifp);
3771                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3772                         if (ifma->ifma_addr->sa_family != AF_LINK)
3773                                 continue;
3774                         mcaddr[i] =
3775                             LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
3776                         MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
3777                         i++;
3778
3779                         if (i == FW_MAC_EXACT_CHUNK) {
3780                                 rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
3781                                     del, i, mcaddr, NULL, &hash, 0);
3782                                 if (rc < 0) {
3783                                         rc = -rc;
3784                                         for (j = 0; j < i; j++) {
3785                                                 if_printf(ifp,
3786                                                     "failed to add mc address"
3787                                                     " %02x:%02x:%02x:"
3788                                                     "%02x:%02x:%02x rc=%d\n",
3789                                                     mcaddr[j][0], mcaddr[j][1],
3790                                                     mcaddr[j][2], mcaddr[j][3],
3791                                                     mcaddr[j][4], mcaddr[j][5],
3792                                                     rc);
3793                                         }
3794                                         goto mcfail;
3795                                 }
3796                                 del = 0;
3797                                 i = 0;
3798                         }
3799                 }
3800                 if (i > 0) {
3801                         rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
3802                             mcaddr, NULL, &hash, 0);
3803                         if (rc < 0) {
3804                                 rc = -rc;
3805                                 for (j = 0; j < i; j++) {
3806                                         if_printf(ifp,
3807                                             "failed to add mc address"
3808                                             " %02x:%02x:%02x:"
3809                                             "%02x:%02x:%02x rc=%d\n",
3810                                             mcaddr[j][0], mcaddr[j][1],
3811                                             mcaddr[j][2], mcaddr[j][3],
3812                                             mcaddr[j][4], mcaddr[j][5],
3813                                             rc);
3814                                 }
3815                                 goto mcfail;
3816                         }
3817                 }
3818
3819                 rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
3820                 if (rc != 0)
3821                         if_printf(ifp, "failed to set mc address hash: %d", rc);
3822 mcfail:
3823                 if_maddr_runlock(ifp);
3824         }
3825
3826         return (rc);
3827 }
3828
3829 /*
3830  * {begin|end}_synchronized_op must be called from the same thread.
3831  */
3832 int
3833 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
3834     char *wmesg)
3835 {
3836         int rc, pri;
3837
3838 #ifdef WITNESS
3839         /* the caller thinks it's ok to sleep, but is it really? */
3840         if (flags & SLEEP_OK)
3841                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
3842                     "begin_synchronized_op");
3843 #endif
3844
3845         if (INTR_OK)
3846                 pri = PCATCH;
3847         else
3848                 pri = 0;
3849
3850         ADAPTER_LOCK(sc);
3851         for (;;) {
3852
3853                 if (vi && IS_DOOMED(vi)) {
3854                         rc = ENXIO;
3855                         goto done;
3856                 }
3857
3858                 if (!IS_BUSY(sc)) {
3859                         rc = 0;
3860                         break;
3861                 }
3862
3863                 if (!(flags & SLEEP_OK)) {
3864                         rc = EBUSY;
3865                         goto done;
3866                 }
3867
3868                 if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
3869                         rc = EINTR;
3870                         goto done;
3871                 }
3872         }
3873
3874         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
3875         SET_BUSY(sc);
3876 #ifdef INVARIANTS
3877         sc->last_op = wmesg;
3878         sc->last_op_thr = curthread;
3879         sc->last_op_flags = flags;
3880 #endif
3881
3882 done:
3883         if (!(flags & HOLD_LOCK) || rc)
3884                 ADAPTER_UNLOCK(sc);
3885
3886         return (rc);
3887 }
3888
3889 /*
3890  * Tell if_ioctl and if_init that the VI is going away.  This is
3891  * special variant of begin_synchronized_op and must be paired with a
3892  * call to end_synchronized_op.
3893  */
3894 void
3895 doom_vi(struct adapter *sc, struct vi_info *vi)
3896 {
3897
3898         ADAPTER_LOCK(sc);
3899         SET_DOOMED(vi);
3900         wakeup(&sc->flags);
3901         while (IS_BUSY(sc))
3902                 mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
3903         SET_BUSY(sc);
3904 #ifdef INVARIANTS
3905         sc->last_op = "t4detach";
3906         sc->last_op_thr = curthread;
3907         sc->last_op_flags = 0;
3908 #endif
3909         ADAPTER_UNLOCK(sc);
3910 }
3911
3912 /*
3913  * {begin|end}_synchronized_op must be called from the same thread.
3914  */
3915 void
3916 end_synchronized_op(struct adapter *sc, int flags)
3917 {
3918
3919         if (flags & LOCK_HELD)
3920                 ADAPTER_LOCK_ASSERT_OWNED(sc);
3921         else
3922                 ADAPTER_LOCK(sc);
3923
3924         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
3925         CLR_BUSY(sc);
3926         wakeup(&sc->flags);
3927         ADAPTER_UNLOCK(sc);
3928 }
3929
3930 static int
3931 cxgbe_init_synchronized(struct vi_info *vi)
3932 {
3933         struct port_info *pi = vi->pi;
3934         struct adapter *sc = pi->adapter;
3935         struct ifnet *ifp = vi->ifp;
3936         int rc = 0, i;
3937         struct sge_txq *txq;
3938
3939         ASSERT_SYNCHRONIZED_OP(sc);
3940
3941         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3942                 return (0);     /* already running */
3943
3944         if (!(sc->flags & FULL_INIT_DONE) &&
3945             ((rc = adapter_full_init(sc)) != 0))
3946                 return (rc);    /* error message displayed already */
3947
3948         if (!(vi->flags & VI_INIT_DONE) &&
3949             ((rc = vi_full_init(vi)) != 0))
3950                 return (rc); /* error message displayed already */
3951
3952         rc = update_mac_settings(ifp, XGMAC_ALL);
3953         if (rc)
3954                 goto done;      /* error message displayed already */
3955
3956         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
3957         if (rc != 0) {
3958                 if_printf(ifp, "enable_vi failed: %d\n", rc);
3959                 goto done;
3960         }
3961
3962         /*
3963          * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
3964          * if this changes.
3965          */
3966
3967         for_each_txq(vi, i, txq) {
3968                 TXQ_LOCK(txq);
3969                 txq->eq.flags |= EQ_ENABLED;
3970                 TXQ_UNLOCK(txq);
3971         }
3972
3973         /*
3974          * The first iq of the first port to come up is used for tracing.
3975          */
3976         if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
3977                 sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
3978                 t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
3979                     A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
3980                     V_QUEUENUMBER(sc->traceq));
3981                 pi->flags |= HAS_TRACEQ;
3982         }
3983
3984         /* all ok */
3985         PORT_LOCK(pi);
3986         ifp->if_drv_flags |= IFF_DRV_RUNNING;
3987         pi->up_vis++;
3988
3989         if (pi->nvi > 1 || sc->flags & IS_VF)
3990                 callout_reset(&vi->tick, hz, vi_tick, vi);
3991         else
3992                 callout_reset(&pi->tick, hz, cxgbe_tick, pi);
3993         PORT_UNLOCK(pi);
3994 done:
3995         if (rc != 0)
3996                 cxgbe_uninit_synchronized(vi);
3997
3998         return (rc);
3999 }
4000
4001 /*
4002  * Idempotent.
4003  */
4004 static int
4005 cxgbe_uninit_synchronized(struct vi_info *vi)
4006 {
4007         struct port_info *pi = vi->pi;
4008         struct adapter *sc = pi->adapter;
4009         struct ifnet *ifp = vi->ifp;
4010         int rc, i;
4011         struct sge_txq *txq;
4012
4013         ASSERT_SYNCHRONIZED_OP(sc);
4014
4015         if (!(vi->flags & VI_INIT_DONE)) {
4016                 KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING),
4017                     ("uninited VI is running"));
4018                 return (0);
4019         }
4020
4021         /*
4022          * Disable the VI so that all its data in either direction is discarded
4023          * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
4024          * tick) intact as the TP can deliver negative advice or data that it's
4025          * holding in its RAM (for an offloaded connection) even after the VI is
4026          * disabled.
4027          */
4028         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
4029         if (rc) {
4030                 if_printf(ifp, "disable_vi failed: %d\n", rc);
4031                 return (rc);
4032         }
4033
4034         for_each_txq(vi, i, txq) {
4035                 TXQ_LOCK(txq);
4036                 txq->eq.flags &= ~EQ_ENABLED;
4037                 TXQ_UNLOCK(txq);
4038         }
4039
4040         PORT_LOCK(pi);
4041         if (pi->nvi > 1 || sc->flags & IS_VF)
4042                 callout_stop(&vi->tick);
4043         else
4044                 callout_stop(&pi->tick);
4045         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4046                 PORT_UNLOCK(pi);
4047                 return (0);
4048         }
4049         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
4050         pi->up_vis--;
4051         if (pi->up_vis > 0) {
4052                 PORT_UNLOCK(pi);
4053                 return (0);
4054         }
4055         PORT_UNLOCK(pi);
4056
4057         pi->link_cfg.link_ok = 0;
4058         pi->link_cfg.speed = 0;
4059         pi->linkdnrc = -1;
4060         t4_os_link_changed(sc, pi->port_id, 0, -1);
4061
4062         return (0);
4063 }
4064
4065 /*
4066  * It is ok for this function to fail midway and return right away.  t4_detach
4067  * will walk the entire sc->irq list and clean up whatever is valid.
4068  */
4069 int
4070 t4_setup_intr_handlers(struct adapter *sc)
4071 {
4072         int rc, rid, p, q, v;
4073         char s[8];
4074         struct irq *irq;
4075         struct port_info *pi;
4076         struct vi_info *vi;
4077         struct sge *sge = &sc->sge;
4078         struct sge_rxq *rxq;
4079 #ifdef TCP_OFFLOAD
4080         struct sge_ofld_rxq *ofld_rxq;
4081 #endif
4082 #ifdef DEV_NETMAP
4083         struct sge_nm_rxq *nm_rxq;
4084 #endif
4085 #ifdef RSS
4086         int nbuckets = rss_getnumbuckets();
4087 #endif
4088
4089         /*
4090          * Setup interrupts.
4091          */
4092         irq = &sc->irq[0];
4093         rid = sc->intr_type == INTR_INTX ? 0 : 1;
4094         if (sc->intr_count == 1)
4095                 return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
4096
4097         /* Multiple interrupts. */
4098         if (sc->flags & IS_VF)
4099                 KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
4100                     ("%s: too few intr.", __func__));
4101         else
4102                 KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
4103                     ("%s: too few intr.", __func__));
4104
4105         /* The first one is always error intr on PFs */
4106         if (!(sc->flags & IS_VF)) {
4107                 rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
4108                 if (rc != 0)
4109                         return (rc);
4110                 irq++;
4111                 rid++;
4112         }
4113
4114         /* The second one is always the firmware event queue (first on VFs) */
4115         rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
4116         if (rc != 0)
4117                 return (rc);
4118         irq++;
4119         rid++;
4120
4121         for_each_port(sc, p) {
4122                 pi = sc->port[p];
4123                 for_each_vi(pi, v, vi) {
4124                         vi->first_intr = rid - 1;
4125
4126                         if (vi->nnmrxq > 0) {
4127                                 int n = max(vi->nrxq, vi->nnmrxq);
4128
4129                                 MPASS(vi->flags & INTR_RXQ);
4130
4131                                 rxq = &sge->rxq[vi->first_rxq];
4132 #ifdef DEV_NETMAP
4133                                 nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
4134 #endif
4135                                 for (q = 0; q < n; q++) {
4136                                         snprintf(s, sizeof(s), "%x%c%x", p,
4137                                             'a' + v, q);
4138                                         if (q < vi->nrxq)
4139                                                 irq->rxq = rxq++;
4140 #ifdef DEV_NETMAP
4141                                         if (q < vi->nnmrxq)
4142                                                 irq->nm_rxq = nm_rxq++;
4143 #endif
4144                                         rc = t4_alloc_irq(sc, irq, rid,
4145                                             t4_vi_intr, irq, s);
4146                                         if (rc != 0)
4147                                                 return (rc);
4148                                         irq++;
4149                                         rid++;
4150                                         vi->nintr++;
4151                                 }
4152                         } else if (vi->flags & INTR_RXQ) {
4153                                 for_each_rxq(vi, q, rxq) {
4154                                         snprintf(s, sizeof(s), "%x%c%x", p,
4155                                             'a' + v, q);
4156                                         rc = t4_alloc_irq(sc, irq, rid,
4157                                             t4_intr, rxq, s);
4158                                         if (rc != 0)
4159                                                 return (rc);
4160 #ifdef RSS
4161                                         bus_bind_intr(sc->dev, irq->res,
4162                                             rss_getcpu(q % nbuckets));
4163 #endif
4164                                         irq++;
4165                                         rid++;
4166                                         vi->nintr++;
4167                                 }
4168                         }
4169 #ifdef TCP_OFFLOAD
4170                         if (vi->flags & INTR_OFLD_RXQ) {
4171                                 for_each_ofld_rxq(vi, q, ofld_rxq) {
4172                                         snprintf(s, sizeof(s), "%x%c%x", p,
4173                                             'A' + v, q);
4174                                         rc = t4_alloc_irq(sc, irq, rid,
4175                                             t4_intr, ofld_rxq, s);
4176                                         if (rc != 0)
4177                                                 return (rc);
4178                                         irq++;
4179                                         rid++;
4180                                         vi->nintr++;
4181                                 }
4182                         }
4183 #endif
4184                 }
4185         }
4186         MPASS(irq == &sc->irq[sc->intr_count]);
4187
4188         return (0);
4189 }
4190
4191 int
4192 adapter_full_init(struct adapter *sc)
4193 {
4194         int rc, i;
4195
4196         ASSERT_SYNCHRONIZED_OP(sc);
4197         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
4198         KASSERT((sc->flags & FULL_INIT_DONE) == 0,
4199             ("%s: FULL_INIT_DONE already", __func__));
4200
4201         /*
4202          * queues that belong to the adapter (not any particular port).
4203          */
4204         rc = t4_setup_adapter_queues(sc);
4205         if (rc != 0)
4206                 goto done;
4207
4208         for (i = 0; i < nitems(sc->tq); i++) {
4209                 sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
4210                     taskqueue_thread_enqueue, &sc->tq[i]);
4211                 if (sc->tq[i] == NULL) {
4212                         device_printf(sc->dev,
4213                             "failed to allocate task queue %d\n", i);
4214                         rc = ENOMEM;
4215                         goto done;
4216                 }
4217                 taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
4218                     device_get_nameunit(sc->dev), i);
4219         }
4220
4221         if (!(sc->flags & IS_VF))
4222                 t4_intr_enable(sc);
4223         sc->flags |= FULL_INIT_DONE;
4224 done:
4225         if (rc != 0)
4226                 adapter_full_uninit(sc);
4227
4228         return (rc);
4229 }
4230
4231 int
4232 adapter_full_uninit(struct adapter *sc)
4233 {
4234         int i;
4235
4236         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
4237
4238         t4_teardown_adapter_queues(sc);
4239
4240         for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
4241                 taskqueue_free(sc->tq[i]);
4242                 sc->tq[i] = NULL;
4243         }
4244
4245         sc->flags &= ~FULL_INIT_DONE;
4246
4247         return (0);
4248 }
4249
4250 #ifdef RSS
/* Kernel RSS hash types that this driver knows how to map to the hardware. */
#define SUPPORTED_RSS_HASHTYPES \
    (RSS_HASHTYPE_RSS_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
    RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_TCP_IPV6 | \
    RSS_HASHTYPE_RSS_UDP_IPV4 | RSS_HASHTYPE_RSS_UDP_IPV6)
4255
4256 /* Translates kernel hash types to hardware. */
4257 static int
4258 hashconfig_to_hashen(int hashconfig)
4259 {
4260         int hashen = 0;
4261
4262         if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
4263                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
4264         if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
4265                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
4266         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
4267                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
4268                     F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
4269         }
4270         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
4271                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
4272                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
4273         }
4274         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
4275                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
4276         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
4277                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
4278
4279         return (hashen);
4280 }
4281
4282 /* Translates hardware hash types to kernel. */
4283 static int
4284 hashen_to_hashconfig(int hashen)
4285 {
4286         int hashconfig = 0;
4287
4288         if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
4289                 /*
4290                  * If UDP hashing was enabled it must have been enabled for
4291                  * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
4292                  * enabling any 4-tuple hash is nonsense configuration.
4293                  */
4294                 MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
4295                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
4296
4297                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
4298                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
4299                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
4300                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
4301         }
4302         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
4303                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
4304         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
4305                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
4306         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
4307                 hashconfig |= RSS_HASHTYPE_RSS_IPV4;
4308         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
4309                 hashconfig |= RSS_HASHTYPE_RSS_IPV6;
4310
4311         return (hashconfig);
4312 }
4313 #endif
4314
4315 int
4316 vi_full_init(struct vi_info *vi)
4317 {
4318         struct adapter *sc = vi->pi->adapter;
4319         struct ifnet *ifp = vi->ifp;
4320         uint16_t *rss;
4321         struct sge_rxq *rxq;
4322         int rc, i, j, hashen;
4323 #ifdef RSS
4324         int nbuckets = rss_getnumbuckets();
4325         int hashconfig = rss_gethashconfig();
4326         int extra;
4327         uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
4328         uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
4329 #endif
4330
4331         ASSERT_SYNCHRONIZED_OP(sc);
4332         KASSERT((vi->flags & VI_INIT_DONE) == 0,
4333             ("%s: VI_INIT_DONE already", __func__));
4334
4335         sysctl_ctx_init(&vi->ctx);
4336         vi->flags |= VI_SYSCTL_CTX;
4337
4338         /*
4339          * Allocate tx/rx/fl queues for this VI.
4340          */
4341         rc = t4_setup_vi_queues(vi);
4342         if (rc != 0)
4343                 goto done;      /* error message displayed already */
4344
4345         /*
4346          * Setup RSS for this VI.  Save a copy of the RSS table for later use.
4347          */
4348         if (vi->nrxq > vi->rss_size) {
4349                 if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
4350                     "some queues will never receive traffic.\n", vi->nrxq,
4351                     vi->rss_size);
4352         } else if (vi->rss_size % vi->nrxq) {
4353                 if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
4354                     "expect uneven traffic distribution.\n", vi->nrxq,
4355                     vi->rss_size);
4356         }
4357 #ifdef RSS
4358         MPASS(RSS_KEYSIZE == 40);
4359         if (vi->nrxq != nbuckets) {
4360                 if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
4361                     "performance will be impacted.\n", vi->nrxq, nbuckets);
4362         }
4363
4364         rss_getkey((void *)&raw_rss_key[0]);
4365         for (i = 0; i < nitems(rss_key); i++) {
4366                 rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
4367         }
4368         t4_write_rss_key(sc, &rss_key[0], -1);
4369 #endif
4370         rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
4371         for (i = 0; i < vi->rss_size;) {
4372 #ifdef RSS
4373                 j = rss_get_indirection_to_bucket(i);
4374                 j %= vi->nrxq;
4375                 rxq = &sc->sge.rxq[vi->first_rxq + j];
4376                 rss[i++] = rxq->iq.abs_id;
4377 #else
4378                 for_each_rxq(vi, j, rxq) {
4379                         rss[i++] = rxq->iq.abs_id;
4380                         if (i == vi->rss_size)
4381                                 break;
4382                 }
4383 #endif
4384         }
4385
4386         rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
4387             vi->rss_size);
4388         if (rc != 0) {
4389                 if_printf(ifp, "rss_config failed: %d\n", rc);
4390                 goto done;
4391         }
4392
4393 #ifdef RSS
4394         hashen = hashconfig_to_hashen(hashconfig);
4395
4396         /*
4397          * We may have had to enable some hashes even though the global config
4398          * wants them disabled.  This is a potential problem that must be
4399          * reported to the user.
4400          */
4401         extra = hashen_to_hashconfig(hashen) ^ hashconfig;
4402
4403         /*
4404          * If we consider only the supported hash types, then the enabled hashes
4405          * are a superset of the requested hashes.  In other words, there cannot
4406          * be any supported hash that was requested but not enabled, but there
4407          * can be hashes that were not requested but had to be enabled.
4408          */
4409         extra &= SUPPORTED_RSS_HASHTYPES;
4410         MPASS((extra & hashconfig) == 0);
4411
4412         if (extra) {
4413                 if_printf(ifp,
4414                     "global RSS config (0x%x) cannot be accommodated.\n",
4415                     hashconfig);
4416         }
4417         if (extra & RSS_HASHTYPE_RSS_IPV4)
4418                 if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
4419         if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
4420                 if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
4421         if (extra & RSS_HASHTYPE_RSS_IPV6)
4422                 if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
4423         if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
4424                 if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
4425         if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
4426                 if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
4427         if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
4428                 if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
4429 #else
4430         hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
4431             F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
4432             F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
4433             F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
4434 #endif
4435         rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0], 0, 0);
4436         if (rc != 0) {
4437                 if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
4438                 goto done;
4439         }
4440
4441         vi->rss = rss;
4442         vi->flags |= VI_INIT_DONE;
4443 done:
4444         if (rc != 0)
4445                 vi_full_uninit(vi);
4446
4447         return (rc);
4448 }
4449
4450 /*
4451  * Idempotent.
4452  */
4453 int
4454 vi_full_uninit(struct vi_info *vi)
4455 {
4456         struct port_info *pi = vi->pi;
4457         struct adapter *sc = pi->adapter;
4458         int i;
4459         struct sge_rxq *rxq;
4460         struct sge_txq *txq;
4461 #ifdef TCP_OFFLOAD
4462         struct sge_ofld_rxq *ofld_rxq;
4463         struct sge_wrq *ofld_txq;
4464 #endif
4465
4466         if (vi->flags & VI_INIT_DONE) {
4467
4468                 /* Need to quiesce queues.  */
4469
4470                 /* XXX: Only for the first VI? */
4471                 if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
4472                         quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
4473
4474                 for_each_txq(vi, i, txq) {
4475                         quiesce_txq(sc, txq);
4476                 }
4477
4478 #ifdef TCP_OFFLOAD
4479                 for_each_ofld_txq(vi, i, ofld_txq) {
4480                         quiesce_wrq(sc, ofld_txq);
4481                 }
4482 #endif
4483
4484                 for_each_rxq(vi, i, rxq) {
4485                         quiesce_iq(sc, &rxq->iq);
4486                         quiesce_fl(sc, &rxq->fl);
4487                 }
4488
4489 #ifdef TCP_OFFLOAD
4490                 for_each_ofld_rxq(vi, i, ofld_rxq) {
4491                         quiesce_iq(sc, &ofld_rxq->iq);
4492                         quiesce_fl(sc, &ofld_rxq->fl);
4493                 }
4494 #endif
4495                 free(vi->rss, M_CXGBE);
4496                 free(vi->nm_rss, M_CXGBE);
4497         }
4498
4499         t4_teardown_vi_queues(vi);
4500         vi->flags &= ~VI_INIT_DONE;
4501
4502         return (0);
4503 }
4504
4505 static void
4506 quiesce_txq(struct adapter *sc, struct sge_txq *txq)
4507 {
4508         struct sge_eq *eq = &txq->eq;
4509         struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
4510
4511         (void) sc;      /* unused */
4512
4513 #ifdef INVARIANTS
4514         TXQ_LOCK(txq);
4515         MPASS((eq->flags & EQ_ENABLED) == 0);
4516         TXQ_UNLOCK(txq);
4517 #endif
4518
4519         /* Wait for the mp_ring to empty. */
4520         while (!mp_ring_is_idle(txq->r)) {
4521                 mp_ring_check_drainage(txq->r, 0);
4522                 pause("rquiesce", 1);
4523         }
4524
4525         /* Then wait for the hardware to finish. */
4526         while (spg->cidx != htobe16(eq->pidx))
4527                 pause("equiesce", 1);
4528
4529         /* Finally, wait for the driver to reclaim all descriptors. */
4530         while (eq->cidx != eq->pidx)
4531                 pause("dquiesce", 1);
4532 }
4533
/*
 * Placeholder for quiescing a work request queue.  Currently does nothing.
 */
static void
quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
{

	(void) sc;	/* unused */
	(void) wrq;	/* unused */

	/* XXXTX */
}
4540
4541 static void
4542 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
4543 {
4544         (void) sc;      /* unused */
4545
4546         /* Synchronize with the interrupt handler */
4547         while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
4548                 pause("iqfree", 1);
4549 }
4550
4551 static void
4552 quiesce_fl(struct adapter *sc, struct sge_fl *fl)
4553 {
4554         mtx_lock(&sc->sfl_lock);
4555         FL_LOCK(fl);
4556         fl->flags |= FL_DOOMED;
4557         FL_UNLOCK(fl);
4558         callout_stop(&sc->sfl_callout);
4559         mtx_unlock(&sc->sfl_lock);
4560
4561         KASSERT((fl->flags & FL_STARVING) == 0,
4562             ("%s: still starving", __func__));
4563 }
4564
4565 static int
4566 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
4567     driver_intr_t *handler, void *arg, char *name)
4568 {
4569         int rc;
4570
4571         irq->rid = rid;
4572         irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
4573             RF_SHAREABLE | RF_ACTIVE);
4574         if (irq->res == NULL) {
4575                 device_printf(sc->dev,
4576                     "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
4577                 return (ENOMEM);
4578         }
4579
4580         rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
4581             NULL, handler, arg, &irq->tag);
4582         if (rc != 0) {
4583                 device_printf(sc->dev,
4584                     "failed to setup interrupt for rid %d, name %s: %d\n",
4585                     rid, name, rc);
4586         } else if (name)
4587                 bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
4588
4589         return (rc);
4590 }
4591
4592 static int
4593 t4_free_irq(struct adapter *sc, struct irq *irq)
4594 {
4595         if (irq->tag)
4596                 bus_teardown_intr(sc->dev, irq->res, irq->tag);
4597         if (irq->res)
4598                 bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
4599
4600         bzero(irq, sizeof(*irq));
4601
4602         return (0);
4603 }
4604
4605 static void
4606 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
4607 {
4608
4609         regs->version = chip_id(sc) | chip_rev(sc) << 10;
4610         t4_get_regs(sc, buf, regs->len);
4611 }
4612
/*
 * PL indirect access registers.  A_PL_INDIR_CMD selects the target (an
 * auto-increment flag, a VF id, and an address); the data is then accessed
 * through A_PL_INDIR_DATA.
 *
 * S_ is a field's shift, M_ its mask (after shifting down), V_() places a
 * value into the field and G_() extracts it.  The V_() macros shift a
 * uint32_t so that V_PL_AUTOINC(1) sets bit 31 without shifting a signed
 * int into its sign bit (undefined behavior in C).
 */
#define A_PL_INDIR_CMD	0x1f8

#define S_PL_AUTOINC	31
#define M_PL_AUTOINC	0x1U
#define V_PL_AUTOINC(x)	((uint32_t)(x) << S_PL_AUTOINC)
#define G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)

#define S_PL_VFID	20
#define M_PL_VFID	0xffU
#define V_PL_VFID(x)	((uint32_t)(x) << S_PL_VFID)
#define G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)

#define S_PL_ADDR	0
#define M_PL_ADDR	0xfffffU
#define V_PL_ADDR(x)	((uint32_t)(x) << S_PL_ADDR)
#define G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)

#define A_PL_INDIR_DATA	0x1fc
4631
4632 static uint64_t
4633 read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
4634 {
4635         u32 stats[2];
4636
4637         mtx_assert(&sc->reg_lock, MA_OWNED);
4638         if (sc->flags & IS_VF) {
4639                 stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
4640                 stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
4641         } else {
4642                 t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4643                     V_PL_VFID(G_FW_VIID_VIN(viid)) |
4644                     V_PL_ADDR(VF_MPS_REG(reg)));
4645                 stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
4646                 stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
4647         }
4648         return (((uint64_t)stats[1]) << 32 | stats[0]);
4649 }
4650
4651 static void
4652 t4_get_vi_stats(struct adapter *sc, unsigned int viid,
4653     struct fw_vi_stats_vf *stats)
4654 {
4655
4656 #define GET_STAT(name) \
4657         read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)
4658
4659         stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
4660         stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
4661         stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
4662         stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
4663         stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
4664         stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
4665         stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
4666         stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
4667         stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
4668         stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
4669         stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
4670         stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
4671         stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
4672         stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
4673         stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
4674         stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
4675
4676 #undef GET_STAT
4677 }
4678
4679 static void
4680 t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
4681 {
4682         int reg;
4683
4684         t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
4685             V_PL_VFID(G_FW_VIID_VIN(viid)) |
4686             V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
4687         for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
4688              reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
4689                 t4_write_reg(sc, A_PL_INDIR_DATA, 0);
4690 }
4691
4692 static void
4693 vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
4694 {
4695         struct timeval tv;
4696         const struct timeval interval = {0, 250000};    /* 250ms */
4697
4698         if (!(vi->flags & VI_INIT_DONE))
4699                 return;
4700
4701         getmicrotime(&tv);
4702         timevalsub(&tv, &interval);
4703         if (timevalcmp(&tv, &vi->last_refreshed, <))
4704                 return;
4705
4706         mtx_lock(&sc->reg_lock);
4707         t4_get_vi_stats(sc, vi->viid, &vi->stats);
4708         getmicrotime(&vi->last_refreshed);
4709         mtx_unlock(&sc->reg_lock);
4710 }
4711
4712 static void
4713 cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
4714 {
4715         int i;
4716         u_int v, tnl_cong_drops;
4717         struct timeval tv;
4718         const struct timeval interval = {0, 250000};    /* 250ms */
4719
4720         getmicrotime(&tv);
4721         timevalsub(&tv, &interval);
4722         if (timevalcmp(&tv, &pi->last_refreshed, <))
4723                 return;
4724
4725         tnl_cong_drops = 0;
4726         t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
4727         for (i = 0; i < sc->chip_params->nchan; i++) {
4728                 if (pi->rx_chan_map & (1 << i)) {
4729                         mtx_lock(&sc->reg_lock);
4730                         t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
4731                             1, A_TP_MIB_TNL_CNG_DROP_0 + i);
4732                         mtx_unlock(&sc->reg_lock);
4733                         tnl_cong_drops += v;
4734                 }
4735         }
4736         pi->tnl_cong_drops = tnl_cong_drops;
4737         getmicrotime(&pi->last_refreshed);
4738 }
4739
4740 static void
4741 cxgbe_tick(void *arg)
4742 {
4743         struct port_info *pi = arg;
4744         struct adapter *sc = pi->adapter;
4745
4746         PORT_LOCK_ASSERT_OWNED(pi);
4747         cxgbe_refresh_stats(sc, pi);
4748
4749         callout_schedule(&pi->tick, hz);
4750 }
4751
4752 void
4753 vi_tick(void *arg)
4754 {
4755         struct vi_info *vi = arg;
4756         struct adapter *sc = vi->pi->adapter;
4757
4758         vi_refresh_stats(sc, vi);
4759
4760         callout_schedule(&vi->tick, hz);
4761 }
4762
4763 static void
4764 cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
4765 {
4766         struct ifnet *vlan;
4767
4768         if (arg != ifp || ifp->if_type != IFT_ETHER)
4769                 return;
4770
4771         vlan = VLAN_DEVAT(ifp, vid);
4772         VLAN_SETCOOKIE(vlan, ifp);
4773 }
4774
/*
 * Bit-name strings, one per capability class, indexed by class number.
 * Should match fw_caps_config_<foo> enums in t4fw_interface.h
 */
static char *caps_decoder[] = {
	/* 0: NBM */
	"\20\001IPMI\002NCSI",

	/* 1: link */
	"\20\001PPP\002QFC\003DCBX",

	/* 2: switch */
	"\20\001INGRESS\002EGRESS",

	/* 3: NIC */
	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"
	    "\006HASHFILTER\007ETHOFLD",

	/* 4: TOE */
	"\20\001TOE",

	/* 5: RDMA */
	"\20\001RDDP\002RDMAC",

	/* 6: iSCSI */
	"\20\001INITIATOR_PDU\002TARGET_PDU"
	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
	    "\007T10DIF"
	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",

	/* 7: Crypto */
	"\20\001LOOKASIDE\002TLSKEYS",

	/* 8: FCoE */
	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"
	    "\004PO_INITIATOR\005PO_TARGET",
};
4795
4796 void
4797 t4_sysctls(struct adapter *sc)
4798 {
4799         struct sysctl_ctx_list *ctx;
4800         struct sysctl_oid *oid;
4801         struct sysctl_oid_list *children, *c0;
4802         static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
4803
4804         ctx = device_get_sysctl_ctx(sc->dev);
4805
4806         /*
4807          * dev.t4nex.X.
4808          */
4809         oid = device_get_sysctl_tree(sc->dev);
4810         c0 = children = SYSCTL_CHILDREN(oid);
4811
4812         sc->sc_do_rxcopy = 1;
4813         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
4814             &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
4815
4816         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
4817             sc->params.nports, "# of ports");
4818
4819         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
4820             CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells,
4821             sysctl_bitfield, "A", "available doorbells");
4822
4823         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
4824             sc->params.vpd.cclk, "core clock frequency (in KHz)");
4825
4826         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
4827             CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
4828             sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
4829             "interrupt holdoff timer values (us)");
4830
4831         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
4832             CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
4833             sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
4834             "interrupt holdoff packet counter values");
4835
4836         t4_sge_sysctls(sc, ctx, children);
4837
4838         sc->lro_timeout = 100;
4839         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
4840             &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
4841
4842         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
4843             &sc->debug_flags, 0, "flags to enable runtime debugging");
4844
4845         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
4846             CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
4847
4848         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
4849             CTLFLAG_RD, sc->fw_version, 0, "firmware version");
4850
4851         if (sc->flags & IS_VF)
4852                 return;
4853
4854         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
4855             NULL, chip_rev(sc), "chip hardware revision");
4856
4857         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
4858             CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
4859
4860         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
4861             CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
4862
4863         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
4864             CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
4865
4866         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
4867             CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
4868
4869         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
4870             sc->er_version, 0, "expansion ROM version");
4871
4872         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
4873             sc->bs_version, 0, "bootstrap firmware version");
4874
4875         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
4876             NULL, sc->params.scfg_vers, "serial config version");
4877
4878         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
4879             NULL, sc->params.vpd_vers, "VPD version");
4880
4881         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
4882             CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
4883
4884         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
4885             sc->cfcsum, "config file checksum");
4886
4887 #define SYSCTL_CAP(name, n, text) \
4888         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
4889             CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \
4890             sysctl_bitfield, "A", "available " text " capabilities")
4891
4892         SYSCTL_CAP(nbmcaps, 0, "NBM");
4893         SYSCTL_CAP(linkcaps, 1, "link");
4894         SYSCTL_CAP(switchcaps, 2, "switch");
4895         SYSCTL_CAP(niccaps, 3, "NIC");
4896         SYSCTL_CAP(toecaps, 4, "TCP offload");
4897         SYSCTL_CAP(rdmacaps, 5, "RDMA");
4898         SYSCTL_CAP(iscsicaps, 6, "iSCSI");
4899         SYSCTL_CAP(cryptocaps, 7, "crypto");
4900         SYSCTL_CAP(fcoecaps, 8, "FCoE");
4901 #undef SYSCTL_CAP
4902
4903         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
4904             NULL, sc->tids.nftids, "number of filters");
4905
4906         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
4907             CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
4908             "chip temperature (in Celsius)");
4909
4910 #ifdef SBUF_DRAIN
4911         /*
4912          * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
4913          */
4914         oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
4915             CTLFLAG_RD | CTLFLAG_SKIP, NULL,
4916             "logs and miscellaneous information");
4917         children = SYSCTL_CHILDREN(oid);
4918
4919         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
4920             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4921             sysctl_cctrl, "A", "congestion control");
4922
4923         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
4924             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4925             sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
4926
4927         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
4928             CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
4929             sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
4930
4931         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
4932             CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
4933             sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
4934
4935         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
4936             CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
4937             sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
4938
4939         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
4940             CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
4941             sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
4942
4943         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
4944             CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
4945             sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
4946
4947         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
4948             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4949             chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6,
4950             "A", "CIM logic analyzer");
4951
4952         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
4953             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4954             sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
4955
4956         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
4957             CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
4958             sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
4959
4960         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
4961             CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
4962             sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
4963
4964         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
4965             CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
4966             sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
4967
4968         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
4969             CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
4970             sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
4971
4972         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
4973             CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
4974             sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
4975
4976         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
4977             CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
4978             sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
4979
4980         if (chip_id(sc) > CHELSIO_T4) {
4981                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
4982                     CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
4983                     sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
4984
4985                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
4986                     CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
4987                     sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
4988         }
4989
4990         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
4991             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4992             sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
4993
4994         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
4995             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
4996             sysctl_cim_qcfg, "A", "CIM queue configuration");
4997
4998         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
4999             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5000             sysctl_cpl_stats, "A", "CPL statistics");
5001
5002         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
5003             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5004             sysctl_ddp_stats, "A", "non-TCP DDP statistics");
5005
5006         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
5007             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5008             sysctl_devlog, "A", "firmware's device log");
5009
5010         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
5011             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5012             sysctl_fcoe_stats, "A", "FCoE statistics");
5013
5014         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
5015             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5016             sysctl_hw_sched, "A", "hardware scheduler ");
5017
5018         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
5019             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5020             sysctl_l2t, "A", "hardware L2 table");
5021
5022         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
5023             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5024             sysctl_lb_stats, "A", "loopback statistics");
5025
5026         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
5027             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5028             sysctl_meminfo, "A", "memory regions");
5029
5030         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
5031             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5032             chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
5033             "A", "MPS TCAM entries");
5034
5035         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
5036             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5037             sysctl_path_mtus, "A", "path MTUs");
5038
5039         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
5040             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5041             sysctl_pm_stats, "A", "PM statistics");
5042
5043         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
5044             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5045             sysctl_rdma_stats, "A", "RDMA statistics");
5046
5047         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
5048             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5049             sysctl_tcp_stats, "A", "TCP statistics");
5050
5051         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
5052             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5053             sysctl_tids, "A", "TID information");
5054
5055         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
5056             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5057             sysctl_tp_err_stats, "A", "TP error statistics");
5058
5059         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
5060             CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
5061             "TP logic analyzer event capture mask");
5062
5063         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
5064             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5065             sysctl_tp_la, "A", "TP logic analyzer");
5066
5067         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
5068             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5069             sysctl_tx_rate, "A", "Tx rate");
5070
5071         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
5072             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5073             sysctl_ulprx_la, "A", "ULPRX logic analyzer");
5074
5075         if (chip_id(sc) >= CHELSIO_T5) {
5076                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
5077                     CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5078                     sysctl_wcwr_stats, "A", "write combined work requests");
5079         }
5080 #endif
5081
5082 #ifdef TCP_OFFLOAD
5083         if (is_offload(sc)) {
5084                 /*
5085                  * dev.t4nex.X.toe.
5086                  */
5087                 oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
5088                     NULL, "TOE parameters");
5089                 children = SYSCTL_CHILDREN(oid);
5090
5091                 sc->tt.sndbuf = 256 * 1024;
5092                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
5093                     &sc->tt.sndbuf, 0, "max hardware send buffer size");
5094
5095                 sc->tt.ddp = 0;
5096                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
5097                     &sc->tt.ddp, 0, "DDP allowed");
5098
5099                 sc->tt.rx_coalesce = 1;
5100                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
5101                     CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
5102
5103                 sc->tt.tx_align = 1;
5104                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
5105                     CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
5106
5107                 sc->tt.tx_zcopy = 0;
5108                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
5109                     CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
5110                     "Enable zero-copy aio_write(2)");
5111
5112                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
5113                     CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
5114                     "TP timer tick (us)");
5115
5116                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
5117                     CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
5118                     "TCP timestamp tick (us)");
5119
5120                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
5121                     CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
5122                     "DACK tick (us)");
5123
5124                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
5125                     CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
5126                     "IU", "DACK timer (us)");
5127
5128                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
5129                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
5130                     sysctl_tp_timer, "LU", "Retransmit min (us)");
5131
5132                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
5133                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
5134                     sysctl_tp_timer, "LU", "Retransmit max (us)");
5135
5136                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
5137                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
5138                     sysctl_tp_timer, "LU", "Persist timer min (us)");
5139
5140                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
5141                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
5142                     sysctl_tp_timer, "LU", "Persist timer max (us)");
5143
5144                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
5145                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
5146                     sysctl_tp_timer, "LU", "Keepidle idle timer (us)");
5147
5148                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_intvl",
5149                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
5150                     sysctl_tp_timer, "LU", "Keepidle interval (us)");
5151
5152                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
5153                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
5154                     sysctl_tp_timer, "LU", "Initial SRTT (us)");
5155
5156                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
5157                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
5158                     sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");
5159         }
5160 #endif
5161 }
5162
5163 void
5164 vi_sysctls(struct vi_info *vi)
5165 {
5166         struct sysctl_ctx_list *ctx;
5167         struct sysctl_oid *oid;
5168         struct sysctl_oid_list *children;
5169
5170         ctx = device_get_sysctl_ctx(vi->dev);
5171
5172         /*
5173          * dev.v?(cxgbe|cxl).X.
5174          */
5175         oid = device_get_sysctl_tree(vi->dev);
5176         children = SYSCTL_CHILDREN(oid);
5177
5178         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
5179             vi->viid, "VI identifer");
5180         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
5181             &vi->nrxq, 0, "# of rx queues");
5182         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
5183             &vi->ntxq, 0, "# of tx queues");
5184         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
5185             &vi->first_rxq, 0, "index of first rx queue");
5186         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
5187             &vi->first_txq, 0, "index of first tx queue");
5188         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
5189             vi->rss_size, "size of RSS indirection table");
5190
5191         if (IS_MAIN_VI(vi)) {
5192                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
5193                     CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
5194                     "Reserve queue 0 for non-flowid packets");
5195         }
5196
5197 #ifdef TCP_OFFLOAD
5198         if (vi->nofldrxq != 0) {
5199                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
5200                     &vi->nofldrxq, 0,
5201                     "# of rx queues for offloaded TCP connections");
5202                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
5203                     &vi->nofldtxq, 0,
5204                     "# of tx queues for offloaded TCP connections");
5205                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
5206                     CTLFLAG_RD, &vi->first_ofld_rxq, 0,
5207                     "index of first TOE rx queue");
5208                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
5209                     CTLFLAG_RD, &vi->first_ofld_txq, 0,
5210                     "index of first TOE tx queue");
5211         }
5212 #endif
5213 #ifdef DEV_NETMAP
5214         if (vi->nnmrxq != 0) {
5215                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
5216                     &vi->nnmrxq, 0, "# of netmap rx queues");
5217                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
5218                     &vi->nnmtxq, 0, "# of netmap tx queues");
5219                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
5220                     CTLFLAG_RD, &vi->first_nm_rxq, 0,
5221                     "index of first netmap rx queue");
5222                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
5223                     CTLFLAG_RD, &vi->first_nm_txq, 0,
5224                     "index of first netmap tx queue");
5225         }
5226 #endif
5227
5228         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
5229             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
5230             "holdoff timer index");
5231         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
5232             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
5233             "holdoff packet counter index");
5234
5235         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
5236             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
5237             "rx queue size");
5238         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
5239             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
5240             "tx queue size");
5241 }
5242
5243 static void
5244 cxgbe_sysctls(struct port_info *pi)
5245 {
5246         struct sysctl_ctx_list *ctx;
5247         struct sysctl_oid *oid;
5248         struct sysctl_oid_list *children, *children2;
5249         struct adapter *sc = pi->adapter;
5250         int i;
5251         char name[16];
5252
5253         ctx = device_get_sysctl_ctx(pi->dev);
5254
5255         /*
5256          * dev.cxgbe.X.
5257          */
5258         oid = device_get_sysctl_tree(pi->dev);
5259         children = SYSCTL_CHILDREN(oid);
5260
5261         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
5262            CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
5263         if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
5264                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
5265                     CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
5266                     "PHY temperature (in Celsius)");
5267                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
5268                     CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
5269                     "PHY firmware version");
5270         }
5271
5272         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
5273             CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings,
5274             "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)");
5275
5276         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
5277             port_top_speed(pi), "max speed (in Gbps)");
5278
5279         if (sc->flags & IS_VF)
5280                 return;
5281
5282         /*
5283          * dev.(cxgbe|cxl).X.tc.
5284          */
5285         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
5286             "Tx scheduler traffic classes");
5287         for (i = 0; i < sc->chip_params->nsched_cls; i++) {
5288                 struct tx_sched_class *tc = &pi->tc[i];
5289
5290                 snprintf(name, sizeof(name), "%d", i);
5291                 children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
5292                     SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
5293                     "traffic class"));
5294                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD,
5295                     &tc->flags, 0, "flags");
5296                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
5297                     CTLFLAG_RD, &tc->refcount, 0, "references to this class");
5298 #ifdef SBUF_DRAIN
5299                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
5300                     CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i,
5301                     sysctl_tc_params, "A", "traffic class parameters");
5302 #endif
5303         }
5304
5305         /*
5306          * dev.cxgbe.X.stats.
5307          */
5308         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
5309             NULL, "port statistics");
5310         children = SYSCTL_CHILDREN(oid);
5311         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
5312             &pi->tx_parse_error, 0,
5313             "# of tx packets with invalid length or # of segments");
5314
5315 #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
5316         SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
5317             CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
5318             sysctl_handle_t4_reg64, "QU", desc)
5319
5320         SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
5321             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
5322         SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
5323             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
5324         SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
5325             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
5326         SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
5327             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
5328         SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
5329             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
5330         SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
5331             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
5332         SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
5333             "# of tx frames in this range",
5334             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
5335         SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
5336             "# of tx frames in this range",
5337             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
5338         SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
5339             "# of tx frames in this range",
5340             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
5341         SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
5342             "# of tx frames in this range",
5343             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
5344         SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
5345             "# of tx frames in this range",
5346             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
5347         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
5348             "# of tx frames in this range",
5349             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
5350         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
5351             "# of tx frames in this range",
5352             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
5353         SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
5354             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
5355         SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
5356             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
5357         SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
5358             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
5359         SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
5360             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
5361         SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
5362             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
5363         SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
5364             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
5365         SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
5366             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
5367         SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
5368             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
5369         SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
5370             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
5371         SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
5372             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
5373
5374         SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
5375             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
5376         SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
5377             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
5378         SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
5379             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
5380         SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
5381             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
5382         SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
5383             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
5384         SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
5385             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
5386         SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
5387             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
5388         SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
5389             "# of frames received with bad FCS",
5390             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
5391         SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
5392             "# of frames received with length error",
5393             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
5394         SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
5395             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
5396         SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
5397             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
5398         SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
5399             "# of rx frames in this range",
5400             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
5401         SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
5402             "# of rx frames in this range",
5403             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
5404         SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
5405             "# of rx frames in this range",
5406             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
5407         SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
5408             "# of rx frames in this range",
5409             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
5410         SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
5411             "# of rx frames in this range",
5412             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
5413         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
5414             "# of rx frames in this range",
5415             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
5416         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
5417             "# of rx frames in this range",
5418             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
5419         SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
5420             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
5421         SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
5422             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
5423         SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
5424             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
5425         SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
5426             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
5427         SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
5428             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
5429         SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
5430             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
5431         SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
5432             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
5433         SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
5434             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
5435         SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
5436             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
5437
5438 #undef SYSCTL_ADD_T4_REG64
5439
5440 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
5441         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
5442             &pi->stats.name, desc)
5443
5444         /* We get these from port_stats and they may be stale by up to 1s */
5445         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
5446             "# drops due to buffer-group 0 overflows");
5447         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
5448             "# drops due to buffer-group 1 overflows");
5449         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
5450             "# drops due to buffer-group 2 overflows");
5451         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
5452             "# drops due to buffer-group 3 overflows");
5453         SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
5454             "# of buffer-group 0 truncated packets");
5455         SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
5456             "# of buffer-group 1 truncated packets");
5457         SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
5458             "# of buffer-group 2 truncated packets");
5459         SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
5460             "# of buffer-group 3 truncated packets");
5461
5462 #undef SYSCTL_ADD_T4_PORTSTAT
5463 }
5464
5465 static int
5466 sysctl_int_array(SYSCTL_HANDLER_ARGS)
5467 {
5468         int rc, *i, space = 0;
5469         struct sbuf sb;
5470
5471         sbuf_new_for_sysctl(&sb, NULL, 64, req);
5472         for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
5473                 if (space)
5474                         sbuf_printf(&sb, " ");
5475                 sbuf_printf(&sb, "%d", *i);
5476                 space = 1;
5477         }
5478         rc = sbuf_finish(&sb);
5479         sbuf_delete(&sb);
5480         return (rc);
5481 }
5482
5483 static int
5484 sysctl_bitfield(SYSCTL_HANDLER_ARGS)
5485 {
5486         int rc;
5487         struct sbuf *sb;
5488
5489         rc = sysctl_wire_old_buffer(req, 0);
5490         if (rc != 0)
5491                 return(rc);
5492
5493         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5494         if (sb == NULL)
5495                 return (ENOMEM);
5496
5497         sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
5498         rc = sbuf_finish(sb);
5499         sbuf_delete(sb);
5500
5501         return (rc);
5502 }
5503
5504 static int
5505 sysctl_btphy(SYSCTL_HANDLER_ARGS)
5506 {
5507         struct port_info *pi = arg1;
5508         int op = arg2;
5509         struct adapter *sc = pi->adapter;
5510         u_int v;
5511         int rc;
5512
5513         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
5514         if (rc)
5515                 return (rc);
5516         /* XXX: magic numbers */
5517         rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
5518             &v);
5519         end_synchronized_op(sc, 0);
5520         if (rc)
5521                 return (rc);
5522         if (op == 0)
5523                 v /= 256;
5524
5525         rc = sysctl_handle_int(oidp, &v, 0, req);
5526         return (rc);
5527 }
5528
5529 static int
5530 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
5531 {
5532         struct vi_info *vi = arg1;
5533         int rc, val;
5534
5535         val = vi->rsrv_noflowq;
5536         rc = sysctl_handle_int(oidp, &val, 0, req);
5537         if (rc != 0 || req->newptr == NULL)
5538                 return (rc);
5539
5540         if ((val >= 1) && (vi->ntxq > 1))
5541                 vi->rsrv_noflowq = 1;
5542         else
5543                 vi->rsrv_noflowq = 0;
5544
5545         return (rc);
5546 }
5547
5548 static int
5549 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
5550 {
5551         struct vi_info *vi = arg1;
5552         struct adapter *sc = vi->pi->adapter;
5553         int idx, rc, i;
5554         struct sge_rxq *rxq;
5555 #ifdef TCP_OFFLOAD
5556         struct sge_ofld_rxq *ofld_rxq;
5557 #endif
5558         uint8_t v;
5559
5560         idx = vi->tmr_idx;
5561
5562         rc = sysctl_handle_int(oidp, &idx, 0, req);
5563         if (rc != 0 || req->newptr == NULL)
5564                 return (rc);
5565
5566         if (idx < 0 || idx >= SGE_NTIMERS)
5567                 return (EINVAL);
5568
5569         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5570             "t4tmr");
5571         if (rc)
5572                 return (rc);
5573
5574         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
5575         for_each_rxq(vi, i, rxq) {
5576 #ifdef atomic_store_rel_8
5577                 atomic_store_rel_8(&rxq->iq.intr_params, v);
5578 #else
5579                 rxq->iq.intr_params = v;
5580 #endif
5581         }
5582 #ifdef TCP_OFFLOAD
5583         for_each_ofld_rxq(vi, i, ofld_rxq) {
5584 #ifdef atomic_store_rel_8
5585                 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
5586 #else
5587                 ofld_rxq->iq.intr_params = v;
5588 #endif
5589         }
5590 #endif
5591         vi->tmr_idx = idx;
5592
5593         end_synchronized_op(sc, LOCK_HELD);
5594         return (0);
5595 }
5596
5597 static int
5598 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
5599 {
5600         struct vi_info *vi = arg1;
5601         struct adapter *sc = vi->pi->adapter;
5602         int idx, rc;
5603
5604         idx = vi->pktc_idx;
5605
5606         rc = sysctl_handle_int(oidp, &idx, 0, req);
5607         if (rc != 0 || req->newptr == NULL)
5608                 return (rc);
5609
5610         if (idx < -1 || idx >= SGE_NCOUNTERS)
5611                 return (EINVAL);
5612
5613         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5614             "t4pktc");
5615         if (rc)
5616                 return (rc);
5617
5618         if (vi->flags & VI_INIT_DONE)
5619                 rc = EBUSY; /* cannot be changed once the queues are created */
5620         else
5621                 vi->pktc_idx = idx;
5622
5623         end_synchronized_op(sc, LOCK_HELD);
5624         return (rc);
5625 }
5626
5627 static int
5628 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
5629 {
5630         struct vi_info *vi = arg1;
5631         struct adapter *sc = vi->pi->adapter;
5632         int qsize, rc;
5633
5634         qsize = vi->qsize_rxq;
5635
5636         rc = sysctl_handle_int(oidp, &qsize, 0, req);
5637         if (rc != 0 || req->newptr == NULL)
5638                 return (rc);
5639
5640         if (qsize < 128 || (qsize & 7))
5641                 return (EINVAL);
5642
5643         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5644             "t4rxqs");
5645         if (rc)
5646                 return (rc);
5647
5648         if (vi->flags & VI_INIT_DONE)
5649                 rc = EBUSY; /* cannot be changed once the queues are created */
5650         else
5651                 vi->qsize_rxq = qsize;
5652
5653         end_synchronized_op(sc, LOCK_HELD);
5654         return (rc);
5655 }
5656
5657 static int
5658 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
5659 {
5660         struct vi_info *vi = arg1;
5661         struct adapter *sc = vi->pi->adapter;
5662         int qsize, rc;
5663
5664         qsize = vi->qsize_txq;
5665
5666         rc = sysctl_handle_int(oidp, &qsize, 0, req);
5667         if (rc != 0 || req->newptr == NULL)
5668                 return (rc);
5669
5670         if (qsize < 128 || qsize > 65536)
5671                 return (EINVAL);
5672
5673         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
5674             "t4txqs");
5675         if (rc)
5676                 return (rc);
5677
5678         if (vi->flags & VI_INIT_DONE)
5679                 rc = EBUSY; /* cannot be changed once the queues are created */
5680         else
5681                 vi->qsize_txq = qsize;
5682
5683         end_synchronized_op(sc, LOCK_HELD);
5684         return (rc);
5685 }
5686
5687 static int
5688 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
5689 {
5690         struct port_info *pi = arg1;
5691         struct adapter *sc = pi->adapter;
5692         struct link_config *lc = &pi->link_cfg;
5693         int rc;
5694
5695         if (req->newptr == NULL) {
5696                 struct sbuf *sb;
5697                 static char *bits = "\20\1PAUSE_RX\2PAUSE_TX";
5698
5699                 rc = sysctl_wire_old_buffer(req, 0);
5700                 if (rc != 0)
5701                         return(rc);
5702
5703                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5704                 if (sb == NULL)
5705                         return (ENOMEM);
5706
5707                 sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits);
5708                 rc = sbuf_finish(sb);
5709                 sbuf_delete(sb);
5710         } else {
5711                 char s[2];
5712                 int n;
5713
5714                 s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX));
5715                 s[1] = 0;
5716
5717                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
5718                 if (rc != 0)
5719                         return(rc);
5720
5721                 if (s[1] != 0)
5722                         return (EINVAL);
5723                 if (s[0] < '0' || s[0] > '9')
5724                         return (EINVAL);        /* not a number */
5725                 n = s[0] - '0';
5726                 if (n & ~(PAUSE_TX | PAUSE_RX))
5727                         return (EINVAL);        /* some other bit is set too */
5728
5729                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
5730                     "t4PAUSE");
5731                 if (rc)
5732                         return (rc);
5733                 if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) {
5734                         int link_ok = lc->link_ok;
5735
5736                         lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX);
5737                         lc->requested_fc |= n;
5738                         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
5739                         lc->link_ok = link_ok;  /* restore */
5740                 }
5741                 end_synchronized_op(sc, 0);
5742         }
5743
5744         return (rc);
5745 }
5746
5747 static int
5748 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
5749 {
5750         struct adapter *sc = arg1;
5751         int reg = arg2;
5752         uint64_t val;
5753
5754         val = t4_read_reg64(sc, reg);
5755
5756         return (sysctl_handle_64(oidp, &val, 0, req));
5757 }
5758
5759 static int
5760 sysctl_temperature(SYSCTL_HANDLER_ARGS)
5761 {
5762         struct adapter *sc = arg1;
5763         int rc, t;
5764         uint32_t param, val;
5765
5766         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
5767         if (rc)
5768                 return (rc);
5769         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
5770             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
5771             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
5772         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
5773         end_synchronized_op(sc, 0);
5774         if (rc)
5775                 return (rc);
5776
5777         /* unknown is returned as 0 but we display -1 in that case */
5778         t = val == 0 ? -1 : val;
5779
5780         rc = sysctl_handle_int(oidp, &t, 0, req);
5781         return (rc);
5782 }
5783
5784 #ifdef SBUF_DRAIN
5785 static int
5786 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
5787 {
5788         struct adapter *sc = arg1;
5789         struct sbuf *sb;
5790         int rc, i;
5791         uint16_t incr[NMTUS][NCCTRL_WIN];
5792         static const char *dec_fac[] = {
5793                 "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
5794                 "0.9375"
5795         };
5796
5797         rc = sysctl_wire_old_buffer(req, 0);
5798         if (rc != 0)
5799                 return (rc);
5800
5801         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5802         if (sb == NULL)
5803                 return (ENOMEM);
5804
5805         t4_read_cong_tbl(sc, incr);
5806
5807         for (i = 0; i < NCCTRL_WIN; ++i) {
5808                 sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
5809                     incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
5810                     incr[5][i], incr[6][i], incr[7][i]);
5811                 sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
5812                     incr[8][i], incr[9][i], incr[10][i], incr[11][i],
5813                     incr[12][i], incr[13][i], incr[14][i], incr[15][i],
5814                     sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
5815         }
5816
5817         rc = sbuf_finish(sb);
5818         sbuf_delete(sb);
5819
5820         return (rc);
5821 }
5822
5823 static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
5824         "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",   /* ibq's */
5825         "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */
5826         "SGE0-RX", "SGE1-RX"    /* additional obq's (T5 onwards) */
5827 };
5828
5829 static int
5830 sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
5831 {
5832         struct adapter *sc = arg1;
5833         struct sbuf *sb;
5834         int rc, i, n, qid = arg2;
5835         uint32_t *buf, *p;
5836         char *qtype;
5837         u_int cim_num_obq = sc->chip_params->cim_num_obq;
5838
5839         KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
5840             ("%s: bad qid %d\n", __func__, qid));
5841
5842         if (qid < CIM_NUM_IBQ) {
5843                 /* inbound queue */
5844                 qtype = "IBQ";
5845                 n = 4 * CIM_IBQ_SIZE;
5846                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5847                 rc = t4_read_cim_ibq(sc, qid, buf, n);
5848         } else {
5849                 /* outbound queue */
5850                 qtype = "OBQ";
5851                 qid -= CIM_NUM_IBQ;
5852                 n = 4 * cim_num_obq * CIM_OBQ_SIZE;
5853                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
5854                 rc = t4_read_cim_obq(sc, qid, buf, n);
5855         }
5856
5857         if (rc < 0) {
5858                 rc = -rc;
5859                 goto done;
5860         }
5861         n = rc * sizeof(uint32_t);      /* rc has # of words actually read */
5862
5863         rc = sysctl_wire_old_buffer(req, 0);
5864         if (rc != 0)
5865                 goto done;
5866
5867         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
5868         if (sb == NULL) {
5869                 rc = ENOMEM;
5870                 goto done;
5871         }
5872
5873         sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]);
5874         for (i = 0, p = buf; i < n; i += 16, p += 4)
5875                 sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
5876                     p[2], p[3]);
5877
5878         rc = sbuf_finish(sb);
5879         sbuf_delete(sb);
5880 done:
5881         free(buf, M_CXGBE);
5882         return (rc);
5883 }
5884
5885 static int
5886 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
5887 {
5888         struct adapter *sc = arg1;
5889         u_int cfg;
5890         struct sbuf *sb;
5891         uint32_t *buf, *p;
5892         int rc;
5893
5894         MPASS(chip_id(sc) <= CHELSIO_T5);
5895
5896         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5897         if (rc != 0)
5898                 return (rc);
5899
5900         rc = sysctl_wire_old_buffer(req, 0);
5901         if (rc != 0)
5902                 return (rc);
5903
5904         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5905         if (sb == NULL)
5906                 return (ENOMEM);
5907
5908         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5909             M_ZERO | M_WAITOK);
5910
5911         rc = -t4_cim_read_la(sc, buf, NULL);
5912         if (rc != 0)
5913                 goto done;
5914
5915         sbuf_printf(sb, "Status   Data      PC%s",
5916             cfg & F_UPDBGLACAPTPCONLY ? "" :
5917             "     LS0Stat  LS0Addr             LS0Data");
5918
5919         for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
5920                 if (cfg & F_UPDBGLACAPTPCONLY) {
5921                         sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
5922                             p[6], p[7]);
5923                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
5924                             (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
5925                             p[4] & 0xff, p[5] >> 8);
5926                         sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
5927                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5928                             p[1] & 0xf, p[2] >> 4);
5929                 } else {
5930                         sbuf_printf(sb,
5931                             "\n  %02x   %x%07x %x%07x %08x %08x "
5932                             "%08x%08x%08x%08x",
5933                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
5934                             p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
5935                             p[6], p[7]);
5936                 }
5937         }
5938
5939         rc = sbuf_finish(sb);
5940         sbuf_delete(sb);
5941 done:
5942         free(buf, M_CXGBE);
5943         return (rc);
5944 }
5945
5946 static int
5947 sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS)
5948 {
5949         struct adapter *sc = arg1;
5950         u_int cfg;
5951         struct sbuf *sb;
5952         uint32_t *buf, *p;
5953         int rc;
5954
5955         MPASS(chip_id(sc) > CHELSIO_T5);
5956
5957         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
5958         if (rc != 0)
5959                 return (rc);
5960
5961         rc = sysctl_wire_old_buffer(req, 0);
5962         if (rc != 0)
5963                 return (rc);
5964
5965         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
5966         if (sb == NULL)
5967                 return (ENOMEM);
5968
5969         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
5970             M_ZERO | M_WAITOK);
5971
5972         rc = -t4_cim_read_la(sc, buf, NULL);
5973         if (rc != 0)
5974                 goto done;
5975
5976         sbuf_printf(sb, "Status   Inst    Data      PC%s",
5977             cfg & F_UPDBGLACAPTPCONLY ? "" :
5978             "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
5979
5980         for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
5981                 if (cfg & F_UPDBGLACAPTPCONLY) {
5982                         sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
5983                             p[3] & 0xff, p[2], p[1], p[0]);
5984                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
5985                             (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
5986                             p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
5987                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
5988                             (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
5989                             p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
5990                             p[6] >> 16);
5991                 } else {
5992                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
5993                             "%08x %08x %08x %08x %08x %08x",
5994                             (p[9] >> 16) & 0xff,
5995                             p[9] & 0xffff, p[8] >> 16,
5996                             p[8] & 0xffff, p[7] >> 16,
5997                             p[7] & 0xffff, p[6] >> 16,
5998                             p[2], p[1], p[0], p[5], p[4], p[3]);
5999                 }
6000         }
6001
6002         rc = sbuf_finish(sb);
6003         sbuf_delete(sb);
6004 done:
6005         free(buf, M_CXGBE);
6006         return (rc);
6007 }
6008
6009 static int
6010 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
6011 {
6012         struct adapter *sc = arg1;
6013         u_int i;
6014         struct sbuf *sb;
6015         uint32_t *buf, *p;
6016         int rc;
6017
6018         rc = sysctl_wire_old_buffer(req, 0);
6019         if (rc != 0)
6020                 return (rc);
6021
6022         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6023         if (sb == NULL)
6024                 return (ENOMEM);
6025
6026         buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
6027             M_ZERO | M_WAITOK);
6028
6029         t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
6030         p = buf;
6031
6032         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
6033                 sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
6034                     p[1], p[0]);
6035         }
6036
6037         sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
6038         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
6039                 sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
6040                     (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
6041                     (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
6042                     (p[1] >> 2) | ((p[2] & 3) << 30),
6043                     (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
6044                     p[0] & 1);
6045         }
6046
6047         rc = sbuf_finish(sb);
6048         sbuf_delete(sb);
6049         free(buf, M_CXGBE);
6050         return (rc);
6051 }
6052
6053 static int
6054 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
6055 {
6056         struct adapter *sc = arg1;
6057         u_int i;
6058         struct sbuf *sb;
6059         uint32_t *buf, *p;
6060         int rc;
6061
6062         rc = sysctl_wire_old_buffer(req, 0);
6063         if (rc != 0)
6064                 return (rc);
6065
6066         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6067         if (sb == NULL)
6068                 return (ENOMEM);
6069
6070         buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
6071             M_ZERO | M_WAITOK);
6072
6073         t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
6074         p = buf;
6075
6076         sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
6077         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
6078                 sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
6079                     (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
6080                     p[4], p[3], p[2], p[1], p[0]);
6081         }
6082
6083         sbuf_printf(sb, "\n\nCntl ID               Data");
6084         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
6085                 sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
6086                     (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
6087         }
6088
6089         rc = sbuf_finish(sb);
6090         sbuf_delete(sb);
6091         free(buf, M_CXGBE);
6092         return (rc);
6093 }
6094
6095 static int
6096 sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
6097 {
6098         struct adapter *sc = arg1;
6099         struct sbuf *sb;
6100         int rc, i;
6101         uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
6102         uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
6103         uint16_t thres[CIM_NUM_IBQ];
6104         uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
6105         uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
6106         u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
6107
6108         cim_num_obq = sc->chip_params->cim_num_obq;
6109         if (is_t4(sc)) {
6110                 ibq_rdaddr = A_UP_IBQ_0_RDADDR;
6111                 obq_rdaddr = A_UP_OBQ_0_REALADDR;
6112         } else {
6113                 ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
6114                 obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
6115         }
6116         nq = CIM_NUM_IBQ + cim_num_obq;
6117
6118         rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
6119         if (rc == 0)
6120                 rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
6121         if (rc != 0)
6122                 return (rc);
6123
6124         t4_read_cimq_cfg(sc, base, size, thres);
6125
6126         rc = sysctl_wire_old_buffer(req, 0);
6127         if (rc != 0)
6128                 return (rc);
6129
6130         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
6131         if (sb == NULL)
6132                 return (ENOMEM);
6133
6134         sbuf_printf(sb,
6135             "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
6136
6137         for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
6138                 sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
6139                     qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
6140                     G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
6141                     G_QUEREMFLITS(p[2]) * 16);
6142         for ( ; i < nq; i++, p += 4, wr += 2)
6143                 sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
6144                     base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
6145                     wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
6146                     G_QUEREMFLITS(p[2]) * 16);
6147
6148         rc = sbuf_finish(sb);
6149         sbuf_delete(sb);
6150
6151         return (rc);
6152 }
6153
6154 static int
6155 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
6156 {
6157         struct adapter *sc = arg1;
6158         struct sbuf *sb;
6159         int rc;
6160         struct tp_cpl_stats stats;
6161
6162         rc = sysctl_wire_old_buffer(req, 0);
6163         if (rc != 0)
6164                 return (rc);
6165
6166         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6167         if (sb == NULL)
6168                 return (ENOMEM);
6169
6170         mtx_lock(&sc->reg_lock);
6171         t4_tp_get_cpl_stats(sc, &stats);
6172         mtx_unlock(&sc->reg_lock);
6173
6174         if (sc->chip_params->nchan > 2) {
6175                 sbuf_printf(sb, "                 channel 0  channel 1"
6176                     "  channel 2  channel 3");
6177                 sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
6178                     stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
6179                 sbuf_printf(sb, "\nCPL responses:   %10u %10u %10u %10u",
6180                     stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
6181         } else {
6182                 sbuf_printf(sb, "                 channel 0  channel 1");
6183                 sbuf_printf(sb, "\nCPL requests:   %10u %10u",
6184                     stats.req[0], stats.req[1]);
6185                 sbuf_printf(sb, "\nCPL responses:   %10u %10u",
6186                     stats.rsp[0], stats.rsp[1]);
6187         }
6188
6189         rc = sbuf_finish(sb);
6190         sbuf_delete(sb);
6191
6192         return (rc);
6193 }
6194
6195 static int
6196 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
6197 {
6198         struct adapter *sc = arg1;
6199         struct sbuf *sb;
6200         int rc;
6201         struct tp_usm_stats stats;
6202
6203         rc = sysctl_wire_old_buffer(req, 0);
6204         if (rc != 0)
6205                 return(rc);
6206
6207         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6208         if (sb == NULL)
6209                 return (ENOMEM);
6210
6211         t4_get_usm_stats(sc, &stats);
6212
6213         sbuf_printf(sb, "Frames: %u\n", stats.frames);
6214         sbuf_printf(sb, "Octets: %ju\n", stats.octets);
6215         sbuf_printf(sb, "Drops:  %u", stats.drops);
6216
6217         rc = sbuf_finish(sb);
6218         sbuf_delete(sb);
6219
6220         return (rc);
6221 }
6222
6223 static const char * const devlog_level_strings[] = {
6224         [FW_DEVLOG_LEVEL_EMERG]         = "EMERG",
6225         [FW_DEVLOG_LEVEL_CRIT]          = "CRIT",
6226         [FW_DEVLOG_LEVEL_ERR]           = "ERR",
6227         [FW_DEVLOG_LEVEL_NOTICE]        = "NOTICE",
6228         [FW_DEVLOG_LEVEL_INFO]          = "INFO",
6229         [FW_DEVLOG_LEVEL_DEBUG]         = "DEBUG"
6230 };
6231
6232 static const char * const devlog_facility_strings[] = {
6233         [FW_DEVLOG_FACILITY_CORE]       = "CORE",
6234         [FW_DEVLOG_FACILITY_CF]         = "CF",
6235         [FW_DEVLOG_FACILITY_SCHED]      = "SCHED",
6236         [FW_DEVLOG_FACILITY_TIMER]      = "TIMER",
6237         [FW_DEVLOG_FACILITY_RES]        = "RES",
6238         [FW_DEVLOG_FACILITY_HW]         = "HW",
6239         [FW_DEVLOG_FACILITY_FLR]        = "FLR",
6240         [FW_DEVLOG_FACILITY_DMAQ]       = "DMAQ",
6241         [FW_DEVLOG_FACILITY_PHY]        = "PHY",
6242         [FW_DEVLOG_FACILITY_MAC]        = "MAC",
6243         [FW_DEVLOG_FACILITY_PORT]       = "PORT",
6244         [FW_DEVLOG_FACILITY_VI]         = "VI",
6245         [FW_DEVLOG_FACILITY_FILTER]     = "FILTER",
6246         [FW_DEVLOG_FACILITY_ACL]        = "ACL",
6247         [FW_DEVLOG_FACILITY_TM]         = "TM",
6248         [FW_DEVLOG_FACILITY_QFC]        = "QFC",
6249         [FW_DEVLOG_FACILITY_DCB]        = "DCB",
6250         [FW_DEVLOG_FACILITY_ETH]        = "ETH",
6251         [FW_DEVLOG_FACILITY_OFLD]       = "OFLD",
6252         [FW_DEVLOG_FACILITY_RI]         = "RI",
6253         [FW_DEVLOG_FACILITY_ISCSI]      = "ISCSI",
6254         [FW_DEVLOG_FACILITY_FCOE]       = "FCOE",
6255         [FW_DEVLOG_FACILITY_FOISCSI]    = "FOISCSI",
6256         [FW_DEVLOG_FACILITY_FOFCOE]     = "FOFCOE",
6257         [FW_DEVLOG_FACILITY_CHNET]      = "CHNET",
6258 };
6259
6260 static int
6261 sysctl_devlog(SYSCTL_HANDLER_ARGS)
6262 {
6263         struct adapter *sc = arg1;
6264         struct devlog_params *dparams = &sc->params.devlog;
6265         struct fw_devlog_e *buf, *e;
6266         int i, j, rc, nentries, first = 0;
6267         struct sbuf *sb;
6268         uint64_t ftstamp = UINT64_MAX;
6269
6270         if (dparams->addr == 0)
6271                 return (ENXIO);
6272
6273         buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
6274         if (buf == NULL)
6275                 return (ENOMEM);
6276
6277         rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
6278         if (rc != 0)
6279                 goto done;
6280
6281         nentries = dparams->size / sizeof(struct fw_devlog_e);
6282         for (i = 0; i < nentries; i++) {
6283                 e = &buf[i];
6284
6285                 if (e->timestamp == 0)
6286                         break;  /* end */
6287
6288                 e->timestamp = be64toh(e->timestamp);
6289                 e->seqno = be32toh(e->seqno);
6290                 for (j = 0; j < 8; j++)
6291                         e->params[j] = be32toh(e->params[j]);
6292
6293                 if (e->timestamp < ftstamp) {
6294                         ftstamp = e->timestamp;
6295                         first = i;
6296                 }
6297         }
6298
6299         if (buf[first].timestamp == 0)
6300                 goto done;      /* nothing in the log */
6301
6302         rc = sysctl_wire_old_buffer(req, 0);
6303         if (rc != 0)
6304                 goto done;
6305
6306         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6307         if (sb == NULL) {
6308                 rc = ENOMEM;
6309                 goto done;
6310         }
6311         sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
6312             "Seq#", "Tstamp", "Level", "Facility", "Message");
6313
6314         i = first;
6315         do {
6316                 e = &buf[i];
6317                 if (e->timestamp == 0)
6318                         break;  /* end */
6319
6320                 sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
6321                     e->seqno, e->timestamp,
6322                     (e->level < nitems(devlog_level_strings) ?
6323                         devlog_level_strings[e->level] : "UNKNOWN"),
6324                     (e->facility < nitems(devlog_facility_strings) ?
6325                         devlog_facility_strings[e->facility] : "UNKNOWN"));
6326                 sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
6327                     e->params[2], e->params[3], e->params[4],
6328                     e->params[5], e->params[6], e->params[7]);
6329
6330                 if (++i == nentries)
6331                         i = 0;
6332         } while (i != first);
6333
6334         rc = sbuf_finish(sb);
6335         sbuf_delete(sb);
6336 done:
6337         free(buf, M_CXGBE);
6338         return (rc);
6339 }
6340
6341 static int
6342 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
6343 {
6344         struct adapter *sc = arg1;
6345         struct sbuf *sb;
6346         int rc;
6347         struct tp_fcoe_stats stats[MAX_NCHAN];
6348         int i, nchan = sc->chip_params->nchan;
6349
6350         rc = sysctl_wire_old_buffer(req, 0);
6351         if (rc != 0)
6352                 return (rc);
6353
6354         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6355         if (sb == NULL)
6356                 return (ENOMEM);
6357
6358         for (i = 0; i < nchan; i++)
6359                 t4_get_fcoe_stats(sc, i, &stats[i]);
6360
6361         if (nchan > 2) {
6362                 sbuf_printf(sb, "                   channel 0        channel 1"
6363                     "        channel 2        channel 3");
6364                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
6365                     stats[0].octets_ddp, stats[1].octets_ddp,
6366                     stats[2].octets_ddp, stats[3].octets_ddp);
6367                 sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
6368                     stats[0].frames_ddp, stats[1].frames_ddp,
6369                     stats[2].frames_ddp, stats[3].frames_ddp);
6370                 sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
6371                     stats[0].frames_drop, stats[1].frames_drop,
6372                     stats[2].frames_drop, stats[3].frames_drop);
6373         } else {
6374                 sbuf_printf(sb, "                   channel 0        channel 1");
6375                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
6376                     stats[0].octets_ddp, stats[1].octets_ddp);
6377                 sbuf_printf(sb, "\nframesDDP:  %16u %16u",
6378                     stats[0].frames_ddp, stats[1].frames_ddp);
6379                 sbuf_printf(sb, "\nframesDrop: %16u %16u",
6380                     stats[0].frames_drop, stats[1].frames_drop);
6381         }
6382
6383         rc = sbuf_finish(sb);
6384         sbuf_delete(sb);
6385
6386         return (rc);
6387 }
6388
6389 static int
6390 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
6391 {
6392         struct adapter *sc = arg1;
6393         struct sbuf *sb;
6394         int rc, i;
6395         unsigned int map, kbps, ipg, mode;
6396         unsigned int pace_tab[NTX_SCHED];
6397
6398         rc = sysctl_wire_old_buffer(req, 0);
6399         if (rc != 0)
6400                 return (rc);
6401
6402         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6403         if (sb == NULL)
6404                 return (ENOMEM);
6405
6406         map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
6407         mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
6408         t4_read_pace_tbl(sc, pace_tab);
6409
6410         sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
6411             "Class IPG (0.1 ns)   Flow IPG (us)");
6412
6413         for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
6414                 t4_get_tx_sched(sc, i, &kbps, &ipg);
6415                 sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
6416                     (mode & (1 << i)) ? "flow" : "class", map & 3);
6417                 if (kbps)
6418                         sbuf_printf(sb, "%9u     ", kbps);
6419                 else
6420                         sbuf_printf(sb, " disabled     ");
6421
6422                 if (ipg)
6423                         sbuf_printf(sb, "%13u        ", ipg);
6424                 else
6425                         sbuf_printf(sb, "     disabled        ");
6426
6427                 if (pace_tab[i])
6428                         sbuf_printf(sb, "%10u", pace_tab[i]);
6429                 else
6430                         sbuf_printf(sb, "  disabled");
6431         }
6432
6433         rc = sbuf_finish(sb);
6434         sbuf_delete(sb);
6435
6436         return (rc);
6437 }
6438
6439 static int
6440 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
6441 {
6442         struct adapter *sc = arg1;
6443         struct sbuf *sb;
6444         int rc, i, j;
6445         uint64_t *p0, *p1;
6446         struct lb_port_stats s[2];
6447         static const char *stat_name[] = {
6448                 "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
6449                 "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
6450                 "Frames128To255:", "Frames256To511:", "Frames512To1023:",
6451                 "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
6452                 "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
6453                 "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
6454                 "BG2FramesTrunc:", "BG3FramesTrunc:"
6455         };
6456
6457         rc = sysctl_wire_old_buffer(req, 0);
6458         if (rc != 0)
6459                 return (rc);
6460
6461         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6462         if (sb == NULL)
6463                 return (ENOMEM);
6464
6465         memset(s, 0, sizeof(s));
6466
6467         for (i = 0; i < sc->chip_params->nchan; i += 2) {
6468                 t4_get_lb_stats(sc, i, &s[0]);
6469                 t4_get_lb_stats(sc, i + 1, &s[1]);
6470
6471                 p0 = &s[0].octets;
6472                 p1 = &s[1].octets;
6473                 sbuf_printf(sb, "%s                       Loopback %u"
6474                     "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
6475
6476                 for (j = 0; j < nitems(stat_name); j++)
6477                         sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
6478                                    *p0++, *p1++);
6479         }
6480
6481         rc = sbuf_finish(sb);
6482         sbuf_delete(sb);
6483
6484         return (rc);
6485 }
6486
6487 static int
6488 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
6489 {
6490         int rc = 0;
6491         struct port_info *pi = arg1;
6492         struct sbuf *sb;
6493
6494         rc = sysctl_wire_old_buffer(req, 0);
6495         if (rc != 0)
6496                 return(rc);
6497         sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
6498         if (sb == NULL)
6499                 return (ENOMEM);
6500
6501         if (pi->linkdnrc < 0)
6502                 sbuf_printf(sb, "n/a");
6503         else
6504                 sbuf_printf(sb, "%s", t4_link_down_rc_str(pi->linkdnrc));
6505
6506         rc = sbuf_finish(sb);
6507         sbuf_delete(sb);
6508
6509         return (rc);
6510 }
6511
/*
 * One adapter memory range or region, as collected by sysctl_meminfo().
 */
struct mem_desc {
	unsigned int base;	/* start address; this is the sort key */
	unsigned int limit;	/* end of the range; 0 means "not set" */
	unsigned int idx;	/* index into the caller's table of names */
};

/*
 * qsort(3) comparator that orders mem_desc entries by ascending base.
 *
 * The bases are unsigned, so the result is derived from explicit comparisons
 * rather than from a subtraction: a difference larger than INT_MAX would
 * change sign when converted to int and produce the wrong order.
 *
 * Returns a negative, zero, or positive value when a's base is less than,
 * equal to, or greater than b's base.
 */
static int
mem_desc_cmp(const void *a, const void *b)
{
	const unsigned int abase = ((const struct mem_desc *)a)->base;
	const unsigned int bbase = ((const struct mem_desc *)b)->base;

	return ((abase > bbase) - (abase < bbase));
}
6524
/*
 * Appends one "name from-to [size]" line to sb for the inclusive address
 * range [from, to].  Nothing is printed when from equals to or when the
 * computed size wraps around to zero.
 */
static void
mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
    unsigned int to)
{
	const unsigned int nbytes = to - from + 1;

	if (from != to && nbytes != 0) {
		/*
		 * XXX: need humanize_number(3) in libkern for a more readable
		 * 'size'
		 */
		sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, nbytes);
	}
}
6541
6542 static int
6543 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
6544 {
6545         struct adapter *sc = arg1;
6546         struct sbuf *sb;
6547         int rc, i, n;
6548         uint32_t lo, hi, used, alloc;
6549         static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
6550         static const char *region[] = {
6551                 "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
6552                 "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
6553                 "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
6554                 "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
6555                 "RQUDP region:", "PBL region:", "TXPBL region:",
6556                 "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
6557                 "On-chip queues:"
6558         };
6559         struct mem_desc avail[4];
6560         struct mem_desc mem[nitems(region) + 3];        /* up to 3 holes */
6561         struct mem_desc *md = mem;
6562
6563         rc = sysctl_wire_old_buffer(req, 0);
6564         if (rc != 0)
6565                 return (rc);
6566
6567         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6568         if (sb == NULL)
6569                 return (ENOMEM);
6570
6571         for (i = 0; i < nitems(mem); i++) {
6572                 mem[i].limit = 0;
6573                 mem[i].idx = i;
6574         }
6575
6576         /* Find and sort the populated memory ranges */
6577         i = 0;
6578         lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
6579         if (lo & F_EDRAM0_ENABLE) {
6580                 hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
6581                 avail[i].base = G_EDRAM0_BASE(hi) << 20;
6582                 avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
6583                 avail[i].idx = 0;
6584                 i++;
6585         }
6586         if (lo & F_EDRAM1_ENABLE) {
6587                 hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
6588                 avail[i].base = G_EDRAM1_BASE(hi) << 20;
6589                 avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
6590                 avail[i].idx = 1;
6591                 i++;
6592         }
6593         if (lo & F_EXT_MEM_ENABLE) {
6594                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
6595                 avail[i].base = G_EXT_MEM_BASE(hi) << 20;
6596                 avail[i].limit = avail[i].base +
6597                     (G_EXT_MEM_SIZE(hi) << 20);
6598                 avail[i].idx = is_t5(sc) ? 3 : 2;       /* Call it MC0 for T5 */
6599                 i++;
6600         }
6601         if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
6602                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
6603                 avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
6604                 avail[i].limit = avail[i].base +
6605                     (G_EXT_MEM1_SIZE(hi) << 20);
6606                 avail[i].idx = 4;
6607                 i++;
6608         }
6609         if (!i)                                    /* no memory available */
6610                 return 0;
6611         qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
6612
6613         (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
6614         (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
6615         (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
6616         (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
6617         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
6618         (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
6619         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
6620         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
6621         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
6622
6623         /* the next few have explicit upper bounds */
6624         md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
6625         md->limit = md->base - 1 +
6626                     t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
6627                     G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
6628         md++;
6629
6630         md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
6631         md->limit = md->base - 1 +
6632                     t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
6633                     G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
6634         md++;
6635
6636         if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
6637                 if (chip_id(sc) <= CHELSIO_T5)
6638                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
6639                 else
6640                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
6641                 md->limit = 0;
6642         } else {
6643                 md->base = 0;
6644                 md->idx = nitems(region);  /* hide it */
6645         }
6646         md++;
6647
6648 #define ulp_region(reg) \
6649         md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
6650         (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
6651
6652         ulp_region(RX_ISCSI);
6653         ulp_region(RX_TDDP);
6654         ulp_region(TX_TPT);
6655         ulp_region(RX_STAG);
6656         ulp_region(RX_RQ);
6657         ulp_region(RX_RQUDP);
6658         ulp_region(RX_PBL);
6659         ulp_region(TX_PBL);
6660 #undef ulp_region
6661
6662         md->base = 0;
6663         md->idx = nitems(region);
6664         if (!is_t4(sc)) {
6665                 uint32_t size = 0;
6666                 uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
6667                 uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
6668
6669                 if (is_t5(sc)) {
6670                         if (sge_ctrl & F_VFIFO_ENABLE)
6671                                 size = G_DBVFIFO_SIZE(fifo_size);
6672                 } else
6673                         size = G_T6_DBVFIFO_SIZE(fifo_size);
6674
6675                 if (size) {
6676                         md->base = G_BASEADDR(t4_read_reg(sc,
6677                             A_SGE_DBVFIFO_BADDR));
6678                         md->limit = md->base + (size << 2) - 1;
6679                 }
6680         }
6681         md++;
6682
6683         md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
6684         md->limit = 0;
6685         md++;
6686         md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
6687         md->limit = 0;
6688         md++;
6689
6690         md->base = sc->vres.ocq.start;
6691         if (sc->vres.ocq.size)
6692                 md->limit = md->base + sc->vres.ocq.size - 1;
6693         else
6694                 md->idx = nitems(region);  /* hide it */
6695         md++;
6696
6697         /* add any address-space holes, there can be up to 3 */
6698         for (n = 0; n < i - 1; n++)
6699                 if (avail[n].limit < avail[n + 1].base)
6700                         (md++)->base = avail[n].limit;
6701         if (avail[n].limit)
6702                 (md++)->base = avail[n].limit;
6703
6704         n = md - mem;
6705         qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
6706
6707         for (lo = 0; lo < i; lo++)
6708                 mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
6709                                 avail[lo].limit - 1);
6710
6711         sbuf_printf(sb, "\n");
6712         for (i = 0; i < n; i++) {
6713                 if (mem[i].idx >= nitems(region))
6714                         continue;                        /* skip holes */
6715                 if (!mem[i].limit)
6716                         mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
6717                 mem_region_show(sb, region[mem[i].idx], mem[i].base,
6718                                 mem[i].limit);
6719         }
6720
6721         sbuf_printf(sb, "\n");
6722         lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
6723         hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
6724         mem_region_show(sb, "uP RAM:", lo, hi);
6725
6726         lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
6727         hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
6728         mem_region_show(sb, "uP Extmem2:", lo, hi);
6729
6730         lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
6731         sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
6732                    G_PMRXMAXPAGE(lo),
6733                    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
6734                    (lo & F_PMRXNUMCHN) ? 2 : 1);
6735
6736         lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
6737         hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
6738         sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
6739                    G_PMTXMAXPAGE(lo),
6740                    hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
6741                    hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
6742         sbuf_printf(sb, "%u p-structs\n",
6743                    t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
6744
6745         for (i = 0; i < 4; i++) {
6746                 if (chip_id(sc) > CHELSIO_T5)
6747                         lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
6748                 else
6749                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
6750                 if (is_t5(sc)) {
6751                         used = G_T5_USED(lo);
6752                         alloc = G_T5_ALLOC(lo);
6753                 } else {
6754                         used = G_USED(lo);
6755                         alloc = G_ALLOC(lo);
6756                 }
6757                 /* For T6 these are MAC buffer groups */
6758                 sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
6759                     i, used, alloc);
6760         }
6761         for (i = 0; i < sc->chip_params->nchan; i++) {
6762                 if (chip_id(sc) > CHELSIO_T5)
6763                         lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
6764                 else
6765                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
6766                 if (is_t5(sc)) {
6767                         used = G_T5_USED(lo);
6768                         alloc = G_T5_ALLOC(lo);
6769                 } else {
6770                         used = G_USED(lo);
6771                         alloc = G_ALLOC(lo);
6772                 }
6773                 /* For T6 these are MAC buffer groups */
6774                 sbuf_printf(sb,
6775                     "\nLoopback %d using %u pages out of %u allocated",
6776                     i, used, alloc);
6777         }
6778
6779         rc = sbuf_finish(sb);
6780         sbuf_delete(sb);
6781
6782         return (rc);
6783 }
6784
6785 static inline void
6786 tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
6787 {
6788         *mask = x | y;
6789         y = htobe64(y);
6790         memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
6791 }
6792
6793 static int
6794 sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
6795 {
6796         struct adapter *sc = arg1;
6797         struct sbuf *sb;
6798         int rc, i;
6799
6800         MPASS(chip_id(sc) <= CHELSIO_T5);
6801
6802         rc = sysctl_wire_old_buffer(req, 0);
6803         if (rc != 0)
6804                 return (rc);
6805
6806         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6807         if (sb == NULL)
6808                 return (ENOMEM);
6809
6810         sbuf_printf(sb,
6811             "Idx  Ethernet address     Mask     Vld Ports PF"
6812             "  VF              Replication             P0 P1 P2 P3  ML");
6813         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6814                 uint64_t tcamx, tcamy, mask;
6815                 uint32_t cls_lo, cls_hi;
6816                 uint8_t addr[ETHER_ADDR_LEN];
6817
6818                 tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
6819                 tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
6820                 if (tcamx & tcamy)
6821                         continue;
6822                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
6823                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6824                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6825                 sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
6826                            "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
6827                            addr[3], addr[4], addr[5], (uintmax_t)mask,
6828                            (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
6829                            G_PORTMAP(cls_hi), G_PF(cls_lo),
6830                            (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
6831
6832                 if (cls_lo & F_REPLICATE) {
6833                         struct fw_ldst_cmd ldst_cmd;
6834
6835                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6836                         ldst_cmd.op_to_addrspace =
6837                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6838                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
6839                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6840                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6841                         ldst_cmd.u.mps.rplc.fid_idx =
6842                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6843                                 V_FW_LDST_CMD_IDX(i));
6844
6845                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6846                             "t4mps");
6847                         if (rc)
6848                                 break;
6849                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
6850                             sizeof(ldst_cmd), &ldst_cmd);
6851                         end_synchronized_op(sc, 0);
6852
6853                         if (rc != 0) {
6854                                 sbuf_printf(sb, "%36d", rc);
6855                                 rc = 0;
6856                         } else {
6857                                 sbuf_printf(sb, " %08x %08x %08x %08x",
6858                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
6859                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
6860                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
6861                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
6862                         }
6863                 } else
6864                         sbuf_printf(sb, "%36s", "");
6865
6866                 sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
6867                     G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
6868                     G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
6869         }
6870
6871         if (rc)
6872                 (void) sbuf_finish(sb);
6873         else
6874                 rc = sbuf_finish(sb);
6875         sbuf_delete(sb);
6876
6877         return (rc);
6878 }
6879
6880 static int
6881 sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
6882 {
6883         struct adapter *sc = arg1;
6884         struct sbuf *sb;
6885         int rc, i;
6886
6887         MPASS(chip_id(sc) > CHELSIO_T5);
6888
6889         rc = sysctl_wire_old_buffer(req, 0);
6890         if (rc != 0)
6891                 return (rc);
6892
6893         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6894         if (sb == NULL)
6895                 return (ENOMEM);
6896
6897         sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
6898             "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
6899             "                           Replication"
6900             "                                    P0 P1 P2 P3  ML\n");
6901
6902         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
6903                 uint8_t dip_hit, vlan_vld, lookup_type, port_num;
6904                 uint16_t ivlan;
6905                 uint64_t tcamx, tcamy, val, mask;
6906                 uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
6907                 uint8_t addr[ETHER_ADDR_LEN];
6908
6909                 ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
6910                 if (i < 256)
6911                         ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
6912                 else
6913                         ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
6914                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6915                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6916                 tcamy = G_DMACH(val) << 32;
6917                 tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6918                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6919                 lookup_type = G_DATALKPTYPE(data2);
6920                 port_num = G_DATAPORTNUM(data2);
6921                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
6922                         /* Inner header VNI */
6923                         vniy = ((data2 & F_DATAVIDH2) << 23) |
6924                                        (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6925                         dip_hit = data2 & F_DATADIPHIT;
6926                         vlan_vld = 0;
6927                 } else {
6928                         vniy = 0;
6929                         dip_hit = 0;
6930                         vlan_vld = data2 & F_DATAVIDH2;
6931                         ivlan = G_VIDL(val);
6932                 }
6933
6934                 ctl |= V_CTLXYBITSEL(1);
6935                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
6936                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
6937                 tcamx = G_DMACH(val) << 32;
6938                 tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
6939                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
6940                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
6941                         /* Inner header VNI mask */
6942                         vnix = ((data2 & F_DATAVIDH2) << 23) |
6943                                (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
6944                 } else
6945                         vnix = 0;
6946
6947                 if (tcamx & tcamy)
6948                         continue;
6949                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
6950
6951                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
6952                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
6953
6954                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
6955                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6956                             "%012jx %06x %06x    -    -   %3c"
6957                             "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
6958                             addr[1], addr[2], addr[3], addr[4], addr[5],
6959                             (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
6960                             port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6961                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6962                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6963                 } else {
6964                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
6965                             "%012jx    -       -   ", i, addr[0], addr[1],
6966                             addr[2], addr[3], addr[4], addr[5],
6967                             (uintmax_t)mask);
6968
6969                         if (vlan_vld)
6970                                 sbuf_printf(sb, "%4u   Y     ", ivlan);
6971                         else
6972                                 sbuf_printf(sb, "  -    N     ");
6973
6974                         sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
6975                             lookup_type ? 'I' : 'O', port_num,
6976                             cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
6977                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
6978                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
6979                 }
6980
6981
6982                 if (cls_lo & F_T6_REPLICATE) {
6983                         struct fw_ldst_cmd ldst_cmd;
6984
6985                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
6986                         ldst_cmd.op_to_addrspace =
6987                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
6988                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
6989                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
6990                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
6991                         ldst_cmd.u.mps.rplc.fid_idx =
6992                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
6993                                 V_FW_LDST_CMD_IDX(i));
6994
6995                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
6996                             "t6mps");
6997                         if (rc)
6998                                 break;
6999                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
7000                             sizeof(ldst_cmd), &ldst_cmd);
7001                         end_synchronized_op(sc, 0);
7002
7003                         if (rc != 0) {
7004                                 sbuf_printf(sb, "%72d", rc);
7005                                 rc = 0;
7006                         } else {
7007                                 sbuf_printf(sb, " %08x %08x %08x %08x"
7008                                     " %08x %08x %08x %08x",
7009                                     be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
7010                                     be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
7011                                     be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
7012                                     be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
7013                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
7014                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
7015                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
7016                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
7017                         }
7018                 } else
7019                         sbuf_printf(sb, "%72s", "");
7020
7021                 sbuf_printf(sb, "%4u%3u%3u%3u %#x",
7022                     G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
7023                     G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
7024                     (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
7025         }
7026
7027         if (rc)
7028                 (void) sbuf_finish(sb);
7029         else
7030                 rc = sbuf_finish(sb);
7031         sbuf_delete(sb);
7032
7033         return (rc);
7034 }
7035
7036 static int
7037 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
7038 {
7039         struct adapter *sc = arg1;
7040         struct sbuf *sb;
7041         int rc;
7042         uint16_t mtus[NMTUS];
7043
7044         rc = sysctl_wire_old_buffer(req, 0);
7045         if (rc != 0)
7046                 return (rc);
7047
7048         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7049         if (sb == NULL)
7050                 return (ENOMEM);
7051
7052         t4_read_mtu_tbl(sc, mtus, NULL);
7053
7054         sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
7055             mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
7056             mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
7057             mtus[14], mtus[15]);
7058
7059         rc = sbuf_finish(sb);
7060         sbuf_delete(sb);
7061
7062         return (rc);
7063 }
7064
7065 static int
7066 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
7067 {
7068         struct adapter *sc = arg1;
7069         struct sbuf *sb;
7070         int rc, i;
7071         uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
7072         uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
7073         static const char *tx_stats[MAX_PM_NSTATS] = {
7074                 "Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
7075                 "Tx FIFO wait", NULL, "Tx latency"
7076         };
7077         static const char *rx_stats[MAX_PM_NSTATS] = {
7078                 "Read:", "Write bypass:", "Write mem:", "Flush:",
7079                 "Rx FIFO wait", NULL, "Rx latency"
7080         };
7081
7082         rc = sysctl_wire_old_buffer(req, 0);
7083         if (rc != 0)
7084                 return (rc);
7085
7086         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7087         if (sb == NULL)
7088                 return (ENOMEM);
7089
7090         t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
7091         t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
7092
7093         sbuf_printf(sb, "                Tx pcmds             Tx bytes");
7094         for (i = 0; i < 4; i++) {
7095                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
7096                     tx_cyc[i]);
7097         }
7098
7099         sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
7100         for (i = 0; i < 4; i++) {
7101                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
7102                     rx_cyc[i]);
7103         }
7104
7105         if (chip_id(sc) > CHELSIO_T5) {
7106                 sbuf_printf(sb,
7107                     "\n              Total wait      Total occupancy");
7108                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
7109                     tx_cyc[i]);
7110                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
7111                     rx_cyc[i]);
7112
7113                 i += 2;
7114                 MPASS(i < nitems(tx_stats));
7115
7116                 sbuf_printf(sb,
7117                     "\n                   Reads           Total wait");
7118                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
7119                     tx_cyc[i]);
7120                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
7121                     rx_cyc[i]);
7122         }
7123
7124         rc = sbuf_finish(sb);
7125         sbuf_delete(sb);
7126
7127         return (rc);
7128 }
7129
7130 static int
7131 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
7132 {
7133         struct adapter *sc = arg1;
7134         struct sbuf *sb;
7135         int rc;
7136         struct tp_rdma_stats stats;
7137
7138         rc = sysctl_wire_old_buffer(req, 0);
7139         if (rc != 0)
7140                 return (rc);
7141
7142         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7143         if (sb == NULL)
7144                 return (ENOMEM);
7145
7146         mtx_lock(&sc->reg_lock);
7147         t4_tp_get_rdma_stats(sc, &stats);
7148         mtx_unlock(&sc->reg_lock);
7149
7150         sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
7151         sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
7152
7153         rc = sbuf_finish(sb);
7154         sbuf_delete(sb);
7155
7156         return (rc);
7157 }
7158
7159 static int
7160 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
7161 {
7162         struct adapter *sc = arg1;
7163         struct sbuf *sb;
7164         int rc;
7165         struct tp_tcp_stats v4, v6;
7166
7167         rc = sysctl_wire_old_buffer(req, 0);
7168         if (rc != 0)
7169                 return (rc);
7170
7171         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7172         if (sb == NULL)
7173                 return (ENOMEM);
7174
7175         mtx_lock(&sc->reg_lock);
7176         t4_tp_get_tcp_stats(sc, &v4, &v6);
7177         mtx_unlock(&sc->reg_lock);
7178
7179         sbuf_printf(sb,
7180             "                                IP                 IPv6\n");
7181         sbuf_printf(sb, "OutRsts:      %20u %20u\n",
7182             v4.tcp_out_rsts, v6.tcp_out_rsts);
7183         sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
7184             v4.tcp_in_segs, v6.tcp_in_segs);
7185         sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
7186             v4.tcp_out_segs, v6.tcp_out_segs);
7187         sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
7188             v4.tcp_retrans_segs, v6.tcp_retrans_segs);
7189
7190         rc = sbuf_finish(sb);
7191         sbuf_delete(sb);
7192
7193         return (rc);
7194 }
7195
7196 static int
7197 sysctl_tids(SYSCTL_HANDLER_ARGS)
7198 {
7199         struct adapter *sc = arg1;
7200         struct sbuf *sb;
7201         int rc;
7202         struct tid_info *t = &sc->tids;
7203
7204         rc = sysctl_wire_old_buffer(req, 0);
7205         if (rc != 0)
7206                 return (rc);
7207
7208         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7209         if (sb == NULL)
7210                 return (ENOMEM);
7211
7212         if (t->natids) {
7213                 sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
7214                     t->atids_in_use);
7215         }
7216
7217         if (t->ntids) {
7218                 if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
7219                         uint32_t b;
7220
7221                         if (chip_id(sc) <= CHELSIO_T5)
7222                                 b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
7223                         else
7224                                 b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
7225
7226                         if (b) {
7227                                 sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1,
7228                                     t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
7229                                     t->ntids - 1);
7230                         } else {
7231                                 sbuf_printf(sb, "TID range: %u-%u",
7232                                     t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4,
7233                                     t->ntids - 1);
7234                         }
7235                 } else
7236                         sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1);
7237                 sbuf_printf(sb, ", in use: %u\n",
7238                     atomic_load_acq_int(&t->tids_in_use));
7239         }
7240
7241         if (t->nstids) {
7242                 sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
7243                     t->stid_base + t->nstids - 1, t->stids_in_use);
7244         }
7245
7246         if (t->nftids) {
7247                 sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
7248                     t->ftid_base + t->nftids - 1);
7249         }
7250
7251         if (t->netids) {
7252                 sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base,
7253                     t->etid_base + t->netids - 1);
7254         }
7255
7256         sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
7257             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
7258             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
7259
7260         rc = sbuf_finish(sb);
7261         sbuf_delete(sb);
7262
7263         return (rc);
7264 }
7265
7266 static int
7267 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
7268 {
7269         struct adapter *sc = arg1;
7270         struct sbuf *sb;
7271         int rc;
7272         struct tp_err_stats stats;
7273
7274         rc = sysctl_wire_old_buffer(req, 0);
7275         if (rc != 0)
7276                 return (rc);
7277
7278         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7279         if (sb == NULL)
7280                 return (ENOMEM);
7281
7282         mtx_lock(&sc->reg_lock);
7283         t4_tp_get_err_stats(sc, &stats);
7284         mtx_unlock(&sc->reg_lock);
7285
7286         if (sc->chip_params->nchan > 2) {
7287                 sbuf_printf(sb, "                 channel 0  channel 1"
7288                     "  channel 2  channel 3\n");
7289                 sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
7290                     stats.mac_in_errs[0], stats.mac_in_errs[1],
7291                     stats.mac_in_errs[2], stats.mac_in_errs[3]);
7292                 sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
7293                     stats.hdr_in_errs[0], stats.hdr_in_errs[1],
7294                     stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
7295                 sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
7296                     stats.tcp_in_errs[0], stats.tcp_in_errs[1],
7297                     stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
7298                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
7299                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
7300                     stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
7301                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
7302                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
7303                     stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
7304                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
7305                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
7306                     stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
7307                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
7308                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
7309                     stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
7310                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
7311                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
7312                     stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
7313         } else {
7314                 sbuf_printf(sb, "                 channel 0  channel 1\n");
7315                 sbuf_printf(sb, "macInErrs:      %10u %10u\n",
7316                     stats.mac_in_errs[0], stats.mac_in_errs[1]);
7317                 sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
7318                     stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
7319                 sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
7320                     stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
7321                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
7322                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
7323                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
7324                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
7325                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
7326                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
7327                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
7328                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
7329                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
7330                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
7331         }
7332
7333         sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
7334             stats.ofld_no_neigh, stats.ofld_cong_defer);
7335
7336         rc = sbuf_finish(sb);
7337         sbuf_delete(sb);
7338
7339         return (rc);
7340 }
7341
7342 static int
7343 sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
7344 {
7345         struct adapter *sc = arg1;
7346         struct tp_params *tpp = &sc->params.tp;
7347         u_int mask;
7348         int rc;
7349
7350         mask = tpp->la_mask >> 16;
7351         rc = sysctl_handle_int(oidp, &mask, 0, req);
7352         if (rc != 0 || req->newptr == NULL)
7353                 return (rc);
7354         if (mask > 0xffff)
7355                 return (EINVAL);
7356         tpp->la_mask = mask << 16;
7357         t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
7358
7359         return (0);
7360 }
7361
/*
 * Describes one bit field within a 64-bit value, for field_desc_show().
 * Tables of these are terminated by an entry whose name is NULL.
 */
struct field_desc {
	const char *name;	/* label printed in front of the value */
	unsigned int start;	/* position of the field's lowest bit */
	unsigned int width;	/* number of bits in the field */
};
7367
7368 static void
7369 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
7370 {
7371         char buf[32];
7372         int line_size = 0;
7373
7374         while (f->name) {
7375                 uint64_t mask = (1ULL << f->width) - 1;
7376                 int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
7377                     ((uintmax_t)v >> f->start) & mask);
7378
7379                 if (line_size + len >= 79) {
7380                         line_size = 8;
7381                         sbuf_printf(sb, "\n        ");
7382                 }
7383                 sbuf_printf(sb, "%s ", buf);
7384                 line_size += len + 1;
7385                 f++;
7386         }
7387         sbuf_printf(sb, "\n");
7388 }
7389
7390 static const struct field_desc tp_la0[] = {
7391         { "RcfOpCodeOut", 60, 4 },
7392         { "State", 56, 4 },
7393         { "WcfState", 52, 4 },
7394         { "RcfOpcSrcOut", 50, 2 },
7395         { "CRxError", 49, 1 },
7396         { "ERxError", 48, 1 },
7397         { "SanityFailed", 47, 1 },
7398         { "SpuriousMsg", 46, 1 },
7399         { "FlushInputMsg", 45, 1 },
7400         { "FlushInputCpl", 44, 1 },
7401         { "RssUpBit", 43, 1 },
7402         { "RssFilterHit", 42, 1 },
7403         { "Tid", 32, 10 },
7404         { "InitTcb", 31, 1 },
7405         { "LineNumber", 24, 7 },
7406         { "Emsg", 23, 1 },
7407         { "EdataOut", 22, 1 },
7408         { "Cmsg", 21, 1 },
7409         { "CdataOut", 20, 1 },
7410         { "EreadPdu", 19, 1 },
7411         { "CreadPdu", 18, 1 },
7412         { "TunnelPkt", 17, 1 },
7413         { "RcfPeerFin", 16, 1 },
7414         { "RcfReasonOut", 12, 4 },
7415         { "TxCchannel", 10, 2 },
7416         { "RcfTxChannel", 8, 2 },
7417         { "RxEchannel", 6, 2 },
7418         { "RcfRxChannel", 5, 1 },
7419         { "RcfDataOutSrdy", 4, 1 },
7420         { "RxDvld", 3, 1 },
7421         { "RxOoDvld", 2, 1 },
7422         { "RxCongestion", 1, 1 },
7423         { "TxCongestion", 0, 1 },
7424         { NULL }
7425 };
7426
7427 static const struct field_desc tp_la1[] = {
7428         { "CplCmdIn", 56, 8 },
7429         { "CplCmdOut", 48, 8 },
7430         { "ESynOut", 47, 1 },
7431         { "EAckOut", 46, 1 },
7432         { "EFinOut", 45, 1 },
7433         { "ERstOut", 44, 1 },
7434         { "SynIn", 43, 1 },
7435         { "AckIn", 42, 1 },
7436         { "FinIn", 41, 1 },
7437         { "RstIn", 40, 1 },
7438         { "DataIn", 39, 1 },
7439         { "DataInVld", 38, 1 },
7440         { "PadIn", 37, 1 },
7441         { "RxBufEmpty", 36, 1 },
7442         { "RxDdp", 35, 1 },
7443         { "RxFbCongestion", 34, 1 },
7444         { "TxFbCongestion", 33, 1 },
7445         { "TxPktSumSrdy", 32, 1 },
7446         { "RcfUlpType", 28, 4 },
7447         { "Eread", 27, 1 },
7448         { "Ebypass", 26, 1 },
7449         { "Esave", 25, 1 },
7450         { "Static0", 24, 1 },
7451         { "Cread", 23, 1 },
7452         { "Cbypass", 22, 1 },
7453         { "Csave", 21, 1 },
7454         { "CPktOut", 20, 1 },
7455         { "RxPagePoolFull", 18, 2 },
7456         { "RxLpbkPkt", 17, 1 },
7457         { "TxLpbkPkt", 16, 1 },
7458         { "RxVfValid", 15, 1 },
7459         { "SynLearned", 14, 1 },
7460         { "SetDelEntry", 13, 1 },
7461         { "SetInvEntry", 12, 1 },
7462         { "CpcmdDvld", 11, 1 },
7463         { "CpcmdSave", 10, 1 },
7464         { "RxPstructsFull", 8, 2 },
7465         { "EpcmdDvld", 7, 1 },
7466         { "EpcmdFlush", 6, 1 },
7467         { "EpcmdTrimPrefix", 5, 1 },
7468         { "EpcmdTrimPostfix", 4, 1 },
7469         { "ERssIp4Pkt", 3, 1 },
7470         { "ERssIp6Pkt", 2, 1 },
7471         { "ERssTcpUdpPkt", 1, 1 },
7472         { "ERssFceFipPkt", 0, 1 },
7473         { NULL }
7474 };
7475
7476 static const struct field_desc tp_la2[] = {
7477         { "CplCmdIn", 56, 8 },
7478         { "MpsVfVld", 55, 1 },
7479         { "MpsPf", 52, 3 },
7480         { "MpsVf", 44, 8 },
7481         { "SynIn", 43, 1 },
7482         { "AckIn", 42, 1 },
7483         { "FinIn", 41, 1 },
7484         { "RstIn", 40, 1 },
7485         { "DataIn", 39, 1 },
7486         { "DataInVld", 38, 1 },
7487         { "PadIn", 37, 1 },
7488         { "RxBufEmpty", 36, 1 },
7489         { "RxDdp", 35, 1 },
7490         { "RxFbCongestion", 34, 1 },
7491         { "TxFbCongestion", 33, 1 },
7492         { "TxPktSumSrdy", 32, 1 },
7493         { "RcfUlpType", 28, 4 },
7494         { "Eread", 27, 1 },
7495         { "Ebypass", 26, 1 },
7496         { "Esave", 25, 1 },
7497         { "Static0", 24, 1 },
7498         { "Cread", 23, 1 },
7499         { "Cbypass", 22, 1 },
7500         { "Csave", 21, 1 },
7501         { "CPktOut", 20, 1 },
7502         { "RxPagePoolFull", 18, 2 },
7503         { "RxLpbkPkt", 17, 1 },
7504         { "TxLpbkPkt", 16, 1 },
7505         { "RxVfValid", 15, 1 },
7506         { "SynLearned", 14, 1 },
7507         { "SetDelEntry", 13, 1 },
7508         { "SetInvEntry", 12, 1 },
7509         { "CpcmdDvld", 11, 1 },
7510         { "CpcmdSave", 10, 1 },
7511         { "RxPstructsFull", 8, 2 },
7512         { "EpcmdDvld", 7, 1 },
7513         { "EpcmdFlush", 6, 1 },
7514         { "EpcmdTrimPrefix", 5, 1 },
7515         { "EpcmdTrimPostfix", 4, 1 },
7516         { "ERssIp4Pkt", 3, 1 },
7517         { "ERssIp6Pkt", 2, 1 },
7518         { "ERssTcpUdpPkt", 1, 1 },
7519         { "ERssFceFipPkt", 0, 1 },
7520         { NULL }
7521 };
7522
7523 static void
7524 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
7525 {
7526
7527         field_desc_show(sb, *p, tp_la0);
7528 }
7529
7530 static void
7531 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
7532 {
7533
7534         if (idx)
7535                 sbuf_printf(sb, "\n");
7536         field_desc_show(sb, p[0], tp_la0);
7537         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
7538                 field_desc_show(sb, p[1], tp_la0);
7539 }
7540
7541 static void
7542 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
7543 {
7544
7545         if (idx)
7546                 sbuf_printf(sb, "\n");
7547         field_desc_show(sb, p[0], tp_la0);
7548         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
7549                 field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
7550 }
7551
7552 static int
7553 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
7554 {
7555         struct adapter *sc = arg1;
7556         struct sbuf *sb;
7557         uint64_t *buf, *p;
7558         int rc;
7559         u_int i, inc;
7560         void (*show_func)(struct sbuf *, uint64_t *, int);
7561
7562         rc = sysctl_wire_old_buffer(req, 0);
7563         if (rc != 0)
7564                 return (rc);
7565
7566         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7567         if (sb == NULL)
7568                 return (ENOMEM);
7569
7570         buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
7571
7572         t4_tp_read_la(sc, buf, NULL);
7573         p = buf;
7574
7575         switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
7576         case 2:
7577                 inc = 2;
7578                 show_func = tp_la_show2;
7579                 break;
7580         case 3:
7581                 inc = 2;
7582                 show_func = tp_la_show3;
7583                 break;
7584         default:
7585                 inc = 1;
7586                 show_func = tp_la_show;
7587         }
7588
7589         for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
7590                 (*show_func)(sb, p, i);
7591
7592         rc = sbuf_finish(sb);
7593         sbuf_delete(sb);
7594         free(buf, M_CXGBE);
7595         return (rc);
7596 }
7597
7598 static int
7599 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
7600 {
7601         struct adapter *sc = arg1;
7602         struct sbuf *sb;
7603         int rc;
7604         u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
7605
7606         rc = sysctl_wire_old_buffer(req, 0);
7607         if (rc != 0)
7608                 return (rc);
7609
7610         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7611         if (sb == NULL)
7612                 return (ENOMEM);
7613
7614         t4_get_chan_txrate(sc, nrate, orate);
7615
7616         if (sc->chip_params->nchan > 2) {
7617                 sbuf_printf(sb, "              channel 0   channel 1"
7618                     "   channel 2   channel 3\n");
7619                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
7620                     nrate[0], nrate[1], nrate[2], nrate[3]);
7621                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
7622                     orate[0], orate[1], orate[2], orate[3]);
7623         } else {
7624                 sbuf_printf(sb, "              channel 0   channel 1\n");
7625                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
7626                     nrate[0], nrate[1]);
7627                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
7628                     orate[0], orate[1]);
7629         }
7630
7631         rc = sbuf_finish(sb);
7632         sbuf_delete(sb);
7633
7634         return (rc);
7635 }
7636
7637 static int
7638 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
7639 {
7640         struct adapter *sc = arg1;
7641         struct sbuf *sb;
7642         uint32_t *buf, *p;
7643         int rc, i;
7644
7645         rc = sysctl_wire_old_buffer(req, 0);
7646         if (rc != 0)
7647                 return (rc);
7648
7649         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7650         if (sb == NULL)
7651                 return (ENOMEM);
7652
7653         buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
7654             M_ZERO | M_WAITOK);
7655
7656         t4_ulprx_read_la(sc, buf);
7657         p = buf;
7658
7659         sbuf_printf(sb, "      Pcmd        Type   Message"
7660             "                Data");
7661         for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
7662                 sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
7663                     p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
7664         }
7665
7666         rc = sbuf_finish(sb);
7667         sbuf_delete(sb);
7668         free(buf, M_CXGBE);
7669         return (rc);
7670 }
7671
7672 static int
7673 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
7674 {
7675         struct adapter *sc = arg1;
7676         struct sbuf *sb;
7677         int rc, v;
7678
7679         MPASS(chip_id(sc) >= CHELSIO_T5);
7680
7681         rc = sysctl_wire_old_buffer(req, 0);
7682         if (rc != 0)
7683                 return (rc);
7684
7685         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7686         if (sb == NULL)
7687                 return (ENOMEM);
7688
7689         v = t4_read_reg(sc, A_SGE_STAT_CFG);
7690         if (G_STATSOURCE_T5(v) == 7) {
7691                 int mode;
7692
7693                 mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
7694                 if (mode == 0) {
7695                         sbuf_printf(sb, "total %d, incomplete %d",
7696                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
7697                             t4_read_reg(sc, A_SGE_STAT_MATCH));
7698                 } else if (mode == 1) {
7699                         sbuf_printf(sb, "total %d, data overflow %d",
7700                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
7701                             t4_read_reg(sc, A_SGE_STAT_MATCH));
7702                 } else {
7703                         sbuf_printf(sb, "unknown mode %d", mode);
7704                 }
7705         }
7706         rc = sbuf_finish(sb);
7707         sbuf_delete(sb);
7708
7709         return (rc);
7710 }
7711
7712 static int
7713 sysctl_tc_params(SYSCTL_HANDLER_ARGS)
7714 {
7715         struct adapter *sc = arg1;
7716         struct tx_sched_class *tc;
7717         struct t4_sched_class_params p;
7718         struct sbuf *sb;
7719         int i, rc, port_id, flags, mbps, gbps;
7720
7721         rc = sysctl_wire_old_buffer(req, 0);
7722         if (rc != 0)
7723                 return (rc);
7724
7725         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7726         if (sb == NULL)
7727                 return (ENOMEM);
7728
7729         port_id = arg2 >> 16;
7730         MPASS(port_id < sc->params.nports);
7731         MPASS(sc->port[port_id] != NULL);
7732         i = arg2 & 0xffff;
7733         MPASS(i < sc->chip_params->nsched_cls);
7734         tc = &sc->port[port_id]->tc[i];
7735
7736         rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
7737             "t4tc_p");
7738         if (rc)
7739                 goto done;
7740         flags = tc->flags;
7741         p = tc->params;
7742         end_synchronized_op(sc, LOCK_HELD);
7743
7744         if ((flags & TX_SC_OK) == 0) {
7745                 sbuf_printf(sb, "none");
7746                 goto done;
7747         }
7748
7749         if (p.level == SCHED_CLASS_LEVEL_CL_WRR) {
7750                 sbuf_printf(sb, "cl-wrr weight %u", p.weight);
7751                 goto done;
7752         } else if (p.level == SCHED_CLASS_LEVEL_CL_RL)
7753                 sbuf_printf(sb, "cl-rl");
7754         else if (p.level == SCHED_CLASS_LEVEL_CH_RL)
7755                 sbuf_printf(sb, "ch-rl");
7756         else {
7757                 rc = ENXIO;
7758                 goto done;
7759         }
7760
7761         if (p.ratemode == SCHED_CLASS_RATEMODE_REL) {
7762                 /* XXX: top speed or actual link speed? */
7763                 gbps = port_top_speed(sc->port[port_id]);
7764                 sbuf_printf(sb, " %u%% of %uGbps", p.maxrate, gbps);
7765         }
7766         else if (p.ratemode == SCHED_CLASS_RATEMODE_ABS) {
7767                 switch (p.rateunit) {
7768                 case SCHED_CLASS_RATEUNIT_BITS:
7769                         mbps = p.maxrate / 1000;
7770                         gbps = p.maxrate / 1000000;
7771                         if (p.maxrate == gbps * 1000000)
7772                                 sbuf_printf(sb, " %uGbps", gbps);
7773                         else if (p.maxrate == mbps * 1000)
7774                                 sbuf_printf(sb, " %uMbps", mbps);
7775                         else
7776                                 sbuf_printf(sb, " %uKbps", p.maxrate);
7777                         break;
7778                 case SCHED_CLASS_RATEUNIT_PKTS:
7779                         sbuf_printf(sb, " %upps", p.maxrate);
7780                         break;
7781                 default:
7782                         rc = ENXIO;
7783                         goto done;
7784                 }
7785         }
7786
7787         switch (p.mode) {
7788         case SCHED_CLASS_MODE_CLASS:
7789                 sbuf_printf(sb, " aggregate");
7790                 break;
7791         case SCHED_CLASS_MODE_FLOW:
7792                 sbuf_printf(sb, " per-flow");
7793                 break;
7794         default:
7795                 rc = ENXIO;
7796                 goto done;
7797         }
7798
7799 done:
7800         if (rc == 0)
7801                 rc = sbuf_finish(sb);
7802         sbuf_delete(sb);
7803
7804         return (rc);
7805 }
7806 #endif
7807
7808 #ifdef TCP_OFFLOAD
/*
 * Format val / factor as a decimal string in buf (at most len bytes,
 * including the terminating NUL).  Whole quotients are printed without a
 * fractional part; otherwise the fraction is printed with trailing zeroes
 * removed, e.g. 1500000 / 1000000 -> "1.5", 1050000 / 1000000 -> "1.05".
 *
 * factor must be a non-zero power of 10 for the fraction to be meaningful.
 */
static void
unit_conv(char *buf, size_t len, u_int val, u_int factor)
{
	u_int rem = val % factor;
	u_int f;
	int ndigits;

	if (rem == 0) {
		snprintf(buf, len, "%u", val / factor);
		return;
	}

	/*
	 * The fraction has as many digits as factor has zeroes.  It must be
	 * zero-padded on the left to that width, otherwise values such as
	 * 0.05 and 0.5 would both be displayed as ".5".
	 */
	ndigits = 0;
	for (f = factor; f >= 10; f /= 10)
		ndigits++;

	/* Every trailing zero dropped shortens the fraction by one digit. */
	while (rem % 10 == 0) {
		rem /= 10;
		ndigits--;
	}

	snprintf(buf, len, "%u.%0*u", val / factor, ndigits, rem);
}
7822
7823 static int
7824 sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
7825 {
7826         struct adapter *sc = arg1;
7827         char buf[16];
7828         u_int res, re;
7829         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7830
7831         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
7832         switch (arg2) {
7833         case 0:
7834                 /* timer_tick */
7835                 re = G_TIMERRESOLUTION(res);
7836                 break;
7837         case 1:
7838                 /* TCP timestamp tick */
7839                 re = G_TIMESTAMPRESOLUTION(res);
7840                 break;
7841         case 2:
7842                 /* DACK tick */
7843                 re = G_DELAYEDACKRESOLUTION(res);
7844                 break;
7845         default:
7846                 return (EDOOFUS);
7847         }
7848
7849         unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
7850
7851         return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
7852 }
7853
7854 static int
7855 sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
7856 {
7857         struct adapter *sc = arg1;
7858         u_int res, dack_re, v;
7859         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7860
7861         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
7862         dack_re = G_DELAYEDACKRESOLUTION(res);
7863         v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
7864
7865         return (sysctl_handle_int(oidp, &v, 0, req));
7866 }
7867
7868 static int
7869 sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
7870 {
7871         struct adapter *sc = arg1;
7872         int reg = arg2;
7873         u_int tre;
7874         u_long tp_tick_us, v;
7875         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
7876
7877         MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
7878             reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX ||
7879             reg == A_TP_KEEP_IDLE || A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT ||
7880             reg == A_TP_FINWAIT2_TIMER);
7881
7882         tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
7883         tp_tick_us = (cclk_ps << tre) / 1000000;
7884
7885         if (reg == A_TP_INIT_SRTT)
7886                 v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
7887         else
7888                 v = tp_tick_us * t4_read_reg(sc, reg);
7889
7890         return (sysctl_handle_long(oidp, &v, 0, req));
7891 }
7892 #endif
7893
7894 static uint32_t
7895 fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf)
7896 {
7897         uint32_t mode;
7898
7899         mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR |
7900             T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT;
7901
7902         if (fconf & F_FRAGMENTATION)
7903                 mode |= T4_FILTER_IP_FRAGMENT;
7904
7905         if (fconf & F_MPSHITTYPE)
7906                 mode |= T4_FILTER_MPS_HIT_TYPE;
7907
7908         if (fconf & F_MACMATCH)
7909                 mode |= T4_FILTER_MAC_IDX;
7910
7911         if (fconf & F_ETHERTYPE)
7912                 mode |= T4_FILTER_ETH_TYPE;
7913
7914         if (fconf & F_PROTOCOL)
7915                 mode |= T4_FILTER_IP_PROTO;
7916
7917         if (fconf & F_TOS)
7918                 mode |= T4_FILTER_IP_TOS;
7919
7920         if (fconf & F_VLAN)
7921                 mode |= T4_FILTER_VLAN;
7922
7923         if (fconf & F_VNIC_ID) {
7924                 mode |= T4_FILTER_VNIC;
7925                 if (iconf & F_VNIC)
7926                         mode |= T4_FILTER_IC_VNIC;
7927         }
7928
7929         if (fconf & F_PORT)
7930                 mode |= T4_FILTER_PORT;
7931
7932         if (fconf & F_FCOE)
7933                 mode |= T4_FILTER_FCoE;
7934
7935         return (mode);
7936 }
7937
7938 static uint32_t
7939 mode_to_fconf(uint32_t mode)
7940 {
7941         uint32_t fconf = 0;
7942
7943         if (mode & T4_FILTER_IP_FRAGMENT)
7944                 fconf |= F_FRAGMENTATION;
7945
7946         if (mode & T4_FILTER_MPS_HIT_TYPE)
7947                 fconf |= F_MPSHITTYPE;
7948
7949         if (mode & T4_FILTER_MAC_IDX)
7950                 fconf |= F_MACMATCH;
7951
7952         if (mode & T4_FILTER_ETH_TYPE)
7953                 fconf |= F_ETHERTYPE;
7954
7955         if (mode & T4_FILTER_IP_PROTO)
7956                 fconf |= F_PROTOCOL;
7957
7958         if (mode & T4_FILTER_IP_TOS)
7959                 fconf |= F_TOS;
7960
7961         if (mode & T4_FILTER_VLAN)
7962                 fconf |= F_VLAN;
7963
7964         if (mode & T4_FILTER_VNIC)
7965                 fconf |= F_VNIC_ID;
7966
7967         if (mode & T4_FILTER_PORT)
7968                 fconf |= F_PORT;
7969
7970         if (mode & T4_FILTER_FCoE)
7971                 fconf |= F_FCOE;
7972
7973         return (fconf);
7974 }
7975
7976 static uint32_t
7977 mode_to_iconf(uint32_t mode)
7978 {
7979
7980         if (mode & T4_FILTER_IC_VNIC)
7981                 return (F_VNIC);
7982         return (0);
7983 }
7984
7985 static int check_fspec_against_fconf_iconf(struct adapter *sc,
7986     struct t4_filter_specification *fs)
7987 {
7988         struct tp_params *tpp = &sc->params.tp;
7989         uint32_t fconf = 0;
7990
7991         if (fs->val.frag || fs->mask.frag)
7992                 fconf |= F_FRAGMENTATION;
7993
7994         if (fs->val.matchtype || fs->mask.matchtype)
7995                 fconf |= F_MPSHITTYPE;
7996
7997         if (fs->val.macidx || fs->mask.macidx)
7998                 fconf |= F_MACMATCH;
7999
8000         if (fs->val.ethtype || fs->mask.ethtype)
8001                 fconf |= F_ETHERTYPE;
8002
8003         if (fs->val.proto || fs->mask.proto)
8004                 fconf |= F_PROTOCOL;
8005
8006         if (fs->val.tos || fs->mask.tos)
8007                 fconf |= F_TOS;
8008
8009         if (fs->val.vlan_vld || fs->mask.vlan_vld)
8010                 fconf |= F_VLAN;
8011
8012         if (fs->val.ovlan_vld || fs->mask.ovlan_vld) {
8013                 fconf |= F_VNIC_ID;
8014                 if (tpp->ingress_config & F_VNIC)
8015                         return (EINVAL);
8016         }
8017
8018         if (fs->val.pfvf_vld || fs->mask.pfvf_vld) {
8019                 fconf |= F_VNIC_ID;
8020                 if ((tpp->ingress_config & F_VNIC) == 0)
8021                         return (EINVAL);
8022         }
8023
8024         if (fs->val.iport || fs->mask.iport)
8025                 fconf |= F_PORT;
8026
8027         if (fs->val.fcoe || fs->mask.fcoe)
8028                 fconf |= F_FCOE;
8029
8030         if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map)
8031                 return (E2BIG);
8032
8033         return (0);
8034 }
8035
8036 static int
8037 get_filter_mode(struct adapter *sc, uint32_t *mode)
8038 {
8039         struct tp_params *tpp = &sc->params.tp;
8040
8041         /*
8042          * We trust the cached values of the relevant TP registers.  This means
8043          * things work reliably only if writes to those registers are always via
8044          * t4_set_filter_mode.
8045          */
8046         *mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config);
8047
8048         return (0);
8049 }
8050
8051 static int
8052 set_filter_mode(struct adapter *sc, uint32_t mode)
8053 {
8054         struct tp_params *tpp = &sc->params.tp;
8055         uint32_t fconf, iconf;
8056         int rc;
8057
8058         iconf = mode_to_iconf(mode);
8059         if ((iconf ^ tpp->ingress_config) & F_VNIC) {
8060                 /*
8061                  * For now we just complain if A_TP_INGRESS_CONFIG is not
8062                  * already set to the correct value for the requested filter
8063                  * mode.  It's not clear if it's safe to write to this register
8064                  * on the fly.  (And we trust the cached value of the register).
8065                  */
8066                 return (EBUSY);
8067         }
8068
8069         fconf = mode_to_fconf(mode);
8070
8071         rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
8072             "t4setfm");
8073         if (rc)
8074                 return (rc);
8075
8076         if (sc->tids.ftids_in_use > 0) {
8077                 rc = EBUSY;
8078                 goto done;
8079         }
8080
8081 #ifdef TCP_OFFLOAD
8082         if (uld_active(sc, ULD_TOM)) {
8083                 rc = EBUSY;
8084                 goto done;
8085         }
8086 #endif
8087
8088         rc = -t4_set_filter_mode(sc, fconf);
8089 done:
8090         end_synchronized_op(sc, LOCK_HELD);
8091         return (rc);
8092 }
8093
8094 static inline uint64_t
8095 get_filter_hits(struct adapter *sc, uint32_t fid)
8096 {
8097         uint32_t tcb_addr;
8098
8099         tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) +
8100             (fid + sc->tids.ftid_base) * TCB_SIZE;
8101
8102         if (is_t4(sc)) {
8103                 uint64_t hits;
8104
8105                 read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8);
8106                 return (be64toh(hits));
8107         } else {
8108                 uint32_t hits;
8109
8110                 read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4);
8111                 return (be32toh(hits));
8112         }
8113 }
8114
8115 static int
8116 get_filter(struct adapter *sc, struct t4_filter *t)
8117 {
8118         int i, rc, nfilters = sc->tids.nftids;
8119         struct filter_entry *f;
8120
8121         rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
8122             "t4getf");
8123         if (rc)
8124                 return (rc);
8125
8126         if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL ||
8127             t->idx >= nfilters) {
8128                 t->idx = 0xffffffff;
8129                 goto done;
8130         }
8131
8132         f = &sc->tids.ftid_tab[t->idx];
8133         for (i = t->idx; i < nfilters; i++, f++) {
8134                 if (f->valid) {
8135                         t->idx = i;
8136                         t->l2tidx = f->l2t ? f->l2t->idx : 0;
8137                         t->smtidx = f->smtidx;
8138                         if (f->fs.hitcnts)
8139                                 t->hits = get_filter_hits(sc, t->idx);
8140                         else
8141                                 t->hits = UINT64_MAX;
8142                         t->fs = f->fs;
8143
8144                         goto done;
8145                 }
8146         }
8147
8148         t->idx = 0xffffffff;
8149 done:
8150         end_synchronized_op(sc, LOCK_HELD);
8151         return (0);
8152 }
8153
8154 static int
8155 set_filter(struct adapter *sc, struct t4_filter *t)
8156 {
8157         unsigned int nfilters, nports;
8158         struct filter_entry *f;
8159         int i, rc;
8160
8161         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
8162         if (rc)
8163                 return (rc);
8164
8165         nfilters = sc->tids.nftids;
8166         nports = sc->params.nports;
8167
8168         if (nfilters == 0) {
8169                 rc = ENOTSUP;
8170                 goto done;
8171         }
8172
8173         if (t->idx >= nfilters) {
8174                 rc = EINVAL;
8175                 goto done;
8176         }
8177
8178         /* Validate against the global filter mode and ingress config */
8179         rc = check_fspec_against_fconf_iconf(sc, &t->fs);
8180         if (rc != 0)
8181                 goto done;
8182
8183         if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) {
8184                 rc = EINVAL;
8185                 goto done;
8186         }
8187
8188         if (t->fs.val.iport >= nports) {
8189                 rc = EINVAL;
8190                 goto done;
8191         }
8192
8193         /* Can't specify an iq if not steering to it */
8194         if (!t->fs.dirsteer && t->fs.iq) {
8195                 rc = EINVAL;
8196                 goto done;
8197         }
8198
8199         /* IPv6 filter idx must be 4 aligned */
8200         if (t->fs.type == 1 &&
8201             ((t->idx & 0x3) || t->idx + 4 >= nfilters)) {
8202                 rc = EINVAL;
8203                 goto done;
8204         }
8205
8206         if (!(sc->flags & FULL_INIT_DONE) &&
8207             ((rc = adapter_full_init(sc)) != 0))
8208                 goto done;
8209
8210         if (sc->tids.ftid_tab == NULL) {
8211                 KASSERT(sc->tids.ftids_in_use == 0,
8212                     ("%s: no memory allocated but filters_in_use > 0",
8213                     __func__));
8214
8215                 sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
8216                     nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
8217                 if (sc->tids.ftid_tab == NULL) {
8218                         rc = ENOMEM;
8219                         goto done;
8220                 }
8221                 mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF);
8222         }
8223
8224         for (i = 0; i < 4; i++) {
8225                 f = &sc->tids.ftid_tab[t->idx + i];
8226
8227                 if (f->pending || f->valid) {
8228                         rc = EBUSY;
8229                         goto done;
8230                 }
8231                 if (f->locked) {
8232                         rc = EPERM;
8233                         goto done;
8234                 }
8235
8236                 if (t->fs.type == 0)
8237                         break;
8238         }
8239
8240         f = &sc->tids.ftid_tab[t->idx];
8241         f->fs = t->fs;
8242
8243         rc = set_filter_wr(sc, t->idx);
8244 done:
8245         end_synchronized_op(sc, 0);
8246
8247         if (rc == 0) {
8248                 mtx_lock(&sc->tids.ftid_lock);
8249                 for (;;) {
8250                         if (f->pending == 0) {
8251                                 rc = f->valid ? 0 : EIO;
8252                                 break;
8253                         }
8254
8255                         if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
8256                             PCATCH, "t4setfw", 0)) {
8257                                 rc = EINPROGRESS;
8258                                 break;
8259                         }
8260                 }
8261                 mtx_unlock(&sc->tids.ftid_lock);
8262         }
8263         return (rc);
8264 }
8265
8266 static int
8267 del_filter(struct adapter *sc, struct t4_filter *t)
8268 {
8269         unsigned int nfilters;
8270         struct filter_entry *f;
8271         int rc;
8272
8273         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf");
8274         if (rc)
8275                 return (rc);
8276
8277         nfilters = sc->tids.nftids;
8278
8279         if (nfilters == 0) {
8280                 rc = ENOTSUP;
8281                 goto done;
8282         }
8283
8284         if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
8285             t->idx >= nfilters) {
8286                 rc = EINVAL;
8287                 goto done;
8288         }
8289
8290         if (!(sc->flags & FULL_INIT_DONE)) {
8291                 rc = EAGAIN;
8292                 goto done;
8293         }
8294
8295         f = &sc->tids.ftid_tab[t->idx];
8296
8297         if (f->pending) {
8298                 rc = EBUSY;
8299                 goto done;
8300         }
8301         if (f->locked) {
8302                 rc = EPERM;
8303                 goto done;
8304         }
8305
8306         if (f->valid) {
8307                 t->fs = f->fs;  /* extra info for the caller */
8308                 rc = del_filter_wr(sc, t->idx);
8309         }
8310
8311 done:
8312         end_synchronized_op(sc, 0);
8313
8314         if (rc == 0) {
8315                 mtx_lock(&sc->tids.ftid_lock);
8316                 for (;;) {
8317                         if (f->pending == 0) {
8318                                 rc = f->valid ? EIO : 0;
8319                                 break;
8320                         }
8321
8322                         if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
8323                             PCATCH, "t4delfw", 0)) {
8324                                 rc = EINPROGRESS;
8325                                 break;
8326                         }
8327                 }
8328                 mtx_unlock(&sc->tids.ftid_lock);
8329         }
8330
8331         return (rc);
8332 }
8333
8334 static void
8335 clear_filter(struct filter_entry *f)
8336 {
8337         if (f->l2t)
8338                 t4_l2t_release(f->l2t);
8339
8340         bzero(f, sizeof (*f));
8341 }
8342
8343 static int
8344 set_filter_wr(struct adapter *sc, int fidx)
8345 {
8346         struct filter_entry *f = &sc->tids.ftid_tab[fidx];
8347         struct fw_filter_wr *fwr;
8348         unsigned int ftid, vnic_vld, vnic_vld_mask;
8349         struct wrq_cookie cookie;
8350
8351         ASSERT_SYNCHRONIZED_OP(sc);
8352
8353         if (f->fs.newdmac || f->fs.newvlan) {
8354                 /* This filter needs an L2T entry; allocate one. */
8355                 f->l2t = t4_l2t_alloc_switching(sc->l2t);
8356                 if (f->l2t == NULL)
8357                         return (EAGAIN);
8358                 if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport,
8359                     f->fs.dmac)) {
8360                         t4_l2t_release(f->l2t);
8361                         f->l2t = NULL;
8362                         return (ENOMEM);
8363                 }
8364         }
8365
8366         /* Already validated against fconf, iconf */
8367         MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0);
8368         MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0);
8369         if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld)
8370                 vnic_vld = 1;
8371         else
8372                 vnic_vld = 0;
8373         if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld)
8374                 vnic_vld_mask = 1;
8375         else
8376                 vnic_vld_mask = 0;
8377
8378         ftid = sc->tids.ftid_base + fidx;
8379
8380         fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
8381         if (fwr == NULL)
8382                 return (ENOMEM);
8383         bzero(fwr, sizeof(*fwr));
8384
8385         fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
8386         fwr->len16_pkd = htobe32(FW_LEN16(*fwr));
8387         fwr->tid_to_iq =
8388             htobe32(V_FW_FILTER_WR_TID(ftid) |
8389                 V_FW_FILTER_WR_RQTYPE(f->fs.type) |
8390                 V_FW_FILTER_WR_NOREPLY(0) |
8391                 V_FW_FILTER_WR_IQ(f->fs.iq));
8392         fwr->del_filter_to_l2tix =
8393             htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) |
8394                 V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
8395                 V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
8396                 V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) |
8397                 V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) |
8398                 V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
8399                 V_FW_FILTER_WR_DMAC(f->fs.newdmac) |
8400                 V_FW_FILTER_WR_SMAC(f->fs.newsmac) |
8401                 V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT ||
8402                     f->fs.newvlan == VLAN_REWRITE) |
8403                 V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE ||
8404                     f->fs.newvlan == VLAN_REWRITE) |
8405                 V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
8406                 V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
8407                 V_FW_FILTER_WR_PRIO(f->fs.prio) |
8408                 V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
8409         fwr->ethtype = htobe16(f->fs.val.ethtype);
8410         fwr->ethtypem = htobe16(f->fs.mask.ethtype);
8411         fwr->frag_to_ovlan_vldm =
8412             (V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
8413                 V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
8414                 V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) |
8415                 V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) |
8416                 V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) |
8417                 V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask));
8418         fwr->smac_sel = 0;
8419         fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) |
8420             V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id));
8421         fwr->maci_to_matchtypem =
8422             htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
8423                 V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
8424                 V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) |
8425                 V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) |
8426                 V_FW_FILTER_WR_PORT(f->fs.val.iport) |
8427                 V_FW_FILTER_WR_PORTM(f->fs.mask.iport) |
8428                 V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) |
8429                 V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype));
8430         fwr->ptcl = f->fs.val.proto;
8431         fwr->ptclm = f->fs.mask.proto;
8432         fwr->ttyp = f->fs.val.tos;
8433         fwr->ttypm = f->fs.mask.tos;
8434         fwr->ivlan = htobe16(f->fs.val.vlan);
8435         fwr->ivlanm = htobe16(f->fs.mask.vlan);
8436         fwr->ovlan = htobe16(f->fs.val.vnic);
8437         fwr->ovlanm = htobe16(f->fs.mask.vnic);
8438         bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip));
8439         bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm));
8440         bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip));
8441         bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm));
8442         fwr->lp = htobe16(f->fs.val.dport);
8443         fwr->lpm = htobe16(f->fs.mask.dport);
8444         fwr->fp = htobe16(f->fs.val.sport);
8445         fwr->fpm = htobe16(f->fs.mask.sport);
8446         if (f->fs.newsmac)
8447                 bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma));
8448
8449         f->pending = 1;
8450         sc->tids.ftids_in_use++;
8451
8452         commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
8453         return (0);
8454 }
8455
8456 static int
8457 del_filter_wr(struct adapter *sc, int fidx)
8458 {
8459         struct filter_entry *f = &sc->tids.ftid_tab[fidx];
8460         struct fw_filter_wr *fwr;
8461         unsigned int ftid;
8462         struct wrq_cookie cookie;
8463
8464         ftid = sc->tids.ftid_base + fidx;
8465
8466         fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
8467         if (fwr == NULL)
8468                 return (ENOMEM);
8469         bzero(fwr, sizeof (*fwr));
8470
8471         t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
8472
8473         f->pending = 1;
8474         commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
8475         return (0);
8476 }
8477
8478 int
8479 t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
8480 {
8481         struct adapter *sc = iq->adapter;
8482         const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);
8483         unsigned int idx = GET_TID(rpl);
8484         unsigned int rc;
8485         struct filter_entry *f;
8486
8487         KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
8488             rss->opcode));
8489         MPASS(iq == &sc->sge.fwq);
8490         MPASS(is_ftid(sc, idx));
8491
8492         idx -= sc->tids.ftid_base;
8493         f = &sc->tids.ftid_tab[idx];
8494         rc = G_COOKIE(rpl->cookie);
8495
8496         mtx_lock(&sc->tids.ftid_lock);
8497         if (rc == FW_FILTER_WR_FLT_ADDED) {
8498                 KASSERT(f->pending, ("%s: filter[%u] isn't pending.",
8499                     __func__, idx));
8500                 f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
8501                 f->pending = 0;  /* asynchronous setup completed */
8502                 f->valid = 1;
8503         } else {
8504                 if (rc != FW_FILTER_WR_FLT_DELETED) {
8505                         /* Add or delete failed, display an error */
8506                         log(LOG_ERR,
8507                             "filter %u setup failed with error %u\n",
8508                             idx, rc);
8509                 }
8510
8511                 clear_filter(f);
8512                 sc->tids.ftids_in_use--;
8513         }
8514         wakeup(&sc->tids.ftid_tab);
8515         mtx_unlock(&sc->tids.ftid_lock);
8516
8517         return (0);
8518 }
8519
8520 static int
8521 set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
8522 {
8523
8524         MPASS(iq->set_tcb_rpl != NULL);
8525         return (iq->set_tcb_rpl(iq, rss, m));
8526 }
8527
8528 static int
8529 l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
8530 {
8531
8532         MPASS(iq->l2t_write_rpl != NULL);
8533         return (iq->l2t_write_rpl(iq, rss, m));
8534 }
8535
8536 static int
8537 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
8538 {
8539         int rc;
8540
8541         if (cntxt->cid > M_CTXTQID)
8542                 return (EINVAL);
8543
8544         if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
8545             cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
8546                 return (EINVAL);
8547
8548         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
8549         if (rc)
8550                 return (rc);
8551
8552         if (sc->flags & FW_OK) {
8553                 rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
8554                     &cntxt->data[0]);
8555                 if (rc == 0)
8556                         goto done;
8557         }
8558
8559         /*
8560          * Read via firmware failed or wasn't even attempted.  Read directly via
8561          * the backdoor.
8562          */
8563         rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
8564 done:
8565         end_synchronized_op(sc, 0);
8566         return (rc);
8567 }
8568
8569 static int
8570 load_fw(struct adapter *sc, struct t4_data *fw)
8571 {
8572         int rc;
8573         uint8_t *fw_data;
8574
8575         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
8576         if (rc)
8577                 return (rc);
8578
8579         if (sc->flags & FULL_INIT_DONE) {
8580                 rc = EBUSY;
8581                 goto done;
8582         }
8583
8584         fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
8585         if (fw_data == NULL) {
8586                 rc = ENOMEM;
8587                 goto done;
8588         }
8589
8590         rc = copyin(fw->data, fw_data, fw->len);
8591         if (rc == 0)
8592                 rc = -t4_load_fw(sc, fw_data, fw->len);
8593
8594         free(fw_data, M_CXGBE);
8595 done:
8596         end_synchronized_op(sc, 0);
8597         return (rc);
8598 }
8599
8600 static int
8601 load_cfg(struct adapter *sc, struct t4_data *cfg)
8602 {
8603         int rc;
8604         uint8_t *cfg_data = NULL;
8605
8606         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
8607         if (rc)
8608                 return (rc);
8609
8610         if (cfg->len == 0) {
8611                 /* clear */
8612                 rc = -t4_load_cfg(sc, NULL, 0);
8613                 goto done;
8614         }
8615
8616         cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
8617         if (cfg_data == NULL) {
8618                 rc = ENOMEM;
8619                 goto done;
8620         }
8621
8622         rc = copyin(cfg->data, cfg_data, cfg->len);
8623         if (rc == 0)
8624                 rc = -t4_load_cfg(sc, cfg_data, cfg->len);
8625
8626         free(cfg_data, M_CXGBE);
8627 done:
8628         end_synchronized_op(sc, 0);
8629         return (rc);
8630 }
8631
8632 #define MAX_READ_BUF_SIZE (128 * 1024)
8633 static int
8634 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
8635 {
8636         uint32_t addr, remaining, n;
8637         uint32_t *buf;
8638         int rc;
8639         uint8_t *dst;
8640
8641         rc = validate_mem_range(sc, mr->addr, mr->len);
8642         if (rc != 0)
8643                 return (rc);
8644
8645         buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
8646         addr = mr->addr;
8647         remaining = mr->len;
8648         dst = (void *)mr->data;
8649
8650         while (remaining) {
8651                 n = min(remaining, MAX_READ_BUF_SIZE);
8652                 read_via_memwin(sc, 2, addr, buf, n);
8653
8654                 rc = copyout(buf, dst, n);
8655                 if (rc != 0)
8656                         break;
8657
8658                 dst += n;
8659                 remaining -= n;
8660                 addr += n;
8661         }
8662
8663         free(buf, M_CXGBE);
8664         return (rc);
8665 }
8666 #undef MAX_READ_BUF_SIZE
8667
8668 static int
8669 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
8670 {
8671         int rc;
8672
8673         if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
8674                 return (EINVAL);
8675
8676         if (i2cd->len > sizeof(i2cd->data))
8677                 return (EFBIG);
8678
8679         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
8680         if (rc)
8681                 return (rc);
8682         rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
8683             i2cd->offset, i2cd->len, &i2cd->data[0]);
8684         end_synchronized_op(sc, 0);
8685
8686         return (rc);
8687 }
8688
/*
 * Return 1 if 'val' is acceptable, 0 otherwise.  Any negative value is
 * accepted regardless of the bounds; a non-negative value is accepted only
 * when lo <= val <= hi.
 */
static int
in_range(int val, int lo, int hi)
{

	if (val < 0)
		return (1);

	return (lo <= val && val <= hi);
}
8695
8696 static int
8697 set_sched_class_config(struct adapter *sc, int minmax)
8698 {
8699         int rc;
8700
8701         if (minmax < 0)
8702                 return (EINVAL);
8703
8704         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
8705         if (rc)
8706                 return (rc);
8707         rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
8708         end_synchronized_op(sc, 0);
8709
8710         return (rc);
8711 }
8712
8713 static int
8714 set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
8715     int sleep_ok)
8716 {
8717         int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
8718         struct port_info *pi;
8719         struct tx_sched_class *tc;
8720
8721         if (p->level == SCHED_CLASS_LEVEL_CL_RL)
8722                 fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
8723         else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
8724                 fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
8725         else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
8726                 fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
8727         else
8728                 return (EINVAL);
8729
8730         if (p->mode == SCHED_CLASS_MODE_CLASS)
8731                 fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
8732         else if (p->mode == SCHED_CLASS_MODE_FLOW)
8733                 fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
8734         else
8735                 return (EINVAL);
8736
8737         if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS)
8738                 fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
8739         else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS)
8740                 fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
8741         else
8742                 return (EINVAL);
8743
8744         if (p->ratemode == SCHED_CLASS_RATEMODE_REL)
8745                 fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
8746         else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS)
8747                 fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
8748         else
8749                 return (EINVAL);
8750
8751         /* Vet our parameters ... */
8752         if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
8753                 return (ERANGE);
8754
8755         pi = sc->port[sc->chan_map[p->channel]];
8756         if (pi == NULL)
8757                 return (ENXIO);
8758         MPASS(pi->tx_chan == p->channel);
8759         top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */
8760
8761         if (!in_range(p->cl, 0, sc->chip_params->nsched_cls) ||
8762             !in_range(p->minrate, 0, top_speed) ||
8763             !in_range(p->maxrate, 0, top_speed) ||
8764             !in_range(p->weight, 0, 100))
8765                 return (ERANGE);
8766
8767         /*
8768          * Translate any unset parameters into the firmware's
8769          * nomenclature and/or fail the call if the parameters
8770          * are required ...
8771          */
8772         if (p->rateunit < 0 || p->ratemode < 0 || p->channel < 0 || p->cl < 0)
8773                 return (EINVAL);
8774
8775         if (p->minrate < 0)
8776                 p->minrate = 0;
8777         if (p->maxrate < 0) {
8778                 if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
8779                     p->level == SCHED_CLASS_LEVEL_CH_RL)
8780                         return (EINVAL);
8781                 else
8782                         p->maxrate = 0;
8783         }
8784         if (p->weight < 0) {
8785                 if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
8786                         return (EINVAL);
8787                 else
8788                         p->weight = 0;
8789         }
8790         if (p->pktsize < 0) {
8791                 if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
8792                     p->level == SCHED_CLASS_LEVEL_CH_RL)
8793                         return (EINVAL);
8794                 else
8795                         p->pktsize = 0;
8796         }
8797
8798         rc = begin_synchronized_op(sc, NULL,
8799             sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
8800         if (rc)
8801                 return (rc);
8802         tc = &pi->tc[p->cl];
8803         tc->params = *p;
8804         rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode,
8805             fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate,
8806             p->weight, p->pktsize, sleep_ok);
8807         if (rc == 0)
8808                 tc->flags |= TX_SC_OK;
8809         else {
8810                 /*
8811                  * Unknown state at this point, see tc->params for what was
8812                  * attempted.
8813                  */
8814                 tc->flags &= ~TX_SC_OK;
8815         }
8816         end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);
8817
8818         return (rc);
8819 }
8820
8821 int
8822 t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
8823 {
8824
8825         if (p->type != SCHED_CLASS_TYPE_PACKET)
8826                 return (EINVAL);
8827
8828         if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
8829                 return (set_sched_class_config(sc, p->u.config.minmax));
8830
8831         if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
8832                 return (set_sched_class_params(sc, &p->u.params, 1));
8833
8834         return (EINVAL);
8835 }
8836
8837 int
8838 t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
8839 {
8840         struct port_info *pi = NULL;
8841         struct vi_info *vi;
8842         struct sge_txq *txq;
8843         uint32_t fw_mnem, fw_queue, fw_class;
8844         int i, rc;
8845
8846         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq");
8847         if (rc)
8848                 return (rc);
8849
8850         if (p->port >= sc->params.nports) {
8851                 rc = EINVAL;
8852                 goto done;
8853         }
8854
8855         /* XXX: Only supported for the main VI. */
8856         pi = sc->port[p->port];
8857         vi = &pi->vi[0];
8858         if (!(vi->flags & VI_INIT_DONE)) {
8859                 /* tx queues not set up yet */
8860                 rc = EAGAIN;
8861                 goto done;
8862         }
8863
8864         if (!in_range(p->queue, 0, vi->ntxq - 1) ||
8865             !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1)) {
8866                 rc = EINVAL;
8867                 goto done;
8868         }
8869
8870         /*
8871          * Create a template for the FW_PARAMS_CMD mnemonic and value (TX
8872          * Scheduling Class in this case).
8873          */
8874         fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
8875             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
8876         fw_class = p->cl < 0 ? 0xffffffff : p->cl;
8877
8878         /*
8879          * If op.queue is non-negative, then we're only changing the scheduling
8880          * on a single specified TX queue.
8881          */
8882         if (p->queue >= 0) {
8883                 txq = &sc->sge.txq[vi->first_txq + p->queue];
8884                 fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8885                 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8886                     &fw_class);
8887                 goto done;
8888         }
8889
8890         /*
8891          * Change the scheduling on all the TX queues for the
8892          * interface.
8893          */
8894         for_each_txq(vi, i, txq) {
8895                 fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
8896                 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
8897                     &fw_class);
8898                 if (rc)
8899                         goto done;
8900         }
8901
8902         rc = 0;
8903 done:
8904         end_synchronized_op(sc, 0);
8905         return (rc);
8906 }
8907
8908 int
8909 t4_os_find_pci_capability(struct adapter *sc, int cap)
8910 {
8911         int i;
8912
8913         return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
8914 }
8915
8916 int
8917 t4_os_pci_save_state(struct adapter *sc)
8918 {
8919         device_t dev;
8920         struct pci_devinfo *dinfo;
8921
8922         dev = sc->dev;
8923         dinfo = device_get_ivars(dev);
8924
8925         pci_cfg_save(dev, dinfo, 0);
8926         return (0);
8927 }
8928
8929 int
8930 t4_os_pci_restore_state(struct adapter *sc)
8931 {
8932         device_t dev;
8933         struct pci_devinfo *dinfo;
8934
8935         dev = sc->dev;
8936         dinfo = device_get_ivars(dev);
8937
8938         pci_cfg_restore(dev, dinfo);
8939         return (0);
8940 }
8941
8942 void
8943 t4_os_portmod_changed(const struct adapter *sc, int idx)
8944 {
8945         struct port_info *pi = sc->port[idx];
8946         struct vi_info *vi;
8947         struct ifnet *ifp;
8948         int v;
8949         static const char *mod_str[] = {
8950                 NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
8951         };
8952
8953         for_each_vi(pi, v, vi) {
8954                 build_medialist(pi, &vi->media);
8955         }
8956
8957         ifp = pi->vi[0].ifp;
8958         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
8959                 if_printf(ifp, "transceiver unplugged.\n");
8960         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
8961                 if_printf(ifp, "unknown transceiver inserted.\n");
8962         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
8963                 if_printf(ifp, "unsupported transceiver inserted.\n");
8964         else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
8965                 if_printf(ifp, "%s transceiver inserted.\n",
8966                     mod_str[pi->mod_type]);
8967         } else {
8968                 if_printf(ifp, "transceiver (type %d) inserted.\n",
8969                     pi->mod_type);
8970         }
8971 }
8972
8973 void
8974 t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason)
8975 {
8976         struct port_info *pi = sc->port[idx];
8977         struct vi_info *vi;
8978         struct ifnet *ifp;
8979         int v;
8980
8981         if (link_stat)
8982                 pi->linkdnrc = -1;
8983         else {
8984                 if (reason >= 0)
8985                         pi->linkdnrc = reason;
8986         }
8987         for_each_vi(pi, v, vi) {
8988                 ifp = vi->ifp;
8989                 if (ifp == NULL)
8990                         continue;
8991
8992                 if (link_stat) {
8993                         ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed);
8994                         if_link_state_change(ifp, LINK_STATE_UP);
8995                 } else {
8996                         if_link_state_change(ifp, LINK_STATE_DOWN);
8997                 }
8998         }
8999 }
9000
9001 void
9002 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
9003 {
9004         struct adapter *sc;
9005
9006         sx_slock(&t4_list_lock);
9007         SLIST_FOREACH(sc, &t4_list, link) {
9008                 /*
9009                  * func should not make any assumptions about what state sc is
9010                  * in - the only guarantee is that sc->sc_lock is a valid lock.
9011                  */
9012                 func(sc, arg);
9013         }
9014         sx_sunlock(&t4_list_lock);
9015 }
9016
9017 static int
9018 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
9019     struct thread *td)
9020 {
9021         int rc;
9022         struct adapter *sc = dev->si_drv1;
9023
9024         rc = priv_check(td, PRIV_DRIVER);
9025         if (rc != 0)
9026                 return (rc);
9027
9028         switch (cmd) {
9029         case CHELSIO_T4_GETREG: {
9030                 struct t4_reg *edata = (struct t4_reg *)data;
9031
9032                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9033                         return (EFAULT);
9034
9035                 if (edata->size == 4)
9036                         edata->val = t4_read_reg(sc, edata->addr);
9037                 else if (edata->size == 8)
9038                         edata->val = t4_read_reg64(sc, edata->addr);
9039                 else
9040                         return (EINVAL);
9041
9042                 break;
9043         }
9044         case CHELSIO_T4_SETREG: {
9045                 struct t4_reg *edata = (struct t4_reg *)data;
9046
9047                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9048                         return (EFAULT);
9049
9050                 if (edata->size == 4) {
9051                         if (edata->val & 0xffffffff00000000)
9052                                 return (EINVAL);
9053                         t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
9054                 } else if (edata->size == 8)
9055                         t4_write_reg64(sc, edata->addr, edata->val);
9056                 else
9057                         return (EINVAL);
9058                 break;
9059         }
9060         case CHELSIO_T4_REGDUMP: {
9061                 struct t4_regdump *regs = (struct t4_regdump *)data;
9062                 int reglen = t4_get_regs_len(sc);
9063                 uint8_t *buf;
9064
9065                 if (regs->len < reglen) {
9066                         regs->len = reglen; /* hint to the caller */
9067                         return (ENOBUFS);
9068                 }
9069
9070                 regs->len = reglen;
9071                 buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
9072                 get_regs(sc, regs, buf);
9073                 rc = copyout(buf, regs->data, reglen);
9074                 free(buf, M_CXGBE);
9075                 break;
9076         }
9077         case CHELSIO_T4_GET_FILTER_MODE:
9078                 rc = get_filter_mode(sc, (uint32_t *)data);
9079                 break;
9080         case CHELSIO_T4_SET_FILTER_MODE:
9081                 rc = set_filter_mode(sc, *(uint32_t *)data);
9082                 break;
9083         case CHELSIO_T4_GET_FILTER:
9084                 rc = get_filter(sc, (struct t4_filter *)data);
9085                 break;
9086         case CHELSIO_T4_SET_FILTER:
9087                 rc = set_filter(sc, (struct t4_filter *)data);
9088                 break;
9089         case CHELSIO_T4_DEL_FILTER:
9090                 rc = del_filter(sc, (struct t4_filter *)data);
9091                 break;
9092         case CHELSIO_T4_GET_SGE_CONTEXT:
9093                 rc = get_sge_context(sc, (struct t4_sge_context *)data);
9094                 break;
9095         case CHELSIO_T4_LOAD_FW:
9096                 rc = load_fw(sc, (struct t4_data *)data);
9097                 break;
9098         case CHELSIO_T4_GET_MEM:
9099                 rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
9100                 break;
9101         case CHELSIO_T4_GET_I2C:
9102                 rc = read_i2c(sc, (struct t4_i2c_data *)data);
9103                 break;
9104         case CHELSIO_T4_CLEAR_STATS: {
9105                 int i, v;
9106                 u_int port_id = *(uint32_t *)data;
9107                 struct port_info *pi;
9108                 struct vi_info *vi;
9109
9110                 if (port_id >= sc->params.nports)
9111                         return (EINVAL);
9112                 pi = sc->port[port_id];
9113                 if (pi == NULL)
9114                         return (EIO);
9115
9116                 /* MAC stats */
9117                 t4_clr_port_stats(sc, pi->tx_chan);
9118                 pi->tx_parse_error = 0;
9119                 mtx_lock(&sc->reg_lock);
9120                 for_each_vi(pi, v, vi) {
9121                         if (vi->flags & VI_INIT_DONE)
9122                                 t4_clr_vi_stats(sc, vi->viid);
9123                 }
9124                 mtx_unlock(&sc->reg_lock);
9125
9126                 /*
9127                  * Since this command accepts a port, clear stats for
9128                  * all VIs on this port.
9129                  */
9130                 for_each_vi(pi, v, vi) {
9131                         if (vi->flags & VI_INIT_DONE) {
9132                                 struct sge_rxq *rxq;
9133                                 struct sge_txq *txq;
9134                                 struct sge_wrq *wrq;
9135
9136                                 for_each_rxq(vi, i, rxq) {
9137 #if defined(INET) || defined(INET6)
9138                                         rxq->lro.lro_queued = 0;
9139                                         rxq->lro.lro_flushed = 0;
9140 #endif
9141                                         rxq->rxcsum = 0;
9142                                         rxq->vlan_extraction = 0;
9143                                 }
9144
9145                                 for_each_txq(vi, i, txq) {
9146                                         txq->txcsum = 0;
9147                                         txq->tso_wrs = 0;
9148                                         txq->vlan_insertion = 0;
9149                                         txq->imm_wrs = 0;
9150                                         txq->sgl_wrs = 0;
9151                                         txq->txpkt_wrs = 0;
9152                                         txq->txpkts0_wrs = 0;
9153                                         txq->txpkts1_wrs = 0;
9154                                         txq->txpkts0_pkts = 0;
9155                                         txq->txpkts1_pkts = 0;
9156                                         mp_ring_reset_stats(txq->r);
9157                                 }
9158
9159 #ifdef TCP_OFFLOAD
9160                                 /* nothing to clear for each ofld_rxq */
9161
9162                                 for_each_ofld_txq(vi, i, wrq) {
9163                                         wrq->tx_wrs_direct = 0;
9164                                         wrq->tx_wrs_copied = 0;
9165                                 }
9166 #endif
9167
9168                                 if (IS_MAIN_VI(vi)) {
9169                                         wrq = &sc->sge.ctrlq[pi->port_id];
9170                                         wrq->tx_wrs_direct = 0;
9171                                         wrq->tx_wrs_copied = 0;
9172                                 }
9173                         }
9174                 }
9175                 break;
9176         }
9177         case CHELSIO_T4_SCHED_CLASS:
9178                 rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
9179                 break;
9180         case CHELSIO_T4_SCHED_QUEUE:
9181                 rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
9182                 break;
9183         case CHELSIO_T4_GET_TRACER:
9184                 rc = t4_get_tracer(sc, (struct t4_tracer *)data);
9185                 break;
9186         case CHELSIO_T4_SET_TRACER:
9187                 rc = t4_set_tracer(sc, (struct t4_tracer *)data);
9188                 break;
9189         case CHELSIO_T4_LOAD_CFG:
9190                 rc = load_cfg(sc, (struct t4_data *)data);
9191                 break;
9192         default:
9193                 rc = ENOTTY;
9194         }
9195
9196         return (rc);
9197 }
9198
/*
 * Stub: does nothing except invoke CXGBE_UNIMPLEMENTED with this function's
 * name.
 */
void
t4_db_full(struct adapter *sc)
{

	CXGBE_UNIMPLEMENTED(__func__);
}
9205
/*
 * Stub: does nothing except invoke CXGBE_UNIMPLEMENTED with this function's
 * name.
 */
void
t4_db_dropped(struct adapter *sc)
{

	CXGBE_UNIMPLEMENTED(__func__);
}
9212
9213 #ifdef TCP_OFFLOAD
9214 static int
9215 toe_capability(struct vi_info *vi, int enable)
9216 {
9217         int rc;
9218         struct port_info *pi = vi->pi;
9219         struct adapter *sc = pi->adapter;
9220
9221         ASSERT_SYNCHRONIZED_OP(sc);
9222
9223         if (!is_offload(sc))
9224                 return (ENODEV);
9225
9226         if (enable) {
9227                 if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
9228                         /* TOE is already enabled. */
9229                         return (0);
9230                 }
9231
9232                 /*
9233                  * We need the port's queues around so that we're able to send
9234                  * and receive CPLs to/from the TOE even if the ifnet for this
9235                  * port has never been UP'd administratively.
9236                  */
9237                 if (!(vi->flags & VI_INIT_DONE)) {
9238                         rc = vi_full_init(vi);
9239                         if (rc)
9240                                 return (rc);
9241                 }
9242                 if (!(pi->vi[0].flags & VI_INIT_DONE)) {
9243                         rc = vi_full_init(&pi->vi[0]);
9244                         if (rc)
9245                                 return (rc);
9246                 }
9247
9248                 if (isset(&sc->offload_map, pi->port_id)) {
9249                         /* TOE is enabled on another VI of this port. */
9250                         pi->uld_vis++;
9251                         return (0);
9252                 }
9253
9254                 if (!uld_active(sc, ULD_TOM)) {
9255                         rc = t4_activate_uld(sc, ULD_TOM);
9256                         if (rc == EAGAIN) {
9257                                 log(LOG_WARNING,
9258                                     "You must kldload t4_tom.ko before trying "
9259                                     "to enable TOE on a cxgbe interface.\n");
9260                         }
9261                         if (rc != 0)
9262                                 return (rc);
9263                         KASSERT(sc->tom_softc != NULL,
9264                             ("%s: TOM activated but softc NULL", __func__));
9265                         KASSERT(uld_active(sc, ULD_TOM),
9266                             ("%s: TOM activated but flag not set", __func__));
9267                 }
9268
9269                 /* Activate iWARP and iSCSI too, if the modules are loaded. */
9270                 if (!uld_active(sc, ULD_IWARP))
9271                         (void) t4_activate_uld(sc, ULD_IWARP);
9272                 if (!uld_active(sc, ULD_ISCSI))
9273                         (void) t4_activate_uld(sc, ULD_ISCSI);
9274
9275                 pi->uld_vis++;
9276                 setbit(&sc->offload_map, pi->port_id);
9277         } else {
9278                 pi->uld_vis--;
9279
9280                 if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
9281                         return (0);
9282
9283                 KASSERT(uld_active(sc, ULD_TOM),
9284                     ("%s: TOM never initialized?", __func__));
9285                 clrbit(&sc->offload_map, pi->port_id);
9286         }
9287
9288         return (0);
9289 }
9290
9291 /*
9292  * Add an upper layer driver to the global list.
9293  */
9294 int
9295 t4_register_uld(struct uld_info *ui)
9296 {
9297         int rc = 0;
9298         struct uld_info *u;
9299
9300         sx_xlock(&t4_uld_list_lock);
9301         SLIST_FOREACH(u, &t4_uld_list, link) {
9302             if (u->uld_id == ui->uld_id) {
9303                     rc = EEXIST;
9304                     goto done;
9305             }
9306         }
9307
9308         SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
9309         ui->refcount = 0;
9310 done:
9311         sx_xunlock(&t4_uld_list_lock);
9312         return (rc);
9313 }
9314
9315 int
9316 t4_unregister_uld(struct uld_info *ui)
9317 {
9318         int rc = EINVAL;
9319         struct uld_info *u;
9320
9321         sx_xlock(&t4_uld_list_lock);
9322
9323         SLIST_FOREACH(u, &t4_uld_list, link) {
9324             if (u == ui) {
9325                     if (ui->refcount > 0) {
9326                             rc = EBUSY;
9327                             goto done;
9328                     }
9329
9330                     SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
9331                     rc = 0;
9332                     goto done;
9333             }
9334         }
9335 done:
9336         sx_xunlock(&t4_uld_list_lock);
9337         return (rc);
9338 }
9339
9340 int
9341 t4_activate_uld(struct adapter *sc, int id)
9342 {
9343         int rc;
9344         struct uld_info *ui;
9345
9346         ASSERT_SYNCHRONIZED_OP(sc);
9347
9348         if (id < 0 || id > ULD_MAX)
9349                 return (EINVAL);
9350         rc = EAGAIN;    /* kldoad the module with this ULD and try again. */
9351
9352         sx_slock(&t4_uld_list_lock);
9353
9354         SLIST_FOREACH(ui, &t4_uld_list, link) {
9355                 if (ui->uld_id == id) {
9356                         if (!(sc->flags & FULL_INIT_DONE)) {
9357                                 rc = adapter_full_init(sc);
9358                                 if (rc != 0)
9359                                         break;
9360                         }
9361
9362                         rc = ui->activate(sc);
9363                         if (rc == 0) {
9364                                 setbit(&sc->active_ulds, id);
9365                                 ui->refcount++;
9366                         }
9367                         break;
9368                 }
9369         }
9370
9371         sx_sunlock(&t4_uld_list_lock);
9372
9373         return (rc);
9374 }
9375
9376 int
9377 t4_deactivate_uld(struct adapter *sc, int id)
9378 {
9379         int rc;
9380         struct uld_info *ui;
9381
9382         ASSERT_SYNCHRONIZED_OP(sc);
9383
9384         if (id < 0 || id > ULD_MAX)
9385                 return (EINVAL);
9386         rc = ENXIO;
9387
9388         sx_slock(&t4_uld_list_lock);
9389
9390         SLIST_FOREACH(ui, &t4_uld_list, link) {
9391                 if (ui->uld_id == id) {
9392                         rc = ui->deactivate(sc);
9393                         if (rc == 0) {
9394                                 clrbit(&sc->active_ulds, id);
9395                                 ui->refcount--;
9396                         }
9397                         break;
9398                 }
9399         }
9400
9401         sx_sunlock(&t4_uld_list_lock);
9402
9403         return (rc);
9404 }
9405
9406 int
9407 uld_active(struct adapter *sc, int uld_id)
9408 {
9409
9410         MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
9411
9412         return (isset(&sc->active_ulds, uld_id));
9413 }
9414 #endif
9415
9416 /*
9417  * Come up with reasonable defaults for some of the tunables, provided they're
9418  * not set by the user (in which case we'll use the values as is).
9419  */
9420 static void
9421 tweak_tunables(void)
9422 {
9423         int nc = mp_ncpus;      /* our snapshot of the number of CPUs */
9424
9425         if (t4_ntxq10g < 1) {
9426 #ifdef RSS
9427                 t4_ntxq10g = rss_getnumbuckets();
9428 #else
9429                 t4_ntxq10g = min(nc, NTXQ_10G);
9430 #endif
9431         }
9432
9433         if (t4_ntxq1g < 1) {
9434 #ifdef RSS
9435                 /* XXX: way too many for 1GbE? */
9436                 t4_ntxq1g = rss_getnumbuckets();
9437 #else
9438                 t4_ntxq1g = min(nc, NTXQ_1G);
9439 #endif
9440         }
9441
9442         if (t4_ntxq_vi < 1)
9443                 t4_ntxq_vi = min(nc, NTXQ_VI);
9444
9445         if (t4_nrxq10g < 1) {
9446 #ifdef RSS
9447                 t4_nrxq10g = rss_getnumbuckets();
9448 #else
9449                 t4_nrxq10g = min(nc, NRXQ_10G);
9450 #endif
9451         }
9452
9453         if (t4_nrxq1g < 1) {
9454 #ifdef RSS
9455                 /* XXX: way too many for 1GbE? */
9456                 t4_nrxq1g = rss_getnumbuckets();
9457 #else
9458                 t4_nrxq1g = min(nc, NRXQ_1G);
9459 #endif
9460         }
9461
9462         if (t4_nrxq_vi < 1)
9463                 t4_nrxq_vi = min(nc, NRXQ_VI);
9464
9465 #ifdef TCP_OFFLOAD
9466         if (t4_nofldtxq10g < 1)
9467                 t4_nofldtxq10g = min(nc, NOFLDTXQ_10G);
9468
9469         if (t4_nofldtxq1g < 1)
9470                 t4_nofldtxq1g = min(nc, NOFLDTXQ_1G);
9471
9472         if (t4_nofldtxq_vi < 1)
9473                 t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI);
9474
9475         if (t4_nofldrxq10g < 1)
9476                 t4_nofldrxq10g = min(nc, NOFLDRXQ_10G);
9477
9478         if (t4_nofldrxq1g < 1)
9479                 t4_nofldrxq1g = min(nc, NOFLDRXQ_1G);
9480
9481         if (t4_nofldrxq_vi < 1)
9482                 t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI);
9483
9484         if (t4_toecaps_allowed == -1)
9485                 t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
9486
9487         if (t4_rdmacaps_allowed == -1) {
9488                 t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
9489                     FW_CAPS_CONFIG_RDMA_RDMAC;
9490         }
9491
9492         if (t4_iscsicaps_allowed == -1) {
9493                 t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
9494                     FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
9495                     FW_CAPS_CONFIG_ISCSI_T10DIF;
9496         }
9497 #else
9498         if (t4_toecaps_allowed == -1)
9499                 t4_toecaps_allowed = 0;
9500
9501         if (t4_rdmacaps_allowed == -1)
9502                 t4_rdmacaps_allowed = 0;
9503
9504         if (t4_iscsicaps_allowed == -1)
9505                 t4_iscsicaps_allowed = 0;
9506 #endif
9507
9508 #ifdef DEV_NETMAP
9509         if (t4_nnmtxq_vi < 1)
9510                 t4_nnmtxq_vi = min(nc, NNMTXQ_VI);
9511
9512         if (t4_nnmrxq_vi < 1)
9513                 t4_nnmrxq_vi = min(nc, NNMRXQ_VI);
9514 #endif
9515
9516         if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS)
9517                 t4_tmr_idx_10g = TMR_IDX_10G;
9518
9519         if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS)
9520                 t4_pktc_idx_10g = PKTC_IDX_10G;
9521
9522         if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS)
9523                 t4_tmr_idx_1g = TMR_IDX_1G;
9524
9525         if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS)
9526                 t4_pktc_idx_1g = PKTC_IDX_1G;
9527
9528         if (t4_qsize_txq < 128)
9529                 t4_qsize_txq = 128;
9530
9531         if (t4_qsize_rxq < 128)
9532                 t4_qsize_rxq = 128;
9533         while (t4_qsize_rxq & 7)
9534                 t4_qsize_rxq++;
9535
9536         t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
9537 }
9538
9539 #ifdef DDB
9540 static void
9541 t4_dump_tcb(struct adapter *sc, int tid)
9542 {
9543         uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
9544
9545         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
9546         save = t4_read_reg(sc, reg);
9547         base = sc->memwin[2].mw_base;
9548
9549         /* Dump TCB for the tid */
9550         tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
9551         tcb_addr += tid * TCB_SIZE;
9552
9553         if (is_t4(sc)) {
9554                 pf = 0;
9555                 win_pos = tcb_addr & ~0xf;      /* start must be 16B aligned */
9556         } else {
9557                 pf = V_PFNUM(sc->pf);
9558                 win_pos = tcb_addr & ~0x7f;     /* start must be 128B aligned */
9559         }
9560         t4_write_reg(sc, reg, win_pos | pf);
9561         t4_read_reg(sc, reg);
9562
9563         off = tcb_addr - win_pos;
9564         for (i = 0; i < 4; i++) {
9565                 uint32_t buf[8];
9566                 for (j = 0; j < 8; j++, off += 4)
9567                         buf[j] = htonl(t4_read_reg(sc, base + off));
9568
9569                 db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
9570                     buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
9571                     buf[7]);
9572         }
9573
9574         t4_write_reg(sc, reg, save);
9575         t4_read_reg(sc, reg);
9576 }
9577
9578 static void
9579 t4_dump_devlog(struct adapter *sc)
9580 {
9581         struct devlog_params *dparams = &sc->params.devlog;
9582         struct fw_devlog_e e;
9583         int i, first, j, m, nentries, rc;
9584         uint64_t ftstamp = UINT64_MAX;
9585
9586         if (dparams->start == 0) {
9587                 db_printf("devlog params not valid\n");
9588                 return;
9589         }
9590
9591         nentries = dparams->size / sizeof(struct fw_devlog_e);
9592         m = fwmtype_to_hwmtype(dparams->memtype);
9593
9594         /* Find the first entry. */
9595         first = -1;
9596         for (i = 0; i < nentries && !db_pager_quit; i++) {
9597                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9598                     sizeof(e), (void *)&e);
9599                 if (rc != 0)
9600                         break;
9601
9602                 if (e.timestamp == 0)
9603                         break;
9604
9605                 e.timestamp = be64toh(e.timestamp);
9606                 if (e.timestamp < ftstamp) {
9607                         ftstamp = e.timestamp;
9608                         first = i;
9609                 }
9610         }
9611
9612         if (first == -1)
9613                 return;
9614
9615         i = first;
9616         do {
9617                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9618                     sizeof(e), (void *)&e);
9619                 if (rc != 0)
9620                         return;
9621
9622                 if (e.timestamp == 0)
9623                         return;
9624
9625                 e.timestamp = be64toh(e.timestamp);
9626                 e.seqno = be32toh(e.seqno);
9627                 for (j = 0; j < 8; j++)
9628                         e.params[j] = be32toh(e.params[j]);
9629
9630                 db_printf("%10d  %15ju  %8s  %8s  ",
9631                     e.seqno, e.timestamp,
9632                     (e.level < nitems(devlog_level_strings) ?
9633                         devlog_level_strings[e.level] : "UNKNOWN"),
9634                     (e.facility < nitems(devlog_facility_strings) ?
9635                         devlog_facility_strings[e.facility] : "UNKNOWN"));
9636                 db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
9637                     e.params[3], e.params[4], e.params[5], e.params[6],
9638                     e.params[7]);
9639
9640                 if (++i == nentries)
9641                         i = 0;
9642         } while (i != first && !db_pager_quit);
9643 }
9644
/*
 * Command table holding the "t4" ddb subcommands (the DB_FUNC()s below add
 * themselves to it), hooked in as "t4" under what appears to be ddb's "show"
 * table, so that they are reached as "show t4 <subcommand>".
 */
static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
_DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
9647
9648 DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
9649 {
9650         device_t dev;
9651         int t;
9652         bool valid;
9653
9654         valid = false;
9655         t = db_read_token();
9656         if (t == tIDENT) {
9657                 dev = device_lookup_by_name(db_tok_string);
9658                 valid = true;
9659         }
9660         db_skip_to_eol();
9661         if (!valid) {
9662                 db_printf("usage: show t4 devlog <nexus>\n");
9663                 return;
9664         }
9665
9666         if (dev == NULL) {
9667                 db_printf("device not found\n");
9668                 return;
9669         }
9670
9671         t4_dump_devlog(device_get_softc(dev));
9672 }
9673
9674 DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
9675 {
9676         device_t dev;
9677         int radix, tid, t;
9678         bool valid;
9679
9680         valid = false;
9681         radix = db_radix;
9682         db_radix = 10;
9683         t = db_read_token();
9684         if (t == tIDENT) {
9685                 dev = device_lookup_by_name(db_tok_string);
9686                 t = db_read_token();
9687                 if (t == tNUMBER) {
9688                         tid = db_tok_number;
9689                         valid = true;
9690                 }
9691         }       
9692         db_radix = radix;
9693         db_skip_to_eol();
9694         if (!valid) {
9695                 db_printf("usage: show t4 tcb <nexus> <tid>\n");
9696                 return;
9697         }
9698
9699         if (dev == NULL) {
9700                 db_printf("device not found\n");
9701                 return;
9702         }
9703         if (tid < 0) {
9704                 db_printf("invalid tid\n");
9705                 return;
9706         }
9707
9708         t4_dump_tcb(device_get_softc(dev), tid);
9709 }
9710 #endif
9711
/*
 * Serializes the MOD_LOAD/MOD_UNLOAD handling in mod_event().  Set up via
 * SX_SYSINIT rather than from within the event handler.
 */
static struct sx mlu;	/* mod load unload */
SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
9714
9715 static int
9716 mod_event(module_t mod, int cmd, void *arg)
9717 {
9718         int rc = 0;
9719         static int loaded = 0;
9720
9721         switch (cmd) {
9722         case MOD_LOAD:
9723                 sx_xlock(&mlu);
9724                 if (loaded++ == 0) {
9725                         t4_sge_modload();
9726                         t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl);
9727                         t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl);
9728                         t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
9729                         t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
9730                         sx_init(&t4_list_lock, "T4/T5 adapters");
9731                         SLIST_INIT(&t4_list);
9732 #ifdef TCP_OFFLOAD
9733                         sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
9734                         SLIST_INIT(&t4_uld_list);
9735 #endif
9736                         t4_tracer_modload();
9737                         tweak_tunables();
9738                 }
9739                 sx_xunlock(&mlu);
9740                 break;
9741
9742         case MOD_UNLOAD:
9743                 sx_xlock(&mlu);
9744                 if (--loaded == 0) {
9745                         int tries;
9746
9747                         sx_slock(&t4_list_lock);
9748                         if (!SLIST_EMPTY(&t4_list)) {
9749                                 rc = EBUSY;
9750                                 sx_sunlock(&t4_list_lock);
9751                                 goto done_unload;
9752                         }
9753 #ifdef TCP_OFFLOAD
9754                         sx_slock(&t4_uld_list_lock);
9755                         if (!SLIST_EMPTY(&t4_uld_list)) {
9756                                 rc = EBUSY;
9757                                 sx_sunlock(&t4_uld_list_lock);
9758                                 sx_sunlock(&t4_list_lock);
9759                                 goto done_unload;
9760                         }
9761 #endif
9762                         tries = 0;
9763                         while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
9764                                 uprintf("%ju clusters with custom free routine "
9765                                     "still is use.\n", t4_sge_extfree_refs());
9766                                 pause("t4unload", 2 * hz);
9767                         }
9768 #ifdef TCP_OFFLOAD
9769                         sx_sunlock(&t4_uld_list_lock);
9770 #endif
9771                         sx_sunlock(&t4_list_lock);
9772
9773                         if (t4_sge_extfree_refs() == 0) {
9774                                 t4_tracer_modunload();
9775 #ifdef TCP_OFFLOAD
9776                                 sx_destroy(&t4_uld_list_lock);
9777 #endif
9778                                 sx_destroy(&t4_list_lock);
9779                                 t4_sge_modunload();
9780                                 loaded = 0;
9781                         } else {
9782                                 rc = EBUSY;
9783                                 loaded++;       /* undo earlier decrement */
9784                         }
9785                 }
9786 done_unload:
9787                 sx_xunlock(&mlu);
9788                 break;
9789         }
9790
9791         return (rc);
9792 }
9793
/* Device classes: nexus, port, and extra-VI drivers for each of T4/T5/T6. */
static devclass_t t4_devclass, t5_devclass, t6_devclass;
static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;

/*
 * Nexus drivers.  These are declared on the pci bus and all three use
 * mod_event() as their module event handler.  Each depends on the firmware
 * module and, with DEV_NETMAP, on netmap.
 */
DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
MODULE_VERSION(t4nex, 1);
MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
MODULE_VERSION(t5nex, 1);
MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
MODULE_VERSION(t6nex, 1);
MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/* Drivers declared on the corresponding nexus; no event handler of their own. */
DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
MODULE_VERSION(cxgbe, 1);

DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
MODULE_VERSION(cxl, 1);

DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
MODULE_VERSION(cc, 1);

/* Drivers declared on cxgbe/cxl/cc (the "v"-prefixed counterparts). */
DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
MODULE_VERSION(vcxgbe, 1);

DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
MODULE_VERSION(vcxl, 1);

DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
MODULE_VERSION(vcc, 1);