/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <sys/firmware.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_vlan_var.h>
#ifdef RSS
#include <net/rss_config.h>
#endif
#if defined(__i386__) || defined(__amd64__)
#include <machine/md_var.h>
#include <machine/cputypes.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#endif
#include <crypto/rijndael/rijndael.h>
#ifdef DDB
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
#endif

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "cudbg/cudbg.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
#include "t4_mp_ring.h"
#include "t4_if.h"

/* T4 bus driver interface */
static int t4_probe(device_t);
static int t4_attach(device_t);
static int t4_detach(device_t);
static int t4_ready(device_t);
static int t4_read_port_device(device_t, int, device_t *);
static device_method_t t4_methods[] = {
        DEVMETHOD(device_probe,         t4_probe),
        DEVMETHOD(device_attach,        t4_attach),
        DEVMETHOD(device_detach,        t4_detach),

        DEVMETHOD(t4_is_main_ready,     t4_ready),
        DEVMETHOD(t4_read_port_device,  t4_read_port_device),

        DEVMETHOD_END
};
static driver_t t4_driver = {
        "t4nex",
        t4_methods,
        sizeof(struct adapter)
};


/* T4 port (cxgbe) interface */
static int cxgbe_probe(device_t);
static int cxgbe_attach(device_t);
static int cxgbe_detach(device_t);
device_method_t cxgbe_methods[] = {
        DEVMETHOD(device_probe,         cxgbe_probe),
        DEVMETHOD(device_attach,        cxgbe_attach),
        DEVMETHOD(device_detach,        cxgbe_detach),
        { 0, 0 }
};
static driver_t cxgbe_driver = {
        "cxgbe",
        cxgbe_methods,
        sizeof(struct port_info)
};

/* T4 VI (vcxgbe) interface */
static int vcxgbe_probe(device_t);
static int vcxgbe_attach(device_t);
static int vcxgbe_detach(device_t);
static device_method_t vcxgbe_methods[] = {
        DEVMETHOD(device_probe,         vcxgbe_probe),
        DEVMETHOD(device_attach,        vcxgbe_attach),
        DEVMETHOD(device_detach,        vcxgbe_detach),
        { 0, 0 }
};
static driver_t vcxgbe_driver = {
        "vcxgbe",
        vcxgbe_methods,
        sizeof(struct vi_info)
};

static d_ioctl_t t4_ioctl;

static struct cdevsw t4_cdevsw = {
        .d_version = D_VERSION,
        .d_ioctl = t4_ioctl,
        .d_name = "t4nex",
};

/* T5 bus driver interface */
static int t5_probe(device_t);
static device_method_t t5_methods[] = {
        DEVMETHOD(device_probe,         t5_probe),
        DEVMETHOD(device_attach,        t4_attach),
        DEVMETHOD(device_detach,        t4_detach),

        DEVMETHOD(t4_is_main_ready,     t4_ready),
        DEVMETHOD(t4_read_port_device,  t4_read_port_device),

        DEVMETHOD_END
};
static driver_t t5_driver = {
        "t5nex",
        t5_methods,
        sizeof(struct adapter)
};


/* T5 port (cxl) interface */
static driver_t cxl_driver = {
        "cxl",
        cxgbe_methods,
        sizeof(struct port_info)
};

/* T5 VI (vcxl) interface */
static driver_t vcxl_driver = {
        "vcxl",
        vcxgbe_methods,
        sizeof(struct vi_info)
};

/* T6 bus driver interface */
static int t6_probe(device_t);
static device_method_t t6_methods[] = {
        DEVMETHOD(device_probe,         t6_probe),
        DEVMETHOD(device_attach,        t4_attach),
        DEVMETHOD(device_detach,        t4_detach),

        DEVMETHOD(t4_is_main_ready,     t4_ready),
        DEVMETHOD(t4_read_port_device,  t4_read_port_device),

        DEVMETHOD_END
};
static driver_t t6_driver = {
        "t6nex",
        t6_methods,
        sizeof(struct adapter)
};


/* T6 port (cc) interface */
static driver_t cc_driver = {
        "cc",
        cxgbe_methods,
        sizeof(struct port_info)
};

/* T6 VI (vcc) interface */
static driver_t vcc_driver = {
        "vcc",
        vcxgbe_methods,
        sizeof(struct vi_info)
};

/* ifnet + media interface */
static void cxgbe_init(void *);
static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
static int cxgbe_transmit(struct ifnet *, struct mbuf *);
static void cxgbe_qflush(struct ifnet *);
static int cxgbe_media_change(struct ifnet *);
static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);

MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");

/*
 * Correct lock order when you need to acquire multiple locks is t4_list_lock,
 * then ADAPTER_LOCK, then t4_uld_list_lock.
 */
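/*
 * Illustrative sketch of that order (not code from this driver):
 *
 *	sx_slock(&t4_list_lock);
 *	ADAPTER_LOCK(sc);
 *	sx_slock(&t4_uld_list_lock);
 *	...
 *	sx_sunlock(&t4_uld_list_lock);
 *	ADAPTER_UNLOCK(sc);
 *	sx_sunlock(&t4_list_lock);
 */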
static struct sx t4_list_lock;
SLIST_HEAD(, adapter) t4_list;
#ifdef TCP_OFFLOAD
static struct sx t4_uld_list_lock;
SLIST_HEAD(, uld_info) t4_uld_list;
#endif

/*
 * Tunables.  See tweak_tunables() too.
 *
 * Each tunable is set to a default value here if it's known at compile-time.
 * Otherwise it is set to -n as an indication to tweak_tunables() that it
 * should provide a reasonable default (up to n) when the driver is loaded.
 *
 * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
 * T5 are under hw.cxl.
 */
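/*
 * Illustrative example (values assumed, not recommendations): tunables are
 * set from /boot/loader.conf before the module loads, e.g.
 *
 *	hw.cxgbe.ntxq="8"
 *	hw.cxgbe.config_file="uwire"
 *
 * See cxgbe(4) for the complete list.
 */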

/*
 * Number of queues for tx and rx, NIC and offload.
 */
#define NTXQ 16
int t4_ntxq = -NTXQ;
TUNABLE_INT("hw.cxgbe.ntxq", &t4_ntxq);
TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);      /* Old name, undocumented */

#define NRXQ 8
int t4_nrxq = -NRXQ;
TUNABLE_INT("hw.cxgbe.nrxq", &t4_nrxq);
TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);      /* Old name, undocumented */

#define NTXQ_VI 1
static int t4_ntxq_vi = -NTXQ_VI;
TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi);

#define NRXQ_VI 1
static int t4_nrxq_vi = -NRXQ_VI;
TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi);

static int t4_rsrv_noflowq = 0;
TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq);

#ifdef TCP_OFFLOAD
#define NOFLDTXQ 8
static int t4_nofldtxq = -NOFLDTXQ;
TUNABLE_INT("hw.cxgbe.nofldtxq", &t4_nofldtxq);

#define NOFLDRXQ 2
static int t4_nofldrxq = -NOFLDRXQ;
TUNABLE_INT("hw.cxgbe.nofldrxq", &t4_nofldrxq);

#define NOFLDTXQ_VI 1
static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi);

#define NOFLDRXQ_VI 1
static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi);

#define TMR_IDX_OFLD 1
int t4_tmr_idx_ofld = TMR_IDX_OFLD;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_ofld", &t4_tmr_idx_ofld);

#define PKTC_IDX_OFLD (-1)
int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_ofld", &t4_pktc_idx_ofld);

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_idle = 0;
TUNABLE_ULONG("hw.cxgbe.toe.keepalive_idle", &t4_toe_keepalive_idle);

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_keepalive_interval = 0;
TUNABLE_ULONG("hw.cxgbe.toe.keepalive_interval", &t4_toe_keepalive_interval);

/* 0 means chip/fw default, non-zero number is # of keepalives before abort */
static int t4_toe_keepalive_count = 0;
TUNABLE_INT("hw.cxgbe.toe.keepalive_count", &t4_toe_keepalive_count);

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_min = 0;
TUNABLE_ULONG("hw.cxgbe.toe.rexmt_min", &t4_toe_rexmt_min);

/* 0 means chip/fw default, non-zero number is value in microseconds */
static u_long t4_toe_rexmt_max = 0;
TUNABLE_ULONG("hw.cxgbe.toe.rexmt_max", &t4_toe_rexmt_max);

/* 0 means chip/fw default, non-zero number is # of rexmt before abort */
static int t4_toe_rexmt_count = 0;
TUNABLE_INT("hw.cxgbe.toe.rexmt_count", &t4_toe_rexmt_count);

/* -1 means chip/fw default, other values are raw backoff values to use */
static int t4_toe_rexmt_backoff[16] = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.0", &t4_toe_rexmt_backoff[0]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.1", &t4_toe_rexmt_backoff[1]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.2", &t4_toe_rexmt_backoff[2]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.3", &t4_toe_rexmt_backoff[3]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.4", &t4_toe_rexmt_backoff[4]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.5", &t4_toe_rexmt_backoff[5]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.6", &t4_toe_rexmt_backoff[6]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.7", &t4_toe_rexmt_backoff[7]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.8", &t4_toe_rexmt_backoff[8]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.9", &t4_toe_rexmt_backoff[9]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.10", &t4_toe_rexmt_backoff[10]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.11", &t4_toe_rexmt_backoff[11]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.12", &t4_toe_rexmt_backoff[12]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.13", &t4_toe_rexmt_backoff[13]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.14", &t4_toe_rexmt_backoff[14]);
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.15", &t4_toe_rexmt_backoff[15]);
#endif

#ifdef DEV_NETMAP
#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -NNMTXQ_VI;
TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi);

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -NNMRXQ_VI;
TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi);
#endif

/*
 * Holdoff parameters for ports.
 */
#define TMR_IDX 1
int t4_tmr_idx = TMR_IDX;
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx", &t4_tmr_idx);
TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);     /* Old name */

#define PKTC_IDX (-1)
int t4_pktc_idx = PKTC_IDX;
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx", &t4_pktc_idx);
TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);     /* Old name */

/*
 * Size (# of entries) of each tx and rx queue.
 */
unsigned int t4_qsize_txq = TX_EQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);

unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);

/*
 * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
 */
int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
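/*
 * For example (illustrative): hw.cxgbe.interrupt_types=4 restricts the driver
 * to MSI-X, 2 restricts it to MSI, 1 to INTx, and the default of 7 lets it
 * use the best type available.
 */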

/*
 * Configuration file.
 */
#define DEFAULT_CF      "default"
#define FLASH_CF        "flash"
#define UWIRE_CF        "uwire"
#define FPGA_CF         "fpga"
static char t4_cfg_file[32] = DEFAULT_CF;
TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));

/*
 * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively).
 * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
 * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
 *            mark or when signalled to do so, 0 to never emit PAUSE.
 */
static int t4_pause_settings = PAUSE_TX | PAUSE_RX;
TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
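/*
 * For example (illustrative): hw.cxgbe.pause_settings=0 disables PAUSE in
 * both directions, 1 enables rx_pause only, and the default of 3 enables both
 * rx_pause and tx_pause.
 */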

/*
 * Forward Error Correction settings (bit 0, 1, 2 = FEC_RS, FEC_BASER_RS,
 * FEC_RESERVED respectively).
 * -1 to run with the firmware default.
 *  0 to disable FEC.
 */
static int t4_fec = -1;
TUNABLE_INT("hw.cxgbe.fec", &t4_fec);
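/*
 * For example (illustrative): hw.cxgbe.fec=1 selects FEC_RS, 2 selects
 * FEC_BASER_RS, 0 disables FEC, and the default of -1 defers to the firmware.
 */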

/*
 * Link autonegotiation.
 * -1 to run with the firmware default.
 *  0 to disable.
 *  1 to enable.
 */
static int t4_autoneg = -1;
TUNABLE_INT("hw.cxgbe.autoneg", &t4_autoneg);

/*
 * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
 * encouraged respectively).
 */
static unsigned int t4_fw_install = 1;
TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install);

/*
 * ASIC features that will be used.  Disable the ones you don't want so that the
 * chip resources aren't wasted on features that will not be used.
 */
static int t4_nbmcaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed);

static int t4_linkcaps_allowed = 0;     /* No DCBX, PPP, etc. by default */
TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);

static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
    FW_CAPS_CONFIG_SWITCH_EGRESS;
TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);

static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);

static int t4_toecaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);

static int t4_rdmacaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);

static int t4_cryptocaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.cryptocaps_allowed", &t4_cryptocaps_allowed);

static int t4_iscsicaps_allowed = -1;
TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);

static int t4_fcoecaps_allowed = 0;
TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);

static int t5_write_combine = 1;
TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);

static int t4_num_vis = 1;
TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);

/*
 * PCIe Relaxed Ordering.
 * -1: driver should figure out a good value.
 * 0: disable RO.
 * 1: enable RO.
 * 2: leave RO alone.
 */
static int pcie_relaxed_ordering = -1;
TUNABLE_INT("hw.cxgbe.pcie_relaxed_ordering", &pcie_relaxed_ordering);

static int t4_panic_on_fatal_err = 0;
TUNABLE_INT("hw.cxgbe.panic_on_fatal_err", &t4_panic_on_fatal_err);

#ifdef TCP_OFFLOAD
/*
 * TOE tunables.
 */
static int t4_cop_managed_offloading = 0;
TUNABLE_INT("hw.cxgbe.cop_managed_offloading", &t4_cop_managed_offloading);
#endif

/* Functions used by VIs to obtain unique MAC addresses for each VI. */
static int vi_mac_funcs[] = {
        FW_VI_FUNC_ETH,
        FW_VI_FUNC_OFLD,
        FW_VI_FUNC_IWARP,
        FW_VI_FUNC_OPENISCSI,
        FW_VI_FUNC_OPENFCOE,
        FW_VI_FUNC_FOISCSI,
        FW_VI_FUNC_FOFCOE,
};

struct intrs_and_queues {
        uint16_t intr_type;     /* INTx, MSI, or MSI-X */
        uint16_t num_vis;       /* number of VIs for each port */
        uint16_t nirq;          /* Total # of vectors */
        uint16_t ntxq;          /* # of NIC txq's for each port */
        uint16_t nrxq;          /* # of NIC rxq's for each port */
        uint16_t nofldtxq;      /* # of TOE txq's for each port */
        uint16_t nofldrxq;      /* # of TOE rxq's for each port */

        /* The vcxgbe/vcxl interfaces use these and not the ones above. */
        uint16_t ntxq_vi;       /* # of NIC txq's */
        uint16_t nrxq_vi;       /* # of NIC rxq's */
        uint16_t nofldtxq_vi;   /* # of TOE txq's */
        uint16_t nofldrxq_vi;   /* # of TOE rxq's */
        uint16_t nnmtxq_vi;     /* # of netmap txq's */
        uint16_t nnmrxq_vi;     /* # of netmap rxq's */
};

static void setup_memwin(struct adapter *);
static void position_memwin(struct adapter *, int, uint32_t);
static int validate_mem_range(struct adapter *, uint32_t, int);
static int fwmtype_to_hwmtype(int);
static int validate_mt_off_len(struct adapter *, int, uint32_t, int,
    uint32_t *);
static int fixup_devlog_params(struct adapter *);
static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
static int prep_firmware(struct adapter *);
static int partition_resources(struct adapter *, const struct firmware *,
    const char *);
static int get_params__pre_init(struct adapter *);
static int get_params__post_init(struct adapter *);
static int set_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static void build_medialist(struct port_info *, struct ifmedia *);
static void init_l1cfg(struct port_info *);
static int cxgbe_init_synchronized(struct vi_info *);
static int cxgbe_uninit_synchronized(struct vi_info *);
static void quiesce_txq(struct adapter *, struct sge_txq *);
static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
    driver_intr_t *, void *, char *);
static int t4_free_irq(struct adapter *, struct irq *);
static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
static void vi_refresh_stats(struct adapter *, struct vi_info *);
static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
static void cxgbe_tick(void *);
static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
static void cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
static int sysctl_fec(SYSCTL_HANDLER_ARGS);
static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
#ifdef SBUF_DRAIN
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tids(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_tc_params(SYSCTL_HANDLER_ARGS);
#endif
#ifdef TCP_OFFLOAD
static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
#endif
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int load_fw(struct adapter *, struct t4_data *);
static int load_cfg(struct adapter *, struct t4_data *);
static int load_boot(struct adapter *, struct t4_bootrom *);
static int load_bootcfg(struct adapter *, struct t4_data *);
static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
static void free_offload_policy(struct t4_offload_policy *);
static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
#ifdef TCP_OFFLOAD
static int toe_capability(struct vi_info *, int);
#endif
static int mod_event(module_t, int, void *);
static int notify_siblings(device_t, int);

struct {
        uint16_t device;
        char *desc;
} t4_pciids[] = {
        {0xa000, "Chelsio Terminator 4 FPGA"},
        {0x4400, "Chelsio T440-dbg"},
        {0x4401, "Chelsio T420-CR"},
        {0x4402, "Chelsio T422-CR"},
        {0x4403, "Chelsio T440-CR"},
        {0x4404, "Chelsio T420-BCH"},
        {0x4405, "Chelsio T440-BCH"},
        {0x4406, "Chelsio T440-CH"},
        {0x4407, "Chelsio T420-SO"},
        {0x4408, "Chelsio T420-CX"},
        {0x4409, "Chelsio T420-BT"},
        {0x440a, "Chelsio T404-BT"},
        {0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
        {0xb000, "Chelsio Terminator 5 FPGA"},
        {0x5400, "Chelsio T580-dbg"},
        {0x5401,  "Chelsio T520-CR"},           /* 2 x 10G */
        {0x5402,  "Chelsio T522-CR"},           /* 2 x 10G, 2 X 1G */
        {0x5403,  "Chelsio T540-CR"},           /* 4 x 10G */
        {0x5407,  "Chelsio T520-SO"},           /* 2 x 10G, nomem */
        {0x5409,  "Chelsio T520-BT"},           /* 2 x 10GBaseT */
        {0x540a,  "Chelsio T504-BT"},           /* 4 x 1G */
        {0x540d,  "Chelsio T580-CR"},           /* 2 x 40G */
        {0x540e,  "Chelsio T540-LP-CR"},        /* 4 x 10G */
        {0x5410,  "Chelsio T580-LP-CR"},        /* 2 x 40G */
        {0x5411,  "Chelsio T520-LL-CR"},        /* 2 x 10G */
        {0x5412,  "Chelsio T560-CR"},           /* 1 x 40G, 2 x 10G */
        {0x5414,  "Chelsio T580-LP-SO-CR"},     /* 2 x 40G, nomem */
        {0x5415,  "Chelsio T502-BT"},           /* 2 x 1G */
#ifdef notyet
        {0x5404,  "Chelsio T520-BCH"},
        {0x5405,  "Chelsio T540-BCH"},
        {0x5406,  "Chelsio T540-CH"},
        {0x5408,  "Chelsio T520-CX"},
        {0x540b,  "Chelsio B520-SR"},
        {0x540c,  "Chelsio B504-BT"},
        {0x540f,  "Chelsio Amsterdam"},
        {0x5413,  "Chelsio T580-CHR"},
#endif
}, t6_pciids[] = {
        {0xc006, "Chelsio Terminator 6 FPGA"},  /* T6 PE10K6 FPGA (PF0) */
        {0x6400, "Chelsio T6-DBG-25"},          /* 2 x 10/25G, debug */
        {0x6401, "Chelsio T6225-CR"},           /* 2 x 10/25G */
        {0x6402, "Chelsio T6225-SO-CR"},        /* 2 x 10/25G, nomem */
        {0x6403, "Chelsio T6425-CR"},           /* 4 x 10/25G */
        {0x6404, "Chelsio T6425-SO-CR"},        /* 4 x 10/25G, nomem */
        {0x6405, "Chelsio T6225-OCP-SO"},       /* 2 x 10/25G, nomem */
        {0x6406, "Chelsio T62100-OCP-SO"},      /* 2 x 40/50/100G, nomem */
        {0x6407, "Chelsio T62100-LP-CR"},       /* 2 x 40/50/100G */
        {0x6408, "Chelsio T62100-SO-CR"},       /* 2 x 40/50/100G, nomem */
        {0x6409, "Chelsio T6210-BT"},           /* 2 x 10GBASE-T */
        {0x640d, "Chelsio T62100-CR"},          /* 2 x 40/50/100G */
        {0x6410, "Chelsio T6-DBG-100"},         /* 2 x 40/50/100G, debug */
        {0x6411, "Chelsio T6225-LL-CR"},        /* 2 x 10/25G */
        {0x6414, "Chelsio T61100-OCP-SO"},      /* 1 x 40/50/100G, nomem */
        {0x6415, "Chelsio T6201-BT"},           /* 2 x 1000BASE-T */

        /* Custom */
        {0x6480, "Chelsio T6225 80"},
        {0x6481, "Chelsio T62100 81"},
        {0x6484, "Chelsio T62100 84"},
};

#ifdef TCP_OFFLOAD
/*
 * service_iq() has an iq and needs the fl.  Offset of fl from the iq should be
 * exactly the same for both rxq and ofld_rxq.
 */
CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif
CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);

static int
t4_probe(device_t dev)
{
        int i;
        uint16_t v = pci_get_vendor(dev);
        uint16_t d = pci_get_device(dev);
        uint8_t f = pci_get_function(dev);

        if (v != PCI_VENDOR_ID_CHELSIO)
                return (ENXIO);

        /* Attach only to PF0 of the FPGA */
        if (d == 0xa000 && f != 0)
                return (ENXIO);

        for (i = 0; i < nitems(t4_pciids); i++) {
                if (d == t4_pciids[i].device) {
                        device_set_desc(dev, t4_pciids[i].desc);
                        return (BUS_PROBE_DEFAULT);
                }
        }

        return (ENXIO);
}

static int
t5_probe(device_t dev)
{
        int i;
        uint16_t v = pci_get_vendor(dev);
        uint16_t d = pci_get_device(dev);
        uint8_t f = pci_get_function(dev);

        if (v != PCI_VENDOR_ID_CHELSIO)
                return (ENXIO);

        /* Attach only to PF0 of the FPGA */
        if (d == 0xb000 && f != 0)
                return (ENXIO);

        for (i = 0; i < nitems(t5_pciids); i++) {
                if (d == t5_pciids[i].device) {
                        device_set_desc(dev, t5_pciids[i].desc);
                        return (BUS_PROBE_DEFAULT);
                }
        }

        return (ENXIO);
}

static int
t6_probe(device_t dev)
{
        int i;
        uint16_t v = pci_get_vendor(dev);
        uint16_t d = pci_get_device(dev);

        if (v != PCI_VENDOR_ID_CHELSIO)
                return (ENXIO);

        for (i = 0; i < nitems(t6_pciids); i++) {
                if (d == t6_pciids[i].device) {
                        device_set_desc(dev, t6_pciids[i].desc);
                        return (BUS_PROBE_DEFAULT);
                }
        }

        return (ENXIO);
}

static void
t5_attribute_workaround(device_t dev)
{
        device_t root_port;
        uint32_t v;

        /*
         * The T5 chips do not properly echo the No Snoop and Relaxed
         * Ordering attributes when replying to a TLP from a Root
         * Port.  As a workaround, find the parent Root Port and
         * disable No Snoop and Relaxed Ordering.  Note that this
         * affects all devices under this root port.
         */
        root_port = pci_find_pcie_root_port(dev);
        if (root_port == NULL) {
                device_printf(dev, "Unable to find parent root port\n");
                return;
        }

        v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
            PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
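        /*
         * pcie_adjust_config() is expected to return the register's previous
         * value, so a non-zero result here means one of the attributes was
         * enabled and has just been turned off.
         */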
        if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
            0)
                device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
                    device_get_nameunit(root_port));
}

static const struct devnames devnames[] = {
        {
                .nexus_name = "t4nex",
                .ifnet_name = "cxgbe",
                .vi_ifnet_name = "vcxgbe",
                .pf03_drv_name = "t4iov",
                .vf_nexus_name = "t4vf",
                .vf_ifnet_name = "cxgbev"
        }, {
                .nexus_name = "t5nex",
                .ifnet_name = "cxl",
                .vi_ifnet_name = "vcxl",
                .pf03_drv_name = "t5iov",
                .vf_nexus_name = "t5vf",
                .vf_ifnet_name = "cxlv"
        }, {
                .nexus_name = "t6nex",
                .ifnet_name = "cc",
                .vi_ifnet_name = "vcc",
                .pf03_drv_name = "t6iov",
                .vf_nexus_name = "t6vf",
                .vf_ifnet_name = "ccv"
        }
};

void
t4_init_devnames(struct adapter *sc)
{
        int id;

        id = chip_id(sc);
        if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
                sc->names = &devnames[id - CHELSIO_T4];
        else {
                device_printf(sc->dev, "chip id %d is not supported.\n", id);
                sc->names = NULL;
        }
}

static int
t4_attach(device_t dev)
{
        struct adapter *sc;
        int rc = 0, i, j, rqidx, tqidx, nports;
        struct make_dev_args mda;
        struct intrs_and_queues iaq;
        struct sge *s;
        uint32_t *buf;
#ifdef TCP_OFFLOAD
        int ofld_rqidx, ofld_tqidx;
#endif
#ifdef DEV_NETMAP
        int nm_rqidx, nm_tqidx;
#endif
        int num_vis;

        sc = device_get_softc(dev);
        sc->dev = dev;
        TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);

        if ((pci_get_device(dev) & 0xff00) == 0x5400)
                t5_attribute_workaround(dev);
        pci_enable_busmaster(dev);
        if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
                uint32_t v;

                pci_set_max_read_req(dev, 4096);
                v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
                sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
                if (pcie_relaxed_ordering == 0 &&
                    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
                        v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
                        pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
                } else if (pcie_relaxed_ordering == 1 &&
                    (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
                        v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
                        pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
                }
        }

        sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
        sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
        sc->traceq = -1;
        snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
            device_get_nameunit(dev));
        mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);

        snprintf(sc->lockname, sizeof(sc->lockname), "%s",
            device_get_nameunit(dev));
        mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
        t4_add_adapter(sc);

        mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
        TAILQ_INIT(&sc->sfl);
        callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

        mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

        sc->policy = NULL;
        rw_init(&sc->policy_lock, "connection offload policy");

        rc = t4_map_bars_0_and_4(sc);
        if (rc != 0)
                goto done; /* error message displayed already */

        memset(sc->chan_map, 0xff, sizeof(sc->chan_map));

        /* Prepare the adapter for operation. */
        buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
        rc = -t4_prep_adapter(sc, buf);
        free(buf, M_CXGBE);
        if (rc != 0) {
                device_printf(dev, "failed to prepare adapter: %d.\n", rc);
                goto done;
        }

        /*
         * This is the real PF# to which we're attaching.  Works from within PCI
         * passthrough environments too, where pci_get_function() could return a
         * different PF# depending on the passthrough configuration.  We need to
         * use the real PF# in all our communication with the firmware.
         */
        j = t4_read_reg(sc, A_PL_WHOAMI);
        sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
        sc->mbox = sc->pf;

        t4_init_devnames(sc);
        if (sc->names == NULL) {
                rc = ENOTSUP;
                goto done; /* error message displayed already */
        }

        /*
         * Do this really early, with the memory windows set up even before the
         * character device.  The userland tool's register i/o and mem read
         * will work even in "recovery mode".
         */
        setup_memwin(sc);
        if (t4_init_devlog_params(sc, 0) == 0)
                fixup_devlog_params(sc);
        make_dev_args_init(&mda);
        mda.mda_devsw = &t4_cdevsw;
        mda.mda_uid = UID_ROOT;
        mda.mda_gid = GID_WHEEL;
        mda.mda_mode = 0600;
        mda.mda_si_drv1 = sc;
        rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
        if (rc != 0)
                device_printf(dev, "failed to create nexus char device: %d.\n",
                    rc);

        /* Go no further if recovery mode has been requested. */
        if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
                device_printf(dev, "recovery mode.\n");
                goto done;
        }

#if defined(__i386__)
        if ((cpu_feature & CPUID_CX8) == 0) {
                device_printf(dev, "64 bit atomics not available.\n");
                rc = ENOTSUP;
                goto done;
        }
#endif

        /* Prepare the firmware for operation */
        rc = prep_firmware(sc);
        if (rc != 0)
                goto done; /* error message displayed already */

        rc = get_params__post_init(sc);
        if (rc != 0)
                goto done; /* error message displayed already */

        rc = set_params__post_init(sc);
        if (rc != 0)
                goto done; /* error message displayed already */

        rc = t4_map_bar_2(sc);
        if (rc != 0)
                goto done; /* error message displayed already */

        rc = t4_create_dma_tag(sc);
        if (rc != 0)
                goto done; /* error message displayed already */

        /*
         * First pass over all the ports - allocate VIs and initialize some
         * basic parameters like mac address, port type, etc.
         */
        for_each_port(sc, i) {
                struct port_info *pi;

                pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
                sc->port[i] = pi;

                /* These must be set before t4_port_init */
                pi->adapter = sc;
                pi->port_id = i;
                /*
                 * XXX: vi[0] is special so we can't delay this allocation until
                 * pi->nvi's final value is known.
                 */
                pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
                    M_ZERO | M_WAITOK);

                /*
                 * Allocate the "main" VI and initialize parameters
                 * like mac addr.
                 */
                rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
                if (rc != 0) {
                        device_printf(dev, "unable to initialize port %d: %d\n",
                            i, rc);
                        free(pi->vi, M_CXGBE);
                        free(pi, M_CXGBE);
                        sc->port[i] = NULL;
                        goto done;
                }

                snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
                    device_get_nameunit(dev), i);
                mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
                sc->chan_map[pi->tx_chan] = i;

                /* All VIs on this port share this media. */
                ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
                    cxgbe_media_status);

                pi->dev = device_add_child(dev, sc->names->ifnet_name, -1);
                if (pi->dev == NULL) {
                        device_printf(dev,
                            "failed to add device for port %d.\n", i);
                        rc = ENXIO;
                        goto done;
                }
                pi->vi[0].dev = pi->dev;
                device_set_softc(pi->dev, pi);
        }

        /*
         * Interrupt type, # of interrupts, # of rx/tx queues, etc.
         */
        nports = sc->params.nports;
        rc = cfg_itype_and_nqueues(sc, &iaq);
        if (rc != 0)
                goto done; /* error message displayed already */

        num_vis = iaq.num_vis;
        sc->intr_type = iaq.intr_type;
        sc->intr_count = iaq.nirq;

        s = &sc->sge;
        s->nrxq = nports * iaq.nrxq;
        s->ntxq = nports * iaq.ntxq;
        if (num_vis > 1) {
                s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
                s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
        }
        s->neq = s->ntxq + s->nrxq;     /* the free list in an rxq is an eq */
        s->neq += nports + 1;           /* ctrl queues: 1 per port + 1 mgmt */
        s->niq = s->nrxq + 1;           /* 1 extra for firmware event queue */
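        /*
         * Illustrative arithmetic (assumed values, not defaults for any
         * particular card): a 2-port adapter with num_vis = 1, nrxq = 8, and
         * ntxq = 16 ends up with s->nrxq = 16, s->ntxq = 32,
         * s->neq = 16 + 32 + 2 + 1 = 51 (an eq per txq and per rxq free list,
         * plus the ctrl/mgmt queues), and s->niq = 16 + 1 = 17.
         */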
#ifdef TCP_OFFLOAD
        if (is_offload(sc)) {
                s->nofldrxq = nports * iaq.nofldrxq;
                s->nofldtxq = nports * iaq.nofldtxq;
                if (num_vis > 1) {
                        s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
                        s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
                }
                s->neq += s->nofldtxq + s->nofldrxq;
                s->niq += s->nofldrxq;

                s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
                    M_CXGBE, M_ZERO | M_WAITOK);
                s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
                    M_CXGBE, M_ZERO | M_WAITOK);
        }
#endif
#ifdef DEV_NETMAP
        if (num_vis > 1) {
                s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi;
                s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi;
        }
        s->neq += s->nnmtxq + s->nnmrxq;
        s->niq += s->nnmrxq;

        s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
            M_CXGBE, M_ZERO | M_WAITOK);
        s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
            M_CXGBE, M_ZERO | M_WAITOK);
#endif

        s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
            M_ZERO | M_WAITOK);
        s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
            M_ZERO | M_WAITOK);
        s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
            M_ZERO | M_WAITOK);
        s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
            M_ZERO | M_WAITOK);
        s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
            M_ZERO | M_WAITOK);

        sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
            M_ZERO | M_WAITOK);

        t4_init_l2t(sc, M_WAITOK);
        t4_init_tx_sched(sc);

        /*
         * Second pass over the ports.  This time we know the number of rx and
         * tx queues that each port should get.
         */
        rqidx = tqidx = 0;
#ifdef TCP_OFFLOAD
        ofld_rqidx = ofld_tqidx = 0;
#endif
#ifdef DEV_NETMAP
        nm_rqidx = nm_tqidx = 0;
#endif
        for_each_port(sc, i) {
                struct port_info *pi = sc->port[i];
                struct vi_info *vi;

                if (pi == NULL)
                        continue;

                pi->nvi = num_vis;
                for_each_vi(pi, j, vi) {
                        vi->pi = pi;
                        vi->qsize_rxq = t4_qsize_rxq;
                        vi->qsize_txq = t4_qsize_txq;

                        vi->first_rxq = rqidx;
                        vi->first_txq = tqidx;
                        vi->tmr_idx = t4_tmr_idx;
                        vi->pktc_idx = t4_pktc_idx;
                        vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
                        vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;

                        rqidx += vi->nrxq;
                        tqidx += vi->ntxq;

                        if (j == 0 && vi->ntxq > 1)
                                vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
                        else
                                vi->rsrv_noflowq = 0;

#ifdef TCP_OFFLOAD
                        vi->ofld_tmr_idx = t4_tmr_idx_ofld;
                        vi->ofld_pktc_idx = t4_pktc_idx_ofld;
                        vi->first_ofld_rxq = ofld_rqidx;
                        vi->first_ofld_txq = ofld_tqidx;
                        vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;
                        vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;

                        ofld_rqidx += vi->nofldrxq;
                        ofld_tqidx += vi->nofldtxq;
#endif
#ifdef DEV_NETMAP
                        if (j > 0) {
                                vi->first_nm_rxq = nm_rqidx;
                                vi->first_nm_txq = nm_tqidx;
                                vi->nnmrxq = iaq.nnmrxq_vi;
                                vi->nnmtxq = iaq.nnmtxq_vi;
                                nm_rqidx += vi->nnmrxq;
                                nm_tqidx += vi->nnmtxq;
                        }
#endif
                }
        }

        rc = t4_setup_intr_handlers(sc);
        if (rc != 0) {
                device_printf(dev,
                    "failed to setup interrupt handlers: %d\n", rc);
                goto done;
        }

        rc = bus_generic_probe(dev);
        if (rc != 0) {
                device_printf(dev, "failed to probe child drivers: %d\n", rc);
                goto done;
        }

        /*
         * Ensure thread-safe mailbox access (in debug builds).
         *
         * So far this was the only thread accessing the mailbox but various
         * ifnets and sysctls are about to be created and their handlers/ioctls
         * will access the mailbox from different threads.
         */
        sc->flags |= CHK_MBOX_ACCESS;

        rc = bus_generic_attach(dev);
        if (rc != 0) {
                device_printf(dev,
                    "failed to attach all child ports: %d\n", rc);
                goto done;
        }

        device_printf(dev,
            "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
            sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
            sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
            (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
            sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);

        t4_set_desc(sc);

        notify_siblings(dev, 0);

done:
        if (rc != 0 && sc->cdev) {
                /* cdev was created and so cxgbetool works; recover that way. */
                device_printf(dev,
                    "error during attach, adapter is now in recovery mode.\n");
                rc = 0;
        }

        if (rc != 0)
                t4_detach_common(dev);
        else
                t4_sysctls(sc);

        return (rc);
}

static int
t4_ready(device_t dev)
{
        struct adapter *sc;

        sc = device_get_softc(dev);
        if (sc->flags & FW_OK)
                return (0);
        return (ENXIO);
}

static int
t4_read_port_device(device_t dev, int port, device_t *child)
{
        struct adapter *sc;
        struct port_info *pi;

        sc = device_get_softc(dev);
        if (port < 0 || port >= MAX_NPORTS)
                return (EINVAL);
        pi = sc->port[port];
        if (pi == NULL || pi->dev == NULL)
                return (ENXIO);
        *child = pi->dev;
        return (0);
}

static int
notify_siblings(device_t dev, int detaching)
{
        device_t sibling;
        int error, i;

        error = 0;
        for (i = 0; i < PCI_FUNCMAX; i++) {
                if (i == pci_get_function(dev))
                        continue;
                sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
                    pci_get_slot(dev), i);
                if (sibling == NULL || !device_is_attached(sibling))
                        continue;
                if (detaching)
                        error = T4_DETACH_CHILD(sibling);
                else
                        (void)T4_ATTACH_CHILD(sibling);
                if (error)
                        break;
        }
        return (error);
}

/*
 * Idempotent
 */
static int
t4_detach(device_t dev)
{
        struct adapter *sc;
        int rc;

        sc = device_get_softc(dev);

        rc = notify_siblings(dev, 1);
        if (rc) {
                device_printf(dev,
                    "failed to detach sibling devices: %d\n", rc);
                return (rc);
        }

        return (t4_detach_common(dev));
}

int
t4_detach_common(device_t dev)
{
        struct adapter *sc;
        struct port_info *pi;
        int i, rc;

        sc = device_get_softc(dev);

        if (sc->cdev) {
                destroy_dev(sc->cdev);
                sc->cdev = NULL;
        }

        sc->flags &= ~CHK_MBOX_ACCESS;
        if (sc->flags & FULL_INIT_DONE) {
                if (!(sc->flags & IS_VF))
                        t4_intr_disable(sc);
        }

        if (device_is_attached(dev)) {
                rc = bus_generic_detach(dev);
                if (rc) {
                        device_printf(dev,
                            "failed to detach child devices: %d\n", rc);
                        return (rc);
                }
        }

        for (i = 0; i < sc->intr_count; i++)
                t4_free_irq(sc, &sc->irq[i]);

        if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
                t4_free_tx_sched(sc);

        for (i = 0; i < MAX_NPORTS; i++) {
                pi = sc->port[i];
                if (pi) {
                        t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
                        if (pi->dev)
                                device_delete_child(dev, pi->dev);

                        mtx_destroy(&pi->pi_lock);
                        free(pi->vi, M_CXGBE);
                        free(pi, M_CXGBE);
                }
        }

        device_delete_children(dev);

        if (sc->flags & FULL_INIT_DONE)
                adapter_full_uninit(sc);

        if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
                t4_fw_bye(sc, sc->mbox);

        if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
                pci_release_msi(dev);

        if (sc->regs_res)
                bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
                    sc->regs_res);

        if (sc->udbs_res)
                bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
                    sc->udbs_res);

        if (sc->msix_res)
                bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
                    sc->msix_res);

        if (sc->l2t)
                t4_free_l2t(sc->l2t);

#ifdef TCP_OFFLOAD
        free(sc->sge.ofld_rxq, M_CXGBE);
        free(sc->sge.ofld_txq, M_CXGBE);
#endif
#ifdef DEV_NETMAP
        free(sc->sge.nm_rxq, M_CXGBE);
        free(sc->sge.nm_txq, M_CXGBE);
#endif
        free(sc->irq, M_CXGBE);
        free(sc->sge.rxq, M_CXGBE);
        free(sc->sge.txq, M_CXGBE);
        free(sc->sge.ctrlq, M_CXGBE);
        free(sc->sge.iqmap, M_CXGBE);
        free(sc->sge.eqmap, M_CXGBE);
        free(sc->tids.ftid_tab, M_CXGBE);
        free(sc->tt.tls_rx_ports, M_CXGBE);
        t4_destroy_dma_tag(sc);
        if (mtx_initialized(&sc->sc_lock)) {
                sx_xlock(&t4_list_lock);
                SLIST_REMOVE(&t4_list, sc, adapter, link);
                sx_xunlock(&t4_list_lock);
                mtx_destroy(&sc->sc_lock);
        }

        callout_drain(&sc->sfl_callout);
        if (mtx_initialized(&sc->tids.ftid_lock))
                mtx_destroy(&sc->tids.ftid_lock);
        if (mtx_initialized(&sc->sfl_lock))
                mtx_destroy(&sc->sfl_lock);
        if (mtx_initialized(&sc->ifp_lock))
                mtx_destroy(&sc->ifp_lock);
        if (mtx_initialized(&sc->reg_lock))
                mtx_destroy(&sc->reg_lock);

        if (rw_initialized(&sc->policy_lock)) {
                rw_destroy(&sc->policy_lock);
#ifdef TCP_OFFLOAD
                if (sc->policy != NULL)
                        free_offload_policy(sc->policy);
#endif
        }

        for (i = 0; i < NUM_MEMWIN; i++) {
                struct memwin *mw = &sc->memwin[i];

                if (rw_initialized(&mw->mw_lock))
                        rw_destroy(&mw->mw_lock);
        }

        bzero(sc, sizeof(*sc));

        return (0);
}

static int
cxgbe_probe(device_t dev)
{
        char buf[128];
        struct port_info *pi = device_get_softc(dev);

        snprintf(buf, sizeof(buf), "port %d", pi->port_id);
        device_set_desc_copy(dev, buf);

        return (BUS_PROBE_DEFAULT);
}

#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS)
#define T4_CAP_ENABLE (T4_CAP)

static int
cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
        struct ifnet *ifp;
        struct sbuf *sb;

        vi->xact_addr_filt = -1;
        callout_init(&vi->tick, 1);

        /* Allocate an ifnet and set it up */
        ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "Cannot allocate ifnet\n");
                return (ENOMEM);
        }
        vi->ifp = ifp;
        ifp->if_softc = vi;

        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;

        ifp->if_init = cxgbe_init;
        ifp->if_ioctl = cxgbe_ioctl;
        ifp->if_transmit = cxgbe_transmit;
        ifp->if_qflush = cxgbe_qflush;
        ifp->if_get_counter = cxgbe_get_counter;

        ifp->if_capabilities = T4_CAP;
#ifdef TCP_OFFLOAD
        if (vi->nofldrxq != 0)
                ifp->if_capabilities |= IFCAP_TOE;
#endif
#ifdef DEV_NETMAP
        if (vi->nnmrxq != 0)
                ifp->if_capabilities |= IFCAP_NETMAP;
#endif
        ifp->if_capenable = T4_CAP_ENABLE;
        ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
            CSUM_UDP_IPV6 | CSUM_TCP_IPV6;

        ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
        ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS;
        ifp->if_hw_tsomaxsegsize = 65536;

        vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
            EVENTHANDLER_PRI_ANY);

        ether_ifattach(ifp, vi->hw_addr);
#ifdef DEV_NETMAP
        if (ifp->if_capabilities & IFCAP_NETMAP)
                cxgbe_nm_attach(vi);
#endif
        sb = sbuf_new_auto();
        sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
#ifdef TCP_OFFLOAD
        if (ifp->if_capabilities & IFCAP_TOE)
                sbuf_printf(sb, "; %d txq, %d rxq (TOE)",
                    vi->nofldtxq, vi->nofldrxq);
#endif
#ifdef DEV_NETMAP
        if (ifp->if_capabilities & IFCAP_NETMAP)
                sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
                    vi->nnmtxq, vi->nnmrxq);
#endif
        sbuf_finish(sb);
        device_printf(dev, "%s\n", sbuf_data(sb));
        sbuf_delete(sb);

        vi_sysctls(vi);

        return (0);
}

static int
cxgbe_attach(device_t dev)
{
        struct port_info *pi = device_get_softc(dev);
        struct adapter *sc = pi->adapter;
        struct vi_info *vi;
        int i, rc;

        callout_init_mtx(&pi->tick, &pi->pi_lock, 0);

        rc = cxgbe_vi_attach(dev, &pi->vi[0]);
        if (rc)
                return (rc);

        for_each_vi(pi, i, vi) {
                if (i == 0)
                        continue;
                vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
                if (vi->dev == NULL) {
                        device_printf(dev, "failed to add VI %d\n", i);
                        continue;
                }
                device_set_softc(vi->dev, vi);
        }

        cxgbe_sysctls(pi);

        bus_generic_attach(dev);

        return (0);
}

static void
cxgbe_vi_detach(struct vi_info *vi)
{
        struct ifnet *ifp = vi->ifp;

        ether_ifdetach(ifp);

        if (vi->vlan_c)
                EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c);

        /* Let detach proceed even if these fail. */
#ifdef DEV_NETMAP
        if (ifp->if_capabilities & IFCAP_NETMAP)
                cxgbe_nm_detach(vi);
#endif
        cxgbe_uninit_synchronized(vi);
        callout_drain(&vi->tick);
        vi_full_uninit(vi);

        if_free(vi->ifp);
        vi->ifp = NULL;
}

static int
cxgbe_detach(device_t dev)
{
        struct port_info *pi = device_get_softc(dev);
        struct adapter *sc = pi->adapter;
        int rc;

        /* Detach the extra VIs first. */
        rc = bus_generic_detach(dev);
        if (rc)
                return (rc);
        device_delete_children(dev);

        doom_vi(sc, &pi->vi[0]);

        if (pi->flags & HAS_TRACEQ) {
                sc->traceq = -1;        /* cloner should not create ifnet */
                t4_tracer_port_detach(sc);
        }

        cxgbe_vi_detach(&pi->vi[0]);
        callout_drain(&pi->tick);
        ifmedia_removeall(&pi->media);

        end_synchronized_op(sc, 0);

        return (0);
}

static void
cxgbe_init(void *arg)
{
        struct vi_info *vi = arg;
        struct adapter *sc = vi->pi->adapter;

        if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
                return;
1599         cxgbe_init_synchronized(vi);
1600         end_synchronized_op(sc, 0);
1601 }
1602
1603 static int
1604 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
1605 {
1606         int rc = 0, mtu, flags, can_sleep;
1607         struct vi_info *vi = ifp->if_softc;
1608         struct port_info *pi = vi->pi;
1609         struct adapter *sc = pi->adapter;
1610         struct ifreq *ifr = (struct ifreq *)data;
1611         uint32_t mask;
1612
1613         switch (cmd) {
1614         case SIOCSIFMTU:
1615                 mtu = ifr->ifr_mtu;
1616                 if (mtu < ETHERMIN || mtu > MAX_MTU)
1617                         return (EINVAL);
1618
1619                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
1620                 if (rc)
1621                         return (rc);
1622                 ifp->if_mtu = mtu;
1623                 if (vi->flags & VI_INIT_DONE) {
1624                         t4_update_fl_bufsize(ifp);
1625                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1626                                 rc = update_mac_settings(ifp, XGMAC_MTU);
1627                 }
1628                 end_synchronized_op(sc, 0);
1629                 break;
1630
1631         case SIOCSIFFLAGS:
1632                 can_sleep = 0;
1633 redo_sifflags:
1634                 rc = begin_synchronized_op(sc, vi,
1635                     can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg");
1636                 if (rc) {
1637                         if_printf(ifp, "%ssleepable synch operation failed: %d."
1638                             "  if_flags 0x%08x, if_drv_flags 0x%08x\n",
1639                             can_sleep ? "" : "non-", rc, ifp->if_flags,
1640                             ifp->if_drv_flags);
1641                         return (rc);
1642                 }
1643
1644                 if (ifp->if_flags & IFF_UP) {
1645                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1646                                 flags = vi->if_flags;
1647                                 if ((ifp->if_flags ^ flags) &
1648                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1649                                         if (can_sleep == 1) {
1650                                                 end_synchronized_op(sc, 0);
1651                                                 can_sleep = 0;
1652                                                 goto redo_sifflags;
1653                                         }
1654                                         rc = update_mac_settings(ifp,
1655                                             XGMAC_PROMISC | XGMAC_ALLMULTI);
1656                                 }
1657                         } else {
1658                                 if (can_sleep == 0) {
1659                                         end_synchronized_op(sc, LOCK_HELD);
1660                                         can_sleep = 1;
1661                                         goto redo_sifflags;
1662                                 }
1663                                 rc = cxgbe_init_synchronized(vi);
1664                         }
1665                         vi->if_flags = ifp->if_flags;
1666                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1667                         if (can_sleep == 0) {
1668                                 end_synchronized_op(sc, LOCK_HELD);
1669                                 can_sleep = 1;
1670                                 goto redo_sifflags;
1671                         }
1672                         rc = cxgbe_uninit_synchronized(vi);
1673                 }
1674                 end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD);
1675                 break;
1676
1677         case SIOCADDMULTI:
1678         case SIOCDELMULTI: /* these two are called with a mutex held :-( */
1679                 rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi");
1680                 if (rc)
1681                         return (rc);
1682                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1683                         rc = update_mac_settings(ifp, XGMAC_MCADDRS);
1684                 end_synchronized_op(sc, LOCK_HELD);
1685                 break;
1686
1687         case SIOCSIFCAP:
1688                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
1689                 if (rc)
1690                         return (rc);
1691
1692                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1693                 if (mask & IFCAP_TXCSUM) {
1694                         ifp->if_capenable ^= IFCAP_TXCSUM;
1695                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1696
1697                         if (IFCAP_TSO4 & ifp->if_capenable &&
1698                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1699                                 ifp->if_capenable &= ~IFCAP_TSO4;
1700                                 if_printf(ifp,
1701                                     "tso4 disabled due to -txcsum.\n");
1702                         }
1703                 }
1704                 if (mask & IFCAP_TXCSUM_IPV6) {
1705                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1706                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1707
1708                         if (IFCAP_TSO6 & ifp->if_capenable &&
1709                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1710                                 ifp->if_capenable &= ~IFCAP_TSO6;
1711                                 if_printf(ifp,
1712                                     "tso6 disabled due to -txcsum6.\n");
1713                         }
1714                 }
1715                 if (mask & IFCAP_RXCSUM)
1716                         ifp->if_capenable ^= IFCAP_RXCSUM;
1717                 if (mask & IFCAP_RXCSUM_IPV6)
1718                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1719
1720                 /*
1721                  * Note that we leave CSUM_TSO alone (it is always set).  The
1722                  * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1723                  * sending a TSO request our way, so it's sufficient to toggle
1724                  * IFCAP_TSOx only.
1725                  */
1726                 if (mask & IFCAP_TSO4) {
1727                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1728                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1729                                 if_printf(ifp, "enable txcsum first.\n");
1730                                 rc = EAGAIN;
1731                                 goto fail;
1732                         }
1733                         ifp->if_capenable ^= IFCAP_TSO4;
1734                 }
1735                 if (mask & IFCAP_TSO6) {
1736                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1737                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1738                                 if_printf(ifp, "enable txcsum6 first.\n");
1739                                 rc = EAGAIN;
1740                                 goto fail;
1741                         }
1742                         ifp->if_capenable ^= IFCAP_TSO6;
1743                 }
1744                 if (mask & IFCAP_LRO) {
1745 #if defined(INET) || defined(INET6)
1746                         int i;
1747                         struct sge_rxq *rxq;
1748
1749                         ifp->if_capenable ^= IFCAP_LRO;
1750                         for_each_rxq(vi, i, rxq) {
1751                                 if (ifp->if_capenable & IFCAP_LRO)
1752                                         rxq->iq.flags |= IQ_LRO_ENABLED;
1753                                 else
1754                                         rxq->iq.flags &= ~IQ_LRO_ENABLED;
1755                         }
1756 #endif
1757                 }
1758 #ifdef TCP_OFFLOAD
1759                 if (mask & IFCAP_TOE) {
1760                         int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
1761
1762                         rc = toe_capability(vi, enable);
1763                         if (rc != 0)
1764                                 goto fail;
1765
1766                         ifp->if_capenable ^= IFCAP_TOE;
1767                 }
1768 #endif
1769                 if (mask & IFCAP_VLAN_HWTAGGING) {
1770                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1771                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1772                                 rc = update_mac_settings(ifp, XGMAC_VLANEX);
1773                 }
1774                 if (mask & IFCAP_VLAN_MTU) {
1775                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
1776
1777                         /* Need to find out how to disable auto-mtu-inflation */
1778                 }
1779                 if (mask & IFCAP_VLAN_HWTSO)
1780                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1781                 if (mask & IFCAP_VLAN_HWCSUM)
1782                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
1783
1784 #ifdef VLAN_CAPABILITIES
1785                 VLAN_CAPABILITIES(ifp);
1786 #endif
1787 fail:
1788                 end_synchronized_op(sc, 0);
1789                 break;
1790
1791         case SIOCSIFMEDIA:
1792         case SIOCGIFMEDIA:
1793         case SIOCGIFXMEDIA:
1794                 ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
1795                 break;
1796
1797         case SIOCGI2C: {
1798                 struct ifi2creq i2c;
1799
1800                 rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
1801                 if (rc != 0)
1802                         break;
1803                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
1804                         rc = EPERM;
1805                         break;
1806                 }
1807                 if (i2c.len > sizeof(i2c.data)) {
1808                         rc = EINVAL;
1809                         break;
1810                 }
1811                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
1812                 if (rc)
1813                         return (rc);
1814                 rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
1815                     i2c.offset, i2c.len, &i2c.data[0]);
1816                 end_synchronized_op(sc, 0);
1817                 if (rc == 0)
1818                         rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
1819                 break;
1820         }
1821
1822         default:
1823                 rc = ether_ioctl(ifp, cmd, data);
1824         }
1825
1826         return (rc);
1827 }
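
/*
 * Editorial note on the SIOCSIFCAP handler above: mask is computed as
 * ifr_reqcap ^ if_capenable, so a set bit marks a capability whose state
 * the caller wants changed, and "if_capenable ^= BIT" flips exactly that
 * bit.  A small worked example with hypothetical values:
 *
 *     if_capenable = 0b0110  (RXCSUM | TXCSUM)
 *     ifr_reqcap   = 0b0100  (RXCSUM only)
 *     mask         = 0b0010  -> only TXCSUM changes; the ^= turns it off.
 */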
1828
1829 static int
1830 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
1831 {
1832         struct vi_info *vi = ifp->if_softc;
1833         struct port_info *pi = vi->pi;
1834         struct adapter *sc = pi->adapter;
1835         struct sge_txq *txq;
1836         void *items[1];
1837         int rc;
1838
1839         M_ASSERTPKTHDR(m);
1840         MPASS(m->m_nextpkt == NULL);    /* not quite ready for this yet */
1841
1842         if (__predict_false(pi->link_cfg.link_ok == 0)) {
1843                 m_freem(m);
1844                 return (ENETDOWN);
1845         }
1846
1847         rc = parse_pkt(sc, &m);
1848         if (__predict_false(rc != 0)) {
1849                 MPASS(m == NULL);                       /* was freed already */
1850                 atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
1851                 return (rc);
1852         }
1853
1854         /* Select a txq. */
1855         txq = &sc->sge.txq[vi->first_txq];
1856         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1857                 txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
1858                     vi->rsrv_noflowq);
1859
1860         items[0] = m;
1861         rc = mp_ring_enqueue(txq->r, items, 1, 4096);
1862         if (__predict_false(rc != 0))
1863                 m_freem(m);
1864
1865         return (rc);
1866 }
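
/*
 * Editorial note: a worked example of the txq selection above, using
 * hypothetical values.  With vi->ntxq = 8 and vi->rsrv_noflowq = 2, a
 * packet with flowid 13 maps to 13 % (8 - 2) = 1, plus the 2 reserved
 * queues, i.e. txq[first_txq + 3].  Packets without a flow hash stay on
 * txq[first_txq], one of the reserved no-flow queues.
 */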
1867
1868 static void
1869 cxgbe_qflush(struct ifnet *ifp)
1870 {
1871         struct vi_info *vi = ifp->if_softc;
1872         struct sge_txq *txq;
1873         int i;
1874
1875         /* queues do not exist if !VI_INIT_DONE. */
1876         if (vi->flags & VI_INIT_DONE) {
1877                 for_each_txq(vi, i, txq) {
1878                         TXQ_LOCK(txq);
1879                         txq->eq.flags |= EQ_QFLUSH;
1880                         TXQ_UNLOCK(txq);
1881                         while (!mp_ring_is_idle(txq->r)) {
1882                                 mp_ring_check_drainage(txq->r, 0);
1883                                 pause("qflush", 1);
1884                         }
1885                         TXQ_LOCK(txq);
1886                         txq->eq.flags &= ~EQ_QFLUSH;
1887                         TXQ_UNLOCK(txq);
1888                 }
1889         }
1890         if_qflush(ifp);
1891 }
1892
1893 static uint64_t
1894 vi_get_counter(struct ifnet *ifp, ift_counter c)
1895 {
1896         struct vi_info *vi = ifp->if_softc;
1897         struct fw_vi_stats_vf *s = &vi->stats;
1898
1899         vi_refresh_stats(vi->pi->adapter, vi);
1900
1901         switch (c) {
1902         case IFCOUNTER_IPACKETS:
1903                 return (s->rx_bcast_frames + s->rx_mcast_frames +
1904                     s->rx_ucast_frames);
1905         case IFCOUNTER_IERRORS:
1906                 return (s->rx_err_frames);
1907         case IFCOUNTER_OPACKETS:
1908                 return (s->tx_bcast_frames + s->tx_mcast_frames +
1909                     s->tx_ucast_frames + s->tx_offload_frames);
1910         case IFCOUNTER_OERRORS:
1911                 return (s->tx_drop_frames);
1912         case IFCOUNTER_IBYTES:
1913                 return (s->rx_bcast_bytes + s->rx_mcast_bytes +
1914                     s->rx_ucast_bytes);
1915         case IFCOUNTER_OBYTES:
1916                 return (s->tx_bcast_bytes + s->tx_mcast_bytes +
1917                     s->tx_ucast_bytes + s->tx_offload_bytes);
1918         case IFCOUNTER_IMCASTS:
1919                 return (s->rx_mcast_frames);
1920         case IFCOUNTER_OMCASTS:
1921                 return (s->tx_mcast_frames);
1922         case IFCOUNTER_OQDROPS: {
1923                 uint64_t drops;
1924
1925                 drops = 0;
1926                 if (vi->flags & VI_INIT_DONE) {
1927                         int i;
1928                         struct sge_txq *txq;
1929
1930                         for_each_txq(vi, i, txq)
1931                                 drops += counter_u64_fetch(txq->r->drops);
1932                 }
1933
1934                 return (drops);
1935
1936         }
1937
1938         default:
1939                 return (if_get_counter_default(ifp, c));
1940         }
1941 }
1942
1943 uint64_t
1944 cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
1945 {
1946         struct vi_info *vi = ifp->if_softc;
1947         struct port_info *pi = vi->pi;
1948         struct adapter *sc = pi->adapter;
1949         struct port_stats *s = &pi->stats;
1950
1951         if (pi->nvi > 1 || sc->flags & IS_VF)
1952                 return (vi_get_counter(ifp, c));
1953
1954         cxgbe_refresh_stats(sc, pi);
1955
1956         switch (c) {
1957         case IFCOUNTER_IPACKETS:
1958                 return (s->rx_frames);
1959
1960         case IFCOUNTER_IERRORS:
1961                 return (s->rx_jabber + s->rx_runt + s->rx_too_long +
1962                     s->rx_fcs_err + s->rx_len_err);
1963
1964         case IFCOUNTER_OPACKETS:
1965                 return (s->tx_frames);
1966
1967         case IFCOUNTER_OERRORS:
1968                 return (s->tx_error_frames);
1969
1970         case IFCOUNTER_IBYTES:
1971                 return (s->rx_octets);
1972
1973         case IFCOUNTER_OBYTES:
1974                 return (s->tx_octets);
1975
1976         case IFCOUNTER_IMCASTS:
1977                 return (s->rx_mcast_frames);
1978
1979         case IFCOUNTER_OMCASTS:
1980                 return (s->tx_mcast_frames);
1981
1982         case IFCOUNTER_IQDROPS:
1983                 return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
1984                     s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
1985                     s->rx_trunc3 + pi->tnl_cong_drops);
1986
1987         case IFCOUNTER_OQDROPS: {
1988                 uint64_t drops;
1989
1990                 drops = s->tx_drop;
1991                 if (vi->flags & VI_INIT_DONE) {
1992                         int i;
1993                         struct sge_txq *txq;
1994
1995                         for_each_txq(vi, i, txq)
1996                                 drops += counter_u64_fetch(txq->r->drops);
1997                 }
1998
1999                 return (drops);
2000
2001         }
2002
2003         default:
2004                 return (if_get_counter_default(ifp, c));
2005         }
2006 }
2007
2008 static int
2009 cxgbe_media_change(struct ifnet *ifp)
2010 {
2011         struct vi_info *vi = ifp->if_softc;
2012
2013         device_printf(vi->dev, "%s unimplemented.\n", __func__);
2014
2015         return (EOPNOTSUPP);
2016 }
2017
2018 static void
2019 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2020 {
2021         struct vi_info *vi = ifp->if_softc;
2022         struct port_info *pi = vi->pi;
2023         struct ifmedia_entry *cur;
2024         struct link_config *lc = &pi->link_cfg;
2025
2026         /*
2027          * If all the interfaces are administratively down, the firmware does not
2028          * report transceiver changes.  Refresh port info here so that ifconfig
2029          * displays accurate information at all times.
2030          */
2031         if (begin_synchronized_op(pi->adapter, NULL, SLEEP_OK | INTR_OK,
2032             "t4med") == 0) {
2033                 PORT_LOCK(pi);
2034                 if (pi->up_vis == 0) {
2035                         t4_update_port_info(pi);
2036                         build_medialist(pi, &pi->media);
2037                 }
2038                 PORT_UNLOCK(pi);
2039                 end_synchronized_op(pi->adapter, 0);
2040         }
2041
2042         ifmr->ifm_status = IFM_AVALID;
2043         if (lc->link_ok == 0)
2044                 return;
2045
2046         ifmr->ifm_status |= IFM_ACTIVE;
2047         ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
2048         if (lc->fc & PAUSE_RX)
2049                 ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2050         if (lc->fc & PAUSE_TX)
2051                 ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2052
2053         /* active and current will differ iff current media is autoselect. */
2054         cur = pi->media.ifm_cur;
2055         if (cur != NULL && IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2056                 return;
2057
2058         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2059         if (lc->fc & PAUSE_RX)
2060                 ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2061         if (lc->fc & PAUSE_TX)
2062                 ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2063         switch (lc->speed) {
2064         case 10000:
2065                 ifmr->ifm_active |= IFM_10G_T;
2066                 break;
2067         case 1000:
2068                 ifmr->ifm_active |= IFM_1000_T;
2069                 break;
2070         case 100:
2071                 ifmr->ifm_active |= IFM_100_TX;
2072                 break;
2073         case 10:
2074                 ifmr->ifm_active |= IFM_10_T;
2075                 break;
2076         default:
2077                 device_printf(vi->dev, "link up but speed unknown (%u)\n",
2078                     lc->speed);
2079         }
2080 }
2081
2082 static int
2083 vcxgbe_probe(device_t dev)
2084 {
2085         char buf[128];
2086         struct vi_info *vi = device_get_softc(dev);
2087
2088         snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2089             vi - vi->pi->vi);
2090         device_set_desc_copy(dev, buf);
2091
2092         return (BUS_PROBE_DEFAULT);
2093 }
2094
2095 static int
2096 alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2097 {
2098         int func, index, rc;
2099         uint32_t param, val;
2100
2101         ASSERT_SYNCHRONIZED_OP(sc);
2102
2103         index = vi - pi->vi;
2104         MPASS(index > 0);       /* This function deals with _extra_ VIs only */
2105         KASSERT(index < nitems(vi_mac_funcs),
2106             ("%s: VI %s doesn't have a MAC func", __func__,
2107             device_get_nameunit(vi->dev)));
2108         func = vi_mac_funcs[index];
2109         rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2110             vi->hw_addr, &vi->rss_size, func, 0);
2111         if (rc < 0) {
2112                 device_printf(vi->dev, "failed to allocate virtual "
2113                     "interface %d for port %d: %d\n", index, pi->port_id, -rc);
2114                 return (-rc);
2115         }
2116         vi->viid = rc;
2117         if (chip_id(sc) <= CHELSIO_T5)
2118                 vi->smt_idx = (rc & 0x7f) << 1;
2119         else
2120                 vi->smt_idx = (rc & 0x7f);
2121
2122         if (vi->rss_size == 1) {
2123                 /*
2124                  * This VI didn't get a slice of the RSS table.  Reduce the
2125                  * number of VIs being created (hw.cxgbe.num_vis) or modify the
2126                  * configuration file (nvi, rssnvi for this PF) if this is a
2127                  * problem.
2128                  */
2129                 device_printf(vi->dev, "RSS table not available.\n");
2130                 vi->rss_base = 0xffff;
2131
2132                 return (0);
2133         }
2134
2135         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2136             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2137             V_FW_PARAMS_PARAM_YZ(vi->viid);
2138         rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2139         if (rc)
2140                 vi->rss_base = 0xffff;
2141         else {
2142                 MPASS((val >> 16) == vi->rss_size);
2143                 vi->rss_base = val & 0xffff;
2144         }
2145
2146         return (0);
2147 }
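
/*
 * Editorial note: the FW_PARAMS_PARAM_DEV_RSSINFO reply packs the VI's
 * RSS slice into a single 32-bit word: slice size in the high 16 bits,
 * base in the low 16.  With a hypothetical val of 0x00400080 the MPASS
 * above checks 0x0040 == vi->rss_size (64 entries) and vi->rss_base
 * becomes 0x0080.
 */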
2148
2149 static int
2150 vcxgbe_attach(device_t dev)
2151 {
2152         struct vi_info *vi;
2153         struct port_info *pi;
2154         struct adapter *sc;
2155         int rc;
2156
2157         vi = device_get_softc(dev);
2158         pi = vi->pi;
2159         sc = pi->adapter;
2160
2161         rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
2162         if (rc)
2163                 return (rc);
2164         rc = alloc_extra_vi(sc, pi, vi);
2165         end_synchronized_op(sc, 0);
2166         if (rc)
2167                 return (rc);
2168
2169         rc = cxgbe_vi_attach(dev, vi);
2170         if (rc) {
2171                 t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2172                 return (rc);
2173         }
2174         return (0);
2175 }
2176
2177 static int
2178 vcxgbe_detach(device_t dev)
2179 {
2180         struct vi_info *vi;
2181         struct adapter *sc;
2182
2183         vi = device_get_softc(dev);
2184         sc = vi->pi->adapter;
2185
2186         doom_vi(sc, vi);
2187
2188         cxgbe_vi_detach(vi);
2189         t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2190
2191         end_synchronized_op(sc, 0);
2192
2193         return (0);
2194 }
2195
2196 void
2197 t4_fatal_err(struct adapter *sc)
2198 {
2199         t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
2200         t4_intr_disable(sc);
2201         log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
2202             device_get_nameunit(sc->dev));
2203         if (t4_panic_on_fatal_err)
2204                 panic("panic requested on fatal error");
2205 }
2206
2207 void
2208 t4_add_adapter(struct adapter *sc)
2209 {
2210         sx_xlock(&t4_list_lock);
2211         SLIST_INSERT_HEAD(&t4_list, sc, link);
2212         sx_xunlock(&t4_list_lock);
2213 }
2214
2215 int
2216 t4_map_bars_0_and_4(struct adapter *sc)
2217 {
2218         sc->regs_rid = PCIR_BAR(0);
2219         sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2220             &sc->regs_rid, RF_ACTIVE);
2221         if (sc->regs_res == NULL) {
2222                 device_printf(sc->dev, "cannot map registers.\n");
2223                 return (ENXIO);
2224         }
2225         sc->bt = rman_get_bustag(sc->regs_res);
2226         sc->bh = rman_get_bushandle(sc->regs_res);
2227         sc->mmio_len = rman_get_size(sc->regs_res);
2228         setbit(&sc->doorbells, DOORBELL_KDB);
2229
2230         sc->msix_rid = PCIR_BAR(4);
2231         sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2232             &sc->msix_rid, RF_ACTIVE);
2233         if (sc->msix_res == NULL) {
2234                 device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2235                 return (ENXIO);
2236         }
2237
2238         return (0);
2239 }
2240
2241 int
2242 t4_map_bar_2(struct adapter *sc)
2243 {
2244
2245         /*
2246          * T4: only the iWARP driver uses the userspace doorbells.  There is
2247          * no need to map the doorbell BAR if RDMA is disabled.
2248          */
2249         if (is_t4(sc) && sc->rdmacaps == 0)
2250                 return (0);
2251
2252         sc->udbs_rid = PCIR_BAR(2);
2253         sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2254             &sc->udbs_rid, RF_ACTIVE);
2255         if (sc->udbs_res == NULL) {
2256                 device_printf(sc->dev, "cannot map doorbell BAR.\n");
2257                 return (ENXIO);
2258         }
2259         sc->udbs_base = rman_get_virtual(sc->udbs_res);
2260
2261         if (chip_id(sc) >= CHELSIO_T5) {
2262                 setbit(&sc->doorbells, DOORBELL_UDB);
2263 #if defined(__i386__) || defined(__amd64__)
2264                 if (t5_write_combine) {
2265                         int rc, mode;
2266
2267                         /*
2268                          * Enable write combining on BAR2.  This is the
2269                          * userspace doorbell BAR and is split into 128B
2270                          * (UDBS_SEG_SIZE) doorbell regions, each associated
2271                          * with an egress queue.  The first 64B has the doorbell
2272                          * and the second 64B can be used to submit a tx work
2273                          * request with an implicit doorbell.
2274                          */
2275
2276                         rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2277                             rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2278                         if (rc == 0) {
2279                                 clrbit(&sc->doorbells, DOORBELL_UDB);
2280                                 setbit(&sc->doorbells, DOORBELL_WCWR);
2281                                 setbit(&sc->doorbells, DOORBELL_UDBWC);
2282                         } else {
2283                                 t5_write_combine = 0;
2284                                 device_printf(sc->dev,
2285                                     "couldn't enable write combining: %d\n",
2286                                     rc);
2287                         }
2288
2289                         mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2290                         t4_write_reg(sc, A_SGE_STAT_CFG,
2291                             V_STATSOURCE_T5(7) | mode);
2292                 }
2293 #else
2294                 t5_write_combine = 0;
2295 #endif
2296                 sc->iwt.wc_en = t5_write_combine;
2297         }
2298
2299         return (0);
2300 }
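
/*
 * Editorial note: a sketch of the BAR2 layout described above, under the
 * stated 128B (UDBS_SEG_SIZE) split.  A hypothetical helper for egress
 * queue region n would look like:
 *
 *     volatile uint8_t *seg = (uint8_t *)sc->udbs_base + n * UDBS_SEG_SIZE;
 *     ring the doorbell through seg[0..63];
 *     submit a WR with implicit doorbell through seg[64..127];
 *
 * The real driver derives n from the queue's doorbell qid; this is only
 * an illustration of the 64B + 64B split.
 */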
2301
2302 struct memwin_init {
2303         uint32_t base;
2304         uint32_t aperture;
2305 };
2306
2307 static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2308         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2309         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2310         { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2311 };
2312
2313 static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
2314         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2315         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2316         { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
2317 };
2318
2319 static void
2320 setup_memwin(struct adapter *sc)
2321 {
2322         const struct memwin_init *mw_init;
2323         struct memwin *mw;
2324         int i;
2325         uint32_t bar0;
2326
2327         if (is_t4(sc)) {
2328                 /*
2329                  * Read low 32b of bar0 indirectly via the hardware backdoor
2330                  * mechanism.  Works from within PCI passthrough environments
2331                  * too, where rman_get_start() can return a different value.  We
2332                  * need to program the T4 memory window decoders with the actual
2333                  * addresses that will be coming across the PCIe link.
2334                  */
2335                 bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
2336                 bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
2337
2338                 mw_init = &t4_memwin[0];
2339         } else {
2340                 /* T5+ use the relative offset inside the PCIe BAR */
2341                 bar0 = 0;
2342
2343                 mw_init = &t5_memwin[0];
2344         }
2345
2346         for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
2347                 rw_init(&mw->mw_lock, "memory window access");
2348                 mw->mw_base = mw_init->base;
2349                 mw->mw_aperture = mw_init->aperture;
2350                 mw->mw_curpos = 0;
2351                 t4_write_reg(sc,
2352                     PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
2353                     (mw->mw_base + bar0) | V_BIR(0) |
2354                     V_WINDOW(ilog2(mw->mw_aperture) - 10));
2355                 rw_wlock(&mw->mw_lock);
2356                 position_memwin(sc, i, 0);
2357                 rw_wunlock(&mw->mw_lock);
2358         }
2359
2360         /* flush */
2361         t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
2362 }
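
/*
 * Editorial note: the V_WINDOW field above encodes the aperture as
 * ilog2(aperture) - 10, i.e. a power of two no smaller than 1KB.  For a
 * hypothetical 64KB aperture that is 16 - 10 = 6.  The base programmed
 * is (mw_base + bar0) so that, on T4, the decoder matches the addresses
 * actually seen on the PCIe link.
 */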
2363
2364 /*
2365  * Positions the memory window at the given address in the card's address space.
2366  * There are some alignment requirements and the actual position may be at an
2367  * address prior to the requested address.  mw->mw_curpos always has the actual
2368  * position of the window.
2369  */
2370 static void
2371 position_memwin(struct adapter *sc, int idx, uint32_t addr)
2372 {
2373         struct memwin *mw;
2374         uint32_t pf;
2375         uint32_t reg;
2376
2377         MPASS(idx >= 0 && idx < NUM_MEMWIN);
2378         mw = &sc->memwin[idx];
2379         rw_assert(&mw->mw_lock, RA_WLOCKED);
2380
2381         if (is_t4(sc)) {
2382                 pf = 0;
2383                 mw->mw_curpos = addr & ~0xf;    /* start must be 16B aligned */
2384         } else {
2385                 pf = V_PFNUM(sc->pf);
2386                 mw->mw_curpos = addr & ~0x7f;   /* start must be 128B aligned */
2387         }
2388         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
2389         t4_write_reg(sc, reg, mw->mw_curpos | pf);
2390         t4_read_reg(sc, reg);   /* flush */
2391 }
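
/*
 * Editorial note: a worked example of the alignment above.  Requesting
 * addr 0x12345 leaves mw_curpos at 0x12340 on T4 (addr & ~0xf, 16B
 * aligned) but at 0x12300 on T5+ (addr & ~0x7f, 128B aligned).  Callers
 * must re-read mw_curpos and access the target at
 * mw_base + (addr - mw_curpos), as rw_via_memwin() does below.
 */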
2392
2393 int
2394 rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2395     int len, int rw)
2396 {
2397         struct memwin *mw;
2398         uint32_t mw_end, v;
2399
2400         MPASS(idx >= 0 && idx < NUM_MEMWIN);
2401
2402         /* Memory can only be accessed in naturally aligned 4 byte units */
2403         if (addr & 3 || len & 3 || len <= 0)
2404                 return (EINVAL);
2405
2406         mw = &sc->memwin[idx];
2407         while (len > 0) {
2408                 rw_rlock(&mw->mw_lock);
2409                 mw_end = mw->mw_curpos + mw->mw_aperture;
2410                 if (addr >= mw_end || addr < mw->mw_curpos) {
2411                         /* Will need to reposition the window */
2412                         if (!rw_try_upgrade(&mw->mw_lock)) {
2413                                 rw_runlock(&mw->mw_lock);
2414                                 rw_wlock(&mw->mw_lock);
2415                         }
2416                         rw_assert(&mw->mw_lock, RA_WLOCKED);
2417                         position_memwin(sc, idx, addr);
2418                         rw_downgrade(&mw->mw_lock);
2419                         mw_end = mw->mw_curpos + mw->mw_aperture;
2420                 }
2421                 rw_assert(&mw->mw_lock, RA_RLOCKED);
2422                 while (addr < mw_end && len > 0) {
2423                         if (rw == 0) {
2424                                 v = t4_read_reg(sc, mw->mw_base + addr -
2425                                     mw->mw_curpos);
2426                                 *val++ = le32toh(v);
2427                         } else {
2428                                 v = *val++;
2429                                 t4_write_reg(sc, mw->mw_base + addr -
2430                                     mw->mw_curpos, htole32(v));
2431                         }
2432                         addr += 4;
2433                         len -= 4;
2434                 }
2435                 rw_runlock(&mw->mw_lock);
2436         }
2437
2438         return (0);
2439 }
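
/*
 * Editorial note: a minimal usage sketch, assuming a live adapter sc.
 * Reading 64 bytes of card memory at the 4-byte aligned address 0x10000
 * through memory window 0:
 *
 *     uint32_t buf[16];
 *     int rc = rw_via_memwin(sc, 0, 0x10000, buf, sizeof(buf), 0);
 *
 * rw == 0 selects a read; the same call with rw != 0 writes buf out.
 * The le32toh()/htole32() conversions above handle the card's byte
 * order.
 */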
2440
2441 int
2442 alloc_atid_tab(struct tid_info *t, int flags)
2443 {
2444         int i;
2445
2446         MPASS(t->natids > 0);
2447         MPASS(t->atid_tab == NULL);
2448
2449         t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
2450             M_ZERO | flags);
2451         if (t->atid_tab == NULL)
2452                 return (ENOMEM);
2453         mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
2454         t->afree = t->atid_tab;
2455         t->atids_in_use = 0;
2456         for (i = 1; i < t->natids; i++)
2457                 t->atid_tab[i - 1].next = &t->atid_tab[i];
2458         t->atid_tab[t->natids - 1].next = NULL;
2459
2460         return (0);
2461 }
2462
2463 void
2464 free_atid_tab(struct tid_info *t)
2465 {
2466
2467         KASSERT(t->atids_in_use == 0,
2468             ("%s: %d atids still in use.", __func__, t->atids_in_use));
2469
2470         if (mtx_initialized(&t->atid_lock))
2471                 mtx_destroy(&t->atid_lock);
2472         free(t->atid_tab, M_CXGBE);
2473         t->atid_tab = NULL;
2474 }
2475
2476 int
2477 alloc_atid(struct adapter *sc, void *ctx)
2478 {
2479         struct tid_info *t = &sc->tids;
2480         int atid = -1;
2481
2482         mtx_lock(&t->atid_lock);
2483         if (t->afree) {
2484                 union aopen_entry *p = t->afree;
2485
2486                 atid = p - t->atid_tab;
2487                 MPASS(atid <= M_TID_TID);
2488                 t->afree = p->next;
2489                 p->data = ctx;
2490                 t->atids_in_use++;
2491         }
2492         mtx_unlock(&t->atid_lock);
2493         return (atid);
2494 }
2495
2496 void *
2497 lookup_atid(struct adapter *sc, int atid)
2498 {
2499         struct tid_info *t = &sc->tids;
2500
2501         return (t->atid_tab[atid].data);
2502 }
2503
2504 void
2505 free_atid(struct adapter *sc, int atid)
2506 {
2507         struct tid_info *t = &sc->tids;
2508         union aopen_entry *p = &t->atid_tab[atid];
2509
2510         mtx_lock(&t->atid_lock);
2511         p->next = t->afree;
2512         t->afree = p;
2513         t->atids_in_use--;
2514         mtx_unlock(&t->atid_lock);
2515 }
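
/*
 * Editorial note: a minimal round trip through the atid table, assuming
 * alloc_atid_tab() succeeded earlier.  my_ctx is a hypothetical
 * caller-owned pointer; the table stores it verbatim:
 *
 *     int atid = alloc_atid(sc, my_ctx);      // -1 if the table is full
 *     if (atid >= 0) {
 *             void *ctx = lookup_atid(sc, atid);  // returns my_ctx
 *             free_atid(sc, atid);            // back on the free list
 *     }
 */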
2516
2517 static void
2518 queue_tid_release(struct adapter *sc, int tid)
2519 {
2520
2521         CXGBE_UNIMPLEMENTED("deferred tid release");
2522 }
2523
2524 void
2525 release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
2526 {
2527         struct wrqe *wr;
2528         struct cpl_tid_release *req;
2529
2530         wr = alloc_wrqe(sizeof(*req), ctrlq);
2531         if (wr == NULL) {
2532                 queue_tid_release(sc, tid);     /* defer */
2533                 return;
2534         }
2535         req = wrtod(wr);
2536
2537         INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
2538
2539         t4_wrq_tx(sc, wr);
2540 }
2541
2542 static int
2543 t4_range_cmp(const void *a, const void *b)
2544 {
2545         const struct t4_range *ra = a, *rb = b;
2546
2547         /* Compare rather than subtract: the difference can overflow an int. */
2548         return ((ra->start > rb->start) - (ra->start < rb->start));
2547 }
2548
2549 /*
2550  * Verify that the memory range specified by the addr/len pair is valid within
2551  * the card's address space.
2552  */
2553 static int
2554 validate_mem_range(struct adapter *sc, uint32_t addr, int len)
2555 {
2556         struct t4_range mem_ranges[4], *r, *next;
2557         uint32_t em, addr_len;
2558         int i, n, remaining;
2559
2560         /* Memory can only be accessed in naturally aligned 4 byte units */
2561         if (addr & 3 || len & 3 || len <= 0)
2562                 return (EINVAL);
2563
2564         /* Enabled memories */
2565         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2566
2567         r = &mem_ranges[0];
2568         n = 0;
2569         bzero(r, sizeof(mem_ranges));
2570         if (em & F_EDRAM0_ENABLE) {
2571                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2572                 r->size = G_EDRAM0_SIZE(addr_len) << 20;
2573                 if (r->size > 0) {
2574                         r->start = G_EDRAM0_BASE(addr_len) << 20;
2575                         if (addr >= r->start &&
2576                             addr + len <= r->start + r->size)
2577                                 return (0);
2578                         r++;
2579                         n++;
2580                 }
2581         }
2582         if (em & F_EDRAM1_ENABLE) {
2583                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2584                 r->size = G_EDRAM1_SIZE(addr_len) << 20;
2585                 if (r->size > 0) {
2586                         r->start = G_EDRAM1_BASE(addr_len) << 20;
2587                         if (addr >= r->start &&
2588                             addr + len <= r->start + r->size)
2589                                 return (0);
2590                         r++;
2591                         n++;
2592                 }
2593         }
2594         if (em & F_EXT_MEM_ENABLE) {
2595                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2596                 r->size = G_EXT_MEM_SIZE(addr_len) << 20;
2597                 if (r->size > 0) {
2598                         r->start = G_EXT_MEM_BASE(addr_len) << 20;
2599                         if (addr >= r->start &&
2600                             addr + len <= r->start + r->size)
2601                                 return (0);
2602                         r++;
2603                         n++;
2604                 }
2605         }
2606         if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
2607                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2608                 r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
2609                 if (r->size > 0) {
2610                         r->start = G_EXT_MEM1_BASE(addr_len) << 20;
2611                         if (addr >= r->start &&
2612                             addr + len <= r->start + r->size)
2613                                 return (0);
2614                         r++;
2615                         n++;
2616                 }
2617         }
2618         MPASS(n <= nitems(mem_ranges));
2619
2620         if (n > 1) {
2621                 /* Sort and merge the ranges. */
2622                 qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
2623
2624                 /* Start from index 0 and examine the next n - 1 entries. */
2625                 r = &mem_ranges[0];
2626                 for (remaining = n - 1; remaining > 0; remaining--, r++) {
2627
2628                         MPASS(r->size > 0);     /* r is a valid entry. */
2629                         next = r + 1;
2630                         MPASS(next->size > 0);  /* and so is the next one. */
2631
2632                         while (r->start + r->size >= next->start) {
2633                                 /* Merge the next one into the current entry. */
2634                                 r->size = max(r->start + r->size,
2635                                     next->start + next->size) - r->start;
2636                                 n--;    /* One fewer entry in total. */
2637                                 if (--remaining == 0)
2638                                         goto done;      /* short circuit */
2639                                 next++;
2640                         }
2641                         if (next != r + 1) {
2642                                 /*
2643                                  * Some entries were merged into r and next
2644                                  * points to the first valid entry that couldn't
2645                                  * be merged.
2646                                  */
2647                                 MPASS(next->size > 0);  /* must be valid */
2648                                 memcpy(r + 1, next, remaining * sizeof(*r));
2649 #ifdef INVARIANTS
2650                                 /*
2651                                  * This is so that the size assertions in
2652                                  * later iterations of the loop do the right
2653                                  * thing for entries that were pulled up and
2654                                  * are no longer valid.
2655                                  */
2656                                 MPASS(n < nitems(mem_ranges));
2657                                 bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
2658                                     sizeof(struct t4_range));
2659 #endif
2660                         }
2661                 }
2662 done:
2663                 /* Done merging the ranges. */
2664                 MPASS(n > 0);
2665                 r = &mem_ranges[0];
2666                 for (i = 0; i < n; i++, r++) {
2667                         if (addr >= r->start &&
2668                             addr + len <= r->start + r->size)
2669                                 return (0);
2670                 }
2671         }
2672
2673         return (EFAULT);
2674 }
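
/*
 * Editorial note: a worked example of the sort-and-merge above, with
 * hypothetical BAR values.  If EDC0 covers [0x000000, 0x100000) and MC0
 * covers [0x080000, 0x200000), then after qsort the test
 * r->start + r->size >= next->start (0x100000 >= 0x080000) holds and the
 * two merge into [0x000000, 0x200000), so an access straddling the
 * EDC0/MC0 boundary still validates.
 */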
2675
2676 static int
2677 fwmtype_to_hwmtype(int mtype)
2678 {
2679
2680         switch (mtype) {
2681         case FW_MEMTYPE_EDC0:
2682                 return (MEM_EDC0);
2683         case FW_MEMTYPE_EDC1:
2684                 return (MEM_EDC1);
2685         case FW_MEMTYPE_EXTMEM:
2686                 return (MEM_MC0);
2687         case FW_MEMTYPE_EXTMEM1:
2688                 return (MEM_MC1);
2689         default:
2690                 panic("%s: cannot translate fw mtype %d.", __func__, mtype);
2691         }
2692 }
2693
2694 /*
2695  * Verify that the memory range specified by the memtype/offset/len pair is
2696  * valid and lies entirely within the memtype specified.  The global address of
2697  * the start of the range is returned in addr.
2698  */
2699 static int
2700 validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
2701     uint32_t *addr)
2702 {
2703         uint32_t em, addr_len, maddr;
2704
2705         /* Memory can only be accessed in naturally aligned 4 byte units */
2706         if (off & 3 || len & 3 || len <= 0)
2707                 return (EINVAL);
2708
2709         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2710         switch (fwmtype_to_hwmtype(mtype)) {
2711         case MEM_EDC0:
2712                 if (!(em & F_EDRAM0_ENABLE))
2713                         return (EINVAL);
2714                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2715                 maddr = G_EDRAM0_BASE(addr_len) << 20;
2716                 break;
2717         case MEM_EDC1:
2718                 if (!(em & F_EDRAM1_ENABLE))
2719                         return (EINVAL);
2720                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2721                 maddr = G_EDRAM1_BASE(addr_len) << 20;
2722                 break;
2723         case MEM_MC:
2724                 if (!(em & F_EXT_MEM_ENABLE))
2725                         return (EINVAL);
2726                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2727                 maddr = G_EXT_MEM_BASE(addr_len) << 20;
2728                 break;
2729         case MEM_MC1:
2730                 if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
2731                         return (EINVAL);
2732                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2733                 maddr = G_EXT_MEM1_BASE(addr_len) << 20;
2734                 break;
2735         default:
2736                 return (EINVAL);
2737         }
2738
2739         *addr = maddr + off;    /* global address */
2740         return (validate_mem_range(sc, *addr, len));
2741 }
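
/*
 * Editorial note: the address returned is global.  With a hypothetical
 * EDC1 base of 0x00400000 (G_EDRAM1_BASE(addr_len) << 20) and
 * off = 0x1000, the caller gets *addr = 0x00401000, which is then
 * checked against all enabled memories by validate_mem_range().
 */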
2742
2743 static int
2744 fixup_devlog_params(struct adapter *sc)
2745 {
2746         struct devlog_params *dparams = &sc->params.devlog;
2747         int rc;
2748
2749         rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
2750             dparams->size, &dparams->addr);
2751
2752         return (rc);
2753 }
2754
2755 static void
2756 update_nirq(struct intrs_and_queues *iaq, int nports)
2757 {
2758         int extra = T4_EXTRA_INTR;
2759
2760         iaq->nirq = extra;
2761         iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq);
2762         iaq->nirq += nports * (iaq->num_vis - 1) *
2763             max(iaq->nrxq_vi, iaq->nnmrxq_vi);
2764         iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
2765 }
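
/*
 * Editorial note: a worked example of the vector count above, with
 * hypothetical values nports = 2, nrxq = 4, nofldrxq = 2, num_vis = 2,
 * nrxq_vi = 1, nnmrxq_vi = 2, nofldrxq_vi = 1:
 *
 *     nirq = T4_EXTRA_INTR
 *          + 2 * (4 + 2)              // 12: main VI rx queues
 *          + 2 * (2 - 1) * max(1, 2)  //  4: extra VI NIC/netmap rx
 *          + 2 * (2 - 1) * 1          //  2: extra VI TOE rx
 */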
2766
2767 /*
2768  * Adjust requirements to fit the number of interrupts available.
2769  */
2770 static void
2771 calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
2772     int navail)
2773 {
2774         int old_nirq;
2775         const int nports = sc->params.nports;
2776
2777         MPASS(nports > 0);
2778         MPASS(navail > 0);
2779
2780         bzero(iaq, sizeof(*iaq));
2781         iaq->intr_type = itype;
2782         iaq->num_vis = t4_num_vis;
2783         iaq->ntxq = t4_ntxq;
2784         iaq->ntxq_vi = t4_ntxq_vi;
2785         iaq->nrxq = t4_nrxq;
2786         iaq->nrxq_vi = t4_nrxq_vi;
2787 #ifdef TCP_OFFLOAD
2788         if (is_offload(sc)) {
2789                 iaq->nofldtxq = t4_nofldtxq;
2790                 iaq->nofldtxq_vi = t4_nofldtxq_vi;
2791                 iaq->nofldrxq = t4_nofldrxq;
2792                 iaq->nofldrxq_vi = t4_nofldrxq_vi;
2793         }
2794 #endif
2795 #ifdef DEV_NETMAP
2796         iaq->nnmtxq_vi = t4_nnmtxq_vi;
2797         iaq->nnmrxq_vi = t4_nnmrxq_vi;
2798 #endif
2799
2800         update_nirq(iaq, nports);
2801         if (iaq->nirq <= navail &&
2802             (itype != INTR_MSI || powerof2(iaq->nirq))) {
2803                 /*
2804                  * This is the normal case -- there are enough interrupts for
2805                  * everything.
2806                  */
2807                 goto done;
2808         }
2809
2810         /*
2811          * If extra VIs have been configured, try reducing their count and see
2812          * if that works.
2813          */
2814         while (iaq->num_vis > 1) {
2815                 iaq->num_vis--;
2816                 update_nirq(iaq, nports);
2817                 if (iaq->nirq <= navail &&
2818                     (itype != INTR_MSI || powerof2(iaq->nirq))) {
2819                         device_printf(sc->dev, "virtual interfaces per port "
2820                             "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
2821                             "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
2822                             "itype %d, navail %u, nirq %d.\n",
2823                             iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
2824                             iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
2825                             itype, navail, iaq->nirq);
2826                         goto done;
2827                 }
2828         }
2829
2830         /*
2831          * Extra VIs will not be created.  Log a message if they were requested.
2832          */
2833         MPASS(iaq->num_vis == 1);
2834         iaq->ntxq_vi = iaq->nrxq_vi = 0;
2835         iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
2836         iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
2837         if (iaq->num_vis != t4_num_vis) {
2838                 device_printf(sc->dev, "extra virtual interfaces disabled.  "
2839                     "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
2840                     "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
2841                     iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
2842                     iaq->nnmrxq_vi, itype, navail, iaq->nirq);
2843         }
2844
2845         /*
2846          * Keep reducing the number of NIC rx queues to the next lower power of
2847          * 2 (for even RSS distribution), halve the TOE rx queues as well, and
2848          * see if that works.
2849          */
2850         do {
2851                 if (iaq->nrxq > 1) {
2852                         do {
2853                                 iaq->nrxq--;
2854                         } while (!powerof2(iaq->nrxq));
2855                 }
2856                 if (iaq->nofldrxq > 1)
2857                         iaq->nofldrxq >>= 1;
2858
2859                 old_nirq = iaq->nirq;
2860                 update_nirq(iaq, nports);
2861                 if (iaq->nirq <= navail &&
2862                     (itype != INTR_MSI || powerof2(iaq->nirq))) {
2863                         device_printf(sc->dev, "running with reduced number of "
2864                             "rx queues because of shortage of interrupts.  "
2865                             "nrxq=%u, nofldrxq=%u.  "
2866                             "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
2867                             iaq->nofldrxq, itype, navail, iaq->nirq);
2868                         goto done;
2869                 }
2870         } while (old_nirq != iaq->nirq);
2871
2872         /* One interrupt for everything.  Ugh. */
2873         device_printf(sc->dev, "running with minimal number of queues.  "
2874             "itype %d, navail %u.\n", itype, navail);
2875         iaq->nirq = 1;
2876         MPASS(iaq->nrxq == 1);
2877         iaq->ntxq = 1;
2878         if (iaq->nofldrxq > 1)
2879                 iaq->nofldtxq = 1;
2880 done:
2881         MPASS(iaq->num_vis > 0);
2882         if (iaq->num_vis > 1) {
2883                 MPASS(iaq->nrxq_vi > 0);
2884                 MPASS(iaq->ntxq_vi > 0);
2885         }
2886         MPASS(iaq->nirq > 0);
2887         MPASS(iaq->nrxq > 0);
2888         MPASS(iaq->ntxq > 0);
2889         if (itype == INTR_MSI) {
2890                 MPASS(powerof2(iaq->nirq));
2891         }
2892 }
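
/*
 * Editorial note: the reduction loop above decrements nrxq to the next
 * lower power of 2 on each pass (e.g. 7 -> 4 -> 2 -> 1) and halves
 * nofldrxq.  Powers of 2 keep the RSS indirection table spread evenly
 * across whatever rx queues remain.
 */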
2893
2894 static int
2895 cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
2896 {
2897         int rc, itype, navail, nalloc;
2898
2899         for (itype = INTR_MSIX; itype; itype >>= 1) {
2900
2901                 if ((itype & t4_intr_types) == 0)
2902                         continue;       /* not allowed */
2903
2904                 if (itype == INTR_MSIX)
2905                         navail = pci_msix_count(sc->dev);
2906                 else if (itype == INTR_MSI)
2907                         navail = pci_msi_count(sc->dev);
2908                 else
2909                         navail = 1;
2910 restart:
2911                 if (navail == 0)
2912                         continue;
2913
2914                 calculate_iaq(sc, iaq, itype, navail);
2915                 nalloc = iaq->nirq;
2916                 rc = 0;
2917                 if (itype == INTR_MSIX)
2918                         rc = pci_alloc_msix(sc->dev, &nalloc);
2919                 else if (itype == INTR_MSI)
2920                         rc = pci_alloc_msi(sc->dev, &nalloc);
2921
2922                 if (rc == 0 && nalloc > 0) {
2923                         if (nalloc == iaq->nirq)
2924                                 return (0);
2925
2926                         /*
2927                          * Didn't get the number requested.  Use whatever number
2928                          * the kernel is willing to allocate.
2929                          */
2930                         device_printf(sc->dev, "fewer vectors than requested, "
2931                             "type=%d, req=%d, rcvd=%d; will downshift req.\n",
2932                             itype, iaq->nirq, nalloc);
2933                         pci_release_msi(sc->dev);
2934                         navail = nalloc;
2935                         goto restart;
2936                 }
2937
2938                 device_printf(sc->dev,
2939                     "failed to allocate vectors: type=%d, rc=%d, req=%d, rcvd=%d\n",
2940                     itype, rc, iaq->nirq, nalloc);
2941         }
2942
2943         device_printf(sc->dev,
2944             "failed to find a usable interrupt type.  "
2945             "allowed=%d, msi-x=%d, msi=%d, intx=1\n", t4_intr_types,
2946             pci_msix_count(sc->dev), pci_msi_count(sc->dev));
2947
2948         return (ENXIO);
2949 }
2950
2951 #define FW_VERSION(chip) ( \
2952     V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
2953     V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
2954     V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
2955     V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
2956 #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
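
/*
 * Editorial note: FW_VERSION(T5) packs the four T5FW_VERSION_* numbers
 * into one 32-bit word via the V_FW_HDR_FW_VER_* shifts.  Assuming the
 * usual 8-bit fields (an assumption, not verified here), major 1,
 * minor 16, micro 63, build 0 would pack as 0x01103f00.
 */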
2957
2958 struct fw_info {
2959         uint8_t chip;
2960         char *kld_name;
2961         char *fw_mod_name;
2962         struct fw_hdr fw_hdr;   /* XXX: waste of space, need a sparse struct */
2963 } fw_info[] = {
2964         {
2965                 .chip = CHELSIO_T4,
2966                 .kld_name = "t4fw_cfg",
2967                 .fw_mod_name = "t4fw",
2968                 .fw_hdr = {
2969                         .chip = FW_HDR_CHIP_T4,
2970                         .fw_ver = htobe32_const(FW_VERSION(T4)),
2971                         .intfver_nic = FW_INTFVER(T4, NIC),
2972                         .intfver_vnic = FW_INTFVER(T4, VNIC),
2973                         .intfver_ofld = FW_INTFVER(T4, OFLD),
2974                         .intfver_ri = FW_INTFVER(T4, RI),
2975                         .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
2976                         .intfver_iscsi = FW_INTFVER(T4, ISCSI),
2977                         .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
2978                         .intfver_fcoe = FW_INTFVER(T4, FCOE),
2979                 },
2980         }, {
2981                 .chip = CHELSIO_T5,
2982                 .kld_name = "t5fw_cfg",
2983                 .fw_mod_name = "t5fw",
2984                 .fw_hdr = {
2985                         .chip = FW_HDR_CHIP_T5,
2986                         .fw_ver = htobe32_const(FW_VERSION(T5)),
2987                         .intfver_nic = FW_INTFVER(T5, NIC),
2988                         .intfver_vnic = FW_INTFVER(T5, VNIC),
2989                         .intfver_ofld = FW_INTFVER(T5, OFLD),
2990                         .intfver_ri = FW_INTFVER(T5, RI),
2991                         .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
2992                         .intfver_iscsi = FW_INTFVER(T5, ISCSI),
2993                         .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
2994                         .intfver_fcoe = FW_INTFVER(T5, FCOE),
2995                 },
2996         }, {
2997                 .chip = CHELSIO_T6,
2998                 .kld_name = "t6fw_cfg",
2999                 .fw_mod_name = "t6fw",
3000                 .fw_hdr = {
3001                         .chip = FW_HDR_CHIP_T6,
3002                         .fw_ver = htobe32_const(FW_VERSION(T6)),
3003                         .intfver_nic = FW_INTFVER(T6, NIC),
3004                         .intfver_vnic = FW_INTFVER(T6, VNIC),
3005                         .intfver_ofld = FW_INTFVER(T6, OFLD),
3006                         .intfver_ri = FW_INTFVER(T6, RI),
3007                         .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
3008                         .intfver_iscsi = FW_INTFVER(T6, ISCSI),
3009                         .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
3010                         .intfver_fcoe = FW_INTFVER(T6, FCOE),
3011                 },
3012         }
3013 };
3014
3015 static struct fw_info *
3016 find_fw_info(int chip)
3017 {
3018         int i;
3019
3020         for (i = 0; i < nitems(fw_info); i++) {
3021                 if (fw_info[i].chip == chip)
3022                         return (&fw_info[i]);
3023         }
3024         return (NULL);
3025 }
3026
3027 /*
3028  * Is the given firmware API compatible with the one the driver was compiled
3029  * with?
3030  */
3031 static int
3032 fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2)
3033 {
3034
3035         /* short circuit if it's the exact same firmware version */
3036         if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3037                 return (1);
3038
3039         /*
3040          * XXX: Is this too conservative?  Perhaps I should limit this to the
3041          * features that are supported in the driver.
3042          */
3043 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3044         if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3045             SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3046             SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3047                 return (1);
3048 #undef SAME_INTF
3049
3050         return (0);
3051 }
3052
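     /*
      * Illustrative example (hypothetical version numbers): a driver compiled
      * against 1.16.45.0 headers treats a 1.16.63.0 firmware as compatible as
      * long as all eight interface versions above match; the release number is
      * compared only in the exact-match short circuit.
      */
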
3053 /*
3054  * The firmware in the KLD is usable, but should it be installed?  This routine
3055  * explains itself in detail if it indicates the KLD firmware should be
3056  * installed.
3057  */
3058 static int
3059 should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c)
3060 {
3061         const char *reason;
3062
3063         if (!card_fw_usable) {
3064                 reason = "incompatible or unusable";
3065                 goto install;
3066         }
3067
3068         if (k > c) {
3069                 reason = "older than the version bundled with this driver";
3070                 goto install;
3071         }
3072
3073         if (t4_fw_install == 2 && k != c) {
3074                 reason = "different than the version bundled with this driver";
3075                 goto install;
3076         }
3077
3078         return (0);
3079
3080 install:
3081         if (t4_fw_install == 0) {
3082                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3083                     "but the driver is prohibited from installing a different "
3084                     "firmware on the card.\n",
3085                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3086                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3087
3088                 return (0);
3089         }
3090
3091         device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3092             "installing firmware %u.%u.%u.%u on card.\n",
3093             G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3094             G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3095             G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3096             G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
3097
3098         return (1);
3099 }
3100
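     /*
      * The policy above is driven by the hw.cxgbe.fw_install loader tunable
      * (see cxgbe(4)): 0 never installs, 1 (the default) installs only when
      * the card's firmware is unusable or older than the KLD's, and 2 also
      * reinstalls on any version mismatch.  E.g., in /boot/loader.conf:
      *
      *      hw.cxgbe.fw_install="2"
      */
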
3101 /*
3102  * Establish contact with the firmware and determine if we are the master driver
3103  * or not, and whether we are responsible for chip initialization.
3104  */
3105 static int
3106 prep_firmware(struct adapter *sc)
3107 {
3108         const struct firmware *fw = NULL, *default_cfg;
3109         int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1;
3110         enum dev_state state;
3111         struct fw_info *fw_info;
3112         struct fw_hdr *card_fw;         /* fw on the card */
3113         const struct fw_hdr *kld_fw;    /* fw in the KLD */
3114         const struct fw_hdr *drv_fw;    /* fw header the driver was compiled
3115                                            against */
3116
3117         /* This is the firmware whose headers the driver was compiled against */
3118         fw_info = find_fw_info(chip_id(sc));
3119         if (fw_info == NULL) {
3120                 device_printf(sc->dev,
3121                     "unable to look up firmware information for chip %d.\n",
3122                     chip_id(sc));
3123                 return (EINVAL);
3124         }
3125         drv_fw = &fw_info->fw_hdr;
3126
3127         /*
3128          * The firmware KLD contains many modules.  The KLD name is also the
3129          * name of the module that contains the default config file.
3130          */
3131         default_cfg = firmware_get(fw_info->kld_name);
3132
3133         /* This is the firmware in the KLD */
3134         fw = firmware_get(fw_info->fw_mod_name);
3135         if (fw != NULL) {
3136                 kld_fw = (const void *)fw->data;
3137                 kld_fw_usable = fw_compatible(drv_fw, kld_fw);
3138         } else {
3139                 kld_fw = NULL;
3140                 kld_fw_usable = 0;
3141         }
3142
3143         /* Read the header of the firmware on the card */
3144         card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
3145         rc = -t4_read_flash(sc, FLASH_FW_START,
3146             sizeof(*card_fw) / sizeof(uint32_t), (uint32_t *)card_fw, 1);
3147         if (rc == 0) {
3148                 card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw);
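                     /* An erased flash region reads back as all ones. */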
3149                 if (card_fw->fw_ver == be32toh(0xffffffff)) {
3150                         uint32_t d;
3151
3152                         if (!kld_fw_usable) {
3153                                 device_printf(sc->dev,
3154                                     "no firmware on the card and no usable "
3155                                     "firmware bundled with the driver.\n");
3156                                 rc = EIO;
3157                                 goto done;
3158                         } else if (t4_fw_install == 0) {
3159                                 device_printf(sc->dev,
3160                                     "no firmware on the card and the driver "
3161                                     "is prohibited from installing new "
3162                                     "firmware.\n");
3163                                 rc = EIO;
3164                                 goto done;
3165                         }
3166
                             /* kld_fw is known usable (hence non-NULL) here. */
                             d = be32toh(kld_fw->fw_ver);
3167                         device_printf(sc->dev, "no firmware on the card, "
3168                             "installing firmware %d.%d.%d.%d\n",
3169                             G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3170                             G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3171                         rc = t4_fw_forceinstall(sc, fw->data, fw->datasize);
3172                         if (rc < 0) {
3173                                 rc = -rc;
3174                                 device_printf(sc->dev,
3175                                     "firmware install failed: %d.\n", rc);
3176                                 goto done;
3177                         }
3178                         memcpy(card_fw, kld_fw, sizeof(*card_fw));
3179                         card_fw_usable = 1;
3180                         need_fw_reset = 0;
3181                 }
3182         } else {
3183                 device_printf(sc->dev,
3184                     "Unable to read card's firmware header: %d\n", rc);
3185                 card_fw_usable = 0;
3186         }
3187
3188         /* Contact firmware. */
3189         rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
3190         if (rc < 0 || state == DEV_STATE_ERR) {
3191                 rc = rc < 0 ? -rc : EIO;
3192                 device_printf(sc->dev,
3193                     "failed to connect to the firmware: %d, %d.\n", rc, state);
3194                 goto done;
3195         }
3196         pf = rc;
3197         if (pf == sc->mbox)
3198                 sc->flags |= MASTER_PF;
3199         else if (state == DEV_STATE_UNINIT) {
3200                 /*
3201                  * We didn't get to be the master so we definitely won't be
3202                  * configuring the chip.  It's a bug if someone else hasn't
3203                  * configured it already.
3204                  */
3205                 device_printf(sc->dev, "couldn't be master(%d), "
3206                     "device not already initialized either(%d).\n", rc, state);
3207                 rc = EPROTO;
3208                 goto done;
3209         }
3210
3211         if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver &&
3212             (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) {
3213                 /*
3214                  * Common case: the firmware on the card is an exact match and
3215                  * the KLD is an exact match too, or the KLD is
3216                  * absent/incompatible.  Note that t4_fw_install = 2 is ignored
3217                  * here -- use cxgbetool loadfw if you want to reinstall the
3218                  * same firmware as the one on the card.
3219                  */
3220         } else if (kld_fw_usable && state == DEV_STATE_UNINIT &&
3221             should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver),
3222             be32toh(card_fw->fw_ver))) {
3223
3224                 rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3225                 if (rc != 0) {
3226                         device_printf(sc->dev,
3227                             "failed to install firmware: %d\n", rc);
3228                         goto done;
3229                 }
3230
3231                 /* Installed successfully, update the cached header too. */
3232                 memcpy(card_fw, kld_fw, sizeof(*card_fw));
3233                 card_fw_usable = 1;
3234                 need_fw_reset = 0;      /* already reset as part of load_fw */
3235         }
3236
3237         if (!card_fw_usable) {
3238                 uint32_t d, c, k;
3239
3240                 d = ntohl(drv_fw->fw_ver);
3241                 c = ntohl(card_fw->fw_ver);
3242                 k = kld_fw ? ntohl(kld_fw->fw_ver) : 0;
3243
3244                 device_printf(sc->dev, "Cannot find a usable firmware: "
3245                     "fw_install %d, chip state %d, "
3246                     "driver compiled with %d.%d.%d.%d, "
3247                     "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n",
3248                     t4_fw_install, state,
3249                     G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3250                     G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d),
3251                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3252                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c),
3253                     G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3254                     G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
3255                 rc = EINVAL;
3256                 goto done;
3257         }
3258
3259         /* Reset device */
3260         if (need_fw_reset &&
3261             (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) {
3262                 device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
3263                 if (rc != ETIMEDOUT && rc != EIO)
3264                         t4_fw_bye(sc, sc->mbox);
3265                 goto done;
3266         }
3267         sc->flags |= FW_OK;
3268
3269         rc = get_params__pre_init(sc);
3270         if (rc != 0)
3271                 goto done; /* error message displayed already */
3272
3273         /* Partition adapter resources as specified in the config file. */
3274         if (state == DEV_STATE_UNINIT) {
3275
3276                 KASSERT(sc->flags & MASTER_PF,
3277                     ("%s: trying to change chip settings when not master.",
3278                     __func__));
3279
3280                 rc = partition_resources(sc, default_cfg, fw_info->kld_name);
3281                 if (rc != 0)
3282                         goto done;      /* error message displayed already */
3283
3284                 t4_tweak_chip_settings(sc);
3285
3286                 /* get basic stuff going */
3287                 rc = -t4_fw_initialize(sc, sc->mbox);
3288                 if (rc != 0) {
3289                         device_printf(sc->dev, "fw init failed: %d.\n", rc);
3290                         goto done;
3291                 }
3292         } else {
3293                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf);
3294                 sc->cfcsum = 0;
3295         }
3296
3297 done:
3298         free(card_fw, M_CXGBE);
3299         if (fw != NULL)
3300                 firmware_put(fw, FIRMWARE_UNLOAD);
3301         if (default_cfg != NULL)
3302                 firmware_put(default_cfg, FIRMWARE_UNLOAD);
3303
3304         return (rc);
3305 }
3306
3307 #define FW_PARAM_DEV(param) \
3308         (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
3309          V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
3310 #define FW_PARAM_PFVF(param) \
3311         (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
3312          V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
3313
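     /*
      * Illustrative example: to ask the firmware where it wants the config
      * file uploaded, compose the mnemonic/index pair and query it:
      *
      *      uint32_t param = FW_PARAM_DEV(CF), val;
      *      rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
      *
      * This is exactly how partition_resources() below uses it.
      */
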
3314 /*
3315  * Partition chip resources for use between various PFs, VFs, etc.
3316  */
3317 static int
3318 partition_resources(struct adapter *sc, const struct firmware *default_cfg,
3319     const char *name_prefix)
3320 {
3321         const struct firmware *cfg = NULL;
3322         int rc = 0;
3323         struct fw_caps_config_cmd caps;
3324         uint32_t mtype, moff, finicsum, cfcsum;
3325
3326         /*
3327          * Figure out what configuration file to use.  Pick the default config
3328          * file for the card if the user hasn't specified one explicitly.
3329          */
3330         snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file);
3331         if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3332                 /* Card specific overrides go here. */
3333                 if (pci_get_device(sc->dev) == 0x440a)
3334                         snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF);
3335                 if (is_fpga(sc))
3336                         snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF);
3337         }
3338
3339         /*
3340          * We need to load another module if the profile is anything except
3341          * "default" or "flash".
3342          */
3343         if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 &&
3344             strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3345                 char s[32];
3346
3347                 snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file);
3348                 cfg = firmware_get(s);
3349                 if (cfg == NULL) {
3350                         if (default_cfg != NULL) {
3351                                 device_printf(sc->dev,
3352                                     "unable to load module \"%s\" for "
3353                                     "configuration profile \"%s\", will use "
3354                                     "the default config file instead.\n",
3355                                     s, sc->cfg_file);
3356                                 snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3357                                     "%s", DEFAULT_CF);
3358                         } else {
3359                                 device_printf(sc->dev,
3360                                     "unable to load module \"%s\" for "
3361                                     "configuration profile \"%s\", will use "
3362                                     "the config file on the card's flash "
3363                                     "instead.\n", s, sc->cfg_file);
3364                                 snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3365                                     "%s", FLASH_CF);
3366                         }
3367                 }
3368         }
3369
3370         if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 &&
3371             default_cfg == NULL) {
3372                 device_printf(sc->dev,
3373                     "default config file not available, will use the config "
3374                     "file on the card's flash instead.\n");
3375                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF);
3376         }
3377
3378         if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3379                 u_int cflen;
3380                 const uint32_t *cfdata;
3381                 uint32_t param, val, addr;
3382
3383                 KASSERT(cfg != NULL || default_cfg != NULL,
3384                     ("%s: no config to upload", __func__));
3385
3386                 /*
3387                  * Ask the firmware where it wants us to upload the config file.
3388                  */
3389                 param = FW_PARAM_DEV(CF);
3390                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3391                 if (rc != 0) {
3392                         /* No support for config file?  Shouldn't happen. */
3393                         device_printf(sc->dev,
3394                             "failed to query config file location: %d.\n", rc);
3395                         goto done;
3396                 }
3397                 mtype = G_FW_PARAMS_PARAM_Y(val);
3398                 moff = G_FW_PARAMS_PARAM_Z(val) << 16;
3399
3400                 /*
3401                  * XXX: sheer laziness.  We deliberately added 4 bytes of
3402                  * useless stuffing/comments at the end of the config file so
3403                  * it's ok to simply throw away the last remaining bytes when
3404                  * the config file is not an exact multiple of 4.  This also
3405                  * helps with the validate_mt_off_len check.
3406                  */
3407                 if (cfg != NULL) {
3408                         cflen = cfg->datasize & ~3;
3409                         cfdata = cfg->data;
3410                 } else {
3411                         cflen = default_cfg->datasize & ~3;
3412                         cfdata = default_cfg->data;
3413                 }
3414
3415                 if (cflen > FLASH_CFG_MAX_SIZE) {
3416                         device_printf(sc->dev,
3417                             "config file too long (%d, max allowed is %d).  "
3418                             "Will try to use the config on the card, if any.\n",
3419                             cflen, FLASH_CFG_MAX_SIZE);
3420                         goto use_config_on_flash;
3421                 }
3422
3423                 rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
3424                 if (rc != 0) {
3425                         device_printf(sc->dev,
3426                             "%s: addr (%d/0x%x) or len %d is not valid: %d.  "
3427                             "Will try to use the config on the card, if any.\n",
3428                             __func__, mtype, moff, cflen, rc);
3429                         goto use_config_on_flash;
3430                 }
3431                 write_via_memwin(sc, 2, addr, cfdata, cflen);
3432         } else {
3433 use_config_on_flash:
3434                 mtype = FW_MEMTYPE_FLASH;
3435                 moff = t4_flash_cfg_addr(sc);
3436         }
3437
3438         bzero(&caps, sizeof(caps));
3439         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3440             F_FW_CMD_REQUEST | F_FW_CMD_READ);
3441         caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3442             V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3443             V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps));
3444         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3445         if (rc != 0) {
3446                 device_printf(sc->dev,
3447                     "failed to pre-process config file: %d "
3448                     "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
3449                 goto done;
3450         }
3451
3452         finicsum = be32toh(caps.finicsum);
3453         cfcsum = be32toh(caps.cfcsum);
3454         if (finicsum != cfcsum) {
3455                 device_printf(sc->dev,
3456                     "WARNING: config file checksum mismatch: %08x %08x\n",
3457                     finicsum, cfcsum);
3458         }
3459         sc->cfcsum = cfcsum;
3460
3461 #define LIMIT_CAPS(x) do { \
3462         caps.x &= htobe16(t4_##x##_allowed); \
3463 } while (0)
3464
3465         /*
3466          * Let the firmware know what features will (not) be used so it can tune
3467          * things accordingly.
3468          */
3469         LIMIT_CAPS(nbmcaps);
3470         LIMIT_CAPS(linkcaps);
3471         LIMIT_CAPS(switchcaps);
3472         LIMIT_CAPS(niccaps);
3473         LIMIT_CAPS(toecaps);
3474         LIMIT_CAPS(rdmacaps);
3475         LIMIT_CAPS(cryptocaps);
3476         LIMIT_CAPS(iscsicaps);
3477         LIMIT_CAPS(fcoecaps);
3478 #undef LIMIT_CAPS
3479
3480         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3481             F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
3482         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3483         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
3484         if (rc != 0) {
3485                 device_printf(sc->dev,
3486                     "failed to process config file: %d.\n", rc);
3487         }
3488 done:
3489         if (cfg != NULL)
3490                 firmware_put(cfg, FIRMWARE_UNLOAD);
3491         return (rc);
3492 }
3493
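     /*
      * The configuration profile selected above comes from the
      * hw.cxgbe.config_file loader tunable (see cxgbe(4)).  E.g., setting
      * hw.cxgbe.config_file="flash" in /boot/loader.conf makes the driver use
      * the config file already on the card instead of uploading one.
      */
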
3494 /*
3495  * Retrieve parameters that are needed (or nice to have) very early.
3496  */
3497 static int
3498 get_params__pre_init(struct adapter *sc)
3499 {
3500         int rc;
3501         uint32_t param[2], val[2];
3502
3503         t4_get_version_info(sc);
3504
3505         snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
3506             G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
3507             G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
3508             G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
3509             G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
3510
3511         snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
3512             G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
3513             G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
3514             G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
3515             G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
3516
3517         snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
3518             G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
3519             G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
3520             G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
3521             G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
3522
3523         snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
3524             G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
3525             G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
3526             G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
3527             G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
3528
3529         param[0] = FW_PARAM_DEV(PORTVEC);
3530         param[1] = FW_PARAM_DEV(CCLK);
3531         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3532         if (rc != 0) {
3533                 device_printf(sc->dev,
3534                     "failed to query parameters (pre_init): %d.\n", rc);
3535                 return (rc);
3536         }
3537
3538         sc->params.portvec = val[0];
3539         sc->params.nports = bitcount32(val[0]);
3540         sc->params.vpd.cclk = val[1];
3541
3542         /* Read device log parameters. */
3543         rc = -t4_init_devlog_params(sc, 1);
3544         if (rc == 0)
3545                 fixup_devlog_params(sc);
3546         else {
3547                 device_printf(sc->dev,
3548                     "failed to get devlog parameters: %d.\n", rc);
3549                 rc = 0; /* devlog isn't critical for device operation */
3550         }
3551
3552         return (rc);
3553 }
3554
3555 /*
3556  * Retrieve various parameters that are of interest to the driver.  The device
3557  * has been initialized by the firmware at this point.
3558  */
3559 static int
3560 get_params__post_init(struct adapter *sc)
3561 {
3562         int rc;
3563         uint32_t param[7], val[7];
3564         struct fw_caps_config_cmd caps;
3565
3566         param[0] = FW_PARAM_PFVF(IQFLINT_START);
3567         param[1] = FW_PARAM_PFVF(EQ_START);
3568         param[2] = FW_PARAM_PFVF(FILTER_START);
3569         param[3] = FW_PARAM_PFVF(FILTER_END);
3570         param[4] = FW_PARAM_PFVF(L2T_START);
3571         param[5] = FW_PARAM_PFVF(L2T_END);
3572         param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
3573             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
3574             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
3575         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
3576         if (rc != 0) {
3577                 device_printf(sc->dev,
3578                     "failed to query parameters (post_init): %d.\n", rc);
3579                 return (rc);
3580         }
3581
3582         sc->sge.iq_start = val[0];
3583         sc->sge.eq_start = val[1];
3584         sc->tids.ftid_base = val[2];
3585         sc->tids.nftids = val[3] - val[2] + 1;
3586         sc->params.ftid_min = val[2];
3587         sc->params.ftid_max = val[3];
3588         sc->vres.l2t.start = val[4];
3589         sc->vres.l2t.size = val[5] - val[4] + 1;
3590         KASSERT(sc->vres.l2t.size <= L2T_SIZE,
3591             ("%s: L2 table size (%u) larger than expected (%u)",
3592             __func__, sc->vres.l2t.size, L2T_SIZE));
3593         sc->params.core_vdd = val[6];
3594
3595         /*
3596          * MPSBGMAP is queried separately because only recent firmware versions
3597          * support it as a parameter and we don't want the compound query above
3598          * to fail on older firmware.
3599          */
3600         param[0] = FW_PARAM_DEV(MPSBGMAP);
3601         val[0] = 0;
3602         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
3603         if (rc == 0)
3604                 sc->params.mps_bg_map = val[0];
3605         else
3606                 sc->params.mps_bg_map = 0;
3607
3608         /* get capabilities */
3609         bzero(&caps, sizeof(caps));
3610         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3611             F_FW_CMD_REQUEST | F_FW_CMD_READ);
3612         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3613         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3614         if (rc != 0) {
3615                 device_printf(sc->dev,
3616                     "failed to get card capabilities: %d.\n", rc);
3617                 return (rc);
3618         }
3619
3620 #define READ_CAPS(x) do { \
3621         sc->x = be16toh(caps.x); \
3622 } while (0)
3623         READ_CAPS(nbmcaps);
3624         READ_CAPS(linkcaps);
3625         READ_CAPS(switchcaps);
3626         READ_CAPS(niccaps);
3627         READ_CAPS(toecaps);
3628         READ_CAPS(rdmacaps);
3629         READ_CAPS(cryptocaps);
3630         READ_CAPS(iscsicaps);
3631         READ_CAPS(fcoecaps);
3632
3633         /*
3634          * The firmware attempts memfree TOE configuration for -SO cards and
3635          * will report toecaps=0 if it runs out of resources (this depends on
3636          * the config file).  It may not report 0 for other capabilities
3637          * dependent on the TOE in this case.  Set them to 0 here so that the
3638          * driver doesn't bother tracking resources that will never be used.
3639          */
3640         if (sc->toecaps == 0) {
3641                 sc->iscsicaps = 0;
3642                 sc->rdmacaps = 0;
3643         }
3644
3645         if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
3646                 param[0] = FW_PARAM_PFVF(ETHOFLD_START);
3647                 param[1] = FW_PARAM_PFVF(ETHOFLD_END);
3648                 param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3649                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
3650                 if (rc != 0) {
3651                         device_printf(sc->dev,
3652                             "failed to query NIC parameters: %d.\n", rc);
3653                         return (rc);
3654                 }
3655                 sc->tids.etid_base = val[0];
3656                 sc->params.etid_min = val[0];
3657                 sc->tids.netids = val[1] - val[0] + 1;
3658                 sc->params.netids = sc->tids.netids;
3659                 sc->params.eo_wr_cred = val[2];
3660                 sc->params.ethoffload = 1;
3661         }
3662
3663         if (sc->toecaps) {
3664                 /* query offload-related parameters */
3665                 param[0] = FW_PARAM_DEV(NTID);
3666                 param[1] = FW_PARAM_PFVF(SERVER_START);
3667                 param[2] = FW_PARAM_PFVF(SERVER_END);
3668                 param[3] = FW_PARAM_PFVF(TDDP_START);
3669                 param[4] = FW_PARAM_PFVF(TDDP_END);
3670                 param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3671                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3672                 if (rc != 0) {
3673                         device_printf(sc->dev,
3674                             "failed to query TOE parameters: %d.\n", rc);
3675                         return (rc);
3676                 }
3677                 sc->tids.ntids = val[0];
3678                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
3679                 sc->tids.stid_base = val[1];
3680                 sc->tids.nstids = val[2] - val[1] + 1;
3681                 sc->vres.ddp.start = val[3];
3682                 sc->vres.ddp.size = val[4] - val[3] + 1;
3683                 sc->params.ofldq_wr_cred = val[5];
3684                 sc->params.offload = 1;
3685         }
3686         if (sc->rdmacaps) {
3687                 param[0] = FW_PARAM_PFVF(STAG_START);
3688                 param[1] = FW_PARAM_PFVF(STAG_END);
3689                 param[2] = FW_PARAM_PFVF(RQ_START);
3690                 param[3] = FW_PARAM_PFVF(RQ_END);
3691                 param[4] = FW_PARAM_PFVF(PBL_START);
3692                 param[5] = FW_PARAM_PFVF(PBL_END);
3693                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3694                 if (rc != 0) {
3695                         device_printf(sc->dev,
3696                             "failed to query RDMA parameters(1): %d.\n", rc);
3697                         return (rc);
3698                 }
3699                 sc->vres.stag.start = val[0];
3700                 sc->vres.stag.size = val[1] - val[0] + 1;
3701                 sc->vres.rq.start = val[2];
3702                 sc->vres.rq.size = val[3] - val[2] + 1;
3703                 sc->vres.pbl.start = val[4];
3704                 sc->vres.pbl.size = val[5] - val[4] + 1;
3705
3706                 param[0] = FW_PARAM_PFVF(SQRQ_START);
3707                 param[1] = FW_PARAM_PFVF(SQRQ_END);
3708                 param[2] = FW_PARAM_PFVF(CQ_START);
3709                 param[3] = FW_PARAM_PFVF(CQ_END);
3710                 param[4] = FW_PARAM_PFVF(OCQ_START);
3711                 param[5] = FW_PARAM_PFVF(OCQ_END);
3712                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
3713                 if (rc != 0) {
3714                         device_printf(sc->dev,
3715                             "failed to query RDMA parameters(2): %d.\n", rc);
3716                         return (rc);
3717                 }
3718                 sc->vres.qp.start = val[0];
3719                 sc->vres.qp.size = val[1] - val[0] + 1;
3720                 sc->vres.cq.start = val[2];
3721                 sc->vres.cq.size = val[3] - val[2] + 1;
3722                 sc->vres.ocq.start = val[4];
3723                 sc->vres.ocq.size = val[5] - val[4] + 1;
3724
3725                 param[0] = FW_PARAM_PFVF(SRQ_START);
3726                 param[1] = FW_PARAM_PFVF(SRQ_END);
3727                 param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
3728                 param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
3729                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
3730                 if (rc != 0) {
3731                         device_printf(sc->dev,
3732                             "failed to query RDMA parameters(3): %d.\n", rc);
3733                         return (rc);
3734                 }
3735                 sc->vres.srq.start = val[0];
3736                 sc->vres.srq.size = val[1] - val[0] + 1;
3737                 sc->params.max_ordird_qp = val[2];
3738                 sc->params.max_ird_adapter = val[3];
3739         }
3740         if (sc->iscsicaps) {
3741                 param[0] = FW_PARAM_PFVF(ISCSI_START);
3742                 param[1] = FW_PARAM_PFVF(ISCSI_END);
3743                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3744                 if (rc != 0) {
3745                         device_printf(sc->dev,
3746                             "failed to query iSCSI parameters: %d.\n", rc);
3747                         return (rc);
3748                 }
3749                 sc->vres.iscsi.start = val[0];
3750                 sc->vres.iscsi.size = val[1] - val[0] + 1;
3751         }
3752         if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
3753                 param[0] = FW_PARAM_PFVF(TLS_START);
3754                 param[1] = FW_PARAM_PFVF(TLS_END);
3755                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3756                 if (rc != 0) {
3757                         device_printf(sc->dev,
3758                             "failed to query TLS parameters: %d.\n", rc);
3759                         return (rc);
3760                 }
3761                 sc->vres.key.start = val[0];
3762                 sc->vres.key.size = val[1] - val[0] + 1;
3763         }
3764
3765         t4_init_sge_params(sc);
3766
3767         /*
3768          * We've got the params we wanted to query via the firmware.  Now grab
3769          * some others directly from the chip.
3770          */
3771         rc = t4_read_chip_settings(sc);
3772
3773         return (rc);
3774 }
3775
3776 static int
3777 set_params__post_init(struct adapter *sc)
3778 {
3779         uint32_t param, val;
3780 #ifdef TCP_OFFLOAD
3781         int i, v, shift;
3782 #endif
3783
3784         /* ask for encapsulated CPLs */
3785         param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
3786         val = 1;
3787         (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3788
3789 #ifdef TCP_OFFLOAD
3790         /*
3791          * Override the TOE timers with user provided tunables.  This is not the
3792          * recommended way to change the timers (the firmware config file is) so
3793          * these tunables are not documented.
3794          *
3795          * All the timer tunables are in microseconds.
3796          */
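             /*
              * Illustrative example (assuming the loader tunables mirror these
              * variable names under hw.cxgbe.toe.*), in /boot/loader.conf:
              *
              *      hw.cxgbe.toe.keepalive_idle="7200000000"    (2 hours, in us)
              */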
3797         if (t4_toe_keepalive_idle != 0) {
3798                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
3799                 v &= M_KEEPALIVEIDLE;
3800                 t4_set_reg_field(sc, A_TP_KEEP_IDLE,
3801                     V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
3802         }
3803         if (t4_toe_keepalive_interval != 0) {
3804                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
3805                 v &= M_KEEPALIVEINTVL;
3806                 t4_set_reg_field(sc, A_TP_KEEP_INTVL,
3807                     V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
3808         }
3809         if (t4_toe_keepalive_count != 0) {
3810                 v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
3811                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
3812                     V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
3813                     V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
3814                     V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
3815         }
3816         if (t4_toe_rexmt_min != 0) {
3817                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
3818                 v &= M_RXTMIN;
3819                 t4_set_reg_field(sc, A_TP_RXT_MIN,
3820                     V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
3821         }
3822         if (t4_toe_rexmt_max != 0) {
3823                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
3824                 v &= M_RXTMAX;
3825                 t4_set_reg_field(sc, A_TP_RXT_MAX,
3826                     V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
3827         }
3828         if (t4_toe_rexmt_count != 0) {
3829                 v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
3830                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
3831                     V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
3832                     V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
3833                     V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
3834         }
3835         for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
3836                 if (t4_toe_rexmt_backoff[i] != -1) {
3837                         v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
3838                         shift = (i & 3) << 3;
3839                         t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
3840                             M_TIMERBACKOFFINDEX0 << shift, v << shift);
3841                 }
3842         }
3843 #endif
3844         return (0);
3845 }
3846
3847 #undef FW_PARAM_PFVF
3848 #undef FW_PARAM_DEV
3849
3850 static void
3851 t4_set_desc(struct adapter *sc)
3852 {
3853         char buf[128];
3854         struct adapter_params *p = &sc->params;
3855
3856         snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
3857
3858         device_set_desc_copy(sc->dev, buf);
3859 }
3860
3861 static void
3862 build_medialist(struct port_info *pi, struct ifmedia *media)
3863 {
3864         int m;
3865
3866         PORT_LOCK_ASSERT_OWNED(pi);
3867
3868         ifmedia_removeall(media);
3869
3870         /*
3871          * XXX: Would it be better to ifmedia_add all 4 combinations of pause
3872          * settings for every speed instead of just txpause|rxpause?  ifconfig
3873          * media display looks much better if autoselect is the only case where
3874          * ifm_current is different from ifm_active.  If the user picks anything
3875          * except txpause|rxpause the display is ugly.
3876          */
3877         m = IFM_ETHER | IFM_FDX | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE;
3878
3879         switch (pi->port_type) {
3880         case FW_PORT_TYPE_BT_XFI:
3881         case FW_PORT_TYPE_BT_XAUI:
3882                 ifmedia_add(media, m | IFM_10G_T, 0, NULL);
3883                 /* fall through */
3884
3885         case FW_PORT_TYPE_BT_SGMII:
3886                 ifmedia_add(media, m | IFM_1000_T, 0, NULL);
3887                 ifmedia_add(media, m | IFM_100_TX, 0, NULL);
3888                 ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
3889                 ifmedia_set(media, IFM_ETHER | IFM_AUTO);
3890                 break;
3891
3892         case FW_PORT_TYPE_CX4:
3893                 ifmedia_add(media, m | IFM_10G_CX4, 0, NULL);
3894                 ifmedia_set(media, m | IFM_10G_CX4);
3895                 break;
3896
3897         case FW_PORT_TYPE_QSFP_10G:
3898         case FW_PORT_TYPE_SFP:
3899         case FW_PORT_TYPE_FIBER_XFI:
3900         case FW_PORT_TYPE_FIBER_XAUI:
3901                 switch (pi->mod_type) {
3902
3903                 case FW_PORT_MOD_TYPE_LR:
3904                         ifmedia_add(media, m | IFM_10G_LR, 0, NULL);
3905                         ifmedia_set(media, m | IFM_10G_LR);
3906                         break;
3907
3908                 case FW_PORT_MOD_TYPE_SR:
3909                         ifmedia_add(media, m | IFM_10G_SR, 0, NULL);
3910                         ifmedia_set(media, m | IFM_10G_SR);
3911                         break;
3912
3913                 case FW_PORT_MOD_TYPE_LRM:
3914                         ifmedia_add(media, m | IFM_10G_LRM, 0, NULL);
3915                         ifmedia_set(media, m | IFM_10G_LRM);
3916                         break;
3917
3918                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3919                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3920                         ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL);
3921                         ifmedia_set(media, m | IFM_10G_TWINAX);
3922                         break;
3923
3924                 case FW_PORT_MOD_TYPE_NONE:
3925                         m &= ~IFM_FDX;
3926                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
3927                         ifmedia_set(media, m | IFM_NONE);
3928                         break;
3929
3930                 case FW_PORT_MOD_TYPE_NA:
3931                 case FW_PORT_MOD_TYPE_ER:
3932                 default:
3933                         device_printf(pi->dev,
3934                             "unknown port_type (%d), mod_type (%d)\n",
3935                             pi->port_type, pi->mod_type);
3936                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3937                         ifmedia_set(media, m | IFM_UNKNOWN);
3938                         break;
3939                 }
3940                 break;
3941
3942         case FW_PORT_TYPE_CR_QSFP:
3943         case FW_PORT_TYPE_SFP28:
3944         case FW_PORT_TYPE_KR_SFP28:
3945                 switch (pi->mod_type) {
3946
3947                 case FW_PORT_MOD_TYPE_SR:
3948                         ifmedia_add(media, m | IFM_25G_SR, 0, NULL);
3949                         ifmedia_set(media, m | IFM_25G_SR);
3950                         break;
3951
3952                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3953                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3954                         ifmedia_add(media, m | IFM_25G_CR, 0, NULL);
3955                         ifmedia_set(media, m | IFM_25G_CR);
3956                         break;
3957
3958                 case FW_PORT_MOD_TYPE_NONE:
3959                         m &= ~IFM_FDX;
3960                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
3961                         ifmedia_set(media, m | IFM_NONE);
3962                         break;
3963
3964                 default:
3965                         device_printf(pi->dev,
3966                             "unknown port_type (%d), mod_type (%d)\n",
3967                             pi->port_type, pi->mod_type);
3968                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
3969                         ifmedia_set(media, m | IFM_UNKNOWN);
3970                         break;
3971                 }
3972                 break;
3973
3974         case FW_PORT_TYPE_QSFP:
3975                 switch (pi->mod_type) {
3976
3977                 case FW_PORT_MOD_TYPE_LR:
3978                         ifmedia_add(media, m | IFM_40G_LR4, 0, NULL);
3979                         ifmedia_set(media, m | IFM_40G_LR4);
3980                         break;
3981
3982                 case FW_PORT_MOD_TYPE_SR:
3983                         ifmedia_add(media, m | IFM_40G_SR4, 0, NULL);
3984                         ifmedia_set(media, m | IFM_40G_SR4);
3985                         break;
3986
3987                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
3988                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
3989                         ifmedia_add(media, m | IFM_40G_CR4, 0, NULL);
3990                         ifmedia_set(media, m | IFM_40G_CR4);
3991                         break;
3992
3993                 case FW_PORT_MOD_TYPE_NONE:
3994                         m &= ~IFM_FDX;
3995                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
3996                         ifmedia_set(media, m | IFM_NONE);
3997                         break;
3998
3999                 default:
4000                         device_printf(pi->dev,
4001                             "unknown port_type (%d), mod_type (%d)\n",
4002                             pi->port_type, pi->mod_type);
4003                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
4004                         ifmedia_set(media, m | IFM_UNKNOWN);
4005                         break;
4006                 }
4007                 break;
4008
4009         case FW_PORT_TYPE_KR4_100G:
4010         case FW_PORT_TYPE_CR4_QSFP:
4011                 switch (pi->mod_type) {
4012
4013                 case FW_PORT_MOD_TYPE_LR:
4014                         ifmedia_add(media, m | IFM_100G_LR4, 0, NULL);
4015                         ifmedia_set(media, m | IFM_100G_LR4);
4016                         break;
4017
4018                 case FW_PORT_MOD_TYPE_SR:
4019                         ifmedia_add(media, m | IFM_100G_SR4, 0, NULL);
4020                         ifmedia_set(media, m | IFM_100G_SR4);
4021                         break;
4022
4023                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
4024                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
4025                         ifmedia_add(media, m | IFM_100G_CR4, 0, NULL);
4026                         ifmedia_set(media, m | IFM_100G_CR4);
4027                         break;
4028
4029                 case FW_PORT_MOD_TYPE_NONE:
4030                         m &= ~IFM_FDX;
4031                         ifmedia_add(media, m | IFM_NONE, 0, NULL);
4032                         ifmedia_set(media, m | IFM_NONE);
4033                         break;
4034
4035                 default:
4036                         device_printf(pi->dev,
4037                             "unknown port_type (%d), mod_type (%d)\n",
4038                             pi->port_type, pi->mod_type);
4039                         ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
4040                         ifmedia_set(media, m | IFM_UNKNOWN);
4041                         break;
4042                 }
4043                 break;
4044
4045         default:
4046                 device_printf(pi->dev,
4047                     "unknown port_type (%d), mod_type (%d)\n", pi->port_type,
4048                     pi->mod_type);
4049                 ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
4050                 ifmedia_set(media, m | IFM_UNKNOWN);
4051                 break;
4052         }
4053 }
4054
4055 /*
4056  * Update all the requested_* fields in the link config and then send a mailbox
4057  * command to apply the settings.
4058  */
4059 static void
4060 init_l1cfg(struct port_info *pi)
4061 {
4062         struct adapter *sc = pi->adapter;
4063         struct link_config *lc = &pi->link_cfg;
4064         int rc;
4065
4066         ASSERT_SYNCHRONIZED_OP(sc);
4067
4068         lc->requested_speed = port_top_speed(pi);       /* in Gbps */
4069         if (t4_autoneg != 0 && lc->supported & FW_PORT_CAP_ANEG) {
4070                 lc->requested_aneg = AUTONEG_ENABLE;
4071         } else {
4072                 lc->requested_aneg = AUTONEG_DISABLE;
4073         }
4074
4075         lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX);
4076
4077         if (t4_fec != -1) {
4078                 lc->requested_fec = t4_fec & (FEC_RS | FEC_BASER_RS |
4079                     FEC_RESERVED);
4080         } else {
4081                 /* Use the suggested value provided by the firmware in acaps */
4082                 if (lc->advertising & FW_PORT_CAP_FEC_RS)
4083                         lc->requested_fec = FEC_RS;
4084                 else if (lc->advertising & FW_PORT_CAP_FEC_BASER_RS)
4085                         lc->requested_fec = FEC_BASER_RS;
4086                 else
4087                         lc->requested_fec = 0;
4088         }
4089
4090         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
4091         if (rc != 0) {
4092                 device_printf(pi->dev, "l1cfg failed: %d\n", rc);
4093         } else {
4094                 lc->fc = lc->requested_fc;
4095                 lc->fec = lc->requested_fec;
4096         }
4097 }
4098
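     /*
      * The requested_* defaults above come from loader tunables (see
      * cxgbe(4)): t4_autoneg from hw.cxgbe.autoneg, t4_pause_settings from
      * hw.cxgbe.pause_settings, and t4_fec from hw.cxgbe.fec, where -1 lets
      * the firmware's advertised capabilities select the FEC.
      */
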
4099 #define FW_MAC_EXACT_CHUNK      7
4100
4101 /*
4102  * Program the port's XGMAC based on parameters in ifnet.  The caller also
4103  * indicates which parameters should be programmed (the rest are left alone).
4104  */
4105 int
4106 update_mac_settings(struct ifnet *ifp, int flags)
4107 {
4108         int rc = 0;
4109         struct vi_info *vi = ifp->if_softc;
4110         struct port_info *pi = vi->pi;
4111         struct adapter *sc = pi->adapter;
4112         int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
4113
4114         ASSERT_SYNCHRONIZED_OP(sc);
4115         KASSERT(flags, ("%s: not told what to update.", __func__));
4116
4117         if (flags & XGMAC_MTU)
4118                 mtu = ifp->if_mtu;
4119
4120         if (flags & XGMAC_PROMISC)
4121                 promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
4122
4123         if (flags & XGMAC_ALLMULTI)
4124                 allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
4125
4126         if (flags & XGMAC_VLANEX)
4127                 vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
4128
4129         if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
4130                 rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
4131                     allmulti, 1, vlanex, false);
4132                 if (rc) {
4133                         if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
4134                             rc);
4135                         return (rc);
4136                 }
4137         }
4138
4139         if (flags & XGMAC_UCADDR) {
4140                 uint8_t ucaddr[ETHER_ADDR_LEN];
4141
4142                 bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
4143                 rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
4144                     ucaddr, true, true);
4145                 if (rc < 0) {
4146                         rc = -rc;
4147                         if_printf(ifp, "change_mac failed: %d\n", rc);
4148                         return (rc);
4149                 } else {
4150                         vi->xact_addr_filt = rc;
4151                         rc = 0;
4152                 }
4153         }
4154
4155         if (flags & XGMAC_MCADDRS) {
4156                 const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
4157                 int del = 1;
4158                 uint64_t hash = 0;
4159                 struct ifmultiaddr *ifma;
4160                 int i = 0, j;
4161
4162                 if_maddr_rlock(ifp);
4163                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
4164                         if (ifma->ifma_addr->sa_family != AF_LINK)
4165                                 continue;
4166                         mcaddr[i] =
4167                             LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
4168                         MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
4169                         i++;
4170
4171                         if (i == FW_MAC_EXACT_CHUNK) {
4172                                 rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
4173                                     del, i, mcaddr, NULL, &hash, 0);
4174                                 if (rc < 0) {
4175                                         rc = -rc;
4176                                         for (j = 0; j < i; j++) {
4177                                                 if_printf(ifp,
4178                                                     "failed to add mc address"
4179                                                     " %02x:%02x:%02x:"
4180                                                     "%02x:%02x:%02x rc=%d\n",
4181                                                     mcaddr[j][0], mcaddr[j][1],
4182                                                     mcaddr[j][2], mcaddr[j][3],
4183                                                     mcaddr[j][4], mcaddr[j][5],
4184                                                     rc);
4185                                         }
4186                                         goto mcfail;
4187                                 }
4188                                 del = 0;
4189                                 i = 0;
4190                         }
4191                 }
4192                 if (i > 0) {
4193                         rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
4194                             mcaddr, NULL, &hash, 0);
4195                         if (rc < 0) {
4196                                 rc = -rc;
4197                                 for (j = 0; j < i; j++) {
4198                                         if_printf(ifp,
4199                                             "failed to add mc address"
4200                                             " %02x:%02x:%02x:"
4201                                             "%02x:%02x:%02x rc=%d\n",
4202                                             mcaddr[j][0], mcaddr[j][1],
4203                                             mcaddr[j][2], mcaddr[j][3],
4204                                             mcaddr[j][4], mcaddr[j][5],
4205                                             rc);
4206                                 }
4207                                 goto mcfail;
4208                         }
4209                 }
4210
4211                 rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
4212                 if (rc != 0)
4213                         if_printf(ifp, "failed to set mc address hash: %d", rc);
4214 mcfail:
4215                 if_maddr_runlock(ifp);
4216         }
4217
4218         return (rc);
4219 }
4220
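     /*
      * Illustrative example: an MTU change from the ioctl path is applied with
      *
      *      ifp->if_mtu = new_mtu;
      *      rc = update_mac_settings(ifp, XGMAC_MTU);
      *
      * leaving promiscuity, the multicast filter, etc. untouched.
      */
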
4221 /*
4222  * {begin|end}_synchronized_op must be called from the same thread.
4223  */
4224 int
4225 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
4226     char *wmesg)
4227 {
4228         int rc, pri;
4229
4230 #ifdef WITNESS
4231         /* the caller thinks it's ok to sleep, but is it really? */
4232         if (flags & SLEEP_OK)
4233                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
4234                     "begin_synchronized_op");
4235 #endif
4236
4237         if (flags & INTR_OK)
4238                 pri = PCATCH;
4239         else
4240                 pri = 0;
4241
4242         ADAPTER_LOCK(sc);
4243         for (;;) {
4244
4245                 if (vi && IS_DOOMED(vi)) {
4246                         rc = ENXIO;
4247                         goto done;
4248                 }
4249
4250                 if (!IS_BUSY(sc)) {
4251                         rc = 0;
4252                         break;
4253                 }
4254
4255                 if (!(flags & SLEEP_OK)) {
4256                         rc = EBUSY;
4257                         goto done;
4258                 }
4259
4260                 if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
4261                         rc = EINTR;
4262                         goto done;
4263                 }
4264         }
4265
4266         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
4267         SET_BUSY(sc);
4268 #ifdef INVARIANTS
4269         sc->last_op = wmesg;
4270         sc->last_op_thr = curthread;
4271         sc->last_op_flags = flags;
4272 #endif
4273
4274 done:
4275         if (!(flags & HOLD_LOCK) || rc)
4276                 ADAPTER_UNLOCK(sc);
4277
4278         return (rc);
4279 }
4280
4281 /*
4282  * Tell if_ioctl and if_init that the VI is going away.  This is a
4283  * special variant of begin_synchronized_op and must be paired with a
4284  * call to end_synchronized_op.
4285  */
4286 void
4287 doom_vi(struct adapter *sc, struct vi_info *vi)
4288 {
4289
4290         ADAPTER_LOCK(sc);
4291         SET_DOOMED(vi);
4292         wakeup(&sc->flags);
4293         while (IS_BUSY(sc))
4294                 mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
4295         SET_BUSY(sc);
4296 #ifdef INVARIANTS
4297         sc->last_op = "t4detach";
4298         sc->last_op_thr = curthread;
4299         sc->last_op_flags = 0;
4300 #endif
4301         ADAPTER_UNLOCK(sc);
4302 }
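/*
 * Detach-side sketch implied by the comment above (illustrative, not a
 * verbatim copy of any caller): doom_vi() opens the critical section and
 * an ordinary end_synchronized_op() closes it.
 *
 *	doom_vi(sc, vi);
 *	(tear the VI down; IS_DOOMED makes if_ioctl/if_init bail out)
 *	end_synchronized_op(sc, 0);
 */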
4303
4304 /*
4305  * {begin|end}_synchronized_op must be called from the same thread.
4306  */
4307 void
4308 end_synchronized_op(struct adapter *sc, int flags)
4309 {
4310
4311         if (flags & LOCK_HELD)
4312                 ADAPTER_LOCK_ASSERT_OWNED(sc);
4313         else
4314                 ADAPTER_LOCK(sc);
4315
4316         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
4317         CLR_BUSY(sc);
4318         wakeup(&sc->flags);
4319         ADAPTER_UNLOCK(sc);
4320 }
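/*
 * Usage sketch for the pair above (illustrative; the wmesg "t4xmpl" is
 * invented for this comment):
 *
 *	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4xmpl");
 *	if (rc != 0)
 *		return (rc);	(ENXIO, EBUSY, or EINTR; nothing to undo)
 *	(do the work; SET_BUSY keeps other synchronized ops out)
 *	end_synchronized_op(sc, 0);
 *
 * If begin_synchronized_op() was called with HOLD_LOCK and returned 0,
 * the caller still owns the adapter lock and must pass LOCK_HELD here.
 */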
4321
4322 static int
4323 cxgbe_init_synchronized(struct vi_info *vi)
4324 {
4325         struct port_info *pi = vi->pi;
4326         struct adapter *sc = pi->adapter;
4327         struct ifnet *ifp = vi->ifp;
4328         int rc = 0, i;
4329         struct sge_txq *txq;
4330
4331         ASSERT_SYNCHRONIZED_OP(sc);
4332
4333         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
4334                 return (0);     /* already running */
4335
4336         if (!(sc->flags & FULL_INIT_DONE) &&
4337             ((rc = adapter_full_init(sc)) != 0))
4338                 return (rc);    /* error message displayed already */
4339
4340         if (!(vi->flags & VI_INIT_DONE) &&
4341             ((rc = vi_full_init(vi)) != 0))
4342                 return (rc); /* error message displayed already */
4343
4344         rc = update_mac_settings(ifp, XGMAC_ALL);
4345         if (rc)
4346                 goto done;      /* error message displayed already */
4347
4348         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
4349         if (rc != 0) {
4350                 if_printf(ifp, "enable_vi failed: %d\n", rc);
4351                 goto done;
4352         }
4353
4354         /*
4355          * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
4356          * if this changes.
4357          */
4358
4359         for_each_txq(vi, i, txq) {
4360                 TXQ_LOCK(txq);
4361                 txq->eq.flags |= EQ_ENABLED;
4362                 TXQ_UNLOCK(txq);
4363         }
4364
4365         /*
4366          * The first iq of the first port to come up is used for tracing.
4367          */
4368         if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
4369                 sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
4370                 t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
4371                     A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
4372                     V_QUEUENUMBER(sc->traceq));
4373                 pi->flags |= HAS_TRACEQ;
4374         }
4375
4376         /* all ok */
4377         PORT_LOCK(pi);
4378         if (pi->up_vis++ == 0) {
4379                 t4_update_port_info(pi);
4380                 build_medialist(pi, &pi->media);
4381                 init_l1cfg(pi);
4382         }
4383         ifp->if_drv_flags |= IFF_DRV_RUNNING;
4384
4385         if (pi->nvi > 1 || sc->flags & IS_VF)
4386                 callout_reset(&vi->tick, hz, vi_tick, vi);
4387         else
4388                 callout_reset(&pi->tick, hz, cxgbe_tick, pi);
4389         PORT_UNLOCK(pi);
4390 done:
4391         if (rc != 0)
4392                 cxgbe_uninit_synchronized(vi);
4393
4394         return (rc);
4395 }
4396
4397 /*
4398  * Idempotent.
4399  */
4400 static int
4401 cxgbe_uninit_synchronized(struct vi_info *vi)
4402 {
4403         struct port_info *pi = vi->pi;
4404         struct adapter *sc = pi->adapter;
4405         struct ifnet *ifp = vi->ifp;
4406         int rc, i;
4407         struct sge_txq *txq;
4408
4409         ASSERT_SYNCHRONIZED_OP(sc);
4410
4411         if (!(vi->flags & VI_INIT_DONE)) {
4412                 if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4413                         KASSERT(0, ("uninited VI is running"));
4414                         if_printf(ifp, "uninited VI with running ifnet.  "
4415                             "vi->flags 0x%016lx, if_flags 0x%08x, "
4416                             "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
4417                             ifp->if_drv_flags);
4418                 }
4419                 return (0);
4420         }
4421
4422         /*
4423          * Disable the VI so that all its data in either direction is discarded
4424          * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
4425          * tick) intact as the TP can deliver negative advice or data that it's
4426          * holding in its RAM (for an offloaded connection) even after the VI is
4427          * disabled.
4428          */
4429         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
4430         if (rc) {
4431                 if_printf(ifp, "disable_vi failed: %d\n", rc);
4432                 return (rc);
4433         }
4434
4435         for_each_txq(vi, i, txq) {
4436                 TXQ_LOCK(txq);
4437                 txq->eq.flags &= ~EQ_ENABLED;
4438                 TXQ_UNLOCK(txq);
4439         }
4440
4441         PORT_LOCK(pi);
4442         if (pi->nvi > 1 || sc->flags & IS_VF)
4443                 callout_stop(&vi->tick);
4444         else
4445                 callout_stop(&pi->tick);
4446         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4447                 PORT_UNLOCK(pi);
4448                 return (0);
4449         }
4450         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
4451         pi->up_vis--;
4452         if (pi->up_vis > 0) {
4453                 PORT_UNLOCK(pi);
4454                 return (0);
4455         }
4456         PORT_UNLOCK(pi);
4457
4458         pi->link_cfg.link_ok = 0;
4459         pi->link_cfg.speed = 0;
4460         pi->link_cfg.link_down_rc = 255;
4461         t4_os_link_changed(pi);
4462         pi->old_link_cfg = pi->link_cfg;
4463
4464         return (0);
4465 }
4466
4467 /*
4468  * It is ok for this function to fail midway and return right away.  t4_detach
4469  * will walk the entire sc->irq list and clean up whatever is valid.
4470  */
4471 int
4472 t4_setup_intr_handlers(struct adapter *sc)
4473 {
4474         int rc, rid, p, q, v;
4475         char s[8];
4476         struct irq *irq;
4477         struct port_info *pi;
4478         struct vi_info *vi;
4479         struct sge *sge = &sc->sge;
4480         struct sge_rxq *rxq;
4481 #ifdef TCP_OFFLOAD
4482         struct sge_ofld_rxq *ofld_rxq;
4483 #endif
4484 #ifdef DEV_NETMAP
4485         struct sge_nm_rxq *nm_rxq;
4486 #endif
4487 #ifdef RSS
4488         int nbuckets = rss_getnumbuckets();
4489 #endif
4490
4491         /*
4492          * Setup interrupts.
4493          */
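/*
 * Illustrative layout for the non-forwarded, PF case (a sketch, not a
 * spec): the first vector is the error interrupt, the next one the
 * firmware event queue, and every remaining vector services one queue.
 * Queue vectors are described as "<port hex><vi letter><queue hex>" by
 * the snprintf() below, so rx queue 2 of the second VI ('b') on port 1
 * is named "1b2"; offload queues use an upper-case letter, e.g. "1B2".
 */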
4494         irq = &sc->irq[0];
4495         rid = sc->intr_type == INTR_INTX ? 0 : 1;
4496         if (forwarding_intr_to_fwq(sc))
4497                 return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
4498
4499         /* Multiple interrupts. */
4500         if (sc->flags & IS_VF)
4501                 KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
4502                     ("%s: too few intr.", __func__));
4503         else
4504                 KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
4505                     ("%s: too few intr.", __func__));
4506
4507         /* The first one is always error intr on PFs */
4508         if (!(sc->flags & IS_VF)) {
4509                 rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
4510                 if (rc != 0)
4511                         return (rc);
4512                 irq++;
4513                 rid++;
4514         }
4515
4516         /* The second one is always the firmware event queue (first on VFs) */
4517         rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
4518         if (rc != 0)
4519                 return (rc);
4520         irq++;
4521         rid++;
4522
4523         for_each_port(sc, p) {
4524                 pi = sc->port[p];
4525                 for_each_vi(pi, v, vi) {
4526                         vi->first_intr = rid - 1;
4527
4528                         if (vi->nnmrxq > 0) {
4529                                 int n = max(vi->nrxq, vi->nnmrxq);
4530
4531                                 rxq = &sge->rxq[vi->first_rxq];
4532 #ifdef DEV_NETMAP
4533                                 nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
4534 #endif
4535                                 for (q = 0; q < n; q++) {
4536                                         snprintf(s, sizeof(s), "%x%c%x", p,
4537                                             'a' + v, q);
4538                                         if (q < vi->nrxq)
4539                                                 irq->rxq = rxq++;
4540 #ifdef DEV_NETMAP
4541                                         if (q < vi->nnmrxq)
4542                                                 irq->nm_rxq = nm_rxq++;
4543 #endif
4544                                         rc = t4_alloc_irq(sc, irq, rid,
4545                                             t4_vi_intr, irq, s);
4546                                         if (rc != 0)
4547                                                 return (rc);
4548 #ifdef RSS
4549                                         if (q < vi->nrxq) {
4550                                                 bus_bind_intr(sc->dev, irq->res,
4551                                                     rss_getcpu(q % nbuckets));
4552                                         }
4553 #endif
4554                                         irq++;
4555                                         rid++;
4556                                         vi->nintr++;
4557                                 }
4558                         } else {
4559                                 for_each_rxq(vi, q, rxq) {
4560                                         snprintf(s, sizeof(s), "%x%c%x", p,
4561                                             'a' + v, q);
4562                                         rc = t4_alloc_irq(sc, irq, rid,
4563                                             t4_intr, rxq, s);
4564                                         if (rc != 0)
4565                                                 return (rc);
4566 #ifdef RSS
4567                                         bus_bind_intr(sc->dev, irq->res,
4568                                             rss_getcpu(q % nbuckets));
4569 #endif
4570                                         irq++;
4571                                         rid++;
4572                                         vi->nintr++;
4573                                 }
4574                         }
4575 #ifdef TCP_OFFLOAD
4576                         for_each_ofld_rxq(vi, q, ofld_rxq) {
4577                                 snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
4578                                 rc = t4_alloc_irq(sc, irq, rid, t4_intr,
4579                                     ofld_rxq, s);
4580                                 if (rc != 0)
4581                                         return (rc);
4582                                 irq++;
4583                                 rid++;
4584                                 vi->nintr++;
4585                         }
4586 #endif
4587                 }
4588         }
4589         MPASS(irq == &sc->irq[sc->intr_count]);
4590
4591         return (0);
4592 }
4593
4594 int
4595 adapter_full_init(struct adapter *sc)
4596 {
4597         int rc, i;
4598 #ifdef RSS
4599         uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
4600         uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
4601 #endif
4602
4603         ASSERT_SYNCHRONIZED_OP(sc);
4604         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
4605         KASSERT((sc->flags & FULL_INIT_DONE) == 0,
4606             ("%s: FULL_INIT_DONE already", __func__));
4607
4608         /*
4609          * Allocate queues that belong to the adapter (not any particular port).
4610          */
4611         rc = t4_setup_adapter_queues(sc);
4612         if (rc != 0)
4613                 goto done;
4614
4615         for (i = 0; i < nitems(sc->tq); i++) {
4616                 sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
4617                     taskqueue_thread_enqueue, &sc->tq[i]);
4618                 if (sc->tq[i] == NULL) {
4619                         device_printf(sc->dev,
4620                             "failed to allocate task queue %d\n", i);
4621                         rc = ENOMEM;
4622                         goto done;
4623                 }
4624                 taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
4625                     device_get_nameunit(sc->dev), i);
4626         }
4627 #ifdef RSS
4628         MPASS(RSS_KEYSIZE == 40);
4629         rss_getkey((void *)&raw_rss_key[0]);
4630         for (i = 0; i < nitems(rss_key); i++) {
4631                 rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
4632         }
4633         t4_write_rss_key(sc, &rss_key[0], -1, 1);
4634 #endif
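/*
 * Worked example of the key reversal above: MPASS pins RSS_KEYSIZE at 40
 * bytes, so nitems(rss_key) is 10 and the loop yields
 *
 *	rss_key[0] = htobe32(raw_rss_key[9]);
 *	...
 *	rss_key[9] = htobe32(raw_rss_key[0]);
 *
 * i.e. the kernel's key is handed to the hardware with both the word
 * order and the byte order within each word reversed.
 */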
4635
4636         if (!(sc->flags & IS_VF))
4637                 t4_intr_enable(sc);
4638         sc->flags |= FULL_INIT_DONE;
4639 done:
4640         if (rc != 0)
4641                 adapter_full_uninit(sc);
4642
4643         return (rc);
4644 }
4645
4646 int
4647 adapter_full_uninit(struct adapter *sc)
4648 {
4649         int i;
4650
4651         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
4652
4653         t4_teardown_adapter_queues(sc);
4654
4655         for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
4656                 taskqueue_free(sc->tq[i]);
4657                 sc->tq[i] = NULL;
4658         }
4659
4660         sc->flags &= ~FULL_INIT_DONE;
4661
4662         return (0);
4663 }
4664
4665 #ifdef RSS
4666 #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
4667     RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
4668     RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
4669     RSS_HASHTYPE_RSS_UDP_IPV6)
4670
4671 /* Translates kernel hash types to hardware. */
4672 static int
4673 hashconfig_to_hashen(int hashconfig)
4674 {
4675         int hashen = 0;
4676
4677         if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
4678                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
4679         if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
4680                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
4681         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
4682                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
4683                     F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
4684         }
4685         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
4686                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
4687                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
4688         }
4689         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
4690                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
4691         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
4692                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
4693
4694         return (hashen);
4695 }
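/*
 * Example of the translation above (hypothetical input): hashconfig ==
 * RSS_HASHTYPE_RSS_UDP_IPV4 alone returns F_FW_RSS_VI_CONFIG_CMD_UDPEN |
 * F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; UDPEN only means something on top
 * of a 4-tuple enable, so a UDP request always carries the matching one.
 */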
4696
4697 /* Translates hardware hash types to kernel. */
4698 static int
4699 hashen_to_hashconfig(int hashen)
4700 {
4701         int hashconfig = 0;
4702
4703         if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
4704                 /*
4705                  * If UDP hashing was enabled it must have been enabled for
4706                  * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
4707                  * enabling any 4-tuple hash is a nonsense configuration.
4708                  */
4709                 MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
4710                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
4711
4712                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
4713                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
4714                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
4715                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
4716         }
4717         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
4718                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
4719         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
4720                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
4721         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
4722                 hashconfig |= RSS_HASHTYPE_RSS_IPV4;
4723         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
4724                 hashconfig |= RSS_HASHTYPE_RSS_IPV6;
4725
4726         return (hashconfig);
4727 }
4728 #endif
4729
4730 int
4731 vi_full_init(struct vi_info *vi)
4732 {
4733         struct adapter *sc = vi->pi->adapter;
4734         struct ifnet *ifp = vi->ifp;
4735         uint16_t *rss;
4736         struct sge_rxq *rxq;
4737         int rc, i, j, hashen;
4738 #ifdef RSS
4739         int nbuckets = rss_getnumbuckets();
4740         int hashconfig = rss_gethashconfig();
4741         int extra;
4742 #endif
4743
4744         ASSERT_SYNCHRONIZED_OP(sc);
4745         KASSERT((vi->flags & VI_INIT_DONE) == 0,
4746             ("%s: VI_INIT_DONE already", __func__));
4747
4748         sysctl_ctx_init(&vi->ctx);
4749         vi->flags |= VI_SYSCTL_CTX;
4750
4751         /*
4752          * Allocate tx/rx/fl queues for this VI.
4753          */
4754         rc = t4_setup_vi_queues(vi);
4755         if (rc != 0)
4756                 goto done;      /* error message displayed already */
4757
4758         /*
4759          * Setup RSS for this VI.  Save a copy of the RSS table for later use.
4760          */
4761         if (vi->nrxq > vi->rss_size) {
4762                 if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
4763                     "some queues will never receive traffic.\n", vi->nrxq,
4764                     vi->rss_size);
4765         } else if (vi->rss_size % vi->nrxq) {
4766                 if_printf(ifp, "nrxq (%d) does not divide hw RSS table "
4767                     "size (%d); expect uneven traffic distribution.\n", vi->nrxq,
4768                     vi->rss_size);
4769         }
4770 #ifdef RSS
4771         if (vi->nrxq != nbuckets) {
4772                 if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d); "
4773                     "performance will be impacted.\n", vi->nrxq, nbuckets);
4774         }
4775 #endif
4776         rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
4777         for (i = 0; i < vi->rss_size;) {
4778 #ifdef RSS
4779                 j = rss_get_indirection_to_bucket(i);
4780                 j %= vi->nrxq;
4781                 rxq = &sc->sge.rxq[vi->first_rxq + j];
4782                 rss[i++] = rxq->iq.abs_id;
4783 #else
4784                 for_each_rxq(vi, j, rxq) {
4785                         rss[i++] = rxq->iq.abs_id;
4786                         if (i == vi->rss_size)
4787                                 break;
4788                 }
4789 #endif
4790         }
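/*
 * Example of the non-RSS fill above (hypothetical sizes): with rss_size
 * 64 and nrxq 4 the table ends up as abs_id(q0), abs_id(q1), abs_id(q2),
 * abs_id(q3), abs_id(q0), ... repeated 16 times, i.e. plain round-robin
 * across the VI's rx queues.
 */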
4791
4792         rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
4793             vi->rss_size);
4794         if (rc != 0) {
4795                 if_printf(ifp, "rss_config failed: %d\n", rc);
4796                 goto done;
4797         }
4798
4799 #ifdef RSS
4800         hashen = hashconfig_to_hashen(hashconfig);
4801
4802         /*
4803          * We may have had to enable some hashes even though the global config
4804          * wants them disabled.  This is a potential problem that must be
4805          * reported to the user.
4806          */
4807         extra = hashen_to_hashconfig(hashen) ^ hashconfig;
4808
4809         /*
4810          * If we consider only the supported hash types, then the enabled hashes
4811          * are a superset of the requested hashes.  In other words, there cannot
4812          * be any supported hash that was requested but not enabled, but there
4813          * can be hashes that were not requested but had to be enabled.
4814          */
4815         extra &= SUPPORTED_RSS_HASHTYPES;
4816         MPASS((extra & hashconfig) == 0);
4817
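/*
 * Concrete instance of the superset behavior (hypothetical config): if
 * the global config asks for RSS_HASHTYPE_RSS_UDP_IPV4 but not
 * RSS_HASHTYPE_RSS_TCP_IPV4, the UDP request drags in IP4FOURTUPEN,
 * which hashen_to_hashconfig reports as TCP/IPv4 too, so extra ends up
 * with RSS_HASHTYPE_RSS_TCP_IPV4 and the "forced on" message below fires.
 */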
4818         if (extra) {
4819                 if_printf(ifp,
4820                     "global RSS config (0x%x) cannot be accommodated.\n",
4821                     hashconfig);
4822         }
4823         if (extra & RSS_HASHTYPE_RSS_IPV4)
4824                 if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
4825         if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
4826                 if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
4827         if (extra & RSS_HASHTYPE_RSS_IPV6)
4828                 if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
4829         if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
4830                 if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
4831         if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
4832                 if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
4833         if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
4834                 if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
4835 #else
4836         hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
4837             F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
4838             F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
4839             F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
4840 #endif
4841         rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0], 0, 0);
4842         if (rc != 0) {
4843                 if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
4844                 goto done;
4845         }
4846
4847         vi->rss = rss;
4848         vi->flags |= VI_INIT_DONE;
4849 done:
4850         if (rc != 0)
4851                 vi_full_uninit(vi);
4852
4853         return (rc);
4854 }
4855
4856 /*
4857  * Idempotent.
4858  */
4859 int
4860 vi_full_uninit(struct vi_info *vi)
4861 {
4862         struct port_info *pi = vi->pi;
4863         struct adapter *sc = pi->adapter;
4864         int i;
4865         struct sge_rxq *rxq;
4866         struct sge_txq *txq;
4867 #ifdef TCP_OFFLOAD
4868         struct sge_ofld_rxq *ofld_rxq;
4869         struct sge_wrq *ofld_txq;
4870 #endif
4871
4872         if (vi->flags & VI_INIT_DONE) {
4873
4874                 /* Need to quiesce queues.  */
4875
4876                 /* XXX: Only for the first VI? */
4877                 if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
4878                         quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
4879
4880                 for_each_txq(vi, i, txq) {
4881                         quiesce_txq(sc, txq);
4882                 }
4883
4884 #ifdef TCP_OFFLOAD
4885                 for_each_ofld_txq(vi, i, ofld_txq) {
4886                         quiesce_wrq(sc, ofld_txq);
4887                 }
4888 #endif
4889
4890                 for_each_rxq(vi, i, rxq) {
4891                         quiesce_iq(sc, &rxq->iq);
4892                         quiesce_fl(sc, &rxq->fl);
4893                 }
4894
4895 #ifdef TCP_OFFLOAD
4896                 for_each_ofld_rxq(vi, i, ofld_rxq) {
4897                         quiesce_iq(sc, &ofld_rxq->iq);
4898                         quiesce_fl(sc, &ofld_rxq->fl);
4899                 }
4900 #endif
4901                 free(vi->rss, M_CXGBE);
4902                 free(vi->nm_rss, M_CXGBE);
4903         }
4904
4905         t4_teardown_vi_queues(vi);
4906         vi->flags &= ~VI_INIT_DONE;
4907
4908         return (0);
4909 }
4910
4911 static void
4912 quiesce_txq(struct adapter *sc, struct sge_txq *txq)
4913 {
4914         struct sge_eq *eq = &txq->eq;
4915         struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
4916
4917         (void) sc;      /* unused */
4918
4919 #ifdef INVARIANTS
4920         TXQ_LOCK(txq);
4921         MPASS((eq->flags & EQ_ENABLED) == 0);
4922         TXQ_UNLOCK(txq);
4923 #endif
4924
4925         /* Wait for the mp_ring to empty. */
4926         while (!mp_ring_is_idle(txq->r)) {
4927                 mp_ring_check_drainage(txq->r, 0);
4928                 pause("rquiesce", 1);
4929         }
4930
4931         /* Then wait for the hardware to finish. */
4932         while (spg->cidx != htobe16(eq->pidx))
4933                 pause("equiesce", 1);
4934
4935         /* Finally, wait for the driver to reclaim all descriptors. */
4936         while (eq->cidx != eq->pidx)
4937                 pause("dquiesce", 1);
4938 }
4939
4940 static void
4941 quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
4942 {
4943
4944         /* XXXTX */
4945 }
4946
4947 static void
4948 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
4949 {
4950         (void) sc;      /* unused */
4951
4952         /* Synchronize with the interrupt handler */
4953         while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
4954                 pause("iqfree", 1);
4955 }
4956
4957 static void
4958 quiesce_fl(struct adapter *sc, struct sge_fl *fl)
4959 {
4960         mtx_lock(&sc->sfl_lock);
4961         FL_LOCK(fl);
4962         fl->flags |= FL_DOOMED;
4963         FL_UNLOCK(fl);
4964         callout_stop(&sc->sfl_callout);
4965         mtx_unlock(&sc->sfl_lock);
4966
4967         KASSERT((fl->flags & FL_STARVING) == 0,
4968             ("%s: still starving", __func__));
4969 }
4970
4971 static int
4972 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
4973     driver_intr_t *handler, void *arg, char *name)
4974 {
4975         int rc;
4976
4977         irq->rid = rid;
4978         irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
4979             RF_SHAREABLE | RF_ACTIVE);
4980         if (irq->res == NULL) {
4981                 device_printf(sc->dev,
4982                     "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
4983                 return (ENOMEM);
4984         }
4985
4986         rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
4987             NULL, handler, arg, &irq->tag);
4988         if (rc != 0) {
4989                 device_printf(sc->dev,
4990                     "failed to setup interrupt for rid %d, name %s: %d\n",
4991                     rid, name, rc);
4992         } else if (name)
4993                 bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
4994
4995         return (rc);
4996 }
4997
4998 static int
4999 t4_free_irq(struct adapter *sc, struct irq *irq)
5000 {
5001         if (irq->tag)
5002                 bus_teardown_intr(sc->dev, irq->res, irq->tag);
5003         if (irq->res)
5004                 bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
5005
5006         bzero(irq, sizeof(*irq));
5007
5008         return (0);
5009 }
5010
5011 static void
5012 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
5013 {
5014
5015         regs->version = chip_id(sc) | chip_rev(sc) << 10;
5016         t4_get_regs(sc, buf, regs->len);
5017 }
5018
5019 #define A_PL_INDIR_CMD  0x1f8
5020
5021 #define S_PL_AUTOINC    31
5022 #define M_PL_AUTOINC    0x1U
5023 #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC)
5024 #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
5025
5026 #define S_PL_VFID       20
5027 #define M_PL_VFID       0xffU
5028 #define V_PL_VFID(x)    ((x) << S_PL_VFID)
5029 #define G_PL_VFID(x)    (((x) >> S_PL_VFID) & M_PL_VFID)
5030
5031 #define S_PL_ADDR       0
5032 #define M_PL_ADDR       0xfffffU
5033 #define V_PL_ADDR(x)    ((x) << S_PL_ADDR)
5034 #define G_PL_ADDR(x)    (((x) >> S_PL_ADDR) & M_PL_ADDR)
5035
5036 #define A_PL_INDIR_DATA 0x1fc
5037
5038 static uint64_t
5039 read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
5040 {
5041         u32 stats[2];
5042
5043         mtx_assert(&sc->reg_lock, MA_OWNED);
5044         if (sc->flags & IS_VF) {
5045                 stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
5046                 stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
5047         } else {
5048                 t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
5049                     V_PL_VFID(G_FW_VIID_VIN(viid)) |
5050                     V_PL_ADDR(VF_MPS_REG(reg)));
5051                 stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
5052                 stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
5053         }
5054         return (((uint64_t)stats[1]) << 32 | stats[0]);
5055 }
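/*
 * PF-side sketch of the indirect access above ("lo" and "hi" are local
 * names invented for this comment): one command write, two data reads.
 *
 *	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
 *	    V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg)));
 *	lo = t4_read_reg(sc, A_PL_INDIR_DATA);	(first word: reg)
 *	hi = t4_read_reg(sc, A_PL_INDIR_DATA);	(second word: reg + 4)
 *
 * V_PL_AUTOINC(1) advances the indirect address between the two
 * A_PL_INDIR_DATA reads, so the 64-bit counter arrives low word first.
 */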
5056
5057 static void
5058 t4_get_vi_stats(struct adapter *sc, unsigned int viid,
5059     struct fw_vi_stats_vf *stats)
5060 {
5061
5062 #define GET_STAT(name) \
5063         read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)
5064
5065         stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
5066         stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
5067         stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
5068         stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
5069         stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
5070         stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
5071         stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
5072         stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
5073         stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
5074         stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
5075         stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
5076         stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
5077         stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
5078         stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
5079         stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
5080         stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
5081
5082 #undef GET_STAT
5083 }
5084
5085 static void
5086 t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
5087 {
5088         int reg;
5089
5090         t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
5091             V_PL_VFID(G_FW_VIID_VIN(viid)) |
5092             V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
5093         for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
5094              reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
5095                 t4_write_reg(sc, A_PL_INDIR_DATA, 0);
5096 }
5097
5098 static void
5099 vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
5100 {
5101         struct timeval tv;
5102         const struct timeval interval = {0, 250000};    /* 250ms */
5103
5104         if (!(vi->flags & VI_INIT_DONE))
5105                 return;
5106
5107         getmicrotime(&tv);
5108         timevalsub(&tv, &interval);
5109         if (timevalcmp(&tv, &vi->last_refreshed, <))
5110                 return;
5111
5112         mtx_lock(&sc->reg_lock);
5113         t4_get_vi_stats(sc, vi->viid, &vi->stats);
5114         getmicrotime(&vi->last_refreshed);
5115         mtx_unlock(&sc->reg_lock);
5116 }
5117
5118 static void
5119 cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
5120 {
5121         u_int i, v, tnl_cong_drops, bg_map;
5122         struct timeval tv;
5123         const struct timeval interval = {0, 250000};    /* 250ms */
5124
5125         getmicrotime(&tv);
5126         timevalsub(&tv, &interval);
5127         if (timevalcmp(&tv, &pi->last_refreshed, <))
5128                 return;
5129
5130         tnl_cong_drops = 0;
5131         t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
5132         bg_map = pi->mps_bg_map;
5133         while (bg_map) {
5134                 i = ffs(bg_map) - 1;
5135                 mtx_lock(&sc->reg_lock);
5136                 t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
5137                     A_TP_MIB_TNL_CNG_DROP_0 + i);
5138                 mtx_unlock(&sc->reg_lock);
5139                 tnl_cong_drops += v;
5140                 bg_map &= ~(1 << i);
5141         }
5142         pi->tnl_cong_drops = tnl_cong_drops;
5143         getmicrotime(&pi->last_refreshed);
5144 }
5145
5146 static void
5147 cxgbe_tick(void *arg)
5148 {
5149         struct port_info *pi = arg;
5150         struct adapter *sc = pi->adapter;
5151
5152         PORT_LOCK_ASSERT_OWNED(pi);
5153         cxgbe_refresh_stats(sc, pi);
5154
5155         callout_schedule(&pi->tick, hz);
5156 }
5157
5158 void
5159 vi_tick(void *arg)
5160 {
5161         struct vi_info *vi = arg;
5162         struct adapter *sc = vi->pi->adapter;
5163
5164         vi_refresh_stats(sc, vi);
5165
5166         callout_schedule(&vi->tick, hz);
5167 }
5168
5169 static void
5170 cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
5171 {
5172         struct ifnet *vlan;
5173
5174         if (arg != ifp || ifp->if_type != IFT_ETHER)
5175                 return;
5176
5177         vlan = VLAN_DEVAT(ifp, vid);
5178         VLAN_SETCOOKIE(vlan, ifp);
5179 }
5180
5181 /*
5182  * Should match fw_caps_config_<foo> enums in t4fw_interface.h
5183  */
5184 static char *caps_decoder[] = {
5185         "\20\001IPMI\002NCSI",                          /* 0: NBM */
5186         "\20\001PPP\002QFC\003DCBX",                    /* 1: link */
5187         "\20\001INGRESS\002EGRESS",                     /* 2: switch */
5188         "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"      /* 3: NIC */
5189             "\006HASHFILTER\007ETHOFLD",
5190         "\20\001TOE",                                   /* 4: TOE */
5191         "\20\001RDDP\002RDMAC",                         /* 5: RDMA */
5192         "\20\001INITIATOR_PDU\002TARGET_PDU"            /* 6: iSCSI */
5193             "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
5194             "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
5195             "\007T10DIF"
5196             "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
5197         "\20\001LOOKASIDE\002TLSKEYS",                  /* 7: Crypto */
5198         "\20\001INITIATOR\002TARGET\003CTRL_OFLD"       /* 8: FCoE */
5199                     "\004PO_INITIATOR\005PO_TARGET",
5200 };
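/*
 * The strings above are %b-style bit decoders (a reading aid, assuming
 * the kernel's usual %b convention): the leading \20 selects hexadecimal
 * output and each subsequent \NNN gives a 1-based bit position followed
 * by that bit's name.  E.g. a linkcaps value of 0x5 decodes via entry 1
 * as "0x5<PPP,DCBX>" (bits 1 and 3 set).
 */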
5201
5202 void
5203 t4_sysctls(struct adapter *sc)
5204 {
5205         struct sysctl_ctx_list *ctx;
5206         struct sysctl_oid *oid;
5207         struct sysctl_oid_list *children, *c0;
5208         static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
5209
5210         ctx = device_get_sysctl_ctx(sc->dev);
5211
5212         /*
5213          * dev.t4nex.X.
5214          */
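/*
 * Everything registered below is visible through sysctl(8), e.g. for
 * unit 0 (unit number assumed for the example):
 *
 *	sysctl dev.t4nex.0.core_clock
 *	sysctl dev.t4nex.0.doorbells
 */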
5215         oid = device_get_sysctl_tree(sc->dev);
5216         c0 = children = SYSCTL_CHILDREN(oid);
5217
5218         sc->sc_do_rxcopy = 1;
5219         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
5220             &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
5221
5222         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
5223             sc->params.nports, "# of ports");
5224
5225         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
5226             CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells,
5227             sysctl_bitfield, "A", "available doorbells");
5228
5229         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
5230             sc->params.vpd.cclk, "core clock frequency (in kHz)");
5231
5232         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
5233             CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
5234             sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
5235             "interrupt holdoff timer values (us)");
5236
5237         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
5238             CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
5239             sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
5240             "interrupt holdoff packet counter values");
5241
5242         t4_sge_sysctls(sc, ctx, children);
5243
5244         sc->lro_timeout = 100;
5245         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
5246             &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
5247
5248         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
5249             &sc->debug_flags, 0, "flags to enable runtime debugging");
5250
5251         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
5252             CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
5253
5254         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
5255             CTLFLAG_RD, sc->fw_version, 0, "firmware version");
5256
5257         if (sc->flags & IS_VF)
5258                 return;
5259
5260         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
5261             NULL, chip_rev(sc), "chip hardware revision");
5262
5263         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
5264             CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
5265
5266         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
5267             CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
5268
5269         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
5270             CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
5271
5272         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
5273             CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");
5274
5275         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
5276             CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
5277
5278         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
5279             sc->er_version, 0, "expansion ROM version");
5280
5281         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
5282             sc->bs_version, 0, "bootstrap firmware version");
5283
5284         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
5285             NULL, sc->params.scfg_vers, "serial config version");
5286
5287         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
5288             NULL, sc->params.vpd_vers, "VPD version");
5289
5290         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
5291             CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
5292
5293         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
5294             sc->cfcsum, "config file checksum");
5295
5296 #define SYSCTL_CAP(name, n, text) \
5297         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
5298             CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \
5299             sysctl_bitfield, "A", "available " text " capabilities")
5300
5301         SYSCTL_CAP(nbmcaps, 0, "NBM");
5302         SYSCTL_CAP(linkcaps, 1, "link");
5303         SYSCTL_CAP(switchcaps, 2, "switch");
5304         SYSCTL_CAP(niccaps, 3, "NIC");
5305         SYSCTL_CAP(toecaps, 4, "TCP offload");
5306         SYSCTL_CAP(rdmacaps, 5, "RDMA");
5307         SYSCTL_CAP(iscsicaps, 6, "iSCSI");
5308         SYSCTL_CAP(cryptocaps, 7, "crypto");
5309         SYSCTL_CAP(fcoecaps, 8, "FCoE");
5310 #undef SYSCTL_CAP
5311
5312         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
5313             NULL, sc->tids.nftids, "number of filters");
5314
5315         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
5316             CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
5317             "chip temperature (in Celsius)");
5318
5319         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD,
5320             &sc->params.core_vdd, 0, "core Vdd (in mV)");
5321
5322 #ifdef SBUF_DRAIN
5323         /*
5324          * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
5325          */
5326         oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
5327             CTLFLAG_RD | CTLFLAG_SKIP, NULL,
5328             "logs and miscellaneous information");
5329         children = SYSCTL_CHILDREN(oid);
5330
5331         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
5332             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5333             sysctl_cctrl, "A", "congestion control");
5334
5335         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
5336             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5337             sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
5338
5339         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
5340             CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
5341             sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
5342
5343         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
5344             CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
5345             sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
5346
5347         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
5348             CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
5349             sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
5350
5351         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
5352             CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
5353             sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
5354
5355         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
5356             CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
5357             sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
5358
5359         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
5360             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5361             chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6,
5362             "A", "CIM logic analyzer");
5363
5364         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
5365             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5366             sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
5367
5368         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
5369             CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
5370             sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
5371
5372         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
5373             CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
5374             sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
5375
5376         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
5377             CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
5378             sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
5379
5380         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
5381             CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
5382             sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
5383
5384         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
5385             CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
5386             sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
5387
5388         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
5389             CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
5390             sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
5391
5392         if (chip_id(sc) > CHELSIO_T4) {
5393                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
5394                     CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
5395                     sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
5396
5397                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
5398                     CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
5399                     sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
5400         }
5401
5402         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
5403             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5404             sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
5405
5406         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
5407             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5408             sysctl_cim_qcfg, "A", "CIM queue configuration");
5409
5410         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
5411             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5412             sysctl_cpl_stats, "A", "CPL statistics");
5413
5414         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
5415             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5416             sysctl_ddp_stats, "A", "non-TCP DDP statistics");
5417
5418         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
5419             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5420             sysctl_devlog, "A", "firmware's device log");
5421
5422         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
5423             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5424             sysctl_fcoe_stats, "A", "FCoE statistics");
5425
5426         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
5427             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5428             sysctl_hw_sched, "A", "hardware scheduler");
5429
5430         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
5431             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5432             sysctl_l2t, "A", "hardware L2 table");
5433
5434         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
5435             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5436             sysctl_lb_stats, "A", "loopback statistics");
5437
5438         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
5439             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5440             sysctl_meminfo, "A", "memory regions");
5441
5442         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
5443             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5444             chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
5445             "A", "MPS TCAM entries");
5446
5447         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
5448             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5449             sysctl_path_mtus, "A", "path MTUs");
5450
5451         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
5452             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5453             sysctl_pm_stats, "A", "PM statistics");
5454
5455         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
5456             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5457             sysctl_rdma_stats, "A", "RDMA statistics");
5458
5459         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
5460             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5461             sysctl_tcp_stats, "A", "TCP statistics");
5462
5463         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
5464             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5465             sysctl_tids, "A", "TID information");
5466
5467         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
5468             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5469             sysctl_tp_err_stats, "A", "TP error statistics");
5470
5471         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
5472             CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
5473             "TP logic analyzer event capture mask");
5474
5475         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
5476             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5477             sysctl_tp_la, "A", "TP logic analyzer");
5478
5479         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
5480             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5481             sysctl_tx_rate, "A", "Tx rate");
5482
5483         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
5484             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5485             sysctl_ulprx_la, "A", "ULPRX logic analyzer");
5486
5487         if (chip_id(sc) >= CHELSIO_T5) {
5488                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
5489                     CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
5490                     sysctl_wcwr_stats, "A", "write combined work requests");
5491         }
5492 #endif
5493
5494 #ifdef TCP_OFFLOAD
5495         if (is_offload(sc)) {
5496                 int i;
5497                 char s[4];
5498
5499                 /*
5500                  * dev.t4nex.X.toe.
5501                  */
5502                 oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
5503                     NULL, "TOE parameters");
5504                 children = SYSCTL_CHILDREN(oid);
5505
5506                 sc->tt.cong_algorithm = -1;
5507                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
5508                     CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
5509                     "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
5510                     "3 = highspeed)");
5511
5512                 sc->tt.sndbuf = 256 * 1024;
5513                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
5514                     &sc->tt.sndbuf, 0, "max hardware send buffer size");
5515
5516                 sc->tt.ddp = 0;
5517                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
5518                     &sc->tt.ddp, 0, "DDP allowed");
5519
5520                 sc->tt.rx_coalesce = 1;
5521                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
5522                     CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
5523
5524                 sc->tt.tls = 0;
5525                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW,
5526                     &sc->tt.tls, 0, "Inline TLS allowed");
5527
5528                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
5529                     CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports,
5530                     "I", "TCP ports that use inline TLS+TOE RX");
5531
5532                 sc->tt.tx_align = 1;
5533                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
5534                     CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
5535
5536                 sc->tt.tx_zcopy = 0;
5537                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
5538                     CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
5539                     "Enable zero-copy aio_write(2)");
5540
5541                 sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
5542                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
5543                     "cop_managed_offloading", CTLFLAG_RW,
5544                     &sc->tt.cop_managed_offloading, 0,
5545                     "COP (Connection Offload Policy) controls all TOE offload");
5546
5547                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
5548                     CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
5549                     "TP timer tick (us)");
5550
5551                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
5552                     CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
5553                     "TCP timestamp tick (us)");
5554
5555                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
5556                     CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
5557                     "DACK tick (us)");
5558
5559                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
5560                     CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
5561                     "IU", "DACK timer (us)");
5562
5563                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
5564                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
5565                     sysctl_tp_timer, "LU", "Minimum retransmit interval (us)");
5566
5567                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
5568                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
5569                     sysctl_tp_timer, "LU", "Maximum retransmit interval (us)");
5570
5571                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
5572                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
5573                     sysctl_tp_timer, "LU", "Persist timer min (us)");
5574
5575                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
5576                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
5577                     sysctl_tp_timer, "LU", "Persist timer max (us)");
5578
5579                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
5580                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
5581                     sysctl_tp_timer, "LU", "Keepalive idle timer (us)");
5582
5583                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
5584                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
5585                     sysctl_tp_timer, "LU", "Keepalive interval timer (us)");
5586
5587                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
5588                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
5589                     sysctl_tp_timer, "LU", "Initial SRTT (us)");
5590
5591                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
5592                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
5593                     sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");
5594
5595                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
5596                     CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX,
5597                     sysctl_tp_shift_cnt, "IU",
5598                     "Number of SYN retransmissions before abort");
5599
5600                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
5601                     CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2,
5602                     sysctl_tp_shift_cnt, "IU",
5603                     "Number of retransmissions before abort");
5604
5605                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
5606                     CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2,
5607                     sysctl_tp_shift_cnt, "IU",
5608                     "Number of keepalive probes before abort");
5609
5610                 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
5611                     CTLFLAG_RD, NULL, "TOE retransmit backoffs");
5612                 children = SYSCTL_CHILDREN(oid);
5613                 for (i = 0; i < 16; i++) {
5614                         snprintf(s, sizeof(s), "%u", i);
5615                         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
5616                             CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff,
5617                             "IU", "TOE retransmit backoff");
5618                 }
5619         }
5620 #endif
5621 }
5622
5623 void
5624 vi_sysctls(struct vi_info *vi)
5625 {
5626         struct sysctl_ctx_list *ctx;
5627         struct sysctl_oid *oid;
5628         struct sysctl_oid_list *children;
5629
5630         ctx = device_get_sysctl_ctx(vi->dev);
5631
5632         /*
5633          * dev.v?(cxgbe|cxl).X.
5634          */
5635         oid = device_get_sysctl_tree(vi->dev);
5636         children = SYSCTL_CHILDREN(oid);
5637
5638         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
5639             vi->viid, "VI identifier");
5640         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
5641             &vi->nrxq, 0, "# of rx queues");
5642         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
5643             &vi->ntxq, 0, "# of tx queues");
5644         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
5645             &vi->first_rxq, 0, "index of first rx queue");
5646         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
5647             &vi->first_txq, 0, "index of first tx queue");
5648         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
5649             vi->rss_size, "size of RSS indirection table");
5650
5651         if (IS_MAIN_VI(vi)) {
5652                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
5653                     CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
5654                     "Reserve queue 0 for non-flowid packets");
5655         }
5656
5657 #ifdef TCP_OFFLOAD
5658         if (vi->nofldrxq != 0) {
5659                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
5660                     &vi->nofldrxq, 0,
5661                     "# of rx queues for offloaded TCP connections");
5662                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
5663                     &vi->nofldtxq, 0,
5664                     "# of tx queues for offloaded TCP connections");
5665                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
5666                     CTLFLAG_RD, &vi->first_ofld_rxq, 0,
5667                     "index of first TOE rx queue");
5668                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
5669                     CTLFLAG_RD, &vi->first_ofld_txq, 0,
5670                     "index of first TOE tx queue");
5671                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
5672                     CTLTYPE_INT | CTLFLAG_RW, vi, 0,
5673                     sysctl_holdoff_tmr_idx_ofld, "I",
5674                     "holdoff timer index for TOE queues");
5675                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
5676                     CTLTYPE_INT | CTLFLAG_RW, vi, 0,
5677                     sysctl_holdoff_pktc_idx_ofld, "I",
5678                     "holdoff packet counter index for TOE queues");
5679         }
5680 #endif
5681 #ifdef DEV_NETMAP
5682         if (vi->nnmrxq != 0) {
5683                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
5684                     &vi->nnmrxq, 0, "# of netmap rx queues");
5685                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
5686                     &vi->nnmtxq, 0, "# of netmap tx queues");
5687                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
5688                     CTLFLAG_RD, &vi->first_nm_rxq, 0,
5689                     "index of first netmap rx queue");
5690                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
5691                     CTLFLAG_RD, &vi->first_nm_txq, 0,
5692                     "index of first netmap tx queue");
5693         }
5694 #endif
5695
5696         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
5697             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
5698             "holdoff timer index");
5699         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
5700             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
5701             "holdoff packet counter index");
5702
5703         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
5704             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
5705             "rx queue size");
5706         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
5707             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
5708             "tx queue size");
5709 }
5710
5711 static void
5712 cxgbe_sysctls(struct port_info *pi)
5713 {
5714         struct sysctl_ctx_list *ctx;
5715         struct sysctl_oid *oid;
5716         struct sysctl_oid_list *children, *children2;
5717         struct adapter *sc = pi->adapter;
5718         int i;
5719         char name[16];
5720
5721         ctx = device_get_sysctl_ctx(pi->dev);
5722
5723         /*
5724          * dev.cxgbe.X.
5725          */
5726         oid = device_get_sysctl_tree(pi->dev);
5727         children = SYSCTL_CHILDREN(oid);
5728
5729         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
5730            CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
5731         if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
5732                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
5733                     CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
5734                     "PHY temperature (in Celsius)");
5735                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
5736                     CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
5737                     "PHY firmware version");
5738         }
5739
5740         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
5741             CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A",
5742             "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)");
5743         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
5744             CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A",
5745             "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
5746         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
5747             CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I",
5748             "autonegotiation (-1 = not supported)");
5749
5750         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
5751             port_top_speed(pi), "max speed (in Gbps)");
5752         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
5753             pi->mps_bg_map, "MPS buffer group map");
5754         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
5755             NULL, pi->rx_e_chan_map, "TP rx e-channel map");
5756
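             /*
              * Everything below (tx scheduler classes, hardware port
              * statistics) reads PF-only resources and is skipped for a VF.
              */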
5757         if (sc->flags & IS_VF)
5758                 return;
5759
5760         /*
5761          * dev.(cxgbe|cxl).X.tc.
5762          */
5763         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
5764             "Tx scheduler traffic classes (cl_rl)");
5765         for (i = 0; i < sc->chip_params->nsched_cls; i++) {
5766                 struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
5767
5768                 snprintf(name, sizeof(name), "%d", i);
5769                 children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
5770                     SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
5771                     "traffic class"));
5772                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD,
5773                     &tc->flags, 0, "flags");
5774                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
5775                     CTLFLAG_RD, &tc->refcount, 0, "references to this class");
5776 #ifdef SBUF_DRAIN
5777                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
5778                     CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i,
5779                     sysctl_tc_params, "A", "traffic class parameters");
5780 #endif
5781         }
5782
5783         /*
5784          * dev.cxgbe.X.stats.
5785          */
5786         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
5787             NULL, "port statistics");
5788         children = SYSCTL_CHILDREN(oid);
5789         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
5790             &pi->tx_parse_error, 0,
5791             "# of tx packets with an invalid length or segment count");
5792
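     /*
      * Each of these statistics is read directly from a 64-bit MPS register
      * via sysctl_handle_t4_reg64; 'reg' is the register address.
      */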
5793 #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
5794         SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
5795             CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
5796             sysctl_handle_t4_reg64, "QU", desc)
5797
5798         SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
5799             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
5800         SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
5801             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
5802         SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
5803             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
5804         SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
5805             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
5806         SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
5807             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
5808         SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
5809             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
5810         SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
5811             "# of tx frames in this range",
5812             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
5813         SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
5814             "# of tx frames in this range",
5815             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
5816         SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
5817             "# of tx frames in this range",
5818             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
5819         SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
5820             "# of tx frames in this range",
5821             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
5822         SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
5823             "# of tx frames in this range",
5824             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
5825         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
5826             "# of tx frames in this range",
5827             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
5828         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
5829             "# of tx frames in this range",
5830             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
5831         SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
5832             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
5833         SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
5834             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
5835         SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
5836             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
5837         SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
5838             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
5839         SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
5840             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
5841         SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
5842             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
5843         SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
5844             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
5845         SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
5846             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
5847         SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
5848             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
5849         SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
5850             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
5851
5852         SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
5853             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
5854         SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
5855             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
5856         SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
5857             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
5858         SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
5859             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
5860         SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
5861             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
5862         SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
5863             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
5864         SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
5865             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
5866         SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
5867             "# of frames received with bad FCS",
5868             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
5869         SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
5870             "# of frames received with length error",
5871             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
5872         SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
5873             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
5874         SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
5875             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
5876         SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
5877             "# of rx frames in this range",
5878             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
5879         SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
5880             "# of rx frames in this range",
5881             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
5882         SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
5883             "# of rx frames in this range",
5884             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
5885         SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
5886             "# of rx frames in this range",
5887             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
5888         SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
5889             "# of rx frames in this range",
5890             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
5891         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
5892             "# of rx frames in this range",
5893             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
5894         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
5895             "# of rx frames in this range",
5896             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
5897         SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
5898             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
5899         SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
5900             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
5901         SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
5902             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
5903         SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
5904             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
5905         SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
5906             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
5907         SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
5908             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
5909         SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
5910             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
5911         SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
5912             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
5913         SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
5914             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
5915
5916 #undef SYSCTL_ADD_T4_REG64
5917
5918 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
5919         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
5920             &pi->stats.name, desc)
5921
5922         /* We get these from port_stats and they may be stale by up to 1s */
5923         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
5924             "# drops due to buffer-group 0 overflows");
5925         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
5926             "# drops due to buffer-group 1 overflows");
5927         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
5928             "# drops due to buffer-group 2 overflows");
5929         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
5930             "# drops due to buffer-group 3 overflows");
5931         SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
5932             "# of buffer-group 0 truncated packets");
5933         SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
5934             "# of buffer-group 1 truncated packets");
5935         SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
5936             "# of buffer-group 2 truncated packets");
5937         SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
5938             "# of buffer-group 3 truncated packets");
5939
5940 #undef SYSCTL_ADD_T4_PORTSTAT
5941
5942         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records",
5943             CTLFLAG_RD, &pi->tx_tls_records,
5944             "# of TLS records transmitted");
5945         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets",
5946             CTLFLAG_RD, &pi->tx_tls_octets,
5947             "# of payload octets in transmitted TLS records");
5948         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records",
5949             CTLFLAG_RD, &pi->rx_tls_records,
5950             "# of TLS records received");
5951         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets",
5952             CTLFLAG_RD, &pi->rx_tls_octets,
5953             "# of payload octets in received TLS records");
5954 }
5955
5956 static int
5957 sysctl_int_array(SYSCTL_HANDLER_ARGS)
5958 {
5959         int rc, *i, space = 0;
5960         struct sbuf sb;
5961
5962         sbuf_new_for_sysctl(&sb, NULL, 64, req);
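             /* arg1 points to an array of ints; arg2 is its size in bytes. */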
5963         for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
5964                 if (space)
5965                         sbuf_printf(&sb, " ");
5966                 sbuf_printf(&sb, "%d", *i);
5967                 space = 1;
5968         }
5969         rc = sbuf_finish(&sb);
5970         sbuf_delete(&sb);
5971         return (rc);
5972 }
5973
5974 static int
5975 sysctl_bitfield(SYSCTL_HANDLER_ARGS)
5976 {
5977         int rc;
5978         struct sbuf *sb;
5979
5980         rc = sysctl_wire_old_buffer(req, 0);
5981         if (rc != 0)
5982                 return (rc);
5983
5984         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
5985         if (sb == NULL)
5986                 return (ENOMEM);
5987
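             /*
              * arg1 is a "\20..." bit description string and arg2 holds the
              * value to decode with %b.
              */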
5988         sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
5989         rc = sbuf_finish(sb);
5990         sbuf_delete(sb);
5991
5992         return (rc);
5993 }
5994
5995 static int
5996 sysctl_btphy(SYSCTL_HANDLER_ARGS)
5997 {
5998         struct port_info *pi = arg1;
5999         int op = arg2;
6000         struct adapter *sc = pi->adapter;
6001         u_int v;
6002         int rc;
6003
6004         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
6005         if (rc)
6006                 return (rc);
6007         /* XXX: magic numbers */
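             /*
              * op 0 (the "temperature" sysctl) reads register 0xc820 and the
              * raw value is divided by 256 below; op 1 ("fw_version") reads
              * register 0x20.  See the callers in cxgbe_sysctls().
              */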
6008         rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
6009             &v);
6010         end_synchronized_op(sc, 0);
6011         if (rc)
6012                 return (rc);
6013         if (op == 0)
6014                 v /= 256;
6015
6016         rc = sysctl_handle_int(oidp, &v, 0, req);
6017         return (rc);
6018 }
6019
6020 static int
6021 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
6022 {
6023         struct vi_info *vi = arg1;
6024         int rc, val;
6025
6026         val = vi->rsrv_noflowq;
6027         rc = sysctl_handle_int(oidp, &val, 0, req);
6028         if (rc != 0 || req->newptr == NULL)
6029                 return (rc);
6030
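             /* Reserving queue 0 only makes sense with more than one tx queue. */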
6031         if ((val >= 1) && (vi->ntxq > 1))
6032                 vi->rsrv_noflowq = 1;
6033         else
6034                 vi->rsrv_noflowq = 0;
6035
6036         return (rc);
6037 }
6038
6039 static int
6040 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
6041 {
6042         struct vi_info *vi = arg1;
6043         struct adapter *sc = vi->pi->adapter;
6044         int idx, rc, i;
6045         struct sge_rxq *rxq;
6046         uint8_t v;
6047
6048         idx = vi->tmr_idx;
6049
6050         rc = sysctl_handle_int(oidp, &idx, 0, req);
6051         if (rc != 0 || req->newptr == NULL)
6052                 return (rc);
6053
6054         if (idx < 0 || idx >= SGE_NTIMERS)
6055                 return (EINVAL);
6056
6057         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6058             "t4tmr");
6059         if (rc)
6060                 return (rc);
6061
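             /*
              * Push the new timer index (and the counter-enable bit derived
              * from pktc_idx) to every rx queue.  Use an atomic release store
              * where the platform provides one so readers in the interrupt
              * path pick up the update safely.
              */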
6062         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
6063         for_each_rxq(vi, i, rxq) {
6064 #ifdef atomic_store_rel_8
6065                 atomic_store_rel_8(&rxq->iq.intr_params, v);
6066 #else
6067                 rxq->iq.intr_params = v;
6068 #endif
6069         }
6070         vi->tmr_idx = idx;
6071
6072         end_synchronized_op(sc, LOCK_HELD);
6073         return (0);
6074 }
6075
6076 static int
6077 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
6078 {
6079         struct vi_info *vi = arg1;
6080         struct adapter *sc = vi->pi->adapter;
6081         int idx, rc;
6082
6083         idx = vi->pktc_idx;
6084
6085         rc = sysctl_handle_int(oidp, &idx, 0, req);
6086         if (rc != 0 || req->newptr == NULL)
6087                 return (rc);
6088
6089         if (idx < -1 || idx >= SGE_NCOUNTERS)
6090                 return (EINVAL);
6091
6092         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6093             "t4pktc");
6094         if (rc)
6095                 return (rc);
6096
6097         if (vi->flags & VI_INIT_DONE)
6098                 rc = EBUSY; /* cannot be changed once the queues are created */
6099         else
6100                 vi->pktc_idx = idx;
6101
6102         end_synchronized_op(sc, LOCK_HELD);
6103         return (rc);
6104 }
6105
6106 static int
6107 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
6108 {
6109         struct vi_info *vi = arg1;
6110         struct adapter *sc = vi->pi->adapter;
6111         int qsize, rc;
6112
6113         qsize = vi->qsize_rxq;
6114
6115         rc = sysctl_handle_int(oidp, &qsize, 0, req);
6116         if (rc != 0 || req->newptr == NULL)
6117                 return (rc);
6118
6119         if (qsize < 128 || (qsize & 7))
6120                 return (EINVAL);
6121
6122         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6123             "t4rxqs");
6124         if (rc)
6125                 return (rc);
6126
6127         if (vi->flags & VI_INIT_DONE)
6128                 rc = EBUSY; /* cannot be changed once the queues are created */
6129         else
6130                 vi->qsize_rxq = qsize;
6131
6132         end_synchronized_op(sc, LOCK_HELD);
6133         return (rc);
6134 }
6135
6136 static int
6137 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
6138 {
6139         struct vi_info *vi = arg1;
6140         struct adapter *sc = vi->pi->adapter;
6141         int qsize, rc;
6142
6143         qsize = vi->qsize_txq;
6144
6145         rc = sysctl_handle_int(oidp, &qsize, 0, req);
6146         if (rc != 0 || req->newptr == NULL)
6147                 return (rc);
6148
6149         if (qsize < 128 || qsize > 65536)
6150                 return (EINVAL);
6151
6152         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6153             "t4txqs");
6154         if (rc)
6155                 return (rc);
6156
6157         if (vi->flags & VI_INIT_DONE)
6158                 rc = EBUSY; /* cannot be changed once the queues are created */
6159         else
6160                 vi->qsize_txq = qsize;
6161
6162         end_synchronized_op(sc, LOCK_HELD);
6163         return (rc);
6164 }
6165
6166 static int
6167 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
6168 {
6169         struct port_info *pi = arg1;
6170         struct adapter *sc = pi->adapter;
6171         struct link_config *lc = &pi->link_cfg;
6172         int rc;
6173
6174         if (req->newptr == NULL) {
6175                 struct sbuf *sb;
6176                 static const char *bits = "\20\1PAUSE_RX\2PAUSE_TX";
6177
6178                 rc = sysctl_wire_old_buffer(req, 0);
6179                 if (rc != 0)
6180                         return (rc);
6181
6182                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6183                 if (sb == NULL)
6184                         return (ENOMEM);
6185
6186                 sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits);
6187                 rc = sbuf_finish(sb);
6188                 sbuf_delete(sb);
6189         } else {
6190                 char s[2];
6191                 int n;
6192
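                     /*
                      * The new value arrives as a single ASCII digit whose
                      * bits are PAUSE_RX (bit 0) and PAUSE_TX (bit 1).
                      */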
6193                 s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX));
6194                 s[1] = 0;
6195
6196                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6197                 if (rc != 0)
6198                         return (rc);
6199
6200                 if (s[1] != 0)
6201                         return (EINVAL);
6202                 if (s[0] < '0' || s[0] > '9')
6203                         return (EINVAL);        /* not a number */
6204                 n = s[0] - '0';
6205                 if (n & ~(PAUSE_TX | PAUSE_RX))
6206                         return (EINVAL);        /* some other bit is set too */
6207
6208                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6209                     "t4PAUSE");
6210                 if (rc)
6211                         return (rc);
6212                 if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) {
6213                         lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX);
6214                         lc->requested_fc |= n;
6215                         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
6216                         if (rc == 0) {
6217                                 lc->fc = lc->requested_fc;
6218                         }
6219                 }
6220                 end_synchronized_op(sc, 0);
6221         }
6222
6223         return (rc);
6224 }
6225
6226 static int
6227 sysctl_fec(SYSCTL_HANDLER_ARGS)
6228 {
6229         struct port_info *pi = arg1;
6230         struct adapter *sc = pi->adapter;
6231         struct link_config *lc = &pi->link_cfg;
6232         int rc;
6233
6234         if (req->newptr == NULL) {
6235                 struct sbuf *sb;
6236                 static const char *bits = "\20\1RS\2BASER_RS\3RESERVED";
6237
6238                 rc = sysctl_wire_old_buffer(req, 0);
6239                 if (rc != 0)
6240                         return (rc);
6241
6242                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6243                 if (sb == NULL)
6244                         return (ENOMEM);
6245
6246                 sbuf_printf(sb, "%b", lc->fec & M_FW_PORT_CAP_FEC, bits);
6247                 rc = sbuf_finish(sb);
6248                 sbuf_delete(sb);
6249         } else {
6250                 char s[2];
6251                 int n;
6252
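                     /*
                      * Same single-digit encoding as pause_settings, limited
                      * to the FEC capability bits.
                      */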
6253                 s[0] = '0' + (lc->requested_fec & M_FW_PORT_CAP_FEC);
6254                 s[1] = 0;
6255
6256                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6257                 if (rc != 0)
6258                         return (rc);
6259
6260                 if (s[1] != 0)
6261                         return (EINVAL);
6262                 if (s[0] < '0' || s[0] > '9')
6263                         return (EINVAL);        /* not a number */
6264                 n = s[0] - '0';
6265                 if (n & ~M_FW_PORT_CAP_FEC)
6266                         return (EINVAL);        /* some other bit is set too */
6267
6268                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6269                     "t4fec");
6270                 if (rc)
6271                         return (rc);
6272                 if ((lc->requested_fec & M_FW_PORT_CAP_FEC) != n) {
6273                         lc->requested_fec = n &
6274                             G_FW_PORT_CAP_FEC(lc->supported);
6275                         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
6276                         if (rc == 0) {
6277                                 lc->fec = lc->requested_fec;
6278                         }
6279                 }
6280                 end_synchronized_op(sc, 0);
6281         }
6282
6283         return (rc);
6284 }
6285
6286 static int
6287 sysctl_autoneg(SYSCTL_HANDLER_ARGS)
6288 {
6289         struct port_info *pi = arg1;
6290         struct adapter *sc = pi->adapter;
6291         struct link_config *lc = &pi->link_cfg;
6292         int rc, val, old;
6293
6294         if (lc->supported & FW_PORT_CAP_ANEG)
6295                 val = lc->requested_aneg == AUTONEG_ENABLE ? 1 : 0;
6296         else
6297                 val = -1;
6298         rc = sysctl_handle_int(oidp, &val, 0, req);
6299         if (rc != 0 || req->newptr == NULL)
6300                 return (rc);
6301         if ((lc->supported & FW_PORT_CAP_ANEG) == 0)
6302                 return (ENOTSUP);
6303
6304         if (val == 0)
6305                 val = AUTONEG_DISABLE;
6306         else if (val == 1)
6307                 val = AUTONEG_ENABLE;
6308         else
6309                 return (EINVAL);
6310         if (lc->requested_aneg == val)
6311                 return (0);     /* no change */
6312
6313         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6314             "t4aneg");
6315         if (rc)
6316                 return (rc);
6317         old = lc->requested_aneg;
6318         lc->requested_aneg = val;
6319         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
6320         if (rc != 0)
6321                 lc->requested_aneg = old;
6322         end_synchronized_op(sc, 0);
6323         return (rc);
6324 }
6325
6326 static int
6327 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
6328 {
6329         struct adapter *sc = arg1;
6330         int reg = arg2;
6331         uint64_t val;
6332
6333         val = t4_read_reg64(sc, reg);
6334
6335         return (sysctl_handle_64(oidp, &val, 0, req));
6336 }
6337
6338 static int
6339 sysctl_temperature(SYSCTL_HANDLER_ARGS)
6340 {
6341         struct adapter *sc = arg1;
6342         int rc, t;
6343         uint32_t param, val;
6344
6345         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
6346         if (rc)
6347                 return (rc);
6348         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
6349             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
6350             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
6351         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
6352         end_synchronized_op(sc, 0);
6353         if (rc)
6354                 return (rc);
6355
6356         /* unknown is returned as 0 but we display -1 in that case */
6357         t = val == 0 ? -1 : val;
6358
6359         rc = sysctl_handle_int(oidp, &t, 0, req);
6360         return (rc);
6361 }
6362
6363 #ifdef SBUF_DRAIN
6364 static int
6365 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
6366 {
6367         struct adapter *sc = arg1;
6368         struct sbuf *sb;
6369         int rc, i;
6370         uint16_t incr[NMTUS][NCCTRL_WIN];
6371         static const char *dec_fac[] = {
6372                 "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
6373                 "0.9375"
6374         };
6375
6376         rc = sysctl_wire_old_buffer(req, 0);
6377         if (rc != 0)
6378                 return (rc);
6379
6380         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6381         if (sb == NULL)
6382                 return (ENOMEM);
6383
6384         t4_read_cong_tbl(sc, incr);
6385
6386         for (i = 0; i < NCCTRL_WIN; ++i) {
6387                 sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
6388                     incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
6389                     incr[5][i], incr[6][i], incr[7][i]);
6390                 sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
6391                     incr[8][i], incr[9][i], incr[10][i], incr[11][i],
6392                     incr[12][i], incr[13][i], incr[14][i], incr[15][i],
6393                     sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
6394         }
6395
6396         rc = sbuf_finish(sb);
6397         sbuf_delete(sb);
6398
6399         return (rc);
6400 }
6401
6402 static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
6403         "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",   /* ibq's */
6404         "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */
6405         "SGE0-RX", "SGE1-RX"    /* additional obq's (T5 onwards) */
6406 };
6407
6408 static int
6409 sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
6410 {
6411         struct adapter *sc = arg1;
6412         struct sbuf *sb;
6413         int rc, i, n, qid = arg2;
6414         uint32_t *buf, *p;
6415         const char *qtype;
6416         u_int cim_num_obq = sc->chip_params->cim_num_obq;
6417
6418         KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
6419             ("%s: bad qid %d\n", __func__, qid));
6420
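             /* arg2 indexes qname[]: 0..CIM_NUM_IBQ-1 are IBQs, the rest are OBQs. */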
6421         if (qid < CIM_NUM_IBQ) {
6422                 /* inbound queue */
6423                 qtype = "IBQ";
6424                 n = 4 * CIM_IBQ_SIZE;
6425                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
6426                 rc = t4_read_cim_ibq(sc, qid, buf, n);
6427         } else {
6428                 /* outbound queue */
6429                 qtype = "OBQ";
6430                 qid -= CIM_NUM_IBQ;
6431                 n = 4 * cim_num_obq * CIM_OBQ_SIZE;
6432                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
6433                 rc = t4_read_cim_obq(sc, qid, buf, n);
6434         }
6435
6436         if (rc < 0) {
6437                 rc = -rc;
6438                 goto done;
6439         }
6440         n = rc * sizeof(uint32_t);      /* rc has # of words actually read */
6441
6442         rc = sysctl_wire_old_buffer(req, 0);
6443         if (rc != 0)
6444                 goto done;
6445
6446         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
6447         if (sb == NULL) {
6448                 rc = ENOMEM;
6449                 goto done;
6450         }
6451
6452         sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
6453         for (i = 0, p = buf; i < n; i += 16, p += 4)
6454                 sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
6455                     p[2], p[3]);
6456
6457         rc = sbuf_finish(sb);
6458         sbuf_delete(sb);
6459 done:
6460         free(buf, M_CXGBE);
6461         return (rc);
6462 }
6463
6464 static int
6465 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
6466 {
6467         struct adapter *sc = arg1;
6468         u_int cfg;
6469         struct sbuf *sb;
6470         uint32_t *buf, *p;
6471         int rc;
6472
6473         MPASS(chip_id(sc) <= CHELSIO_T5);
6474
6475         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
6476         if (rc != 0)
6477                 return (rc);
6478
6479         rc = sysctl_wire_old_buffer(req, 0);
6480         if (rc != 0)
6481                 return (rc);
6482
6483         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6484         if (sb == NULL)
6485                 return (ENOMEM);
6486
6487         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
6488             M_ZERO | M_WAITOK);
6489
6490         rc = -t4_cim_read_la(sc, buf, NULL);
6491         if (rc != 0)
6492                 goto done;
6493
6494         sbuf_printf(sb, "Status   Data      PC%s",
6495             cfg & F_UPDBGLACAPTPCONLY ? "" :
6496             "     LS0Stat  LS0Addr             LS0Data");
6497
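             /* Each LA entry is 8 32-bit words. */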
6498         for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
6499                 if (cfg & F_UPDBGLACAPTPCONLY) {
6500                         sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
6501                             p[6], p[7]);
6502                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
6503                             (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
6504                             p[4] & 0xff, p[5] >> 8);
6505                         sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
6506                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
6507                             p[1] & 0xf, p[2] >> 4);
6508                 } else {
6509                         sbuf_printf(sb,
6510                             "\n  %02x   %x%07x %x%07x %08x %08x "
6511                             "%08x%08x%08x%08x",
6512                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
6513                             p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
6514                             p[6], p[7]);
6515                 }
6516         }
6517
6518         rc = sbuf_finish(sb);
6519         sbuf_delete(sb);
6520 done:
6521         free(buf, M_CXGBE);
6522         return (rc);
6523 }
6524
6525 static int
6526 sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS)
6527 {
6528         struct adapter *sc = arg1;
6529         u_int cfg;
6530         struct sbuf *sb;
6531         uint32_t *buf, *p;
6532         int rc;
6533
6534         MPASS(chip_id(sc) > CHELSIO_T5);
6535
6536         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
6537         if (rc != 0)
6538                 return (rc);
6539
6540         rc = sysctl_wire_old_buffer(req, 0);
6541         if (rc != 0)
6542                 return (rc);
6543
6544         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6545         if (sb == NULL)
6546                 return (ENOMEM);
6547
6548         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
6549             M_ZERO | M_WAITOK);
6550
6551         rc = -t4_cim_read_la(sc, buf, NULL);
6552         if (rc != 0)
6553                 goto done;
6554
6555         sbuf_printf(sb, "Status   Inst    Data      PC%s",
6556             cfg & F_UPDBGLACAPTPCONLY ? "" :
6557             "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
6558
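             /* T6 LA entries are 10 32-bit words each. */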
6559         for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
6560                 if (cfg & F_UPDBGLACAPTPCONLY) {
6561                         sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
6562                             p[3] & 0xff, p[2], p[1], p[0]);
6563                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
6564                             (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
6565                             p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
6566                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
6567                             (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
6568                             p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
6569                             p[6] >> 16);
6570                 } else {
6571                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
6572                             "%08x %08x %08x %08x %08x %08x",
6573                             (p[9] >> 16) & 0xff,
6574                             p[9] & 0xffff, p[8] >> 16,
6575                             p[8] & 0xffff, p[7] >> 16,
6576                             p[7] & 0xffff, p[6] >> 16,
6577                             p[2], p[1], p[0], p[5], p[4], p[3]);
6578                 }
6579         }
6580
6581         rc = sbuf_finish(sb);
6582         sbuf_delete(sb);
6583 done:
6584         free(buf, M_CXGBE);
6585         return (rc);
6586 }
6587
6588 static int
6589 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
6590 {
6591         struct adapter *sc = arg1;
6592         u_int i;
6593         struct sbuf *sb;
6594         uint32_t *buf, *p;
6595         int rc;
6596
6597         rc = sysctl_wire_old_buffer(req, 0);
6598         if (rc != 0)
6599                 return (rc);
6600
6601         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6602         if (sb == NULL)
6603                 return (ENOMEM);
6604
6605         buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
6606             M_ZERO | M_WAITOK);
6607
6608         t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
6609         p = buf;
6610
6611         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
6612                 sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
6613                     p[1], p[0]);
6614         }
6615
6616         sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
6617         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
6618                 sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
6619                     (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
6620                     (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
6621                     (p[1] >> 2) | ((p[2] & 3) << 30),
6622                     (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
6623                     p[0] & 1);
6624         }
6625
6626         rc = sbuf_finish(sb);
6627         sbuf_delete(sb);
6628         free(buf, M_CXGBE);
6629         return (rc);
6630 }
6631
6632 static int
6633 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
6634 {
6635         struct adapter *sc = arg1;
6636         u_int i;
6637         struct sbuf *sb;
6638         uint32_t *buf, *p;
6639         int rc;
6640
6641         rc = sysctl_wire_old_buffer(req, 0);
6642         if (rc != 0)
6643                 return (rc);
6644
6645         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6646         if (sb == NULL)
6647                 return (ENOMEM);
6648
6649         buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
6650             M_ZERO | M_WAITOK);
6651
6652         t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
6653         p = buf;
6654
6655         sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
6656         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
6657                 sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
6658                     (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
6659                     p[4], p[3], p[2], p[1], p[0]);
6660         }
6661
6662         sbuf_printf(sb, "\n\nCntl ID               Data");
6663         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
6664                 sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
6665                     (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
6666         }
6667
6668         rc = sbuf_finish(sb);
6669         sbuf_delete(sb);
6670         free(buf, M_CXGBE);
6671         return (rc);
6672 }
6673
6674 static int
6675 sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
6676 {
6677         struct adapter *sc = arg1;
6678         struct sbuf *sb;
6679         int rc, i;
6680         uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
6681         uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
6682         uint16_t thres[CIM_NUM_IBQ];
6683         uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
6684         uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
6685         u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
6686
6687         cim_num_obq = sc->chip_params->cim_num_obq;
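             /*
              * T4 reads the live queue pointer registers; T5 and later
              * provide shadow copies.
              */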
6688         if (is_t4(sc)) {
6689                 ibq_rdaddr = A_UP_IBQ_0_RDADDR;
6690                 obq_rdaddr = A_UP_OBQ_0_REALADDR;
6691         } else {
6692                 ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
6693                 obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
6694         }
6695         nq = CIM_NUM_IBQ + cim_num_obq;
6696
6697         rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
6698         if (rc == 0)
6699                 rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
6700         if (rc != 0)
6701                 return (rc);
6702
6703         t4_read_cimq_cfg(sc, base, size, thres);
6704
6705         rc = sysctl_wire_old_buffer(req, 0);
6706         if (rc != 0)
6707                 return (rc);
6708
6709         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
6710         if (sb == NULL)
6711                 return (ENOMEM);
6712
6713         sbuf_printf(sb,
6714             "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
6715
6716         for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
6717                 sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
6718                     qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
6719                     G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
6720                     G_QUEREMFLITS(p[2]) * 16);
6721         for ( ; i < nq; i++, p += 4, wr += 2)
6722                 sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
6723                     base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
6724                     wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
6725                     G_QUEREMFLITS(p[2]) * 16);
6726
6727         rc = sbuf_finish(sb);
6728         sbuf_delete(sb);
6729
6730         return (rc);
6731 }
6732
6733 static int
6734 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
6735 {
6736         struct adapter *sc = arg1;
6737         struct sbuf *sb;
6738         int rc;
6739         struct tp_cpl_stats stats;
6740
6741         rc = sysctl_wire_old_buffer(req, 0);
6742         if (rc != 0)
6743                 return (rc);
6744
6745         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6746         if (sb == NULL)
6747                 return (ENOMEM);
6748
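             /* reg_lock serializes the indirect TP register accesses below. */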
6749         mtx_lock(&sc->reg_lock);
6750         t4_tp_get_cpl_stats(sc, &stats, 0);
6751         mtx_unlock(&sc->reg_lock);
6752
6753         if (sc->chip_params->nchan > 2) {
6754                 sbuf_printf(sb, "                 channel 0  channel 1"
6755                     "  channel 2  channel 3");
6756                 sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
6757                     stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
6758                 sbuf_printf(sb, "\nCPL responses:  %10u %10u %10u %10u",
6759                     stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
6760         } else {
6761                 sbuf_printf(sb, "                 channel 0  channel 1");
6762                 sbuf_printf(sb, "\nCPL requests:   %10u %10u",
6763                     stats.req[0], stats.req[1]);
6764                 sbuf_printf(sb, "\nCPL responses:  %10u %10u",
6765                     stats.rsp[0], stats.rsp[1]);
6766         }
6767
6768         rc = sbuf_finish(sb);
6769         sbuf_delete(sb);
6770
6771         return (rc);
6772 }
6773
6774 static int
6775 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
6776 {
6777         struct adapter *sc = arg1;
6778         struct sbuf *sb;
6779         int rc;
6780         struct tp_usm_stats stats;
6781
6782         rc = sysctl_wire_old_buffer(req, 0);
6783         if (rc != 0)
6784                 return (rc);
6785
6786         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6787         if (sb == NULL)
6788                 return (ENOMEM);
6789
6790         t4_get_usm_stats(sc, &stats, 1);
6791
6792         sbuf_printf(sb, "Frames: %u\n", stats.frames);
6793         sbuf_printf(sb, "Octets: %ju\n", stats.octets);
6794         sbuf_printf(sb, "Drops:  %u", stats.drops);
6795
6796         rc = sbuf_finish(sb);
6797         sbuf_delete(sb);
6798
6799         return (rc);
6800 }
6801
6802 static const char * const devlog_level_strings[] = {
6803         [FW_DEVLOG_LEVEL_EMERG]         = "EMERG",
6804         [FW_DEVLOG_LEVEL_CRIT]          = "CRIT",
6805         [FW_DEVLOG_LEVEL_ERR]           = "ERR",
6806         [FW_DEVLOG_LEVEL_NOTICE]        = "NOTICE",
6807         [FW_DEVLOG_LEVEL_INFO]          = "INFO",
6808         [FW_DEVLOG_LEVEL_DEBUG]         = "DEBUG"
6809 };
6810
6811 static const char * const devlog_facility_strings[] = {
6812         [FW_DEVLOG_FACILITY_CORE]       = "CORE",
6813         [FW_DEVLOG_FACILITY_CF]         = "CF",
6814         [FW_DEVLOG_FACILITY_SCHED]      = "SCHED",
6815         [FW_DEVLOG_FACILITY_TIMER]      = "TIMER",
6816         [FW_DEVLOG_FACILITY_RES]        = "RES",
6817         [FW_DEVLOG_FACILITY_HW]         = "HW",
6818         [FW_DEVLOG_FACILITY_FLR]        = "FLR",
6819         [FW_DEVLOG_FACILITY_DMAQ]       = "DMAQ",
6820         [FW_DEVLOG_FACILITY_PHY]        = "PHY",
6821         [FW_DEVLOG_FACILITY_MAC]        = "MAC",
6822         [FW_DEVLOG_FACILITY_PORT]       = "PORT",
6823         [FW_DEVLOG_FACILITY_VI]         = "VI",
6824         [FW_DEVLOG_FACILITY_FILTER]     = "FILTER",
6825         [FW_DEVLOG_FACILITY_ACL]        = "ACL",
6826         [FW_DEVLOG_FACILITY_TM]         = "TM",
6827         [FW_DEVLOG_FACILITY_QFC]        = "QFC",
6828         [FW_DEVLOG_FACILITY_DCB]        = "DCB",
6829         [FW_DEVLOG_FACILITY_ETH]        = "ETH",
6830         [FW_DEVLOG_FACILITY_OFLD]       = "OFLD",
6831         [FW_DEVLOG_FACILITY_RI]         = "RI",
6832         [FW_DEVLOG_FACILITY_ISCSI]      = "ISCSI",
6833         [FW_DEVLOG_FACILITY_FCOE]       = "FCOE",
6834         [FW_DEVLOG_FACILITY_FOISCSI]    = "FOISCSI",
6835         [FW_DEVLOG_FACILITY_FOFCOE]     = "FOFCOE",
6836         [FW_DEVLOG_FACILITY_CHNET]      = "CHNET",
6837 };
6838
6839 static int
6840 sysctl_devlog(SYSCTL_HANDLER_ARGS)
6841 {
6842         struct adapter *sc = arg1;
6843         struct devlog_params *dparams = &sc->params.devlog;
6844         struct fw_devlog_e *buf, *e;
6845         int i, j, rc, nentries, first = 0;
6846         struct sbuf *sb;
6847         uint64_t ftstamp = UINT64_MAX;
6848
6849         if (dparams->addr == 0)
6850                 return (ENXIO);
6851
6852         buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
6853         if (buf == NULL)
6854                 return (ENOMEM);
6855
6856         rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
6857         if (rc != 0)
6858                 goto done;
6859
6860         nentries = dparams->size / sizeof(struct fw_devlog_e);
6861         for (i = 0; i < nentries; i++) {
6862                 e = &buf[i];
6863
6864                 if (e->timestamp == 0)
6865                         break;  /* end */
6866
6867                 e->timestamp = be64toh(e->timestamp);
6868                 e->seqno = be32toh(e->seqno);
6869                 for (j = 0; j < 8; j++)
6870                         e->params[j] = be32toh(e->params[j]);
6871
6872                 if (e->timestamp < ftstamp) {
6873                         ftstamp = e->timestamp;
6874                         first = i;
6875                 }
6876         }
6877
6878         if (buf[first].timestamp == 0)
6879                 goto done;      /* nothing in the log */
6880
6881         rc = sysctl_wire_old_buffer(req, 0);
6882         if (rc != 0)
6883                 goto done;
6884
6885         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
6886         if (sb == NULL) {
6887                 rc = ENOMEM;
6888                 goto done;
6889         }
6890         sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
6891             "Seq#", "Tstamp", "Level", "Facility", "Message");
6892
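             /*
              * The devlog is a circular buffer; print it in order, starting
              * at the oldest entry and wrapping around.
              */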
6893         i = first;
6894         do {
6895                 e = &buf[i];
6896                 if (e->timestamp == 0)
6897                         break;  /* end */
6898
6899                 sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
6900                     e->seqno, e->timestamp,
6901                     (e->level < nitems(devlog_level_strings) ?
6902                         devlog_level_strings[e->level] : "UNKNOWN"),
6903                     (e->facility < nitems(devlog_facility_strings) ?
6904                         devlog_facility_strings[e->facility] : "UNKNOWN"));
6905                 sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
6906                     e->params[2], e->params[3], e->params[4],
6907                     e->params[5], e->params[6], e->params[7]);
6908
6909                 if (++i == nentries)
6910                         i = 0;
6911         } while (i != first);
6912
6913         rc = sbuf_finish(sb);
6914         sbuf_delete(sb);
6915 done:
6916         free(buf, M_CXGBE);
6917         return (rc);
6918 }
6919
6920 static int
6921 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
6922 {
6923         struct adapter *sc = arg1;
6924         struct sbuf *sb;
6925         int rc;
6926         struct tp_fcoe_stats stats[MAX_NCHAN];
6927         int i, nchan = sc->chip_params->nchan;
6928
6929         rc = sysctl_wire_old_buffer(req, 0);
6930         if (rc != 0)
6931                 return (rc);
6932
6933         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6934         if (sb == NULL)
6935                 return (ENOMEM);
6936
6937         for (i = 0; i < nchan; i++)
6938                 t4_get_fcoe_stats(sc, i, &stats[i], 1);
6939
6940         if (nchan > 2) {
6941                 sbuf_printf(sb, "                   channel 0        channel 1"
6942                     "        channel 2        channel 3");
6943                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
6944                     stats[0].octets_ddp, stats[1].octets_ddp,
6945                     stats[2].octets_ddp, stats[3].octets_ddp);
6946                 sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
6947                     stats[0].frames_ddp, stats[1].frames_ddp,
6948                     stats[2].frames_ddp, stats[3].frames_ddp);
6949                 sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
6950                     stats[0].frames_drop, stats[1].frames_drop,
6951                     stats[2].frames_drop, stats[3].frames_drop);
6952         } else {
6953                 sbuf_printf(sb, "                   channel 0        channel 1");
6954                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
6955                     stats[0].octets_ddp, stats[1].octets_ddp);
6956                 sbuf_printf(sb, "\nframesDDP:  %16u %16u",
6957                     stats[0].frames_ddp, stats[1].frames_ddp);
6958                 sbuf_printf(sb, "\nframesDrop: %16u %16u",
6959                     stats[0].frames_drop, stats[1].frames_drop);
6960         }
6961
6962         rc = sbuf_finish(sb);
6963         sbuf_delete(sb);
6964
6965         return (rc);
6966 }
6967
6968 static int
6969 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
6970 {
6971         struct adapter *sc = arg1;
6972         struct sbuf *sb;
6973         int rc, i;
6974         unsigned int map, kbps, ipg, mode;
6975         unsigned int pace_tab[NTX_SCHED];
6976
6977         rc = sysctl_wire_old_buffer(req, 0);
6978         if (rc != 0)
6979                 return (rc);
6980
6981         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
6982         if (sb == NULL)
6983                 return (ENOMEM);
6984
6985         map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
6986         mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
6987         t4_read_pace_tbl(sc, pace_tab);
6988
6989         sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
6990             "Class IPG (0.1 ns)   Flow IPG (us)");
6991
6992         for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
6993                 t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
6994                 sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
6995                     (mode & (1 << i)) ? "flow" : "class", map & 3);
6996                 if (kbps)
6997                         sbuf_printf(sb, "%9u     ", kbps);
6998                 else
6999                         sbuf_printf(sb, " disabled     ");
7000
7001                 if (ipg)
7002                         sbuf_printf(sb, "%13u        ", ipg);
7003                 else
7004                         sbuf_printf(sb, "     disabled        ");
7005
7006                 if (pace_tab[i])
7007                         sbuf_printf(sb, "%10u", pace_tab[i]);
7008                 else
7009                         sbuf_printf(sb, "  disabled");
7010         }
7011
7012         rc = sbuf_finish(sb);
7013         sbuf_delete(sb);
7014
7015         return (rc);
7016 }
7017
7018 static int
7019 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
7020 {
7021         struct adapter *sc = arg1;
7022         struct sbuf *sb;
7023         int rc, i, j;
7024         uint64_t *p0, *p1;
7025         struct lb_port_stats s[2];
7026         static const char *stat_name[] = {
7027                 "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
7028                 "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
7029                 "Frames128To255:", "Frames256To511:", "Frames512To1023:",
7030                 "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
7031                 "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
7032                 "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
7033                 "BG2FramesTrunc:", "BG3FramesTrunc:"
7034         };
7035
7036         rc = sysctl_wire_old_buffer(req, 0);
7037         if (rc != 0)
7038                 return (rc);
7039
7040         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7041         if (sb == NULL)
7042                 return (ENOMEM);
7043
7044         memset(s, 0, sizeof(s));
7045
7046         for (i = 0; i < sc->chip_params->nchan; i += 2) {
7047                 t4_get_lb_stats(sc, i, &s[0]);
7048                 t4_get_lb_stats(sc, i + 1, &s[1]);
7049
7050                 p0 = &s[0].octets;
7051                 p1 = &s[1].octets;
7052                 sbuf_printf(sb, "%s                       Loopback %u"
7053                     "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
7054
7055                 for (j = 0; j < nitems(stat_name); j++)
7056                         sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
7057                                    *p0++, *p1++);
7058         }
7059
7060         rc = sbuf_finish(sb);
7061         sbuf_delete(sb);
7062
7063         return (rc);
7064 }
7065
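/*
 * Report why the link is down, or "n/a" when the link is up or no
 * reason has been recorded (link_down_rc == 255).
 */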
7066 static int
7067 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
7068 {
7069         int rc = 0;
7070         struct port_info *pi = arg1;
7071         struct link_config *lc = &pi->link_cfg;
7072         struct sbuf *sb;
7073
7074         rc = sysctl_wire_old_buffer(req, 0);
7075         if (rc != 0)
7076                 return (rc);
7077         sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
7078         if (sb == NULL)
7079                 return (ENOMEM);
7080
7081         if (lc->link_ok || lc->link_down_rc == 255)
7082                 sbuf_printf(sb, "n/a");
7083         else
7084                 sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
7085
7086         rc = sbuf_finish(sb);
7087         sbuf_delete(sb);
7088
7089         return (rc);
7090 }
7091
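/*
 * Helpers for the meminfo sysctl below.  A mem_desc describes one
 * region of adapter memory: its base and limit addresses, plus an
 * index into the region[] name table; descriptors are sorted by base.
 */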
7092 struct mem_desc {
7093         unsigned int base;
7094         unsigned int limit;
7095         unsigned int idx;
7096 };
7097
7098 static int
7099 mem_desc_cmp(const void *a, const void *b)
7100 {
7101         return ((const struct mem_desc *)a)->base -
7102                ((const struct mem_desc *)b)->base;
7103 }
7104
7105 static void
7106 mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
7107     unsigned int to)
7108 {
7109         unsigned int size;
7110
7111         if (from == to)
7112                 return;
7113
7114         size = to - from + 1;
7115         if (size == 0)
7116                 return;
7117
7118         /* XXX: need humanize_number(3) in libkern for a more readable 'size' */
7119         sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
7120 }
7121
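/*
 * Show how the adapter's memories (EDC0/EDC1 and MC, or MC0/MC1 on T5)
 * are carved up among hardware regions.  Each region's base comes from
 * a dedicated register; a limit of 0 is filled in afterwards as one
 * less than the next region's base (after sorting by base).  Holes in
 * the address space get descriptors with idx >= nitems(region), which
 * bound their neighbours but are not printed themselves.
 */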
7122 static int
7123 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
7124 {
7125         struct adapter *sc = arg1;
7126         struct sbuf *sb;
7127         int rc, i, n;
7128         uint32_t lo, hi, used, alloc;
7129         static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
7130         static const char *region[] = {
7131                 "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
7132                 "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
7133                 "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
7134                 "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
7135                 "RQUDP region:", "PBL region:", "TXPBL region:",
7136                 "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
7137                 "On-chip queues:", "TLS keys:",
7138         };
7139         struct mem_desc avail[4];
7140         struct mem_desc mem[nitems(region) + 3];        /* up to 3 holes */
7141         struct mem_desc *md = mem;
7142
7143         rc = sysctl_wire_old_buffer(req, 0);
7144         if (rc != 0)
7145                 return (rc);
7146
7147         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7148         if (sb == NULL)
7149                 return (ENOMEM);
7150
7151         for (i = 0; i < nitems(mem); i++) {
7152                 mem[i].limit = 0;
7153                 mem[i].idx = i;
7154         }
7155
7156         /* Find and sort the populated memory ranges */
7157         i = 0;
7158         lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
7159         if (lo & F_EDRAM0_ENABLE) {
7160                 hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
7161                 avail[i].base = G_EDRAM0_BASE(hi) << 20;
7162                 avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
7163                 avail[i].idx = 0;
7164                 i++;
7165         }
7166         if (lo & F_EDRAM1_ENABLE) {
7167                 hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
7168                 avail[i].base = G_EDRAM1_BASE(hi) << 20;
7169                 avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
7170                 avail[i].idx = 1;
7171                 i++;
7172         }
7173         if (lo & F_EXT_MEM_ENABLE) {
7174                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
7175                 avail[i].base = G_EXT_MEM_BASE(hi) << 20;
7176                 avail[i].limit = avail[i].base +
7177                     (G_EXT_MEM_SIZE(hi) << 20);
7178                 avail[i].idx = is_t5(sc) ? 3 : 2;       /* Call it MC0 for T5 */
7179                 i++;
7180         }
7181         if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
7182                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
7183                 avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
7184                 avail[i].limit = avail[i].base +
7185                     (G_EXT_MEM1_SIZE(hi) << 20);
7186                 avail[i].idx = 4;
7187                 i++;
7188         }
7189         if (i == 0) {                              /* no memory available */
7190                 sbuf_delete(sb);
                     return (0);
             }
7191         qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
7192
7193         (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
7194         (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
7195         (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
7196         (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
7197         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
7198         (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
7199         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
7200         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
7201         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
7202
7203         /* the next few have explicit upper bounds */
7204         md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
7205         md->limit = md->base - 1 +
7206                     t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
7207                     G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
7208         md++;
7209
7210         md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
7211         md->limit = md->base - 1 +
7212                     t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
7213                     G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
7214         md++;
7215
7216         if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
7217                 if (chip_id(sc) <= CHELSIO_T5)
7218                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
7219                 else
7220                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
7221                 md->limit = 0;
7222         } else {
7223                 md->base = 0;
7224                 md->idx = nitems(region);  /* hide it */
7225         }
7226         md++;
7227
7228 #define ulp_region(reg) \
7229         md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
7230         (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
7231
7232         ulp_region(RX_ISCSI);
7233         ulp_region(RX_TDDP);
7234         ulp_region(TX_TPT);
7235         ulp_region(RX_STAG);
7236         ulp_region(RX_RQ);
7237         ulp_region(RX_RQUDP);
7238         ulp_region(RX_PBL);
7239         ulp_region(TX_PBL);
7240 #undef ulp_region
7241
7242         md->base = 0;
7243         md->idx = nitems(region);
7244         if (!is_t4(sc)) {
7245                 uint32_t size = 0;
7246                 uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
7247                 uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
7248
7249                 if (is_t5(sc)) {
7250                         if (sge_ctrl & F_VFIFO_ENABLE)
7251                                 size = G_DBVFIFO_SIZE(fifo_size);
7252                 } else
7253                         size = G_T6_DBVFIFO_SIZE(fifo_size);
7254
7255                 if (size) {
7256                         md->base = G_BASEADDR(t4_read_reg(sc,
7257                             A_SGE_DBVFIFO_BADDR));
7258                         md->limit = md->base + (size << 2) - 1;
7259                 }
7260         }
7261         md++;
7262
7263         md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
7264         md->limit = 0;
7265         md++;
7266         md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
7267         md->limit = 0;
7268         md++;
7269
7270         md->base = sc->vres.ocq.start;
7271         if (sc->vres.ocq.size)
7272                 md->limit = md->base + sc->vres.ocq.size - 1;
7273         else
7274                 md->idx = nitems(region);  /* hide it */
7275         md++;
7276
7277         md->base = sc->vres.key.start;
7278         if (sc->vres.key.size)
7279                 md->limit = md->base + sc->vres.key.size - 1;
7280         else
7281                 md->idx = nitems(region);  /* hide it */
7282         md++;
7283
7284         /* add any address-space holes, there can be up to 3 */
7285         for (n = 0; n < i - 1; n++)
7286                 if (avail[n].limit < avail[n + 1].base)
7287                         (md++)->base = avail[n].limit;
7288         if (avail[n].limit)
7289                 (md++)->base = avail[n].limit;
7290
7291         n = md - mem;
7292         qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
7293
7294         for (lo = 0; lo < i; lo++)
7295                 mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
7296                                 avail[lo].limit - 1);
7297
7298         sbuf_printf(sb, "\n");
7299         for (i = 0; i < n; i++) {
7300                 if (mem[i].idx >= nitems(region))
7301                         continue;                        /* skip holes */
7302                 if (!mem[i].limit)
7303                         mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
7304                 mem_region_show(sb, region[mem[i].idx], mem[i].base,
7305                                 mem[i].limit);
7306         }
7307
7308         sbuf_printf(sb, "\n");
7309         lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
7310         hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
7311         mem_region_show(sb, "uP RAM:", lo, hi);
7312
7313         lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
7314         hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
7315         mem_region_show(sb, "uP Extmem2:", lo, hi);
7316
7317         lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
7318         sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
7319                    G_PMRXMAXPAGE(lo),
7320                    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
7321                    (lo & F_PMRXNUMCHN) ? 2 : 1);
7322
7323         lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
7324         hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
7325         sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
7326                    G_PMTXMAXPAGE(lo),
7327                    hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
7328                    hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
7329         sbuf_printf(sb, "%u p-structs\n",
7330                    t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
7331
7332         for (i = 0; i < 4; i++) {
7333                 if (chip_id(sc) > CHELSIO_T5)
7334                         lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
7335                 else
7336                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
7337                 if (is_t5(sc)) {
7338                         used = G_T5_USED(lo);
7339                         alloc = G_T5_ALLOC(lo);
7340                 } else {
7341                         used = G_USED(lo);
7342                         alloc = G_ALLOC(lo);
7343                 }
7344                 /* For T6 these are MAC buffer groups */
7345                 sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
7346                     i, used, alloc);
7347         }
7348         for (i = 0; i < sc->chip_params->nchan; i++) {
7349                 if (chip_id(sc) > CHELSIO_T5)
7350                         lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
7351                 else
7352                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
7353                 if (is_t5(sc)) {
7354                         used = G_T5_USED(lo);
7355                         alloc = G_T5_ALLOC(lo);
7356                 } else {
7357                         used = G_USED(lo);
7358                         alloc = G_ALLOC(lo);
7359                 }
7360                 /* For T6 these are MAC buffer groups */
7361                 sbuf_printf(sb,
7362                     "\nLoopback %d using %u pages out of %u allocated",
7363                     i, used, alloc);
7364         }
7365
7366         rc = sbuf_finish(sb);
7367         sbuf_delete(sb);
7368
7369         return (rc);
7370 }
7371
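/*
 * Convert a TCAM (x, y) bit-pair encoding to a value/mask pair: a mask
 * bit is set wherever x or y is set, and the 48-bit Ethernet address is
 * the low six bytes of y.  Callers skip entries with (x & y) != 0 as
 * invalid.
 */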
7372 static inline void
7373 tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
7374 {
7375         *mask = x | y;
7376         y = htobe64(y);
7377         memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
7378 }
7379
7380 static int
7381 sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
7382 {
7383         struct adapter *sc = arg1;
7384         struct sbuf *sb;
7385         int rc, i;
7386
7387         MPASS(chip_id(sc) <= CHELSIO_T5);
7388
7389         rc = sysctl_wire_old_buffer(req, 0);
7390         if (rc != 0)
7391                 return (rc);
7392
7393         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7394         if (sb == NULL)
7395                 return (ENOMEM);
7396
7397         sbuf_printf(sb,
7398             "Idx  Ethernet address     Mask     Vld Ports PF"
7399             "  VF              Replication             P0 P1 P2 P3  ML");
7400         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
7401                 uint64_t tcamx, tcamy, mask;
7402                 uint32_t cls_lo, cls_hi;
7403                 uint8_t addr[ETHER_ADDR_LEN];
7404
7405                 tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
7406                 tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
7407                 if (tcamx & tcamy)
7408                         continue;
7409                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
7410                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
7411                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
7412                 sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
7413                            "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
7414                            addr[3], addr[4], addr[5], (uintmax_t)mask,
7415                            (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
7416                            G_PORTMAP(cls_hi), G_PF(cls_lo),
7417                            (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
7418
7419                 if (cls_lo & F_REPLICATE) {
7420                         struct fw_ldst_cmd ldst_cmd;
7421
7422                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
7423                         ldst_cmd.op_to_addrspace =
7424                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
7425                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
7426                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
7427                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
7428                         ldst_cmd.u.mps.rplc.fid_idx =
7429                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
7430                                 V_FW_LDST_CMD_IDX(i));
7431
7432                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
7433                             "t4mps");
7434                         if (rc)
7435                                 break;
7436                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
7437                             sizeof(ldst_cmd), &ldst_cmd);
7438                         end_synchronized_op(sc, 0);
7439
7440                         if (rc != 0) {
7441                                 sbuf_printf(sb, "%36d", rc);
7442                                 rc = 0;
7443                         } else {
7444                                 sbuf_printf(sb, " %08x %08x %08x %08x",
7445                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
7446                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
7447                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
7448                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
7449                         }
7450                 } else
7451                         sbuf_printf(sb, "%36s", "");
7452
7453                 sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
7454                     G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
7455                     G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
7456         }
7457
7458         if (rc)
7459                 (void) sbuf_finish(sb);
7460         else
7461                 rc = sbuf_finish(sb);
7462         sbuf_delete(sb);
7463
7464         return (rc);
7465 }
7466
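/*
 * T6 variant of the MPS TCAM dump.  Entries are read indirectly via
 * MPS_CLS_TCAM_DATA2_CTL: indices above 255 live in a second bank
 * (CTLTCAMSEL(1)); one read with CTLXYBITSEL(0) returns the y bits and
 * the associated VNI/VLAN metadata, and a second read with
 * CTLXYBITSEL(1) returns the x bits.
 */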
7467 static int
7468 sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
7469 {
7470         struct adapter *sc = arg1;
7471         struct sbuf *sb;
7472         int rc, i;
7473
7474         MPASS(chip_id(sc) > CHELSIO_T5);
7475
7476         rc = sysctl_wire_old_buffer(req, 0);
7477         if (rc != 0)
7478                 return (rc);
7479
7480         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7481         if (sb == NULL)
7482                 return (ENOMEM);
7483
7484         sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
7485             "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
7486             "                           Replication"
7487             "                                    P0 P1 P2 P3  ML\n");
7488
7489         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
7490                 uint8_t dip_hit, vlan_vld, lookup_type, port_num;
7491                 uint16_t ivlan;
7492                 uint64_t tcamx, tcamy, val, mask;
7493                 uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
7494                 uint8_t addr[ETHER_ADDR_LEN];
7495
7496                 ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
7497                 if (i < 256)
7498                         ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
7499                 else
7500                         ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
7501                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
7502                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
7503                 tcamy = G_DMACH(val) << 32;
7504                 tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
7505                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
7506                 lookup_type = G_DATALKPTYPE(data2);
7507                 port_num = G_DATAPORTNUM(data2);
7508                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
7509                         /* Inner header VNI */
7510                         vniy = ((data2 & F_DATAVIDH2) << 23) |
7511                                        (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
7512                         dip_hit = data2 & F_DATADIPHIT;
7513                         vlan_vld = 0;
7514                 } else {
7515                         vniy = 0;
7516                         dip_hit = 0;
7517                         vlan_vld = data2 & F_DATAVIDH2;
7518                         ivlan = G_VIDL(val);
7519                 }
7520
7521                 ctl |= V_CTLXYBITSEL(1);
7522                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
7523                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
7524                 tcamx = G_DMACH(val) << 32;
7525                 tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
7526                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
7527                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
7528                         /* Inner header VNI mask */
7529                         vnix = ((data2 & F_DATAVIDH2) << 23) |
7530                                (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
7531                 } else
7532                         vnix = 0;
7533
7534                 if (tcamx & tcamy)
7535                         continue;
7536                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
7537
7538                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
7539                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
7540
7541                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
7542                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
7543                             "%012jx %06x %06x    -    -   %3c"
7544                             "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
7545                             addr[1], addr[2], addr[3], addr[4], addr[5],
7546                             (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
7547                             port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
7548                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
7549                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
7550                 } else {
7551                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
7552                             "%012jx    -       -   ", i, addr[0], addr[1],
7553                             addr[2], addr[3], addr[4], addr[5],
7554                             (uintmax_t)mask);
7555
7556                         if (vlan_vld)
7557                                 sbuf_printf(sb, "%4u   Y     ", ivlan);
7558                         else
7559                                 sbuf_printf(sb, "  -    N     ");
7560
7561                         sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
7562                             lookup_type ? 'I' : 'O', port_num,
7563                             cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
7564                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
7565                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
7566                 }
7567
7569                 if (cls_lo & F_T6_REPLICATE) {
7570                         struct fw_ldst_cmd ldst_cmd;
7571
7572                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
7573                         ldst_cmd.op_to_addrspace =
7574                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
7575                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
7576                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
7577                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
7578                         ldst_cmd.u.mps.rplc.fid_idx =
7579                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
7580                                 V_FW_LDST_CMD_IDX(i));
7581
7582                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
7583                             "t6mps");
7584                         if (rc)
7585                                 break;
7586                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
7587                             sizeof(ldst_cmd), &ldst_cmd);
7588                         end_synchronized_op(sc, 0);
7589
7590                         if (rc != 0) {
7591                                 sbuf_printf(sb, "%72d", rc);
7592                                 rc = 0;
7593                         } else {
7594                                 sbuf_printf(sb, " %08x %08x %08x %08x"
7595                                     " %08x %08x %08x %08x",
7596                                     be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
7597                                     be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
7598                                     be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
7599                                     be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
7600                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
7601                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
7602                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
7603                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
7604                         }
7605                 } else
7606                         sbuf_printf(sb, "%72s", "");
7607
7608                 sbuf_printf(sb, "%4u%3u%3u%3u %#x",
7609                     G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
7610                     G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
7611                     (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
7612         }
7613
7614         if (rc)
7615                 (void) sbuf_finish(sb);
7616         else
7617                 rc = sbuf_finish(sb);
7618         sbuf_delete(sb);
7619
7620         return (rc);
7621 }
7622
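/*
 * Print the 16 entries of the hardware path-MTU table, space separated.
 * A hypothetical userland read, assuming the usual dev.t4nex sysctl
 * tree and unit 0 (node name and values illustrative only):
 *
 *	# sysctl dev.t4nex.0.misc.path_mtus
 *	dev.t4nex.0.misc.path_mtus: 88 256 512 576 ... 9000 9600
 */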
7623 static int
7624 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
7625 {
7626         struct adapter *sc = arg1;
7627         struct sbuf *sb;
7628         int rc;
7629         uint16_t mtus[NMTUS];
7630
7631         rc = sysctl_wire_old_buffer(req, 0);
7632         if (rc != 0)
7633                 return (rc);
7634
7635         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7636         if (sb == NULL)
7637                 return (ENOMEM);
7638
7639         t4_read_mtu_tbl(sc, mtus, NULL);
7640
7641         sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
7642             mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
7643             mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
7644             mtus[14], mtus[15]);
7645
7646         rc = sbuf_finish(sb);
7647         sbuf_delete(sb);
7648
7649         return (rc);
7650 }
7651
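/*
 * PM (payload memory) statistics.  The first four rows in each
 * direction are command/byte counters; on T6 the extra rows at index 4
 * ("FIFO wait") and index 6 ("latency") report wait/occupancy and
 * read/latency pairs, with index 5 unused (NULL).
 */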
7652 static int
7653 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
7654 {
7655         struct adapter *sc = arg1;
7656         struct sbuf *sb;
7657         int rc, i;
7658         uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
7659         uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
7660         static const char *tx_stats[MAX_PM_NSTATS] = {
7661                 "Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
7662                 "Tx FIFO wait", NULL, "Tx latency"
7663         };
7664         static const char *rx_stats[MAX_PM_NSTATS] = {
7665                 "Read:", "Write bypass:", "Write mem:", "Flush:",
7666                 "Rx FIFO wait", NULL, "Rx latency"
7667         };
7668
7669         rc = sysctl_wire_old_buffer(req, 0);
7670         if (rc != 0)
7671                 return (rc);
7672
7673         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7674         if (sb == NULL)
7675                 return (ENOMEM);
7676
7677         t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
7678         t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
7679
7680         sbuf_printf(sb, "                Tx pcmds             Tx bytes");
7681         for (i = 0; i < 4; i++) {
7682                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
7683                     tx_cyc[i]);
7684         }
7685
7686         sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
7687         for (i = 0; i < 4; i++) {
7688                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
7689                     rx_cyc[i]);
7690         }
7691
7692         if (chip_id(sc) > CHELSIO_T5) {
7693                 sbuf_printf(sb,
7694                     "\n              Total wait      Total occupancy");
7695                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
7696                     tx_cyc[i]);
7697                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
7698                     rx_cyc[i]);
7699
7700                 i += 2;
7701                 MPASS(i < nitems(tx_stats));
7702
7703                 sbuf_printf(sb,
7704                     "\n                   Reads           Total wait");
7705                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
7706                     tx_cyc[i]);
7707                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
7708                     rx_cyc[i]);
7709         }
7710
7711         rc = sbuf_finish(sb);
7712         sbuf_delete(sb);
7713
7714         return (rc);
7715 }
7716
7717 static int
7718 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
7719 {
7720         struct adapter *sc = arg1;
7721         struct sbuf *sb;
7722         int rc;
7723         struct tp_rdma_stats stats;
7724
7725         rc = sysctl_wire_old_buffer(req, 0);
7726         if (rc != 0)
7727                 return (rc);
7728
7729         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7730         if (sb == NULL)
7731                 return (ENOMEM);
7732
7733         mtx_lock(&sc->reg_lock);
7734         t4_tp_get_rdma_stats(sc, &stats, 0);
7735         mtx_unlock(&sc->reg_lock);
7736
7737         sbuf_printf(sb, "NoRQEModDeferrals: %u\n", stats.rqe_dfr_mod);
7738         sbuf_printf(sb, "NoRQEPktDeferrals: %u", stats.rqe_dfr_pkt);
7739
7740         rc = sbuf_finish(sb);
7741         sbuf_delete(sb);
7742
7743         return (rc);
7744 }
7745
7746 static int
7747 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
7748 {
7749         struct adapter *sc = arg1;
7750         struct sbuf *sb;
7751         int rc;
7752         struct tp_tcp_stats v4, v6;
7753
7754         rc = sysctl_wire_old_buffer(req, 0);
7755         if (rc != 0)
7756                 return (rc);
7757
7758         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7759         if (sb == NULL)
7760                 return (ENOMEM);
7761
7762         mtx_lock(&sc->reg_lock);
7763         t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
7764         mtx_unlock(&sc->reg_lock);
7765
7766         sbuf_printf(sb,
7767             "                                IP                 IPv6\n");
7768         sbuf_printf(sb, "OutRsts:      %20u %20u\n",
7769             v4.tcp_out_rsts, v6.tcp_out_rsts);
7770         sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
7771             v4.tcp_in_segs, v6.tcp_in_segs);
7772         sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
7773             v4.tcp_out_segs, v6.tcp_out_segs);
7774         sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
7775             v4.tcp_retrans_segs, v6.tcp_retrans_segs);
7776
7777         rc = sbuf_finish(sb);
7778         sbuf_delete(sb);
7779
7780         return (rc);
7781 }
7782
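/*
 * Report TID usage: active (ATID), connection (TID), server (STID),
 * filter (FTID), and Ethernet offload (ETID) ranges, plus the LE
 * IPv4/IPv6 active-TID counters.  With the hash table enabled the TID
 * range is printed as two sub-ranges around the server region.
 */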
7783 static int
7784 sysctl_tids(SYSCTL_HANDLER_ARGS)
7785 {
7786         struct adapter *sc = arg1;
7787         struct sbuf *sb;
7788         int rc;
7789         struct tid_info *t = &sc->tids;
7790
7791         rc = sysctl_wire_old_buffer(req, 0);
7792         if (rc != 0)
7793                 return (rc);
7794
7795         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7796         if (sb == NULL)
7797                 return (ENOMEM);
7798
7799         if (t->natids) {
7800                 sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
7801                     t->atids_in_use);
7802         }
7803
7804         if (t->ntids) {
7805                 sbuf_printf(sb, "TID range: ");
7806                 if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
7807                         uint32_t b, hb;
7808
7809                         if (chip_id(sc) <= CHELSIO_T5) {
7810                                 b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
7811                                 hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
7812                         } else {
7813                                 b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
7814                                 hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
7815                         }
7816
7817                         if (b)
7818                                 sbuf_printf(sb, "0-%u, ", b - 1);
7819                         sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
7820                 } else
7821                         sbuf_printf(sb, "0-%u", t->ntids - 1);
7822                 sbuf_printf(sb, ", in use: %u\n",
7823                     atomic_load_acq_int(&t->tids_in_use));
7824         }
7825
7826         if (t->nstids) {
7827                 sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
7828                     t->stid_base + t->nstids - 1, t->stids_in_use);
7829         }
7830
7831         if (t->nftids) {
7832                 sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
7833                     t->ftid_base + t->nftids - 1);
7834         }
7835
7836         if (t->netids) {
7837                 sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base,
7838                     t->etid_base + t->netids - 1);
7839         }
7840
7841         sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
7842             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
7843             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
7844
7845         rc = sbuf_finish(sb);
7846         sbuf_delete(sb);
7847
7848         return (rc);
7849 }
7850
7851 static int
7852 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
7853 {
7854         struct adapter *sc = arg1;
7855         struct sbuf *sb;
7856         int rc;
7857         struct tp_err_stats stats;
7858
7859         rc = sysctl_wire_old_buffer(req, 0);
7860         if (rc != 0)
7861                 return (rc);
7862
7863         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7864         if (sb == NULL)
7865                 return (ENOMEM);
7866
7867         mtx_lock(&sc->reg_lock);
7868         t4_tp_get_err_stats(sc, &stats, 0);
7869         mtx_unlock(&sc->reg_lock);
7870
7871         if (sc->chip_params->nchan > 2) {
7872                 sbuf_printf(sb, "                 channel 0  channel 1"
7873                     "  channel 2  channel 3\n");
7874                 sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
7875                     stats.mac_in_errs[0], stats.mac_in_errs[1],
7876                     stats.mac_in_errs[2], stats.mac_in_errs[3]);
7877                 sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
7878                     stats.hdr_in_errs[0], stats.hdr_in_errs[1],
7879                     stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
7880                 sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
7881                     stats.tcp_in_errs[0], stats.tcp_in_errs[1],
7882                     stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
7883                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
7884                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
7885                     stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
7886                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
7887                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
7888                     stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
7889                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
7890                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
7891                     stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
7892                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
7893                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
7894                     stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
7895                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
7896                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
7897                     stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
7898         } else {
7899                 sbuf_printf(sb, "                 channel 0  channel 1\n");
7900                 sbuf_printf(sb, "macInErrs:      %10u %10u\n",
7901                     stats.mac_in_errs[0], stats.mac_in_errs[1]);
7902                 sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
7903                     stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
7904                 sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
7905                     stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
7906                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
7907                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
7908                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
7909                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
7910                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
7911                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
7912                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
7913                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
7914                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
7915                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
7916         }
7917
7918         sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
7919             stats.ofld_no_neigh, stats.ofld_cong_defer);
7920
7921         rc = sbuf_finish(sb);
7922         sbuf_delete(sb);
7923
7924         return (rc);
7925 }
7926
7927 static int
7928 sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
7929 {
7930         struct adapter *sc = arg1;
7931         struct tp_params *tpp = &sc->params.tp;
7932         u_int mask;
7933         int rc;
7934
7935         mask = tpp->la_mask >> 16;
7936         rc = sysctl_handle_int(oidp, &mask, 0, req);
7937         if (rc != 0 || req->newptr == NULL)
7938                 return (rc);
7939         if (mask > 0xffff)
7940                 return (EINVAL);
7941         tpp->la_mask = mask << 16;
7942         t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
7943
7944         return (0);
7945 }
7946
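/*
 * Table-driven decoder for logic-analyzer captures.  A field_desc names
 * one bit-field (start bit, width) of a 64-bit capture word, and
 * field_desc_show() prints "name: value" pairs, wrapping lines at 79
 * columns.  A minimal, hypothetical usage sketch:
 *
 *	static const struct field_desc example[] = {
 *		{ "Valid", 63, 1 },
 *		{ "Opcode", 56, 7 },
 *		{ NULL }
 *	};
 *	field_desc_show(sb, capture_word, example);
 */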
7947 struct field_desc {
7948         const char *name;
7949         u_int start;
7950         u_int width;
7951 };
7952
7953 static void
7954 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
7955 {
7956         char buf[32];
7957         int line_size = 0;
7958
7959         while (f->name) {
7960                 uint64_t mask = (1ULL << f->width) - 1;
7961                 int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
7962                     ((uintmax_t)v >> f->start) & mask);
7963
7964                 if (line_size + len >= 79) {
7965                         line_size = 8;
7966                         sbuf_printf(sb, "\n        ");
7967                 }
7968                 sbuf_printf(sb, "%s ", buf);
7969                 line_size += len + 1;
7970                 f++;
7971         }
7972         sbuf_printf(sb, "\n");
7973 }
7974
7975 static const struct field_desc tp_la0[] = {
7976         { "RcfOpCodeOut", 60, 4 },
7977         { "State", 56, 4 },
7978         { "WcfState", 52, 4 },
7979         { "RcfOpcSrcOut", 50, 2 },
7980         { "CRxError", 49, 1 },
7981         { "ERxError", 48, 1 },
7982         { "SanityFailed", 47, 1 },
7983         { "SpuriousMsg", 46, 1 },
7984         { "FlushInputMsg", 45, 1 },
7985         { "FlushInputCpl", 44, 1 },
7986         { "RssUpBit", 43, 1 },
7987         { "RssFilterHit", 42, 1 },
7988         { "Tid", 32, 10 },
7989         { "InitTcb", 31, 1 },
7990         { "LineNumber", 24, 7 },
7991         { "Emsg", 23, 1 },
7992         { "EdataOut", 22, 1 },
7993         { "Cmsg", 21, 1 },
7994         { "CdataOut", 20, 1 },
7995         { "EreadPdu", 19, 1 },
7996         { "CreadPdu", 18, 1 },
7997         { "TunnelPkt", 17, 1 },
7998         { "RcfPeerFin", 16, 1 },
7999         { "RcfReasonOut", 12, 4 },
8000         { "TxCchannel", 10, 2 },
8001         { "RcfTxChannel", 8, 2 },
8002         { "RxEchannel", 6, 2 },
8003         { "RcfRxChannel", 5, 1 },
8004         { "RcfDataOutSrdy", 4, 1 },
8005         { "RxDvld", 3, 1 },
8006         { "RxOoDvld", 2, 1 },
8007         { "RxCongestion", 1, 1 },
8008         { "TxCongestion", 0, 1 },
8009         { NULL }
8010 };
8011
8012 static const struct field_desc tp_la1[] = {
8013         { "CplCmdIn", 56, 8 },
8014         { "CplCmdOut", 48, 8 },
8015         { "ESynOut", 47, 1 },
8016         { "EAckOut", 46, 1 },
8017         { "EFinOut", 45, 1 },
8018         { "ERstOut", 44, 1 },
8019         { "SynIn", 43, 1 },
8020         { "AckIn", 42, 1 },
8021         { "FinIn", 41, 1 },
8022         { "RstIn", 40, 1 },
8023         { "DataIn", 39, 1 },
8024         { "DataInVld", 38, 1 },
8025         { "PadIn", 37, 1 },
8026         { "RxBufEmpty", 36, 1 },
8027         { "RxDdp", 35, 1 },
8028         { "RxFbCongestion", 34, 1 },
8029         { "TxFbCongestion", 33, 1 },
8030         { "TxPktSumSrdy", 32, 1 },
8031         { "RcfUlpType", 28, 4 },
8032         { "Eread", 27, 1 },
8033         { "Ebypass", 26, 1 },
8034         { "Esave", 25, 1 },
8035         { "Static0", 24, 1 },
8036         { "Cread", 23, 1 },
8037         { "Cbypass", 22, 1 },
8038         { "Csave", 21, 1 },
8039         { "CPktOut", 20, 1 },
8040         { "RxPagePoolFull", 18, 2 },
8041         { "RxLpbkPkt", 17, 1 },
8042         { "TxLpbkPkt", 16, 1 },
8043         { "RxVfValid", 15, 1 },
8044         { "SynLearned", 14, 1 },
8045         { "SetDelEntry", 13, 1 },
8046         { "SetInvEntry", 12, 1 },
8047         { "CpcmdDvld", 11, 1 },
8048         { "CpcmdSave", 10, 1 },
8049         { "RxPstructsFull", 8, 2 },
8050         { "EpcmdDvld", 7, 1 },
8051         { "EpcmdFlush", 6, 1 },
8052         { "EpcmdTrimPrefix", 5, 1 },
8053         { "EpcmdTrimPostfix", 4, 1 },
8054         { "ERssIp4Pkt", 3, 1 },
8055         { "ERssIp6Pkt", 2, 1 },
8056         { "ERssTcpUdpPkt", 1, 1 },
8057         { "ERssFceFipPkt", 0, 1 },
8058         { NULL }
8059 };
8060
8061 static const struct field_desc tp_la2[] = {
8062         { "CplCmdIn", 56, 8 },
8063         { "MpsVfVld", 55, 1 },
8064         { "MpsPf", 52, 3 },
8065         { "MpsVf", 44, 8 },
8066         { "SynIn", 43, 1 },
8067         { "AckIn", 42, 1 },
8068         { "FinIn", 41, 1 },
8069         { "RstIn", 40, 1 },
8070         { "DataIn", 39, 1 },
8071         { "DataInVld", 38, 1 },
8072         { "PadIn", 37, 1 },
8073         { "RxBufEmpty", 36, 1 },
8074         { "RxDdp", 35, 1 },
8075         { "RxFbCongestion", 34, 1 },
8076         { "TxFbCongestion", 33, 1 },
8077         { "TxPktSumSrdy", 32, 1 },
8078         { "RcfUlpType", 28, 4 },
8079         { "Eread", 27, 1 },
8080         { "Ebypass", 26, 1 },
8081         { "Esave", 25, 1 },
8082         { "Static0", 24, 1 },
8083         { "Cread", 23, 1 },
8084         { "Cbypass", 22, 1 },
8085         { "Csave", 21, 1 },
8086         { "CPktOut", 20, 1 },
8087         { "RxPagePoolFull", 18, 2 },
8088         { "RxLpbkPkt", 17, 1 },
8089         { "TxLpbkPkt", 16, 1 },
8090         { "RxVfValid", 15, 1 },
8091         { "SynLearned", 14, 1 },
8092         { "SetDelEntry", 13, 1 },
8093         { "SetInvEntry", 12, 1 },
8094         { "CpcmdDvld", 11, 1 },
8095         { "CpcmdSave", 10, 1 },
8096         { "RxPstructsFull", 8, 2 },
8097         { "EpcmdDvld", 7, 1 },
8098         { "EpcmdFlush", 6, 1 },
8099         { "EpcmdTrimPrefix", 5, 1 },
8100         { "EpcmdTrimPostfix", 4, 1 },
8101         { "ERssIp4Pkt", 3, 1 },
8102         { "ERssIp6Pkt", 2, 1 },
8103         { "ERssTcpUdpPkt", 1, 1 },
8104         { "ERssFceFipPkt", 0, 1 },
8105         { NULL }
8106 };
8107
8108 static void
8109 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
8110 {
8111
8112         field_desc_show(sb, *p, tp_la0);
8113 }
8114
8115 static void
8116 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
8117 {
8118
8119         if (idx)
8120                 sbuf_printf(sb, "\n");
8121         field_desc_show(sb, p[0], tp_la0);
8122         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8123                 field_desc_show(sb, p[1], tp_la0);
8124 }
8125
8126 static void
8127 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
8128 {
8129
8130         if (idx)
8131                 sbuf_printf(sb, "\n");
8132         field_desc_show(sb, p[0], tp_la0);
8133         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8134                 field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
8135 }
8136
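/*
 * Dump the TP logic analyzer.  DBGLAMODE in TP_DBG_LA_CONFIG selects
 * the decoder: modes 2 and 3 consume two 64-bit words per entry
 * (tp_la_show2/tp_la_show3), all other modes one word (tp_la_show).
 */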
8137 static int
8138 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
8139 {
8140         struct adapter *sc = arg1;
8141         struct sbuf *sb;
8142         uint64_t *buf, *p;
8143         int rc;
8144         u_int i, inc;
8145         void (*show_func)(struct sbuf *, uint64_t *, int);
8146
8147         rc = sysctl_wire_old_buffer(req, 0);
8148         if (rc != 0)
8149                 return (rc);
8150
8151         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8152         if (sb == NULL)
8153                 return (ENOMEM);
8154
8155         buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
8156
8157         t4_tp_read_la(sc, buf, NULL);
8158         p = buf;
8159
8160         switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
8161         case 2:
8162                 inc = 2;
8163                 show_func = tp_la_show2;
8164                 break;
8165         case 3:
8166                 inc = 2;
8167                 show_func = tp_la_show3;
8168                 break;
8169         default:
8170                 inc = 1;
8171                 show_func = tp_la_show;
8172         }
8173
8174         for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
8175                 (*show_func)(sb, p, i);
8176
8177         rc = sbuf_finish(sb);
8178         sbuf_delete(sb);
8179         free(buf, M_CXGBE);
8180         return (rc);
8181 }
8182
8183 static int
8184 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
8185 {
8186         struct adapter *sc = arg1;
8187         struct sbuf *sb;
8188         int rc;
8189         u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
8190
8191         rc = sysctl_wire_old_buffer(req, 0);
8192         if (rc != 0)
8193                 return (rc);
8194
8195         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8196         if (sb == NULL)
8197                 return (ENOMEM);
8198
8199         t4_get_chan_txrate(sc, nrate, orate);
8200
8201         if (sc->chip_params->nchan > 2) {
8202                 sbuf_printf(sb, "              channel 0   channel 1"
8203                     "   channel 2   channel 3\n");
8204                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
8205                     nrate[0], nrate[1], nrate[2], nrate[3]);
8206                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
8207                     orate[0], orate[1], orate[2], orate[3]);
8208         } else {
8209                 sbuf_printf(sb, "              channel 0   channel 1\n");
8210                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
8211                     nrate[0], nrate[1]);
8212                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
8213                     orate[0], orate[1]);
8214         }
8215
8216         rc = sbuf_finish(sb);
8217         sbuf_delete(sb);
8218
8219         return (rc);
8220 }
8221
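/*
 * Dump the ULP RX logic analyzer.  Each of the ULPRX_LA_SIZE captures
 * is eight 32-bit words, displayed as pcmd/type/message/data columns.
 */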
8222 static int
8223 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
8224 {
8225         struct adapter *sc = arg1;
8226         struct sbuf *sb;
8227         uint32_t *buf, *p;
8228         int rc, i;
8229
8230         rc = sysctl_wire_old_buffer(req, 0);
8231         if (rc != 0)
8232                 return (rc);
8233
8234         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8235         if (sb == NULL)
8236                 return (ENOMEM);
8237
8238         buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
8239             M_ZERO | M_WAITOK);
8240
8241         t4_ulprx_read_la(sc, buf);
8242         p = buf;
8243
8244         sbuf_printf(sb, "      Pcmd        Type   Message"
8245             "                Data");
8246         for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
8247                 sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
8248                     p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
8249         }
8250
8251         rc = sbuf_finish(sb);
8252         sbuf_delete(sb);
8253         free(buf, M_CXGBE);
8254         return (rc);
8255 }
8256
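/*
 * SGE write-combining statistics.  Only meaningful when the stat source
 * is 7; STATMODE then selects whether the TOTAL/MATCH counters mean
 * total/incomplete or total/data-overflow.
 */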
8257 static int
8258 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
8259 {
8260         struct adapter *sc = arg1;
8261         struct sbuf *sb;
8262         int rc, v;
8263
8264         MPASS(chip_id(sc) >= CHELSIO_T5);
8265
8266         rc = sysctl_wire_old_buffer(req, 0);
8267         if (rc != 0)
8268                 return (rc);
8269
8270         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8271         if (sb == NULL)
8272                 return (ENOMEM);
8273
8274         v = t4_read_reg(sc, A_SGE_STAT_CFG);
8275         if (G_STATSOURCE_T5(v) == 7) {
8276                 int mode;
8277
8278                 mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
8279                 if (mode == 0) {
8280                         sbuf_printf(sb, "total %d, incomplete %d",
8281                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
8282                             t4_read_reg(sc, A_SGE_STAT_MATCH));
8283                 } else if (mode == 1) {
8284                         sbuf_printf(sb, "total %d, data overflow %d",
8285                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
8286                             t4_read_reg(sc, A_SGE_STAT_MATCH));
8287                 } else {
8288                         sbuf_printf(sb, "unknown mode %d", mode);
8289                 }
8290         }
8291         rc = sbuf_finish(sb);
8292         sbuf_delete(sb);
8293
8294         return (rc);
8295 }
8296
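/*
 * Display one traffic class's rate-limit parameters.  arg2 packs the
 * port id in its high 16 bits and the class index in the low 16; the
 * class is copied under tc_lock so it can be formatted unlocked.
 */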
8297 static int
8298 sysctl_tc_params(SYSCTL_HANDLER_ARGS)
8299 {
8300         struct adapter *sc = arg1;
8301         struct tx_cl_rl_params tc;
8302         struct sbuf *sb;
8303         int i, rc, port_id, mbps, gbps;
8304
8305         rc = sysctl_wire_old_buffer(req, 0);
8306         if (rc != 0)
8307                 return (rc);
8308
8309         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8310         if (sb == NULL)
8311                 return (ENOMEM);
8312
8313         port_id = arg2 >> 16;
8314         MPASS(port_id < sc->params.nports);
8315         MPASS(sc->port[port_id] != NULL);
8316         i = arg2 & 0xffff;
8317         MPASS(i < sc->chip_params->nsched_cls);
8318
8319         mtx_lock(&sc->tc_lock);
8320         tc = sc->port[port_id]->sched_params->cl_rl[i];
8321         mtx_unlock(&sc->tc_lock);
8322
8323         if (tc.flags & TX_CLRL_ERROR) {
8324                 sbuf_printf(sb, "error");
8325                 goto done;
8326         }
8327
8328         if (tc.ratemode == SCHED_CLASS_RATEMODE_REL) {
8329                 /* XXX: top speed or actual link speed? */
8330                 gbps = port_top_speed(sc->port[port_id]);
8331                 sbuf_printf(sb, " %u%% of %uGbps", tc.maxrate, gbps);
8332         } else if (tc.ratemode == SCHED_CLASS_RATEMODE_ABS) {
8333                 switch (tc.rateunit) {
8334                 case SCHED_CLASS_RATEUNIT_BITS:
8335                         mbps = tc.maxrate / 1000;
8336                         gbps = tc.maxrate / 1000000;
8337                         if (tc.maxrate == gbps * 1000000)
8338                                 sbuf_printf(sb, " %uGbps", gbps);
8339                         else if (tc.maxrate == mbps * 1000)
8340                                 sbuf_printf(sb, " %uMbps", mbps);
8341                         else
8342                                 sbuf_printf(sb, " %uKbps", tc.maxrate);
8343                         break;
8344                 case SCHED_CLASS_RATEUNIT_PKTS:
8345                         sbuf_printf(sb, " %upps", tc.maxrate);
8346                         break;
8347                 default:
8348                         rc = ENXIO;
8349                         goto done;
8350                 }
8351         }
8352
8353         switch (tc.mode) {
8354         case SCHED_CLASS_MODE_CLASS:
8355                 sbuf_printf(sb, " aggregate");
8356                 break;
8357         case SCHED_CLASS_MODE_FLOW:
8358                 sbuf_printf(sb, " per-flow");
8359                 break;
8360         default:
8361                 rc = ENXIO;
8362                 goto done;
8363         }
8364
8365 done:
8366         if (rc == 0)
8367                 rc = sbuf_finish(sb);
8368         sbuf_delete(sb);
8369
8370         return (rc);
8371 }
8372 #endif
8373
8374 #ifdef TCP_OFFLOAD
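/*
 * Get/set the list of TCP ports whose inbound TLS traffic should be
 * offloaded.  Writing a single -1 clears the list; otherwise every port
 * must be in [1, IPPORT_MAX].  The old and new arrays are swapped under
 * the adapter lock so readers never see a half-updated list.
 */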
8375 static int
8376 sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
8377 {
8378         struct adapter *sc = arg1;
8379         int *old_ports, *new_ports;
8380         int i, new_count, rc;
8381
8382         if (req->newptr == NULL && req->oldptr == NULL)
8383                 return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
8384                     sizeof(sc->tt.tls_rx_ports[0])));
8385
8386         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
8387         if (rc)
8388                 return (rc);
8389
8390         if (sc->tt.num_tls_rx_ports == 0) {
8391                 i = -1;
8392                 rc = SYSCTL_OUT(req, &i, sizeof(i));
8393         } else
8394                 rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
8395                     sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
8396         if (rc == 0 && req->newptr != NULL) {
8397                 new_count = req->newlen / sizeof(new_ports[0]);
8398                 new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
8399                     M_WAITOK);
8400                 rc = SYSCTL_IN(req, new_ports, new_count *
8401                     sizeof(new_ports[0]));
8402                 if (rc)
8403                         goto err;
8404
8405                 /* Allow setting to a single '-1' to clear the list. */
8406                 if (new_count == 1 && new_ports[0] == -1) {
8407                         ADAPTER_LOCK(sc);
8408                         old_ports = sc->tt.tls_rx_ports;
8409                         sc->tt.tls_rx_ports = NULL;
8410                         sc->tt.num_tls_rx_ports = 0;
8411                         ADAPTER_UNLOCK(sc);
8412                         free(old_ports, M_CXGBE);
8413                 } else {
8414                         for (i = 0; i < new_count; i++) {
8415                                 if (new_ports[i] < 1 ||
8416                                     new_ports[i] > IPPORT_MAX) {
8417                                         rc = EINVAL;
8418                                         goto err;
8419                                 }
8420                         }
8421
8422                         ADAPTER_LOCK(sc);
8423                         old_ports = sc->tt.tls_rx_ports;
8424                         sc->tt.tls_rx_ports = new_ports;
8425                         sc->tt.num_tls_rx_ports = new_count;
8426                         ADAPTER_UNLOCK(sc);
8427                         free(old_ports, M_CXGBE);
8428                         new_ports = NULL;
8429                 }
8430         err:
8431                 free(new_ports, M_CXGBE);
8432         }
8433         end_synchronized_op(sc, 0);
8434         return (rc);
8435 }
8436
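/*
 * Format val / factor as a decimal string, stripping trailing zeroes
 * from the remainder, e.g. unit_conv(buf, len, 2500, 1000) produces
 * "2.5".  Note that leading zeroes in the fraction are lost (1050/1000
 * would also print as "1.5"), so the output is approximate in that
 * case.
 */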
8437 static void
8438 unit_conv(char *buf, size_t len, u_int val, u_int factor)
8439 {
8440         u_int rem = val % factor;
8441
8442         if (rem == 0)
8443                 snprintf(buf, len, "%u", val / factor);
8444         else {
8445                 while (rem % 10 == 0)
8446                         rem /= 10;
8447                 snprintf(buf, len, "%u.%u", val / factor, rem);
8448         }
8449 }
8450
8451 static int
8452 sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
8453 {
8454         struct adapter *sc = arg1;
8455         char buf[16];
8456         u_int res, re;
8457         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
8458
8459         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
8460         switch (arg2) {
8461         case 0:
8462                 /* timer_tick */
8463                 re = G_TIMERRESOLUTION(res);
8464                 break;
8465         case 1:
8466                 /* TCP timestamp tick */
8467                 re = G_TIMESTAMPRESOLUTION(res);
8468                 break;
8469         case 2:
8470                 /* DACK tick */
8471                 re = G_DELAYEDACKRESOLUTION(res);
8472                 break;
8473         default:
8474                 return (EDOOFUS);
8475         }
8476
8477         unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
8478
8479         return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
8480 }
8481
8482 static int
8483 sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
8484 {
8485         struct adapter *sc = arg1;
8486         u_int res, dack_re, v;
8487         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
8488
8489         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
8490         dack_re = G_DELAYEDACKRESOLUTION(res);
8491         v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
8492
8493         return (sysctl_handle_int(oidp, &v, 0, req));
8494 }
8495
8496 static int
8497 sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
8498 {
8499         struct adapter *sc = arg1;
8500         int reg = arg2;
8501         u_int tre;
8502         u_long tp_tick_us, v;
8503         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
8504
8505         MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
8506             reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
8507             reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
8508             reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
8509
8510         tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
8511         tp_tick_us = (cclk_ps << tre) / 1000000;
8512
8513         if (reg == A_TP_INIT_SRTT)
8514                 v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
8515         else
8516                 v = tp_tick_us * t4_read_reg(sc, reg);
8517
8518         return (sysctl_handle_long(oidp, &v, 0, req));
8519 }
8520
8521 /*
8522  * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is
8523  * passed to this function.
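 * E.g. arg2 == 12 extracts bits 15:12 of A_TP_SHIFT_CNT.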
8524  */
8525 static int
8526 sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
8527 {
8528         struct adapter *sc = arg1;
8529         int idx = arg2;
8530         u_int v;
8531
8532         MPASS(idx >= 0 && idx <= 24);
8533
8534         v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
8535
8536         return (sysctl_handle_int(oidp, &v, 0, req));
8537 }
8538
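/*
 * The 16 TCP retransmit backoff values are packed four to a 32b register
 * (8b each), starting at A_TP_TCP_BACKOFF_REG0; idx selects the register
 * and the byte within it.
 */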
8539 static int
8540 sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
8541 {
8542         struct adapter *sc = arg1;
8543         int idx = arg2;
8544         u_int shift, v, r;
8545
8546         MPASS(idx >= 0 && idx < 16);
8547
8548         r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
8549         shift = (idx & 3) << 3;
8550         v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
8551
8552         return (sysctl_handle_int(oidp, &v, 0, req));
8553 }
8554
8555 static int
8556 sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
8557 {
8558         struct vi_info *vi = arg1;
8559         struct adapter *sc = vi->pi->adapter;
8560         int idx, rc, i;
8561         struct sge_ofld_rxq *ofld_rxq;
8562         uint8_t v;
8563
8564         idx = vi->ofld_tmr_idx;
8565
8566         rc = sysctl_handle_int(oidp, &idx, 0, req);
8567         if (rc != 0 || req->newptr == NULL)
8568                 return (rc);
8569
8570         if (idx < 0 || idx >= SGE_NTIMERS)
8571                 return (EINVAL);
8572
8573         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
8574             "t4otmr");
8575         if (rc)
8576                 return (rc);
8577
8578         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
8579         for_each_ofld_rxq(vi, i, ofld_rxq) {
8580 #ifdef atomic_store_rel_8
8581                 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
8582 #else
8583                 ofld_rxq->iq.intr_params = v;
8584 #endif
8585         }
8586         vi->ofld_tmr_idx = idx;
8587
8588         end_synchronized_op(sc, LOCK_HELD);
8589         return (0);
8590 }
8591
8592 static int
8593 sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
8594 {
8595         struct vi_info *vi = arg1;
8596         struct adapter *sc = vi->pi->adapter;
8597         int idx, rc;
8598
8599         idx = vi->ofld_pktc_idx;
8600
8601         rc = sysctl_handle_int(oidp, &idx, 0, req);
8602         if (rc != 0 || req->newptr == NULL)
8603                 return (rc);
8604
8605         if (idx < -1 || idx >= SGE_NCOUNTERS)
8606                 return (EINVAL);
8607
8608         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
8609             "t4opktc");
8610         if (rc)
8611                 return (rc);
8612
8613         if (vi->flags & VI_INIT_DONE)
8614                 rc = EBUSY; /* cannot be changed once the queues are created */
8615         else
8616                 vi->ofld_pktc_idx = idx;
8617
8618         end_synchronized_op(sc, LOCK_HELD);
8619         return (rc);
8620 }
8621 #endif
8622
8623 static int
8624 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
8625 {
8626         int rc;
8627
8628         if (cntxt->cid > M_CTXTQID)
8629                 return (EINVAL);
8630
8631         if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
8632             cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
8633                 return (EINVAL);
8634
8635         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
8636         if (rc)
8637                 return (rc);
8638
8639         if (sc->flags & FW_OK) {
8640                 rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
8641                     &cntxt->data[0]);
8642                 if (rc == 0)
8643                         goto done;
8644         }
8645
8646         /*
8647          * Read via firmware failed or wasn't even attempted.  Read directly via
8648          * the backdoor.
8649          */
8650         rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
8651 done:
8652         end_synchronized_op(sc, 0);
8653         return (rc);
8654 }
8655
8656 static int
8657 load_fw(struct adapter *sc, struct t4_data *fw)
8658 {
8659         int rc;
8660         uint8_t *fw_data;
8661
8662         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
8663         if (rc)
8664                 return (rc);
8665
8666         /*
8667          * The firmware, with the sole exception of the memory parity error
8668          * handler, runs from memory and not flash.  It is almost always safe to
8669          * install a new firmware on a running system.  Just set bit 1 in
8670          * hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
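         * (With no other debug flags set, that means writing the value 2.)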
8671          */
8672         if (sc->flags & FULL_INIT_DONE &&
8673             (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
8674                 rc = EBUSY;
8675                 goto done;
8676         }
8677
8678         fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
8679         if (fw_data == NULL) {
8680                 rc = ENOMEM;
8681                 goto done;
8682         }
8683
8684         rc = copyin(fw->data, fw_data, fw->len);
8685         if (rc == 0)
8686                 rc = -t4_load_fw(sc, fw_data, fw->len);
8687
8688         free(fw_data, M_CXGBE);
8689 done:
8690         end_synchronized_op(sc, 0);
8691         return (rc);
8692 }
8693
8694 static int
8695 load_cfg(struct adapter *sc, struct t4_data *cfg)
8696 {
8697         int rc;
8698         uint8_t *cfg_data = NULL;
8699
8700         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
8701         if (rc)
8702                 return (rc);
8703
8704         if (cfg->len == 0) {
8705                 /* clear */
8706                 rc = -t4_load_cfg(sc, NULL, 0);
8707                 goto done;
8708         }
8709
8710         cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
8711         if (cfg_data == NULL) {
8712                 rc = ENOMEM;
8713                 goto done;
8714         }
8715
8716         rc = copyin(cfg->data, cfg_data, cfg->len);
8717         if (rc == 0)
8718                 rc = -t4_load_cfg(sc, cfg_data, cfg->len);
8719
8720         free(cfg_data, M_CXGBE);
8721 done:
8722         end_synchronized_op(sc, 0);
8723         return (rc);
8724 }
8725
8726 static int
8727 load_boot(struct adapter *sc, struct t4_bootrom *br)
8728 {
8729         int rc;
8730         uint8_t *br_data = NULL;
8731         u_int offset;
8732
8733         if (br->len > 1024 * 1024)
8734                 return (EFBIG);
8735
8736         if (br->pf_offset == 0) {
8737                 /* pfidx */
8738                 if (br->pfidx_addr > 7)
8739                         return (EINVAL);
8740                 offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
8741                     A_PCIE_PF_EXPROM_OFST)));
8742         } else if (br->pf_offset == 1) {
8743                 /* offset */
8744                 offset = G_OFFSET(br->pfidx_addr);
8745         } else {
8746                 return (EINVAL);
8747         }
8748
8749         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
8750         if (rc)
8751                 return (rc);
8752
8753         if (br->len == 0) {
8754                 /* clear */
8755                 rc = -t4_load_boot(sc, NULL, offset, 0);
8756                 goto done;
8757         }
8758
8759         br_data = malloc(br->len, M_CXGBE, M_WAITOK);
8760         if (br_data == NULL) {
8761                 rc = ENOMEM;
8762                 goto done;
8763         }
8764
8765         rc = copyin(br->data, br_data, br->len);
8766         if (rc == 0)
8767                 rc = -t4_load_boot(sc, br_data, offset, br->len);
8768
8769         free(br_data, M_CXGBE);
8770 done:
8771         end_synchronized_op(sc, 0);
8772         return (rc);
8773 }
8774
8775 static int
8776 load_bootcfg(struct adapter *sc, struct t4_data *bc)
8777 {
8778         int rc;
8779         uint8_t *bc_data = NULL;
8780
8781         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
8782         if (rc)
8783                 return (rc);
8784
8785         if (bc->len == 0) {
8786                 /* clear */
8787                 rc = -t4_load_bootcfg(sc, NULL, 0);
8788                 goto done;
8789         }
8790
8791         bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
8792         if (bc_data == NULL) {
8793                 rc = ENOMEM;
8794                 goto done;
8795         }
8796
8797         rc = copyin(bc->data, bc_data, bc->len);
8798         if (rc == 0)
8799                 rc = -t4_load_bootcfg(sc, bc_data, bc->len);
8800
8801         free(bc_data, M_CXGBE);
8802 done:
8803         end_synchronized_op(sc, 0);
8804         return (rc);
8805 }
8806
8807 static int
8808 cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
8809 {
8810         int rc;
8811         struct cudbg_init *cudbg;
8812         void *handle, *buf;
8813
8814         /* buf is large, don't block if no memory is available */
8815         buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
8816         if (buf == NULL)
8817                 return (ENOMEM);
8818
8819         handle = cudbg_alloc_handle();
8820         if (handle == NULL) {
8821                 rc = ENOMEM;
8822                 goto done;
8823         }
8824
8825         cudbg = cudbg_get_init(handle);
8826         cudbg->adap = sc;
8827         cudbg->print = (cudbg_print_cb)printf;
8828
8829 #ifndef notyet
8830         device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
8831             __func__, dump->wr_flash, dump->len, dump->data);
8832 #endif
8833
8834         if (dump->wr_flash)
8835                 cudbg->use_flash = 1;
8836         MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
8837         memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));
8838
8839         rc = cudbg_collect(handle, buf, &dump->len);
8840         if (rc != 0)
8841                 goto done;
8842
8843         rc = copyout(buf, dump->data, dump->len);
8844 done:
8845         cudbg_free_handle(handle);
8846         free(buf, M_CXGBE);
8847         return (rc);
8848 }
8849
8850 static void
8851 free_offload_policy(struct t4_offload_policy *op)
8852 {
8853         struct offload_rule *r;
8854         int i;
8855
8856         if (op == NULL)
8857                 return;
8858
8859         r = &op->rule[0];
8860         for (i = 0; i < op->nrules; i++, r++) {
8861                 free(r->bpf_prog.bf_insns, M_CXGBE);
8862         }
8863         free(op->rule, M_CXGBE);
8864         free(op, M_CXGBE);
8865 }
8866
8867 static int
8868 set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
8869 {
8870         int i, rc, len;
8871         struct t4_offload_policy *op, *old;
8872         struct bpf_program *bf;
8873         const struct offload_settings *s;
8874         struct offload_rule *r;
8875         void *u;
8876
8877         if (!is_offload(sc))
8878                 return (ENODEV);
8879
8880         if (uop->nrules == 0) {
8881                 /* Delete installed policies. */
8882                 op = NULL;
8883                 goto set_policy;
8884         } else if (uop->nrules > 256) { /* arbitrary */
8885                 return (E2BIG);
8886         }
8887
8888         /* Copy userspace offload policy to kernel */
8889         op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
8890         op->nrules = uop->nrules;
8891         len = op->nrules * sizeof(struct offload_rule);
8892         op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
8893         rc = copyin(uop->rule, op->rule, len);
8894         if (rc) {
8895                 free(op->rule, M_CXGBE);
8896                 free(op, M_CXGBE);
8897                 return (rc);
8898         }
8899
8900         r = &op->rule[0];
8901         for (i = 0; i < op->nrules; i++, r++) {
8902
8903                 /* Validate open_type */
8904                 if (r->open_type != OPEN_TYPE_LISTEN &&
8905                     r->open_type != OPEN_TYPE_ACTIVE &&
8906                     r->open_type != OPEN_TYPE_PASSIVE &&
8907                     r->open_type != OPEN_TYPE_DONTCARE) {
8908 error:
8909                         /*
8910                          * Rules 0 to i-1 have malloc'd filters that need to
8911                          * be freed.  Rules from i onward hold userspace
8912                          * pointers (or were freed above) - leave them alone.
8913                          */
8914                         op->nrules = i;
8915                         free_offload_policy(op);
8916                         return (rc);
8917                 }
8918
8919                 /* Validate settings */
8920                 s = &r->settings;
8921                 if ((s->offload != 0 && s->offload != 1) ||
8922                     s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
8923                     s->sched_class < -1 ||
8924                     s->sched_class >= sc->chip_params->nsched_cls) {
8925                         rc = EINVAL;
8926                         goto error;
8927                 }
8928
8929                 bf = &r->bpf_prog;
8930                 u = bf->bf_insns;       /* userspace ptr */
8931                 bf->bf_insns = NULL;
8932                 if (bf->bf_len == 0) {
8933                         /* legal, matches everything */
8934                         continue;
8935                 }
8936                 len = bf->bf_len * sizeof(*bf->bf_insns);
8937                 bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
8938                 rc = copyin(u, bf->bf_insns, len);
8939                 if (rc != 0 || !bpf_validate(bf->bf_insns, bf->bf_len)) {
8940                         /* Rule i's filter isn't freed by the error path. */
8941                         free(bf->bf_insns, M_CXGBE);
8942                         if (rc == 0)
8943                                 rc = EINVAL;
8944                         goto error;
8945                 }
8946         }
8947 set_policy:
8948         rw_wlock(&sc->policy_lock);
8949         old = sc->policy;
8950         sc->policy = op;
8951         rw_wunlock(&sc->policy_lock);
8952         free_offload_policy(old);
8953
8954         return (0);
8955 }
8956
8957 #define MAX_READ_BUF_SIZE (128 * 1024)
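/*
 * Copy a range of adapter memory out to userspace in MAX_READ_BUF_SIZE
 * chunks, using the memory window selected by 'win'.
 */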
8958 static int
8959 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
8960 {
8961         uint32_t addr, remaining, n;
8962         uint32_t *buf;
8963         int rc;
8964         uint8_t *dst;
8965
8966         rc = validate_mem_range(sc, mr->addr, mr->len);
8967         if (rc != 0)
8968                 return (rc);
8969
8970         buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
8971         addr = mr->addr;
8972         remaining = mr->len;
8973         dst = (void *)mr->data;
8974
8975         while (remaining) {
8976                 n = min(remaining, MAX_READ_BUF_SIZE);
8977                 read_via_memwin(sc, win, addr, buf, n);
8978
8979                 rc = copyout(buf, dst, n);
8980                 if (rc != 0)
8981                         break;
8982
8983                 dst += n;
8984                 remaining -= n;
8985                 addr += n;
8986         }
8987
8988         free(buf, M_CXGBE);
8989         return (rc);
8990 }
8991 #undef MAX_READ_BUF_SIZE
8992
8993 static int
8994 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
8995 {
8996         int rc;
8997
8998         if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
8999                 return (EINVAL);
9000
9001         if (i2cd->len > sizeof(i2cd->data))
9002                 return (EFBIG);
9003
9004         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
9005         if (rc)
9006                 return (rc);
9007         rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
9008             i2cd->offset, i2cd->len, &i2cd->data[0]);
9009         end_synchronized_op(sc, 0);
9010
9011         return (rc);
9012 }
9013
9014 int
9015 t4_os_find_pci_capability(struct adapter *sc, int cap)
9016 {
9017         int i;
9018
9019         return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
9020 }
9021
9022 int
9023 t4_os_pci_save_state(struct adapter *sc)
9024 {
9025         device_t dev;
9026         struct pci_devinfo *dinfo;
9027
9028         dev = sc->dev;
9029         dinfo = device_get_ivars(dev);
9030
9031         pci_cfg_save(dev, dinfo, 0);
9032         return (0);
9033 }
9034
9035 int
9036 t4_os_pci_restore_state(struct adapter *sc)
9037 {
9038         device_t dev;
9039         struct pci_devinfo *dinfo;
9040
9041         dev = sc->dev;
9042         dinfo = device_get_ivars(dev);
9043
9044         pci_cfg_restore(dev, dinfo);
9045         return (0);
9046 }
9047
9048 void
9049 t4_os_portmod_changed(struct port_info *pi)
9050 {
9051         struct adapter *sc = pi->adapter;
9052         struct vi_info *vi;
9053         struct ifnet *ifp;
9054         static const char *mod_str[] = {
9055                 NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
9056         };
9057
9058         PORT_LOCK(pi);
9059         build_medialist(pi, &pi->media);
9060         PORT_UNLOCK(pi);
9061         vi = &pi->vi[0];
9062         if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
9063                 init_l1cfg(pi);
9064                 end_synchronized_op(sc, LOCK_HELD);
9065         }
9066
9067         ifp = vi->ifp;
9068         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
9069                 if_printf(ifp, "transceiver unplugged.\n");
9070         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
9071                 if_printf(ifp, "unknown transceiver inserted.\n");
9072         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
9073                 if_printf(ifp, "unsupported transceiver inserted.\n");
9074         else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
9075                 if_printf(ifp, "%dGbps %s transceiver inserted.\n",
9076                     port_top_speed(pi), mod_str[pi->mod_type]);
9077         } else {
9078                 if_printf(ifp, "transceiver (type %d) inserted.\n",
9079                     pi->mod_type);
9080         }
9081 }
9082
9083 void
9084 t4_os_link_changed(struct port_info *pi)
9085 {
9086         struct vi_info *vi;
9087         struct ifnet *ifp;
9088         struct link_config *lc;
9089         int v;
9090
9091         for_each_vi(pi, v, vi) {
9092                 ifp = vi->ifp;
9093                 if (ifp == NULL)
9094                         continue;
9095
9096                 lc = &pi->link_cfg;
9097                 if (lc->link_ok) {
9098                         ifp->if_baudrate = IF_Mbps(lc->speed);
9099                         if_link_state_change(ifp, LINK_STATE_UP);
9100                 } else {
9101                         if_link_state_change(ifp, LINK_STATE_DOWN);
9102                 }
9103         }
9104 }
9105
9106 void
9107 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
9108 {
9109         struct adapter *sc;
9110
9111         sx_slock(&t4_list_lock);
9112         SLIST_FOREACH(sc, &t4_list, link) {
9113                 /*
9114                  * func should not make any assumptions about what state sc is
9115                  * in - the only guarantee is that sc->sc_lock is a valid lock.
9116                  */
9117                 func(sc, arg);
9118         }
9119         sx_sunlock(&t4_list_lock);
9120 }
9121
9122 static int
9123 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
9124     struct thread *td)
9125 {
9126         int rc;
9127         struct adapter *sc = dev->si_drv1;
9128
9129         rc = priv_check(td, PRIV_DRIVER);
9130         if (rc != 0)
9131                 return (rc);
9132
9133         switch (cmd) {
9134         case CHELSIO_T4_GETREG: {
9135                 struct t4_reg *edata = (struct t4_reg *)data;
9136
9137                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9138                         return (EFAULT);
9139
9140                 if (edata->size == 4)
9141                         edata->val = t4_read_reg(sc, edata->addr);
9142                 else if (edata->size == 8)
9143                         edata->val = t4_read_reg64(sc, edata->addr);
9144                 else
9145                         return (EINVAL);
9146
9147                 break;
9148         }
9149         case CHELSIO_T4_SETREG: {
9150                 struct t4_reg *edata = (struct t4_reg *)data;
9151
9152                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9153                         return (EFAULT);
9154
9155                 if (edata->size == 4) {
9156                         if (edata->val & 0xffffffff00000000)
9157                                 return (EINVAL);
9158                         t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
9159                 } else if (edata->size == 8)
9160                         t4_write_reg64(sc, edata->addr, edata->val);
9161                 else
9162                         return (EINVAL);
9163                 break;
9164         }
9165         case CHELSIO_T4_REGDUMP: {
9166                 struct t4_regdump *regs = (struct t4_regdump *)data;
9167                 int reglen = t4_get_regs_len(sc);
9168                 uint8_t *buf;
9169
9170                 if (regs->len < reglen) {
9171                         regs->len = reglen; /* hint to the caller */
9172                         return (ENOBUFS);
9173                 }
9174
9175                 regs->len = reglen;
9176                 buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
9177                 get_regs(sc, regs, buf);
9178                 rc = copyout(buf, regs->data, reglen);
9179                 free(buf, M_CXGBE);
9180                 break;
9181         }
9182         case CHELSIO_T4_GET_FILTER_MODE:
9183                 rc = get_filter_mode(sc, (uint32_t *)data);
9184                 break;
9185         case CHELSIO_T4_SET_FILTER_MODE:
9186                 rc = set_filter_mode(sc, *(uint32_t *)data);
9187                 break;
9188         case CHELSIO_T4_GET_FILTER:
9189                 rc = get_filter(sc, (struct t4_filter *)data);
9190                 break;
9191         case CHELSIO_T4_SET_FILTER:
9192                 rc = set_filter(sc, (struct t4_filter *)data);
9193                 break;
9194         case CHELSIO_T4_DEL_FILTER:
9195                 rc = del_filter(sc, (struct t4_filter *)data);
9196                 break;
9197         case CHELSIO_T4_GET_SGE_CONTEXT:
9198                 rc = get_sge_context(sc, (struct t4_sge_context *)data);
9199                 break;
9200         case CHELSIO_T4_LOAD_FW:
9201                 rc = load_fw(sc, (struct t4_data *)data);
9202                 break;
9203         case CHELSIO_T4_GET_MEM:
9204                 rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
9205                 break;
9206         case CHELSIO_T4_GET_I2C:
9207                 rc = read_i2c(sc, (struct t4_i2c_data *)data);
9208                 break;
9209         case CHELSIO_T4_CLEAR_STATS: {
9210                 int i, v, bg_map;
9211                 u_int port_id = *(uint32_t *)data;
9212                 struct port_info *pi;
9213                 struct vi_info *vi;
9214
9215                 if (port_id >= sc->params.nports)
9216                         return (EINVAL);
9217                 pi = sc->port[port_id];
9218                 if (pi == NULL)
9219                         return (EIO);
9220
9221                 /* MAC stats */
9222                 t4_clr_port_stats(sc, pi->tx_chan);
9223                 pi->tx_parse_error = 0;
9224                 pi->tnl_cong_drops = 0;
9225                 mtx_lock(&sc->reg_lock);
9226                 for_each_vi(pi, v, vi) {
9227                         if (vi->flags & VI_INIT_DONE)
9228                                 t4_clr_vi_stats(sc, vi->viid);
9229                 }
9230                 bg_map = pi->mps_bg_map;
9231                 v = 0;  /* reuse */
9232                 while (bg_map) {
9233                         i = ffs(bg_map) - 1;
9234                         t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
9235                             1, A_TP_MIB_TNL_CNG_DROP_0 + i);
9236                         bg_map &= ~(1 << i);
9237                 }
9238                 mtx_unlock(&sc->reg_lock);
9239
9240                 /*
9241                  * Since this command accepts a port, clear stats for
9242                  * all VIs on this port.
9243                  */
9244                 for_each_vi(pi, v, vi) {
9245                         if (vi->flags & VI_INIT_DONE) {
9246                                 struct sge_rxq *rxq;
9247                                 struct sge_txq *txq;
9248                                 struct sge_wrq *wrq;
9249
9250                                 for_each_rxq(vi, i, rxq) {
9251 #if defined(INET) || defined(INET6)
9252                                         rxq->lro.lro_queued = 0;
9253                                         rxq->lro.lro_flushed = 0;
9254 #endif
9255                                         rxq->rxcsum = 0;
9256                                         rxq->vlan_extraction = 0;
9257                                 }
9258
9259                                 for_each_txq(vi, i, txq) {
9260                                         txq->txcsum = 0;
9261                                         txq->tso_wrs = 0;
9262                                         txq->vlan_insertion = 0;
9263                                         txq->imm_wrs = 0;
9264                                         txq->sgl_wrs = 0;
9265                                         txq->txpkt_wrs = 0;
9266                                         txq->txpkts0_wrs = 0;
9267                                         txq->txpkts1_wrs = 0;
9268                                         txq->txpkts0_pkts = 0;
9269                                         txq->txpkts1_pkts = 0;
9270                                         mp_ring_reset_stats(txq->r);
9271                                 }
9272
9273 #ifdef TCP_OFFLOAD
9274                                 /* nothing to clear for each ofld_rxq */
9275
9276                                 for_each_ofld_txq(vi, i, wrq) {
9277                                         wrq->tx_wrs_direct = 0;
9278                                         wrq->tx_wrs_copied = 0;
9279                                 }
9280 #endif
9281
9282                                 if (IS_MAIN_VI(vi)) {
9283                                         wrq = &sc->sge.ctrlq[pi->port_id];
9284                                         wrq->tx_wrs_direct = 0;
9285                                         wrq->tx_wrs_copied = 0;
9286                                 }
9287                         }
9288                 }
9289                 break;
9290         }
9291         case CHELSIO_T4_SCHED_CLASS:
9292                 rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
9293                 break;
9294         case CHELSIO_T4_SCHED_QUEUE:
9295                 rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
9296                 break;
9297         case CHELSIO_T4_GET_TRACER:
9298                 rc = t4_get_tracer(sc, (struct t4_tracer *)data);
9299                 break;
9300         case CHELSIO_T4_SET_TRACER:
9301                 rc = t4_set_tracer(sc, (struct t4_tracer *)data);
9302                 break;
9303         case CHELSIO_T4_LOAD_CFG:
9304                 rc = load_cfg(sc, (struct t4_data *)data);
9305                 break;
9306         case CHELSIO_T4_LOAD_BOOT:
9307                 rc = load_boot(sc, (struct t4_bootrom *)data);
9308                 break;
9309         case CHELSIO_T4_LOAD_BOOTCFG:
9310                 rc = load_bootcfg(sc, (struct t4_data *)data);
9311                 break;
9312         case CHELSIO_T4_CUDBG_DUMP:
9313                 rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
9314                 break;
9315         case CHELSIO_T4_SET_OFLD_POLICY:
9316                 rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
9317                 break;
9318         default:
9319                 rc = ENOTTY;
9320         }
9321
9322         return (rc);
9323 }
9324
9325 void
9326 t4_db_full(struct adapter *sc)
9327 {
9328
9329         CXGBE_UNIMPLEMENTED(__func__);
9330 }
9331
9332 void
9333 t4_db_dropped(struct adapter *sc)
9334 {
9335
9336         CXGBE_UNIMPLEMENTED(__func__);
9337 }
9338
9339 #ifdef TCP_OFFLOAD
9340 static int
9341 toe_capability(struct vi_info *vi, int enable)
9342 {
9343         int rc;
9344         struct port_info *pi = vi->pi;
9345         struct adapter *sc = pi->adapter;
9346
9347         ASSERT_SYNCHRONIZED_OP(sc);
9348
9349         if (!is_offload(sc))
9350                 return (ENODEV);
9351
9352         if (enable) {
9353                 if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
9354                         /* TOE is already enabled. */
9355                         return (0);
9356                 }
9357
9358                 /*
9359                  * We need the port's queues around so that we're able to send
9360                  * and receive CPLs to/from the TOE even if the ifnet for this
9361                  * port has never been UP'd administratively.
9362                  */
9363                 if (!(vi->flags & VI_INIT_DONE)) {
9364                         rc = vi_full_init(vi);
9365                         if (rc)
9366                                 return (rc);
9367                 }
9368                 if (!(pi->vi[0].flags & VI_INIT_DONE)) {
9369                         rc = vi_full_init(&pi->vi[0]);
9370                         if (rc)
9371                                 return (rc);
9372                 }
9373
9374                 if (isset(&sc->offload_map, pi->port_id)) {
9375                         /* TOE is enabled on another VI of this port. */
9376                         pi->uld_vis++;
9377                         return (0);
9378                 }
9379
9380                 if (!uld_active(sc, ULD_TOM)) {
9381                         rc = t4_activate_uld(sc, ULD_TOM);
9382                         if (rc == EAGAIN) {
9383                                 log(LOG_WARNING,
9384                                     "You must kldload t4_tom.ko before trying "
9385                                     "to enable TOE on a cxgbe interface.\n");
9386                         }
9387                         if (rc != 0)
9388                                 return (rc);
9389                         KASSERT(sc->tom_softc != NULL,
9390                             ("%s: TOM activated but softc NULL", __func__));
9391                         KASSERT(uld_active(sc, ULD_TOM),
9392                             ("%s: TOM activated but flag not set", __func__));
9393                 }
9394
9395                 /* Activate iWARP and iSCSI too, if the modules are loaded. */
9396                 if (!uld_active(sc, ULD_IWARP))
9397                         (void) t4_activate_uld(sc, ULD_IWARP);
9398                 if (!uld_active(sc, ULD_ISCSI))
9399                         (void) t4_activate_uld(sc, ULD_ISCSI);
9400
9401                 pi->uld_vis++;
9402                 setbit(&sc->offload_map, pi->port_id);
9403         } else {
9404                 pi->uld_vis--;
9405
9406                 if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
9407                         return (0);
9408
9409                 KASSERT(uld_active(sc, ULD_TOM),
9410                     ("%s: TOM never initialized?", __func__));
9411                 clrbit(&sc->offload_map, pi->port_id);
9412         }
9413
9414         return (0);
9415 }
9416
9417 /*
9418  * Add an upper layer driver to the global list.
9419  */
9420 int
9421 t4_register_uld(struct uld_info *ui)
9422 {
9423         int rc = 0;
9424         struct uld_info *u;
9425
9426         sx_xlock(&t4_uld_list_lock);
9427         SLIST_FOREACH(u, &t4_uld_list, link) {
9428                 if (u->uld_id == ui->uld_id) {
9429                         rc = EEXIST;
9430                         goto done;
9431                 }
9432         }
9433
9434         SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
9435         ui->refcount = 0;
9436 done:
9437         sx_xunlock(&t4_uld_list_lock);
9438         return (rc);
9439 }
9440
9441 int
9442 t4_unregister_uld(struct uld_info *ui)
9443 {
9444         int rc = EINVAL;
9445         struct uld_info *u;
9446
9447         sx_xlock(&t4_uld_list_lock);
9448
9449         SLIST_FOREACH(u, &t4_uld_list, link) {
9450                 if (u == ui) {
9451                         if (ui->refcount > 0) {
9452                                 rc = EBUSY;
9453                                 goto done;
9454                         }
9455
9456                         SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
9457                         rc = 0;
9458                         goto done;
9459                 }
9460         }
9461 done:
9462         sx_xunlock(&t4_uld_list_lock);
9463         return (rc);
9464 }
9465
9466 int
9467 t4_activate_uld(struct adapter *sc, int id)
9468 {
9469         int rc;
9470         struct uld_info *ui;
9471
9472         ASSERT_SYNCHRONIZED_OP(sc);
9473
9474         if (id < 0 || id > ULD_MAX)
9475                 return (EINVAL);
9476         rc = EAGAIN;    /* kldload the module with this ULD and try again. */
9477
9478         sx_slock(&t4_uld_list_lock);
9479
9480         SLIST_FOREACH(ui, &t4_uld_list, link) {
9481                 if (ui->uld_id == id) {
9482                         if (!(sc->flags & FULL_INIT_DONE)) {
9483                                 rc = adapter_full_init(sc);
9484                                 if (rc != 0)
9485                                         break;
9486                         }
9487
9488                         rc = ui->activate(sc);
9489                         if (rc == 0) {
9490                                 setbit(&sc->active_ulds, id);
9491                                 ui->refcount++;
9492                         }
9493                         break;
9494                 }
9495         }
9496
9497         sx_sunlock(&t4_uld_list_lock);
9498
9499         return (rc);
9500 }
9501
9502 int
9503 t4_deactivate_uld(struct adapter *sc, int id)
9504 {
9505         int rc;
9506         struct uld_info *ui;
9507
9508         ASSERT_SYNCHRONIZED_OP(sc);
9509
9510         if (id < 0 || id > ULD_MAX)
9511                 return (EINVAL);
9512         rc = ENXIO;
9513
9514         sx_slock(&t4_uld_list_lock);
9515
9516         SLIST_FOREACH(ui, &t4_uld_list, link) {
9517                 if (ui->uld_id == id) {
9518                         rc = ui->deactivate(sc);
9519                         if (rc == 0) {
9520                                 clrbit(&sc->active_ulds, id);
9521                                 ui->refcount--;
9522                         }
9523                         break;
9524                 }
9525         }
9526
9527         sx_sunlock(&t4_uld_list_lock);
9528
9529         return (rc);
9530 }
9531
9532 int
9533 uld_active(struct adapter *sc, int uld_id)
9534 {
9535
9536         MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
9537
9538         return (isset(&sc->active_ulds, uld_id));
9539 }
9540 #endif
9541
9542 /*
9543  * t  = ptr to tunable.
9544  * nc = number of CPUs.
9545  * c  = compiled in default for that tunable.
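 *
 * A value > 0 was set by the user and is used as-is; 0 means "use the
 * default, capped at nc"; a negative value requests -*t queues, also capped
 * at nc.  E.g. with nc = 8 and c = 16: 0 -> 8, -4 -> 4, 12 -> 12.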
9546  */
9547 static void
9548 calculate_nqueues(int *t, int nc, const int c)
9549 {
9550         int nq;
9551
9552         if (*t > 0)
9553                 return;
9554         nq = *t < 0 ? -*t : c;
9555         *t = min(nc, nq);
9556 }
9557
9558 /*
9559  * Come up with reasonable defaults for some of the tunables, provided they're
9560  * not set by the user (in which case we'll use the values as is).
9561  */
9562 static void
9563 tweak_tunables(void)
9564 {
9565         int nc = mp_ncpus;      /* our snapshot of the number of CPUs */
9566
9567         if (t4_ntxq < 1) {
9568 #ifdef RSS
9569                 t4_ntxq = rss_getnumbuckets();
9570 #else
9571                 calculate_nqueues(&t4_ntxq, nc, NTXQ);
9572 #endif
9573         }
9574
9575         calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);
9576
9577         if (t4_nrxq < 1) {
9578 #ifdef RSS
9579                 t4_nrxq = rss_getnumbuckets();
9580 #else
9581                 calculate_nqueues(&t4_nrxq, nc, NRXQ);
9582 #endif
9583         }
9584
9585         calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);
9586
9587 #ifdef TCP_OFFLOAD
9588         calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
9589         calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
9590         calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
9591         calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);
9592
9593         if (t4_toecaps_allowed == -1)
9594                 t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
9595
9596         if (t4_rdmacaps_allowed == -1) {
9597                 t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
9598                     FW_CAPS_CONFIG_RDMA_RDMAC;
9599         }
9600
9601         if (t4_iscsicaps_allowed == -1) {
9602                 t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
9603                     FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
9604                     FW_CAPS_CONFIG_ISCSI_T10DIF;
9605         }
9606
9607         if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
9608                 t4_tmr_idx_ofld = TMR_IDX_OFLD;
9609
9610         if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
9611                 t4_pktc_idx_ofld = PKTC_IDX_OFLD;
9612 #else
9613         if (t4_toecaps_allowed == -1)
9614                 t4_toecaps_allowed = 0;
9615
9616         if (t4_rdmacaps_allowed == -1)
9617                 t4_rdmacaps_allowed = 0;
9618
9619         if (t4_iscsicaps_allowed == -1)
9620                 t4_iscsicaps_allowed = 0;
9621 #endif
9622
9623 #ifdef DEV_NETMAP
9624         calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
9625         calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
9626 #endif
9627
9628         if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
9629                 t4_tmr_idx = TMR_IDX;
9630
9631         if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
9632                 t4_pktc_idx = PKTC_IDX;
9633
9634         if (t4_qsize_txq < 128)
9635                 t4_qsize_txq = 128;
9636
9637         if (t4_qsize_rxq < 128)
9638                 t4_qsize_rxq = 128;
9639         while (t4_qsize_rxq & 7)
9640                 t4_qsize_rxq++;
9641
9642         t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
9643
9644         /*
9645          * Number of VIs to create per-port.  The first VI is the "main" regular
9646          * VI for the port.  The rest are additional virtual interfaces on the
9647          * same physical port.  Note that the main VI does not have native
9648          * netmap support but the extra VIs do.
9649          *
9650          * Limit the number of VIs per port to the number of available
9651          * MAC addresses per port.
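         * (t4_num_vis is typically set with the hw.cxgbe.num_vis tunable.)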
9652          */
9653         if (t4_num_vis < 1)
9654                 t4_num_vis = 1;
9655         if (t4_num_vis > nitems(vi_mac_funcs)) {
9656                 t4_num_vis = nitems(vi_mac_funcs);
9657                 printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
9658         }
9659
9660         if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
9661                 pcie_relaxed_ordering = 1;
9662 #if defined(__i386__) || defined(__amd64__)
9663                 if (cpu_vendor_id == CPU_VENDOR_INTEL)
9664                         pcie_relaxed_ordering = 0;
9665 #endif
9666         }
9667 }
9668
9669 #ifdef DDB
9670 static void
9671 t4_dump_tcb(struct adapter *sc, int tid)
9672 {
9673         uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
9674
9675         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
9676         save = t4_read_reg(sc, reg);
9677         base = sc->memwin[2].mw_base;
9678
9679         /* Dump TCB for the tid */
9680         tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
9681         tcb_addr += tid * TCB_SIZE;
9682
9683         if (is_t4(sc)) {
9684                 pf = 0;
9685                 win_pos = tcb_addr & ~0xf;      /* start must be 16B aligned */
9686         } else {
9687                 pf = V_PFNUM(sc->pf);
9688                 win_pos = tcb_addr & ~0x7f;     /* start must be 128B aligned */
9689         }
9690         t4_write_reg(sc, reg, win_pos | pf);
9691         t4_read_reg(sc, reg);
9692
9693         off = tcb_addr - win_pos;
9694         for (i = 0; i < 4; i++) {
9695                 uint32_t buf[8];
9696                 for (j = 0; j < 8; j++, off += 4)
9697                         buf[j] = htonl(t4_read_reg(sc, base + off));
9698
9699                 db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
9700                     buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
9701                     buf[7]);
9702         }
9703
9704         t4_write_reg(sc, reg, save);
9705         t4_read_reg(sc, reg);
9706 }
9707
9708 static void
9709 t4_dump_devlog(struct adapter *sc)
9710 {
9711         struct devlog_params *dparams = &sc->params.devlog;
9712         struct fw_devlog_e e;
9713         int i, first, j, m, nentries, rc;
9714         uint64_t ftstamp = UINT64_MAX;
9715
9716         if (dparams->start == 0) {
9717                 db_printf("devlog params not valid\n");
9718                 return;
9719         }
9720
9721         nentries = dparams->size / sizeof(struct fw_devlog_e);
9722         m = fwmtype_to_hwmtype(dparams->memtype);
9723
9724         /* Find the first entry. */
9725         first = -1;
9726         for (i = 0; i < nentries && !db_pager_quit; i++) {
9727                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9728                     sizeof(e), (void *)&e);
9729                 if (rc != 0)
9730                         break;
9731
9732                 if (e.timestamp == 0)
9733                         break;
9734
9735                 e.timestamp = be64toh(e.timestamp);
9736                 if (e.timestamp < ftstamp) {
9737                         ftstamp = e.timestamp;
9738                         first = i;
9739                 }
9740         }
9741
9742         if (first == -1)
9743                 return;
9744
9745         i = first;
9746         do {
9747                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
9748                     sizeof(e), (void *)&e);
9749                 if (rc != 0)
9750                         return;
9751
9752                 if (e.timestamp == 0)
9753                         return;
9754
9755                 e.timestamp = be64toh(e.timestamp);
9756                 e.seqno = be32toh(e.seqno);
9757                 for (j = 0; j < 8; j++)
9758                         e.params[j] = be32toh(e.params[j]);
9759
9760                 db_printf("%10d  %15ju  %8s  %8s  ",
9761                     e.seqno, e.timestamp,
9762                     (e.level < nitems(devlog_level_strings) ?
9763                         devlog_level_strings[e.level] : "UNKNOWN"),
9764                     (e.facility < nitems(devlog_facility_strings) ?
9765                         devlog_facility_strings[e.facility] : "UNKNOWN"));
9766                 db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
9767                     e.params[3], e.params[4], e.params[5], e.params[6],
9768                     e.params[7]);
9769
9770                 if (++i == nentries)
9771                         i = 0;
9772         } while (i != first && !db_pager_quit);
9773 }
9774
9775 static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
9776 _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
9777
9778 DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
9779 {
9780         device_t dev;
9781         int t;
9782         bool valid;
9783
9784         valid = false;
9785         t = db_read_token();
9786         if (t == tIDENT) {
9787                 dev = device_lookup_by_name(db_tok_string);
9788                 valid = true;
9789         }
9790         db_skip_to_eol();
9791         if (!valid) {
9792                 db_printf("usage: show t4 devlog <nexus>\n");
9793                 return;
9794         }
9795
9796         if (dev == NULL) {
9797                 db_printf("device not found\n");
9798                 return;
9799         }
9800
9801         t4_dump_devlog(device_get_softc(dev));
9802 }
9803
9804 DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
9805 {
9806         device_t dev;
9807         int radix, tid, t;
9808         bool valid;
9809
9810         valid = false;
9811         radix = db_radix;
9812         db_radix = 10;
9813         t = db_read_token();
9814         if (t == tIDENT) {
9815                 dev = device_lookup_by_name(db_tok_string);
9816                 t = db_read_token();
9817                 if (t == tNUMBER) {
9818                         tid = db_tok_number;
9819                         valid = true;
9820                 }
9821         }
9822         db_radix = radix;
9823         db_skip_to_eol();
9824         if (!valid) {
9825                 db_printf("usage: show t4 tcb <nexus> <tid>\n");
9826                 return;
9827         }
9828
9829         if (dev == NULL) {
9830                 db_printf("device not found\n");
9831                 return;
9832         }
9833         if (tid < 0) {
9834                 db_printf("invalid tid\n");
9835                 return;
9836         }
9837
9838         t4_dump_tcb(device_get_softc(dev), tid);
9839 }
9840 #endif
9841
9842 /*
9843  * Borrowed from cesa_prep_aes_key().
9844  *
9845  * NB: The crypto engine wants the words in the decryption key in reverse
9846  * order.
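 *
 * dec_key must have room for kbits / 8 bytes (16, 24, or 32); the MPASS at
 * the end verifies that exactly that many bytes were written.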
9847  */
9848 void
9849 t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits)
9850 {
9851         uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)];
9852         uint32_t *dkey;
9853         int i;
9854
9855         rijndaelKeySetupEnc(ek, enc_key, kbits);
9856         dkey = dec_key;
9857         dkey += (kbits / 8) / 4;
9858
9859         switch (kbits) {
9860         case 128:
9861                 for (i = 0; i < 4; i++)
9862                         *--dkey = htobe32(ek[4 * 10 + i]);
9863                 break;
9864         case 192:
9865                 for (i = 0; i < 2; i++)
9866                         *--dkey = htobe32(ek[4 * 11 + 2 + i]);
9867                 for (i = 0; i < 4; i++)
9868                         *--dkey = htobe32(ek[4 * 12 + i]);
9869                 break;
9870         case 256:
9871                 for (i = 0; i < 4; i++)
9872                         *--dkey = htobe32(ek[4 * 13 + i]);
9873                 for (i = 0; i < 4; i++)
9874                         *--dkey = htobe32(ek[4 * 14 + i]);
9875                 break;
9876         }
9877         MPASS(dkey == dec_key);
9878 }
9879
9880 static struct sx mlu;   /* mod load unload */
9881 SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
9882
9883 static int
9884 mod_event(module_t mod, int cmd, void *arg)
9885 {
9886         int rc = 0;
9887         static int loaded = 0;
9888
9889         switch (cmd) {
9890         case MOD_LOAD:
9891                 sx_xlock(&mlu);
9892                 if (loaded++ == 0) {
9893                         t4_sge_modload();
9894                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
9895                             t4_filter_rpl, CPL_COOKIE_FILTER);
9896                         t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
9897                             do_l2t_write_rpl, CPL_COOKIE_FILTER);
9898                         t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
9899                         t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
9900                         sx_init(&t4_list_lock, "T4/T5 adapters");
9901                         SLIST_INIT(&t4_list);
9902 #ifdef TCP_OFFLOAD
9903                         sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
9904                         SLIST_INIT(&t4_uld_list);
9905 #endif
9906                         t4_tracer_modload();
9907                         tweak_tunables();
9908                 }
9909                 sx_xunlock(&mlu);
9910                 break;
9911
9912         case MOD_UNLOAD:
9913                 sx_xlock(&mlu);
9914                 if (--loaded == 0) {
9915                         int tries;
9916
9917                         sx_slock(&t4_list_lock);
9918                         if (!SLIST_EMPTY(&t4_list)) {
9919                                 rc = EBUSY;
9920                                 sx_sunlock(&t4_list_lock);
9921                                 goto done_unload;
9922                         }
9923 #ifdef TCP_OFFLOAD
9924                         sx_slock(&t4_uld_list_lock);
9925                         if (!SLIST_EMPTY(&t4_uld_list)) {
9926                                 rc = EBUSY;
9927                                 sx_sunlock(&t4_uld_list_lock);
9928                                 sx_sunlock(&t4_list_lock);
9929                                 goto done_unload;
9930                         }
9931 #endif
9932                         tries = 0;
9933                         while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
9934                                 uprintf("%ju clusters with custom free routine "
9935                                     "still in use.\n", t4_sge_extfree_refs());
9936                                 pause("t4unload", 2 * hz);
9937                         }
9938 #ifdef TCP_OFFLOAD
9939                         sx_sunlock(&t4_uld_list_lock);
9940 #endif
9941                         sx_sunlock(&t4_list_lock);
9942
9943                         if (t4_sge_extfree_refs() == 0) {
9944                                 t4_tracer_modunload();
9945 #ifdef TCP_OFFLOAD
9946                                 sx_destroy(&t4_uld_list_lock);
9947 #endif
9948                                 sx_destroy(&t4_list_lock);
9949                                 t4_sge_modunload();
9950                                 loaded = 0;
9951                         } else {
9952                                 rc = EBUSY;
9953                                 loaded++;       /* undo earlier decrement */
9954                         }
9955                 }
9956 done_unload:
9957                 sx_xunlock(&mlu);
9958                 break;
9959         }
9960
9961         return (rc);
9962 }
9963
9964 static devclass_t t4_devclass, t5_devclass, t6_devclass;
9965 static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
9966 static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;
9967
9968 DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
9969 MODULE_VERSION(t4nex, 1);
9970 MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
9971 #ifdef DEV_NETMAP
9972 MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
9973 #endif /* DEV_NETMAP */
9974
9975 DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
9976 MODULE_VERSION(t5nex, 1);
9977 MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
9978 #ifdef DEV_NETMAP
9979 MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
9980 #endif /* DEV_NETMAP */
9981
9982 DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
9983 MODULE_VERSION(t6nex, 1);
9984 MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
9985 #ifdef DEV_NETMAP
9986 MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
9987 #endif /* DEV_NETMAP */
9988
9989 DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
9990 MODULE_VERSION(cxgbe, 1);
9991
9992 DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
9993 MODULE_VERSION(cxl, 1);
9994
9995 DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
9996 MODULE_VERSION(cc, 1);
9997
9998 DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
9999 MODULE_VERSION(vcxgbe, 1);
10000
10001 DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
10002 MODULE_VERSION(vcxl, 1);
10003
10004 DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
10005 MODULE_VERSION(vcc, 1);