]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/cxgbe/t4_main.c
MFV r339640,339641,339644:
[FreeBSD/FreeBSD.git] / sys / dev / cxgbe / t4_main.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 Chelsio Communications, Inc.
5  * All rights reserved.
6  * Written by: Navdeep Parhar <np@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_ddb.h"
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36 #include "opt_ratelimit.h"
37 #include "opt_rss.h"
38
39 #include <sys/param.h>
40 #include <sys/conf.h>
41 #include <sys/priv.h>
42 #include <sys/kernel.h>
43 #include <sys/bus.h>
44 #include <sys/module.h>
45 #include <sys/malloc.h>
46 #include <sys/queue.h>
47 #include <sys/taskqueue.h>
48 #include <sys/pciio.h>
49 #include <dev/pci/pcireg.h>
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pci_private.h>
52 #include <sys/firmware.h>
53 #include <sys/sbuf.h>
54 #include <sys/smp.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/if_dl.h>
62 #include <net/if_vlan_var.h>
63 #ifdef RSS
64 #include <net/rss_config.h>
65 #endif
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #if defined(__i386__) || defined(__amd64__)
69 #include <machine/md_var.h>
70 #include <machine/cputypes.h>
71 #include <vm/vm.h>
72 #include <vm/pmap.h>
73 #endif
74 #include <crypto/rijndael/rijndael.h>
75 #ifdef DDB
76 #include <ddb/ddb.h>
77 #include <ddb/db_lex.h>
78 #endif
79
80 #include "common/common.h"
81 #include "common/t4_msg.h"
82 #include "common/t4_regs.h"
83 #include "common/t4_regs_values.h"
84 #include "cudbg/cudbg.h"
85 #include "t4_ioctl.h"
86 #include "t4_l2t.h"
87 #include "t4_mp_ring.h"
88 #include "t4_if.h"
89 #include "t4_smt.h"
90
91 /* T4 bus driver interface */
92 static int t4_probe(device_t);
93 static int t4_attach(device_t);
94 static int t4_detach(device_t);
95 static int t4_ready(device_t);
96 static int t4_read_port_device(device_t, int, device_t *);
97 static device_method_t t4_methods[] = {
98         DEVMETHOD(device_probe,         t4_probe),
99         DEVMETHOD(device_attach,        t4_attach),
100         DEVMETHOD(device_detach,        t4_detach),
101
102         DEVMETHOD(t4_is_main_ready,     t4_ready),
103         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
104
105         DEVMETHOD_END
106 };
107 static driver_t t4_driver = {
108         "t4nex",
109         t4_methods,
110         sizeof(struct adapter)
111 };
112
113
114 /* T4 port (cxgbe) interface */
115 static int cxgbe_probe(device_t);
116 static int cxgbe_attach(device_t);
117 static int cxgbe_detach(device_t);
118 device_method_t cxgbe_methods[] = {
119         DEVMETHOD(device_probe,         cxgbe_probe),
120         DEVMETHOD(device_attach,        cxgbe_attach),
121         DEVMETHOD(device_detach,        cxgbe_detach),
122         { 0, 0 }
123 };
124 static driver_t cxgbe_driver = {
125         "cxgbe",
126         cxgbe_methods,
127         sizeof(struct port_info)
128 };
129
130 /* T4 VI (vcxgbe) interface */
131 static int vcxgbe_probe(device_t);
132 static int vcxgbe_attach(device_t);
133 static int vcxgbe_detach(device_t);
134 static device_method_t vcxgbe_methods[] = {
135         DEVMETHOD(device_probe,         vcxgbe_probe),
136         DEVMETHOD(device_attach,        vcxgbe_attach),
137         DEVMETHOD(device_detach,        vcxgbe_detach),
138         { 0, 0 }
139 };
140 static driver_t vcxgbe_driver = {
141         "vcxgbe",
142         vcxgbe_methods,
143         sizeof(struct vi_info)
144 };
145
146 static d_ioctl_t t4_ioctl;
147
148 static struct cdevsw t4_cdevsw = {
149        .d_version = D_VERSION,
150        .d_ioctl = t4_ioctl,
151        .d_name = "t4nex",
152 };
153
154 /* T5 bus driver interface */
155 static int t5_probe(device_t);
156 static device_method_t t5_methods[] = {
157         DEVMETHOD(device_probe,         t5_probe),
158         DEVMETHOD(device_attach,        t4_attach),
159         DEVMETHOD(device_detach,        t4_detach),
160
161         DEVMETHOD(t4_is_main_ready,     t4_ready),
162         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
163
164         DEVMETHOD_END
165 };
166 static driver_t t5_driver = {
167         "t5nex",
168         t5_methods,
169         sizeof(struct adapter)
170 };
171
172
173 /* T5 port (cxl) interface */
174 static driver_t cxl_driver = {
175         "cxl",
176         cxgbe_methods,
177         sizeof(struct port_info)
178 };
179
180 /* T5 VI (vcxl) interface */
181 static driver_t vcxl_driver = {
182         "vcxl",
183         vcxgbe_methods,
184         sizeof(struct vi_info)
185 };
186
187 /* T6 bus driver interface */
188 static int t6_probe(device_t);
189 static device_method_t t6_methods[] = {
190         DEVMETHOD(device_probe,         t6_probe),
191         DEVMETHOD(device_attach,        t4_attach),
192         DEVMETHOD(device_detach,        t4_detach),
193
194         DEVMETHOD(t4_is_main_ready,     t4_ready),
195         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
196
197         DEVMETHOD_END
198 };
199 static driver_t t6_driver = {
200         "t6nex",
201         t6_methods,
202         sizeof(struct adapter)
203 };
204
205
206 /* T6 port (cc) interface */
207 static driver_t cc_driver = {
208         "cc",
209         cxgbe_methods,
210         sizeof(struct port_info)
211 };
212
213 /* T6 VI (vcc) interface */
214 static driver_t vcc_driver = {
215         "vcc",
216         vcxgbe_methods,
217         sizeof(struct vi_info)
218 };
219
220 /* ifnet interface */
221 static void cxgbe_init(void *);
222 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
223 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
224 static void cxgbe_qflush(struct ifnet *);
225
226 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");
227
228 /*
229  * Correct lock order when you need to acquire multiple locks is t4_list_lock,
230  * then ADAPTER_LOCK, then t4_uld_list_lock.
231  */
232 static struct sx t4_list_lock;
233 SLIST_HEAD(, adapter) t4_list;
234 #ifdef TCP_OFFLOAD
235 static struct sx t4_uld_list_lock;
236 SLIST_HEAD(, uld_info) t4_uld_list;
237 #endif
238
239 /*
240  * Tunables.  See tweak_tunables() too.
241  *
242  * Each tunable is set to a default value here if it's known at compile-time.
243  * Otherwise it is set to -n as an indication to tweak_tunables() that it should
244  * provide a reasonable default (upto n) when the driver is loaded.
245  *
246  * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
247  * T5 are under hw.cxl.
248  */
249
250 /*
251  * Number of queues for tx and rx, NIC and offload.
252  */
253 #define NTXQ 16
254 int t4_ntxq = -NTXQ;
255 TUNABLE_INT("hw.cxgbe.ntxq", &t4_ntxq);
256 TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);      /* Old name, undocumented */
257
258 #define NRXQ 8
259 int t4_nrxq = -NRXQ;
260 TUNABLE_INT("hw.cxgbe.nrxq", &t4_nrxq);
261 TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);      /* Old name, undocumented */
262
263 #define NTXQ_VI 1
264 static int t4_ntxq_vi = -NTXQ_VI;
265 TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi);
266
267 #define NRXQ_VI 1
268 static int t4_nrxq_vi = -NRXQ_VI;
269 TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi);
270
271 static int t4_rsrv_noflowq = 0;
272 TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq);
273
274 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
275 #define NOFLDTXQ 8
276 static int t4_nofldtxq = -NOFLDTXQ;
277 TUNABLE_INT("hw.cxgbe.nofldtxq", &t4_nofldtxq);
278
279 #define NOFLDRXQ 2
280 static int t4_nofldrxq = -NOFLDRXQ;
281 TUNABLE_INT("hw.cxgbe.nofldrxq", &t4_nofldrxq);
282
283 #define NOFLDTXQ_VI 1
284 static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
285 TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi);
286
287 #define NOFLDRXQ_VI 1
288 static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
289 TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi);
290
291 #define TMR_IDX_OFLD 1
292 int t4_tmr_idx_ofld = TMR_IDX_OFLD;
293 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_ofld", &t4_tmr_idx_ofld);
294
295 #define PKTC_IDX_OFLD (-1)
296 int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
297 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_ofld", &t4_pktc_idx_ofld);
298
299 /* 0 means chip/fw default, non-zero number is value in microseconds */
300 static u_long t4_toe_keepalive_idle = 0;
301 TUNABLE_ULONG("hw.cxgbe.toe.keepalive_idle", &t4_toe_keepalive_idle);
302
303 /* 0 means chip/fw default, non-zero number is value in microseconds */
304 static u_long t4_toe_keepalive_interval = 0;
305 TUNABLE_ULONG("hw.cxgbe.toe.keepalive_interval", &t4_toe_keepalive_interval);
306
307 /* 0 means chip/fw default, non-zero number is # of keepalives before abort */
308 static int t4_toe_keepalive_count = 0;
309 TUNABLE_INT("hw.cxgbe.toe.keepalive_count", &t4_toe_keepalive_count);
310
311 /* 0 means chip/fw default, non-zero number is value in microseconds */
312 static u_long t4_toe_rexmt_min = 0;
313 TUNABLE_ULONG("hw.cxgbe.toe.rexmt_min", &t4_toe_rexmt_min);
314
315 /* 0 means chip/fw default, non-zero number is value in microseconds */
316 static u_long t4_toe_rexmt_max = 0;
317 TUNABLE_ULONG("hw.cxgbe.toe.rexmt_max", &t4_toe_rexmt_max);
318
319 /* 0 means chip/fw default, non-zero number is # of rexmt before abort */
320 static int t4_toe_rexmt_count = 0;
321 TUNABLE_INT("hw.cxgbe.toe.rexmt_count", &t4_toe_rexmt_count);
322
323 /* -1 means chip/fw default, other values are raw backoff values to use */
324 static int t4_toe_rexmt_backoff[16] = {
325         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
326 };
327 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.0", &t4_toe_rexmt_backoff[0]);
328 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.1", &t4_toe_rexmt_backoff[1]);
329 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.2", &t4_toe_rexmt_backoff[2]);
330 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.3", &t4_toe_rexmt_backoff[3]);
331 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.4", &t4_toe_rexmt_backoff[4]);
332 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.5", &t4_toe_rexmt_backoff[5]);
333 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.6", &t4_toe_rexmt_backoff[6]);
334 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.7", &t4_toe_rexmt_backoff[7]);
335 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.8", &t4_toe_rexmt_backoff[8]);
336 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.9", &t4_toe_rexmt_backoff[9]);
337 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.10", &t4_toe_rexmt_backoff[10]);
338 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.11", &t4_toe_rexmt_backoff[11]);
339 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.12", &t4_toe_rexmt_backoff[12]);
340 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.13", &t4_toe_rexmt_backoff[13]);
341 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.14", &t4_toe_rexmt_backoff[14]);
342 TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.15", &t4_toe_rexmt_backoff[15]);
343 #endif
344
345 #ifdef DEV_NETMAP
346 #define NNMTXQ_VI 2
347 static int t4_nnmtxq_vi = -NNMTXQ_VI;
348 TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi);
349
350 #define NNMRXQ_VI 2
351 static int t4_nnmrxq_vi = -NNMRXQ_VI;
352 TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi);
353 #endif
354
355 /*
356  * Holdoff parameters for ports.
357  */
358 #define TMR_IDX 1
359 int t4_tmr_idx = TMR_IDX;
360 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx", &t4_tmr_idx);
361 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);     /* Old name */
362
363 #define PKTC_IDX (-1)
364 int t4_pktc_idx = PKTC_IDX;
365 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx", &t4_pktc_idx);
366 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);     /* Old name */
367
368 /*
369  * Size (# of entries) of each tx and rx queue.
370  */
371 unsigned int t4_qsize_txq = TX_EQ_QSIZE;
372 TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);
373
374 unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
375 TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);
376
377 /*
378  * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
379  */
380 int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
381 TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
382
383 /*
384  * Configuration file.  All the _CF names here are special.
385  */
386 #define DEFAULT_CF      "default"
387 #define BUILTIN_CF      "built-in"
388 #define FLASH_CF        "flash"
389 #define UWIRE_CF        "uwire"
390 #define FPGA_CF         "fpga"
391 static char t4_cfg_file[32] = DEFAULT_CF;
392 TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));
393
394 /*
395  * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
396  * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
397  * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
398  *            mark or when signalled to do so, 0 to never emit PAUSE.
399  * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
400  *                 negotiated settings will override rx_pause/tx_pause.
401  *                 Otherwise rx_pause/tx_pause are applied forcibly.
402  */
403 static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
404 TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
405
406 /*
407  * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
408  * -1 to run with the firmware default.  Same as FEC_AUTO (bit 5)
409  *  0 to disable FEC.
410  */
411 static int t4_fec = -1;
412 TUNABLE_INT("hw.cxgbe.fec", &t4_fec);
413
414 /*
415  * Link autonegotiation.
416  * -1 to run with the firmware default.
417  *  0 to disable.
418  *  1 to enable.
419  */
420 static int t4_autoneg = -1;
421 TUNABLE_INT("hw.cxgbe.autoneg", &t4_autoneg);
422
423 /*
424  * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
425  * encouraged respectively).
426  */
427 static unsigned int t4_fw_install = 1;
428 TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install);
429
430 /*
431  * ASIC features that will be used.  Disable the ones you don't want so that the
432  * chip resources aren't wasted on features that will not be used.
433  */
434 static int t4_nbmcaps_allowed = 0;
435 TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed);
436
437 static int t4_linkcaps_allowed = 0;     /* No DCBX, PPP, etc. by default */
438 TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);
439
440 static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
441     FW_CAPS_CONFIG_SWITCH_EGRESS;
442 TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed);
443
444 #ifdef RATELIMIT
445 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
446         FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
447 #else
448 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
449         FW_CAPS_CONFIG_NIC_HASHFILTER;
450 #endif
451 TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
452
453 static int t4_toecaps_allowed = -1;
454 TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);
455
456 static int t4_rdmacaps_allowed = -1;
457 TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);
458
459 static int t4_cryptocaps_allowed = -1;
460 TUNABLE_INT("hw.cxgbe.cryptocaps_allowed", &t4_cryptocaps_allowed);
461
462 static int t4_iscsicaps_allowed = -1;
463 TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);
464
465 static int t4_fcoecaps_allowed = 0;
466 TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);
467
468 static int t5_write_combine = 0;
469 TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);
470
471 static int t4_num_vis = 1;
472 TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);
473 /*
474  * PCIe Relaxed Ordering.
475  * -1: driver should figure out a good value.
476  * 0: disable RO.
477  * 1: enable RO.
478  * 2: leave RO alone.
479  */
480 static int pcie_relaxed_ordering = -1;
481 TUNABLE_INT("hw.cxgbe.pcie_relaxed_ordering", &pcie_relaxed_ordering);
482
483 static int t4_panic_on_fatal_err = 0;
484 TUNABLE_INT("hw.cxgbe.panic_on_fatal_err", &t4_panic_on_fatal_err);
485
486 #ifdef TCP_OFFLOAD
487 /*
488  * TOE tunables.
489  */
490 static int t4_cop_managed_offloading = 0;
491 TUNABLE_INT("hw.cxgbe.cop_managed_offloading", &t4_cop_managed_offloading);
492 #endif
493
494 /* Functions used by VIs to obtain unique MAC addresses for each VI. */
495 static int vi_mac_funcs[] = {
496         FW_VI_FUNC_ETH,
497         FW_VI_FUNC_OFLD,
498         FW_VI_FUNC_IWARP,
499         FW_VI_FUNC_OPENISCSI,
500         FW_VI_FUNC_OPENFCOE,
501         FW_VI_FUNC_FOISCSI,
502         FW_VI_FUNC_FOFCOE,
503 };
504
505 struct intrs_and_queues {
506         uint16_t intr_type;     /* INTx, MSI, or MSI-X */
507         uint16_t num_vis;       /* number of VIs for each port */
508         uint16_t nirq;          /* Total # of vectors */
509         uint16_t ntxq;          /* # of NIC txq's for each port */
510         uint16_t nrxq;          /* # of NIC rxq's for each port */
511         uint16_t nofldtxq;      /* # of TOE/ETHOFLD txq's for each port */
512         uint16_t nofldrxq;      /* # of TOE rxq's for each port */
513
514         /* The vcxgbe/vcxl interfaces use these and not the ones above. */
515         uint16_t ntxq_vi;       /* # of NIC txq's */
516         uint16_t nrxq_vi;       /* # of NIC rxq's */
517         uint16_t nofldtxq_vi;   /* # of TOE txq's */
518         uint16_t nofldrxq_vi;   /* # of TOE rxq's */
519         uint16_t nnmtxq_vi;     /* # of netmap txq's */
520         uint16_t nnmrxq_vi;     /* # of netmap rxq's */
521 };
522
523 static void setup_memwin(struct adapter *);
524 static void position_memwin(struct adapter *, int, uint32_t);
525 static int validate_mem_range(struct adapter *, uint32_t, int);
526 static int fwmtype_to_hwmtype(int);
527 static int validate_mt_off_len(struct adapter *, int, uint32_t, int,
528     uint32_t *);
529 static int fixup_devlog_params(struct adapter *);
530 static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
531 static int prep_firmware(struct adapter *);
532 static int partition_resources(struct adapter *, const struct firmware *,
533     const char *);
534 static int get_params__pre_init(struct adapter *);
535 static int get_params__post_init(struct adapter *);
536 static int set_params__post_init(struct adapter *);
537 static void t4_set_desc(struct adapter *);
538 static bool fixed_ifmedia(struct port_info *);
539 static void build_medialist(struct port_info *);
540 static void init_link_config(struct port_info *);
541 static int fixup_link_config(struct port_info *);
542 static int apply_link_config(struct port_info *);
543 static int cxgbe_init_synchronized(struct vi_info *);
544 static int cxgbe_uninit_synchronized(struct vi_info *);
545 static void quiesce_txq(struct adapter *, struct sge_txq *);
546 static void quiesce_wrq(struct adapter *, struct sge_wrq *);
547 static void quiesce_iq(struct adapter *, struct sge_iq *);
548 static void quiesce_fl(struct adapter *, struct sge_fl *);
549 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
550     driver_intr_t *, void *, char *);
551 static int t4_free_irq(struct adapter *, struct irq *);
552 static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
553 static void vi_refresh_stats(struct adapter *, struct vi_info *);
554 static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
555 static void cxgbe_tick(void *);
556 static void cxgbe_sysctls(struct port_info *);
557 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
558 static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
559 static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
560 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
561 static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
562 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
563 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
564 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
565 static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
566 static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
567 static int sysctl_fec(SYSCTL_HANDLER_ARGS);
568 static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
569 static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
570 static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
571 static int sysctl_loadavg(SYSCTL_HANDLER_ARGS);
572 static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
573 static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
574 static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
575 static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS);
576 static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
577 static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
578 static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
579 static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
580 static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
581 static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
582 static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
583 static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
584 static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
585 static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
586 static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
587 static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
588 static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
589 static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
590 static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
591 static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
592 static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
593 static int sysctl_tids(SYSCTL_HANDLER_ARGS);
594 static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
595 static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
596 static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
597 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
598 static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
599 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
600 static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
601 #ifdef TCP_OFFLOAD
602 static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
603 static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
604 static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
605 static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
606 static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
607 static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
608 static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
609 static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
610 #endif
611 static int get_sge_context(struct adapter *, struct t4_sge_context *);
612 static int load_fw(struct adapter *, struct t4_data *);
613 static int load_cfg(struct adapter *, struct t4_data *);
614 static int load_boot(struct adapter *, struct t4_bootrom *);
615 static int load_bootcfg(struct adapter *, struct t4_data *);
616 static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
617 static void free_offload_policy(struct t4_offload_policy *);
618 static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
619 static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
620 static int read_i2c(struct adapter *, struct t4_i2c_data *);
621 #ifdef TCP_OFFLOAD
622 static int toe_capability(struct vi_info *, int);
623 #endif
624 static int mod_event(module_t, int, void *);
625 static int notify_siblings(device_t, int);
626
627 struct {
628         uint16_t device;
629         char *desc;
630 } t4_pciids[] = {
631         {0xa000, "Chelsio Terminator 4 FPGA"},
632         {0x4400, "Chelsio T440-dbg"},
633         {0x4401, "Chelsio T420-CR"},
634         {0x4402, "Chelsio T422-CR"},
635         {0x4403, "Chelsio T440-CR"},
636         {0x4404, "Chelsio T420-BCH"},
637         {0x4405, "Chelsio T440-BCH"},
638         {0x4406, "Chelsio T440-CH"},
639         {0x4407, "Chelsio T420-SO"},
640         {0x4408, "Chelsio T420-CX"},
641         {0x4409, "Chelsio T420-BT"},
642         {0x440a, "Chelsio T404-BT"},
643         {0x440e, "Chelsio T440-LP-CR"},
644 }, t5_pciids[] = {
645         {0xb000, "Chelsio Terminator 5 FPGA"},
646         {0x5400, "Chelsio T580-dbg"},
647         {0x5401,  "Chelsio T520-CR"},           /* 2 x 10G */
648         {0x5402,  "Chelsio T522-CR"},           /* 2 x 10G, 2 X 1G */
649         {0x5403,  "Chelsio T540-CR"},           /* 4 x 10G */
650         {0x5407,  "Chelsio T520-SO"},           /* 2 x 10G, nomem */
651         {0x5409,  "Chelsio T520-BT"},           /* 2 x 10GBaseT */
652         {0x540a,  "Chelsio T504-BT"},           /* 4 x 1G */
653         {0x540d,  "Chelsio T580-CR"},           /* 2 x 40G */
654         {0x540e,  "Chelsio T540-LP-CR"},        /* 4 x 10G */
655         {0x5410,  "Chelsio T580-LP-CR"},        /* 2 x 40G */
656         {0x5411,  "Chelsio T520-LL-CR"},        /* 2 x 10G */
657         {0x5412,  "Chelsio T560-CR"},           /* 1 x 40G, 2 x 10G */
658         {0x5414,  "Chelsio T580-LP-SO-CR"},     /* 2 x 40G, nomem */
659         {0x5415,  "Chelsio T502-BT"},           /* 2 x 1G */
660         {0x5418,  "Chelsio T540-BT"},           /* 4 x 10GBaseT */
661         {0x5419,  "Chelsio T540-LP-BT"},        /* 4 x 10GBaseT */
662         {0x541a,  "Chelsio T540-SO-BT"},        /* 4 x 10GBaseT, nomem */
663         {0x541b,  "Chelsio T540-SO-CR"},        /* 4 x 10G, nomem */
664 }, t6_pciids[] = {
665         {0xc006, "Chelsio Terminator 6 FPGA"},  /* T6 PE10K6 FPGA (PF0) */
666         {0x6400, "Chelsio T6-DBG-25"},          /* 2 x 10/25G, debug */
667         {0x6401, "Chelsio T6225-CR"},           /* 2 x 10/25G */
668         {0x6402, "Chelsio T6225-SO-CR"},        /* 2 x 10/25G, nomem */
669         {0x6403, "Chelsio T6425-CR"},           /* 4 x 10/25G */
670         {0x6404, "Chelsio T6425-SO-CR"},        /* 4 x 10/25G, nomem */
671         {0x6405, "Chelsio T6225-OCP-SO"},       /* 2 x 10/25G, nomem */
672         {0x6406, "Chelsio T62100-OCP-SO"},      /* 2 x 40/50/100G, nomem */
673         {0x6407, "Chelsio T62100-LP-CR"},       /* 2 x 40/50/100G */
674         {0x6408, "Chelsio T62100-SO-CR"},       /* 2 x 40/50/100G, nomem */
675         {0x6409, "Chelsio T6210-BT"},           /* 2 x 10GBASE-T */
676         {0x640d, "Chelsio T62100-CR"},          /* 2 x 40/50/100G */
677         {0x6410, "Chelsio T6-DBG-100"},         /* 2 x 40/50/100G, debug */
678         {0x6411, "Chelsio T6225-LL-CR"},        /* 2 x 10/25G */
679         {0x6414, "Chelsio T61100-OCP-SO"},      /* 1 x 40/50/100G, nomem */
680         {0x6415, "Chelsio T6201-BT"},           /* 2 x 1000BASE-T */
681
682         /* Custom */
683         {0x6480, "Custom T6225-CR"},
684         {0x6481, "Custom T62100-CR"},
685         {0x6482, "Custom T6225-CR"},
686         {0x6483, "Custom T62100-CR"},
687         {0x6484, "Custom T64100-CR"},
688         {0x6485, "Custom T6240-SO"},
689         {0x6486, "Custom T6225-SO-CR"},
690         {0x6487, "Custom T6225-CR"},
691 };
692
693 #ifdef TCP_OFFLOAD
694 /*
695  * service_iq_fl() has an iq and needs the fl.  Offset of fl from the iq should
696  * be exactly the same for both rxq and ofld_rxq.
697  */
698 CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
699 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
700 #endif
701 CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);
702
703 static int
704 t4_probe(device_t dev)
705 {
706         int i;
707         uint16_t v = pci_get_vendor(dev);
708         uint16_t d = pci_get_device(dev);
709         uint8_t f = pci_get_function(dev);
710
711         if (v != PCI_VENDOR_ID_CHELSIO)
712                 return (ENXIO);
713
714         /* Attach only to PF0 of the FPGA */
715         if (d == 0xa000 && f != 0)
716                 return (ENXIO);
717
718         for (i = 0; i < nitems(t4_pciids); i++) {
719                 if (d == t4_pciids[i].device) {
720                         device_set_desc(dev, t4_pciids[i].desc);
721                         return (BUS_PROBE_DEFAULT);
722                 }
723         }
724
725         return (ENXIO);
726 }
727
728 static int
729 t5_probe(device_t dev)
730 {
731         int i;
732         uint16_t v = pci_get_vendor(dev);
733         uint16_t d = pci_get_device(dev);
734         uint8_t f = pci_get_function(dev);
735
736         if (v != PCI_VENDOR_ID_CHELSIO)
737                 return (ENXIO);
738
739         /* Attach only to PF0 of the FPGA */
740         if (d == 0xb000 && f != 0)
741                 return (ENXIO);
742
743         for (i = 0; i < nitems(t5_pciids); i++) {
744                 if (d == t5_pciids[i].device) {
745                         device_set_desc(dev, t5_pciids[i].desc);
746                         return (BUS_PROBE_DEFAULT);
747                 }
748         }
749
750         return (ENXIO);
751 }
752
753 static int
754 t6_probe(device_t dev)
755 {
756         int i;
757         uint16_t v = pci_get_vendor(dev);
758         uint16_t d = pci_get_device(dev);
759
760         if (v != PCI_VENDOR_ID_CHELSIO)
761                 return (ENXIO);
762
763         for (i = 0; i < nitems(t6_pciids); i++) {
764                 if (d == t6_pciids[i].device) {
765                         device_set_desc(dev, t6_pciids[i].desc);
766                         return (BUS_PROBE_DEFAULT);
767                 }
768         }
769
770         return (ENXIO);
771 }
772
773 static void
774 t5_attribute_workaround(device_t dev)
775 {
776         device_t root_port;
777         uint32_t v;
778
779         /*
780          * The T5 chips do not properly echo the No Snoop and Relaxed
781          * Ordering attributes when replying to a TLP from a Root
782          * Port.  As a workaround, find the parent Root Port and
783          * disable No Snoop and Relaxed Ordering.  Note that this
784          * affects all devices under this root port.
785          */
786         root_port = pci_find_pcie_root_port(dev);
787         if (root_port == NULL) {
788                 device_printf(dev, "Unable to find parent root port\n");
789                 return;
790         }
791
792         v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
793             PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
794         if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
795             0)
796                 device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
797                     device_get_nameunit(root_port));
798 }
799
/*
 * Per-generation device-name tables, indexed by chip generation relative
 * to CHELSIO_T4 (see t4_init_devnames).  Supplies the nexus, ifnet, VI,
 * iov, and VF names used when creating child devices.
 */
static const struct devnames devnames[] = {
        {
                .nexus_name = "t4nex",
                .ifnet_name = "cxgbe",
                .vi_ifnet_name = "vcxgbe",
                .pf03_drv_name = "t4iov",
                .vf_nexus_name = "t4vf",
                .vf_ifnet_name = "cxgbev"
        }, {
                .nexus_name = "t5nex",
                .ifnet_name = "cxl",
                .vi_ifnet_name = "vcxl",
                .pf03_drv_name = "t5iov",
                .vf_nexus_name = "t5vf",
                .vf_ifnet_name = "cxlv"
        }, {
                .nexus_name = "t6nex",
                .ifnet_name = "cc",
                .vi_ifnet_name = "vcc",
                .pf03_drv_name = "t6iov",
                .vf_nexus_name = "t6vf",
                .vf_ifnet_name = "ccv"
        }
};
824
825 void
826 t4_init_devnames(struct adapter *sc)
827 {
828         int id;
829
830         id = chip_id(sc);
831         if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
832                 sc->names = &devnames[id - CHELSIO_T4];
833         else {
834                 device_printf(sc->dev, "chip id %d is not supported.\n", id);
835                 sc->names = NULL;
836         }
837 }
838
839 static int
840 t4_attach(device_t dev)
841 {
842         struct adapter *sc;
843         int rc = 0, i, j, rqidx, tqidx, nports;
844         struct make_dev_args mda;
845         struct intrs_and_queues iaq;
846         struct sge *s;
847         uint32_t *buf;
848 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
849         int ofld_tqidx;
850 #endif
851 #ifdef TCP_OFFLOAD
852         int ofld_rqidx;
853 #endif
854 #ifdef DEV_NETMAP
855         int nm_rqidx, nm_tqidx;
856 #endif
857         int num_vis;
858
859         sc = device_get_softc(dev);
860         sc->dev = dev;
861         TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);
862
863         if ((pci_get_device(dev) & 0xff00) == 0x5400)
864                 t5_attribute_workaround(dev);
865         pci_enable_busmaster(dev);
866         if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
867                 uint32_t v;
868
869                 pci_set_max_read_req(dev, 4096);
870                 v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
871                 sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
872                 if (pcie_relaxed_ordering == 0 &&
873                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
874                         v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
875                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
876                 } else if (pcie_relaxed_ordering == 1 &&
877                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
878                         v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
879                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
880                 }
881         }
882
883         sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
884         sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
885         sc->traceq = -1;
886         mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
887         snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
888             device_get_nameunit(dev));
889
890         snprintf(sc->lockname, sizeof(sc->lockname), "%s",
891             device_get_nameunit(dev));
892         mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
893         t4_add_adapter(sc);
894
895         mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
896         TAILQ_INIT(&sc->sfl);
897         callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
898
899         mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);
900
901         sc->policy = NULL;
902         rw_init(&sc->policy_lock, "connection offload policy");
903
904         rc = t4_map_bars_0_and_4(sc);
905         if (rc != 0)
906                 goto done; /* error message displayed already */
907
908         memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
909
910         /* Prepare the adapter for operation. */
911         buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
912         rc = -t4_prep_adapter(sc, buf);
913         free(buf, M_CXGBE);
914         if (rc != 0) {
915                 device_printf(dev, "failed to prepare adapter: %d.\n", rc);
916                 goto done;
917         }
918
919         /*
920          * This is the real PF# to which we're attaching.  Works from within PCI
921          * passthrough environments too, where pci_get_function() could return a
922          * different PF# depending on the passthrough configuration.  We need to
923          * use the real PF# in all our communication with the firmware.
924          */
925         j = t4_read_reg(sc, A_PL_WHOAMI);
926         sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
927         sc->mbox = sc->pf;
928
929         t4_init_devnames(sc);
930         if (sc->names == NULL) {
931                 rc = ENOTSUP;
932                 goto done; /* error message displayed already */
933         }
934
935         /*
936          * Do this really early, with the memory windows set up even before the
937          * character device.  The userland tool's register i/o and mem read
938          * will work even in "recovery mode".
939          */
940         setup_memwin(sc);
941         if (t4_init_devlog_params(sc, 0) == 0)
942                 fixup_devlog_params(sc);
943         make_dev_args_init(&mda);
944         mda.mda_devsw = &t4_cdevsw;
945         mda.mda_uid = UID_ROOT;
946         mda.mda_gid = GID_WHEEL;
947         mda.mda_mode = 0600;
948         mda.mda_si_drv1 = sc;
949         rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
950         if (rc != 0)
951                 device_printf(dev, "failed to create nexus char device: %d.\n",
952                     rc);
953
954         /* Go no further if recovery mode has been requested. */
955         if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
956                 device_printf(dev, "recovery mode.\n");
957                 goto done;
958         }
959
960 #if defined(__i386__)
961         if ((cpu_feature & CPUID_CX8) == 0) {
962                 device_printf(dev, "64 bit atomics not available.\n");
963                 rc = ENOTSUP;
964                 goto done;
965         }
966 #endif
967
968         /* Prepare the firmware for operation */
969         rc = prep_firmware(sc);
970         if (rc != 0)
971                 goto done; /* error message displayed already */
972
973         rc = get_params__post_init(sc);
974         if (rc != 0)
975                 goto done; /* error message displayed already */
976
977         rc = set_params__post_init(sc);
978         if (rc != 0)
979                 goto done; /* error message displayed already */
980
981         rc = t4_map_bar_2(sc);
982         if (rc != 0)
983                 goto done; /* error message displayed already */
984
985         rc = t4_create_dma_tag(sc);
986         if (rc != 0)
987                 goto done; /* error message displayed already */
988
989         /*
990          * First pass over all the ports - allocate VIs and initialize some
991          * basic parameters like mac address, port type, etc.
992          */
993         for_each_port(sc, i) {
994                 struct port_info *pi;
995
996                 pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
997                 sc->port[i] = pi;
998
999                 /* These must be set before t4_port_init */
1000                 pi->adapter = sc;
1001                 pi->port_id = i;
1002                 /*
1003                  * XXX: vi[0] is special so we can't delay this allocation until
1004                  * pi->nvi's final value is known.
1005                  */
1006                 pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
1007                     M_ZERO | M_WAITOK);
1008
1009                 /*
1010                  * Allocate the "main" VI and initialize parameters
1011                  * like mac addr.
1012                  */
1013                 rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
1014                 if (rc != 0) {
1015                         device_printf(dev, "unable to initialize port %d: %d\n",
1016                             i, rc);
1017                         free(pi->vi, M_CXGBE);
1018                         free(pi, M_CXGBE);
1019                         sc->port[i] = NULL;
1020                         goto done;
1021                 }
1022
1023                 snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
1024                     device_get_nameunit(dev), i);
1025                 mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
1026                 sc->chan_map[pi->tx_chan] = i;
1027
1028                 /* All VIs on this port share this media. */
1029                 ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
1030                     cxgbe_media_status);
1031
1032                 PORT_LOCK(pi);
1033                 init_link_config(pi);
1034                 fixup_link_config(pi);
1035                 build_medialist(pi);
1036                 if (fixed_ifmedia(pi))
1037                         pi->flags |= FIXED_IFMEDIA;
1038                 PORT_UNLOCK(pi);
1039
1040                 pi->dev = device_add_child(dev, sc->names->ifnet_name, -1);
1041                 if (pi->dev == NULL) {
1042                         device_printf(dev,
1043                             "failed to add device for port %d.\n", i);
1044                         rc = ENXIO;
1045                         goto done;
1046                 }
1047                 pi->vi[0].dev = pi->dev;
1048                 device_set_softc(pi->dev, pi);
1049         }
1050
1051         /*
1052          * Interrupt type, # of interrupts, # of rx/tx queues, etc.
1053          */
1054         nports = sc->params.nports;
1055         rc = cfg_itype_and_nqueues(sc, &iaq);
1056         if (rc != 0)
1057                 goto done; /* error message displayed already */
1058
1059         num_vis = iaq.num_vis;
1060         sc->intr_type = iaq.intr_type;
1061         sc->intr_count = iaq.nirq;
1062
1063         s = &sc->sge;
1064         s->nrxq = nports * iaq.nrxq;
1065         s->ntxq = nports * iaq.ntxq;
1066         if (num_vis > 1) {
1067                 s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
1068                 s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
1069         }
1070         s->neq = s->ntxq + s->nrxq;     /* the free list in an rxq is an eq */
1071         s->neq += nports;               /* ctrl queues: 1 per port */
1072         s->niq = s->nrxq + 1;           /* 1 extra for firmware event queue */
1073 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1074         if (is_offload(sc) || is_ethoffload(sc)) {
1075                 s->nofldtxq = nports * iaq.nofldtxq;
1076                 if (num_vis > 1)
1077                         s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
1078                 s->neq += s->nofldtxq;
1079
1080                 s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
1081                     M_CXGBE, M_ZERO | M_WAITOK);
1082         }
1083 #endif
1084 #ifdef TCP_OFFLOAD
1085         if (is_offload(sc)) {
1086                 s->nofldrxq = nports * iaq.nofldrxq;
1087                 if (num_vis > 1)
1088                         s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
1089                 s->neq += s->nofldrxq;  /* free list */
1090                 s->niq += s->nofldrxq;
1091
1092                 s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
1093                     M_CXGBE, M_ZERO | M_WAITOK);
1094         }
1095 #endif
1096 #ifdef DEV_NETMAP
1097         if (num_vis > 1) {
1098                 s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi;
1099                 s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi;
1100         }
1101         s->neq += s->nnmtxq + s->nnmrxq;
1102         s->niq += s->nnmrxq;
1103
1104         s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
1105             M_CXGBE, M_ZERO | M_WAITOK);
1106         s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
1107             M_CXGBE, M_ZERO | M_WAITOK);
1108 #endif
1109
1110         s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
1111             M_ZERO | M_WAITOK);
1112         s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
1113             M_ZERO | M_WAITOK);
1114         s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
1115             M_ZERO | M_WAITOK);
1116         s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
1117             M_ZERO | M_WAITOK);
1118         s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
1119             M_ZERO | M_WAITOK);
1120
1121         sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
1122             M_ZERO | M_WAITOK);
1123
1124         t4_init_l2t(sc, M_WAITOK);
1125         t4_init_smt(sc, M_WAITOK);
1126         t4_init_tx_sched(sc);
1127 #ifdef RATELIMIT
1128         t4_init_etid_table(sc);
1129 #endif
1130
1131         /*
1132          * Second pass over the ports.  This time we know the number of rx and
1133          * tx queues that each port should get.
1134          */
1135         rqidx = tqidx = 0;
1136 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1137         ofld_tqidx = 0;
1138 #endif
1139 #ifdef TCP_OFFLOAD
1140         ofld_rqidx = 0;
1141 #endif
1142 #ifdef DEV_NETMAP
1143         nm_rqidx = nm_tqidx = 0;
1144 #endif
1145         for_each_port(sc, i) {
1146                 struct port_info *pi = sc->port[i];
1147                 struct vi_info *vi;
1148
1149                 if (pi == NULL)
1150                         continue;
1151
1152                 pi->nvi = num_vis;
1153                 for_each_vi(pi, j, vi) {
1154                         vi->pi = pi;
1155                         vi->qsize_rxq = t4_qsize_rxq;
1156                         vi->qsize_txq = t4_qsize_txq;
1157
1158                         vi->first_rxq = rqidx;
1159                         vi->first_txq = tqidx;
1160                         vi->tmr_idx = t4_tmr_idx;
1161                         vi->pktc_idx = t4_pktc_idx;
1162                         vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
1163                         vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;
1164
1165                         rqidx += vi->nrxq;
1166                         tqidx += vi->ntxq;
1167
1168                         if (j == 0 && vi->ntxq > 1)
1169                                 vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
1170                         else
1171                                 vi->rsrv_noflowq = 0;
1172
1173 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1174                         vi->first_ofld_txq = ofld_tqidx;
1175                         vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;
1176                         ofld_tqidx += vi->nofldtxq;
1177 #endif
1178 #ifdef TCP_OFFLOAD
1179                         vi->ofld_tmr_idx = t4_tmr_idx_ofld;
1180                         vi->ofld_pktc_idx = t4_pktc_idx_ofld;
1181                         vi->first_ofld_rxq = ofld_rqidx;
1182                         vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;
1183
1184                         ofld_rqidx += vi->nofldrxq;
1185 #endif
1186 #ifdef DEV_NETMAP
1187                         if (j > 0) {
1188                                 vi->first_nm_rxq = nm_rqidx;
1189                                 vi->first_nm_txq = nm_tqidx;
1190                                 vi->nnmrxq = iaq.nnmrxq_vi;
1191                                 vi->nnmtxq = iaq.nnmtxq_vi;
1192                                 nm_rqidx += vi->nnmrxq;
1193                                 nm_tqidx += vi->nnmtxq;
1194                         }
1195 #endif
1196                 }
1197         }
1198
1199         rc = t4_setup_intr_handlers(sc);
1200         if (rc != 0) {
1201                 device_printf(dev,
1202                     "failed to setup interrupt handlers: %d\n", rc);
1203                 goto done;
1204         }
1205
1206         rc = bus_generic_probe(dev);
1207         if (rc != 0) {
1208                 device_printf(dev, "failed to probe child drivers: %d\n", rc);
1209                 goto done;
1210         }
1211
1212         /*
1213          * Ensure thread-safe mailbox access (in debug builds).
1214          *
1215          * So far this was the only thread accessing the mailbox but various
1216          * ifnets and sysctls are about to be created and their handlers/ioctls
1217          * will access the mailbox from different threads.
1218          */
1219         sc->flags |= CHK_MBOX_ACCESS;
1220
1221         rc = bus_generic_attach(dev);
1222         if (rc != 0) {
1223                 device_printf(dev,
1224                     "failed to attach all child ports: %d\n", rc);
1225                 goto done;
1226         }
1227
1228         device_printf(dev,
1229             "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
1230             sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
1231             sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
1232             (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
1233             sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
1234
1235         t4_set_desc(sc);
1236
1237         notify_siblings(dev, 0);
1238
1239 done:
1240         if (rc != 0 && sc->cdev) {
1241                 /* cdev was created and so cxgbetool works; recover that way. */
1242                 device_printf(dev,
1243                     "error during attach, adapter is now in recovery mode.\n");
1244                 rc = 0;
1245         }
1246
1247         if (rc != 0)
1248                 t4_detach_common(dev);
1249         else
1250                 t4_sysctls(sc);
1251
1252         return (rc);
1253 }
1254
1255 static int
1256 t4_ready(device_t dev)
1257 {
1258         struct adapter *sc;
1259
1260         sc = device_get_softc(dev);
1261         if (sc->flags & FW_OK)
1262                 return (0);
1263         return (ENXIO);
1264 }
1265
1266 static int
1267 t4_read_port_device(device_t dev, int port, device_t *child)
1268 {
1269         struct adapter *sc;
1270         struct port_info *pi;
1271
1272         sc = device_get_softc(dev);
1273         if (port < 0 || port >= MAX_NPORTS)
1274                 return (EINVAL);
1275         pi = sc->port[port];
1276         if (pi == NULL || pi->dev == NULL)
1277                 return (ENXIO);
1278         *child = pi->dev;
1279         return (0);
1280 }
1281
1282 static int
1283 notify_siblings(device_t dev, int detaching)
1284 {
1285         device_t sibling;
1286         int error, i;
1287
1288         error = 0;
1289         for (i = 0; i < PCI_FUNCMAX; i++) {
1290                 if (i == pci_get_function(dev))
1291                         continue;
1292                 sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
1293                     pci_get_slot(dev), i);
1294                 if (sibling == NULL || !device_is_attached(sibling))
1295                         continue;
1296                 if (detaching)
1297                         error = T4_DETACH_CHILD(sibling);
1298                 else
1299                         (void)T4_ATTACH_CHILD(sibling);
1300                 if (error)
1301                         break;
1302         }
1303         return (error);
1304 }
1305
1306 /*
1307  * Idempotent
1308  */
1309 static int
1310 t4_detach(device_t dev)
1311 {
1312         struct adapter *sc;
1313         int rc;
1314
1315         sc = device_get_softc(dev);
1316
1317         rc = notify_siblings(dev, 1);
1318         if (rc) {
1319                 device_printf(dev,
1320                     "failed to detach sibling devices: %d\n", rc);
1321                 return (rc);
1322         }
1323
1324         return (t4_detach_common(dev));
1325 }
1326
/*
 * Common teardown for the nexus device, also used to clean up after a
 * failed attach (see t4_attach's done label).  Every step checks that
 * its resource was actually set up, so this is safe to run against a
 * partially-initialized softc.  The order is significant: children and
 * interrupts go first, firmware notification and PCI resources next,
 * locks last, and finally the softc is zeroed.
 */
int
t4_detach_common(device_t dev)
{
        struct adapter *sc;
        struct port_info *pi;
        int i, rc;

        sc = device_get_softc(dev);

        /* Remove the control character device first to stop new ioctls. */
        if (sc->cdev) {
                destroy_dev(sc->cdev);
                sc->cdev = NULL;
        }

        sc->flags &= ~CHK_MBOX_ACCESS;
        if (sc->flags & FULL_INIT_DONE) {
                if (!(sc->flags & IS_VF))
                        t4_intr_disable(sc);
        }

        if (device_is_attached(dev)) {
                rc = bus_generic_detach(dev);
                if (rc) {
                        device_printf(dev,
                            "failed to detach child devices: %d\n", rc);
                        return (rc);
                }
        }

        for (i = 0; i < sc->intr_count; i++)
                t4_free_irq(sc, &sc->irq[i]);

        /* Tx scheduler state exists only on a PF with working firmware. */
        if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
                t4_free_tx_sched(sc);

        /* Free per-port state: firmware VI, child device, lock, memory. */
        for (i = 0; i < MAX_NPORTS; i++) {
                pi = sc->port[i];
                if (pi) {
                        t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
                        if (pi->dev)
                                device_delete_child(dev, pi->dev);

                        mtx_destroy(&pi->pi_lock);
                        free(pi->vi, M_CXGBE);
                        free(pi, M_CXGBE);
                }
        }

        device_delete_children(dev);

        if (sc->flags & FULL_INIT_DONE)
                adapter_full_uninit(sc);

        /* Say goodbye to the firmware (PF with FW_OK only). */
        if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
                t4_fw_bye(sc, sc->mbox);

        if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
                pci_release_msi(dev);

        if (sc->regs_res)
                bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
                    sc->regs_res);

        if (sc->udbs_res)
                bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
                    sc->udbs_res);

        if (sc->msix_res)
                bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
                    sc->msix_res);

        if (sc->l2t)
                t4_free_l2t(sc->l2t);
        if (sc->smt)
                t4_free_smt(sc->smt);
#ifdef RATELIMIT
        t4_free_etid_table(sc);
#endif

        /* Queue arrays allocated in t4_attach (free(NULL) is a no-op). */
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
        free(sc->sge.ofld_txq, M_CXGBE);
#endif
#ifdef TCP_OFFLOAD
        free(sc->sge.ofld_rxq, M_CXGBE);
#endif
#ifdef DEV_NETMAP
        free(sc->sge.nm_rxq, M_CXGBE);
        free(sc->sge.nm_txq, M_CXGBE);
#endif
        free(sc->irq, M_CXGBE);
        free(sc->sge.rxq, M_CXGBE);
        free(sc->sge.txq, M_CXGBE);
        free(sc->sge.ctrlq, M_CXGBE);
        free(sc->sge.iqmap, M_CXGBE);
        free(sc->sge.eqmap, M_CXGBE);
        free(sc->tids.ftid_tab, M_CXGBE);
        free(sc->tids.hpftid_tab, M_CXGBE);
        free_hftid_hash(&sc->tids);
        free(sc->tids.atid_tab, M_CXGBE);
        free(sc->tids.tid_tab, M_CXGBE);
        free(sc->tt.tls_rx_ports, M_CXGBE);
        t4_destroy_dma_tag(sc);
        /* Drop off the global adapter list while holding t4_list_lock. */
        if (mtx_initialized(&sc->sc_lock)) {
                sx_xlock(&t4_list_lock);
                SLIST_REMOVE(&t4_list, sc, adapter, link);
                sx_xunlock(&t4_list_lock);
                mtx_destroy(&sc->sc_lock);
        }

        callout_drain(&sc->sfl_callout);
        if (mtx_initialized(&sc->tids.ftid_lock)) {
                mtx_destroy(&sc->tids.ftid_lock);
                cv_destroy(&sc->tids.ftid_cv);
        }
        if (mtx_initialized(&sc->tids.atid_lock))
                mtx_destroy(&sc->tids.atid_lock);
        if (mtx_initialized(&sc->sfl_lock))
                mtx_destroy(&sc->sfl_lock);
        if (mtx_initialized(&sc->ifp_lock))
                mtx_destroy(&sc->ifp_lock);
        if (mtx_initialized(&sc->reg_lock))
                mtx_destroy(&sc->reg_lock);

        if (rw_initialized(&sc->policy_lock)) {
                rw_destroy(&sc->policy_lock);
#ifdef TCP_OFFLOAD
                if (sc->policy != NULL)
                        free_offload_policy(sc->policy);
#endif
        }

        for (i = 0; i < NUM_MEMWIN; i++) {
                struct memwin *mw = &sc->memwin[i];

                if (rw_initialized(&mw->mw_lock))
                        rw_destroy(&mw->mw_lock);
        }

        /* Leave the softc pristine so a re-attach starts from scratch. */
        bzero(sc, sizeof(*sc));

        return (0);
}
1469
1470 static int
1471 cxgbe_probe(device_t dev)
1472 {
1473         char buf[128];
1474         struct port_info *pi = device_get_softc(dev);
1475
1476         snprintf(buf, sizeof(buf), "port %d", pi->port_id);
1477         device_set_desc_copy(dev, buf);
1478
1479         return (BUS_PROBE_DEFAULT);
1480 }
1481
/*
 * Interface capabilities advertised by every cxgbe/cxl/cc ifnet, and the
 * subset enabled by default (currently the full set).
 */
#define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS | \
    IFCAP_HWRXTSTMP)
#define T4_CAP_ENABLE (T4_CAP)
1487
/*
 * Shared ifnet setup for a VI (virtual interface): allocates the ifnet,
 * installs the driver entry points, advertises capabilities based on the
 * queues this VI was assigned, attaches the ethernet interface, and prints
 * a one-line queue summary.  Used by both the port (main VI) and extra-VI
 * attach paths.
 *
 * Returns 0 on success or ENOMEM if the ifnet cannot be allocated.
 */
static int
cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
        struct ifnet *ifp;
        struct sbuf *sb;

        vi->xact_addr_filt = -1;        /* -1: no exact-match filter yet */
        callout_init(&vi->tick, 1);

        /* Allocate an ifnet and set it up */
        ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "Cannot allocate ifnet\n");
                return (ENOMEM);
        }
        vi->ifp = ifp;
        ifp->if_softc = vi;

        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;

        /* Driver entry points. */
        ifp->if_init = cxgbe_init;
        ifp->if_ioctl = cxgbe_ioctl;
        ifp->if_transmit = cxgbe_transmit;
        ifp->if_qflush = cxgbe_qflush;
        ifp->if_get_counter = cxgbe_get_counter;
#ifdef RATELIMIT
        ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc;
        ifp->if_snd_tag_modify = cxgbe_snd_tag_modify;
        ifp->if_snd_tag_query = cxgbe_snd_tag_query;
        ifp->if_snd_tag_free = cxgbe_snd_tag_free;
#endif

        /* Capabilities: base set plus whatever this VI's queues support. */
        ifp->if_capabilities = T4_CAP;
        ifp->if_capenable = T4_CAP_ENABLE;
#ifdef TCP_OFFLOAD
        if (vi->nofldrxq != 0)
                ifp->if_capabilities |= IFCAP_TOE;
#endif
#ifdef DEV_NETMAP
        if (vi->nnmrxq != 0)
                ifp->if_capabilities |= IFCAP_NETMAP;
#endif
#ifdef RATELIMIT
        if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0) {
                ifp->if_capabilities |= IFCAP_TXRTLMT;
                ifp->if_capenable |= IFCAP_TXRTLMT;
        }
#endif
        ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
            CSUM_UDP_IPV6 | CSUM_TCP_IPV6;

        /* TSO limits. */
        ifp->if_hw_tsomax = IP_MAXPACKET;
        ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
#ifdef RATELIMIT
        /* Rate-limited (ETHOFLD) tx uses a different SGL segment limit. */
        if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0)
                ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
#endif
        ifp->if_hw_tsomaxsegsize = 0;

        ether_ifattach(ifp, vi->hw_addr);
#ifdef DEV_NETMAP
        if (ifp->if_capabilities & IFCAP_NETMAP)
                cxgbe_nm_attach(vi);
#endif
        /* Build and print a one-line summary of the queue configuration. */
        sb = sbuf_new_auto();
        sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
        switch (ifp->if_capabilities & (IFCAP_TOE | IFCAP_TXRTLMT)) {
        case IFCAP_TOE:
                sbuf_printf(sb, "; %d txq (TOE)", vi->nofldtxq);
                break;
        case IFCAP_TOE | IFCAP_TXRTLMT:
                sbuf_printf(sb, "; %d txq (TOE/ETHOFLD)", vi->nofldtxq);
                break;
        case IFCAP_TXRTLMT:
                sbuf_printf(sb, "; %d txq (ETHOFLD)", vi->nofldtxq);
                break;
        }
#endif
#ifdef TCP_OFFLOAD
        if (ifp->if_capabilities & IFCAP_TOE)
                sbuf_printf(sb, ", %d rxq (TOE)", vi->nofldrxq);
#endif
#ifdef DEV_NETMAP
        if (ifp->if_capabilities & IFCAP_NETMAP)
                sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
                    vi->nnmtxq, vi->nnmrxq);
#endif
        sbuf_finish(sb);
        device_printf(dev, "%s\n", sbuf_data(sb));
        sbuf_delete(sb);

        vi_sysctls(vi);

        return (0);
}
1585
1586 static int
1587 cxgbe_attach(device_t dev)
1588 {
1589         struct port_info *pi = device_get_softc(dev);
1590         struct adapter *sc = pi->adapter;
1591         struct vi_info *vi;
1592         int i, rc;
1593
1594         callout_init_mtx(&pi->tick, &pi->pi_lock, 0);
1595
1596         rc = cxgbe_vi_attach(dev, &pi->vi[0]);
1597         if (rc)
1598                 return (rc);
1599
1600         for_each_vi(pi, i, vi) {
1601                 if (i == 0)
1602                         continue;
1603                 vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
1604                 if (vi->dev == NULL) {
1605                         device_printf(dev, "failed to add VI %d\n", i);
1606                         continue;
1607                 }
1608                 device_set_softc(vi->dev, vi);
1609         }
1610
1611         cxgbe_sysctls(pi);
1612
1613         bus_generic_attach(dev);
1614
1615         return (0);
1616 }
1617
/*
 * Tear down a VI's ifnet: detach from the network stack first so no new
 * traffic arrives, then unwind netmap, quiesce the hardware side, and
 * free the ifnet.
 */
static void
cxgbe_vi_detach(struct vi_info *vi)
{
        struct ifnet *ifp = vi->ifp;

        ether_ifdetach(ifp);

        /* Let detach proceed even if these fail. */
#ifdef DEV_NETMAP
        if (ifp->if_capabilities & IFCAP_NETMAP)
                cxgbe_nm_detach(vi);
#endif
        cxgbe_uninit_synchronized(vi);
        callout_drain(&vi->tick);
        vi_full_uninit(vi);

        if_free(vi->ifp);
        vi->ifp = NULL;         /* mark the VI as having no ifnet */
}
1637
/*
 * Detach for a port device.  Extra VI children are detached first; then
 * the main VI is doomed (doom_vi pairs with the end_synchronized_op at
 * the bottom), the tracing queue is released if this port owned it, and
 * the main VI's ifnet is torn down.
 */
static int
cxgbe_detach(device_t dev)
{
        struct port_info *pi = device_get_softc(dev);
        struct adapter *sc = pi->adapter;
        int rc;

        /* Detach the extra VIs first. */
        rc = bus_generic_detach(dev);
        if (rc)
                return (rc);
        device_delete_children(dev);

        doom_vi(sc, &pi->vi[0]);

        if (pi->flags & HAS_TRACEQ) {
                sc->traceq = -1;        /* cloner should not create ifnet */
                t4_tracer_port_detach(sc);
        }

        cxgbe_vi_detach(&pi->vi[0]);
        callout_drain(&pi->tick);
        ifmedia_removeall(&pi->media);

        /* Releases the hold taken by doom_vi above. */
        end_synchronized_op(sc, 0);

        return (0);
}
1666
1667 static void
1668 cxgbe_init(void *arg)
1669 {
1670         struct vi_info *vi = arg;
1671         struct adapter *sc = vi->pi->adapter;
1672
1673         if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
1674                 return;
1675         cxgbe_init_synchronized(vi);
1676         end_synchronized_op(sc, 0);
1677 }
1678
1679 static int
1680 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
1681 {
1682         int rc = 0, mtu, flags;
1683         struct vi_info *vi = ifp->if_softc;
1684         struct port_info *pi = vi->pi;
1685         struct adapter *sc = pi->adapter;
1686         struct ifreq *ifr = (struct ifreq *)data;
1687         uint32_t mask;
1688
1689         switch (cmd) {
1690         case SIOCSIFMTU:
1691                 mtu = ifr->ifr_mtu;
1692                 if (mtu < ETHERMIN || mtu > MAX_MTU)
1693                         return (EINVAL);
1694
1695                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
1696                 if (rc)
1697                         return (rc);
1698                 ifp->if_mtu = mtu;
1699                 if (vi->flags & VI_INIT_DONE) {
1700                         t4_update_fl_bufsize(ifp);
1701                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1702                                 rc = update_mac_settings(ifp, XGMAC_MTU);
1703                 }
1704                 end_synchronized_op(sc, 0);
1705                 break;
1706
1707         case SIOCSIFFLAGS:
1708                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4flg");
1709                 if (rc)
1710                         return (rc);
1711
1712                 if (ifp->if_flags & IFF_UP) {
1713                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1714                                 flags = vi->if_flags;
1715                                 if ((ifp->if_flags ^ flags) &
1716                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1717                                         rc = update_mac_settings(ifp,
1718                                             XGMAC_PROMISC | XGMAC_ALLMULTI);
1719                                 }
1720                         } else {
1721                                 rc = cxgbe_init_synchronized(vi);
1722                         }
1723                         vi->if_flags = ifp->if_flags;
1724                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1725                         rc = cxgbe_uninit_synchronized(vi);
1726                 }
1727                 end_synchronized_op(sc, 0);
1728                 break;
1729
1730         case SIOCADDMULTI:
1731         case SIOCDELMULTI:
1732                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4multi");
1733                 if (rc)
1734                         return (rc);
1735                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1736                         rc = update_mac_settings(ifp, XGMAC_MCADDRS);
1737                 end_synchronized_op(sc, 0);
1738                 break;
1739
1740         case SIOCSIFCAP:
1741                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
1742                 if (rc)
1743                         return (rc);
1744
1745                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1746                 if (mask & IFCAP_TXCSUM) {
1747                         ifp->if_capenable ^= IFCAP_TXCSUM;
1748                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1749
1750                         if (IFCAP_TSO4 & ifp->if_capenable &&
1751                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1752                                 ifp->if_capenable &= ~IFCAP_TSO4;
1753                                 if_printf(ifp,
1754                                     "tso4 disabled due to -txcsum.\n");
1755                         }
1756                 }
1757                 if (mask & IFCAP_TXCSUM_IPV6) {
1758                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1759                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1760
1761                         if (IFCAP_TSO6 & ifp->if_capenable &&
1762                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1763                                 ifp->if_capenable &= ~IFCAP_TSO6;
1764                                 if_printf(ifp,
1765                                     "tso6 disabled due to -txcsum6.\n");
1766                         }
1767                 }
1768                 if (mask & IFCAP_RXCSUM)
1769                         ifp->if_capenable ^= IFCAP_RXCSUM;
1770                 if (mask & IFCAP_RXCSUM_IPV6)
1771                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1772
1773                 /*
1774                  * Note that we leave CSUM_TSO alone (it is always set).  The
1775                  * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1776                  * sending a TSO request our way, so it's sufficient to toggle
1777                  * IFCAP_TSOx only.
1778                  */
1779                 if (mask & IFCAP_TSO4) {
1780                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1781                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1782                                 if_printf(ifp, "enable txcsum first.\n");
1783                                 rc = EAGAIN;
1784                                 goto fail;
1785                         }
1786                         ifp->if_capenable ^= IFCAP_TSO4;
1787                 }
1788                 if (mask & IFCAP_TSO6) {
1789                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1790                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1791                                 if_printf(ifp, "enable txcsum6 first.\n");
1792                                 rc = EAGAIN;
1793                                 goto fail;
1794                         }
1795                         ifp->if_capenable ^= IFCAP_TSO6;
1796                 }
1797                 if (mask & IFCAP_LRO) {
1798 #if defined(INET) || defined(INET6)
1799                         int i;
1800                         struct sge_rxq *rxq;
1801
1802                         ifp->if_capenable ^= IFCAP_LRO;
1803                         for_each_rxq(vi, i, rxq) {
1804                                 if (ifp->if_capenable & IFCAP_LRO)
1805                                         rxq->iq.flags |= IQ_LRO_ENABLED;
1806                                 else
1807                                         rxq->iq.flags &= ~IQ_LRO_ENABLED;
1808                         }
1809 #endif
1810                 }
1811 #ifdef TCP_OFFLOAD
1812                 if (mask & IFCAP_TOE) {
1813                         int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
1814
1815                         rc = toe_capability(vi, enable);
1816                         if (rc != 0)
1817                                 goto fail;
1818
1819                         ifp->if_capenable ^= mask;
1820                 }
1821 #endif
1822                 if (mask & IFCAP_VLAN_HWTAGGING) {
1823                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1824                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1825                                 rc = update_mac_settings(ifp, XGMAC_VLANEX);
1826                 }
1827                 if (mask & IFCAP_VLAN_MTU) {
1828                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
1829
1830                         /* Need to find out how to disable auto-mtu-inflation */
1831                 }
1832                 if (mask & IFCAP_VLAN_HWTSO)
1833                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1834                 if (mask & IFCAP_VLAN_HWCSUM)
1835                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
1836 #ifdef RATELIMIT
1837                 if (mask & IFCAP_TXRTLMT)
1838                         ifp->if_capenable ^= IFCAP_TXRTLMT;
1839 #endif
1840                 if (mask & IFCAP_HWRXTSTMP) {
1841                         int i;
1842                         struct sge_rxq *rxq;
1843
1844                         ifp->if_capenable ^= IFCAP_HWRXTSTMP;
1845                         for_each_rxq(vi, i, rxq) {
1846                                 if (ifp->if_capenable & IFCAP_HWRXTSTMP)
1847                                         rxq->iq.flags |= IQ_RX_TIMESTAMP;
1848                                 else
1849                                         rxq->iq.flags &= ~IQ_RX_TIMESTAMP;
1850                         }
1851                 }
1852
1853 #ifdef VLAN_CAPABILITIES
1854                 VLAN_CAPABILITIES(ifp);
1855 #endif
1856 fail:
1857                 end_synchronized_op(sc, 0);
1858                 break;
1859
1860         case SIOCSIFMEDIA:
1861         case SIOCGIFMEDIA:
1862         case SIOCGIFXMEDIA:
1863                 ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
1864                 break;
1865
1866         case SIOCGI2C: {
1867                 struct ifi2creq i2c;
1868
1869                 rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
1870                 if (rc != 0)
1871                         break;
1872                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
1873                         rc = EPERM;
1874                         break;
1875                 }
1876                 if (i2c.len > sizeof(i2c.data)) {
1877                         rc = EINVAL;
1878                         break;
1879                 }
1880                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
1881                 if (rc)
1882                         return (rc);
1883                 rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
1884                     i2c.offset, i2c.len, &i2c.data[0]);
1885                 end_synchronized_op(sc, 0);
1886                 if (rc == 0)
1887                         rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
1888                 break;
1889         }
1890
1891         default:
1892                 rc = ether_ioctl(ifp, cmd, data);
1893         }
1894
1895         return (rc);
1896 }
1897
/*
 * if_transmit method.  Parses the packet, picks a tx queue (flowid-based when
 * the mbuf has a hash), and enqueues it on that queue's mp_ring.
 * Returns 0 on success or an errno; the mbuf is always consumed (freed on
 * error) except where noted.
 */
static int
cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct sge_txq *txq;
	void *items[1];
	int rc;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */

	if (__predict_false(pi->link_cfg.link_ok == false)) {
		m_freem(m);
		return (ENETDOWN);
	}

	rc = parse_pkt(sc, &m);
	if (__predict_false(rc != 0)) {
		MPASS(m == NULL);			/* was freed already */
		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
		return (rc);
	}
#ifdef RATELIMIT
	if (m->m_pkthdr.snd_tag != NULL) {
		/* EAGAIN tells the stack we are not the correct interface. */
		if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) {
			m_freem(m);
			return (EAGAIN);
		}

		/* Rate-limited traffic goes out via the ethofld path. */
		return (ethofld_transmit(ifp, m));
	}
#endif

	/*
	 * Select a txq.  The first rsrv_noflowq queues are reserved for
	 * traffic without a flowid; hashed flows spread over the rest.
	 */
	txq = &sc->sge.txq[vi->first_txq];
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
		    vi->rsrv_noflowq);

	items[0] = m;
	rc = mp_ring_enqueue(txq->r, items, 1, 4096);
	if (__predict_false(rc != 0))
		m_freem(m);

	return (rc);
}
1947
/*
 * if_qflush method: discard everything queued for transmit.  Sets EQ_QFLUSH
 * on each txq (so the drain path drops instead of transmits), waits for the
 * mp_ring to empty, then clears the flag.
 */
static void
cxgbe_qflush(struct ifnet *ifp)
{
	struct vi_info *vi = ifp->if_softc;
	struct sge_txq *txq;
	int i;

	/* queues do not exist if !VI_INIT_DONE. */
	if (vi->flags & VI_INIT_DONE) {
		for_each_txq(vi, i, txq) {
			TXQ_LOCK(txq);
			txq->eq.flags |= EQ_QFLUSH;
			TXQ_UNLOCK(txq);
			/* Poll until the ring drains (entries get dropped). */
			while (!mp_ring_is_idle(txq->r)) {
				mp_ring_check_drainage(txq->r, 0);
				pause("qflush", 1);
			}
			TXQ_LOCK(txq);
			txq->eq.flags &= ~EQ_QFLUSH;
			TXQ_UNLOCK(txq);
		}
	}
	if_qflush(ifp);		/* flush the stack-side queue too */
}
1972
/*
 * if_get_counter implementation for VIs that use the per-VI firmware stats
 * (extra VIs, and all VIs on a VF).  Refreshes vi->stats and maps the
 * requested ifnet counter onto the firmware counters.
 */
static uint64_t
vi_get_counter(struct ifnet *ifp, ift_counter c)
{
	struct vi_info *vi = ifp->if_softc;
	struct fw_vi_stats_vf *s = &vi->stats;

	vi_refresh_stats(vi->pi->adapter, vi);

	switch (c) {
	case IFCOUNTER_IPACKETS:
		return (s->rx_bcast_frames + s->rx_mcast_frames +
		    s->rx_ucast_frames);
	case IFCOUNTER_IERRORS:
		return (s->rx_err_frames);
	case IFCOUNTER_OPACKETS:
		return (s->tx_bcast_frames + s->tx_mcast_frames +
		    s->tx_ucast_frames + s->tx_offload_frames);
	case IFCOUNTER_OERRORS:
		return (s->tx_drop_frames);
	case IFCOUNTER_IBYTES:
		return (s->rx_bcast_bytes + s->rx_mcast_bytes +
		    s->rx_ucast_bytes);
	case IFCOUNTER_OBYTES:
		return (s->tx_bcast_bytes + s->tx_mcast_bytes +
		    s->tx_ucast_bytes + s->tx_offload_bytes);
	case IFCOUNTER_IMCASTS:
		return (s->rx_mcast_frames);
	case IFCOUNTER_OMCASTS:
		return (s->tx_mcast_frames);
	case IFCOUNTER_OQDROPS: {
		uint64_t drops;

		/* Sum the drop counters of all tx rings (if they exist). */
		drops = 0;
		if (vi->flags & VI_INIT_DONE) {
			int i;
			struct sge_txq *txq;

			for_each_txq(vi, i, txq)
				drops += counter_u64_fetch(txq->r->drops);
		}

		return (drops);

	}

	default:
		return (if_get_counter_default(ifp, c));
	}
}
2022
/*
 * if_get_counter method.  Uses the port's MAC statistics when this is the
 * only VI on the port of a PF; otherwise (multiple VIs, or a VF) falls back
 * to the per-VI firmware stats via vi_get_counter().
 */
uint64_t
cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct port_stats *s = &pi->stats;

	if (pi->nvi > 1 || sc->flags & IS_VF)
		return (vi_get_counter(ifp, c));

	cxgbe_refresh_stats(sc, pi);

	switch (c) {
	case IFCOUNTER_IPACKETS:
		return (s->rx_frames);

	case IFCOUNTER_IERRORS:
		return (s->rx_jabber + s->rx_runt + s->rx_too_long +
		    s->rx_fcs_err + s->rx_len_err);

	case IFCOUNTER_OPACKETS:
		return (s->tx_frames);

	case IFCOUNTER_OERRORS:
		return (s->tx_error_frames);

	case IFCOUNTER_IBYTES:
		return (s->rx_octets);

	case IFCOUNTER_OBYTES:
		return (s->tx_octets);

	case IFCOUNTER_IMCASTS:
		return (s->rx_mcast_frames);

	case IFCOUNTER_OMCASTS:
		return (s->tx_mcast_frames);

	case IFCOUNTER_IQDROPS:
		/* MAC overflow/truncation drops plus tunnel congestion drops. */
		return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
		    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
		    s->rx_trunc3 + pi->tnl_cong_drops);

	case IFCOUNTER_OQDROPS: {
		uint64_t drops;

		/* MAC tx drops plus the drop counters of all tx rings. */
		drops = s->tx_drop;
		if (vi->flags & VI_INIT_DONE) {
			int i;
			struct sge_txq *txq;

			for_each_txq(vi, i, txq)
				drops += counter_u64_fetch(txq->r->drops);
		}

		return (drops);

	}

	default:
		return (if_get_counter_default(ifp, c));
	}
}
2087
/*
 * The kernel picks a media from the list we had provided but we still
 * validate the request.
 */
/*
 * ifmedia change callback: translate the selected media word into the port's
 * requested link configuration (autoneg/speed/pause) and apply it if any VI
 * on the port is up.  Returns 0 or an errno.
 */
int
cxgbe_media_change(struct ifnet *ifp)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct ifmedia *ifm = &pi->media;
	struct link_config *lc = &pi->link_cfg;
	struct adapter *sc = pi->adapter;
	int rc;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec");
	if (rc != 0)
		return (rc);
	PORT_LOCK(pi);
	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
		/* ifconfig .. media autoselect */
		if (!(lc->supported & FW_PORT_CAP32_ANEG)) {
			rc = ENOTSUP; /* AN not supported by transceiver */
			goto done;
		}
		lc->requested_aneg = AUTONEG_ENABLE;
		lc->requested_speed = 0;	/* let autoneg pick the speed */
		lc->requested_fc |= PAUSE_AUTONEG;
	} else {
		/* Fixed media: derive the speed from the media's baudrate. */
		lc->requested_aneg = AUTONEG_DISABLE;
		lc->requested_speed =
		    ifmedia_baudrate(ifm->ifm_media) / 1000000;
		lc->requested_fc = 0;
		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
			lc->requested_fc |= PAUSE_RX;
		if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
			lc->requested_fc |= PAUSE_TX;
	}
	if (pi->up_vis > 0) {
		/* Port is in use; push the new configuration to the hardware. */
		fixup_link_config(pi);
		rc = apply_link_config(pi);
	}
done:
	PORT_UNLOCK(pi);
	end_synchronized_op(sc, 0);
	return (rc);
}
2134
2135 /*
2136  * Base media word (without ETHER, pause, link active, etc.) for the port at the
2137  * given speed.
2138  */
static int
port_mword(struct port_info *pi, uint32_t speed)
{

	/* speed must be exactly one FW_PORT_CAP32 speed bit. */
	MPASS(speed & M_FW_PORT_CAP32_SPEED);
	MPASS(powerof2(speed));

	switch(pi->port_type) {
	case FW_PORT_TYPE_BT_SGMII:
	case FW_PORT_TYPE_BT_XFI:
	case FW_PORT_TYPE_BT_XAUI:
		/* BaseT */
		switch (speed) {
		case FW_PORT_CAP32_SPEED_100M:
			return (IFM_100_T);
		case FW_PORT_CAP32_SPEED_1G:
			return (IFM_1000_T);
		case FW_PORT_CAP32_SPEED_10G:
			return (IFM_10G_T);
		}
		break;
	case FW_PORT_TYPE_KX4:
		if (speed == FW_PORT_CAP32_SPEED_10G)
			return (IFM_10G_KX4);
		break;
	case FW_PORT_TYPE_CX4:
		if (speed == FW_PORT_CAP32_SPEED_10G)
			return (IFM_10G_CX4);
		break;
	case FW_PORT_TYPE_KX:
		if (speed == FW_PORT_CAP32_SPEED_1G)
			return (IFM_1000_KX);
		break;
	case FW_PORT_TYPE_KR:
	case FW_PORT_TYPE_BP_AP:
	case FW_PORT_TYPE_BP4_AP:
	case FW_PORT_TYPE_BP40_BA:
	case FW_PORT_TYPE_KR4_100G:
	case FW_PORT_TYPE_KR_SFP28:
	case FW_PORT_TYPE_KR_XLAUI:
		/* Backplane (KR-style) ports. */
		switch (speed) {
		case FW_PORT_CAP32_SPEED_1G:
			return (IFM_1000_KX);
		case FW_PORT_CAP32_SPEED_10G:
			return (IFM_10G_KR);
		case FW_PORT_CAP32_SPEED_25G:
			return (IFM_25G_KR);
		case FW_PORT_CAP32_SPEED_40G:
			return (IFM_40G_KR4);
		case FW_PORT_CAP32_SPEED_50G:
			return (IFM_50G_KR2);
		case FW_PORT_CAP32_SPEED_100G:
			return (IFM_100G_KR4);
		}
		break;
	case FW_PORT_TYPE_FIBER_XFI:
	case FW_PORT_TYPE_FIBER_XAUI:
	case FW_PORT_TYPE_SFP:
	case FW_PORT_TYPE_QSFP_10G:
	case FW_PORT_TYPE_QSA:
	case FW_PORT_TYPE_QSFP:
	case FW_PORT_TYPE_CR4_QSFP:
	case FW_PORT_TYPE_CR_QSFP:
	case FW_PORT_TYPE_CR2_QSFP:
	case FW_PORT_TYPE_SFP28:
		/* Pluggable transceiver: media depends on the module type. */
		switch (pi->mod_type) {
		case FW_PORT_MOD_TYPE_LR:
			switch (speed) {
			case FW_PORT_CAP32_SPEED_1G:
				return (IFM_1000_LX);
			case FW_PORT_CAP32_SPEED_10G:
				return (IFM_10G_LR);
			case FW_PORT_CAP32_SPEED_25G:
				return (IFM_25G_LR);
			case FW_PORT_CAP32_SPEED_40G:
				return (IFM_40G_LR4);
			case FW_PORT_CAP32_SPEED_50G:
				return (IFM_50G_LR2);
			case FW_PORT_CAP32_SPEED_100G:
				return (IFM_100G_LR4);
			}
			break;
		case FW_PORT_MOD_TYPE_SR:
			switch (speed) {
			case FW_PORT_CAP32_SPEED_1G:
				return (IFM_1000_SX);
			case FW_PORT_CAP32_SPEED_10G:
				return (IFM_10G_SR);
			case FW_PORT_CAP32_SPEED_25G:
				return (IFM_25G_SR);
			case FW_PORT_CAP32_SPEED_40G:
				return (IFM_40G_SR4);
			case FW_PORT_CAP32_SPEED_50G:
				return (IFM_50G_SR2);
			case FW_PORT_CAP32_SPEED_100G:
				return (IFM_100G_SR4);
			}
			break;
		case FW_PORT_MOD_TYPE_ER:
			if (speed == FW_PORT_CAP32_SPEED_10G)
				return (IFM_10G_ER);
			break;
		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
			switch (speed) {
			case FW_PORT_CAP32_SPEED_1G:
				return (IFM_1000_CX);
			case FW_PORT_CAP32_SPEED_10G:
				return (IFM_10G_TWINAX);
			case FW_PORT_CAP32_SPEED_25G:
				return (IFM_25G_CR);
			case FW_PORT_CAP32_SPEED_40G:
				return (IFM_40G_CR4);
			case FW_PORT_CAP32_SPEED_50G:
				return (IFM_50G_CR2);
			case FW_PORT_CAP32_SPEED_100G:
				return (IFM_100G_CR4);
			}
			break;
		case FW_PORT_MOD_TYPE_LRM:
			if (speed == FW_PORT_CAP32_SPEED_10G)
				return (IFM_10G_LRM);
			break;
		case FW_PORT_MOD_TYPE_NA:
			MPASS(0);	/* Not pluggable? */
			/* fall through */
		case FW_PORT_MOD_TYPE_ERROR:
		case FW_PORT_MOD_TYPE_UNKNOWN:
		case FW_PORT_MOD_TYPE_NOTSUPPORTED:
			break;
		case FW_PORT_MOD_TYPE_NONE:
			return (IFM_NONE);
		}
		break;
	case FW_PORT_TYPE_NONE:
		return (IFM_NONE);
	}

	return (IFM_UNKNOWN);
}
2280
/*
 * ifmedia status callback: report link state and the active media word
 * (speed/duplex/pause) for the port.
 */
void
cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct link_config *lc = &pi->link_cfg;

	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0)
		return;
	PORT_LOCK(pi);

	if (pi->up_vis == 0) {
		/*
		 * If all the interfaces are administratively down the firmware
		 * does not report transceiver changes.  Refresh port info here
		 * so that ifconfig displays accurate ifmedia at all times.
		 * This is the only reason we have a synchronized op in this
		 * function.  Just PORT_LOCK would have been enough otherwise.
		 */
		t4_update_port_info(pi);
		build_medialist(pi);
	}

	/* ifm_status */
	ifmr->ifm_status = IFM_AVALID;
	if (lc->link_ok == false)
		goto done;	/* no link: only IFM_AVALID, nothing active */
	ifmr->ifm_status |= IFM_ACTIVE;

	/* ifm_active */
	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
	ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
	if (lc->fc & PAUSE_RX)
		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
	if (lc->fc & PAUSE_TX)
		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
	ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
done:
	PORT_UNLOCK(pi);
	end_synchronized_op(sc, 0);
}
2323
2324 static int
2325 vcxgbe_probe(device_t dev)
2326 {
2327         char buf[128];
2328         struct vi_info *vi = device_get_softc(dev);
2329
2330         snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2331             vi - vi->pi->vi);
2332         device_set_desc_copy(dev, buf);
2333
2334         return (BUS_PROBE_DEFAULT);
2335 }
2336
2337 static int
2338 alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2339 {
2340         int func, index, rc;
2341         uint32_t param, val;
2342
2343         ASSERT_SYNCHRONIZED_OP(sc);
2344
2345         index = vi - pi->vi;
2346         MPASS(index > 0);       /* This function deals with _extra_ VIs only */
2347         KASSERT(index < nitems(vi_mac_funcs),
2348             ("%s: VI %s doesn't have a MAC func", __func__,
2349             device_get_nameunit(vi->dev)));
2350         func = vi_mac_funcs[index];
2351         rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2352             vi->hw_addr, &vi->rss_size, func, 0);
2353         if (rc < 0) {
2354                 device_printf(vi->dev, "failed to allocate virtual interface %d"
2355                     "for port %d: %d\n", index, pi->port_id, -rc);
2356                 return (-rc);
2357         }
2358         vi->viid = rc;
2359         if (chip_id(sc) <= CHELSIO_T5)
2360                 vi->smt_idx = (rc & 0x7f) << 1;
2361         else
2362                 vi->smt_idx = (rc & 0x7f);
2363
2364         if (vi->rss_size == 1) {
2365                 /*
2366                  * This VI didn't get a slice of the RSS table.  Reduce the
2367                  * number of VIs being created (hw.cxgbe.num_vis) or modify the
2368                  * configuration file (nvi, rssnvi for this PF) if this is a
2369                  * problem.
2370                  */
2371                 device_printf(vi->dev, "RSS table not available.\n");
2372                 vi->rss_base = 0xffff;
2373
2374                 return (0);
2375         }
2376
2377         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2378             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2379             V_FW_PARAMS_PARAM_YZ(vi->viid);
2380         rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2381         if (rc)
2382                 vi->rss_base = 0xffff;
2383         else {
2384                 MPASS((val >> 16) == vi->rss_size);
2385                 vi->rss_base = val & 0xffff;
2386         }
2387
2388         return (0);
2389 }
2390
/*
 * device_attach method for an extra VI: allocate the VI in the firmware,
 * then do the common ifnet attach.  On ifnet-attach failure the firmware VI
 * is freed again so nothing leaks.
 */
static int
vcxgbe_attach(device_t dev)
{
	struct vi_info *vi;
	struct port_info *pi;
	struct adapter *sc;
	int rc;

	vi = device_get_softc(dev);
	pi = vi->pi;
	sc = pi->adapter;

	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
	if (rc)
		return (rc);
	rc = alloc_extra_vi(sc, pi, vi);
	end_synchronized_op(sc, 0);
	if (rc)
		return (rc);

	rc = cxgbe_vi_attach(dev, vi);
	if (rc) {
		/* Undo the firmware allocation on failure. */
		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
		return (rc);
	}
	return (0);
}
2418
/*
 * device_detach method for an extra VI: tear down the ifnet and release the
 * firmware VI.  NOTE(review): the end_synchronized_op() here appears to pair
 * with an op begun inside doom_vi() — confirm against doom_vi's definition.
 */
static int
vcxgbe_detach(device_t dev)
{
	struct vi_info *vi;
	struct adapter *sc;

	vi = device_get_softc(dev);
	sc = vi->pi->adapter;

	doom_vi(sc, vi);

	cxgbe_vi_detach(vi);
	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);

	end_synchronized_op(sc, 0);

	return (0);
}
2437
/*
 * Handle a fatal adapter error: stop the SGE, mask all interrupts, and log.
 * Optionally panics if the t4_panic_on_fatal_err tunable is set (useful for
 * getting a crash dump at the point of failure).
 */
void
t4_fatal_err(struct adapter *sc)
{
	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
	t4_intr_disable(sc);
	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
	    device_get_nameunit(sc->dev));
	if (t4_panic_on_fatal_err)
		panic("panic requested on fatal error");
}
2448
/*
 * Add an adapter to the global list of attached adapters (t4_list),
 * under t4_list_lock.
 */
void
t4_add_adapter(struct adapter *sc)
{
	sx_xlock(&t4_list_lock);
	SLIST_INSERT_HEAD(&t4_list, sc, link);
	sx_xunlock(&t4_list_lock);
}
2456
/*
 * Map BAR0 (register space; also the kernel doorbell region) and BAR4
 * (MSI-X tables).  Returns 0 or ENXIO.  Resources allocated here are
 * released by the caller's error/detach path.
 */
int
t4_map_bars_0_and_4(struct adapter *sc)
{
	sc->regs_rid = PCIR_BAR(0);
	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->regs_rid, RF_ACTIVE);
	if (sc->regs_res == NULL) {
		device_printf(sc->dev, "cannot map registers.\n");
		return (ENXIO);
	}
	sc->bt = rman_get_bustag(sc->regs_res);
	sc->bh = rman_get_bushandle(sc->regs_res);
	sc->mmio_len = rman_get_size(sc->regs_res);
	setbit(&sc->doorbells, DOORBELL_KDB);	/* kernel doorbells available */

	sc->msix_rid = PCIR_BAR(4);
	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->msix_rid, RF_ACTIVE);
	if (sc->msix_res == NULL) {
		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
		return (ENXIO);
	}

	return (0);
}
2482
/*
 * Map BAR2, the userspace-doorbell BAR, and record which doorbell
 * mechanisms are available in sc->doorbells.  On T5+ the mapping may
 * additionally be set to write-combining (x86 only) so that work requests
 * can be submitted with an implicit doorbell.  Returns ENXIO if the BAR
 * cannot be mapped.
 */
int
t4_map_bar_2(struct adapter *sc)
{

	/*
	 * T4: only iWARP driver uses the userspace doorbells.  There is no need
	 * to map it if RDMA is disabled.
	 */
	if (is_t4(sc) && sc->rdmacaps == 0)
		return (0);

	sc->udbs_rid = PCIR_BAR(2);
	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &sc->udbs_rid, RF_ACTIVE);
	if (sc->udbs_res == NULL) {
		device_printf(sc->dev, "cannot map doorbell BAR.\n");
		return (ENXIO);
	}
	sc->udbs_base = rman_get_virtual(sc->udbs_res);

	if (chip_id(sc) >= CHELSIO_T5) {
		setbit(&sc->doorbells, DOORBELL_UDB);
#if defined(__i386__) || defined(__amd64__)
		if (t5_write_combine) {
			int rc, mode;

			/*
			 * Enable write combining on BAR2.  This is the
			 * userspace doorbell BAR and is split into 128B
			 * (UDBS_SEG_SIZE) doorbell regions, each associated
			 * with an egress queue.  The first 64B has the doorbell
			 * and the second 64B can be used to submit a tx work
			 * request with an implicit doorbell.
			 */

			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
			if (rc == 0) {
				/* WC worked: prefer the WC doorbell modes. */
				clrbit(&sc->doorbells, DOORBELL_UDB);
				setbit(&sc->doorbells, DOORBELL_WCWR);
				setbit(&sc->doorbells, DOORBELL_UDBWC);
			} else {
				/* Fall back to the plain UDB doorbell. */
				device_printf(sc->dev,
				    "couldn't enable write combining: %d\n",
				    rc);
			}

			/* Stats source/mode register; T6 encodes mode differently. */
			mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
			t4_write_reg(sc, A_SGE_STAT_CFG,
			    V_STATSOURCE_T5(7) | mode);
		}
#endif
	}
	/* Record whether write-combined doorbells ended up enabled. */
	sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0;

	return (0);
}
2540
/* Initial placement of a PCIe memory window within BAR0. */
struct memwin_init {
	uint32_t base;		/* window offset (BAR0-relative on T5+) */
	uint32_t aperture;	/* window size in bytes */
};

/* Memory-window layout for T4 chips (window 2 differs from T5+). */
static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
	{ MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
};

/* Memory-window layout for T5 and later chips. */
static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
	{ MEMWIN0_BASE, MEMWIN0_APERTURE },
	{ MEMWIN1_BASE, MEMWIN1_APERTURE },
	{ MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
};
2557
/*
 * Program all NUM_MEMWIN PCIe memory-access windows from the per-chip
 * table, initialize each window's lock, and position every window at
 * offset 0 of the card's address space.
 */
static void
setup_memwin(struct adapter *sc)
{
	const struct memwin_init *mw_init;
	struct memwin *mw;
	int i;
	uint32_t bar0;

	if (is_t4(sc)) {
		/*
		 * Read low 32b of bar0 indirectly via the hardware backdoor
		 * mechanism.  Works from within PCI passthrough environments
		 * too, where rman_get_start() can return a different value.  We
		 * need to program the T4 memory window decoders with the actual
		 * addresses that will be coming across the PCIe link.
		 */
		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;

		mw_init = &t4_memwin[0];
	} else {
		/* T5+ use the relative offset inside the PCIe BAR */
		bar0 = 0;

		mw_init = &t5_memwin[0];
	}

	for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
		rw_init(&mw->mw_lock, "memory window access");
		mw->mw_base = mw_init->base;
		mw->mw_aperture = mw_init->aperture;
		mw->mw_curpos = 0;
		/*
		 * Window size is encoded as log2 of the aperture minus 10
		 * (i.e. in 1KB units) — presumably per the register's
		 * definition; confirm against the hardware documentation.
		 */
		t4_write_reg(sc,
		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
		    (mw->mw_base + bar0) | V_BIR(0) |
		    V_WINDOW(ilog2(mw->mw_aperture) - 10));
		rw_wlock(&mw->mw_lock);
		position_memwin(sc, i, 0);
		rw_wunlock(&mw->mw_lock);
	}

	/* flush */
	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
}
2602
2603 /*
2604  * Positions the memory window at the given address in the card's address space.
2605  * There are some alignment requirements and the actual position may be at an
2606  * address prior to the requested address.  mw->mw_curpos always has the actual
2607  * position of the window.
2608  */
static void
position_memwin(struct adapter *sc, int idx, uint32_t addr)
{
	struct memwin *mw;
	uint32_t pf;
	uint32_t reg;

	/* Caller must hold the window's lock exclusively. */
	MPASS(idx >= 0 && idx < NUM_MEMWIN);
	mw = &sc->memwin[idx];
	rw_assert(&mw->mw_lock, RA_WLOCKED);

	if (is_t4(sc)) {
		pf = 0;				/* T4 offset register has no PF field */
		mw->mw_curpos = addr & ~0xf;	/* start must be 16B aligned */
	} else {
		pf = V_PFNUM(sc->pf);
		mw->mw_curpos = addr & ~0x7f;	/* start must be 128B aligned */
	}
	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
	t4_write_reg(sc, reg, mw->mw_curpos | pf);
	t4_read_reg(sc, reg);	/* flush */
}
2631
/*
 * Read (rw == 0) or write (rw != 0) len bytes of adapter memory at the
 * given card address through memory window idx, repositioning the window
 * as needed.  addr and len must be 4-byte aligned and len positive;
 * returns EINVAL otherwise, 0 on success.  Data crosses the window in
 * little-endian 32-bit units (hence the le32toh/htole32 swaps).
 */
int
rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
    int len, int rw)
{
	struct memwin *mw;
	uint32_t mw_end, v;

	MPASS(idx >= 0 && idx < NUM_MEMWIN);

	/* Memory can only be accessed in naturally aligned 4 byte units */
	if (addr & 3 || len & 3 || len <= 0)
		return (EINVAL);

	mw = &sc->memwin[idx];
	while (len > 0) {
		rw_rlock(&mw->mw_lock);
		mw_end = mw->mw_curpos + mw->mw_aperture;
		if (addr >= mw_end || addr < mw->mw_curpos) {
			/* Will need to reposition the window */
			if (!rw_try_upgrade(&mw->mw_lock)) {
				/*
				 * Upgrade failed: drop and retake as writer.
				 * Another thread may have repositioned the
				 * window meanwhile, but position_memwin below
				 * re-establishes the position we need.
				 */
				rw_runlock(&mw->mw_lock);
				rw_wlock(&mw->mw_lock);
			}
			rw_assert(&mw->mw_lock, RA_WLOCKED);
			position_memwin(sc, idx, addr);
			rw_downgrade(&mw->mw_lock);
			mw_end = mw->mw_curpos + mw->mw_aperture;
		}
		rw_assert(&mw->mw_lock, RA_RLOCKED);
		/* Copy as much as fits within the current window position. */
		while (addr < mw_end && len > 0) {
			if (rw == 0) {
				v = t4_read_reg(sc, mw->mw_base + addr -
				    mw->mw_curpos);
				*val++ = le32toh(v);
			} else {
				v = *val++;
				t4_write_reg(sc, mw->mw_base + addr -
				    mw->mw_curpos, htole32(v));
			}
			addr += 4;
			len -= 4;
		}
		rw_runlock(&mw->mw_lock);
	}

	return (0);
}
2679
2680 int
2681 alloc_atid_tab(struct tid_info *t, int flags)
2682 {
2683         int i;
2684
2685         MPASS(t->natids > 0);
2686         MPASS(t->atid_tab == NULL);
2687
2688         t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
2689             M_ZERO | flags);
2690         if (t->atid_tab == NULL)
2691                 return (ENOMEM);
2692         mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
2693         t->afree = t->atid_tab;
2694         t->atids_in_use = 0;
2695         for (i = 1; i < t->natids; i++)
2696                 t->atid_tab[i - 1].next = &t->atid_tab[i];
2697         t->atid_tab[t->natids - 1].next = NULL;
2698
2699         return (0);
2700 }
2701
/*
 * Tear down the active-tid table created by alloc_atid_tab.  All atids
 * must have been returned already.  Safe to call even if alloc_atid_tab
 * failed (mtx_initialized guard; free(NULL) is a no-op).
 */
void
free_atid_tab(struct tid_info *t)
{

	KASSERT(t->atids_in_use == 0,
	    ("%s: %d atids still in use.", __func__, t->atids_in_use));

	if (mtx_initialized(&t->atid_lock))
		mtx_destroy(&t->atid_lock);
	free(t->atid_tab, M_CXGBE);
	t->atid_tab = NULL;
}
2714
2715 int
2716 alloc_atid(struct adapter *sc, void *ctx)
2717 {
2718         struct tid_info *t = &sc->tids;
2719         int atid = -1;
2720
2721         mtx_lock(&t->atid_lock);
2722         if (t->afree) {
2723                 union aopen_entry *p = t->afree;
2724
2725                 atid = p - t->atid_tab;
2726                 MPASS(atid <= M_TID_TID);
2727                 t->afree = p->next;
2728                 p->data = ctx;
2729                 t->atids_in_use++;
2730         }
2731         mtx_unlock(&t->atid_lock);
2732         return (atid);
2733 }
2734
2735 void *
2736 lookup_atid(struct adapter *sc, int atid)
2737 {
2738         struct tid_info *t = &sc->tids;
2739
2740         return (t->atid_tab[atid].data);
2741 }
2742
2743 void
2744 free_atid(struct adapter *sc, int atid)
2745 {
2746         struct tid_info *t = &sc->tids;
2747         union aopen_entry *p = &t->atid_tab[atid];
2748
2749         mtx_lock(&t->atid_lock);
2750         p->next = t->afree;
2751         t->afree = p;
2752         t->atids_in_use--;
2753         mtx_unlock(&t->atid_lock);
2754 }
2755
/*
 * Deferred tid release is not implemented; release_tid falls back to this
 * only when it cannot allocate a work request entry.
 */
static void
queue_tid_release(struct adapter *sc, int tid)
{

	CXGBE_UNIMPLEMENTED("deferred tid release");
}
2762
/*
 * Release a hardware tid by sending a CPL_TID_RELEASE work request on the
 * given control queue.  If a work request entry cannot be allocated the
 * release is "deferred" — but see queue_tid_release, which is currently
 * unimplemented.
 */
void
release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
{
	struct wrqe *wr;
	struct cpl_tid_release *req;

	wr = alloc_wrqe(sizeof(*req), ctrlq);
	if (wr == NULL) {
		queue_tid_release(sc, tid);	/* defer */
		return;
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);

	t4_wrq_tx(sc, wr);
}
2780
2781 static int
2782 t4_range_cmp(const void *a, const void *b)
2783 {
2784         return ((const struct t4_range *)a)->start -
2785                ((const struct t4_range *)b)->start;
2786 }
2787
2788 /*
2789  * Verify that the memory range specified by the addr/len pair is valid within
2790  * the card's address space.
2791  */
static int
validate_mem_range(struct adapter *sc, uint32_t addr, int len)
{
	struct t4_range mem_ranges[4], *r, *next;
	uint32_t em, addr_len;
	int i, n, remaining;

	/* Memory can only be accessed in naturally aligned 4 byte units */
	if (addr & 3 || len & 3 || len <= 0)
		return (EINVAL);

	/* Enabled memories */
	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);

	/*
	 * Collect the enabled, non-empty memory regions (EDC0/EDC1/MC0 and,
	 * on T5, MC1) into mem_ranges.  Each region's BAR register encodes
	 * base and size in MB (hence the << 20).  If [addr, addr+len) fits
	 * entirely inside any single region we can return success early.
	 */
	r = &mem_ranges[0];
	n = 0;
	bzero(r, sizeof(mem_ranges));
	if (em & F_EDRAM0_ENABLE) {
		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
		r->size = G_EDRAM0_SIZE(addr_len) << 20;
		if (r->size > 0) {
			r->start = G_EDRAM0_BASE(addr_len) << 20;
			if (addr >= r->start &&
			    addr + len <= r->start + r->size)
				return (0);
			r++;
			n++;
		}
	}
	if (em & F_EDRAM1_ENABLE) {
		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
		r->size = G_EDRAM1_SIZE(addr_len) << 20;
		if (r->size > 0) {
			r->start = G_EDRAM1_BASE(addr_len) << 20;
			if (addr >= r->start &&
			    addr + len <= r->start + r->size)
				return (0);
			r++;
			n++;
		}
	}
	if (em & F_EXT_MEM_ENABLE) {
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
		r->size = G_EXT_MEM_SIZE(addr_len) << 20;
		if (r->size > 0) {
			r->start = G_EXT_MEM_BASE(addr_len) << 20;
			if (addr >= r->start &&
			    addr + len <= r->start + r->size)
				return (0);
			r++;
			n++;
		}
	}
	/* NOTE(review): EXT_MEM1 is only considered on T5 here. */
	if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
		r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
		if (r->size > 0) {
			r->start = G_EXT_MEM1_BASE(addr_len) << 20;
			if (addr >= r->start &&
			    addr + len <= r->start + r->size)
				return (0);
			r++;
			n++;
		}
	}
	MPASS(n <= nitems(mem_ranges));

	/*
	 * The range may still be valid if it straddles adjacent/overlapping
	 * regions: sort the regions by start address, coalesce any that
	 * touch or overlap, and re-test against the merged ranges.
	 */
	if (n > 1) {
		/* Sort and merge the ranges. */
		qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);

		/* Start from index 0 and examine the next n - 1 entries. */
		r = &mem_ranges[0];
		for (remaining = n - 1; remaining > 0; remaining--, r++) {

			MPASS(r->size > 0);	/* r is a valid entry. */
			next = r + 1;
			MPASS(next->size > 0);	/* and so is the next one. */

			while (r->start + r->size >= next->start) {
				/* Merge the next one into the current entry. */
				r->size = max(r->start + r->size,
				    next->start + next->size) - r->start;
				n--;	/* One fewer entry in total. */
				if (--remaining == 0)
					goto done;	/* short circuit */
				next++;
			}
			if (next != r + 1) {
				/*
				 * Some entries were merged into r and next
				 * points to the first valid entry that couldn't
				 * be merged.
				 */
				MPASS(next->size > 0);	/* must be valid */
				memcpy(r + 1, next, remaining * sizeof(*r));
#ifdef INVARIANTS
				/*
				 * This so that the foo->size assertion in the
				 * next iteration of the loop do the right
				 * thing for entries that were pulled up and are
				 * no longer valid.
				 */
				MPASS(n < nitems(mem_ranges));
				bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
				    sizeof(struct t4_range));
#endif
			}
		}
done:
		/* Done merging the ranges. */
		MPASS(n > 0);
		r = &mem_ranges[0];
		for (i = 0; i < n; i++, r++) {
			if (addr >= r->start &&
			    addr + len <= r->start + r->size)
				return (0);
		}
	}

	return (EFAULT);
}
2914
/*
 * Translate a firmware memory type (FW_MEMTYPE_*) into the driver's
 * MEM_* memory type.  Panics on anything outside the four known values,
 * so callers must validate first.
 *
 * NOTE(review): this returns MEM_MC0 for FW_MEMTYPE_EXTMEM while
 * validate_mt_off_len switches on MEM_MC — presumably MEM_MC0 is an alias
 * of MEM_MC in the enum; confirm in the header that defines them.
 */
static int
fwmtype_to_hwmtype(int mtype)
{

	switch (mtype) {
	case FW_MEMTYPE_EDC0:
		return (MEM_EDC0);
	case FW_MEMTYPE_EDC1:
		return (MEM_EDC1);
	case FW_MEMTYPE_EXTMEM:
		return (MEM_MC0);
	case FW_MEMTYPE_EXTMEM1:
		return (MEM_MC1);
	default:
		panic("%s: cannot translate fw mtype %d.", __func__, mtype);
	}
}
2932
2933 /*
2934  * Verify that the memory range specified by the memtype/offset/len pair is
2935  * valid and lies entirely within the memtype specified.  The global address of
2936  * the start of the range is returned in addr.
2937  */
static int
validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
    uint32_t *addr)
{
	uint32_t em, addr_len, maddr;

	/* Memory can only be accessed in naturally aligned 4 byte units */
	if (off & 3 || len & 3 || len == 0)
		return (EINVAL);

	/*
	 * Verify the requested memory type is enabled and find its base
	 * address (the BAR registers encode base/size in MB units).
	 */
	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
	switch (fwmtype_to_hwmtype(mtype)) {
	case MEM_EDC0:
		if (!(em & F_EDRAM0_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
		maddr = G_EDRAM0_BASE(addr_len) << 20;
		break;
	case MEM_EDC1:
		if (!(em & F_EDRAM1_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
		maddr = G_EDRAM1_BASE(addr_len) << 20;
		break;
	case MEM_MC:
		if (!(em & F_EXT_MEM_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
		maddr = G_EXT_MEM_BASE(addr_len) << 20;
		break;
	case MEM_MC1:
		/* MC1 exists only on T5 (see also validate_mem_range). */
		if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
			return (EINVAL);
		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
		break;
	default:
		return (EINVAL);
	}

	/* Final bounds check is delegated to validate_mem_range. */
	*addr = maddr + off;	/* global address */
	return (validate_mem_range(sc, *addr, len));
}
2981
2982 static int
2983 fixup_devlog_params(struct adapter *sc)
2984 {
2985         struct devlog_params *dparams = &sc->params.devlog;
2986         int rc;
2987
2988         rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
2989             dparams->size, &dparams->addr);
2990
2991         return (rc);
2992 }
2993
2994 static void
2995 update_nirq(struct intrs_and_queues *iaq, int nports)
2996 {
2997         int extra = T4_EXTRA_INTR;
2998
2999         iaq->nirq = extra;
3000         iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq);
3001         iaq->nirq += nports * (iaq->num_vis - 1) *
3002             max(iaq->nrxq_vi, iaq->nnmrxq_vi);
3003         iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
3004 }
3005
3006 /*
3007  * Adjust requirements to fit the number of interrupts available.
3008  */
static void
calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
    int navail)
{
	int old_nirq;
	const int nports = sc->params.nports;

	MPASS(nports > 0);
	MPASS(navail > 0);

	/* Start from the tunable-requested configuration. */
	bzero(iaq, sizeof(*iaq));
	iaq->intr_type = itype;
	iaq->num_vis = t4_num_vis;
	iaq->ntxq = t4_ntxq;
	iaq->ntxq_vi = t4_ntxq_vi;
	iaq->nrxq = t4_nrxq;
	iaq->nrxq_vi = t4_nrxq_vi;
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	if (is_offload(sc) || is_ethoffload(sc)) {
		iaq->nofldtxq = t4_nofldtxq;
		iaq->nofldtxq_vi = t4_nofldtxq_vi;
	}
#endif
#ifdef TCP_OFFLOAD
	if (is_offload(sc)) {
		iaq->nofldrxq = t4_nofldrxq;
		iaq->nofldrxq_vi = t4_nofldrxq_vi;
	}
#endif
#ifdef DEV_NETMAP
	iaq->nnmtxq_vi = t4_nnmtxq_vi;
	iaq->nnmrxq_vi = t4_nnmrxq_vi;
#endif

	/* MSI requires a power-of-2 vector count; MSI-X/INTx do not. */
	update_nirq(iaq, nports);
	if (iaq->nirq <= navail &&
	    (itype != INTR_MSI || powerof2(iaq->nirq))) {
		/*
		 * This is the normal case -- there are enough interrupts for
		 * everything.
		 */
		goto done;
	}

	/*
	 * If extra VIs have been configured try reducing their count and see if
	 * that works.
	 */
	while (iaq->num_vis > 1) {
		iaq->num_vis--;
		update_nirq(iaq, nports);
		if (iaq->nirq <= navail &&
		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
			device_printf(sc->dev, "virtual interfaces per port "
			    "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
			    "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
			    "itype %d, navail %u, nirq %d.\n",
			    iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
			    iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
			    itype, navail, iaq->nirq);
			goto done;
		}
	}

	/*
	 * Extra VIs will not be created.  Log a message if they were requested.
	 */
	MPASS(iaq->num_vis == 1);
	iaq->ntxq_vi = iaq->nrxq_vi = 0;
	iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
	iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
	if (iaq->num_vis != t4_num_vis) {
		device_printf(sc->dev, "extra virtual interfaces disabled.  "
		    "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
		    "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
		    iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
		    iaq->nnmrxq_vi, itype, navail, iaq->nirq);
	}

	/*
	 * Keep reducing the number of NIC rx queues to the next lower power of
	 * 2 (for even RSS distribution) and halving the TOE rx queues and see
	 * if that works.
	 */
	do {
		if (iaq->nrxq > 1) {
			do {
				iaq->nrxq--;
			} while (!powerof2(iaq->nrxq));
		}
		if (iaq->nofldrxq > 1)
			iaq->nofldrxq >>= 1;

		old_nirq = iaq->nirq;
		update_nirq(iaq, nports);
		if (iaq->nirq <= navail &&
		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
			device_printf(sc->dev, "running with reduced number of "
			    "rx queues because of shortage of interrupts.  "
			    "nrxq=%u, nofldrxq=%u.  "
			    "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
			    iaq->nofldrxq, itype, navail, iaq->nirq);
			goto done;
		}
	} while (old_nirq != iaq->nirq);

	/* One interrupt for everything.  Ugh. */
	device_printf(sc->dev, "running with minimal number of queues.  "
	    "itype %d, navail %u.\n", itype, navail);
	iaq->nirq = 1;
	MPASS(iaq->nrxq == 1);
	iaq->ntxq = 1;
	/*
	 * NOTE(review): the loop above halves nofldrxq down to <= 1 before
	 * reaching here, so this condition looks unreachable — confirm the
	 * intent (perhaps nofldtxq was meant to be clamped whenever
	 * nofldrxq > 0).
	 */
	if (iaq->nofldrxq > 1)
		iaq->nofldtxq = 1;
done:
	/* Sanity-check the invariants the rest of the driver relies on. */
	MPASS(iaq->num_vis > 0);
	if (iaq->num_vis > 1) {
		MPASS(iaq->nrxq_vi > 0);
		MPASS(iaq->ntxq_vi > 0);
	}
	MPASS(iaq->nirq > 0);
	MPASS(iaq->nrxq > 0);
	MPASS(iaq->ntxq > 0);
	if (itype == INTR_MSI) {
		MPASS(powerof2(iaq->nirq));
	}
}
3136
3137 static int
3138 cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
3139 {
3140         int rc, itype, navail, nalloc;
3141
3142         for (itype = INTR_MSIX; itype; itype >>= 1) {
3143
3144                 if ((itype & t4_intr_types) == 0)
3145                         continue;       /* not allowed */
3146
3147                 if (itype == INTR_MSIX)
3148                         navail = pci_msix_count(sc->dev);
3149                 else if (itype == INTR_MSI)
3150                         navail = pci_msi_count(sc->dev);
3151                 else
3152                         navail = 1;
3153 restart:
3154                 if (navail == 0)
3155                         continue;
3156
3157                 calculate_iaq(sc, iaq, itype, navail);
3158                 nalloc = iaq->nirq;
3159                 rc = 0;
3160                 if (itype == INTR_MSIX)
3161                         rc = pci_alloc_msix(sc->dev, &nalloc);
3162                 else if (itype == INTR_MSI)
3163                         rc = pci_alloc_msi(sc->dev, &nalloc);
3164
3165                 if (rc == 0 && nalloc > 0) {
3166                         if (nalloc == iaq->nirq)
3167                                 return (0);
3168
3169                         /*
3170                          * Didn't get the number requested.  Use whatever number
3171                          * the kernel is willing to allocate.
3172                          */
3173                         device_printf(sc->dev, "fewer vectors than requested, "
3174                             "type=%d, req=%d, rcvd=%d; will downshift req.\n",
3175                             itype, iaq->nirq, nalloc);
3176                         pci_release_msi(sc->dev);
3177                         navail = nalloc;
3178                         goto restart;
3179                 }
3180
3181                 device_printf(sc->dev,
3182                     "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
3183                     itype, rc, iaq->nirq, nalloc);
3184         }
3185
3186         device_printf(sc->dev,
3187             "failed to find a usable interrupt type.  "
3188             "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
3189             pci_msix_count(sc->dev), pci_msi_count(sc->dev));
3190
3191         return (ENXIO);
3192 }
3193
/*
 * Build the packed 32-bit firmware version word for a chip generation from
 * its MAJOR/MINOR/MICRO/BUILD components (token-pasted from the per-chip
 * FW_VERSION_* constants, e.g. T4FW_VERSION_MAJOR).
 */
#define FW_VERSION(chip) ( \
    V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
    V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
    V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
    V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
/* Interface (API) version constant for a given chip and firmware interface. */
#define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
3200
/*
 * Per-chip firmware information: the firmware KLD and module names plus
 * the firmware header (version and interface versions) this driver was
 * compiled against.  Looked up by chip id via find_fw_info.
 *
 * NOTE(review): fw_info has external linkage but appears to be used only
 * through find_fw_info in this file — consider making it static const if
 * nothing else references it.
 */
struct fw_info {
	uint8_t chip;		/* CHELSIO_T4/T5/T6 */
	char *kld_name;		/* firmware/config KLD to load */
	char *fw_mod_name;	/* firmware module name */
	struct fw_hdr fw_hdr;	/* XXX: waste of space, need a sparse struct */
} fw_info[] = {
	{
		.chip = CHELSIO_T4,
		.kld_name = "t4fw_cfg",
		.fw_mod_name = "t4fw",
		.fw_hdr = {
			.chip = FW_HDR_CHIP_T4,
			.fw_ver = htobe32(FW_VERSION(T4)),
			.intfver_nic = FW_INTFVER(T4, NIC),
			.intfver_vnic = FW_INTFVER(T4, VNIC),
			.intfver_ofld = FW_INTFVER(T4, OFLD),
			.intfver_ri = FW_INTFVER(T4, RI),
			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
			.intfver_fcoe = FW_INTFVER(T4, FCOE),
		},
	}, {
		.chip = CHELSIO_T5,
		.kld_name = "t5fw_cfg",
		.fw_mod_name = "t5fw",
		.fw_hdr = {
			.chip = FW_HDR_CHIP_T5,
			.fw_ver = htobe32(FW_VERSION(T5)),
			.intfver_nic = FW_INTFVER(T5, NIC),
			.intfver_vnic = FW_INTFVER(T5, VNIC),
			.intfver_ofld = FW_INTFVER(T5, OFLD),
			.intfver_ri = FW_INTFVER(T5, RI),
			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
			.intfver_fcoe = FW_INTFVER(T5, FCOE),
		},
	}, {
		.chip = CHELSIO_T6,
		.kld_name = "t6fw_cfg",
		.fw_mod_name = "t6fw",
		.fw_hdr = {
			.chip = FW_HDR_CHIP_T6,
			.fw_ver = htobe32(FW_VERSION(T6)),
			.intfver_nic = FW_INTFVER(T6, NIC),
			.intfver_vnic = FW_INTFVER(T6, VNIC),
			.intfver_ofld = FW_INTFVER(T6, OFLD),
			.intfver_ri = FW_INTFVER(T6, RI),
			.intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
			.intfver_iscsi = FW_INTFVER(T6, ISCSI),
			.intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
			.intfver_fcoe = FW_INTFVER(T6, FCOE),
		},
	}
};
3257
3258 static struct fw_info *
3259 find_fw_info(int chip)
3260 {
3261         int i;
3262
3263         for (i = 0; i < nitems(fw_info); i++) {
3264                 if (fw_info[i].chip == chip)
3265                         return (&fw_info[i]);
3266         }
3267         return (NULL);
3268 }
3269
3270 /*
3271  * Is the given firmware API compatible with the one the driver was compiled
3272  * with?
3273  */
3274 static int
3275 fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2)
3276 {
3277
3278         /* short circuit if it's the exact same firmware version */
3279         if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3280                 return (1);
3281
3282         /*
3283          * XXX: Is this too conservative?  Perhaps I should limit this to the
3284          * features that are supported in the driver.
3285          */
3286 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3287         if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3288             SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3289             SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3290                 return (1);
3291 #undef SAME_INTF
3292
3293         return (0);
3294 }
3295
3296 /*
3297  * The firmware in the KLD is usable, but should it be installed?  This routine
3298  * explains itself in detail if it indicates the KLD firmware should be
3299  * installed.
3300  */
3301 static int
3302 should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c)
3303 {
3304         const char *reason;
3305
3306         if (!card_fw_usable) {
3307                 reason = "incompatible or unusable";
3308                 goto install;
3309         }
3310
3311         if (k > c) {
3312                 reason = "older than the version bundled with this driver";
3313                 goto install;
3314         }
3315
3316         if (t4_fw_install == 2 && k != c) {
3317                 reason = "different than the version bundled with this driver";
3318                 goto install;
3319         }
3320
3321         return (0);
3322
3323 install:
3324         if (t4_fw_install == 0) {
3325                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3326                     "but the driver is prohibited from installing a different "
3327                     "firmware on the card.\n",
3328                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3329                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3330
3331                 return (0);
3332         }
3333
3334         device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3335             "installing firmware %u.%u.%u.%u on card.\n",
3336             G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3337             G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3338             G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3339             G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
3340
3341         return (1);
3342 }
3343
3344 /*
3345  * Establish contact with the firmware and determine if we are the master driver
3346  * or not, and whether we are responsible for chip initialization.
3347  */
3348 static int
3349 prep_firmware(struct adapter *sc)
3350 {
3351         const struct firmware *fw = NULL, *default_cfg;
3352         int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1;
3353         enum dev_state state;
3354         struct fw_info *fw_info;
3355         struct fw_hdr *card_fw;         /* fw on the card */
3356         const struct fw_hdr *kld_fw;    /* fw in the KLD */
3357         const struct fw_hdr *drv_fw;    /* fw header the driver was compiled
3358                                            against */
3359
3360         /* This is the firmware whose headers the driver was compiled against */
3361         fw_info = find_fw_info(chip_id(sc));
3362         if (fw_info == NULL) {
3363                 device_printf(sc->dev,
3364                     "unable to look up firmware information for chip %d.\n",
3365                     chip_id(sc));
3366                 return (EINVAL);
3367         }
3368         drv_fw = &fw_info->fw_hdr;
3369
3370         /*
3371          * The firmware KLD contains many modules.  The KLD name is also the
3372          * name of the module that contains the default config file.
3373          */
3374         default_cfg = firmware_get(fw_info->kld_name);
3375
3376         /* This is the firmware in the KLD */
3377         fw = firmware_get(fw_info->fw_mod_name);
3378         if (fw != NULL) {
3379                 kld_fw = (const void *)fw->data;
3380                 kld_fw_usable = fw_compatible(drv_fw, kld_fw);
3381         } else {
3382                 kld_fw = NULL;
3383                 kld_fw_usable = 0;
3384         }
3385
3386         /* Read the header of the firmware on the card */
3387         card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
3388         rc = -t4_read_flash(sc, FLASH_FW_START,
3389             sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1);
3390         if (rc == 0) {
3391                 card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw);
3392                 if (card_fw->fw_ver == be32toh(0xffffffff)) {
3393                         uint32_t d = be32toh(kld_fw->fw_ver);
3394
3395                         if (!kld_fw_usable) {
3396                                 device_printf(sc->dev,
3397                                     "no firmware on the card and no usable "
3398                                     "firmware bundled with the driver.\n");
3399                                 rc = EIO;
3400                                 goto done;
3401                         } else if (t4_fw_install == 0) {
3402                                 device_printf(sc->dev,
3403                                     "no firmware on the card and the driver "
3404                                     "is prohibited from installing new "
3405                                     "firmware.\n");
3406                                 rc = EIO;
3407                                 goto done;
3408                         }
3409
3410                         device_printf(sc->dev, "no firmware on the card, "
3411                             "installing firmware %d.%d.%d.%d\n",
3412                             G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3413                             G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3414                         rc = t4_fw_forceinstall(sc, fw->data, fw->datasize);
3415                         if (rc < 0) {
3416                                 rc = -rc;
3417                                 device_printf(sc->dev,
3418                                     "firmware install failed: %d.\n", rc);
3419                                 goto done;
3420                         }
3421                         memcpy(card_fw, kld_fw, sizeof(*card_fw));
3422                         card_fw_usable = 1;
3423                         need_fw_reset = 0;
3424                 }
3425         } else {
3426                 device_printf(sc->dev,
3427                     "Unable to read card's firmware header: %d\n", rc);
3428                 card_fw_usable = 0;
3429         }
3430
3431         /* Contact firmware. */
3432         rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
3433         if (rc < 0 || state == DEV_STATE_ERR) {
3434                 rc = -rc;
3435                 device_printf(sc->dev,
3436                     "failed to connect to the firmware: %d, %d.\n", rc, state);
3437                 goto done;
3438         }
3439         pf = rc;
3440         if (pf == sc->mbox)
3441                 sc->flags |= MASTER_PF;
3442         else if (state == DEV_STATE_UNINIT) {
3443                 /*
3444                  * We didn't get to be the master so we definitely won't be
3445                  * configuring the chip.  It's a bug if someone else hasn't
3446                  * configured it already.
3447                  */
3448                 device_printf(sc->dev, "couldn't be master(%d), "
3449                     "device not already initialized either(%d).\n", rc, state);
3450                 rc = EPROTO;
3451                 goto done;
3452         }
3453
3454         if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver &&
3455             (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) {
3456                 /*
3457                  * Common case: the firmware on the card is an exact match and
3458                  * the KLD is an exact match too, or the KLD is
3459                  * absent/incompatible.  Note that t4_fw_install = 2 is ignored
3460                  * here -- use cxgbetool loadfw if you want to reinstall the
3461                  * same firmware as the one on the card.
3462                  */
3463         } else if (kld_fw_usable && state == DEV_STATE_UNINIT &&
3464             should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver),
3465             be32toh(card_fw->fw_ver))) {
3466
3467                 rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3468                 if (rc != 0) {
3469                         device_printf(sc->dev,
3470                             "failed to install firmware: %d\n", rc);
3471                         goto done;
3472                 }
3473
3474                 /* Installed successfully, update the cached header too. */
3475                 memcpy(card_fw, kld_fw, sizeof(*card_fw));
3476                 card_fw_usable = 1;
3477                 need_fw_reset = 0;      /* already reset as part of load_fw */
3478         }
3479
3480         if (!card_fw_usable) {
3481                 uint32_t d, c, k;
3482
3483                 d = ntohl(drv_fw->fw_ver);
3484                 c = ntohl(card_fw->fw_ver);
3485                 k = kld_fw ? ntohl(kld_fw->fw_ver) : 0;
3486
3487                 device_printf(sc->dev, "Cannot find a usable firmware: "
3488                     "fw_install %d, chip state %d, "
3489                     "driver compiled with %d.%d.%d.%d, "
3490                     "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n",
3491                     t4_fw_install, state,
3492                     G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3493                     G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d),
3494                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3495                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c),
3496                     G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3497                     G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
3498                 rc = EINVAL;
3499                 goto done;
3500         }
3501
3502         /* Reset device */
3503         if (need_fw_reset &&
3504             (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) {
3505                 device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
3506                 if (rc != ETIMEDOUT && rc != EIO)
3507                         t4_fw_bye(sc, sc->mbox);
3508                 goto done;
3509         }
3510         sc->flags |= FW_OK;
3511
3512         rc = get_params__pre_init(sc);
3513         if (rc != 0)
3514                 goto done; /* error message displayed already */
3515
3516         /* Partition adapter resources as specified in the config file. */
3517         if (state == DEV_STATE_UNINIT) {
3518
3519                 KASSERT(sc->flags & MASTER_PF,
3520                     ("%s: trying to change chip settings when not master.",
3521                     __func__));
3522
3523                 rc = partition_resources(sc, default_cfg, fw_info->kld_name);
3524                 if (rc != 0)
3525                         goto done;      /* error message displayed already */
3526
3527                 t4_tweak_chip_settings(sc);
3528
3529                 /* get basic stuff going */
3530                 rc = -t4_fw_initialize(sc, sc->mbox);
3531                 if (rc != 0) {
3532                         device_printf(sc->dev, "fw init failed: %d.\n", rc);
3533                         goto done;
3534                 }
3535         } else {
3536                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf);
3537                 sc->cfcsum = 0;
3538         }
3539
3540 done:
3541         free(card_fw, M_CXGBE);
3542         if (fw != NULL)
3543                 firmware_put(fw, FIRMWARE_UNLOAD);
3544         if (default_cfg != NULL)
3545                 firmware_put(default_cfg, FIRMWARE_UNLOAD);
3546
3547         return (rc);
3548 }
3549
/*
 * Build the mnemonic + parameter-index word of a device-level firmware
 * parameter, for use with t4_query_params/t4_set_params.
 */
#define FW_PARAM_DEV(param) \
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
/* Same, but for a PF/VF-scoped firmware parameter. */
#define FW_PARAM_PFVF(param) \
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
3556
3557 /*
3558  * Partition chip resources for use between various PFs, VFs, etc.
3559  */
3560 static int
3561 partition_resources(struct adapter *sc, const struct firmware *default_cfg,
3562     const char *name_prefix)
3563 {
3564         const struct firmware *cfg = NULL;
3565         int rc = 0;
3566         struct fw_caps_config_cmd caps;
3567         uint32_t mtype, moff, finicsum, cfcsum;
3568
3569         /*
3570          * Figure out what configuration file to use.  Pick the default config
3571          * file for the card if the user hasn't specified one explicitly.
3572          */
3573         snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file);
3574         if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3575                 /* Card specific overrides go here. */
3576                 if (pci_get_device(sc->dev) == 0x440a)
3577                         snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF);
3578                 if (is_fpga(sc))
3579                         snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF);
3580         } else if (strncmp(t4_cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0)
3581                 goto use_built_in_config;       /* go straight to config. */
3582
3583         /*
3584          * We need to load another module if the profile is anything except
3585          * "default" or "flash".
3586          */
3587         if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 &&
3588             strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3589                 char s[32];
3590
3591                 snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file);
3592                 cfg = firmware_get(s);
3593                 if (cfg == NULL) {
3594                         if (default_cfg != NULL) {
3595                                 device_printf(sc->dev,
3596                                     "unable to load module \"%s\" for "
3597                                     "configuration profile \"%s\", will use "
3598                                     "the default config file instead.\n",
3599                                     s, sc->cfg_file);
3600                                 snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3601                                     "%s", DEFAULT_CF);
3602                         } else {
3603                                 device_printf(sc->dev,
3604                                     "unable to load module \"%s\" for "
3605                                     "configuration profile \"%s\", will use "
3606                                     "the config file on the card's flash "
3607                                     "instead.\n", s, sc->cfg_file);
3608                                 snprintf(sc->cfg_file, sizeof(sc->cfg_file),
3609                                     "%s", FLASH_CF);
3610                         }
3611                 }
3612         }
3613
3614         if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 &&
3615             default_cfg == NULL) {
3616                 device_printf(sc->dev,
3617                     "default config file not available, will use the config "
3618                     "file on the card's flash instead.\n");
3619                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF);
3620         }
3621
3622         if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
3623                 u_int cflen;
3624                 const uint32_t *cfdata;
3625                 uint32_t param, val, addr;
3626
3627                 KASSERT(cfg != NULL || default_cfg != NULL,
3628                     ("%s: no config to upload", __func__));
3629
3630                 /*
3631                  * Ask the firmware where it wants us to upload the config file.
3632                  */
3633                 param = FW_PARAM_DEV(CF);
3634                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3635                 if (rc != 0) {
3636                         /* No support for config file?  Shouldn't happen. */
3637                         device_printf(sc->dev,
3638                             "failed to query config file location: %d.\n", rc);
3639                         goto done;
3640                 }
3641                 mtype = G_FW_PARAMS_PARAM_Y(val);
3642                 moff = G_FW_PARAMS_PARAM_Z(val) << 16;
3643
3644                 /*
3645                  * XXX: sheer laziness.  We deliberately added 4 bytes of
3646                  * useless stuffing/comments at the end of the config file so
3647                  * it's ok to simply throw away the last remaining bytes when
3648                  * the config file is not an exact multiple of 4.  This also
3649                  * helps with the validate_mt_off_len check.
3650                  */
3651                 if (cfg != NULL) {
3652                         cflen = cfg->datasize & ~3;
3653                         cfdata = cfg->data;
3654                 } else {
3655                         cflen = default_cfg->datasize & ~3;
3656                         cfdata = default_cfg->data;
3657                 }
3658
3659                 if (cflen > FLASH_CFG_MAX_SIZE) {
3660                         device_printf(sc->dev,
3661                             "config file too long (%d, max allowed is %d).  "
3662                             "Will try to use the config on the card, if any.\n",
3663                             cflen, FLASH_CFG_MAX_SIZE);
3664                         goto use_config_on_flash;
3665                 }
3666
3667                 rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
3668                 if (rc != 0) {
3669                         device_printf(sc->dev,
3670                             "%s: addr (%d/0x%x) or len %d is not valid: %d.  "
3671                             "Will try to use the config on the card, if any.\n",
3672                             __func__, mtype, moff, cflen, rc);
3673                         goto use_config_on_flash;
3674                 }
3675                 write_via_memwin(sc, 2, addr, cfdata, cflen);
3676         } else {
3677 use_config_on_flash:
3678                 mtype = FW_MEMTYPE_FLASH;
3679                 moff = t4_flash_cfg_addr(sc);
3680         }
3681
3682         bzero(&caps, sizeof(caps));
3683         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3684             F_FW_CMD_REQUEST | F_FW_CMD_READ);
3685         caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3686             V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3687             V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps));
3688         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3689         if (rc != 0) {
3690                 device_printf(sc->dev,
3691                     "failed to pre-process config file: %d "
3692                     "(mtype %d, moff 0x%x).  Will reset the firmware and retry "
3693                     "with the built-in configuration.\n", rc, mtype, moff);
3694
3695                 rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
3696                 if (rc != 0) {
3697                         device_printf(sc->dev,
3698                             "firmware reset failed: %d.\n", rc);
3699                         if (rc != ETIMEDOUT && rc != EIO) {
3700                                 t4_fw_bye(sc, sc->mbox);
3701                                 sc->flags &= ~FW_OK;
3702                         }
3703                         goto done;
3704                 }
3705                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", "built-in");
3706 use_built_in_config:
3707                 bzero(&caps, sizeof(caps));
3708                 caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3709                     F_FW_CMD_REQUEST | F_FW_CMD_READ);
3710                 caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3711                 rc = t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3712                 if (rc != 0) {
3713                         device_printf(sc->dev,
3714                             "built-in configuration failed: %d.\n", rc);
3715                         goto done;
3716                 }
3717         }
3718
3719         finicsum = be32toh(caps.finicsum);
3720         cfcsum = be32toh(caps.cfcsum);
3721         if (finicsum != cfcsum) {
3722                 device_printf(sc->dev,
3723                     "WARNING: config file checksum mismatch: %08x %08x\n",
3724                     finicsum, cfcsum);
3725         }
3726         sc->cfcsum = cfcsum;
3727
3728 #define LIMIT_CAPS(x) do { \
3729         caps.x &= htobe16(t4_##x##_allowed); \
3730 } while (0)
3731
3732         /*
3733          * Let the firmware know what features will (not) be used so it can tune
3734          * things accordingly.
3735          */
3736         LIMIT_CAPS(nbmcaps);
3737         LIMIT_CAPS(linkcaps);
3738         LIMIT_CAPS(switchcaps);
3739         LIMIT_CAPS(niccaps);
3740         LIMIT_CAPS(toecaps);
3741         LIMIT_CAPS(rdmacaps);
3742         LIMIT_CAPS(cryptocaps);
3743         LIMIT_CAPS(iscsicaps);
3744         LIMIT_CAPS(fcoecaps);
3745 #undef LIMIT_CAPS
3746
3747         if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
3748                 /*
3749                  * TOE and hashfilters are mutually exclusive.  It is a config
3750                  * file or firmware bug if both are reported as available.  Try
3751                  * to cope with the situation in non-debug builds by disabling
3752                  * TOE.
3753                  */
3754                 MPASS(caps.toecaps == 0);
3755
3756                 caps.toecaps = 0;
3757                 caps.rdmacaps = 0;
3758                 caps.iscsicaps = 0;
3759         }
3760
3761         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3762             F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
3763         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3764         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
3765         if (rc != 0) {
3766                 device_printf(sc->dev,
3767                     "failed to process config file: %d.\n", rc);
3768         }
3769 done:
3770         if (cfg != NULL)
3771                 firmware_put(cfg, FIRMWARE_UNLOAD);
3772         return (rc);
3773 }
3774
3775 /*
3776  * Retrieve parameters that are needed (or nice to have) very early.
3777  */
3778 static int
3779 get_params__pre_init(struct adapter *sc)
3780 {
3781         int rc;
3782         uint32_t param[2], val[2];
3783
3784         t4_get_version_info(sc);
3785
3786         snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
3787             G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
3788             G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
3789             G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
3790             G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
3791
3792         snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
3793             G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
3794             G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
3795             G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
3796             G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
3797
3798         snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
3799             G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
3800             G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
3801             G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
3802             G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
3803
3804         snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
3805             G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
3806             G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
3807             G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
3808             G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
3809
3810         param[0] = FW_PARAM_DEV(PORTVEC);
3811         param[1] = FW_PARAM_DEV(CCLK);
3812         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3813         if (rc != 0) {
3814                 device_printf(sc->dev,
3815                     "failed to query parameters (pre_init): %d.\n", rc);
3816                 return (rc);
3817         }
3818
3819         sc->params.portvec = val[0];
3820         sc->params.nports = bitcount32(val[0]);
3821         sc->params.vpd.cclk = val[1];
3822
3823         /* Read device log parameters. */
3824         rc = -t4_init_devlog_params(sc, 1);
3825         if (rc == 0)
3826                 fixup_devlog_params(sc);
3827         else {
3828                 device_printf(sc->dev,
3829                     "failed to get devlog parameters: %d.\n", rc);
3830                 rc = 0; /* devlog isn't critical for device operation */
3831         }
3832
3833         return (rc);
3834 }
3835
3836 /*
3837  * Retrieve various parameters that are of interest to the driver.  The device
3838  * has been initialized by the firmware at this point.
3839  */
3840 static int
3841 get_params__post_init(struct adapter *sc)
3842 {
3843         int rc;
3844         uint32_t param[7], val[7];
3845         struct fw_caps_config_cmd caps;
3846
3847         param[0] = FW_PARAM_PFVF(IQFLINT_START);
3848         param[1] = FW_PARAM_PFVF(EQ_START);
3849         param[2] = FW_PARAM_PFVF(FILTER_START);
3850         param[3] = FW_PARAM_PFVF(FILTER_END);
3851         param[4] = FW_PARAM_PFVF(L2T_START);
3852         param[5] = FW_PARAM_PFVF(L2T_END);
3853         param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
3854             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
3855             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
3856         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
3857         if (rc != 0) {
3858                 device_printf(sc->dev,
3859                     "failed to query parameters (post_init): %d.\n", rc);
3860                 return (rc);
3861         }
3862
3863         sc->sge.iq_start = val[0];
3864         sc->sge.eq_start = val[1];
3865         if ((int)val[3] > (int)val[2]) {
3866                 sc->tids.ftid_base = val[2];
3867                 sc->tids.ftid_end = val[3];
3868                 sc->tids.nftids = val[3] - val[2] + 1;
3869         }
3870         sc->vres.l2t.start = val[4];
3871         sc->vres.l2t.size = val[5] - val[4] + 1;
3872         KASSERT(sc->vres.l2t.size <= L2T_SIZE,
3873             ("%s: L2 table size (%u) larger than expected (%u)",
3874             __func__, sc->vres.l2t.size, L2T_SIZE));
3875         sc->params.core_vdd = val[6];
3876
3877         if (chip_id(sc) >= CHELSIO_T6) {
3878
3879 #ifdef INVARIANTS
3880                 if (sc->params.fw_vers >=
3881                     (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
3882                     V_FW_HDR_FW_VER_MICRO(1) | V_FW_HDR_FW_VER_BUILD(0))) {
3883                         /*
3884                          * Note that the code to enable the region should run
3885                          * before t4_fw_initialize and not here.  This is just a
3886                          * reminder to add said code.
3887                          */
3888                         device_printf(sc->dev,
3889                             "hpfilter region not enabled.\n");
3890                 }
3891 #endif
3892
3893                 sc->tids.tid_base = t4_read_reg(sc,
3894                     A_LE_DB_ACTIVE_TABLE_START_INDEX);
3895
3896                 param[0] = FW_PARAM_PFVF(HPFILTER_START);
3897                 param[1] = FW_PARAM_PFVF(HPFILTER_END);
3898                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
3899                 if (rc != 0) {
3900                         device_printf(sc->dev,
3901                            "failed to query hpfilter parameters: %d.\n", rc);
3902                         return (rc);
3903                 }
3904                 if ((int)val[1] > (int)val[0]) {
3905                         sc->tids.hpftid_base = val[0];
3906                         sc->tids.hpftid_end = val[1];
3907                         sc->tids.nhpftids = val[1] - val[0] + 1;
3908
3909                         /*
3910                          * These should go off if the layout changes and the
3911                          * driver needs to catch up.
3912                          */
3913                         MPASS(sc->tids.hpftid_base == 0);
3914                         MPASS(sc->tids.tid_base == sc->tids.nhpftids);
3915                 }
3916         }
3917
3918         /*
3919          * MPSBGMAP is queried separately because only recent firmwares support
3920          * it as a parameter and we don't want the compound query above to fail
3921          * on older firmwares.
3922          */
3923         param[0] = FW_PARAM_DEV(MPSBGMAP);
3924         val[0] = 0;
3925         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
3926         if (rc == 0)
3927                 sc->params.mps_bg_map = val[0];
3928         else
3929                 sc->params.mps_bg_map = 0;
3930
3931         /*
3932          * Determine whether the firmware supports the filter2 work request.
3933          * This is queried separately for the same reason as MPSBGMAP above.
3934          */
3935         param[0] = FW_PARAM_DEV(FILTER2_WR);
3936         val[0] = 0;
3937         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
3938         if (rc == 0)
3939                 sc->params.filter2_wr_support = val[0] != 0;
3940         else
3941                 sc->params.filter2_wr_support = 0;
3942
3943         /* get capabilites */
3944         bzero(&caps, sizeof(caps));
3945         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3946             F_FW_CMD_REQUEST | F_FW_CMD_READ);
3947         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3948         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3949         if (rc != 0) {
3950                 device_printf(sc->dev,
3951                     "failed to get card capabilities: %d.\n", rc);
3952                 return (rc);
3953         }
3954
3955 #define READ_CAPS(x) do { \
3956         sc->x = htobe16(caps.x); \
3957 } while (0)
3958         READ_CAPS(nbmcaps);
3959         READ_CAPS(linkcaps);
3960         READ_CAPS(switchcaps);
3961         READ_CAPS(niccaps);
3962         READ_CAPS(toecaps);
3963         READ_CAPS(rdmacaps);
3964         READ_CAPS(cryptocaps);
3965         READ_CAPS(iscsicaps);
3966         READ_CAPS(fcoecaps);
3967
3968         if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
3969                 MPASS(chip_id(sc) > CHELSIO_T4);
3970                 MPASS(sc->toecaps == 0);
3971                 sc->toecaps = 0;
3972
3973                 param[0] = FW_PARAM_DEV(NTID);
3974                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
3975                 if (rc != 0) {
3976                         device_printf(sc->dev,
3977                             "failed to query HASHFILTER parameters: %d.\n", rc);
3978                         return (rc);
3979                 }
3980                 sc->tids.ntids = val[0];
3981                 if (sc->params.fw_vers <
3982                     (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
3983                     V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
3984                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
3985                         sc->tids.ntids -= sc->tids.nhpftids;
3986                 }
3987                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
3988                 sc->params.hash_filter = 1;
3989         }
3990         if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
3991                 param[0] = FW_PARAM_PFVF(ETHOFLD_START);
3992                 param[1] = FW_PARAM_PFVF(ETHOFLD_END);
3993                 param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3994                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
3995                 if (rc != 0) {
3996                         device_printf(sc->dev,
3997                             "failed to query NIC parameters: %d.\n", rc);
3998                         return (rc);
3999                 }
4000                 if ((int)val[1] > (int)val[0]) {
4001                         sc->tids.etid_base = val[0];
4002                         sc->tids.etid_end = val[1];
4003                         sc->tids.netids = val[1] - val[0] + 1;
4004                         sc->params.eo_wr_cred = val[2];
4005                         sc->params.ethoffload = 1;
4006                 }
4007         }
4008         if (sc->toecaps) {
4009                 /* query offload-related parameters */
4010                 param[0] = FW_PARAM_DEV(NTID);
4011                 param[1] = FW_PARAM_PFVF(SERVER_START);
4012                 param[2] = FW_PARAM_PFVF(SERVER_END);
4013                 param[3] = FW_PARAM_PFVF(TDDP_START);
4014                 param[4] = FW_PARAM_PFVF(TDDP_END);
4015                 param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4016                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4017                 if (rc != 0) {
4018                         device_printf(sc->dev,
4019                             "failed to query TOE parameters: %d.\n", rc);
4020                         return (rc);
4021                 }
4022                 sc->tids.ntids = val[0];
4023                 if (sc->params.fw_vers <
4024                     (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4025                     V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
4026                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4027                         sc->tids.ntids -= sc->tids.nhpftids;
4028                 }
4029                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4030                 if ((int)val[2] > (int)val[1]) {
4031                         sc->tids.stid_base = val[1];
4032                         sc->tids.nstids = val[2] - val[1] + 1;
4033                 }
4034                 sc->vres.ddp.start = val[3];
4035                 sc->vres.ddp.size = val[4] - val[3] + 1;
4036                 sc->params.ofldq_wr_cred = val[5];
4037                 sc->params.offload = 1;
4038         } else {
4039                 /*
4040                  * The firmware attempts memfree TOE configuration for -SO cards
4041                  * and will report toecaps=0 if it runs out of resources (this
4042                  * depends on the config file).  It may not report 0 for other
4043                  * capabilities dependent on the TOE in this case.  Set them to
4044                  * 0 here so that the driver doesn't bother tracking resources
4045                  * that will never be used.
4046                  */
4047                 sc->iscsicaps = 0;
4048                 sc->rdmacaps = 0;
4049         }
4050         if (sc->rdmacaps) {
4051                 param[0] = FW_PARAM_PFVF(STAG_START);
4052                 param[1] = FW_PARAM_PFVF(STAG_END);
4053                 param[2] = FW_PARAM_PFVF(RQ_START);
4054                 param[3] = FW_PARAM_PFVF(RQ_END);
4055                 param[4] = FW_PARAM_PFVF(PBL_START);
4056                 param[5] = FW_PARAM_PFVF(PBL_END);
4057                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4058                 if (rc != 0) {
4059                         device_printf(sc->dev,
4060                             "failed to query RDMA parameters(1): %d.\n", rc);
4061                         return (rc);
4062                 }
4063                 sc->vres.stag.start = val[0];
4064                 sc->vres.stag.size = val[1] - val[0] + 1;
4065                 sc->vres.rq.start = val[2];
4066                 sc->vres.rq.size = val[3] - val[2] + 1;
4067                 sc->vres.pbl.start = val[4];
4068                 sc->vres.pbl.size = val[5] - val[4] + 1;
4069
4070                 param[0] = FW_PARAM_PFVF(SQRQ_START);
4071                 param[1] = FW_PARAM_PFVF(SQRQ_END);
4072                 param[2] = FW_PARAM_PFVF(CQ_START);
4073                 param[3] = FW_PARAM_PFVF(CQ_END);
4074                 param[4] = FW_PARAM_PFVF(OCQ_START);
4075                 param[5] = FW_PARAM_PFVF(OCQ_END);
4076                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4077                 if (rc != 0) {
4078                         device_printf(sc->dev,
4079                             "failed to query RDMA parameters(2): %d.\n", rc);
4080                         return (rc);
4081                 }
4082                 sc->vres.qp.start = val[0];
4083                 sc->vres.qp.size = val[1] - val[0] + 1;
4084                 sc->vres.cq.start = val[2];
4085                 sc->vres.cq.size = val[3] - val[2] + 1;
4086                 sc->vres.ocq.start = val[4];
4087                 sc->vres.ocq.size = val[5] - val[4] + 1;
4088
4089                 param[0] = FW_PARAM_PFVF(SRQ_START);
4090                 param[1] = FW_PARAM_PFVF(SRQ_END);
4091                 param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
4092                 param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
4093                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
4094                 if (rc != 0) {
4095                         device_printf(sc->dev,
4096                             "failed to query RDMA parameters(3): %d.\n", rc);
4097                         return (rc);
4098                 }
4099                 sc->vres.srq.start = val[0];
4100                 sc->vres.srq.size = val[1] - val[0] + 1;
4101                 sc->params.max_ordird_qp = val[2];
4102                 sc->params.max_ird_adapter = val[3];
4103         }
4104         if (sc->iscsicaps) {
4105                 param[0] = FW_PARAM_PFVF(ISCSI_START);
4106                 param[1] = FW_PARAM_PFVF(ISCSI_END);
4107                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4108                 if (rc != 0) {
4109                         device_printf(sc->dev,
4110                             "failed to query iSCSI parameters: %d.\n", rc);
4111                         return (rc);
4112                 }
4113                 sc->vres.iscsi.start = val[0];
4114                 sc->vres.iscsi.size = val[1] - val[0] + 1;
4115         }
4116         if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
4117                 param[0] = FW_PARAM_PFVF(TLS_START);
4118                 param[1] = FW_PARAM_PFVF(TLS_END);
4119                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4120                 if (rc != 0) {
4121                         device_printf(sc->dev,
4122                             "failed to query TLS parameters: %d.\n", rc);
4123                         return (rc);
4124                 }
4125                 sc->vres.key.start = val[0];
4126                 sc->vres.key.size = val[1] - val[0] + 1;
4127         }
4128
4129         t4_init_sge_params(sc);
4130
4131         /*
4132          * We've got the params we wanted to query via the firmware.  Now grab
4133          * some others directly from the chip.
4134          */
4135         rc = t4_read_chip_settings(sc);
4136
4137         return (rc);
4138 }
4139
/*
 * Second half of post-init parameter setup: push driver preferences to the
 * firmware and program a few TP registers from tunables.  Always returns 0;
 * the firmware calls here are best-effort.
 */
static int
set_params__post_init(struct adapter *sc)
{
	uint32_t param, val;
#ifdef TCP_OFFLOAD
	int i, v, shift;
#endif

	/* ask for encapsulated CPLs */
	/* Best effort: the return value is deliberately ignored. */
	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
	val = 1;
	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);

	/* Enable 32b port caps if the firmware supports it. */
	/* Older firmwares reject this param; port_caps32 stays 0 then. */
	param = FW_PARAM_PFVF(PORT_CAPS32);
	val = 1;
	if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
		sc->params.port_caps32 = 1;

	/* Let filter + maskhash steer to a part of the VI's RSS region. */
	/* val is half the mask range; the register field holds val - 1. */
	val = 1 << (G_MASKSIZE(t4_read_reg(sc, A_TP_RSS_CONFIG_TNL)) - 1);
	t4_set_reg_field(sc, A_TP_RSS_CONFIG_TNL, V_MASKFILTER(M_MASKFILTER),
	    V_MASKFILTER(val - 1));

#ifdef TCP_OFFLOAD
	/*
	 * Override the TOE timers with user provided tunables.  This is not the
	 * recommended way to change the timers (the firmware config file is) so
	 * these tunables are not documented.
	 *
	 * All the timer tunables are in microseconds.
	 */
	if (t4_toe_keepalive_idle != 0) {
		v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
		v &= M_KEEPALIVEIDLE;
		t4_set_reg_field(sc, A_TP_KEEP_IDLE,
		    V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
	}
	if (t4_toe_keepalive_interval != 0) {
		v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
		v &= M_KEEPALIVEINTVL;
		t4_set_reg_field(sc, A_TP_KEEP_INTVL,
		    V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
	}
	if (t4_toe_keepalive_count != 0) {
		/* This tunable is a count, not a time, so no tick conversion. */
		v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
		    V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
		    V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
		    V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
	}
	if (t4_toe_rexmt_min != 0) {
		v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
		v &= M_RXTMIN;
		t4_set_reg_field(sc, A_TP_RXT_MIN,
		    V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
	}
	if (t4_toe_rexmt_max != 0) {
		v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
		v &= M_RXTMAX;
		t4_set_reg_field(sc, A_TP_RXT_MAX,
		    V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
	}
	if (t4_toe_rexmt_count != 0) {
		v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
		t4_set_reg_field(sc, A_TP_SHIFT_CNT,
		    V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
		    V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
		    V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
	}
	/*
	 * Four 8-bit backoff indices are packed per 32-bit BACKOFF register
	 * (shift selects the byte, i & ~3 selects the register offset).
	 */
	for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
		if (t4_toe_rexmt_backoff[i] != -1) {
			v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
			shift = (i & 3) << 3;
			t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
			    M_TIMERBACKOFFINDEX0 << shift, v << shift);
		}
	}
#endif
	return (0);
}
4221
4222 #undef FW_PARAM_PFVF
4223 #undef FW_PARAM_DEV
4224
4225 static void
4226 t4_set_desc(struct adapter *sc)
4227 {
4228         char buf[128];
4229         struct adapter_params *p = &sc->params;
4230
4231         snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
4232
4233         device_set_desc_copy(sc->dev, buf);
4234 }
4235
4236 static inline void
4237 ifmedia_add4(struct ifmedia *ifm, int m)
4238 {
4239
4240         ifmedia_add(ifm, m, 0, NULL);
4241         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL);
4242         ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL);
4243         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
4244 }
4245
4246 /*
4247  * This is the selected media, which is not quite the same as the active media.
4248  * The media line in ifconfig is "media: Ethernet selected (active)" if selected
4249  * and active are not the same, and "media: Ethernet selected" otherwise.
4250  */
4251 static void
4252 set_current_media(struct port_info *pi)
4253 {
4254         struct link_config *lc;
4255         struct ifmedia *ifm;
4256         int mword;
4257         u_int speed;
4258
4259         PORT_LOCK_ASSERT_OWNED(pi);
4260
4261         /* Leave current media alone if it's already set to IFM_NONE. */
4262         ifm = &pi->media;
4263         if (ifm->ifm_cur != NULL &&
4264             IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
4265                 return;
4266
4267         lc = &pi->link_cfg;
4268         if (lc->requested_aneg != AUTONEG_DISABLE &&
4269             lc->supported & FW_PORT_CAP32_ANEG) {
4270                 ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
4271                 return;
4272         }
4273         mword = IFM_ETHER | IFM_FDX;
4274         if (lc->requested_fc & PAUSE_TX)
4275                 mword |= IFM_ETH_TXPAUSE;
4276         if (lc->requested_fc & PAUSE_RX)
4277                 mword |= IFM_ETH_RXPAUSE;
4278         if (lc->requested_speed == 0)
4279                 speed = port_top_speed(pi) * 1000;      /* Gbps -> Mbps */
4280         else
4281                 speed = lc->requested_speed;
4282         mword |= port_mword(pi, speed_to_fwcap(speed));
4283         ifmedia_set(ifm, mword);
4284 }
4285
4286 /*
4287  * Returns true if the ifmedia list for the port cannot change.
4288  */
4289 static bool
4290 fixed_ifmedia(struct port_info *pi)
4291 {
4292
4293         return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
4294             pi->port_type == FW_PORT_TYPE_BT_XFI ||
4295             pi->port_type == FW_PORT_TYPE_BT_XAUI ||
4296             pi->port_type == FW_PORT_TYPE_KX4 ||
4297             pi->port_type == FW_PORT_TYPE_KX ||
4298             pi->port_type == FW_PORT_TYPE_KR ||
4299             pi->port_type == FW_PORT_TYPE_BP_AP ||
4300             pi->port_type == FW_PORT_TYPE_BP4_AP ||
4301             pi->port_type == FW_PORT_TYPE_BP40_BA ||
4302             pi->port_type == FW_PORT_TYPE_KR4_100G ||
4303             pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
4304             pi->port_type == FW_PORT_TYPE_KR_XLAUI);
4305 }
4306
/*
 * Rebuild the port's ifmedia list from the speeds the firmware reports as
 * supported, then reselect the current media.  No-op for ports whose media
 * list never changes (FIXED_IFMEDIA).
 */
static void
build_medialist(struct port_info *pi)
{
	uint32_t ss, speed;
	int unknown, mword, bit;
	struct link_config *lc;
	struct ifmedia *ifm;

	PORT_LOCK_ASSERT_OWNED(pi);

	if (pi->flags & FIXED_IFMEDIA)
		return;

	/*
	 * Rebuild the ifmedia list.
	 */
	ifm = &pi->media;
	ifmedia_removeall(ifm);
	lc = &pi->link_cfg;
	ss = G_FW_PORT_CAP32_SPEED(lc->supported); /* Supported Speeds */
	if (__predict_false(ss == 0)) {	/* not supposed to happen. */
		MPASS(ss != 0);
/* Also reached via goto when port_mword() reports no usable media. */
no_media:
		MPASS(LIST_EMPTY(&ifm->ifm_list));
		ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL);
		ifmedia_set(ifm, IFM_ETHER | IFM_NONE);
		return;
	}

	/* Walk every speed bit that is set and add its media word(s). */
	unknown = 0;
	for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
		speed = 1 << bit;
		MPASS(speed & M_FW_PORT_CAP32_SPEED);
		if (ss & speed) {
			mword = port_mword(pi, speed);
			if (mword == IFM_NONE) {
				goto no_media;
			} else if (mword == IFM_UNKNOWN)
				unknown++;
			else
				ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword);
		}
	}
	if (unknown > 0) /* Add one unknown for all unknown media types. */
		ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
	if (lc->supported & FW_PORT_CAP32_ANEG)
		ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);

	set_current_media(pi);
}
4357
4358 /*
4359  * Initialize the requested fields in the link config based on driver tunables.
4360  */
4361 static void
4362 init_link_config(struct port_info *pi)
4363 {
4364         struct link_config *lc = &pi->link_cfg;
4365
4366         PORT_LOCK_ASSERT_OWNED(pi);
4367
4368         lc->requested_speed = 0;
4369
4370         if (t4_autoneg == 0)
4371                 lc->requested_aneg = AUTONEG_DISABLE;
4372         else if (t4_autoneg == 1)
4373                 lc->requested_aneg = AUTONEG_ENABLE;
4374         else
4375                 lc->requested_aneg = AUTONEG_AUTO;
4376
4377         lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
4378             PAUSE_AUTONEG);
4379
4380         if (t4_fec == -1 || t4_fec & FEC_AUTO)
4381                 lc->requested_fec = FEC_AUTO;
4382         else {
4383                 lc->requested_fec = FEC_NONE;
4384                 if (t4_fec & FEC_RS)
4385                         lc->requested_fec |= FEC_RS;
4386                 if (t4_fec & FEC_BASER_RS)
4387                         lc->requested_fec |= FEC_BASER_RS;
4388         }
4389 }
4390
4391 /*
4392  * Makes sure that all requested settings comply with what's supported by the
4393  * port.  Returns the number of settings that were invalid and had to be fixed.
4394  */
static int
fixup_link_config(struct port_info *pi)
{
	int n = 0;
	struct link_config *lc = &pi->link_cfg;
	uint32_t fwspeed;

	PORT_LOCK_ASSERT_OWNED(pi);

	/* Speed (when not autonegotiating) */
	/* A requested speed the port can't do falls back to "unset". */
	if (lc->requested_speed != 0) {
		fwspeed = speed_to_fwcap(lc->requested_speed);
		if ((fwspeed & lc->supported) == 0) {
			n++;
			lc->requested_speed = 0;
		}
	}

	/* Link autonegotiation */
	MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
	    lc->requested_aneg == AUTONEG_DISABLE ||
	    lc->requested_aneg == AUTONEG_AUTO);
	/* Can't force autoneg on if the port lacks the ANEG capability. */
	if (lc->requested_aneg == AUTONEG_ENABLE &&
	    !(lc->supported & FW_PORT_CAP32_ANEG)) {
		n++;
		lc->requested_aneg = AUTONEG_AUTO;
	}

	/* Flow control */
	MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
	/* Drop TX/RX pause requests the port doesn't support. */
	if (lc->requested_fc & PAUSE_TX &&
	    !(lc->supported & FW_PORT_CAP32_FC_TX)) {
		n++;
		lc->requested_fc &= ~PAUSE_TX;
	}
	if (lc->requested_fc & PAUSE_RX &&
	    !(lc->supported & FW_PORT_CAP32_FC_RX)) {
		n++;
		lc->requested_fc &= ~PAUSE_RX;
	}
	/*
	 * Forcing pause settings (no PAUSE_AUTONEG) requires the
	 * FORCE_PAUSE capability; otherwise revert to autonegotiated pause.
	 */
	if (!(lc->requested_fc & PAUSE_AUTONEG) &&
	    !(lc->supported & FW_PORT_CAP32_FORCE_PAUSE)) {
		n++;
		lc->requested_fc |= PAUSE_AUTONEG;
	}

	/* FEC */
	/* Any unsupported explicit FEC request reverts to automatic FEC. */
	if ((lc->requested_fec & FEC_RS &&
	    !(lc->supported & FW_PORT_CAP32_FEC_RS)) ||
	    (lc->requested_fec & FEC_BASER_RS &&
	    !(lc->supported & FW_PORT_CAP32_FEC_BASER_RS))) {
		n++;
		lc->requested_fec = FEC_AUTO;
	}

	return (n);
}
4452
4453 /*
4454  * Apply the requested L1 settings, which are expected to be valid, to the
4455  * hardware.
4456  */
static int
apply_link_config(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	struct link_config *lc = &pi->link_cfg;
	int rc;

#ifdef INVARIANTS
	/*
	 * The caller (via fixup_link_config) is responsible for making the
	 * requested settings valid; verify that here on INVARIANTS kernels.
	 */
	ASSERT_SYNCHRONIZED_OP(sc);
	PORT_LOCK_ASSERT_OWNED(pi);

	if (lc->requested_aneg == AUTONEG_ENABLE)
		MPASS(lc->supported & FW_PORT_CAP32_ANEG);
	if (!(lc->requested_fc & PAUSE_AUTONEG))
		MPASS(lc->supported & FW_PORT_CAP32_FORCE_PAUSE);
	if (lc->requested_fc & PAUSE_TX)
		MPASS(lc->supported & FW_PORT_CAP32_FC_TX);
	if (lc->requested_fc & PAUSE_RX)
		MPASS(lc->supported & FW_PORT_CAP32_FC_RX);
	if (lc->requested_fec & FEC_RS)
		MPASS(lc->supported & FW_PORT_CAP32_FEC_RS);
	if (lc->requested_fec & FEC_BASER_RS)
		MPASS(lc->supported & FW_PORT_CAP32_FEC_BASER_RS);
#endif
	/* t4_link_l1cfg returns a negative error; flip it positive. */
	rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
	if (rc != 0) {
		/* Don't complain if the VF driver gets back an EPERM. */
		if (!(sc->flags & IS_VF) || rc != FW_EPERM)
			device_printf(pi->dev, "l1cfg failed: %d\n", rc);
	} else {
		/*
		 * An L1_CFG will almost always result in a link-change event if
		 * the link is up, and the driver will refresh the actual
		 * fec/fc/etc. when the notification is processed.  If the link
		 * is down then the actual settings are meaningless.
		 *
		 * This takes care of the case where a change in the L1 settings
		 * may not result in a notification.
		 */
		if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
			lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
	}
	return (rc);
}
4501
4502 #define FW_MAC_EXACT_CHUNK      7
4503
4504 /*
4505  * Program the port's XGMAC based on parameters in ifnet.  The caller also
4506  * indicates which parameters should be programmed (the rest are left alone).
4507  */
4508 int
4509 update_mac_settings(struct ifnet *ifp, int flags)
4510 {
4511         int rc = 0;
4512         struct vi_info *vi = ifp->if_softc;
4513         struct port_info *pi = vi->pi;
4514         struct adapter *sc = pi->adapter;
4515         int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
4516
4517         ASSERT_SYNCHRONIZED_OP(sc);
4518         KASSERT(flags, ("%s: not told what to update.", __func__));
4519
4520         if (flags & XGMAC_MTU)
4521                 mtu = ifp->if_mtu;
4522
4523         if (flags & XGMAC_PROMISC)
4524                 promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
4525
4526         if (flags & XGMAC_ALLMULTI)
4527                 allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
4528
4529         if (flags & XGMAC_VLANEX)
4530                 vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
4531
4532         if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
4533                 rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
4534                     allmulti, 1, vlanex, false);
4535                 if (rc) {
4536                         if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
4537                             rc);
4538                         return (rc);
4539                 }
4540         }
4541
4542         if (flags & XGMAC_UCADDR) {
4543                 uint8_t ucaddr[ETHER_ADDR_LEN];
4544
4545                 bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
4546                 rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
4547                     ucaddr, true, true);
4548                 if (rc < 0) {
4549                         rc = -rc;
4550                         if_printf(ifp, "change_mac failed: %d\n", rc);
4551                         return (rc);
4552                 } else {
4553                         vi->xact_addr_filt = rc;
4554                         rc = 0;
4555                 }
4556         }
4557
4558         if (flags & XGMAC_MCADDRS) {
4559                 const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
4560                 int del = 1;
4561                 uint64_t hash = 0;
4562                 struct ifmultiaddr *ifma;
4563                 int i = 0, j;
4564
4565                 if_maddr_rlock(ifp);
4566                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
4567                         if (ifma->ifma_addr->sa_family != AF_LINK)
4568                                 continue;
4569                         mcaddr[i] =
4570                             LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
4571                         MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
4572                         i++;
4573
4574                         if (i == FW_MAC_EXACT_CHUNK) {
4575                                 rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
4576                                     del, i, mcaddr, NULL, &hash, 0);
4577                                 if (rc < 0) {
4578                                         rc = -rc;
4579                                         for (j = 0; j < i; j++) {
4580                                                 if_printf(ifp,
4581                                                     "failed to add mc address"
4582                                                     " %02x:%02x:%02x:"
4583                                                     "%02x:%02x:%02x rc=%d\n",
4584                                                     mcaddr[j][0], mcaddr[j][1],
4585                                                     mcaddr[j][2], mcaddr[j][3],
4586                                                     mcaddr[j][4], mcaddr[j][5],
4587                                                     rc);
4588                                         }
4589                                         goto mcfail;
4590                                 }
4591                                 del = 0;
4592                                 i = 0;
4593                         }
4594                 }
4595                 if (i > 0) {
4596                         rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
4597                             mcaddr, NULL, &hash, 0);
4598                         if (rc < 0) {
4599                                 rc = -rc;
4600                                 for (j = 0; j < i; j++) {
4601                                         if_printf(ifp,
4602                                             "failed to add mc address"
4603                                             " %02x:%02x:%02x:"
4604                                             "%02x:%02x:%02x rc=%d\n",
4605                                             mcaddr[j][0], mcaddr[j][1],
4606                                             mcaddr[j][2], mcaddr[j][3],
4607                                             mcaddr[j][4], mcaddr[j][5],
4608                                             rc);
4609                                 }
4610                                 goto mcfail;
4611                         }
4612                 }
4613
4614                 rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
4615                 if (rc != 0)
4616                         if_printf(ifp, "failed to set mc address hash: %d", rc);
4617 mcfail:
4618                 if_maddr_runlock(ifp);
4619         }
4620
4621         return (rc);
4622 }
4623
4624 /*
4625  * {begin|end}_synchronized_op must be called from the same thread.
4626  */
4627 int
4628 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
4629     char *wmesg)
4630 {
4631         int rc, pri;
4632
4633 #ifdef WITNESS
4634         /* the caller thinks it's ok to sleep, but is it really? */
4635         if (flags & SLEEP_OK)
4636                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
4637                     "begin_synchronized_op");
4638 #endif
4639
4640         if (INTR_OK)
4641                 pri = PCATCH;
4642         else
4643                 pri = 0;
4644
4645         ADAPTER_LOCK(sc);
4646         for (;;) {
4647
4648                 if (vi && IS_DOOMED(vi)) {
4649                         rc = ENXIO;
4650                         goto done;
4651                 }
4652
4653                 if (!IS_BUSY(sc)) {
4654                         rc = 0;
4655                         break;
4656                 }
4657
4658                 if (!(flags & SLEEP_OK)) {
4659                         rc = EBUSY;
4660                         goto done;
4661                 }
4662
4663                 if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
4664                         rc = EINTR;
4665                         goto done;
4666                 }
4667         }
4668
4669         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
4670         SET_BUSY(sc);
4671 #ifdef INVARIANTS
4672         sc->last_op = wmesg;
4673         sc->last_op_thr = curthread;
4674         sc->last_op_flags = flags;
4675 #endif
4676
4677 done:
4678         if (!(flags & HOLD_LOCK) || rc)
4679                 ADAPTER_UNLOCK(sc);
4680
4681         return (rc);
4682 }
4683
4684 /*
4685  * Tell if_ioctl and if_init that the VI is going away.  This is
4686  * special variant of begin_synchronized_op and must be paired with a
4687  * call to end_synchronized_op.
4688  */
void
doom_vi(struct adapter *sc, struct vi_info *vi)
{

	ADAPTER_LOCK(sc);
	/* Make pending begin_synchronized_op callers on this VI fail ENXIO. */
	SET_DOOMED(vi);
	/* Wake sleepers in begin_synchronized_op so they notice DOOMED. */
	wakeup(&sc->flags);
	/* Wait out any op already in progress, then take the op ourselves. */
	while (IS_BUSY(sc))
		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
	SET_BUSY(sc);
#ifdef INVARIANTS
	/* Record who holds the op, for debugging with INVARIANTS. */
	sc->last_op = "t4detach";
	sc->last_op_thr = curthread;
	sc->last_op_flags = 0;
#endif
	ADAPTER_UNLOCK(sc);
}
4706
4707 /*
4708  * {begin|end}_synchronized_op must be called from the same thread.
4709  */
4710 void
4711 end_synchronized_op(struct adapter *sc, int flags)
4712 {
4713
4714         if (flags & LOCK_HELD)
4715                 ADAPTER_LOCK_ASSERT_OWNED(sc);
4716         else
4717                 ADAPTER_LOCK(sc);
4718
4719         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
4720         CLR_BUSY(sc);
4721         wakeup(&sc->flags);
4722         ADAPTER_UNLOCK(sc);
4723 }
4724
/*
 * Bring a VI (and, on first use, the adapter and port) up.  Must be called
 * with the synchronized op held.  Returns 0 on success or an errno; on
 * failure the VI is torn back down via cxgbe_uninit_synchronized.
 */
static int
cxgbe_init_synchronized(struct vi_info *vi)
{
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = vi->ifp;
	int rc = 0, i;
	struct sge_txq *txq;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return (0);	/* already running */

	/* One-time full init of the adapter, then of this VI. */
	if (!(sc->flags & FULL_INIT_DONE) &&
	    ((rc = adapter_full_init(sc)) != 0))
		return (rc);	/* error message displayed already */

	if (!(vi->flags & VI_INIT_DONE) &&
	    ((rc = vi_full_init(vi)) != 0))
		return (rc); /* error message displayed already */

	rc = update_mac_settings(ifp, XGMAC_ALL);
	if (rc)
		goto done;	/* error message displayed already */

	PORT_LOCK(pi);
	/* First VI on this port: refresh port info and push link config. */
	if (pi->up_vis == 0) {
		t4_update_port_info(pi);
		fixup_link_config(pi);
		build_medialist(pi);
		apply_link_config(pi);
	}

	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
	if (rc != 0) {
		if_printf(ifp, "enable_vi failed: %d\n", rc);
		PORT_UNLOCK(pi);
		goto done;
	}

	/*
	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
	 * if this changes.
	 */

	for_each_txq(vi, i, txq) {
		TXQ_LOCK(txq);
		txq->eq.flags |= EQ_ENABLED;
		TXQ_UNLOCK(txq);
	}

	/*
	 * The first iq of the first port to come up is used for tracing.
	 */
	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
		t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
		    V_QUEUENUMBER(sc->traceq));
		pi->flags |= HAS_TRACEQ;
	}

	/* all ok */
	pi->up_vis++;
	ifp->if_drv_flags |= IFF_DRV_RUNNING;

	/* Per-VI tick for multi-VI or VF setups, otherwise a per-port tick. */
	if (pi->nvi > 1 || sc->flags & IS_VF)
		callout_reset(&vi->tick, hz, vi_tick, vi);
	else
		callout_reset(&pi->tick, hz, cxgbe_tick, pi);
	PORT_UNLOCK(pi);
done:
	if (rc != 0)
		cxgbe_uninit_synchronized(vi);

	return (rc);
}
4803
/*
 * Bring a virtual interface down: disable the VI in hardware, stop tx on
 * its queues, stop the stats tick, and mark the ifnet not-running.  When
 * the last VI on the port goes down the port's link state is reported as
 * down.  Idempotent.  Must be called from within a synchronized operation.
 */
static int
cxgbe_uninit_synchronized(struct vi_info *vi)
{
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct ifnet *ifp = vi->ifp;
	int rc, i;
	struct sge_txq *txq;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (!(vi->flags & VI_INIT_DONE)) {
		/* An uninitialized VI should never be marked running. */
		if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			KASSERT(0, ("uninited VI is running"));
			if_printf(ifp, "uninited VI with running ifnet.  "
			    "vi->flags 0x%016lx, if_flags 0x%08x, "
			    "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
			    ifp->if_drv_flags);
		}
		return (0);
	}

	/*
	 * Disable the VI so that all its data in either direction is discarded
	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
	 * tick) intact as the TP can deliver negative advice or data that it's
	 * holding in its RAM (for an offloaded connection) even after the VI is
	 * disabled.
	 */
	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
	if (rc) {
		if_printf(ifp, "disable_vi failed: %d\n", rc);
		return (rc);
	}

	/* Stop accepting new work on the tx queues. */
	for_each_txq(vi, i, txq) {
		TXQ_LOCK(txq);
		txq->eq.flags &= ~EQ_ENABLED;
		TXQ_UNLOCK(txq);
	}

	PORT_LOCK(pi);
	if (pi->nvi > 1 || sc->flags & IS_VF)
		callout_stop(&vi->tick);
	else
		callout_stop(&pi->tick);
	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		PORT_UNLOCK(pi);
		return (0);
	}
	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	pi->up_vis--;
	if (pi->up_vis > 0) {
		/* Other VIs on this port are still up; leave link alone. */
		PORT_UNLOCK(pi);
		return (0);
	}

	/* Last VI down: report the link as down (255 == no reason code). */
	pi->link_cfg.link_ok = false;
	pi->link_cfg.speed = 0;
	pi->link_cfg.link_down_rc = 255;
	t4_os_link_changed(pi);
	PORT_UNLOCK(pi);

	return (0);
}
4872
/*
 * Allocate and set up all interrupt vectors for the adapter.  It is ok for
 * this function to fail midway and return right away.  t4_detach will walk
 * the entire sc->irq list and clean up whatever is valid.
 */
int
t4_setup_intr_handlers(struct adapter *sc)
{
	int rc, rid, p, q, v;
	char s[8];
	struct irq *irq;
	struct port_info *pi;
	struct vi_info *vi;
	struct sge *sge = &sc->sge;
	struct sge_rxq *rxq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
#ifdef DEV_NETMAP
	struct sge_nm_rxq *nm_rxq;
#endif
#ifdef RSS
	int nbuckets = rss_getnumbuckets();
#endif

	/*
	 * Setup interrupts.
	 */
	irq = &sc->irq[0];
	/* INTx uses rid 0; MSI/MSI-X resource ids start at 1. */
	rid = sc->intr_type == INTR_INTX ? 0 : 1;
	if (forwarding_intr_to_fwq(sc))
		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));

	/* Multiple interrupts. */
	if (sc->flags & IS_VF)
		KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
		    ("%s: too few intr.", __func__));
	else
		KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
		    ("%s: too few intr.", __func__));

	/* The first one is always error intr on PFs */
	if (!(sc->flags & IS_VF)) {
		rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
		if (rc != 0)
			return (rc);
		irq++;
		rid++;
	}

	/* The second one is always the firmware event queue (first on VFs) */
	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
	if (rc != 0)
		return (rc);
	irq++;
	rid++;

	for_each_port(sc, p) {
		pi = sc->port[p];
		for_each_vi(pi, v, vi) {
			vi->first_intr = rid - 1;

			if (vi->nnmrxq > 0) {
				/*
				 * NIC and netmap rx queues may share vectors;
				 * walk both lists in lockstep and pick the
				 * handler based on which queue(s) a vector
				 * ends up servicing.
				 */
				int n = max(vi->nrxq, vi->nnmrxq);

				rxq = &sge->rxq[vi->first_rxq];
#ifdef DEV_NETMAP
				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
#endif
				for (q = 0; q < n; q++) {
					/* name: <port><VI letter><queue> */
					snprintf(s, sizeof(s), "%x%c%x", p,
					    'a' + v, q);
					if (q < vi->nrxq)
						irq->rxq = rxq++;
#ifdef DEV_NETMAP
					if (q < vi->nnmrxq)
						irq->nm_rxq = nm_rxq++;

					if (irq->nm_rxq != NULL &&
					    irq->rxq == NULL) {
						/* Netmap rx only */
						rc = t4_alloc_irq(sc, irq, rid,
						    t4_nm_intr, irq->nm_rxq, s);
					}
					if (irq->nm_rxq != NULL &&
					    irq->rxq != NULL) {
						/* NIC and Netmap rx */
						rc = t4_alloc_irq(sc, irq, rid,
						    t4_vi_intr, irq, s);
					}
#endif
					if (irq->rxq != NULL &&
					    irq->nm_rxq == NULL) {
						/* NIC rx only */
						rc = t4_alloc_irq(sc, irq, rid,
						    t4_intr, irq->rxq, s);
					}
					if (rc != 0)
						return (rc);
#ifdef RSS
					if (q < vi->nrxq) {
						bus_bind_intr(sc->dev, irq->res,
						    rss_getcpu(q % nbuckets));
					}
#endif
					irq++;
					rid++;
					vi->nintr++;
				}
			} else {
				for_each_rxq(vi, q, rxq) {
					snprintf(s, sizeof(s), "%x%c%x", p,
					    'a' + v, q);
					rc = t4_alloc_irq(sc, irq, rid,
					    t4_intr, rxq, s);
					if (rc != 0)
						return (rc);
#ifdef RSS
					/* Pin the vector to the RSS bucket's CPU. */
					bus_bind_intr(sc->dev, irq->res,
					    rss_getcpu(q % nbuckets));
#endif
					irq++;
					rid++;
					vi->nintr++;
				}
			}
#ifdef TCP_OFFLOAD
			/* Offload rx queues use an uppercase VI letter. */
			for_each_ofld_rxq(vi, q, ofld_rxq) {
				snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
				rc = t4_alloc_irq(sc, irq, rid, t4_intr,
				    ofld_rxq, s);
				if (rc != 0)
					return (rc);
				irq++;
				rid++;
				vi->nintr++;
			}
#endif
		}
	}
	/* Every vector the adapter was given must have been consumed. */
	MPASS(irq == &sc->irq[sc->intr_count]);

	return (0);
}
5016
/*
 * Final stage of adapter-wide initialization: set up the queues that belong
 * to the adapter (not any particular port), create the driver task queues,
 * program the RSS key (options RSS), and enable interrupts on PFs.  Sets
 * FULL_INIT_DONE on success; on failure partial work is undone via
 * adapter_full_uninit.  Must be called from a synchronized operation,
 * without the adapter lock held.
 */
int
adapter_full_init(struct adapter *sc)
{
	int rc, i;
#ifdef RSS
	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
#endif

	ASSERT_SYNCHRONIZED_OP(sc);
	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
	    ("%s: FULL_INIT_DONE already", __func__));

	/*
	 * queues that belong to the adapter (not any particular port).
	 */
	rc = t4_setup_adapter_queues(sc);
	if (rc != 0)
		goto done;

	for (i = 0; i < nitems(sc->tq); i++) {
		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
		    taskqueue_thread_enqueue, &sc->tq[i]);
		if (sc->tq[i] == NULL) {
			device_printf(sc->dev,
			    "failed to allocate task queue %d\n", i);
			rc = ENOMEM;
			goto done;
		}
		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
		    device_get_nameunit(sc->dev), i);
	}
#ifdef RSS
	MPASS(RSS_KEYSIZE == 40);
	rss_getkey((void *)&raw_rss_key[0]);
	/* The key is handed to the card with its 32-bit words reversed and
	 * each word converted to big-endian. */
	for (i = 0; i < nitems(rss_key); i++) {
		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
	}
	t4_write_rss_key(sc, &rss_key[0], -1, 1);
#endif

	if (!(sc->flags & IS_VF))
		t4_intr_enable(sc);
	sc->flags |= FULL_INIT_DONE;
done:
	if (rc != 0)
		adapter_full_uninit(sc);

	return (rc);
}
5068
5069 int
5070 adapter_full_uninit(struct adapter *sc)
5071 {
5072         int i;
5073
5074         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5075
5076         t4_teardown_adapter_queues(sc);
5077
5078         for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
5079                 taskqueue_free(sc->tq[i]);
5080                 sc->tq[i] = NULL;
5081         }
5082
5083         sc->flags &= ~FULL_INIT_DONE;
5084
5085         return (0);
5086 }
5087
5088 #ifdef RSS
5089 #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
5090     RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
5091     RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
5092     RSS_HASHTYPE_RSS_UDP_IPV6)
5093
5094 /* Translates kernel hash types to hardware. */
5095 static int
5096 hashconfig_to_hashen(int hashconfig)
5097 {
5098         int hashen = 0;
5099
5100         if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
5101                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
5102         if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
5103                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
5104         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
5105                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5106                     F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5107         }
5108         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
5109                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5110                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5111         }
5112         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
5113                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5114         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
5115                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5116
5117         return (hashen);
5118 }
5119
/*
 * Translates hardware hash types to kernel.  Inverse of
 * hashconfig_to_hashen for the supported hash types.
 */
static int
hashen_to_hashconfig(int hashen)
{
	int hashconfig = 0;

	if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
		/*
		 * If UDP hashing was enabled it must have been enabled for
		 * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
		 * enabling any 4-tuple hash is nonsense configuration.
		 */
		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));

		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
		if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
			hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
	}
	/* The 4-tuple enables also cover TCP for each address family. */
	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
		hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
		hashconfig |= RSS_HASHTYPE_RSS_IPV4;
	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
		hashconfig |= RSS_HASHTYPE_RSS_IPV6;

	return (hashconfig);
}
5151 #endif
5152
/*
 * Final stage of VI initialization: allocate this VI's tx/rx/freelist
 * queues, program its RSS indirection table and hash configuration, and
 * keep a copy of the RSS table in vi->rss.  Sets VI_INIT_DONE on success;
 * on failure partial work is undone via vi_full_uninit.  Must be called
 * from a synchronized operation.
 */
int
vi_full_init(struct vi_info *vi)
{
	struct adapter *sc = vi->pi->adapter;
	struct ifnet *ifp = vi->ifp;
	uint16_t *rss;
	struct sge_rxq *rxq;
	int rc, i, j;
#ifdef RSS
	int nbuckets = rss_getnumbuckets();
	int hashconfig = rss_gethashconfig();
	int extra;
#endif

	ASSERT_SYNCHRONIZED_OP(sc);
	KASSERT((vi->flags & VI_INIT_DONE) == 0,
	    ("%s: VI_INIT_DONE already", __func__));

	sysctl_ctx_init(&vi->ctx);
	vi->flags |= VI_SYSCTL_CTX;

	/*
	 * Allocate tx/rx/fl queues for this VI.
	 */
	rc = t4_setup_vi_queues(vi);
	if (rc != 0)
		goto done;	/* error message displayed already */

	/*
	 * Setup RSS for this VI.  Save a copy of the RSS table for later use.
	 */
	if (vi->nrxq > vi->rss_size) {
		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
		    "some queues will never receive traffic.\n", vi->nrxq,
		    vi->rss_size);
	} else if (vi->rss_size % vi->nrxq) {
		if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
		    "expect uneven traffic distribution.\n", vi->nrxq,
		    vi->rss_size);
	}
#ifdef RSS
	if (vi->nrxq != nbuckets) {
		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
		    "performance will be impacted.\n", vi->nrxq, nbuckets);
	}
#endif
	/*
	 * Fill the indirection table: with options RSS it mirrors the
	 * kernel's bucket->queue mapping, otherwise the rx queues are
	 * simply repeated round-robin until the table is full.
	 */
	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
	for (i = 0; i < vi->rss_size;) {
#ifdef RSS
		j = rss_get_indirection_to_bucket(i);
		j %= vi->nrxq;
		rxq = &sc->sge.rxq[vi->first_rxq + j];
		rss[i++] = rxq->iq.abs_id;
#else
		for_each_rxq(vi, j, rxq) {
			rss[i++] = rxq->iq.abs_id;
			if (i == vi->rss_size)
				break;
		}
#endif
	}

	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
	    vi->rss_size);
	if (rc != 0) {
		free(rss, M_CXGBE);
		if_printf(ifp, "rss_config failed: %d\n", rc);
		goto done;
	}

#ifdef RSS
	vi->hashen = hashconfig_to_hashen(hashconfig);

	/*
	 * We may have had to enable some hashes even though the global config
	 * wants them disabled.  This is a potential problem that must be
	 * reported to the user.
	 */
	extra = hashen_to_hashconfig(vi->hashen) ^ hashconfig;

	/*
	 * If we consider only the supported hash types, then the enabled hashes
	 * are a superset of the requested hashes.  In other words, there cannot
	 * be any supported hash that was requested but not enabled, but there
	 * can be hashes that were not requested but had to be enabled.
	 */
	extra &= SUPPORTED_RSS_HASHTYPES;
	MPASS((extra & hashconfig) == 0);

	if (extra) {
		if_printf(ifp,
		    "global RSS config (0x%x) cannot be accommodated.\n",
		    hashconfig);
	}
	if (extra & RSS_HASHTYPE_RSS_IPV4)
		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
	if (extra & RSS_HASHTYPE_RSS_IPV6)
		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
#else
	/* No kernel RSS: enable every hash type the firmware supports here. */
	vi->hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
#endif
	/* rss[0] doubles as the default queue for non-hashed traffic. */
	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, rss[0], 0, 0);
	if (rc != 0) {
		free(rss, M_CXGBE);
		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
		goto done;
	}

	vi->rss = rss;
	vi->flags |= VI_INIT_DONE;
done:
	if (rc != 0)
		vi_full_uninit(vi);

	return (rc);
}
5280
/*
 * Tear down everything set up by vi_full_init: quiesce the VI's queues,
 * free its RSS state, and free the queues themselves.  Safe to call even
 * if vi_full_init did not complete.  Idempotent.
 */
int
vi_full_uninit(struct vi_info *vi)
{
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	int i;
	struct sge_rxq *rxq;
	struct sge_txq *txq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
	struct sge_wrq *ofld_txq;
#endif

	if (vi->flags & VI_INIT_DONE) {

		/* Need to quiesce queues.  */

		/* XXX: Only for the first VI? */
		if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);

		/* Tx side first ... */
		for_each_txq(vi, i, txq) {
			quiesce_txq(sc, txq);
		}

#ifdef TCP_OFFLOAD
		for_each_ofld_txq(vi, i, ofld_txq) {
			quiesce_wrq(sc, ofld_txq);
		}
#endif

		/* ... then rx queues and their freelists. */
		for_each_rxq(vi, i, rxq) {
			quiesce_iq(sc, &rxq->iq);
			quiesce_fl(sc, &rxq->fl);
		}

#ifdef TCP_OFFLOAD
		for_each_ofld_rxq(vi, i, ofld_rxq) {
			quiesce_iq(sc, &ofld_rxq->iq);
			quiesce_fl(sc, &ofld_rxq->fl);
		}
#endif
		/* RSS tables saved by vi_full_init. */
		free(vi->rss, M_CXGBE);
		free(vi->nm_rss, M_CXGBE);
	}

	t4_teardown_vi_queues(vi);
	vi->flags &= ~VI_INIT_DONE;

	return (0);
}
5335
/*
 * Wait for a tx queue to drain completely: first the software mp_ring,
 * then the hardware (the queue's status page cidx catches up with pidx),
 * and finally the driver's reclamation of tx descriptors.  Tx must already
 * have been disabled on the queue (EQ_ENABLED clear).
 */
static void
quiesce_txq(struct adapter *sc, struct sge_txq *txq)
{
	struct sge_eq *eq = &txq->eq;
	/* The queue's status page lives just past the last descriptor. */
	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];

	(void) sc;	/* unused */

#ifdef INVARIANTS
	TXQ_LOCK(txq);
	MPASS((eq->flags & EQ_ENABLED) == 0);
	TXQ_UNLOCK(txq);
#endif

	/* Wait for the mp_ring to empty. */
	while (!mp_ring_is_idle(txq->r)) {
		mp_ring_check_drainage(txq->r, 0);
		pause("rquiesce", 1);
	}

	/* Then wait for the hardware to finish. */
	while (spg->cidx != htobe16(eq->pidx))
		pause("equiesce", 1);

	/* Finally, wait for the driver to reclaim all descriptors. */
	while (eq->cidx != eq->pidx)
		pause("dquiesce", 1);
}
5364
/*
 * Quiesce a work-request queue.  Not implemented yet (see XXXTX below);
 * callers currently proceed without waiting for the wrq to drain.
 */
static void
quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
{

	/* XXXTX */
}
5371
/*
 * Move an ingress queue from IDLE to DISABLED, spinning until the
 * interrupt handler is no longer working on it.  Once DISABLED the
 * handler will leave the queue alone.
 */
static void
quiesce_iq(struct adapter *sc, struct sge_iq *iq)
{
	(void) sc;	/* unused */

	/* Synchronize with the interrupt handler */
	while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
		pause("iqfree", 1);
}
5381
/*
 * Mark a freelist doomed (so the starvation machinery leaves it alone)
 * and stop the adapter's starving-freelist callout.  sfl_lock interlocks
 * with that callout.  The KASSERT checks that the freelist was not on the
 * starving list when it was doomed.
 */
static void
quiesce_fl(struct adapter *sc, struct sge_fl *fl)
{
	mtx_lock(&sc->sfl_lock);
	FL_LOCK(fl);
	fl->flags |= FL_DOOMED;
	FL_UNLOCK(fl);
	callout_stop(&sc->sfl_callout);
	mtx_unlock(&sc->sfl_lock);

	KASSERT((fl->flags & FL_STARVING) == 0,
	    ("%s: still starving", __func__));
}
5395
/*
 * Allocate one interrupt resource (bus rid 'rid') and install 'handler'
 * on it; 'name' (optional) is attached to the vector as its description.
 * On a setup failure the allocated resource is deliberately left in 'irq'
 * — t4_detach walks sc->irq and frees whatever is valid (see the comment
 * above t4_setup_intr_handlers).
 */
static int
t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
    driver_intr_t *handler, void *arg, char *name)
{
	int rc;

	irq->rid = rid;
	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
	    RF_SHAREABLE | RF_ACTIVE);
	if (irq->res == NULL) {
		device_printf(sc->dev,
		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
		return (ENOMEM);
	}

	/* Filter-less (ithread) handler; NULL filter. */
	rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
	    NULL, handler, arg, &irq->tag);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to setup interrupt for rid %d, name %s: %d\n",
		    rid, name, rc);
	} else if (name)
		bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);

	return (rc);
}
5422
5423 static int
5424 t4_free_irq(struct adapter *sc, struct irq *irq)
5425 {
5426         if (irq->tag)
5427                 bus_teardown_intr(sc->dev, irq->res, irq->tag);
5428         if (irq->res)
5429                 bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
5430
5431         bzero(irq, sizeof(*irq));
5432
5433         return (0);
5434 }
5435
/*
 * Dump the adapter's registers into 'buf'.  The caller sets regs->len to
 * the size of the buffer beforehand; version encodes the chip id and
 * revision.
 */
static void
get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
{

	regs->version = chip_id(sc) | chip_rev(sc) << 10;
	t4_get_regs(sc, buf, regs->len);
}
5443
/* PL indirect register access: command register and its fields. */
#define A_PL_INDIR_CMD  0x1f8

#define S_PL_AUTOINC    31
#define M_PL_AUTOINC    0x1U
#define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC)
#define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)

#define S_PL_VFID       20
#define M_PL_VFID       0xffU
#define V_PL_VFID(x)    ((x) << S_PL_VFID)
#define G_PL_VFID(x)    (((x) >> S_PL_VFID) & M_PL_VFID)

#define S_PL_ADDR       0
#define M_PL_ADDR       0xfffffU
#define V_PL_ADDR(x)    ((x) << S_PL_ADDR)
#define G_PL_ADDR(x)    (((x) >> S_PL_ADDR) & M_PL_ADDR)

/* PL indirect access: data register. */
#define A_PL_INDIR_DATA 0x1fc

/*
 * Read one 64-bit VF MPS statistic (low word at 'reg', high word at
 * reg + 4).  A VF reads its own registers directly; a PF goes through the
 * PL indirect window with auto-increment enabled, so the second read of
 * the data register returns the high word.  Caller must hold reg_lock.
 */
static uint64_t
read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
{
	u32 stats[2];

	mtx_assert(&sc->reg_lock, MA_OWNED);
	if (sc->flags & IS_VF) {
		stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
		stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
	} else {
		t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
		    V_PL_VFID(G_FW_VIID_VIN(viid)) |
		    V_PL_ADDR(VF_MPS_REG(reg)));
		stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
		stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
	}
	return (((uint64_t)stats[1]) << 32 | stats[0]);
}
5481
/*
 * Read the full set of MPS statistics for VI 'viid' into *stats.  The
 * caller must hold reg_lock (asserted inside read_vf_stat).
 */
static void
t4_get_vi_stats(struct adapter *sc, unsigned int viid,
    struct fw_vi_stats_vf *stats)
{

#define GET_STAT(name) \
	read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)

	stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
	stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
	stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
	stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
	stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
	stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
	stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
	stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
	stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
	stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
	stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
	stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
	stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
	stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
	stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
	stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);

#undef GET_STAT
}
5509
/*
 * Zero every MPS statistic for VI 'viid' by writing 0s through the
 * auto-incrementing PL indirect window.  NOTE(review): unlike
 * read_vf_stat this does not assert reg_lock — presumably callers hold
 * it; verify at the call sites.
 */
static void
t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
{
	int reg;

	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
	    V_PL_VFID(G_FW_VIID_VIN(viid)) |
	    V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
}
5522
/*
 * Refresh the cached VI statistics from hardware, rate-limited to once
 * every 250ms.  No-op until the VI is fully initialized.
 */
static void
vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
{
	struct timeval tv;
	const struct timeval interval = {0, 250000};	/* 250ms */

	if (!(vi->flags & VI_INIT_DONE))
		return;

	/* Skip if the last refresh was less than 'interval' ago. */
	getmicrotime(&tv);
	timevalsub(&tv, &interval);
	if (timevalcmp(&tv, &vi->last_refreshed, <))
		return;

	mtx_lock(&sc->reg_lock);
	t4_get_vi_stats(sc, vi->viid, &vi->stats);
	getmicrotime(&vi->last_refreshed);
	mtx_unlock(&sc->reg_lock);
}
5542
/*
 * Refresh the cached port statistics, rate-limited to once every 250ms.
 * Also sums the per-buffer-group tunnel congestion drop counters out of
 * the TP MIB (one indirect read per bit set in mps_bg_map).
 */
static void
cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
{
	u_int i, v, tnl_cong_drops, bg_map;
	struct timeval tv;
	const struct timeval interval = {0, 250000};	/* 250ms */

	/* Skip if the last refresh was less than 'interval' ago. */
	getmicrotime(&tv);
	timevalsub(&tv, &interval);
	if (timevalcmp(&tv, &pi->last_refreshed, <))
		return;

	tnl_cong_drops = 0;
	t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
	bg_map = pi->mps_bg_map;
	while (bg_map) {
		i = ffs(bg_map) - 1;	/* lowest set bit = buffer group # */
		mtx_lock(&sc->reg_lock);
		t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
		    A_TP_MIB_TNL_CNG_DROP_0 + i);
		mtx_unlock(&sc->reg_lock);
		tnl_cong_drops += v;
		bg_map &= ~(1 << i);
	}
	pi->tnl_cong_drops = tnl_cong_drops;
	getmicrotime(&pi->last_refreshed);
}
5570
/*
 * Per-port 1Hz callout: refresh the port's stats and reschedule.  Runs
 * with the port lock held (asserted).
 */
static void
cxgbe_tick(void *arg)
{
	struct port_info *pi = arg;
	struct adapter *sc = pi->adapter;

	PORT_LOCK_ASSERT_OWNED(pi);
	cxgbe_refresh_stats(sc, pi);

	callout_schedule(&pi->tick, hz);
}
5582
/*
 * Per-VI 1Hz callout: refresh the VI's stats and reschedule.  Used when
 * the port has multiple VIs or on a VF (see cxgbe_init_synchronized).
 */
void
vi_tick(void *arg)
{
	struct vi_info *vi = arg;
	struct adapter *sc = vi->pi->adapter;

	vi_refresh_stats(sc, vi);

	callout_schedule(&vi->tick, hz);
}
5593
5594 /*
5595  * Should match fw_caps_config_<foo> enums in t4fw_interface.h
5596  */
/*
 * Each entry appears to be a kernel "%b"-style bit-description string
 * ("\20" followed by bit-number/name pairs, as used with the
 * sysctl_bitfield helpers elsewhere in this file) decoding one
 * fw_caps_config_<foo> word; array order must match those enums.
 */
static char *caps_decoder[] = {
	"\20\001IPMI\002NCSI",				/* 0: NBM */
	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
	"\20\001INGRESS\002EGRESS",			/* 2: switch */
	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
	    "\006HASHFILTER\007ETHOFLD",
	"\20\001TOE",					/* 4: TOE */
	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
	    "\007T10DIF"
	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
	"\20\001LOOKASIDE\002TLSKEYS",			/* 7: Crypto */
	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
		    "\004PO_INITIATOR\005PO_TARGET",
};
5614
/*
 * Register the adapter-wide sysctl tree (dev.t4nex.X.*).
 *
 * Layout:
 *  - dev.t4nex.X.*       general adapter info and debug knobs.  Only the
 *                        first few nodes are created for a VF; everything
 *                        after the IS_VF check is PF-only.
 *  - dev.t4nex.X.misc.*  logs and diagnostics (CTLFLAG_SKIP so sysctl -a
 *                        does not dump them by default).
 *  - dev.t4nex.X.toe.*   TOE tunables, only with TCP_OFFLOAD compiled in
 *                        and offload enabled on this adapter.
 *
 * Note: registration order determines the order nodes appear in, so the
 * sequence of SYSCTL_ADD_* calls below is deliberate.
 */
void
t4_sysctls(struct adapter *sc)
{
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid *oid;
        struct sysctl_oid_list *children, *c0;
        /* "%b"-format bit names for sc->doorbells (decoded by sysctl_bitfield_8b). */
        static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};

        ctx = device_get_sysctl_ctx(sc->dev);

        /*
         * dev.t4nex.X.
         */
        oid = device_get_sysctl_tree(sc->dev);
        c0 = children = SYSCTL_CHILDREN(oid);

        sc->sc_do_rxcopy = 1;
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
            &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
            sc->params.nports, "# of ports");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
            CTLTYPE_STRING | CTLFLAG_RD, doorbells, (uintptr_t)&sc->doorbells,
            sysctl_bitfield_8b, "A", "available doorbells");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
            sc->params.vpd.cclk, "core clock frequency (in KHz)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
            CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
            sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
            "interrupt holdoff timer values (us)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
            CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
            sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
            "interrupt holdoff packet counter values");

        t4_sge_sysctls(sc, ctx, children);

        sc->lro_timeout = 100;
        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
            &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
            &sc->debug_flags, 0, "flags to enable runtime debugging");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
            CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
            CTLFLAG_RD, sc->fw_version, 0, "firmware version");

        /* A VF exposes only the nodes above; everything below is PF-only. */
        if (sc->flags & IS_VF)
                return;

        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
            NULL, chip_rev(sc), "chip hardware revision");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
            CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
            CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
            CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
            CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
            CTLFLAG_RD, sc->params.vpd.na, 0, "network address");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
            sc->er_version, 0, "expansion ROM version");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
            sc->bs_version, 0, "bootstrap firmware version");

        SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
            NULL, sc->params.scfg_vers, "serial config version");

        SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
            NULL, sc->params.vpd_vers, "VPD version");

        SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
            CTLFLAG_RD, sc->cfg_file, 0, "configuration file");

        SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
            sc->cfcsum, "config file checksum");

/*
 * One read-only string node per capability word; caps_decoder[n] supplies
 * the bit names decoded by sysctl_bitfield_16b.
 */
#define SYSCTL_CAP(name, n, text) \
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
            CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], (uintptr_t)&sc->name, \
            sysctl_bitfield_16b, "A", "available " text " capabilities")

        SYSCTL_CAP(nbmcaps, 0, "NBM");
        SYSCTL_CAP(linkcaps, 1, "link");
        SYSCTL_CAP(switchcaps, 2, "switch");
        SYSCTL_CAP(niccaps, 3, "NIC");
        SYSCTL_CAP(toecaps, 4, "TCP offload");
        SYSCTL_CAP(rdmacaps, 5, "RDMA");
        SYSCTL_CAP(iscsicaps, 6, "iSCSI");
        SYSCTL_CAP(cryptocaps, 7, "crypto");
        SYSCTL_CAP(fcoecaps, 8, "FCoE");
#undef SYSCTL_CAP

        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
            NULL, sc->tids.nftids, "number of filters");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
            CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
            "chip temperature (in Celsius)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg", CTLTYPE_STRING |
            CTLFLAG_RD, sc, 0, sysctl_loadavg, "A",
            "microprocessor load averages (debug firmwares only)");

        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD,
            &sc->params.core_vdd, 0, "core Vdd (in mV)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus",
            CTLTYPE_STRING | CTLFLAG_RD, sc, LOCAL_CPUS,
            sysctl_cpus, "A", "local CPUs");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus",
            CTLTYPE_STRING | CTLFLAG_RD, sc, INTR_CPUS,
            sysctl_cpus, "A", "preferred CPUs for interrupts");

        /*
         * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
         */
        oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
            CTLFLAG_RD | CTLFLAG_SKIP, NULL,
            "logs and miscellaneous information");
        children = SYSCTL_CHILDREN(oid);

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_cctrl, "A", "congestion control");

        /*
         * CIM ingress queues: arg2 < CIM_NUM_IBQ selects an IBQ in
         * sysctl_cim_ibq_obq; the OBQ nodes below pass n + CIM_NUM_IBQ.
         */
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
            sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
            sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
            sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
            sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
            sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");

        /* T6 and later use a different CIM LA handler. */
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6,
            "A", "CIM logic analyzer");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_cim_ma_la, "A", "CIM MA logic analyzer");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
            sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
            sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
            sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
            sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
            sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
            sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");

        /* OBQs 6 and 7 only exist on T5 and later. */
        if (chip_id(sc) > CHELSIO_T4) {
                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
                    CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
                    sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
                    CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
                    sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
        }

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_cim_qcfg, "A", "CIM queue configuration");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_cpl_stats, "A", "CPL statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_ddp_stats, "A", "non-TCP DDP statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_devlog, "A", "firmware's device log");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_fcoe_stats, "A", "FCoE statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_hw_sched, "A", "hardware scheduler ");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_l2t, "A", "hardware L2 table");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_smt, "A", "hardware source MAC table");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_lb_stats, "A", "loopback statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_meminfo, "A", "memory regions");

        /* T6 and later use a different MPS TCAM layout/handler. */
        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
            "A", "MPS TCAM entries");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_path_mtus, "A", "path MTUs");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_pm_stats, "A", "PM statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_rdma_stats, "A", "RDMA statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_tcp_stats, "A", "TCP statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_tids, "A", "TID information");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_tp_err_stats, "A", "TP error statistics");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
            CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
            "TP logic analyzer event capture mask");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_tp_la, "A", "TP logic analyzer");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_tx_rate, "A", "Tx rate");

        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
            CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
            sysctl_ulprx_la, "A", "ULPRX logic analyzer");

        /* Write-combined doorbell statistics exist on T5 and later only. */
        if (chip_id(sc) >= CHELSIO_T5) {
                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
                    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
                    sysctl_wcwr_stats, "A", "write combined work requests");
        }

#ifdef TCP_OFFLOAD
        if (is_offload(sc)) {
                int i;
                char s[4];

                /*
                 * dev.t4nex.X.toe.
                 */
                oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
                    NULL, "TOE parameters");
                children = SYSCTL_CHILDREN(oid);

                sc->tt.cong_algorithm = -1;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
                    CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
                    "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
                    "3 = highspeed)");

                sc->tt.sndbuf = 256 * 1024;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
                    &sc->tt.sndbuf, 0, "max hardware send buffer size");

                sc->tt.ddp = 0;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
                    &sc->tt.ddp, 0, "DDP allowed");

                sc->tt.rx_coalesce = 1;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
                    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");

                sc->tt.tls = 0;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW,
                    &sc->tt.tls, 0, "Inline TLS allowed");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
                    CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports,
                    "I", "TCP ports that use inline TLS+TOE RX");

                sc->tt.tx_align = 1;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
                    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");

                sc->tt.tx_zcopy = 0;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
                    CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
                    "Enable zero-copy aio_write(2)");

                sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
                SYSCTL_ADD_INT(ctx, children, OID_AUTO,
                    "cop_managed_offloading", CTLFLAG_RW,
                    &sc->tt.cop_managed_offloading, 0,
                    "COP (Connection Offload Policy) controls all TOE offload");

                /*
                 * For sysctl_tp_tick, arg2 selects which TP tick is
                 * reported: 0 = timer, 1 = timestamp, 2 = DACK.
                 */
                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
                    CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
                    "TP timer tick (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
                    CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
                    "TCP timestamp tick (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
                    CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
                    "DACK tick (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
                    CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
                    "IU", "DACK timer (us)");

                /* arg2 for sysctl_tp_timer is the TP timer register address. */
                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
                    sysctl_tp_timer, "LU", "Minimum retransmit interval (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
                    sysctl_tp_timer, "LU", "Maximum retransmit interval (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
                    sysctl_tp_timer, "LU", "Persist timer min (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
                    sysctl_tp_timer, "LU", "Persist timer max (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
                    sysctl_tp_timer, "LU", "Keepalive idle timer (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
                    sysctl_tp_timer, "LU", "Keepalive interval timer (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
                    sysctl_tp_timer, "LU", "Initial SRTT (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
                    CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
                    sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
                    CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX,
                    sysctl_tp_shift_cnt, "IU",
                    "Number of SYN retransmissions before abort");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
                    CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2,
                    sysctl_tp_shift_cnt, "IU",
                    "Number of retransmissions before abort");

                SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
                    CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2,
                    sysctl_tp_shift_cnt, "IU",
                    "Number of keepalive probes before abort");

                /* One node per backoff slot: dev.t4nex.X.toe.rexmt_backoff.0-15. */
                oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
                    CTLFLAG_RD, NULL, "TOE retransmit backoffs");
                children = SYSCTL_CHILDREN(oid);
                for (i = 0; i < 16; i++) {
                        snprintf(s, sizeof(s), "%u", i);
                        SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
                            CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff,
                            "IU", "TOE retransmit backoff");
                }
        }
#endif
}
6049
6050 void
6051 vi_sysctls(struct vi_info *vi)
6052 {
6053         struct sysctl_ctx_list *ctx;
6054         struct sysctl_oid *oid;
6055         struct sysctl_oid_list *children;
6056
6057         ctx = device_get_sysctl_ctx(vi->dev);
6058
6059         /*
6060          * dev.v?(cxgbe|cxl).X.
6061          */
6062         oid = device_get_sysctl_tree(vi->dev);
6063         children = SYSCTL_CHILDREN(oid);
6064
6065         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
6066             vi->viid, "VI identifer");
6067         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
6068             &vi->nrxq, 0, "# of rx queues");
6069         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
6070             &vi->ntxq, 0, "# of tx queues");
6071         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
6072             &vi->first_rxq, 0, "index of first rx queue");
6073         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
6074             &vi->first_txq, 0, "index of first tx queue");
6075         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_base", CTLFLAG_RD, NULL,
6076             vi->rss_base, "start of RSS indirection table");
6077         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
6078             vi->rss_size, "size of RSS indirection table");
6079
6080         if (IS_MAIN_VI(vi)) {
6081                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
6082                     CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
6083                     "Reserve queue 0 for non-flowid packets");
6084         }
6085
6086 #ifdef TCP_OFFLOAD
6087         if (vi->nofldrxq != 0) {
6088                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
6089                     &vi->nofldrxq, 0,
6090                     "# of rx queues for offloaded TCP connections");
6091                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
6092                     &vi->nofldtxq, 0,
6093                     "# of tx queues for offloaded TCP connections");
6094                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
6095                     CTLFLAG_RD, &vi->first_ofld_rxq, 0,
6096                     "index of first TOE rx queue");
6097                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
6098                     CTLFLAG_RD, &vi->first_ofld_txq, 0,
6099                     "index of first TOE tx queue");
6100                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
6101                     CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6102                     sysctl_holdoff_tmr_idx_ofld, "I",
6103                     "holdoff timer index for TOE queues");
6104                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
6105                     CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6106                     sysctl_holdoff_pktc_idx_ofld, "I",
6107                     "holdoff packet counter index for TOE queues");
6108         }
6109 #endif
6110 #ifdef DEV_NETMAP
6111         if (vi->nnmrxq != 0) {
6112                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
6113                     &vi->nnmrxq, 0, "# of netmap rx queues");
6114                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
6115                     &vi->nnmtxq, 0, "# of netmap tx queues");
6116                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
6117                     CTLFLAG_RD, &vi->first_nm_rxq, 0,
6118                     "index of first netmap rx queue");
6119                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
6120                     CTLFLAG_RD, &vi->first_nm_txq, 0,
6121                     "index of first netmap tx queue");
6122         }
6123 #endif
6124
6125         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
6126             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
6127             "holdoff timer index");
6128         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
6129             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
6130             "holdoff packet counter index");
6131
6132         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
6133             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
6134             "rx queue size");
6135         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
6136             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
6137             "tx queue size");
6138 }
6139
6140 static void
6141 cxgbe_sysctls(struct port_info *pi)
6142 {
6143         struct sysctl_ctx_list *ctx;
6144         struct sysctl_oid *oid;
6145         struct sysctl_oid_list *children, *children2;
6146         struct adapter *sc = pi->adapter;
6147         int i;
6148         char name[16];
6149         static char *tc_flags = {"\20\1USER\2SYNC\3ASYNC\4ERR"};
6150
6151         ctx = device_get_sysctl_ctx(pi->dev);
6152
6153         /*
6154          * dev.cxgbe.X.
6155          */
6156         oid = device_get_sysctl_tree(pi->dev);
6157         children = SYSCTL_CHILDREN(oid);
6158
6159         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
6160            CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
6161         if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
6162                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
6163                     CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
6164                     "PHY temperature (in Celsius)");
6165                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
6166                     CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
6167                     "PHY firmware version");
6168         }
6169
6170         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
6171             CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A",
6172             "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)");
6173         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
6174             CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A",
6175             "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
6176         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
6177             CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I",
6178             "autonegotiation (-1 = not supported)");
6179
6180         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
6181             port_top_speed(pi), "max speed (in Gbps)");
6182         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
6183             pi->mps_bg_map, "MPS buffer group map");
6184         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
6185             NULL, pi->rx_e_chan_map, "TP rx e-channel map");
6186
6187         if (sc->flags & IS_VF)
6188                 return;
6189
6190         /*
6191          * dev.(cxgbe|cxl).X.tc.
6192          */
6193         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
6194             "Tx scheduler traffic classes (cl_rl)");
6195         children2 = SYSCTL_CHILDREN(oid);
6196         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
6197             CTLFLAG_RW, &pi->sched_params->pktsize, 0,
6198             "pktsize for per-flow cl-rl (0 means up to the driver )");
6199         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
6200             CTLFLAG_RW, &pi->sched_params->burstsize, 0,
6201             "burstsize for per-flow cl-rl (0 means up to the driver)");
6202         for (i = 0; i < sc->chip_params->nsched_cls; i++) {
6203                 struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
6204
6205                 snprintf(name, sizeof(name), "%d", i);
6206                 children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
6207                     SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
6208                     "traffic class"));
6209                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
6210                     CTLTYPE_STRING | CTLFLAG_RD, tc_flags, (uintptr_t)&tc->flags,
6211                     sysctl_bitfield_8b, "A", "flags");
6212                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
6213                     CTLFLAG_RD, &tc->refcount, 0, "references to this class");
6214                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
6215                     CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i,
6216                     sysctl_tc_params, "A", "traffic class parameters");
6217         }
6218
6219         /*
6220          * dev.cxgbe.X.stats.
6221          */
6222         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
6223             NULL, "port statistics");
6224         children = SYSCTL_CHILDREN(oid);
6225         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
6226             &pi->tx_parse_error, 0,
6227             "# of tx packets with invalid length or # of segments");
6228
6229 #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
6230         SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
6231             CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
6232             sysctl_handle_t4_reg64, "QU", desc)
6233
6234         SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
6235             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
6236         SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
6237             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
6238         SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
6239             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
6240         SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
6241             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
6242         SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
6243             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
6244         SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
6245             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
6246         SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
6247             "# of tx frames in this range",
6248             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
6249         SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
6250             "# of tx frames in this range",
6251             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
6252         SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
6253             "# of tx frames in this range",
6254             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
6255         SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
6256             "# of tx frames in this range",
6257             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
6258         SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
6259             "# of tx frames in this range",
6260             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
6261         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
6262             "# of tx frames in this range",
6263             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
6264         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
6265             "# of tx frames in this range",
6266             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
6267         SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
6268             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
6269         SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
6270             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
6271         SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
6272             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
6273         SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
6274             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
6275         SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
6276             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
6277         SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
6278             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
6279         SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
6280             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
6281         SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
6282             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
6283         SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
6284             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
6285         SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
6286             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
6287
6288         SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
6289             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
6290         SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
6291             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
6292         SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
6293             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
6294         SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
6295             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
6296         SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
6297             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
6298         SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
6299             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
6300         SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
6301             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
6302         SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
6303             "# of frames received with bad FCS",
6304             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
6305         SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
6306             "# of frames received with length error",
6307             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
6308         SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
6309             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
6310         SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
6311             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
6312         SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
6313             "# of rx frames in this range",
6314             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
6315         SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
6316             "# of rx frames in this range",
6317             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
6318         SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
6319             "# of rx frames in this range",
6320             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
6321         SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
6322             "# of rx frames in this range",
6323             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
6324         SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
6325             "# of rx frames in this range",
6326             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
6327         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
6328             "# of rx frames in this range",
6329             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
6330         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
6331             "# of rx frames in this range",
6332             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
6333         SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
6334             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
6335         SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
6336             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
6337         SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
6338             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
6339         SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
6340             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
6341         SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
6342             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
6343         SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
6344             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
6345         SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
6346             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
6347         SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
6348             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
6349         SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
6350             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
6351
6352 #undef SYSCTL_ADD_T4_REG64
6353
6354 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
6355         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
6356             &pi->stats.name, desc)
6357
6358         /* We get these from port_stats and they may be stale by up to 1s */
6359         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
6360             "# drops due to buffer-group 0 overflows");
6361         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
6362             "# drops due to buffer-group 1 overflows");
6363         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
6364             "# drops due to buffer-group 2 overflows");
6365         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
6366             "# drops due to buffer-group 3 overflows");
6367         SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
6368             "# of buffer-group 0 truncated packets");
6369         SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
6370             "# of buffer-group 1 truncated packets");
6371         SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
6372             "# of buffer-group 2 truncated packets");
6373         SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
6374             "# of buffer-group 3 truncated packets");
6375
6376 #undef SYSCTL_ADD_T4_PORTSTAT
6377
6378         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records",
6379             CTLFLAG_RD, &pi->tx_tls_records,
6380             "# of TLS records transmitted");
6381         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets",
6382             CTLFLAG_RD, &pi->tx_tls_octets,
6383             "# of payload octets in transmitted TLS records");
6384         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records",
6385             CTLFLAG_RD, &pi->rx_tls_records,
6386             "# of TLS records received");
6387         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets",
6388             CTLFLAG_RD, &pi->rx_tls_octets,
6389             "# of payload octets in received TLS records");
6390 }
6391
6392 static int
6393 sysctl_int_array(SYSCTL_HANDLER_ARGS)
6394 {
6395         int rc, *i, space = 0;
6396         struct sbuf sb;
6397
6398         sbuf_new_for_sysctl(&sb, NULL, 64, req);
6399         for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
6400                 if (space)
6401                         sbuf_printf(&sb, " ");
6402                 sbuf_printf(&sb, "%d", *i);
6403                 space = 1;
6404         }
6405         rc = sbuf_finish(&sb);
6406         sbuf_delete(&sb);
6407         return (rc);
6408 }
6409
6410 static int
6411 sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
6412 {
6413         int rc;
6414         struct sbuf *sb;
6415
6416         rc = sysctl_wire_old_buffer(req, 0);
6417         if (rc != 0)
6418                 return(rc);
6419
6420         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6421         if (sb == NULL)
6422                 return (ENOMEM);
6423
6424         sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
6425         rc = sbuf_finish(sb);
6426         sbuf_delete(sb);
6427
6428         return (rc);
6429 }
6430
6431 static int
6432 sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
6433 {
6434         int rc;
6435         struct sbuf *sb;
6436
6437         rc = sysctl_wire_old_buffer(req, 0);
6438         if (rc != 0)
6439                 return(rc);
6440
6441         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6442         if (sb == NULL)
6443                 return (ENOMEM);
6444
6445         sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
6446         rc = sbuf_finish(sb);
6447         sbuf_delete(sb);
6448
6449         return (rc);
6450 }
6451
/*
 * Read a value from the port's BT PHY over MDIO and report it as an int.
 * arg2 (op) selects which of two registers is read; the register numbers
 * are undocumented here (see the XXX below).  NOTE(review): which quantity
 * each op corresponds to is not visible in this file — confirm against the
 * sysctl registration site.
 */
static int
sysctl_btphy(SYSCTL_HANDLER_ARGS)
{
	struct port_info *pi = arg1;
	int op = arg2;
	struct adapter *sc = pi->adapter;
	u_int v;
	int rc;

	/* Serialize against other users of the mailbox/MDIO. */
	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
	if (rc)
		return (rc);
	/* XXX: magic numbers */
	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
	    &v);
	end_synchronized_op(sc, 0);
	if (rc)
		return (rc);
	/* For op 0 only the high byte of the 16-bit register is reported. */
	if (op == 0)
		v /= 256;

	rc = sysctl_handle_int(oidp, &v, 0, req);
	return (rc);
}
6476
6477 static int
6478 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
6479 {
6480         struct vi_info *vi = arg1;
6481         int rc, val;
6482
6483         val = vi->rsrv_noflowq;
6484         rc = sysctl_handle_int(oidp, &val, 0, req);
6485         if (rc != 0 || req->newptr == NULL)
6486                 return (rc);
6487
6488         if ((val >= 1) && (vi->ntxq > 1))
6489                 vi->rsrv_noflowq = 1;
6490         else
6491                 vi->rsrv_noflowq = 0;
6492
6493         return (rc);
6494 }
6495
/*
 * Get/set the interrupt holdoff timer index for all of this VI's rx queues.
 * Unlike the pktc index, this can be changed while the queues are live: the
 * new value is pushed into each queue's iq.intr_params.
 */
static int
sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
{
	struct vi_info *vi = arg1;
	struct adapter *sc = vi->pi->adapter;
	int idx, rc, i;
	struct sge_rxq *rxq;
	uint8_t v;

	idx = vi->tmr_idx;

	rc = sysctl_handle_int(oidp, &idx, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);

	/* Must be one of the SGE_NTIMERS configured timer values. */
	if (idx < 0 || idx >= SGE_NTIMERS)
		return (EINVAL);

	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
	    "t4tmr");
	if (rc)
		return (rc);

	/* Packet-count threshold stays enabled iff pktc_idx is set. */
	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
	for_each_rxq(vi, i, rxq) {
#ifdef atomic_store_rel_8
		/* Release store so the interrupt path sees a complete value. */
		atomic_store_rel_8(&rxq->iq.intr_params, v);
#else
		rxq->iq.intr_params = v;
#endif
	}
	vi->tmr_idx = idx;

	end_synchronized_op(sc, LOCK_HELD);
	return (0);
}
6532
6533 static int
6534 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
6535 {
6536         struct vi_info *vi = arg1;
6537         struct adapter *sc = vi->pi->adapter;
6538         int idx, rc;
6539
6540         idx = vi->pktc_idx;
6541
6542         rc = sysctl_handle_int(oidp, &idx, 0, req);
6543         if (rc != 0 || req->newptr == NULL)
6544                 return (rc);
6545
6546         if (idx < -1 || idx >= SGE_NCOUNTERS)
6547                 return (EINVAL);
6548
6549         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6550             "t4pktc");
6551         if (rc)
6552                 return (rc);
6553
6554         if (vi->flags & VI_INIT_DONE)
6555                 rc = EBUSY; /* cannot be changed once the queues are created */
6556         else
6557                 vi->pktc_idx = idx;
6558
6559         end_synchronized_op(sc, LOCK_HELD);
6560         return (rc);
6561 }
6562
6563 static int
6564 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
6565 {
6566         struct vi_info *vi = arg1;
6567         struct adapter *sc = vi->pi->adapter;
6568         int qsize, rc;
6569
6570         qsize = vi->qsize_rxq;
6571
6572         rc = sysctl_handle_int(oidp, &qsize, 0, req);
6573         if (rc != 0 || req->newptr == NULL)
6574                 return (rc);
6575
6576         if (qsize < 128 || (qsize & 7))
6577                 return (EINVAL);
6578
6579         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6580             "t4rxqs");
6581         if (rc)
6582                 return (rc);
6583
6584         if (vi->flags & VI_INIT_DONE)
6585                 rc = EBUSY; /* cannot be changed once the queues are created */
6586         else
6587                 vi->qsize_rxq = qsize;
6588
6589         end_synchronized_op(sc, LOCK_HELD);
6590         return (rc);
6591 }
6592
6593 static int
6594 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
6595 {
6596         struct vi_info *vi = arg1;
6597         struct adapter *sc = vi->pi->adapter;
6598         int qsize, rc;
6599
6600         qsize = vi->qsize_txq;
6601
6602         rc = sysctl_handle_int(oidp, &qsize, 0, req);
6603         if (rc != 0 || req->newptr == NULL)
6604                 return (rc);
6605
6606         if (qsize < 128 || qsize > 65536)
6607                 return (EINVAL);
6608
6609         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6610             "t4txqs");
6611         if (rc)
6612                 return (rc);
6613
6614         if (vi->flags & VI_INIT_DONE)
6615                 rc = EBUSY; /* cannot be changed once the queues are created */
6616         else
6617                 vi->qsize_txq = qsize;
6618
6619         end_synchronized_op(sc, LOCK_HELD);
6620         return (rc);
6621 }
6622
6623 static int
6624 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
6625 {
6626         struct port_info *pi = arg1;
6627         struct adapter *sc = pi->adapter;
6628         struct link_config *lc = &pi->link_cfg;
6629         int rc;
6630
6631         if (req->newptr == NULL) {
6632                 struct sbuf *sb;
6633                 static char *bits = "\20\1RX\2TX\3AUTO";
6634
6635                 rc = sysctl_wire_old_buffer(req, 0);
6636                 if (rc != 0)
6637                         return(rc);
6638
6639                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6640                 if (sb == NULL)
6641                         return (ENOMEM);
6642
6643                 if (lc->link_ok) {
6644                         sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
6645                             (lc->requested_fc & PAUSE_AUTONEG), bits);
6646                 } else {
6647                         sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
6648                             PAUSE_RX | PAUSE_AUTONEG), bits);
6649                 }
6650                 rc = sbuf_finish(sb);
6651                 sbuf_delete(sb);
6652         } else {
6653                 char s[2];
6654                 int n;
6655
6656                 s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
6657                     PAUSE_AUTONEG));
6658                 s[1] = 0;
6659
6660                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6661                 if (rc != 0)
6662                         return(rc);
6663
6664                 if (s[1] != 0)
6665                         return (EINVAL);
6666                 if (s[0] < '0' || s[0] > '9')
6667                         return (EINVAL);        /* not a number */
6668                 n = s[0] - '0';
6669                 if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
6670                         return (EINVAL);        /* some other bit is set too */
6671
6672                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6673                     "t4PAUSE");
6674                 if (rc)
6675                         return (rc);
6676                 PORT_LOCK(pi);
6677                 lc->requested_fc = n;
6678                 fixup_link_config(pi);
6679                 if (pi->up_vis > 0)
6680                         rc = apply_link_config(pi);
6681                 set_current_media(pi);
6682                 PORT_UNLOCK(pi);
6683                 end_synchronized_op(sc, 0);
6684         }
6685
6686         return (rc);
6687 }
6688
6689 static int
6690 sysctl_fec(SYSCTL_HANDLER_ARGS)
6691 {
6692         struct port_info *pi = arg1;
6693         struct adapter *sc = pi->adapter;
6694         struct link_config *lc = &pi->link_cfg;
6695         int rc;
6696         int8_t old;
6697
6698         if (req->newptr == NULL) {
6699                 struct sbuf *sb;
6700                 static char *bits = "\20\1RS\2BASE-R\3RSVD1\4RSVD2\5RSVD3\6AUTO";
6701
6702                 rc = sysctl_wire_old_buffer(req, 0);
6703                 if (rc != 0)
6704                         return(rc);
6705
6706                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6707                 if (sb == NULL)
6708                         return (ENOMEM);
6709
6710                 /*
6711                  * Display the requested_fec when the link is down -- the actual
6712                  * FEC makes sense only when the link is up.
6713                  */
6714                 if (lc->link_ok) {
6715                         sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
6716                             (lc->requested_fec & FEC_AUTO), bits);
6717                 } else {
6718                         sbuf_printf(sb, "%b", lc->requested_fec, bits);
6719                 }
6720                 rc = sbuf_finish(sb);
6721                 sbuf_delete(sb);
6722         } else {
6723                 char s[3];
6724                 int n;
6725
6726                 snprintf(s, sizeof(s), "%d",
6727                     lc->requested_fec == FEC_AUTO ? -1 :
6728                     lc->requested_fec & M_FW_PORT_CAP32_FEC);
6729
6730                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6731                 if (rc != 0)
6732                         return(rc);
6733
6734                 n = strtol(&s[0], NULL, 0);
6735                 if (n < 0 || n & FEC_AUTO)
6736                         n = FEC_AUTO;
6737                 else {
6738                         if (n & ~M_FW_PORT_CAP32_FEC)
6739                                 return (EINVAL);/* some other bit is set too */
6740                         if (!powerof2(n))
6741                                 return (EINVAL);/* one bit can be set at most */
6742                 }
6743
6744                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6745                     "t4fec");
6746                 if (rc)
6747                         return (rc);
6748                 PORT_LOCK(pi);
6749                 old = lc->requested_fec;
6750                 if (n == FEC_AUTO)
6751                         lc->requested_fec = FEC_AUTO;
6752                 else if (n == 0)
6753                         lc->requested_fec = FEC_NONE;
6754                 else {
6755                         if ((lc->supported | V_FW_PORT_CAP32_FEC(n)) !=
6756                             lc->supported) {
6757                                 rc = ENOTSUP;
6758                                 goto done;
6759                         }
6760                         lc->requested_fec = n;
6761                 }
6762                 fixup_link_config(pi);
6763                 if (pi->up_vis > 0) {
6764                         rc = apply_link_config(pi);
6765                         if (rc != 0) {
6766                                 lc->requested_fec = old;
6767                                 if (rc == FW_EPROTO)
6768                                         rc = ENOTSUP;
6769                         }
6770                 }
6771 done:
6772                 PORT_UNLOCK(pi);
6773                 end_synchronized_op(sc, 0);
6774         }
6775
6776         return (rc);
6777 }
6778
/*
 * Get/set link autonegotiation.  Reads report 0/1 when the port supports
 * autoneg, -1 when it does not.  Writes of 0 disable, 1 enable, and any
 * other value selects AUTONEG_AUTO.
 */
static int
sysctl_autoneg(SYSCTL_HANDLER_ARGS)
{
	struct port_info *pi = arg1;
	struct adapter *sc = pi->adapter;
	struct link_config *lc = &pi->link_cfg;
	int rc, val;

	if (lc->supported & FW_PORT_CAP32_ANEG)
		val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
	else
		val = -1;
	rc = sysctl_handle_int(oidp, &val, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);
	/* Map the user's int onto the AUTONEG_* constants. */
	if (val == 0)
		val = AUTONEG_DISABLE;
	else if (val == 1)
		val = AUTONEG_ENABLE;
	else
		val = AUTONEG_AUTO;

	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
	    "t4aneg");
	if (rc)
		return (rc);
	PORT_LOCK(pi);
	/* Can't enable autoneg on a port that doesn't support it. */
	if (val == AUTONEG_ENABLE && !(lc->supported & FW_PORT_CAP32_ANEG)) {
		rc = ENOTSUP;
		goto done;
	}
	lc->requested_aneg = val;
	fixup_link_config(pi);
	/* Apply immediately only if some VI is up on this port. */
	if (pi->up_vis > 0)
		rc = apply_link_config(pi);
	set_current_media(pi);
done:
	PORT_UNLOCK(pi);
	end_synchronized_op(sc, 0);
	return (rc);
}
6820
6821 static int
6822 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
6823 {
6824         struct adapter *sc = arg1;
6825         int reg = arg2;
6826         uint64_t val;
6827
6828         val = t4_read_reg64(sc, reg);
6829
6830         return (sysctl_handle_64(oidp, &val, 0, req));
6831 }
6832
6833 static int
6834 sysctl_temperature(SYSCTL_HANDLER_ARGS)
6835 {
6836         struct adapter *sc = arg1;
6837         int rc, t;
6838         uint32_t param, val;
6839
6840         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
6841         if (rc)
6842                 return (rc);
6843         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
6844             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
6845             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
6846         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
6847         end_synchronized_op(sc, 0);
6848         if (rc)
6849                 return (rc);
6850
6851         /* unknown is returned as 0 but we display -1 in that case */
6852         t = val == 0 ? -1 : val;
6853
6854         rc = sysctl_handle_int(oidp, &t, 0, req);
6855         return (rc);
6856 }
6857
/*
 * Report the firmware's load averages as three numbers (one per byte of
 * the returned parameter).  Only debug and custom firmwares implement
 * this; others return 0xffffffff.
 */
static int
sysctl_loadavg(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc;
	uint32_t param, val;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
	if (rc)
		return (rc);
	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
	end_synchronized_op(sc, 0);
	if (rc)
		return (rc);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	if (val == 0xffffffff) {
		/* Only debug and custom firmwares report load averages. */
		sbuf_printf(sb, "not available");
	} else {
		/* Three averages packed one per byte, low byte first. */
		sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
		    (val >> 16) & 0xff);
	}
	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
6896
/*
 * Dump the TP congestion control table: for each congestion control window
 * print the additive increments for all NMTUS MTUs (split across two output
 * lines), followed by the window parameter and the decrement factor.
 */
static int
sysctl_cctrl(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i;
	uint16_t incr[NMTUS][NCCTRL_WIN];
	/* Decrement factors, indexed by params.b_wnd[]. */
	static const char *dec_fac[] = {
		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
		"0.9375"
	};

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	t4_read_cong_tbl(sc, incr);

	for (i = 0; i < NCCTRL_WIN; ++i) {
		/* First 8 MTU columns on one line, remaining 8 on the next. */
		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
		    incr[5][i], incr[6][i], incr[7][i]);
		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
6934
/*
 * Human-readable CIM queue names, indexed by the (unadjusted) arg2 value
 * passed to sysctl_cim_ibq_obq: the CIM_NUM_IBQ inbound queues come first,
 * followed by the outbound queues.
 */
static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
};
6940
/*
 * Dump the contents of one CIM inbound or outbound queue.  arg2 in
 * [0, CIM_NUM_IBQ) selects an IBQ; larger values select an OBQ (after
 * subtracting CIM_NUM_IBQ).
 */
static int
sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i, n, qid = arg2;
	uint32_t *buf, *p;
	char *qtype;
	u_int cim_num_obq = sc->chip_params->cim_num_obq;

	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
	    ("%s: bad qid %d\n", __func__, qid));

	if (qid < CIM_NUM_IBQ) {
		/* inbound queue */
		qtype = "IBQ";
		n = 4 * CIM_IBQ_SIZE;
		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
		rc = t4_read_cim_ibq(sc, qid, buf, n);
	} else {
		/* outbound queue */
		qtype = "OBQ";
		qid -= CIM_NUM_IBQ;
		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
		rc = t4_read_cim_obq(sc, qid, buf, n);
	}

	/* The t4_read_cim_* helpers return a negative errno on failure. */
	if (rc < 0) {
		rc = -rc;
		goto done;
	}
	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		goto done;

	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
	if (sb == NULL) {
		rc = ENOMEM;
		goto done;
	}

	/* qname is indexed by the original arg2, not the adjusted qid. */
	sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]);
	for (i = 0, p = buf; i < n; i += 16, p += 4)
		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
		    p[2], p[3]);

	rc = sbuf_finish(sb);
	sbuf_delete(sb);
done:
	free(buf, M_CXGBE);
	return (rc);
}
6996
6997 static int
6998 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
6999 {
7000         struct adapter *sc = arg1;
7001         u_int cfg;
7002         struct sbuf *sb;
7003         uint32_t *buf, *p;
7004         int rc;
7005
7006         MPASS(chip_id(sc) <= CHELSIO_T5);
7007
7008         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
7009         if (rc != 0)
7010                 return (rc);
7011
7012         rc = sysctl_wire_old_buffer(req, 0);
7013         if (rc != 0)
7014                 return (rc);
7015
7016         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7017         if (sb == NULL)
7018                 return (ENOMEM);
7019
7020         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
7021             M_ZERO | M_WAITOK);
7022
7023         rc = -t4_cim_read_la(sc, buf, NULL);
7024         if (rc != 0)
7025                 goto done;
7026
7027         sbuf_printf(sb, "Status   Data      PC%s",
7028             cfg & F_UPDBGLACAPTPCONLY ? "" :
7029             "     LS0Stat  LS0Addr             LS0Data");
7030
7031         for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
7032                 if (cfg & F_UPDBGLACAPTPCONLY) {
7033                         sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
7034                             p[6], p[7]);
7035                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
7036                             (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
7037                             p[4] & 0xff, p[5] >> 8);
7038                         sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
7039                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7040                             p[1] & 0xf, p[2] >> 4);
7041                 } else {
7042                         sbuf_printf(sb,
7043                             "\n  %02x   %x%07x %x%07x %08x %08x "
7044                             "%08x%08x%08x%08x",
7045                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7046                             p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
7047                             p[6], p[7]);
7048                 }
7049         }
7050
7051         rc = sbuf_finish(sb);
7052         sbuf_delete(sb);
7053 done:
7054         free(buf, M_CXGBE);
7055         return (rc);
7056 }
7057
7058 static int
7059 sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS)
7060 {
7061         struct adapter *sc = arg1;
7062         u_int cfg;
7063         struct sbuf *sb;
7064         uint32_t *buf, *p;
7065         int rc;
7066
7067         MPASS(chip_id(sc) > CHELSIO_T5);
7068
7069         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
7070         if (rc != 0)
7071                 return (rc);
7072
7073         rc = sysctl_wire_old_buffer(req, 0);
7074         if (rc != 0)
7075                 return (rc);
7076
7077         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7078         if (sb == NULL)
7079                 return (ENOMEM);
7080
7081         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
7082             M_ZERO | M_WAITOK);
7083
7084         rc = -t4_cim_read_la(sc, buf, NULL);
7085         if (rc != 0)
7086                 goto done;
7087
7088         sbuf_printf(sb, "Status   Inst    Data      PC%s",
7089             cfg & F_UPDBGLACAPTPCONLY ? "" :
7090             "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
7091
7092         for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
7093                 if (cfg & F_UPDBGLACAPTPCONLY) {
7094                         sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
7095                             p[3] & 0xff, p[2], p[1], p[0]);
7096                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
7097                             (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
7098                             p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
7099                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
7100                             (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
7101                             p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
7102                             p[6] >> 16);
7103                 } else {
7104                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
7105                             "%08x %08x %08x %08x %08x %08x",
7106                             (p[9] >> 16) & 0xff,
7107                             p[9] & 0xffff, p[8] >> 16,
7108                             p[8] & 0xffff, p[7] >> 16,
7109                             p[7] & 0xffff, p[6] >> 16,
7110                             p[2], p[1], p[0], p[5], p[4], p[3]);
7111                 }
7112         }
7113
7114         rc = sbuf_finish(sb);
7115         sbuf_delete(sb);
7116 done:
7117         free(buf, M_CXGBE);
7118         return (rc);
7119 }
7120
7121 static int
7122 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
7123 {
7124         struct adapter *sc = arg1;
7125         u_int i;
7126         struct sbuf *sb;
7127         uint32_t *buf, *p;
7128         int rc;
7129
7130         rc = sysctl_wire_old_buffer(req, 0);
7131         if (rc != 0)
7132                 return (rc);
7133
7134         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7135         if (sb == NULL)
7136                 return (ENOMEM);
7137
7138         buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
7139             M_ZERO | M_WAITOK);
7140
7141         t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
7142         p = buf;
7143
7144         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7145                 sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
7146                     p[1], p[0]);
7147         }
7148
7149         sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
7150         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7151                 sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
7152                     (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
7153                     (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
7154                     (p[1] >> 2) | ((p[2] & 3) << 30),
7155                     (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
7156                     p[0] & 1);
7157         }
7158
7159         rc = sbuf_finish(sb);
7160         sbuf_delete(sb);
7161         free(buf, M_CXGBE);
7162         return (rc);
7163 }
7164
7165 static int
7166 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
7167 {
7168         struct adapter *sc = arg1;
7169         u_int i;
7170         struct sbuf *sb;
7171         uint32_t *buf, *p;
7172         int rc;
7173
7174         rc = sysctl_wire_old_buffer(req, 0);
7175         if (rc != 0)
7176                 return (rc);
7177
7178         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7179         if (sb == NULL)
7180                 return (ENOMEM);
7181
7182         buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
7183             M_ZERO | M_WAITOK);
7184
7185         t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
7186         p = buf;
7187
7188         sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
7189         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7190                 sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
7191                     (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
7192                     p[4], p[3], p[2], p[1], p[0]);
7193         }
7194
7195         sbuf_printf(sb, "\n\nCntl ID               Data");
7196         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7197                 sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
7198                     (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
7199         }
7200
7201         rc = sbuf_finish(sb);
7202         sbuf_delete(sb);
7203         free(buf, M_CXGBE);
7204         return (rc);
7205 }
7206
/*
 * Dump the CIM queue configuration and state: one row per uP IBQ and OBQ
 * with its base, size, threshold (IBQs only), read/write pointers, and
 * SOP/EOP/free-space counters.
 */
static int
sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
{
        struct adapter *sc = arg1;
        struct sbuf *sb;
        int rc, i;
        uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
        uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
        uint16_t thres[CIM_NUM_IBQ];
        uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
        uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
        u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;

        cim_num_obq = sc->chip_params->cim_num_obq;
        /* T4 exposes the queue pointers at different registers than T5+. */
        if (is_t4(sc)) {
                ibq_rdaddr = A_UP_IBQ_0_RDADDR;
                obq_rdaddr = A_UP_OBQ_0_REALADDR;
        } else {
                ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
                obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
        }
        nq = CIM_NUM_IBQ + cim_num_obq;

        /* 4 words of state per queue, then 2 words per OBQ for wr pointers. */
        rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
        if (rc == 0)
                rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
        if (rc != 0)
                return (rc);

        t4_read_cimq_cfg(sc, base, size, thres);

        rc = sysctl_wire_old_buffer(req, 0);
        if (rc != 0)
                return (rc);

        sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
        if (sb == NULL)
                return (ENOMEM);

        sbuf_printf(sb,
            "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");

        /* IBQs: p[0] rd ptr, p[1] wr ptr, p[2] remaining flits, p[3] SOP/EOP. */
        for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
                sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
                    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
                    G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
                    G_QUEREMFLITS(p[2]) * 16);
        /* OBQs: no threshold; write pointer comes from the obq_wr walk. */
        for ( ; i < nq; i++, p += 4, wr += 2)
                sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
                    base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
                    wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
                    G_QUEREMFLITS(p[2]) * 16);

        rc = sbuf_finish(sb);
        sbuf_delete(sb);

        return (rc);
}
7265
7266 static int
7267 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
7268 {
7269         struct adapter *sc = arg1;
7270         struct sbuf *sb;
7271         int rc;
7272         struct tp_cpl_stats stats;
7273
7274         rc = sysctl_wire_old_buffer(req, 0);
7275         if (rc != 0)
7276                 return (rc);
7277
7278         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7279         if (sb == NULL)
7280                 return (ENOMEM);
7281
7282         mtx_lock(&sc->reg_lock);
7283         t4_tp_get_cpl_stats(sc, &stats, 0);
7284         mtx_unlock(&sc->reg_lock);
7285
7286         if (sc->chip_params->nchan > 2) {
7287                 sbuf_printf(sb, "                 channel 0  channel 1"
7288                     "  channel 2  channel 3");
7289                 sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
7290                     stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
7291                 sbuf_printf(sb, "\nCPL responses:   %10u %10u %10u %10u",
7292                     stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
7293         } else {
7294                 sbuf_printf(sb, "                 channel 0  channel 1");
7295                 sbuf_printf(sb, "\nCPL requests:   %10u %10u",
7296                     stats.req[0], stats.req[1]);
7297                 sbuf_printf(sb, "\nCPL responses:   %10u %10u",
7298                     stats.rsp[0], stats.rsp[1]);
7299         }
7300
7301         rc = sbuf_finish(sb);
7302         sbuf_delete(sb);
7303
7304         return (rc);
7305 }
7306
7307 static int
7308 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
7309 {
7310         struct adapter *sc = arg1;
7311         struct sbuf *sb;
7312         int rc;
7313         struct tp_usm_stats stats;
7314
7315         rc = sysctl_wire_old_buffer(req, 0);
7316         if (rc != 0)
7317                 return(rc);
7318
7319         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7320         if (sb == NULL)
7321                 return (ENOMEM);
7322
7323         t4_get_usm_stats(sc, &stats, 1);
7324
7325         sbuf_printf(sb, "Frames: %u\n", stats.frames);
7326         sbuf_printf(sb, "Octets: %ju\n", stats.octets);
7327         sbuf_printf(sb, "Drops:  %u", stats.drops);
7328
7329         rc = sbuf_finish(sb);
7330         sbuf_delete(sb);
7331
7332         return (rc);
7333 }
7334
/* Printable names for firmware devlog levels, indexed by FW_DEVLOG_LEVEL_*. */
static const char * const devlog_level_strings[] = {
        [FW_DEVLOG_LEVEL_EMERG]         = "EMERG",
        [FW_DEVLOG_LEVEL_CRIT]          = "CRIT",
        [FW_DEVLOG_LEVEL_ERR]           = "ERR",
        [FW_DEVLOG_LEVEL_NOTICE]        = "NOTICE",
        [FW_DEVLOG_LEVEL_INFO]          = "INFO",
        [FW_DEVLOG_LEVEL_DEBUG]         = "DEBUG"
};
7343
/*
 * Printable names for firmware devlog facilities, indexed by
 * FW_DEVLOG_FACILITY_*.
 */
static const char * const devlog_facility_strings[] = {
        [FW_DEVLOG_FACILITY_CORE]       = "CORE",
        [FW_DEVLOG_FACILITY_CF]         = "CF",
        [FW_DEVLOG_FACILITY_SCHED]      = "SCHED",
        [FW_DEVLOG_FACILITY_TIMER]      = "TIMER",
        [FW_DEVLOG_FACILITY_RES]        = "RES",
        [FW_DEVLOG_FACILITY_HW]         = "HW",
        [FW_DEVLOG_FACILITY_FLR]        = "FLR",
        [FW_DEVLOG_FACILITY_DMAQ]       = "DMAQ",
        [FW_DEVLOG_FACILITY_PHY]        = "PHY",
        [FW_DEVLOG_FACILITY_MAC]        = "MAC",
        [FW_DEVLOG_FACILITY_PORT]       = "PORT",
        [FW_DEVLOG_FACILITY_VI]         = "VI",
        [FW_DEVLOG_FACILITY_FILTER]     = "FILTER",
        [FW_DEVLOG_FACILITY_ACL]        = "ACL",
        [FW_DEVLOG_FACILITY_TM]         = "TM",
        [FW_DEVLOG_FACILITY_QFC]        = "QFC",
        [FW_DEVLOG_FACILITY_DCB]        = "DCB",
        [FW_DEVLOG_FACILITY_ETH]        = "ETH",
        [FW_DEVLOG_FACILITY_OFLD]       = "OFLD",
        [FW_DEVLOG_FACILITY_RI]         = "RI",
        [FW_DEVLOG_FACILITY_ISCSI]      = "ISCSI",
        [FW_DEVLOG_FACILITY_FCOE]       = "FCOE",
        [FW_DEVLOG_FACILITY_FOISCSI]    = "FOISCSI",
        [FW_DEVLOG_FACILITY_FOFCOE]     = "FOFCOE",
        [FW_DEVLOG_FACILITY_CHNET]      = "CHNET",
};
7371
/*
 * Dump the firmware's device log (a circular buffer of fw_devlog_e records
 * kept in adapter memory) in chronological order.
 */
static int
sysctl_devlog(SYSCTL_HANDLER_ARGS)
{
        struct adapter *sc = arg1;
        struct devlog_params *dparams = &sc->params.devlog;
        struct fw_devlog_e *buf, *e;
        int i, j, rc, nentries, first = 0;
        struct sbuf *sb;
        uint64_t ftstamp = UINT64_MAX;

        if (dparams->addr == 0)
                return (ENXIO);

        buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
        if (buf == NULL)
                return (ENOMEM);

        rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
        if (rc != 0)
                goto done;

        /*
         * Convert every entry to host byte order and find the oldest one
         * (smallest non-zero timestamp) -- the log is a ring, so the oldest
         * record can be anywhere in the buffer.
         */
        nentries = dparams->size / sizeof(struct fw_devlog_e);
        for (i = 0; i < nentries; i++) {
                e = &buf[i];

                if (e->timestamp == 0)
                        break;  /* end */

                e->timestamp = be64toh(e->timestamp);
                e->seqno = be32toh(e->seqno);
                for (j = 0; j < 8; j++)
                        e->params[j] = be32toh(e->params[j]);

                if (e->timestamp < ftstamp) {
                        ftstamp = e->timestamp;
                        first = i;
                }
        }

        if (buf[first].timestamp == 0)
                goto done;      /* nothing in the log */

        rc = sysctl_wire_old_buffer(req, 0);
        if (rc != 0)
                goto done;

        sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
        if (sb == NULL) {
                rc = ENOMEM;
                goto done;
        }
        sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
            "Seq#", "Tstamp", "Level", "Facility", "Message");

        /* Walk the ring exactly once, starting at the oldest entry. */
        i = first;
        do {
                e = &buf[i];
                if (e->timestamp == 0)
                        break;  /* end */

                sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
                    e->seqno, e->timestamp,
                    (e->level < nitems(devlog_level_strings) ?
                        devlog_level_strings[e->level] : "UNKNOWN"),
                    (e->facility < nitems(devlog_facility_strings) ?
                        devlog_facility_strings[e->facility] : "UNKNOWN"));
                /*
                 * NOTE(review): e->fmt is a format string that comes from
                 * the firmware image, not from userland; it is trusted here.
                 */
                sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
                    e->params[2], e->params[3], e->params[4],
                    e->params[5], e->params[6], e->params[7]);

                if (++i == nentries)
                        i = 0;
        } while (i != first);

        rc = sbuf_finish(sb);
        sbuf_delete(sb);
done:
        free(buf, M_CXGBE);
        return (rc);
}
7452
7453 static int
7454 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
7455 {
7456         struct adapter *sc = arg1;
7457         struct sbuf *sb;
7458         int rc;
7459         struct tp_fcoe_stats stats[MAX_NCHAN];
7460         int i, nchan = sc->chip_params->nchan;
7461
7462         rc = sysctl_wire_old_buffer(req, 0);
7463         if (rc != 0)
7464                 return (rc);
7465
7466         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7467         if (sb == NULL)
7468                 return (ENOMEM);
7469
7470         for (i = 0; i < nchan; i++)
7471                 t4_get_fcoe_stats(sc, i, &stats[i], 1);
7472
7473         if (nchan > 2) {
7474                 sbuf_printf(sb, "                   channel 0        channel 1"
7475                     "        channel 2        channel 3");
7476                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
7477                     stats[0].octets_ddp, stats[1].octets_ddp,
7478                     stats[2].octets_ddp, stats[3].octets_ddp);
7479                 sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
7480                     stats[0].frames_ddp, stats[1].frames_ddp,
7481                     stats[2].frames_ddp, stats[3].frames_ddp);
7482                 sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
7483                     stats[0].frames_drop, stats[1].frames_drop,
7484                     stats[2].frames_drop, stats[3].frames_drop);
7485         } else {
7486                 sbuf_printf(sb, "                   channel 0        channel 1");
7487                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
7488                     stats[0].octets_ddp, stats[1].octets_ddp);
7489                 sbuf_printf(sb, "\nframesDDP:  %16u %16u",
7490                     stats[0].frames_ddp, stats[1].frames_ddp);
7491                 sbuf_printf(sb, "\nframesDrop: %16u %16u",
7492                     stats[0].frames_drop, stats[1].frames_drop);
7493         }
7494
7495         rc = sbuf_finish(sb);
7496         sbuf_delete(sb);
7497
7498         return (rc);
7499 }
7500
7501 static int
7502 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
7503 {
7504         struct adapter *sc = arg1;
7505         struct sbuf *sb;
7506         int rc, i;
7507         unsigned int map, kbps, ipg, mode;
7508         unsigned int pace_tab[NTX_SCHED];
7509
7510         rc = sysctl_wire_old_buffer(req, 0);
7511         if (rc != 0)
7512                 return (rc);
7513
7514         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7515         if (sb == NULL)
7516                 return (ENOMEM);
7517
7518         map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
7519         mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
7520         t4_read_pace_tbl(sc, pace_tab);
7521
7522         sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
7523             "Class IPG (0.1 ns)   Flow IPG (us)");
7524
7525         for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
7526                 t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
7527                 sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
7528                     (mode & (1 << i)) ? "flow" : "class", map & 3);
7529                 if (kbps)
7530                         sbuf_printf(sb, "%9u     ", kbps);
7531                 else
7532                         sbuf_printf(sb, " disabled     ");
7533
7534                 if (ipg)
7535                         sbuf_printf(sb, "%13u        ", ipg);
7536                 else
7537                         sbuf_printf(sb, "     disabled        ");
7538
7539                 if (pace_tab[i])
7540                         sbuf_printf(sb, "%10u", pace_tab[i]);
7541                 else
7542                         sbuf_printf(sb, "  disabled");
7543         }
7544
7545         rc = sbuf_finish(sb);
7546         sbuf_delete(sb);
7547
7548         return (rc);
7549 }
7550
/*
 * Dump the MPS loopback port statistics, two loopback channels per pass.
 */
static int
sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
{
        struct adapter *sc = arg1;
        struct sbuf *sb;
        int rc, i, j;
        uint64_t *p0, *p1;
        struct lb_port_stats s[2];
        /*
         * Row labels, printed against the counters of struct lb_port_stats
         * walked as a flat uint64_t array starting at 'octets'.  NOTE(review):
         * this assumes the struct (defined elsewhere) is exactly this many
         * consecutive uint64_t fields in this order -- verify if it changes.
         */
        static const char *stat_name[] = {
                "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
                "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
                "Frames128To255:", "Frames256To511:", "Frames512To1023:",
                "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
                "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
                "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
                "BG2FramesTrunc:", "BG3FramesTrunc:"
        };

        rc = sysctl_wire_old_buffer(req, 0);
        if (rc != 0)
                return (rc);

        sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
        if (sb == NULL)
                return (ENOMEM);

        memset(s, 0, sizeof(s));

        /* Two channels per iteration: one column pair per pass. */
        for (i = 0; i < sc->chip_params->nchan; i += 2) {
                t4_get_lb_stats(sc, i, &s[0]);
                t4_get_lb_stats(sc, i + 1, &s[1]);

                p0 = &s[0].octets;
                p1 = &s[1].octets;
                sbuf_printf(sb, "%s                       Loopback %u"
                    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);

                for (j = 0; j < nitems(stat_name); j++)
                        sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
                                   *p0++, *p1++);
        }

        rc = sbuf_finish(sb);
        sbuf_delete(sb);

        return (rc);
}
7598
7599 static int
7600 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
7601 {
7602         int rc = 0;
7603         struct port_info *pi = arg1;
7604         struct link_config *lc = &pi->link_cfg;
7605         struct sbuf *sb;
7606
7607         rc = sysctl_wire_old_buffer(req, 0);
7608         if (rc != 0)
7609                 return(rc);
7610         sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
7611         if (sb == NULL)
7612                 return (ENOMEM);
7613
7614         if (lc->link_ok || lc->link_down_rc == 255)
7615                 sbuf_printf(sb, "n/a");
7616         else
7617                 sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
7618
7619         rc = sbuf_finish(sb);
7620         sbuf_delete(sb);
7621
7622         return (rc);
7623 }
7624
/* A contiguous memory region: [base, limit] plus an index into region[]. */
struct mem_desc {
        unsigned int base;
        unsigned int limit;
        unsigned int idx;
};

/*
 * qsort(3)-style comparator ordering mem_desc entries by base address.
 * Compares explicitly instead of returning a->base - b->base: the bases
 * are unsigned, and the subtraction can wrap past INT_MAX and come back
 * with the wrong sign for widely separated addresses, mis-sorting the map.
 */
static int
mem_desc_cmp(const void *a, const void *b)
{
        const unsigned int ba = ((const struct mem_desc *)a)->base;
        const unsigned int bb = ((const struct mem_desc *)b)->base;

        if (ba < bb)
                return (-1);
        return (ba > bb);
}
7637
/*
 * Emit one "name  base-limit [size]" line describing a memory region;
 * empty and wrapped/zero-length regions are suppressed.
 */
static void
mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
    unsigned int to)
{
        const unsigned int len = to - from + 1;

        if (from == to || len == 0)
                return;

        /* XXX: need humanize_number(3) in libkern for a more readable 'size' */
        sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, len);
}
7654
7655 static int
7656 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
7657 {
7658         struct adapter *sc = arg1;
7659         struct sbuf *sb;
7660         int rc, i, n;
7661         uint32_t lo, hi, used, alloc;
7662         static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
7663         static const char *region[] = {
7664                 "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
7665                 "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
7666                 "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
7667                 "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
7668                 "RQUDP region:", "PBL region:", "TXPBL region:",
7669                 "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
7670                 "On-chip queues:", "TLS keys:",
7671         };
7672         struct mem_desc avail[4];
7673         struct mem_desc mem[nitems(region) + 3];        /* up to 3 holes */
7674         struct mem_desc *md = mem;
7675
7676         rc = sysctl_wire_old_buffer(req, 0);
7677         if (rc != 0)
7678                 return (rc);
7679
7680         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7681         if (sb == NULL)
7682                 return (ENOMEM);
7683
7684         for (i = 0; i < nitems(mem); i++) {
7685                 mem[i].limit = 0;
7686                 mem[i].idx = i;
7687         }
7688
7689         /* Find and sort the populated memory ranges */
7690         i = 0;
7691         lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
7692         if (lo & F_EDRAM0_ENABLE) {
7693                 hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
7694                 avail[i].base = G_EDRAM0_BASE(hi) << 20;
7695                 avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
7696                 avail[i].idx = 0;
7697                 i++;
7698         }
7699         if (lo & F_EDRAM1_ENABLE) {
7700                 hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
7701                 avail[i].base = G_EDRAM1_BASE(hi) << 20;
7702                 avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
7703                 avail[i].idx = 1;
7704                 i++;
7705         }
7706         if (lo & F_EXT_MEM_ENABLE) {
7707                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
7708                 avail[i].base = G_EXT_MEM_BASE(hi) << 20;
7709                 avail[i].limit = avail[i].base +
7710                     (G_EXT_MEM_SIZE(hi) << 20);
7711                 avail[i].idx = is_t5(sc) ? 3 : 2;       /* Call it MC0 for T5 */
7712                 i++;
7713         }
7714         if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
7715                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
7716                 avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
7717                 avail[i].limit = avail[i].base +
7718                     (G_EXT_MEM1_SIZE(hi) << 20);
7719                 avail[i].idx = 4;
7720                 i++;
7721         }
7722         if (!i)                                    /* no memory available */
7723                 return 0;
7724         qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
7725
7726         (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
7727         (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
7728         (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
7729         (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
7730         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
7731         (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
7732         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
7733         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
7734         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
7735
7736         /* the next few have explicit upper bounds */
7737         md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
7738         md->limit = md->base - 1 +
7739                     t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
7740                     G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
7741         md++;
7742
7743         md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
7744         md->limit = md->base - 1 +
7745                     t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
7746                     G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
7747         md++;
7748
7749         if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
7750                 if (chip_id(sc) <= CHELSIO_T5)
7751                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
7752                 else
7753                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
7754                 md->limit = 0;
7755         } else {
7756                 md->base = 0;
7757                 md->idx = nitems(region);  /* hide it */
7758         }
7759         md++;
7760
7761 #define ulp_region(reg) \
7762         md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
7763         (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
7764
7765         ulp_region(RX_ISCSI);
7766         ulp_region(RX_TDDP);
7767         ulp_region(TX_TPT);
7768         ulp_region(RX_STAG);
7769         ulp_region(RX_RQ);
7770         ulp_region(RX_RQUDP);
7771         ulp_region(RX_PBL);
7772         ulp_region(TX_PBL);
7773 #undef ulp_region
7774
7775         md->base = 0;
7776         md->idx = nitems(region);
7777         if (!is_t4(sc)) {
7778                 uint32_t size = 0;
7779                 uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
7780                 uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
7781
7782                 if (is_t5(sc)) {
7783                         if (sge_ctrl & F_VFIFO_ENABLE)
7784                                 size = G_DBVFIFO_SIZE(fifo_size);
7785                 } else
7786                         size = G_T6_DBVFIFO_SIZE(fifo_size);
7787
7788                 if (size) {
7789                         md->base = G_BASEADDR(t4_read_reg(sc,
7790                             A_SGE_DBVFIFO_BADDR));
7791                         md->limit = md->base + (size << 2) - 1;
7792                 }
7793         }
7794         md++;
7795
7796         md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
7797         md->limit = 0;
7798         md++;
7799         md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
7800         md->limit = 0;
7801         md++;
7802
7803         md->base = sc->vres.ocq.start;
7804         if (sc->vres.ocq.size)
7805                 md->limit = md->base + sc->vres.ocq.size - 1;
7806         else
7807                 md->idx = nitems(region);  /* hide it */
7808         md++;
7809
7810         md->base = sc->vres.key.start;
7811         if (sc->vres.key.size)
7812                 md->limit = md->base + sc->vres.key.size - 1;
7813         else
7814                 md->idx = nitems(region);  /* hide it */
7815         md++;
7816
7817         /* add any address-space holes, there can be up to 3 */
7818         for (n = 0; n < i - 1; n++)
7819                 if (avail[n].limit < avail[n + 1].base)
7820                         (md++)->base = avail[n].limit;
7821         if (avail[n].limit)
7822                 (md++)->base = avail[n].limit;
7823
7824         n = md - mem;
7825         qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
7826
7827         for (lo = 0; lo < i; lo++)
7828                 mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
7829                                 avail[lo].limit - 1);
7830
7831         sbuf_printf(sb, "\n");
7832         for (i = 0; i < n; i++) {
7833                 if (mem[i].idx >= nitems(region))
7834                         continue;                        /* skip holes */
7835                 if (!mem[i].limit)
7836                         mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
7837                 mem_region_show(sb, region[mem[i].idx], mem[i].base,
7838                                 mem[i].limit);
7839         }
7840
7841         sbuf_printf(sb, "\n");
7842         lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
7843         hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
7844         mem_region_show(sb, "uP RAM:", lo, hi);
7845
7846         lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
7847         hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
7848         mem_region_show(sb, "uP Extmem2:", lo, hi);
7849
7850         lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
7851         sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
7852                    G_PMRXMAXPAGE(lo),
7853                    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
7854                    (lo & F_PMRXNUMCHN) ? 2 : 1);
7855
7856         lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
7857         hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
7858         sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
7859                    G_PMTXMAXPAGE(lo),
7860                    hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
7861                    hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
7862         sbuf_printf(sb, "%u p-structs\n",
7863                    t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
7864
7865         for (i = 0; i < 4; i++) {
7866                 if (chip_id(sc) > CHELSIO_T5)
7867                         lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
7868                 else
7869                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
7870                 if (is_t5(sc)) {
7871                         used = G_T5_USED(lo);
7872                         alloc = G_T5_ALLOC(lo);
7873                 } else {
7874                         used = G_USED(lo);
7875                         alloc = G_ALLOC(lo);
7876                 }
7877                 /* For T6 these are MAC buffer groups */
7878                 sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
7879                     i, used, alloc);
7880         }
7881         for (i = 0; i < sc->chip_params->nchan; i++) {
7882                 if (chip_id(sc) > CHELSIO_T5)
7883                         lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
7884                 else
7885                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
7886                 if (is_t5(sc)) {
7887                         used = G_T5_USED(lo);
7888                         alloc = G_T5_ALLOC(lo);
7889                 } else {
7890                         used = G_USED(lo);
7891                         alloc = G_ALLOC(lo);
7892                 }
7893                 /* For T6 these are MAC buffer groups */
7894                 sbuf_printf(sb,
7895                     "\nLoopback %d using %u pages out of %u allocated",
7896                     i, used, alloc);
7897         }
7898
7899         rc = sbuf_finish(sb);
7900         sbuf_delete(sb);
7901
7902         return (rc);
7903 }
7904
7905 static inline void
7906 tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
7907 {
7908         *mask = x | y;
7909         y = htobe64(y);
7910         memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
7911 }
7912
/*
 * Dump the MPS TCAM (MAC address classification table) for T4/T5 chips.
 * Emits one line per valid entry: index, Ethernet address, match mask,
 * lookup attributes, and — for replicated entries — the replication map
 * obtained from the firmware via an LDST command.
 */
static int
sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i;

	/* T6 and later chips are handled by sysctl_mps_tcam_t6. */
	MPASS(chip_id(sc) <= CHELSIO_T5);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	sbuf_printf(sb,
	    "Idx  Ethernet address     Mask     Vld Ports PF"
	    "  VF              Replication             P0 P1 P2 P3  ML");
	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
		uint64_t tcamx, tcamy, mask;
		uint32_t cls_lo, cls_hi;
		uint8_t addr[ETHER_ADDR_LEN];

		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
		/* Overlapping x/y bits mark an invalid/unused entry. */
		if (tcamx & tcamy)
			continue;
		tcamxy2valmask(tcamx, tcamy, addr, &mask);
		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
			   addr[3], addr[4], addr[5], (uintmax_t)mask,
			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
			   G_PORTMAP(cls_hi), G_PF(cls_lo),
			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);

		if (cls_lo & F_REPLICATE) {
			struct fw_ldst_cmd ldst_cmd;

			/* Ask the firmware for this entry's replication map. */
			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
			ldst_cmd.op_to_addrspace =
			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
				F_FW_CMD_REQUEST | F_FW_CMD_READ |
				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
			ldst_cmd.u.mps.rplc.fid_idx =
			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
				V_FW_LDST_CMD_IDX(i));

			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
			    "t4mps");
			if (rc)
				break;
			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
			    sizeof(ldst_cmd), &ldst_cmd);
			end_synchronized_op(sc, 0);

			if (rc != 0) {
				/* Show the error code in the rplc column. */
				sbuf_printf(sb, "%36d", rc);
				rc = 0;
			} else {
				sbuf_printf(sb, " %08x %08x %08x %08x",
				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
			}
		} else
			sbuf_printf(sb, "%36s", "");

		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
	}

	/*
	 * rc != 0 here means begin_synchronized_op failed mid-loop; still
	 * finish the sbuf (discarding its status) so it can be deleted.
	 */
	if (rc)
		(void) sbuf_finish(sb);
	else
		rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
7999
/*
 * Dump the MPS TCAM for T6+ chips.  The T6 TCAM is read indirectly via
 * A_MPS_CLS_TCAM_DATA2_CTL and carries extra per-entry state (VNI, inner
 * VLAN, lookup type) compared to T4/T5.
 */
static int
sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i;

	/* T4/T5 chips are handled by sysctl_mps_tcam. */
	MPASS(chip_id(sc) > CHELSIO_T5);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
	    "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
	    "                           Replication"
	    "                                    P0 P1 P2 P3  ML\n");

	for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
		uint8_t dip_hit, vlan_vld, lookup_type, port_num;
		uint16_t ivlan;
		uint64_t tcamx, tcamy, val, mask;
		uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
		uint8_t addr[ETHER_ADDR_LEN];

		/* Indirect read: entries 0-255 in bank 0, the rest in bank 1. */
		ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
		if (i < 256)
			ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
		else
			ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
		tcamy = G_DMACH(val) << 32;
		tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
		lookup_type = G_DATALKPTYPE(data2);
		port_num = G_DATAPORTNUM(data2);
		if (lookup_type && lookup_type != M_DATALKPTYPE) {
			/* Inner header VNI */
			vniy = ((data2 & F_DATAVIDH2) << 23) |
				       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
			dip_hit = data2 & F_DATADIPHIT;
			vlan_vld = 0;
		} else {
			vniy = 0;
			dip_hit = 0;
			vlan_vld = data2 & F_DATAVIDH2;
			ivlan = G_VIDL(val);
		}

		/* Re-read the same entry with XYBITSEL=1 to get the X half. */
		ctl |= V_CTLXYBITSEL(1);
		t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
		val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
		tcamx = G_DMACH(val) << 32;
		tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
		data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
		if (lookup_type && lookup_type != M_DATALKPTYPE) {
			/* Inner header VNI mask */
			vnix = ((data2 & F_DATAVIDH2) << 23) |
			       (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
		} else
			vnix = 0;

		/* Overlapping x/y bits mark an invalid/unused entry. */
		if (tcamx & tcamy)
			continue;
		tcamxy2valmask(tcamx, tcamy, addr, &mask);

		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));

		if (lookup_type && lookup_type != M_DATALKPTYPE) {
			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
			    "%012jx %06x %06x    -    -   %3c"
			    "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
			    addr[1], addr[2], addr[3], addr[4], addr[5],
			    (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
			    port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
		} else {
			sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
			    "%012jx    -       -   ", i, addr[0], addr[1],
			    addr[2], addr[3], addr[4], addr[5],
			    (uintmax_t)mask);

			/* ivlan was set above on this same branch. */
			if (vlan_vld)
				sbuf_printf(sb, "%4u   Y     ", ivlan);
			else
				sbuf_printf(sb, "  -    N     ");

			sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
			    lookup_type ? 'I' : 'O', port_num,
			    cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
			    G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
			    cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
		}

		if (cls_lo & F_T6_REPLICATE) {
			struct fw_ldst_cmd ldst_cmd;

			/* Ask the firmware for this entry's replication map. */
			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
			ldst_cmd.op_to_addrspace =
			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
				F_FW_CMD_REQUEST | F_FW_CMD_READ |
				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
			ldst_cmd.u.mps.rplc.fid_idx =
			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
				V_FW_LDST_CMD_IDX(i));

			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
			    "t6mps");
			if (rc)
				break;
			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
			    sizeof(ldst_cmd), &ldst_cmd);
			end_synchronized_op(sc, 0);

			if (rc != 0) {
				/* Show the error code in the rplc column. */
				sbuf_printf(sb, "%72d", rc);
				rc = 0;
			} else {
				sbuf_printf(sb, " %08x %08x %08x %08x"
				    " %08x %08x %08x %08x",
				    be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
				    be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
				    be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
				    be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
			}
		} else
			sbuf_printf(sb, "%72s", "");

		sbuf_printf(sb, "%4u%3u%3u%3u %#x",
		    G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
		    G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
		    (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
	}

	/*
	 * rc != 0 here means begin_synchronized_op failed mid-loop; still
	 * finish the sbuf (discarding its status) so it can be deleted.
	 */
	if (rc)
		(void) sbuf_finish(sb);
	else
		rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
8155
8156 static int
8157 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
8158 {
8159         struct adapter *sc = arg1;
8160         struct sbuf *sb;
8161         int rc;
8162         uint16_t mtus[NMTUS];
8163
8164         rc = sysctl_wire_old_buffer(req, 0);
8165         if (rc != 0)
8166                 return (rc);
8167
8168         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8169         if (sb == NULL)
8170                 return (ENOMEM);
8171
8172         t4_read_mtu_tbl(sc, mtus, NULL);
8173
8174         sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
8175             mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
8176             mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
8177             mtus[14], mtus[15]);
8178
8179         rc = sbuf_finish(sb);
8180         sbuf_delete(sb);
8181
8182         return (rc);
8183 }
8184
/*
 * Report PM (payload memory) Tx/Rx statistics.  The first four entries of
 * each stats array are command/byte counters; on T6+ the arrays also carry
 * wait/occupancy (index 4) and latency (index 6) entries, with index 5
 * unused (NULL label).
 */
static int
sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i;
	uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
	uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
	static const char *tx_stats[MAX_PM_NSTATS] = {
		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
		"Tx FIFO wait", NULL, "Tx latency"
	};
	static const char *rx_stats[MAX_PM_NSTATS] = {
		"Read:", "Write bypass:", "Write mem:", "Flush:",
		"Rx FIFO wait", NULL, "Rx latency"
	};

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
	if (sb == NULL)
		return (ENOMEM);

	t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
	t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);

	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
	for (i = 0; i < 4; i++) {
		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
		    tx_cyc[i]);
	}

	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
	for (i = 0; i < 4; i++) {
		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
		    rx_cyc[i]);
	}

	if (chip_id(sc) > CHELSIO_T5) {
		/*
		 * i is 4 after the loops above: print the "FIFO wait"
		 * entries, then skip the unused slot (index 5) to reach
		 * the latency entries at index 6.
		 */
		sbuf_printf(sb,
		    "\n              Total wait      Total occupancy");
		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
		    tx_cyc[i]);
		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
		    rx_cyc[i]);

		i += 2;
		MPASS(i < nitems(tx_stats));

		sbuf_printf(sb,
		    "\n                   Reads           Total wait");
		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
		    tx_cyc[i]);
		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
		    rx_cyc[i]);
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
8249
8250 static int
8251 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
8252 {
8253         struct adapter *sc = arg1;
8254         struct sbuf *sb;
8255         int rc;
8256         struct tp_rdma_stats stats;
8257
8258         rc = sysctl_wire_old_buffer(req, 0);
8259         if (rc != 0)
8260                 return (rc);
8261
8262         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8263         if (sb == NULL)
8264                 return (ENOMEM);
8265
8266         mtx_lock(&sc->reg_lock);
8267         t4_tp_get_rdma_stats(sc, &stats, 0);
8268         mtx_unlock(&sc->reg_lock);
8269
8270         sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
8271         sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
8272
8273         rc = sbuf_finish(sb);
8274         sbuf_delete(sb);
8275
8276         return (rc);
8277 }
8278
8279 static int
8280 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
8281 {
8282         struct adapter *sc = arg1;
8283         struct sbuf *sb;
8284         int rc;
8285         struct tp_tcp_stats v4, v6;
8286
8287         rc = sysctl_wire_old_buffer(req, 0);
8288         if (rc != 0)
8289                 return (rc);
8290
8291         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8292         if (sb == NULL)
8293                 return (ENOMEM);
8294
8295         mtx_lock(&sc->reg_lock);
8296         t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
8297         mtx_unlock(&sc->reg_lock);
8298
8299         sbuf_printf(sb,
8300             "                                IP                 IPv6\n");
8301         sbuf_printf(sb, "OutRsts:      %20u %20u\n",
8302             v4.tcp_out_rsts, v6.tcp_out_rsts);
8303         sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
8304             v4.tcp_in_segs, v6.tcp_in_segs);
8305         sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
8306             v4.tcp_out_segs, v6.tcp_out_segs);
8307         sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
8308             v4.tcp_retrans_segs, v6.tcp_retrans_segs);
8309
8310         rc = sbuf_finish(sb);
8311         sbuf_delete(sb);
8312
8313         return (rc);
8314 }
8315
/*
 * Report the adapter's TID ranges (ATID, HPFTID, TID, STID, FTID, ETID)
 * and their current usage counts.
 */
static int
sysctl_tids(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc;
	struct tid_info *t = &sc->tids;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
	if (sb == NULL)
		return (ENOMEM);

	if (t->natids) {
		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
		    t->atids_in_use);
	}

	if (t->nhpftids) {
		sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n",
		    t->hpftid_base, t->hpftid_end, t->hpftids_in_use);
	}

	if (t->ntids) {
		sbuf_printf(sb, "TID range: ");
		if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
			uint32_t b, hb;

			/* The LE hash-region registers moved on T6+. */
			if (chip_id(sc) <= CHELSIO_T5) {
				b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
				hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
			} else {
				b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
				hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
			}

			/*
			 * With the hash filter region enabled the TID space
			 * is split: [tid_base, b) plus [hb, ntids).
			 */
			if (b)
				sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1);
			sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
		} else
			sbuf_printf(sb, "%u-%u", t->tid_base, t->ntids - 1);
		sbuf_printf(sb, ", in use: %u\n",
		    atomic_load_acq_int(&t->tids_in_use));
	}

	if (t->nstids) {
		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
		    t->stid_base + t->nstids - 1, t->stids_in_use);
	}

	if (t->nftids) {
		sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base,
		    t->ftid_end, t->ftids_in_use);
	}

	if (t->netids) {
		sbuf_printf(sb, "ETID range: %u-%u, in use: %u\n", t->etid_base,
		    t->etid_base + t->netids - 1, t->etids_in_use);
	}

	/* Hardware's own view of active IPv4/IPv6 TID usage. */
	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
8388
/*
 * Report TP error statistics.  Two table layouts: four columns on chips
 * with more than two channels, two columns otherwise.
 */
static int
sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc;
	struct tp_err_stats stats;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
	if (sb == NULL)
		return (ENOMEM);

	/* Snapshot the counters under the register lock. */
	mtx_lock(&sc->reg_lock);
	t4_tp_get_err_stats(sc, &stats, 0);
	mtx_unlock(&sc->reg_lock);

	if (sc->chip_params->nchan > 2) {
		/* Four-channel layout. */
		sbuf_printf(sb, "                 channel 0  channel 1"
		    "  channel 2  channel 3\n");
		sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
		    stats.mac_in_errs[0], stats.mac_in_errs[1],
		    stats.mac_in_errs[2], stats.mac_in_errs[3]);
		sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
		    stats.hdr_in_errs[0], stats.hdr_in_errs[1],
		    stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
		sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
		    stats.tcp_in_errs[0], stats.tcp_in_errs[1],
		    stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
		sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
		    stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
		sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
		    stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
		sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
		    stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
		    stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
		sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
		    stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
	} else {
		/* Two-channel layout. */
		sbuf_printf(sb, "                 channel 0  channel 1\n");
		sbuf_printf(sb, "macInErrs:      %10u %10u\n",
		    stats.mac_in_errs[0], stats.mac_in_errs[1]);
		sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
		    stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
		sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
		    stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
		sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
		    stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
		sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
		    stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
		sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
		    stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
		sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
		    stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
		sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
		    stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
	}

	/* Channel-independent counters. */
	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
	    stats.ofld_no_neigh, stats.ofld_cong_defer);

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
8464
8465 static int
8466 sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
8467 {
8468         struct adapter *sc = arg1;
8469         struct tp_params *tpp = &sc->params.tp;
8470         u_int mask;
8471         int rc;
8472
8473         mask = tpp->la_mask >> 16;
8474         rc = sysctl_handle_int(oidp, &mask, 0, req);
8475         if (rc != 0 || req->newptr == NULL)
8476                 return (rc);
8477         if (mask > 0xffff)
8478                 return (EINVAL);
8479         tpp->la_mask = mask << 16;
8480         t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
8481
8482         return (0);
8483 }
8484
/*
 * Describes one bit-field within a 64-bit capture word; tables of these
 * are NULL-name terminated (see field_desc_show).
 */
struct field_desc {
	const char *name;	/* field label; NULL terminates a table */
	u_int start;		/* least significant bit position */
	u_int width;		/* width in bits */
};
8490
8491 static void
8492 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
8493 {
8494         char buf[32];
8495         int line_size = 0;
8496
8497         while (f->name) {
8498                 uint64_t mask = (1ULL << f->width) - 1;
8499                 int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
8500                     ((uintmax_t)v >> f->start) & mask);
8501
8502                 if (line_size + len >= 79) {
8503                         line_size = 8;
8504                         sbuf_printf(sb, "\n        ");
8505                 }
8506                 sbuf_printf(sb, "%s ", buf);
8507                 line_size += len + 1;
8508                 f++;
8509         }
8510         sbuf_printf(sb, "\n");
8511 }
8512
/*
 * Bit-field layout of a TP logic-analyzer capture word.  Used for every
 * word by tp_la_show/tp_la_show2 and for the first word of each pair by
 * tp_la_show3.
 */
static const struct field_desc tp_la0[] = {
	{ "RcfOpCodeOut", 60, 4 },
	{ "State", 56, 4 },
	{ "WcfState", 52, 4 },
	{ "RcfOpcSrcOut", 50, 2 },
	{ "CRxError", 49, 1 },
	{ "ERxError", 48, 1 },
	{ "SanityFailed", 47, 1 },
	{ "SpuriousMsg", 46, 1 },
	{ "FlushInputMsg", 45, 1 },
	{ "FlushInputCpl", 44, 1 },
	{ "RssUpBit", 43, 1 },
	{ "RssFilterHit", 42, 1 },
	{ "Tid", 32, 10 },
	{ "InitTcb", 31, 1 },
	{ "LineNumber", 24, 7 },
	{ "Emsg", 23, 1 },
	{ "EdataOut", 22, 1 },
	{ "Cmsg", 21, 1 },
	{ "CdataOut", 20, 1 },
	{ "EreadPdu", 19, 1 },
	{ "CreadPdu", 18, 1 },
	{ "TunnelPkt", 17, 1 },
	{ "RcfPeerFin", 16, 1 },
	{ "RcfReasonOut", 12, 4 },
	{ "TxCchannel", 10, 2 },
	{ "RcfTxChannel", 8, 2 },
	{ "RxEchannel", 6, 2 },
	{ "RcfRxChannel", 5, 1 },
	{ "RcfDataOutSrdy", 4, 1 },
	{ "RxDvld", 3, 1 },
	{ "RxOoDvld", 2, 1 },
	{ "RxCongestion", 1, 1 },
	{ "TxCongestion", 0, 1 },
	{ NULL }
};
8549
/*
 * Layout of the second word of a capture pair when bit 17 (TunnelPkt) of
 * the first word is clear — see tp_la_show3.
 */
static const struct field_desc tp_la1[] = {
	{ "CplCmdIn", 56, 8 },
	{ "CplCmdOut", 48, 8 },
	{ "ESynOut", 47, 1 },
	{ "EAckOut", 46, 1 },
	{ "EFinOut", 45, 1 },
	{ "ERstOut", 44, 1 },
	{ "SynIn", 43, 1 },
	{ "AckIn", 42, 1 },
	{ "FinIn", 41, 1 },
	{ "RstIn", 40, 1 },
	{ "DataIn", 39, 1 },
	{ "DataInVld", 38, 1 },
	{ "PadIn", 37, 1 },
	{ "RxBufEmpty", 36, 1 },
	{ "RxDdp", 35, 1 },
	{ "RxFbCongestion", 34, 1 },
	{ "TxFbCongestion", 33, 1 },
	{ "TxPktSumSrdy", 32, 1 },
	{ "RcfUlpType", 28, 4 },
	{ "Eread", 27, 1 },
	{ "Ebypass", 26, 1 },
	{ "Esave", 25, 1 },
	{ "Static0", 24, 1 },
	{ "Cread", 23, 1 },
	{ "Cbypass", 22, 1 },
	{ "Csave", 21, 1 },
	{ "CPktOut", 20, 1 },
	{ "RxPagePoolFull", 18, 2 },
	{ "RxLpbkPkt", 17, 1 },
	{ "TxLpbkPkt", 16, 1 },
	{ "RxVfValid", 15, 1 },
	{ "SynLearned", 14, 1 },
	{ "SetDelEntry", 13, 1 },
	{ "SetInvEntry", 12, 1 },
	{ "CpcmdDvld", 11, 1 },
	{ "CpcmdSave", 10, 1 },
	{ "RxPstructsFull", 8, 2 },
	{ "EpcmdDvld", 7, 1 },
	{ "EpcmdFlush", 6, 1 },
	{ "EpcmdTrimPrefix", 5, 1 },
	{ "EpcmdTrimPostfix", 4, 1 },
	{ "ERssIp4Pkt", 3, 1 },
	{ "ERssIp6Pkt", 2, 1 },
	{ "ERssTcpUdpPkt", 1, 1 },
	{ "ERssFceFipPkt", 0, 1 },
	{ NULL }
};
8598
/*
 * Layout of the second word of a capture pair when bit 17 (TunnelPkt) of
 * the first word is set — see tp_la_show3.  Differs from tp_la1 only in
 * the MPS PF/VF fields at bits 44-55.
 */
static const struct field_desc tp_la2[] = {
	{ "CplCmdIn", 56, 8 },
	{ "MpsVfVld", 55, 1 },
	{ "MpsPf", 52, 3 },
	{ "MpsVf", 44, 8 },
	{ "SynIn", 43, 1 },
	{ "AckIn", 42, 1 },
	{ "FinIn", 41, 1 },
	{ "RstIn", 40, 1 },
	{ "DataIn", 39, 1 },
	{ "DataInVld", 38, 1 },
	{ "PadIn", 37, 1 },
	{ "RxBufEmpty", 36, 1 },
	{ "RxDdp", 35, 1 },
	{ "RxFbCongestion", 34, 1 },
	{ "TxFbCongestion", 33, 1 },
	{ "TxPktSumSrdy", 32, 1 },
	{ "RcfUlpType", 28, 4 },
	{ "Eread", 27, 1 },
	{ "Ebypass", 26, 1 },
	{ "Esave", 25, 1 },
	{ "Static0", 24, 1 },
	{ "Cread", 23, 1 },
	{ "Cbypass", 22, 1 },
	{ "Csave", 21, 1 },
	{ "CPktOut", 20, 1 },
	{ "RxPagePoolFull", 18, 2 },
	{ "RxLpbkPkt", 17, 1 },
	{ "TxLpbkPkt", 16, 1 },
	{ "RxVfValid", 15, 1 },
	{ "SynLearned", 14, 1 },
	{ "SetDelEntry", 13, 1 },
	{ "SetInvEntry", 12, 1 },
	{ "CpcmdDvld", 11, 1 },
	{ "CpcmdSave", 10, 1 },
	{ "RxPstructsFull", 8, 2 },
	{ "EpcmdDvld", 7, 1 },
	{ "EpcmdFlush", 6, 1 },
	{ "EpcmdTrimPrefix", 5, 1 },
	{ "EpcmdTrimPostfix", 4, 1 },
	{ "ERssIp4Pkt", 3, 1 },
	{ "ERssIp6Pkt", 2, 1 },
	{ "ERssTcpUdpPkt", 1, 1 },
	{ "ERssFceFipPkt", 0, 1 },
	{ NULL }
};
8645
8646 static void
8647 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
8648 {
8649
8650         field_desc_show(sb, *p, tp_la0);
8651 }
8652
8653 static void
8654 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
8655 {
8656
8657         if (idx)
8658                 sbuf_printf(sb, "\n");
8659         field_desc_show(sb, p[0], tp_la0);
8660         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8661                 field_desc_show(sb, p[1], tp_la0);
8662 }
8663
8664 static void
8665 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
8666 {
8667
8668         if (idx)
8669                 sbuf_printf(sb, "\n");
8670         field_desc_show(sb, p[0], tp_la0);
8671         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8672                 field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
8673 }
8674
8675 static int
8676 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
8677 {
8678         struct adapter *sc = arg1;
8679         struct sbuf *sb;
8680         uint64_t *buf, *p;
8681         int rc;
8682         u_int i, inc;
8683         void (*show_func)(struct sbuf *, uint64_t *, int);
8684
8685         rc = sysctl_wire_old_buffer(req, 0);
8686         if (rc != 0)
8687                 return (rc);
8688
8689         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8690         if (sb == NULL)
8691                 return (ENOMEM);
8692
8693         buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
8694
8695         t4_tp_read_la(sc, buf, NULL);
8696         p = buf;
8697
8698         switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
8699         case 2:
8700                 inc = 2;
8701                 show_func = tp_la_show2;
8702                 break;
8703         case 3:
8704                 inc = 2;
8705                 show_func = tp_la_show3;
8706                 break;
8707         default:
8708                 inc = 1;
8709                 show_func = tp_la_show;
8710         }
8711
8712         for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
8713                 (*show_func)(sb, p, i);
8714
8715         rc = sbuf_finish(sb);
8716         sbuf_delete(sb);
8717         free(buf, M_CXGBE);
8718         return (rc);
8719 }
8720
8721 static int
8722 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
8723 {
8724         struct adapter *sc = arg1;
8725         struct sbuf *sb;
8726         int rc;
8727         u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
8728
8729         rc = sysctl_wire_old_buffer(req, 0);
8730         if (rc != 0)
8731                 return (rc);
8732
8733         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8734         if (sb == NULL)
8735                 return (ENOMEM);
8736
8737         t4_get_chan_txrate(sc, nrate, orate);
8738
8739         if (sc->chip_params->nchan > 2) {
8740                 sbuf_printf(sb, "              channel 0   channel 1"
8741                     "   channel 2   channel 3\n");
8742                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
8743                     nrate[0], nrate[1], nrate[2], nrate[3]);
8744                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
8745                     orate[0], orate[1], orate[2], orate[3]);
8746         } else {
8747                 sbuf_printf(sb, "              channel 0   channel 1\n");
8748                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
8749                     nrate[0], nrate[1]);
8750                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
8751                     orate[0], orate[1]);
8752         }
8753
8754         rc = sbuf_finish(sb);
8755         sbuf_delete(sb);
8756
8757         return (rc);
8758 }
8759
8760 static int
8761 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
8762 {
8763         struct adapter *sc = arg1;
8764         struct sbuf *sb;
8765         uint32_t *buf, *p;
8766         int rc, i;
8767
8768         rc = sysctl_wire_old_buffer(req, 0);
8769         if (rc != 0)
8770                 return (rc);
8771
8772         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8773         if (sb == NULL)
8774                 return (ENOMEM);
8775
8776         buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
8777             M_ZERO | M_WAITOK);
8778
8779         t4_ulprx_read_la(sc, buf);
8780         p = buf;
8781
8782         sbuf_printf(sb, "      Pcmd        Type   Message"
8783             "                Data");
8784         for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
8785                 sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
8786                     p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
8787         }
8788
8789         rc = sbuf_finish(sb);
8790         sbuf_delete(sb);
8791         free(buf, M_CXGBE);
8792         return (rc);
8793 }
8794
8795 static int
8796 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
8797 {
8798         struct adapter *sc = arg1;
8799         struct sbuf *sb;
8800         int rc, v;
8801
8802         MPASS(chip_id(sc) >= CHELSIO_T5);
8803
8804         rc = sysctl_wire_old_buffer(req, 0);
8805         if (rc != 0)
8806                 return (rc);
8807
8808         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8809         if (sb == NULL)
8810                 return (ENOMEM);
8811
8812         v = t4_read_reg(sc, A_SGE_STAT_CFG);
8813         if (G_STATSOURCE_T5(v) == 7) {
8814                 int mode;
8815
8816                 mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
8817                 if (mode == 0) {
8818                         sbuf_printf(sb, "total %d, incomplete %d",
8819                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
8820                             t4_read_reg(sc, A_SGE_STAT_MATCH));
8821                 } else if (mode == 1) {
8822                         sbuf_printf(sb, "total %d, data overflow %d",
8823                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
8824                             t4_read_reg(sc, A_SGE_STAT_MATCH));
8825                 } else {
8826                         sbuf_printf(sb, "unknown mode %d", mode);
8827                 }
8828         }
8829         rc = sbuf_finish(sb);
8830         sbuf_delete(sb);
8831
8832         return (rc);
8833 }
8834
8835 static int
8836 sysctl_cpus(SYSCTL_HANDLER_ARGS)
8837 {
8838         struct adapter *sc = arg1;
8839         enum cpu_sets op = arg2;
8840         cpuset_t cpuset;
8841         struct sbuf *sb;
8842         int i, rc;
8843
8844         MPASS(op == LOCAL_CPUS || op == INTR_CPUS);
8845
8846         CPU_ZERO(&cpuset);
8847         rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset);
8848         if (rc != 0)
8849                 return (rc);
8850
8851         rc = sysctl_wire_old_buffer(req, 0);
8852         if (rc != 0)
8853                 return (rc);
8854
8855         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8856         if (sb == NULL)
8857                 return (ENOMEM);
8858
8859         CPU_FOREACH(i)
8860                 sbuf_printf(sb, "%d ", i);
8861         rc = sbuf_finish(sb);
8862         sbuf_delete(sb);
8863
8864         return (rc);
8865 }
8866
8867 #ifdef TCP_OFFLOAD
/*
 * Get/set the list of TCP ports on which received connections use TLS
 * offload.  A read reports the current list (a single -1 when the list is
 * empty); a write installs a new list, or clears it when a single -1 is
 * written.  Every port must be in (0, IPPORT_MAX].
 */
static int
sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	int *old_ports, *new_ports;
	int i, new_count, rc;

	/*
	 * Size probe: with neither buffer supplied just report how much
	 * space a read would need (at least one element, for the -1).
	 */
	if (req->newptr == NULL && req->oldptr == NULL)
		return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
		    sizeof(sc->tt.tls_rx_ports[0])));

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
	if (rc)
		return (rc);

	if (sc->tt.num_tls_rx_ports == 0) {
		i = -1;
		rc = SYSCTL_OUT(req, &i, sizeof(i));
	} else
		rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
		    sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
	if (rc == 0 && req->newptr != NULL) {
		new_count = req->newlen / sizeof(new_ports[0]);
		new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
		    M_WAITOK);
		rc = SYSCTL_IN(req, new_ports, new_count *
		    sizeof(new_ports[0]));
		if (rc)
			goto err;

		/* Allow setting to a single '-1' to clear the list. */
		if (new_count == 1 && new_ports[0] == -1) {
			ADAPTER_LOCK(sc);
			old_ports = sc->tt.tls_rx_ports;
			sc->tt.tls_rx_ports = NULL;
			sc->tt.num_tls_rx_ports = 0;
			ADAPTER_UNLOCK(sc);
			free(old_ports, M_CXGBE);
		} else {
			/* Validate every entry before publishing anything. */
			for (i = 0; i < new_count; i++) {
				if (new_ports[i] < 1 ||
				    new_ports[i] > IPPORT_MAX) {
					rc = EINVAL;
					goto err;
				}
			}

			/* Swap the list in under the adapter lock. */
			ADAPTER_LOCK(sc);
			old_ports = sc->tt.tls_rx_ports;
			sc->tt.tls_rx_ports = new_ports;
			sc->tt.num_tls_rx_ports = new_count;
			ADAPTER_UNLOCK(sc);
			free(old_ports, M_CXGBE);
			/* Ownership transferred; don't free at err. */
			new_ports = NULL;
		}
	err:
		free(new_ports, M_CXGBE);
	}
	end_synchronized_op(sc, 0);
	return (rc);
}
8929
8930 static void
8931 unit_conv(char *buf, size_t len, u_int val, u_int factor)
8932 {
8933         u_int rem = val % factor;
8934
8935         if (rem == 0)
8936                 snprintf(buf, len, "%u", val / factor);
8937         else {
8938                 while (rem % 10 == 0)
8939                         rem /= 10;
8940                 snprintf(buf, len, "%u.%u", val / factor, rem);
8941         }
8942 }
8943
8944 static int
8945 sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
8946 {
8947         struct adapter *sc = arg1;
8948         char buf[16];
8949         u_int res, re;
8950         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
8951
8952         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
8953         switch (arg2) {
8954         case 0:
8955                 /* timer_tick */
8956                 re = G_TIMERRESOLUTION(res);
8957                 break;
8958         case 1:
8959                 /* TCP timestamp tick */
8960                 re = G_TIMESTAMPRESOLUTION(res);
8961                 break;
8962         case 2:
8963                 /* DACK tick */
8964                 re = G_DELAYEDACKRESOLUTION(res);
8965                 break;
8966         default:
8967                 return (EDOOFUS);
8968         }
8969
8970         unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
8971
8972         return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
8973 }
8974
8975 static int
8976 sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
8977 {
8978         struct adapter *sc = arg1;
8979         u_int res, dack_re, v;
8980         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
8981
8982         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
8983         dack_re = G_DELAYEDACKRESOLUTION(res);
8984         v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
8985
8986         return (sysctl_handle_int(oidp, &v, 0, req));
8987 }
8988
8989 static int
8990 sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
8991 {
8992         struct adapter *sc = arg1;
8993         int reg = arg2;
8994         u_int tre;
8995         u_long tp_tick_us, v;
8996         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
8997
8998         MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
8999             reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
9000             reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
9001             reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
9002
9003         tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
9004         tp_tick_us = (cclk_ps << tre) / 1000000;
9005
9006         if (reg == A_TP_INIT_SRTT)
9007                 v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
9008         else
9009                 v = tp_tick_us * t4_read_reg(sc, reg);
9010
9011         return (sysctl_handle_long(oidp, &v, 0, req));
9012 }
9013
9014 /*
9015  * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is
9016  * passed to this function.
9017  */
9018 static int
9019 sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
9020 {
9021         struct adapter *sc = arg1;
9022         int idx = arg2;
9023         u_int v;
9024
9025         MPASS(idx >= 0 && idx <= 24);
9026
9027         v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
9028
9029         return (sysctl_handle_int(oidp, &v, 0, req));
9030 }
9031
9032 static int
9033 sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
9034 {
9035         struct adapter *sc = arg1;
9036         int idx = arg2;
9037         u_int shift, v, r;
9038
9039         MPASS(idx >= 0 && idx < 16);
9040
9041         r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
9042         shift = (idx & 3) << 3;
9043         v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
9044
9045         return (sysctl_handle_int(oidp, &v, 0, req));
9046 }
9047
/*
 * Get/set the interrupt holdoff timer index for a VI's offload rx queues.
 * Unlike the packet-count index, this can be changed while the queues are
 * live: the new encoded value is pushed into every queue's intr_params.
 */
static int
sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
{
	struct vi_info *vi = arg1;
	struct adapter *sc = vi->pi->adapter;
	int idx, rc, i;
	struct sge_ofld_rxq *ofld_rxq;
	uint8_t v;

	idx = vi->ofld_tmr_idx;

	rc = sysctl_handle_int(oidp, &idx, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);	/* read-only access, or error */

	if (idx < 0 || idx >= SGE_NTIMERS)
		return (EINVAL);

	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
	    "t4otmr");
	if (rc)
		return (rc);

	/* Encode the timer index and packet-count enable into one byte. */
	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
	for_each_ofld_rxq(vi, i, ofld_rxq) {
#ifdef atomic_store_rel_8
		/* Single-byte release store where the platform has one. */
		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
#else
		ofld_rxq->iq.intr_params = v;
#endif
	}
	vi->ofld_tmr_idx = idx;

	end_synchronized_op(sc, LOCK_HELD);
	return (0);
}
9084
9085 static int
9086 sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
9087 {
9088         struct vi_info *vi = arg1;
9089         struct adapter *sc = vi->pi->adapter;
9090         int idx, rc;
9091
9092         idx = vi->ofld_pktc_idx;
9093
9094         rc = sysctl_handle_int(oidp, &idx, 0, req);
9095         if (rc != 0 || req->newptr == NULL)
9096                 return (rc);
9097
9098         if (idx < -1 || idx >= SGE_NCOUNTERS)
9099                 return (EINVAL);
9100
9101         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
9102             "t4opktc");
9103         if (rc)
9104                 return (rc);
9105
9106         if (vi->flags & VI_INIT_DONE)
9107                 rc = EBUSY; /* cannot be changed once the queues are created */
9108         else
9109                 vi->ofld_pktc_idx = idx;
9110
9111         end_synchronized_op(sc, LOCK_HELD);
9112         return (rc);
9113 }
9114 #endif
9115
9116 static int
9117 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
9118 {
9119         int rc;
9120
9121         if (cntxt->cid > M_CTXTQID)
9122                 return (EINVAL);
9123
9124         if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
9125             cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
9126                 return (EINVAL);
9127
9128         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
9129         if (rc)
9130                 return (rc);
9131
9132         if (sc->flags & FW_OK) {
9133                 rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
9134                     &cntxt->data[0]);
9135                 if (rc == 0)
9136                         goto done;
9137         }
9138
9139         /*
9140          * Read via firmware failed or wasn't even attempted.  Read directly via
9141          * the backdoor.
9142          */
9143         rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
9144 done:
9145         end_synchronized_op(sc, 0);
9146         return (rc);
9147 }
9148
/*
 * Flash a new firmware image supplied via ioctl.  Refused on a fully
 * initialized adapter unless the DF_LOAD_FW_ANYTIME debug flag is set
 * (see the comment below for why that is usually safe).
 */
static int
load_fw(struct adapter *sc, struct t4_data *fw)
{
	int rc;
	uint8_t *fw_data;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
	if (rc)
		return (rc);

	/*
	 * The firmware, with the sole exception of the memory parity error
	 * handler, runs from memory and not flash.  It is almost always safe to
	 * install a new firmware on a running system.  Just set bit 1 in
	 * hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
	 */
	if (sc->flags & FULL_INIT_DONE &&
	    (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
		rc = EBUSY;
		goto done;
	}

	/*
	 * NOTE(review): fw->len comes from userspace and is not bounded
	 * before this allocation — confirm the ioctl path enforces a limit.
	 * Also, with M_WAITOK malloc(9) does not return NULL, so the check
	 * below is effectively dead code.
	 */
	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
	if (fw_data == NULL) {
		rc = ENOMEM;
		goto done;
	}

	rc = copyin(fw->data, fw_data, fw->len);
	if (rc == 0)
		rc = -t4_load_fw(sc, fw_data, fw->len);

	free(fw_data, M_CXGBE);
done:
	end_synchronized_op(sc, 0);
	return (rc);
}
9186
/*
 * Write a configuration file to flash on behalf of an ioctl.  A zero
 * length request clears the config area instead.
 */
static int
load_cfg(struct adapter *sc, struct t4_data *cfg)
{
	int rc;
	uint8_t *cfg_data = NULL;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
	if (rc)
		return (rc);

	if (cfg->len == 0) {
		/* clear */
		rc = -t4_load_cfg(sc, NULL, 0);
		goto done;
	}

	/*
	 * NOTE(review): cfg->len is user-controlled and unbounded here;
	 * with M_WAITOK the NULL check below is also dead code.
	 */
	cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
	if (cfg_data == NULL) {
		rc = ENOMEM;
		goto done;
	}

	rc = copyin(cfg->data, cfg_data, cfg->len);
	if (rc == 0)
		rc = -t4_load_cfg(sc, cfg_data, cfg->len);

	free(cfg_data, M_CXGBE);
done:
	end_synchronized_op(sc, 0);
	return (rc);
}
9218
/*
 * Write a boot ROM image to flash.  The destination offset is taken either
 * from the named PF's PCIE expansion-ROM offset register (pf_offset == 0,
 * br->pfidx_addr is the PF number) or directly from br->pfidx_addr
 * (pf_offset == 1).  A zero-length request clears the boot area.
 */
static int
load_boot(struct adapter *sc, struct t4_bootrom *br)
{
	int rc;
	uint8_t *br_data = NULL;
	u_int offset;

	/* Boot images are capped at 1MB. */
	if (br->len > 1024 * 1024)
		return (EFBIG);

	if (br->pf_offset == 0) {
		/* pfidx */
		if (br->pfidx_addr > 7)
			return (EINVAL);
		offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
		    A_PCIE_PF_EXPROM_OFST)));
	} else if (br->pf_offset == 1) {
		/* offset */
		offset = G_OFFSET(br->pfidx_addr);
	} else {
		return (EINVAL);
	}

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
	if (rc)
		return (rc);

	if (br->len == 0) {
		/* clear */
		rc = -t4_load_boot(sc, NULL, offset, 0);
		goto done;
	}

	/* M_WAITOK means the NULL check below can't actually trigger. */
	br_data = malloc(br->len, M_CXGBE, M_WAITOK);
	if (br_data == NULL) {
		rc = ENOMEM;
		goto done;
	}

	rc = copyin(br->data, br_data, br->len);
	if (rc == 0)
		rc = -t4_load_boot(sc, br_data, offset, br->len);

	free(br_data, M_CXGBE);
done:
	end_synchronized_op(sc, 0);
	return (rc);
}
9267
/*
 * Write a boot configuration to flash on behalf of an ioctl.  A zero
 * length request clears the boot-config area instead.
 */
static int
load_bootcfg(struct adapter *sc, struct t4_data *bc)
{
	int rc;
	uint8_t *bc_data = NULL;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
	if (rc)
		return (rc);

	if (bc->len == 0) {
		/* clear */
		rc = -t4_load_bootcfg(sc, NULL, 0);
		goto done;
	}

	/*
	 * NOTE(review): bc->len is user-controlled and unbounded here;
	 * with M_WAITOK the NULL check below is also dead code.
	 */
	bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
	if (bc_data == NULL) {
		rc = ENOMEM;
		goto done;
	}

	rc = copyin(bc->data, bc_data, bc->len);
	if (rc == 0)
		rc = -t4_load_bootcfg(sc, bc_data, bc->len);

	free(bc_data, M_CXGBE);
done:
	end_synchronized_op(sc, 0);
	return (rc);
}
9299
/*
 * Collect a Chelsio unified debug (cudbg) dump into a user-supplied
 * buffer.  dump->bitmap selects the debug entities to collect and
 * dump->wr_flash additionally requests the dump be written to flash.
 * On success dump->len is updated to the amount of data collected.
 */
static int
cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
{
	int rc;
	struct cudbg_init *cudbg;
	void *handle, *buf;

	/* buf is large, don't block if no memory is available */
	buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
	if (buf == NULL)
		return (ENOMEM);

	handle = cudbg_alloc_handle();
	if (handle == NULL) {
		rc = ENOMEM;
		goto done;
	}

	cudbg = cudbg_get_init(handle);
	cudbg->adap = sc;
	cudbg->print = (cudbg_print_cb)printf;

#ifndef notyet
	/* Debug chatter while this interface is still settling. */
	device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
	    __func__, dump->wr_flash, dump->len, dump->data);
#endif

	if (dump->wr_flash)
		cudbg->use_flash = 1;
	MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
	memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));

	/* cudbg_collect() updates dump->len to the size actually used. */
	rc = cudbg_collect(handle, buf, &dump->len);
	if (rc != 0)
		goto done;

	rc = copyout(buf, dump->data, dump->len);
done:
	cudbg_free_handle(handle);
	free(buf, M_CXGBE);
	return (rc);
}
9342
9343 static void
9344 free_offload_policy(struct t4_offload_policy *op)
9345 {
9346         struct offload_rule *r;
9347         int i;
9348
9349         if (op == NULL)
9350                 return;
9351
9352         r = &op->rule[0];
9353         for (i = 0; i < op->nrules; i++, r++) {
9354                 free(r->bpf_prog.bf_insns, M_CXGBE);
9355         }
9356         free(op->rule, M_CXGBE);
9357         free(op, M_CXGBE);
9358 }
9359
/*
 * Install a new TOE offload policy from userspace, replacing (and freeing)
 * any policy currently installed.  nrules == 0 deletes the policy.  Each
 * rule's BPF filter is copied in and validated; on any error, rules copied
 * so far are freed and the existing policy is left untouched.
 */
static int
set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
{
	int i, rc, len;
	struct t4_offload_policy *op, *old;
	struct bpf_program *bf;
	const struct offload_settings *s;
	struct offload_rule *r;
	void *u;

	if (!is_offload(sc))
		return (ENODEV);

	if (uop->nrules == 0) {
		/* Delete installed policies. */
		op = NULL;
		goto set_policy;
	} if (uop->nrules > 256) { /* arbitrary */
		return (E2BIG);
	}

	/* Copy userspace offload policy to kernel */
	op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
	op->nrules = uop->nrules;
	len = op->nrules * sizeof(struct offload_rule);
	op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
	rc = copyin(uop->rule, op->rule, len);
	if (rc) {
		free(op->rule, M_CXGBE);
		free(op, M_CXGBE);
		return (rc);
	}

	r = &op->rule[0];
	for (i = 0; i < op->nrules; i++, r++) {

		/* Validate open_type */
		if (r->open_type != OPEN_TYPE_LISTEN &&
		    r->open_type != OPEN_TYPE_ACTIVE &&
		    r->open_type != OPEN_TYPE_PASSIVE &&
		    r->open_type != OPEN_TYPE_DONTCARE) {
error:
			/*
			 * Rules 0 to i have malloc'd filters that need to be
			 * freed.  Rules i+1 to nrules have userspace pointers
			 * and should be left alone.
			 */
			op->nrules = i;
			free_offload_policy(op);
			return (rc);
		}

		/* Validate settings */
		s = &r->settings;
		if ((s->offload != 0 && s->offload != 1) ||
		    s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
		    s->sched_class < -1 ||
		    s->sched_class >= sc->chip_params->nsched_cls) {
			rc = EINVAL;
			goto error;
		}

		/* Replace the userspace BPF pointer with a kernel copy. */
		bf = &r->bpf_prog;
		u = bf->bf_insns;	/* userspace ptr */
		bf->bf_insns = NULL;
		if (bf->bf_len == 0) {
			/* legal, matches everything */
			continue;
		}
		len = bf->bf_len * sizeof(*bf->bf_insns);
		bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
		rc = copyin(u, bf->bf_insns, len);
		if (rc != 0)
			goto error;

		if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
			rc = EINVAL;
			goto error;
		}
	}
set_policy:
	/* Swap the policy in under the writer lock, then free the old one. */
	rw_wlock(&sc->policy_lock);
	old = sc->policy;
	sc->policy = op;
	rw_wunlock(&sc->policy_lock);
	free_offload_policy(old);

	return (0);
}
9449
9450 #define MAX_READ_BUF_SIZE (128 * 1024)
9451 static int
9452 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
9453 {
9454         uint32_t addr, remaining, n;
9455         uint32_t *buf;
9456         int rc;
9457         uint8_t *dst;
9458
9459         rc = validate_mem_range(sc, mr->addr, mr->len);
9460         if (rc != 0)
9461                 return (rc);
9462
9463         buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
9464         addr = mr->addr;
9465         remaining = mr->len;
9466         dst = (void *)mr->data;
9467
9468         while (remaining) {
9469                 n = min(remaining, MAX_READ_BUF_SIZE);
9470                 read_via_memwin(sc, 2, addr, buf, n);
9471
9472                 rc = copyout(buf, dst, n);
9473                 if (rc != 0)
9474                         break;
9475
9476                 dst += n;
9477                 remaining -= n;
9478                 addr += n;
9479         }
9480
9481         free(buf, M_CXGBE);
9482         return (rc);
9483 }
9484 #undef MAX_READ_BUF_SIZE
9485
9486 static int
9487 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
9488 {
9489         int rc;
9490
9491         if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
9492                 return (EINVAL);
9493
9494         if (i2cd->len > sizeof(i2cd->data))
9495                 return (EFBIG);
9496
9497         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
9498         if (rc)
9499                 return (rc);
9500         rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
9501             i2cd->offset, i2cd->len, &i2cd->data[0]);
9502         end_synchronized_op(sc, 0);
9503
9504         return (rc);
9505 }
9506
9507 int
9508 t4_os_find_pci_capability(struct adapter *sc, int cap)
9509 {
9510         int i;
9511
9512         return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
9513 }
9514
9515 int
9516 t4_os_pci_save_state(struct adapter *sc)
9517 {
9518         device_t dev;
9519         struct pci_devinfo *dinfo;
9520
9521         dev = sc->dev;
9522         dinfo = device_get_ivars(dev);
9523
9524         pci_cfg_save(dev, dinfo, 0);
9525         return (0);
9526 }
9527
9528 int
9529 t4_os_pci_restore_state(struct adapter *sc)
9530 {
9531         device_t dev;
9532         struct pci_devinfo *dinfo;
9533
9534         dev = sc->dev;
9535         dinfo = device_get_ivars(dev);
9536
9537         pci_cfg_restore(dev, dinfo);
9538         return (0);
9539 }
9540
/*
 * Handle a transceiver module change on a port: rebuild the media list,
 * refresh the link configuration if a module is present, and log what was
 * plugged or unplugged.  Only called for ports whose media is not fixed.
 */
void
t4_os_portmod_changed(struct port_info *pi)
{
	struct adapter *sc = pi->adapter;
	struct vi_info *vi;
	struct ifnet *ifp;
	/* Indexed by pi->mod_type for the known module types. */
	static const char *mod_str[] = {
		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
	};

	KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
	    ("%s: port_type %u", __func__, pi->port_type));

	vi = &pi->vi[0];
	if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
		PORT_LOCK(pi);
		build_medialist(pi);
		if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
			fixup_link_config(pi);
			apply_link_config(pi);
		}
		PORT_UNLOCK(pi);
		end_synchronized_op(sc, LOCK_HELD);
	}

	/* Log the module change on the port's first VI. */
	ifp = vi->ifp;
	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
		if_printf(ifp, "transceiver unplugged.\n");
	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
		if_printf(ifp, "unknown transceiver inserted.\n");
	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
		if_printf(ifp, "unsupported transceiver inserted.\n");
	else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
		if_printf(ifp, "%dGbps %s transceiver inserted.\n",
		    port_top_speed(pi), mod_str[pi->mod_type]);
	} else {
		if_printf(ifp, "transceiver (type %d) inserted.\n",
		    pi->mod_type);
	}
}
9581
9582 void
9583 t4_os_link_changed(struct port_info *pi)
9584 {
9585         struct vi_info *vi;
9586         struct ifnet *ifp;
9587         struct link_config *lc;
9588         int v;
9589
9590         PORT_LOCK_ASSERT_OWNED(pi);
9591
9592         for_each_vi(pi, v, vi) {
9593                 ifp = vi->ifp;
9594                 if (ifp == NULL)
9595                         continue;
9596
9597                 lc = &pi->link_cfg;
9598                 if (lc->link_ok) {
9599                         ifp->if_baudrate = IF_Mbps(lc->speed);
9600                         if_link_state_change(ifp, LINK_STATE_UP);
9601                 } else {
9602                         if_link_state_change(ifp, LINK_STATE_DOWN);
9603                 }
9604         }
9605 }
9606
/*
 * Invoke func(sc, arg) on every adapter on the global adapter list, with
 * the list lock held shared across the whole traversal.
 */
void
t4_iterate(void (*func)(struct adapter *, void *), void *arg)
{
	struct adapter *sc;

	sx_slock(&t4_list_lock);
	SLIST_FOREACH(sc, &t4_list, link) {
		/*
		 * func should not make any assumptions about what state sc is
		 * in - the only guarantee is that sc->sc_lock is a valid lock.
		 */
		func(sc, arg);
	}
	sx_sunlock(&t4_list_lock);
}
9622
9623 static int
9624 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
9625     struct thread *td)
9626 {
9627         int rc;
9628         struct adapter *sc = dev->si_drv1;
9629
9630         rc = priv_check(td, PRIV_DRIVER);
9631         if (rc != 0)
9632                 return (rc);
9633
9634         switch (cmd) {
9635         case CHELSIO_T4_GETREG: {
9636                 struct t4_reg *edata = (struct t4_reg *)data;
9637
9638                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9639                         return (EFAULT);
9640
9641                 if (edata->size == 4)
9642                         edata->val = t4_read_reg(sc, edata->addr);
9643                 else if (edata->size == 8)
9644                         edata->val = t4_read_reg64(sc, edata->addr);
9645                 else
9646                         return (EINVAL);
9647
9648                 break;
9649         }
9650         case CHELSIO_T4_SETREG: {
9651                 struct t4_reg *edata = (struct t4_reg *)data;
9652
9653                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9654                         return (EFAULT);
9655
9656                 if (edata->size == 4) {
9657                         if (edata->val & 0xffffffff00000000)
9658                                 return (EINVAL);
9659                         t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
9660                 } else if (edata->size == 8)
9661                         t4_write_reg64(sc, edata->addr, edata->val);
9662                 else
9663                         return (EINVAL);
9664                 break;
9665         }
9666         case CHELSIO_T4_REGDUMP: {
9667                 struct t4_regdump *regs = (struct t4_regdump *)data;
9668                 int reglen = t4_get_regs_len(sc);
9669                 uint8_t *buf;
9670
9671                 if (regs->len < reglen) {
9672                         regs->len = reglen; /* hint to the caller */
9673                         return (ENOBUFS);
9674                 }
9675
9676                 regs->len = reglen;
9677                 buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
9678                 get_regs(sc, regs, buf);
9679                 rc = copyout(buf, regs->data, reglen);
9680                 free(buf, M_CXGBE);
9681                 break;
9682         }
9683         case CHELSIO_T4_GET_FILTER_MODE:
9684                 rc = get_filter_mode(sc, (uint32_t *)data);
9685                 break;
9686         case CHELSIO_T4_SET_FILTER_MODE:
9687                 rc = set_filter_mode(sc, *(uint32_t *)data);
9688                 break;
9689         case CHELSIO_T4_GET_FILTER:
9690                 rc = get_filter(sc, (struct t4_filter *)data);
9691                 break;
9692         case CHELSIO_T4_SET_FILTER:
9693                 rc = set_filter(sc, (struct t4_filter *)data);
9694                 break;
9695         case CHELSIO_T4_DEL_FILTER:
9696                 rc = del_filter(sc, (struct t4_filter *)data);
9697                 break;
9698         case CHELSIO_T4_GET_SGE_CONTEXT:
9699                 rc = get_sge_context(sc, (struct t4_sge_context *)data);
9700                 break;
9701         case CHELSIO_T4_LOAD_FW:
9702                 rc = load_fw(sc, (struct t4_data *)data);
9703                 break;
9704         case CHELSIO_T4_GET_MEM:
9705                 rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
9706                 break;
9707         case CHELSIO_T4_GET_I2C:
9708                 rc = read_i2c(sc, (struct t4_i2c_data *)data);
9709                 break;
9710         case CHELSIO_T4_CLEAR_STATS: {
9711                 int i, v, bg_map;
9712                 u_int port_id = *(uint32_t *)data;
9713                 struct port_info *pi;
9714                 struct vi_info *vi;
9715
9716                 if (port_id >= sc->params.nports)
9717                         return (EINVAL);
9718                 pi = sc->port[port_id];
9719                 if (pi == NULL)
9720                         return (EIO);
9721
9722                 /* MAC stats */
9723                 t4_clr_port_stats(sc, pi->tx_chan);
9724                 pi->tx_parse_error = 0;
9725                 pi->tnl_cong_drops = 0;
9726                 mtx_lock(&sc->reg_lock);
9727                 for_each_vi(pi, v, vi) {
9728                         if (vi->flags & VI_INIT_DONE)
9729                                 t4_clr_vi_stats(sc, vi->viid);
9730                 }
9731                 bg_map = pi->mps_bg_map;
9732                 v = 0;  /* reuse */
9733                 while (bg_map) {
9734                         i = ffs(bg_map) - 1;
9735                         t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
9736                             1, A_TP_MIB_TNL_CNG_DROP_0 + i);
9737                         bg_map &= ~(1 << i);
9738                 }
9739                 mtx_unlock(&sc->reg_lock);
9740
9741                 /*
9742                  * Since this command accepts a port, clear stats for
9743                  * all VIs on this port.
9744                  */
9745                 for_each_vi(pi, v, vi) {
9746                         if (vi->flags & VI_INIT_DONE) {
9747                                 struct sge_rxq *rxq;
9748                                 struct sge_txq *txq;
9749                                 struct sge_wrq *wrq;
9750
9751                                 for_each_rxq(vi, i, rxq) {
9752 #if defined(INET) || defined(INET6)
9753                                         rxq->lro.lro_queued = 0;
9754                                         rxq->lro.lro_flushed = 0;
9755 #endif
9756                                         rxq->rxcsum = 0;
9757                                         rxq->vlan_extraction = 0;
9758                                 }
9759
9760                                 for_each_txq(vi, i, txq) {
9761                                         txq->txcsum = 0;
9762                                         txq->tso_wrs = 0;
9763                                         txq->vlan_insertion = 0;
9764                                         txq->imm_wrs = 0;
9765                                         txq->sgl_wrs = 0;
9766                                         txq->txpkt_wrs = 0;
9767                                         txq->txpkts0_wrs = 0;
9768                                         txq->txpkts1_wrs = 0;
9769                                         txq->txpkts0_pkts = 0;
9770                                         txq->txpkts1_pkts = 0;
9771                                         mp_ring_reset_stats(txq->r);
9772                                 }
9773
9774 #ifdef TCP_OFFLOAD
9775                                 /* nothing to clear for each ofld_rxq */
9776
9777                                 for_each_ofld_txq(vi, i, wrq) {
9778                                         wrq->tx_wrs_direct = 0;
9779                                         wrq->tx_wrs_copied = 0;
9780                                 }
9781 #endif
9782
9783                                 if (IS_MAIN_VI(vi)) {
9784                                         wrq = &sc->sge.ctrlq[pi->port_id];
9785                                         wrq->tx_wrs_direct = 0;
9786                                         wrq->tx_wrs_copied = 0;
9787                                 }
9788                         }
9789                 }
9790                 break;
9791         }
9792         case CHELSIO_T4_SCHED_CLASS:
9793                 rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
9794                 break;
9795         case CHELSIO_T4_SCHED_QUEUE:
9796                 rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
9797                 break;
9798         case CHELSIO_T4_GET_TRACER:
9799                 rc = t4_get_tracer(sc, (struct t4_tracer *)data);
9800                 break;
9801         case CHELSIO_T4_SET_TRACER:
9802                 rc = t4_set_tracer(sc, (struct t4_tracer *)data);
9803                 break;
9804         case CHELSIO_T4_LOAD_CFG:
9805                 rc = load_cfg(sc, (struct t4_data *)data);
9806                 break;
9807         case CHELSIO_T4_LOAD_BOOT:
9808                 rc = load_boot(sc, (struct t4_bootrom *)data);
9809                 break;
9810         case CHELSIO_T4_LOAD_BOOTCFG:
9811                 rc = load_bootcfg(sc, (struct t4_data *)data);
9812                 break;
9813         case CHELSIO_T4_CUDBG_DUMP:
9814                 rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
9815                 break;
9816         case CHELSIO_T4_SET_OFLD_POLICY:
9817                 rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
9818                 break;
9819         default:
9820                 rc = ENOTTY;
9821         }
9822
9823         return (rc);
9824 }
9825
/*
 * Doorbell-full notification hook.  Intentionally unimplemented here;
 * CXGBE_UNIMPLEMENTED reports the function name (see its definition for the
 * exact behavior).
 */
void
t4_db_full(struct adapter *sc)
{

	CXGBE_UNIMPLEMENTED(__func__);
}
9832
/*
 * Doorbell-dropped notification hook.  Intentionally unimplemented here, same
 * as t4_db_full().
 */
void
t4_db_dropped(struct adapter *sc)
{

	CXGBE_UNIMPLEMENTED(__func__);
}
9839
9840 #ifdef TCP_OFFLOAD
/*
 * Enable or disable the TOE (TCP offload engine) capability on a VI.
 *
 * Called with the adapter's synchronized op held.  Returns 0 on success or an
 * errno.  The adapter-wide offload_map tracks which ports have TOE active;
 * pi->uld_vis counts how many VIs on the port have it enabled.
 */
static int
toe_capability(struct vi_info *vi, int enable)
{
	int rc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (!is_offload(sc))
		return (ENODEV);

	if (enable) {
		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
			/* TOE is already enabled. */
			return (0);
		}

		/*
		 * We need the port's queues around so that we're able to send
		 * and receive CPLs to/from the TOE even if the ifnet for this
		 * port has never been UP'd administratively.
		 */
		if (!(vi->flags & VI_INIT_DONE)) {
			rc = vi_full_init(vi);
			if (rc)
				return (rc);
		}
		/* The port's main VI must be initialized too (CPLs arrive on
		 * its queues even when TOE is enabled on an extra VI). */
		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
			rc = vi_full_init(&pi->vi[0]);
			if (rc)
				return (rc);
		}

		if (isset(&sc->offload_map, pi->port_id)) {
			/* TOE is enabled on another VI of this port. */
			pi->uld_vis++;
			return (0);
		}

		/* First VI on this adapter port to enable TOE: bring up the
		 * TOM ULD if it isn't active yet. */
		if (!uld_active(sc, ULD_TOM)) {
			rc = t4_activate_uld(sc, ULD_TOM);
			if (rc == EAGAIN) {
				log(LOG_WARNING,
				    "You must kldload t4_tom.ko before trying "
				    "to enable TOE on a cxgbe interface.\n");
			}
			if (rc != 0)
				return (rc);
			KASSERT(sc->tom_softc != NULL,
			    ("%s: TOM activated but softc NULL", __func__));
			KASSERT(uld_active(sc, ULD_TOM),
			    ("%s: TOM activated but flag not set", __func__));
		}

		/* Activate iWARP and iSCSI too, if the modules are loaded. */
		if (!uld_active(sc, ULD_IWARP))
			(void) t4_activate_uld(sc, ULD_IWARP);
		if (!uld_active(sc, ULD_ISCSI))
			(void) t4_activate_uld(sc, ULD_ISCSI);

		pi->uld_vis++;
		setbit(&sc->offload_map, pi->port_id);
	} else {
		pi->uld_vis--;

		/* Nothing more to do unless this was the last TOE-enabled VI
		 * on the port. */
		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
			return (0);

		KASSERT(uld_active(sc, ULD_TOM),
		    ("%s: TOM never initialized?", __func__));
		clrbit(&sc->offload_map, pi->port_id);
	}

	return (0);
}
9917
9918 /*
9919  * Add an upper layer driver to the global list.
9920  */
9921 int
9922 t4_register_uld(struct uld_info *ui)
9923 {
9924         int rc = 0;
9925         struct uld_info *u;
9926
9927         sx_xlock(&t4_uld_list_lock);
9928         SLIST_FOREACH(u, &t4_uld_list, link) {
9929             if (u->uld_id == ui->uld_id) {
9930                     rc = EEXIST;
9931                     goto done;
9932             }
9933         }
9934
9935         SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
9936         ui->refcount = 0;
9937 done:
9938         sx_xunlock(&t4_uld_list_lock);
9939         return (rc);
9940 }
9941
9942 int
9943 t4_unregister_uld(struct uld_info *ui)
9944 {
9945         int rc = EINVAL;
9946         struct uld_info *u;
9947
9948         sx_xlock(&t4_uld_list_lock);
9949
9950         SLIST_FOREACH(u, &t4_uld_list, link) {
9951             if (u == ui) {
9952                     if (ui->refcount > 0) {
9953                             rc = EBUSY;
9954                             goto done;
9955                     }
9956
9957                     SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
9958                     rc = 0;
9959                     goto done;
9960             }
9961         }
9962 done:
9963         sx_xunlock(&t4_uld_list_lock);
9964         return (rc);
9965 }
9966
9967 int
9968 t4_activate_uld(struct adapter *sc, int id)
9969 {
9970         int rc;
9971         struct uld_info *ui;
9972
9973         ASSERT_SYNCHRONIZED_OP(sc);
9974
9975         if (id < 0 || id > ULD_MAX)
9976                 return (EINVAL);
9977         rc = EAGAIN;    /* kldoad the module with this ULD and try again. */
9978
9979         sx_slock(&t4_uld_list_lock);
9980
9981         SLIST_FOREACH(ui, &t4_uld_list, link) {
9982                 if (ui->uld_id == id) {
9983                         if (!(sc->flags & FULL_INIT_DONE)) {
9984                                 rc = adapter_full_init(sc);
9985                                 if (rc != 0)
9986                                         break;
9987                         }
9988
9989                         rc = ui->activate(sc);
9990                         if (rc == 0) {
9991                                 setbit(&sc->active_ulds, id);
9992                                 ui->refcount++;
9993                         }
9994                         break;
9995                 }
9996         }
9997
9998         sx_sunlock(&t4_uld_list_lock);
9999
10000         return (rc);
10001 }
10002
10003 int
10004 t4_deactivate_uld(struct adapter *sc, int id)
10005 {
10006         int rc;
10007         struct uld_info *ui;
10008
10009         ASSERT_SYNCHRONIZED_OP(sc);
10010
10011         if (id < 0 || id > ULD_MAX)
10012                 return (EINVAL);
10013         rc = ENXIO;
10014
10015         sx_slock(&t4_uld_list_lock);
10016
10017         SLIST_FOREACH(ui, &t4_uld_list, link) {
10018                 if (ui->uld_id == id) {
10019                         rc = ui->deactivate(sc);
10020                         if (rc == 0) {
10021                                 clrbit(&sc->active_ulds, id);
10022                                 ui->refcount--;
10023                         }
10024                         break;
10025                 }
10026         }
10027
10028         sx_sunlock(&t4_uld_list_lock);
10029
10030         return (rc);
10031 }
10032
/*
 * Returns nonzero iff the ULD with the given id is active on this adapter.
 * Note: callers treat the result as a boolean; the raw isset() value is
 * returned as-is.
 */
int
uld_active(struct adapter *sc, int uld_id)
{

	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);

	return (isset(&sc->active_ulds, uld_id));
}
10041 #endif
10042
/*
 * Resolve a queue-count tunable in place.
 * t  = ptr to tunable.
 * nc = number of CPUs.
 * c  = compiled in default for that tunable.
 *
 * A positive value was set explicitly by the user and is left alone.  A
 * negative value requests -*t queues; zero requests the compiled-in default.
 * Either way the result is capped at the CPU count.
 */
static void
calculate_nqueues(int *t, int nc, const int c)
{
	int requested;

	if (*t > 0)
		return;
	if (*t < 0)
		requested = -*t;
	else
		requested = c;
	*t = min(nc, requested);
}
10058
/*
 * Come up with reasonable defaults for some of the tunables, provided they're
 * not set by the user (in which case we'll use the values as is).
 */
static void
tweak_tunables(void)
{
	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */

	/* NIC tx/rx queue counts (per port, and per extra VI). */
	if (t4_ntxq < 1) {
#ifdef RSS
		t4_ntxq = rss_getnumbuckets();
#else
		calculate_nqueues(&t4_ntxq, nc, NTXQ);
#endif
	}

	calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);

	if (t4_nrxq < 1) {
#ifdef RSS
		t4_nrxq = rss_getnumbuckets();
#else
		calculate_nqueues(&t4_nrxq, nc, NRXQ);
#endif
	}

	calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);

	/* Offload queue counts and capability masks. */
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
	calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
	calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
#endif
#ifdef TCP_OFFLOAD
	calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
	calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);

	if (t4_toecaps_allowed == -1)
		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;

	if (t4_rdmacaps_allowed == -1) {
		t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
		    FW_CAPS_CONFIG_RDMA_RDMAC;
	}

	if (t4_iscsicaps_allowed == -1) {
		t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
		    FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
		    FW_CAPS_CONFIG_ISCSI_T10DIF;
	}

	/* Interrupt holdoff settings for the offload queues. */
	if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
		t4_tmr_idx_ofld = TMR_IDX_OFLD;

	if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
		t4_pktc_idx_ofld = PKTC_IDX_OFLD;
#else
	/* No TCP_OFFLOAD in this build: disallow all offload capabilities. */
	if (t4_toecaps_allowed == -1)
		t4_toecaps_allowed = 0;

	if (t4_rdmacaps_allowed == -1)
		t4_rdmacaps_allowed = 0;

	if (t4_iscsicaps_allowed == -1)
		t4_iscsicaps_allowed = 0;
#endif

#ifdef DEV_NETMAP
	calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
	calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
#endif

	/* Interrupt holdoff settings for the NIC queues. */
	if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
		t4_tmr_idx = TMR_IDX;

	if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
		t4_pktc_idx = PKTC_IDX;

	/* Queue sizes: at least 128 entries; rxq size rounded up to 8. */
	if (t4_qsize_txq < 128)
		t4_qsize_txq = 128;

	if (t4_qsize_rxq < 128)
		t4_qsize_rxq = 128;
	while (t4_qsize_rxq & 7)
		t4_qsize_rxq++;

	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;

	/*
	 * Number of VIs to create per-port.  The first VI is the "main" regular
	 * VI for the port.  The rest are additional virtual interfaces on the
	 * same physical port.  Note that the main VI does not have native
	 * netmap support but the extra VIs do.
	 *
	 * Limit the number of VIs per port to the number of available
	 * MAC addresses per port.
	 */
	if (t4_num_vis < 1)
		t4_num_vis = 1;
	if (t4_num_vis > nitems(vi_mac_funcs)) {
		t4_num_vis = nitems(vi_mac_funcs);
		printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
	}

	/* PCIe relaxed ordering: default on, except on Intel CPUs. */
	if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
		pcie_relaxed_ordering = 1;
#if defined(__i386__) || defined(__amd64__)
		if (cpu_vendor_id == CPU_VENDOR_INTEL)
			pcie_relaxed_ordering = 0;
#endif
	}
}
10171
10172 #ifdef DDB
/*
 * DDB helper: dump the TCB for a tid by reading it through PCIe memory
 * window 2.  The window position register is saved on entry and restored on
 * exit so normal driver use of the window is not disturbed.
 */
static void
t4_dump_tcb(struct adapter *sc, int tid)
{
	uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;

	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
	save = t4_read_reg(sc, reg);	/* window position, restored below */
	base = sc->memwin[2].mw_base;

	/* Dump TCB for the tid */
	tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
	tcb_addr += tid * TCB_SIZE;

	/* Window alignment differs between T4 and later chips. */
	if (is_t4(sc)) {
		pf = 0;
		win_pos = tcb_addr & ~0xf;	/* start must be 16B aligned */
	} else {
		pf = V_PFNUM(sc->pf);
		win_pos = tcb_addr & ~0x7f;	/* start must be 128B aligned */
	}
	/* Move the window; read back to flush the write before using it. */
	t4_write_reg(sc, reg, win_pos | pf);
	t4_read_reg(sc, reg);

	/* Print the TCB as 4 rows of 8 big-endian 32-bit words. */
	off = tcb_addr - win_pos;
	for (i = 0; i < 4; i++) {
		uint32_t buf[8];
		for (j = 0; j < 8; j++, off += 4)
			buf[j] = htonl(t4_read_reg(sc, base + off));

		db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
		    buf[7]);
	}

	/* Restore the window position. */
	t4_write_reg(sc, reg, save);
	t4_read_reg(sc, reg);
}
10210
/*
 * DDB helper: print the firmware's device log.  The log lives in card memory
 * as a circular buffer of fw_devlog_e entries; the oldest entry (smallest
 * timestamp) is located first and printing proceeds from there, wrapping
 * around until the scan returns to the starting entry or the pager quits.
 */
static void
t4_dump_devlog(struct adapter *sc)
{
	struct devlog_params *dparams = &sc->params.devlog;
	struct fw_devlog_e e;
	int i, first, j, m, nentries, rc;
	uint64_t ftstamp = UINT64_MAX;

	if (dparams->start == 0) {
		db_printf("devlog params not valid\n");
		return;
	}

	nentries = dparams->size / sizeof(struct fw_devlog_e);
	m = fwmtype_to_hwmtype(dparams->memtype);

	/* Find the first entry. */
	first = -1;
	for (i = 0; i < nentries && !db_pager_quit; i++) {
		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
		    sizeof(e), (void *)&e);
		if (rc != 0)
			break;

		/* A zero timestamp marks an unused slot. */
		if (e.timestamp == 0)
			break;

		e.timestamp = be64toh(e.timestamp);
		if (e.timestamp < ftstamp) {
			ftstamp = e.timestamp;
			first = i;
		}
	}

	if (first == -1)
		return;

	/* Print entries in order, wrapping around the circular buffer. */
	i = first;
	do {
		rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
		    sizeof(e), (void *)&e);
		if (rc != 0)
			return;

		if (e.timestamp == 0)
			return;

		/* Log entries are stored big-endian. */
		e.timestamp = be64toh(e.timestamp);
		e.seqno = be32toh(e.seqno);
		for (j = 0; j < 8; j++)
			e.params[j] = be32toh(e.params[j]);

		db_printf("%10d  %15ju  %8s  %8s  ",
		    e.seqno, e.timestamp,
		    (e.level < nitems(devlog_level_strings) ?
			devlog_level_strings[e.level] : "UNKNOWN"),
		    (e.facility < nitems(devlog_facility_strings) ?
			devlog_facility_strings[e.facility] : "UNKNOWN"));
		db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
		    e.params[3], e.params[4], e.params[5], e.params[6],
		    e.params[7]);

		if (++i == nentries)
			i = 0;
	} while (i != first && !db_pager_quit);
}
10277
/* Command table for the "show t4 ..." family of DDB commands. */
static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
_DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
10280
/*
 * DDB command: "show t4 devlog <nexus>".  Parses the nexus device name from
 * the command line and dumps that adapter's firmware device log.
 */
DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
{
	device_t dev;
	int t;
	bool valid;

	valid = false;
	t = db_read_token();
	if (t == tIDENT) {
		dev = device_lookup_by_name(db_tok_string);
		valid = true;
	}
	/* Consume the rest of the line before reporting errors. */
	db_skip_to_eol();
	if (!valid) {
		db_printf("usage: show t4 devlog <nexus>\n");
		return;
	}

	if (dev == NULL) {
		db_printf("device not found\n");
		return;
	}

	t4_dump_devlog(device_get_softc(dev));
}
10306
/*
 * DDB command: "show t4 tcb <nexus> <tid>".  Parses the nexus device name and
 * a decimal tid, then dumps that connection's TCB.  The input radix is forced
 * to 10 while reading the tid and restored afterwards.
 */
DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
{
	device_t dev;
	int radix, tid, t;
	bool valid;

	valid = false;
	radix = db_radix;
	db_radix = 10;
	t = db_read_token();
	if (t == tIDENT) {
		dev = device_lookup_by_name(db_tok_string);
		t = db_read_token();
		if (t == tNUMBER) {
			tid = db_tok_number;
			valid = true;
		}
	}
	db_radix = radix;
	/* Consume the rest of the line before reporting errors. */
	db_skip_to_eol();
	if (!valid) {
		db_printf("usage: show t4 tcb <nexus> <tid>\n");
		return;
	}

	if (dev == NULL) {
		db_printf("device not found\n");
		return;
	}
	if (tid < 0) {
		db_printf("invalid tid\n");
		return;
	}

	t4_dump_tcb(device_get_softc(dev), tid);
}
10343 #endif
10344
10345 /*
10346  * Borrowed from cesa_prep_aes_key().
10347  *
10348  * NB: The crypto engine wants the words in the decryption key in reverse
10349  * order.
10350  */
10351 void
10352 t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits)
10353 {
10354         uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)];
10355         uint32_t *dkey;
10356         int i;
10357
10358         rijndaelKeySetupEnc(ek, enc_key, kbits);
10359         dkey = dec_key;
10360         dkey += (kbits / 8) / 4;
10361
10362         switch (kbits) {
10363         case 128:
10364                 for (i = 0; i < 4; i++)
10365                         *--dkey = htobe32(ek[4 * 10 + i]);
10366                 break;
10367         case 192:
10368                 for (i = 0; i < 2; i++)
10369                         *--dkey = htobe32(ek[4 * 11 + 2 + i]);
10370                 for (i = 0; i < 4; i++)
10371                         *--dkey = htobe32(ek[4 * 12 + i]);
10372                 break;
10373         case 256:
10374                 for (i = 0; i < 4; i++)
10375                         *--dkey = htobe32(ek[4 * 13 + i]);
10376                 for (i = 0; i < 4; i++)
10377                         *--dkey = htobe32(ek[4 * 14 + i]);
10378                 break;
10379         }
10380         MPASS(dkey == dec_key);
10381 }
10382
/* Serializes mod_event() load/unload handling across all driver modules. */
static struct sx mlu;	/* mod load unload */
SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
10385
10386 static int
10387 mod_event(module_t mod, int cmd, void *arg)
10388 {
10389         int rc = 0;
10390         static int loaded = 0;
10391
10392         switch (cmd) {
10393         case MOD_LOAD:
10394                 sx_xlock(&mlu);
10395                 if (loaded++ == 0) {
10396                         t4_sge_modload();
10397                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10398                             t4_filter_rpl, CPL_COOKIE_FILTER);
10399                         t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
10400                             do_l2t_write_rpl, CPL_COOKIE_FILTER);
10401                         t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
10402                             t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
10403                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10404                             t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
10405                         t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
10406                             t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
10407                         t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
10408                         t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
10409                         t4_register_cpl_handler(CPL_SMT_WRITE_RPL,
10410                             do_smt_write_rpl);
10411                         sx_init(&t4_list_lock, "T4/T5 adapters");
10412                         SLIST_INIT(&t4_list);
10413 #ifdef TCP_OFFLOAD
10414                         sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
10415                         SLIST_INIT(&t4_uld_list);
10416 #endif
10417                         t4_tracer_modload();
10418                         tweak_tunables();
10419                 }
10420                 sx_xunlock(&mlu);
10421                 break;
10422
10423         case MOD_UNLOAD:
10424                 sx_xlock(&mlu);
10425                 if (--loaded == 0) {
10426                         int tries;
10427
10428                         sx_slock(&t4_list_lock);
10429                         if (!SLIST_EMPTY(&t4_list)) {
10430                                 rc = EBUSY;
10431                                 sx_sunlock(&t4_list_lock);
10432                                 goto done_unload;
10433                         }
10434 #ifdef TCP_OFFLOAD
10435                         sx_slock(&t4_uld_list_lock);
10436                         if (!SLIST_EMPTY(&t4_uld_list)) {
10437                                 rc = EBUSY;
10438                                 sx_sunlock(&t4_uld_list_lock);
10439                                 sx_sunlock(&t4_list_lock);
10440                                 goto done_unload;
10441                         }
10442 #endif
10443                         tries = 0;
10444                         while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
10445                                 uprintf("%ju clusters with custom free routine "
10446                                     "still is use.\n", t4_sge_extfree_refs());
10447                                 pause("t4unload", 2 * hz);
10448                         }
10449 #ifdef TCP_OFFLOAD
10450                         sx_sunlock(&t4_uld_list_lock);
10451 #endif
10452                         sx_sunlock(&t4_list_lock);
10453
10454                         if (t4_sge_extfree_refs() == 0) {
10455                                 t4_tracer_modunload();
10456 #ifdef TCP_OFFLOAD
10457                                 sx_destroy(&t4_uld_list_lock);
10458 #endif
10459                                 sx_destroy(&t4_list_lock);
10460                                 t4_sge_modunload();
10461                                 loaded = 0;
10462                         } else {
10463                                 rc = EBUSY;
10464                                 loaded++;       /* undo earlier decrement */
10465                         }
10466                 }
10467 done_unload:
10468                 sx_xunlock(&mlu);
10469                 break;
10470         }
10471
10472         return (rc);
10473 }
10474
/*
 * Module/newbus glue.
 *
 * Three layers of drivers are registered here:
 *  - nexus drivers t4nex/t5nex/t6nex attach to the PCI bus (one per
 *    chip generation) and share mod_event as their load/unload handler;
 *  - port drivers cxgbe/cxl/cc attach to their respective nexus;
 *  - virtual-interface drivers vcxgbe/vcxl/vcc attach to the port drivers.
 */
10475 static devclass_t t4_devclass, t5_devclass, t6_devclass;
10476 static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
10477 static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;
10478
/* T4 nexus: depends on firmware(9); netmap dependency only when built in. */
10479 DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
10480 MODULE_VERSION(t4nex, 1);
10481 MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
10482 #ifdef DEV_NETMAP
10483 MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
10484 #endif /* DEV_NETMAP */
10485
/* T5 nexus: same dependencies as T4. */
10486 DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
10487 MODULE_VERSION(t5nex, 1);
10488 MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
10489 #ifdef DEV_NETMAP
10490 MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
10491 #endif /* DEV_NETMAP */
10492
/* T6 nexus: same dependencies as T4/T5. */
10493 DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
10494 MODULE_VERSION(t6nex, 1);
10495 MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
10496 #ifdef DEV_NETMAP
10497 MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
10498 #endif /* DEV_NETMAP */
10499
/* Port (network interface) drivers, one per chip generation. */
10500 DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
10501 MODULE_VERSION(cxgbe, 1);
10502
10503 DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
10504 MODULE_VERSION(cxl, 1);
10505
10506 DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
10507 MODULE_VERSION(cc, 1);
10508
/* Virtual-interface drivers; each attaches to the matching port driver. */
10509 DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
10510 MODULE_VERSION(vcxgbe, 1);
10511
10512 DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
10513 MODULE_VERSION(vcxl, 1);
10514
10515 DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
10516 MODULE_VERSION(vcc, 1);