]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/cxgbe/t4_main.c
Plug a race between fd table teardown and several loops
[FreeBSD/FreeBSD.git] / sys / dev / cxgbe / t4_main.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 Chelsio Communications, Inc.
5  * All rights reserved.
6  * Written by: Navdeep Parhar <np@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_ddb.h"
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36 #include "opt_kern_tls.h"
37 #include "opt_ratelimit.h"
38 #include "opt_rss.h"
39
40 #include <sys/param.h>
41 #include <sys/conf.h>
42 #include <sys/priv.h>
43 #include <sys/kernel.h>
44 #include <sys/bus.h>
45 #include <sys/eventhandler.h>
46 #include <sys/module.h>
47 #include <sys/malloc.h>
48 #include <sys/queue.h>
49 #include <sys/taskqueue.h>
50 #include <sys/pciio.h>
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
53 #include <dev/pci/pci_private.h>
54 #include <sys/firmware.h>
55 #include <sys/sbuf.h>
56 #include <sys/smp.h>
57 #include <sys/socket.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <net/ethernet.h>
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/if_dl.h>
64 #include <net/if_vlan_var.h>
65 #ifdef RSS
66 #include <net/rss_config.h>
67 #endif
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #ifdef KERN_TLS
71 #include <netinet/tcp_seq.h>
72 #endif
73 #if defined(__i386__) || defined(__amd64__)
74 #include <machine/md_var.h>
75 #include <machine/cputypes.h>
76 #include <vm/vm.h>
77 #include <vm/pmap.h>
78 #endif
79 #ifdef DDB
80 #include <ddb/ddb.h>
81 #include <ddb/db_lex.h>
82 #endif
83
84 #include "common/common.h"
85 #include "common/t4_msg.h"
86 #include "common/t4_regs.h"
87 #include "common/t4_regs_values.h"
88 #include "cudbg/cudbg.h"
89 #include "t4_clip.h"
90 #include "t4_ioctl.h"
91 #include "t4_l2t.h"
92 #include "t4_mp_ring.h"
93 #include "t4_if.h"
94 #include "t4_smt.h"
95
96 /* T4 bus driver interface */
97 static int t4_probe(device_t);
98 static int t4_attach(device_t);
99 static int t4_detach(device_t);
100 static int t4_child_location_str(device_t, device_t, char *, size_t);
101 static int t4_ready(device_t);
102 static int t4_read_port_device(device_t, int, device_t *);
103 static device_method_t t4_methods[] = {
104         DEVMETHOD(device_probe,         t4_probe),
105         DEVMETHOD(device_attach,        t4_attach),
106         DEVMETHOD(device_detach,        t4_detach),
107
108         DEVMETHOD(bus_child_location_str, t4_child_location_str),
109
110         DEVMETHOD(t4_is_main_ready,     t4_ready),
111         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
112
113         DEVMETHOD_END
114 };
115 static driver_t t4_driver = {
116         "t4nex",
117         t4_methods,
118         sizeof(struct adapter)
119 };
120
121
122 /* T4 port (cxgbe) interface */
123 static int cxgbe_probe(device_t);
124 static int cxgbe_attach(device_t);
125 static int cxgbe_detach(device_t);
126 device_method_t cxgbe_methods[] = {
127         DEVMETHOD(device_probe,         cxgbe_probe),
128         DEVMETHOD(device_attach,        cxgbe_attach),
129         DEVMETHOD(device_detach,        cxgbe_detach),
130         { 0, 0 }
131 };
132 static driver_t cxgbe_driver = {
133         "cxgbe",
134         cxgbe_methods,
135         sizeof(struct port_info)
136 };
137
138 /* T4 VI (vcxgbe) interface */
139 static int vcxgbe_probe(device_t);
140 static int vcxgbe_attach(device_t);
141 static int vcxgbe_detach(device_t);
142 static device_method_t vcxgbe_methods[] = {
143         DEVMETHOD(device_probe,         vcxgbe_probe),
144         DEVMETHOD(device_attach,        vcxgbe_attach),
145         DEVMETHOD(device_detach,        vcxgbe_detach),
146         { 0, 0 }
147 };
148 static driver_t vcxgbe_driver = {
149         "vcxgbe",
150         vcxgbe_methods,
151         sizeof(struct vi_info)
152 };
153
154 static d_ioctl_t t4_ioctl;
155
156 static struct cdevsw t4_cdevsw = {
157        .d_version = D_VERSION,
158        .d_ioctl = t4_ioctl,
159        .d_name = "t4nex",
160 };
161
162 /* T5 bus driver interface */
163 static int t5_probe(device_t);
164 static device_method_t t5_methods[] = {
165         DEVMETHOD(device_probe,         t5_probe),
166         DEVMETHOD(device_attach,        t4_attach),
167         DEVMETHOD(device_detach,        t4_detach),
168
169         DEVMETHOD(bus_child_location_str, t4_child_location_str),
170
171         DEVMETHOD(t4_is_main_ready,     t4_ready),
172         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
173
174         DEVMETHOD_END
175 };
176 static driver_t t5_driver = {
177         "t5nex",
178         t5_methods,
179         sizeof(struct adapter)
180 };
181
182
183 /* T5 port (cxl) interface */
184 static driver_t cxl_driver = {
185         "cxl",
186         cxgbe_methods,
187         sizeof(struct port_info)
188 };
189
190 /* T5 VI (vcxl) interface */
191 static driver_t vcxl_driver = {
192         "vcxl",
193         vcxgbe_methods,
194         sizeof(struct vi_info)
195 };
196
197 /* T6 bus driver interface */
198 static int t6_probe(device_t);
199 static device_method_t t6_methods[] = {
200         DEVMETHOD(device_probe,         t6_probe),
201         DEVMETHOD(device_attach,        t4_attach),
202         DEVMETHOD(device_detach,        t4_detach),
203
204         DEVMETHOD(bus_child_location_str, t4_child_location_str),
205
206         DEVMETHOD(t4_is_main_ready,     t4_ready),
207         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
208
209         DEVMETHOD_END
210 };
211 static driver_t t6_driver = {
212         "t6nex",
213         t6_methods,
214         sizeof(struct adapter)
215 };
216
217
218 /* T6 port (cc) interface */
219 static driver_t cc_driver = {
220         "cc",
221         cxgbe_methods,
222         sizeof(struct port_info)
223 };
224
225 /* T6 VI (vcc) interface */
226 static driver_t vcc_driver = {
227         "vcc",
228         vcxgbe_methods,
229         sizeof(struct vi_info)
230 };
231
232 /* ifnet interface */
233 static void cxgbe_init(void *);
234 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
235 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
236 static void cxgbe_qflush(struct ifnet *);
237 #if defined(KERN_TLS) || defined(RATELIMIT)
238 static int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *,
239     struct m_snd_tag **);
240 static int cxgbe_snd_tag_modify(struct m_snd_tag *,
241     union if_snd_tag_modify_params *);
242 static int cxgbe_snd_tag_query(struct m_snd_tag *,
243     union if_snd_tag_query_params *);
244 static void cxgbe_snd_tag_free(struct m_snd_tag *);
245 #endif
246
247 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");
248
249 /*
250  * Correct lock order when you need to acquire multiple locks is t4_list_lock,
251  * then ADAPTER_LOCK, then t4_uld_list_lock.
252  */
253 static struct sx t4_list_lock;
254 SLIST_HEAD(, adapter) t4_list;
255 #ifdef TCP_OFFLOAD
256 static struct sx t4_uld_list_lock;
257 SLIST_HEAD(, uld_info) t4_uld_list;
258 #endif
259
260 /*
261  * Tunables.  See tweak_tunables() too.
262  *
263  * Each tunable is set to a default value here if it's known at compile-time.
264  * Otherwise it is set to -n as an indication to tweak_tunables() that it should
265  * provide a reasonable default (upto n) when the driver is loaded.
266  *
267  * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
268  * T5 are under hw.cxl.
269  */
270 SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
271     "cxgbe(4) parameters");
272 SYSCTL_NODE(_hw, OID_AUTO, cxl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
273     "cxgbe(4) T5+ parameters");
274 SYSCTL_NODE(_hw_cxgbe, OID_AUTO, toe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
275     "cxgbe(4) TOE parameters");
276
277 /*
278  * Number of queues for tx and rx, NIC and offload.
279  */
280 #define NTXQ 16
281 int t4_ntxq = -NTXQ;
282 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq, CTLFLAG_RDTUN, &t4_ntxq, 0,
283     "Number of TX queues per port");
284 TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);      /* Old name, undocumented */
285
286 #define NRXQ 8
287 int t4_nrxq = -NRXQ;
288 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq, CTLFLAG_RDTUN, &t4_nrxq, 0,
289     "Number of RX queues per port");
290 TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);      /* Old name, undocumented */
291
292 #define NTXQ_VI 1
293 static int t4_ntxq_vi = -NTXQ_VI;
294 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq_vi, CTLFLAG_RDTUN, &t4_ntxq_vi, 0,
295     "Number of TX queues per VI");
296
297 #define NRXQ_VI 1
298 static int t4_nrxq_vi = -NRXQ_VI;
299 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq_vi, CTLFLAG_RDTUN, &t4_nrxq_vi, 0,
300     "Number of RX queues per VI");
301
302 static int t4_rsrv_noflowq = 0;
303 SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
304     0, "Reserve TX queue 0 of each VI for non-flowid packets");
305
306 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
307 #define NOFLDTXQ 8
308 static int t4_nofldtxq = -NOFLDTXQ;
309 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq, CTLFLAG_RDTUN, &t4_nofldtxq, 0,
310     "Number of offload TX queues per port");
311
312 #define NOFLDRXQ 2
313 static int t4_nofldrxq = -NOFLDRXQ;
314 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq, CTLFLAG_RDTUN, &t4_nofldrxq, 0,
315     "Number of offload RX queues per port");
316
317 #define NOFLDTXQ_VI 1
318 static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
319 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq_vi, CTLFLAG_RDTUN, &t4_nofldtxq_vi, 0,
320     "Number of offload TX queues per VI");
321
322 #define NOFLDRXQ_VI 1
323 static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
324 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq_vi, CTLFLAG_RDTUN, &t4_nofldrxq_vi, 0,
325     "Number of offload RX queues per VI");
326
327 #define TMR_IDX_OFLD 1
328 int t4_tmr_idx_ofld = TMR_IDX_OFLD;
329 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_ofld, CTLFLAG_RDTUN,
330     &t4_tmr_idx_ofld, 0, "Holdoff timer index for offload queues");
331
332 #define PKTC_IDX_OFLD (-1)
333 int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
334 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_ofld, CTLFLAG_RDTUN,
335     &t4_pktc_idx_ofld, 0, "holdoff packet counter index for offload queues");
336
337 /* 0 means chip/fw default, non-zero number is value in microseconds */
338 static u_long t4_toe_keepalive_idle = 0;
339 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_idle, CTLFLAG_RDTUN,
340     &t4_toe_keepalive_idle, 0, "TOE keepalive idle timer (us)");
341
342 /* 0 means chip/fw default, non-zero number is value in microseconds */
343 static u_long t4_toe_keepalive_interval = 0;
344 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_interval, CTLFLAG_RDTUN,
345     &t4_toe_keepalive_interval, 0, "TOE keepalive interval timer (us)");
346
347 /* 0 means chip/fw default, non-zero number is # of keepalives before abort */
348 static int t4_toe_keepalive_count = 0;
349 SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, keepalive_count, CTLFLAG_RDTUN,
350     &t4_toe_keepalive_count, 0, "Number of TOE keepalive probes before abort");
351
352 /* 0 means chip/fw default, non-zero number is value in microseconds */
353 static u_long t4_toe_rexmt_min = 0;
354 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_min, CTLFLAG_RDTUN,
355     &t4_toe_rexmt_min, 0, "Minimum TOE retransmit interval (us)");
356
357 /* 0 means chip/fw default, non-zero number is value in microseconds */
358 static u_long t4_toe_rexmt_max = 0;
359 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_max, CTLFLAG_RDTUN,
360     &t4_toe_rexmt_max, 0, "Maximum TOE retransmit interval (us)");
361
362 /* 0 means chip/fw default, non-zero number is # of rexmt before abort */
363 static int t4_toe_rexmt_count = 0;
364 SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, rexmt_count, CTLFLAG_RDTUN,
365     &t4_toe_rexmt_count, 0, "Number of TOE retransmissions before abort");
366
367 /* -1 means chip/fw default, other values are raw backoff values to use */
368 static int t4_toe_rexmt_backoff[16] = {
369         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
370 };
371 SYSCTL_NODE(_hw_cxgbe_toe, OID_AUTO, rexmt_backoff,
372     CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
373     "cxgbe(4) TOE retransmit backoff values");
374 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 0, CTLFLAG_RDTUN,
375     &t4_toe_rexmt_backoff[0], 0, "");
376 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 1, CTLFLAG_RDTUN,
377     &t4_toe_rexmt_backoff[1], 0, "");
378 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 2, CTLFLAG_RDTUN,
379     &t4_toe_rexmt_backoff[2], 0, "");
380 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 3, CTLFLAG_RDTUN,
381     &t4_toe_rexmt_backoff[3], 0, "");
382 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 4, CTLFLAG_RDTUN,
383     &t4_toe_rexmt_backoff[4], 0, "");
384 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 5, CTLFLAG_RDTUN,
385     &t4_toe_rexmt_backoff[5], 0, "");
386 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 6, CTLFLAG_RDTUN,
387     &t4_toe_rexmt_backoff[6], 0, "");
388 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 7, CTLFLAG_RDTUN,
389     &t4_toe_rexmt_backoff[7], 0, "");
390 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 8, CTLFLAG_RDTUN,
391     &t4_toe_rexmt_backoff[8], 0, "");
392 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 9, CTLFLAG_RDTUN,
393     &t4_toe_rexmt_backoff[9], 0, "");
394 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 10, CTLFLAG_RDTUN,
395     &t4_toe_rexmt_backoff[10], 0, "");
396 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 11, CTLFLAG_RDTUN,
397     &t4_toe_rexmt_backoff[11], 0, "");
398 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 12, CTLFLAG_RDTUN,
399     &t4_toe_rexmt_backoff[12], 0, "");
400 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 13, CTLFLAG_RDTUN,
401     &t4_toe_rexmt_backoff[13], 0, "");
402 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
403     &t4_toe_rexmt_backoff[14], 0, "");
404 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
405     &t4_toe_rexmt_backoff[15], 0, "");
406
407 static int t4_toe_tls_rx_timeout = 5;
408 SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, tls_rx_timeout, CTLFLAG_RDTUN,
409     &t4_toe_tls_rx_timeout, 0,
410     "Timeout in seconds to downgrade TLS sockets to plain TOE");
411 #endif
412
413 #ifdef DEV_NETMAP
414 #define NN_MAIN_VI      (1 << 0)        /* Native netmap on the main VI */
415 #define NN_EXTRA_VI     (1 << 1)        /* Native netmap on the extra VI(s) */
416 static int t4_native_netmap = NN_EXTRA_VI;
417 SYSCTL_INT(_hw_cxgbe, OID_AUTO, native_netmap, CTLFLAG_RDTUN, &t4_native_netmap,
418     0, "Native netmap support.  bit 0 = main VI, bit 1 = extra VIs");
419
420 #define NNMTXQ 8
421 static int t4_nnmtxq = -NNMTXQ;
422 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq, CTLFLAG_RDTUN, &t4_nnmtxq, 0,
423     "Number of netmap TX queues");
424
425 #define NNMRXQ 8
426 static int t4_nnmrxq = -NNMRXQ;
427 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq, CTLFLAG_RDTUN, &t4_nnmrxq, 0,
428     "Number of netmap RX queues");
429
430 #define NNMTXQ_VI 2
431 static int t4_nnmtxq_vi = -NNMTXQ_VI;
432 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq_vi, CTLFLAG_RDTUN, &t4_nnmtxq_vi, 0,
433     "Number of netmap TX queues per VI");
434
435 #define NNMRXQ_VI 2
436 static int t4_nnmrxq_vi = -NNMRXQ_VI;
437 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq_vi, CTLFLAG_RDTUN, &t4_nnmrxq_vi, 0,
438     "Number of netmap RX queues per VI");
439 #endif
440
441 /*
442  * Holdoff parameters for ports.
443  */
444 #define TMR_IDX 1
445 int t4_tmr_idx = TMR_IDX;
446 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx, CTLFLAG_RDTUN, &t4_tmr_idx,
447     0, "Holdoff timer index");
448 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);     /* Old name */
449
450 #define PKTC_IDX (-1)
451 int t4_pktc_idx = PKTC_IDX;
452 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx, CTLFLAG_RDTUN, &t4_pktc_idx,
453     0, "Holdoff packet counter index");
454 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);     /* Old name */
455
456 /*
457  * Size (# of entries) of each tx and rx queue.
458  */
459 unsigned int t4_qsize_txq = TX_EQ_QSIZE;
460 SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, &t4_qsize_txq, 0,
461     "Number of descriptors in each TX queue");
462
463 unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
464 SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, &t4_qsize_rxq, 0,
465     "Number of descriptors in each RX queue");
466
467 /*
468  * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
469  */
470 int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
471 SYSCTL_INT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &t4_intr_types,
472     0, "Interrupt types allowed (bit 0 = INTx, 1 = MSI, 2 = MSI-X)");
473
474 /*
475  * Configuration file.  All the _CF names here are special.
476  */
477 #define DEFAULT_CF      "default"
478 #define BUILTIN_CF      "built-in"
479 #define FLASH_CF        "flash"
480 #define UWIRE_CF        "uwire"
481 #define FPGA_CF         "fpga"
482 static char t4_cfg_file[32] = DEFAULT_CF;
483 SYSCTL_STRING(_hw_cxgbe, OID_AUTO, config_file, CTLFLAG_RDTUN, t4_cfg_file,
484     sizeof(t4_cfg_file), "Firmware configuration file");
485
486 /*
487  * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
488  * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
489  * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
490  *            mark or when signalled to do so, 0 to never emit PAUSE.
491  * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
492  *                 negotiated settings will override rx_pause/tx_pause.
493  *                 Otherwise rx_pause/tx_pause are applied forcibly.
494  */
495 static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
496 SYSCTL_INT(_hw_cxgbe, OID_AUTO, pause_settings, CTLFLAG_RDTUN,
497     &t4_pause_settings, 0,
498     "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
499
500 /*
501  * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
502  * -1 to run with the firmware default.  Same as FEC_AUTO (bit 5)
503  *  0 to disable FEC.
504  */
505 static int t4_fec = -1;
506 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fec, CTLFLAG_RDTUN, &t4_fec, 0,
507     "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
508
509 /*
510  * Link autonegotiation.
511  * -1 to run with the firmware default.
512  *  0 to disable.
513  *  1 to enable.
514  */
515 static int t4_autoneg = -1;
516 SYSCTL_INT(_hw_cxgbe, OID_AUTO, autoneg, CTLFLAG_RDTUN, &t4_autoneg, 0,
517     "Link autonegotiation");
518
519 /*
520  * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
521  * encouraged respectively).  '-n' is the same as 'n' except the firmware
522  * version used in the checks is read from the firmware bundled with the driver.
523  */
524 static int t4_fw_install = 1;
525 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fw_install, CTLFLAG_RDTUN, &t4_fw_install, 0,
526     "Firmware auto-install (0 = prohibited, 1 = allowed, 2 = encouraged)");
527
528 /*
529  * ASIC features that will be used.  Disable the ones you don't want so that the
530  * chip resources aren't wasted on features that will not be used.
531  */
532 static int t4_nbmcaps_allowed = 0;
533 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nbmcaps_allowed, CTLFLAG_RDTUN,
534     &t4_nbmcaps_allowed, 0, "Default NBM capabilities");
535
536 static int t4_linkcaps_allowed = 0;     /* No DCBX, PPP, etc. by default */
537 SYSCTL_INT(_hw_cxgbe, OID_AUTO, linkcaps_allowed, CTLFLAG_RDTUN,
538     &t4_linkcaps_allowed, 0, "Default link capabilities");
539
540 static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
541     FW_CAPS_CONFIG_SWITCH_EGRESS;
542 SYSCTL_INT(_hw_cxgbe, OID_AUTO, switchcaps_allowed, CTLFLAG_RDTUN,
543     &t4_switchcaps_allowed, 0, "Default switch capabilities");
544
545 #ifdef RATELIMIT
546 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
547         FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
548 #else
549 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
550         FW_CAPS_CONFIG_NIC_HASHFILTER;
551 #endif
552 SYSCTL_INT(_hw_cxgbe, OID_AUTO, niccaps_allowed, CTLFLAG_RDTUN,
553     &t4_niccaps_allowed, 0, "Default NIC capabilities");
554
555 static int t4_toecaps_allowed = -1;
556 SYSCTL_INT(_hw_cxgbe, OID_AUTO, toecaps_allowed, CTLFLAG_RDTUN,
557     &t4_toecaps_allowed, 0, "Default TCP offload capabilities");
558
559 static int t4_rdmacaps_allowed = -1;
560 SYSCTL_INT(_hw_cxgbe, OID_AUTO, rdmacaps_allowed, CTLFLAG_RDTUN,
561     &t4_rdmacaps_allowed, 0, "Default RDMA capabilities");
562
563 static int t4_cryptocaps_allowed = -1;
564 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cryptocaps_allowed, CTLFLAG_RDTUN,
565     &t4_cryptocaps_allowed, 0, "Default crypto capabilities");
566
567 static int t4_iscsicaps_allowed = -1;
568 SYSCTL_INT(_hw_cxgbe, OID_AUTO, iscsicaps_allowed, CTLFLAG_RDTUN,
569     &t4_iscsicaps_allowed, 0, "Default iSCSI capabilities");
570
571 static int t4_fcoecaps_allowed = 0;
572 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fcoecaps_allowed, CTLFLAG_RDTUN,
573     &t4_fcoecaps_allowed, 0, "Default FCoE capabilities");
574
575 static int t5_write_combine = 0;
576 SYSCTL_INT(_hw_cxl, OID_AUTO, write_combine, CTLFLAG_RDTUN, &t5_write_combine,
577     0, "Use WC instead of UC for BAR2");
578
579 static int t4_num_vis = 1;
580 SYSCTL_INT(_hw_cxgbe, OID_AUTO, num_vis, CTLFLAG_RDTUN, &t4_num_vis, 0,
581     "Number of VIs per port");
582
583 /*
584  * PCIe Relaxed Ordering.
585  * -1: driver should figure out a good value.
586  * 0: disable RO.
587  * 1: enable RO.
588  * 2: leave RO alone.
589  */
590 static int pcie_relaxed_ordering = -1;
591 SYSCTL_INT(_hw_cxgbe, OID_AUTO, pcie_relaxed_ordering, CTLFLAG_RDTUN,
592     &pcie_relaxed_ordering, 0,
593     "PCIe Relaxed Ordering: 0 = disable, 1 = enable, 2 = leave alone");
594
595 static int t4_panic_on_fatal_err = 0;
596 SYSCTL_INT(_hw_cxgbe, OID_AUTO, panic_on_fatal_err, CTLFLAG_RDTUN,
597     &t4_panic_on_fatal_err, 0, "panic on fatal errors");
598
599 static int t4_tx_vm_wr = 0;
600 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_vm_wr, CTLFLAG_RWTUN, &t4_tx_vm_wr, 0,
601     "Use VM work requests to transmit packets.");
602
603 /*
604  * Set to non-zero to enable the attack filter.  A packet that matches any of
605  * these conditions will get dropped on ingress:
606  * 1) IP && source address == destination address.
607  * 2) TCP/IP && source address is not a unicast address.
608  * 3) TCP/IP && destination address is not a unicast address.
609  * 4) IP && source address is loopback (127.x.y.z).
610  * 5) IP && destination address is loopback (127.x.y.z).
611  * 6) IPv6 && source address == destination address.
612  * 7) IPv6 && source address is not a unicast address.
613  * 8) IPv6 && source address is loopback (::1/128).
614  * 9) IPv6 && destination address is loopback (::1/128).
615  * 10) IPv6 && source address is unspecified (::/128).
616  * 11) IPv6 && destination address is unspecified (::/128).
617  * 12) TCP/IPv6 && source address is multicast (ff00::/8).
618  * 13) TCP/IPv6 && destination address is multicast (ff00::/8).
619  */
620 static int t4_attack_filter = 0;
621 SYSCTL_INT(_hw_cxgbe, OID_AUTO, attack_filter, CTLFLAG_RDTUN,
622     &t4_attack_filter, 0, "Drop suspicious traffic");
623
624 static int t4_drop_ip_fragments = 0;
625 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_ip_fragments, CTLFLAG_RDTUN,
626     &t4_drop_ip_fragments, 0, "Drop IP fragments");
627
628 static int t4_drop_pkts_with_l2_errors = 1;
629 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l2_errors, CTLFLAG_RDTUN,
630     &t4_drop_pkts_with_l2_errors, 0,
631     "Drop all frames with Layer 2 length or checksum errors");
632
633 static int t4_drop_pkts_with_l3_errors = 0;
634 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l3_errors, CTLFLAG_RDTUN,
635     &t4_drop_pkts_with_l3_errors, 0,
636     "Drop all frames with IP version, length, or checksum errors");
637
638 static int t4_drop_pkts_with_l4_errors = 0;
639 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l4_errors, CTLFLAG_RDTUN,
640     &t4_drop_pkts_with_l4_errors, 0,
641     "Drop all frames with Layer 4 length, checksum, or other errors");
642
643 #ifdef TCP_OFFLOAD
644 /*
645  * TOE tunables.
646  */
647 static int t4_cop_managed_offloading = 0;
648 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cop_managed_offloading, CTLFLAG_RDTUN,
649     &t4_cop_managed_offloading, 0,
650     "COP (Connection Offload Policy) controls all TOE offload");
651 #endif
652
653 #ifdef KERN_TLS
654 /*
655  * This enables KERN_TLS for all adapters if set.
656  */
657 static int t4_kern_tls = 0;
658 SYSCTL_INT(_hw_cxgbe, OID_AUTO, kern_tls, CTLFLAG_RDTUN, &t4_kern_tls, 0,
659     "Enable KERN_TLS mode for all supported adapters");
660
661 SYSCTL_NODE(_hw_cxgbe, OID_AUTO, tls, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
662     "cxgbe(4) KERN_TLS parameters");
663
664 static int t4_tls_inline_keys = 0;
665 SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, inline_keys, CTLFLAG_RDTUN,
666     &t4_tls_inline_keys, 0,
667     "Always pass TLS keys in work requests (1) or attempt to store TLS keys "
668     "in card memory.");
669
670 static int t4_tls_combo_wrs = 0;
671 SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, combo_wrs, CTLFLAG_RDTUN, &t4_tls_combo_wrs,
672     0, "Attempt to combine TCB field updates with TLS record work requests.");
673 #endif
674
675 /* Functions used by VIs to obtain unique MAC addresses for each VI. */
676 static int vi_mac_funcs[] = {
677         FW_VI_FUNC_ETH,
678         FW_VI_FUNC_OFLD,
679         FW_VI_FUNC_IWARP,
680         FW_VI_FUNC_OPENISCSI,
681         FW_VI_FUNC_OPENFCOE,
682         FW_VI_FUNC_FOISCSI,
683         FW_VI_FUNC_FOFCOE,
684 };
685
686 struct intrs_and_queues {
687         uint16_t intr_type;     /* INTx, MSI, or MSI-X */
688         uint16_t num_vis;       /* number of VIs for each port */
689         uint16_t nirq;          /* Total # of vectors */
690         uint16_t ntxq;          /* # of NIC txq's for each port */
691         uint16_t nrxq;          /* # of NIC rxq's for each port */
692         uint16_t nofldtxq;      /* # of TOE/ETHOFLD txq's for each port */
693         uint16_t nofldrxq;      /* # of TOE rxq's for each port */
694         uint16_t nnmtxq;        /* # of netmap txq's */
695         uint16_t nnmrxq;        /* # of netmap rxq's */
696
697         /* The vcxgbe/vcxl interfaces use these and not the ones above. */
698         uint16_t ntxq_vi;       /* # of NIC txq's */
699         uint16_t nrxq_vi;       /* # of NIC rxq's */
700         uint16_t nofldtxq_vi;   /* # of TOE txq's */
701         uint16_t nofldrxq_vi;   /* # of TOE rxq's */
702         uint16_t nnmtxq_vi;     /* # of netmap txq's */
703         uint16_t nnmrxq_vi;     /* # of netmap rxq's */
704 };
705
706 static void setup_memwin(struct adapter *);
707 static void position_memwin(struct adapter *, int, uint32_t);
708 static int validate_mem_range(struct adapter *, uint32_t, uint32_t);
709 static int fwmtype_to_hwmtype(int);
710 static int validate_mt_off_len(struct adapter *, int, uint32_t, uint32_t,
711     uint32_t *);
712 static int fixup_devlog_params(struct adapter *);
713 static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
714 static int contact_firmware(struct adapter *);
715 static int partition_resources(struct adapter *);
716 static int get_params__pre_init(struct adapter *);
717 static int set_params__pre_init(struct adapter *);
718 static int get_params__post_init(struct adapter *);
719 static int set_params__post_init(struct adapter *);
720 static void t4_set_desc(struct adapter *);
721 static bool fixed_ifmedia(struct port_info *);
722 static void build_medialist(struct port_info *);
723 static void init_link_config(struct port_info *);
724 static int fixup_link_config(struct port_info *);
725 static int apply_link_config(struct port_info *);
726 static int cxgbe_init_synchronized(struct vi_info *);
727 static int cxgbe_uninit_synchronized(struct vi_info *);
728 static void quiesce_txq(struct adapter *, struct sge_txq *);
729 static void quiesce_wrq(struct adapter *, struct sge_wrq *);
730 static void quiesce_iq(struct adapter *, struct sge_iq *);
731 static void quiesce_fl(struct adapter *, struct sge_fl *);
732 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
733     driver_intr_t *, void *, char *);
734 static int t4_free_irq(struct adapter *, struct irq *);
735 static void t4_init_atid_table(struct adapter *);
736 static void t4_free_atid_table(struct adapter *);
737 static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
738 static void vi_refresh_stats(struct adapter *, struct vi_info *);
739 static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
740 static void cxgbe_tick(void *);
741 static void cxgbe_sysctls(struct port_info *);
742 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
743 static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
744 static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
745 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
746 static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
747 static int sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS);
748 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
749 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
750 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
751 static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
752 static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
753 static int sysctl_fec(SYSCTL_HANDLER_ARGS);
754 static int sysctl_module_fec(SYSCTL_HANDLER_ARGS);
755 static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
756 static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
757 static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
758 static int sysctl_vdd(SYSCTL_HANDLER_ARGS);
759 static int sysctl_reset_sensor(SYSCTL_HANDLER_ARGS);
760 static int sysctl_loadavg(SYSCTL_HANDLER_ARGS);
761 static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
762 static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
763 static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
764 static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
765 static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
766 static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
767 static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
768 static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
769 static int sysctl_tid_stats(SYSCTL_HANDLER_ARGS);
770 static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
771 static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
772 static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
773 static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
774 static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
775 static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
776 static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
777 static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
778 static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
779 static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
780 static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
781 static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
782 static int sysctl_tids(SYSCTL_HANDLER_ARGS);
783 static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
784 static int sysctl_tnl_stats(SYSCTL_HANDLER_ARGS);
785 static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
786 static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
787 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
788 static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
789 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
790 static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
791 #ifdef TCP_OFFLOAD
792 static int sysctl_tls(SYSCTL_HANDLER_ARGS);
793 static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
794 static int sysctl_tls_rx_timeout(SYSCTL_HANDLER_ARGS);
795 static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
796 static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
797 static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
798 static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
799 static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
800 static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
801 static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
802 #endif
803 static int get_sge_context(struct adapter *, struct t4_sge_context *);
804 static int load_fw(struct adapter *, struct t4_data *);
805 static int load_cfg(struct adapter *, struct t4_data *);
806 static int load_boot(struct adapter *, struct t4_bootrom *);
807 static int load_bootcfg(struct adapter *, struct t4_data *);
808 static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
809 static void free_offload_policy(struct t4_offload_policy *);
810 static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
811 static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
812 static int read_i2c(struct adapter *, struct t4_i2c_data *);
813 static int clear_stats(struct adapter *, u_int);
814 #ifdef TCP_OFFLOAD
815 static int toe_capability(struct vi_info *, int);
816 static void t4_async_event(void *, int);
817 #endif
818 static int mod_event(module_t, int, void *);
819 static int notify_siblings(device_t, int);
820
821 struct {
822         uint16_t device;
823         char *desc;
824 } t4_pciids[] = {
825         {0xa000, "Chelsio Terminator 4 FPGA"},
826         {0x4400, "Chelsio T440-dbg"},
827         {0x4401, "Chelsio T420-CR"},
828         {0x4402, "Chelsio T422-CR"},
829         {0x4403, "Chelsio T440-CR"},
830         {0x4404, "Chelsio T420-BCH"},
831         {0x4405, "Chelsio T440-BCH"},
832         {0x4406, "Chelsio T440-CH"},
833         {0x4407, "Chelsio T420-SO"},
834         {0x4408, "Chelsio T420-CX"},
835         {0x4409, "Chelsio T420-BT"},
836         {0x440a, "Chelsio T404-BT"},
837         {0x440e, "Chelsio T440-LP-CR"},
838 }, t5_pciids[] = {
839         {0xb000, "Chelsio Terminator 5 FPGA"},
840         {0x5400, "Chelsio T580-dbg"},
841         {0x5401,  "Chelsio T520-CR"},           /* 2 x 10G */
842         {0x5402,  "Chelsio T522-CR"},           /* 2 x 10G, 2 X 1G */
843         {0x5403,  "Chelsio T540-CR"},           /* 4 x 10G */
844         {0x5407,  "Chelsio T520-SO"},           /* 2 x 10G, nomem */
845         {0x5409,  "Chelsio T520-BT"},           /* 2 x 10GBaseT */
846         {0x540a,  "Chelsio T504-BT"},           /* 4 x 1G */
847         {0x540d,  "Chelsio T580-CR"},           /* 2 x 40G */
848         {0x540e,  "Chelsio T540-LP-CR"},        /* 4 x 10G */
849         {0x5410,  "Chelsio T580-LP-CR"},        /* 2 x 40G */
850         {0x5411,  "Chelsio T520-LL-CR"},        /* 2 x 10G */
851         {0x5412,  "Chelsio T560-CR"},           /* 1 x 40G, 2 x 10G */
852         {0x5414,  "Chelsio T580-LP-SO-CR"},     /* 2 x 40G, nomem */
853         {0x5415,  "Chelsio T502-BT"},           /* 2 x 1G */
854         {0x5418,  "Chelsio T540-BT"},           /* 4 x 10GBaseT */
855         {0x5419,  "Chelsio T540-LP-BT"},        /* 4 x 10GBaseT */
856         {0x541a,  "Chelsio T540-SO-BT"},        /* 4 x 10GBaseT, nomem */
857         {0x541b,  "Chelsio T540-SO-CR"},        /* 4 x 10G, nomem */
858
859         /* Custom */
860         {0x5483, "Custom T540-CR"},
861         {0x5484, "Custom T540-BT"},
862 }, t6_pciids[] = {
863         {0xc006, "Chelsio Terminator 6 FPGA"},  /* T6 PE10K6 FPGA (PF0) */
864         {0x6400, "Chelsio T6-DBG-25"},          /* 2 x 10/25G, debug */
865         {0x6401, "Chelsio T6225-CR"},           /* 2 x 10/25G */
866         {0x6402, "Chelsio T6225-SO-CR"},        /* 2 x 10/25G, nomem */
867         {0x6403, "Chelsio T6425-CR"},           /* 4 x 10/25G */
868         {0x6404, "Chelsio T6425-SO-CR"},        /* 4 x 10/25G, nomem */
869         {0x6405, "Chelsio T6225-OCP-SO"},       /* 2 x 10/25G, nomem */
870         {0x6406, "Chelsio T62100-OCP-SO"},      /* 2 x 40/50/100G, nomem */
871         {0x6407, "Chelsio T62100-LP-CR"},       /* 2 x 40/50/100G */
872         {0x6408, "Chelsio T62100-SO-CR"},       /* 2 x 40/50/100G, nomem */
873         {0x6409, "Chelsio T6210-BT"},           /* 2 x 10GBASE-T */
874         {0x640d, "Chelsio T62100-CR"},          /* 2 x 40/50/100G */
875         {0x6410, "Chelsio T6-DBG-100"},         /* 2 x 40/50/100G, debug */
876         {0x6411, "Chelsio T6225-LL-CR"},        /* 2 x 10/25G */
877         {0x6414, "Chelsio T61100-OCP-SO"},      /* 1 x 40/50/100G, nomem */
878         {0x6415, "Chelsio T6201-BT"},           /* 2 x 1000BASE-T */
879
880         /* Custom */
881         {0x6480, "Custom T6225-CR"},
882         {0x6481, "Custom T62100-CR"},
883         {0x6482, "Custom T6225-CR"},
884         {0x6483, "Custom T62100-CR"},
885         {0x6484, "Custom T64100-CR"},
886         {0x6485, "Custom T6240-SO"},
887         {0x6486, "Custom T6225-SO-CR"},
888         {0x6487, "Custom T6225-CR"},
889 };
890
891 #ifdef TCP_OFFLOAD
892 /*
893  * service_iq_fl() has an iq and needs the fl.  Offset of fl from the iq should
894  * be exactly the same for both rxq and ofld_rxq.
895  */
896 CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
897 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
898 #endif
899 CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);
900
901 static int
902 t4_probe(device_t dev)
903 {
904         int i;
905         uint16_t v = pci_get_vendor(dev);
906         uint16_t d = pci_get_device(dev);
907         uint8_t f = pci_get_function(dev);
908
909         if (v != PCI_VENDOR_ID_CHELSIO)
910                 return (ENXIO);
911
912         /* Attach only to PF0 of the FPGA */
913         if (d == 0xa000 && f != 0)
914                 return (ENXIO);
915
916         for (i = 0; i < nitems(t4_pciids); i++) {
917                 if (d == t4_pciids[i].device) {
918                         device_set_desc(dev, t4_pciids[i].desc);
919                         return (BUS_PROBE_DEFAULT);
920                 }
921         }
922
923         return (ENXIO);
924 }
925
926 static int
927 t5_probe(device_t dev)
928 {
929         int i;
930         uint16_t v = pci_get_vendor(dev);
931         uint16_t d = pci_get_device(dev);
932         uint8_t f = pci_get_function(dev);
933
934         if (v != PCI_VENDOR_ID_CHELSIO)
935                 return (ENXIO);
936
937         /* Attach only to PF0 of the FPGA */
938         if (d == 0xb000 && f != 0)
939                 return (ENXIO);
940
941         for (i = 0; i < nitems(t5_pciids); i++) {
942                 if (d == t5_pciids[i].device) {
943                         device_set_desc(dev, t5_pciids[i].desc);
944                         return (BUS_PROBE_DEFAULT);
945                 }
946         }
947
948         return (ENXIO);
949 }
950
951 static int
952 t6_probe(device_t dev)
953 {
954         int i;
955         uint16_t v = pci_get_vendor(dev);
956         uint16_t d = pci_get_device(dev);
957
958         if (v != PCI_VENDOR_ID_CHELSIO)
959                 return (ENXIO);
960
961         for (i = 0; i < nitems(t6_pciids); i++) {
962                 if (d == t6_pciids[i].device) {
963                         device_set_desc(dev, t6_pciids[i].desc);
964                         return (BUS_PROBE_DEFAULT);
965                 }
966         }
967
968         return (ENXIO);
969 }
970
971 static void
972 t5_attribute_workaround(device_t dev)
973 {
974         device_t root_port;
975         uint32_t v;
976
977         /*
978          * The T5 chips do not properly echo the No Snoop and Relaxed
979          * Ordering attributes when replying to a TLP from a Root
980          * Port.  As a workaround, find the parent Root Port and
981          * disable No Snoop and Relaxed Ordering.  Note that this
982          * affects all devices under this root port.
983          */
984         root_port = pci_find_pcie_root_port(dev);
985         if (root_port == NULL) {
986                 device_printf(dev, "Unable to find parent root port\n");
987                 return;
988         }
989
990         v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
991             PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
992         if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
993             0)
994                 device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
995                     device_get_nameunit(root_port));
996 }
997
998 static const struct devnames devnames[] = {
999         {
1000                 .nexus_name = "t4nex",
1001                 .ifnet_name = "cxgbe",
1002                 .vi_ifnet_name = "vcxgbe",
1003                 .pf03_drv_name = "t4iov",
1004                 .vf_nexus_name = "t4vf",
1005                 .vf_ifnet_name = "cxgbev"
1006         }, {
1007                 .nexus_name = "t5nex",
1008                 .ifnet_name = "cxl",
1009                 .vi_ifnet_name = "vcxl",
1010                 .pf03_drv_name = "t5iov",
1011                 .vf_nexus_name = "t5vf",
1012                 .vf_ifnet_name = "cxlv"
1013         }, {
1014                 .nexus_name = "t6nex",
1015                 .ifnet_name = "cc",
1016                 .vi_ifnet_name = "vcc",
1017                 .pf03_drv_name = "t6iov",
1018                 .vf_nexus_name = "t6vf",
1019                 .vf_ifnet_name = "ccv"
1020         }
1021 };
1022
1023 void
1024 t4_init_devnames(struct adapter *sc)
1025 {
1026         int id;
1027
1028         id = chip_id(sc);
1029         if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
1030                 sc->names = &devnames[id - CHELSIO_T4];
1031         else {
1032                 device_printf(sc->dev, "chip id %d is not supported.\n", id);
1033                 sc->names = NULL;
1034         }
1035 }
1036
1037 static int
1038 t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
1039 {
1040         const char *parent, *name;
1041         long value;
1042         int line, unit;
1043
1044         line = 0;
1045         parent = device_get_nameunit(sc->dev);
1046         name = sc->names->ifnet_name;
1047         while (resource_find_dev(&line, name, &unit, "at", parent) == 0) {
1048                 if (resource_long_value(name, unit, "port", &value) == 0 &&
1049                     value == pi->port_id)
1050                         return (unit);
1051         }
1052         return (-1);
1053 }
1054
1055 static int
1056 t4_attach(device_t dev)
1057 {
1058         struct adapter *sc;
1059         int rc = 0, i, j, rqidx, tqidx, nports;
1060         struct make_dev_args mda;
1061         struct intrs_and_queues iaq;
1062         struct sge *s;
1063         uint32_t *buf;
1064 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1065         int ofld_tqidx;
1066 #endif
1067 #ifdef TCP_OFFLOAD
1068         int ofld_rqidx;
1069 #endif
1070 #ifdef DEV_NETMAP
1071         int nm_rqidx, nm_tqidx;
1072 #endif
1073         int num_vis;
1074
1075         sc = device_get_softc(dev);
1076         sc->dev = dev;
1077         TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);
1078
1079         if ((pci_get_device(dev) & 0xff00) == 0x5400)
1080                 t5_attribute_workaround(dev);
1081         pci_enable_busmaster(dev);
1082         if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
1083                 uint32_t v;
1084
1085                 pci_set_max_read_req(dev, 4096);
1086                 v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
1087                 sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
1088                 if (pcie_relaxed_ordering == 0 &&
1089                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
1090                         v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
1091                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
1092                 } else if (pcie_relaxed_ordering == 1 &&
1093                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
1094                         v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
1095                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
1096                 }
1097         }
1098
1099         sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
1100         sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
1101         sc->traceq = -1;
1102         mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
1103         snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
1104             device_get_nameunit(dev));
1105
1106         snprintf(sc->lockname, sizeof(sc->lockname), "%s",
1107             device_get_nameunit(dev));
1108         mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
1109         t4_add_adapter(sc);
1110
1111         mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
1112         TAILQ_INIT(&sc->sfl);
1113         callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
1114
1115         mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);
1116
1117         sc->policy = NULL;
1118         rw_init(&sc->policy_lock, "connection offload policy");
1119
1120         callout_init(&sc->ktls_tick, 1);
1121
1122 #ifdef TCP_OFFLOAD
1123         TASK_INIT(&sc->async_event_task, 0, t4_async_event, sc);
1124 #endif
1125
1126         refcount_init(&sc->vxlan_refcount, 0);
1127
1128         rc = t4_map_bars_0_and_4(sc);
1129         if (rc != 0)
1130                 goto done; /* error message displayed already */
1131
1132         memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
1133
1134         /* Prepare the adapter for operation. */
1135         buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
1136         rc = -t4_prep_adapter(sc, buf);
1137         free(buf, M_CXGBE);
1138         if (rc != 0) {
1139                 device_printf(dev, "failed to prepare adapter: %d.\n", rc);
1140                 goto done;
1141         }
1142
1143         /*
1144          * This is the real PF# to which we're attaching.  Works from within PCI
1145          * passthrough environments too, where pci_get_function() could return a
1146          * different PF# depending on the passthrough configuration.  We need to
1147          * use the real PF# in all our communication with the firmware.
1148          */
1149         j = t4_read_reg(sc, A_PL_WHOAMI);
1150         sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
1151         sc->mbox = sc->pf;
1152
1153         t4_init_devnames(sc);
1154         if (sc->names == NULL) {
1155                 rc = ENOTSUP;
1156                 goto done; /* error message displayed already */
1157         }
1158
1159         /*
1160          * Do this really early, with the memory windows set up even before the
1161          * character device.  The userland tool's register i/o and mem read
1162          * will work even in "recovery mode".
1163          */
1164         setup_memwin(sc);
1165         if (t4_init_devlog_params(sc, 0) == 0)
1166                 fixup_devlog_params(sc);
1167         make_dev_args_init(&mda);
1168         mda.mda_devsw = &t4_cdevsw;
1169         mda.mda_uid = UID_ROOT;
1170         mda.mda_gid = GID_WHEEL;
1171         mda.mda_mode = 0600;
1172         mda.mda_si_drv1 = sc;
1173         rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
1174         if (rc != 0)
1175                 device_printf(dev, "failed to create nexus char device: %d.\n",
1176                     rc);
1177
1178         /* Go no further if recovery mode has been requested. */
1179         if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
1180                 device_printf(dev, "recovery mode.\n");
1181                 goto done;
1182         }
1183
1184 #if defined(__i386__)
1185         if ((cpu_feature & CPUID_CX8) == 0) {
1186                 device_printf(dev, "64 bit atomics not available.\n");
1187                 rc = ENOTSUP;
1188                 goto done;
1189         }
1190 #endif
1191
1192         /* Contact the firmware and try to become the master driver. */
1193         rc = contact_firmware(sc);
1194         if (rc != 0)
1195                 goto done; /* error message displayed already */
1196         MPASS(sc->flags & FW_OK);
1197
1198         rc = get_params__pre_init(sc);
1199         if (rc != 0)
1200                 goto done; /* error message displayed already */
1201
1202         if (sc->flags & MASTER_PF) {
1203                 rc = partition_resources(sc);
1204                 if (rc != 0)
1205                         goto done; /* error message displayed already */
1206                 t4_intr_clear(sc);
1207         }
1208
1209         rc = get_params__post_init(sc);
1210         if (rc != 0)
1211                 goto done; /* error message displayed already */
1212
1213         rc = set_params__post_init(sc);
1214         if (rc != 0)
1215                 goto done; /* error message displayed already */
1216
1217         rc = t4_map_bar_2(sc);
1218         if (rc != 0)
1219                 goto done; /* error message displayed already */
1220
1221         rc = t4_create_dma_tag(sc);
1222         if (rc != 0)
1223                 goto done; /* error message displayed already */
1224
1225         /*
1226          * First pass over all the ports - allocate VIs and initialize some
1227          * basic parameters like mac address, port type, etc.
1228          */
1229         for_each_port(sc, i) {
1230                 struct port_info *pi;
1231
1232                 pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
1233                 sc->port[i] = pi;
1234
1235                 /* These must be set before t4_port_init */
1236                 pi->adapter = sc;
1237                 pi->port_id = i;
1238                 /*
1239                  * XXX: vi[0] is special so we can't delay this allocation until
1240                  * pi->nvi's final value is known.
1241                  */
1242                 pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
1243                     M_ZERO | M_WAITOK);
1244
1245                 /*
1246                  * Allocate the "main" VI and initialize parameters
1247                  * like mac addr.
1248                  */
1249                 rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
1250                 if (rc != 0) {
1251                         device_printf(dev, "unable to initialize port %d: %d\n",
1252                             i, rc);
1253                         free(pi->vi, M_CXGBE);
1254                         free(pi, M_CXGBE);
1255                         sc->port[i] = NULL;
1256                         goto done;
1257                 }
1258
1259                 snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
1260                     device_get_nameunit(dev), i);
1261                 mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
1262                 sc->chan_map[pi->tx_chan] = i;
1263
1264                 /*
1265                  * The MPS counter for FCS errors doesn't work correctly on the
1266                  * T6 so we use the MAC counter here.  Which MAC is in use
1267                  * depends on the link settings which will be known when the
1268                  * link comes up.
1269                  */
1270                 if (is_t6(sc)) {
1271                         pi->fcs_reg = -1;
1272                 } else if (is_t4(sc)) {
1273                         pi->fcs_reg = PORT_REG(pi->tx_chan,
1274                             A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L);
1275                 } else {
1276                         pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
1277                             A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L);
1278                 }
1279                 pi->fcs_base = 0;
1280
1281                 /* All VIs on this port share this media. */
1282                 ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
1283                     cxgbe_media_status);
1284
1285                 PORT_LOCK(pi);
1286                 init_link_config(pi);
1287                 fixup_link_config(pi);
1288                 build_medialist(pi);
1289                 if (fixed_ifmedia(pi))
1290                         pi->flags |= FIXED_IFMEDIA;
1291                 PORT_UNLOCK(pi);
1292
1293                 pi->dev = device_add_child(dev, sc->names->ifnet_name,
1294                     t4_ifnet_unit(sc, pi));
1295                 if (pi->dev == NULL) {
1296                         device_printf(dev,
1297                             "failed to add device for port %d.\n", i);
1298                         rc = ENXIO;
1299                         goto done;
1300                 }
1301                 pi->vi[0].dev = pi->dev;
1302                 device_set_softc(pi->dev, pi);
1303         }
1304
1305         /*
1306          * Interrupt type, # of interrupts, # of rx/tx queues, etc.
1307          */
1308         nports = sc->params.nports;
1309         rc = cfg_itype_and_nqueues(sc, &iaq);
1310         if (rc != 0)
1311                 goto done; /* error message displayed already */
1312
1313         num_vis = iaq.num_vis;
1314         sc->intr_type = iaq.intr_type;
1315         sc->intr_count = iaq.nirq;
1316
1317         s = &sc->sge;
1318         s->nrxq = nports * iaq.nrxq;
1319         s->ntxq = nports * iaq.ntxq;
1320         if (num_vis > 1) {
1321                 s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
1322                 s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
1323         }
1324         s->neq = s->ntxq + s->nrxq;     /* the free list in an rxq is an eq */
1325         s->neq += nports;               /* ctrl queues: 1 per port */
1326         s->niq = s->nrxq + 1;           /* 1 extra for firmware event queue */
1327 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1328         if (is_offload(sc) || is_ethoffload(sc)) {
1329                 s->nofldtxq = nports * iaq.nofldtxq;
1330                 if (num_vis > 1)
1331                         s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
1332                 s->neq += s->nofldtxq;
1333
1334                 s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
1335                     M_CXGBE, M_ZERO | M_WAITOK);
1336         }
1337 #endif
1338 #ifdef TCP_OFFLOAD
1339         if (is_offload(sc)) {
1340                 s->nofldrxq = nports * iaq.nofldrxq;
1341                 if (num_vis > 1)
1342                         s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
1343                 s->neq += s->nofldrxq;  /* free list */
1344                 s->niq += s->nofldrxq;
1345
1346                 s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
1347                     M_CXGBE, M_ZERO | M_WAITOK);
1348         }
1349 #endif
1350 #ifdef DEV_NETMAP
1351         s->nnmrxq = 0;
1352         s->nnmtxq = 0;
1353         if (t4_native_netmap & NN_MAIN_VI) {
1354                 s->nnmrxq += nports * iaq.nnmrxq;
1355                 s->nnmtxq += nports * iaq.nnmtxq;
1356         }
1357         if (num_vis > 1 && t4_native_netmap & NN_EXTRA_VI) {
1358                 s->nnmrxq += nports * (num_vis - 1) * iaq.nnmrxq_vi;
1359                 s->nnmtxq += nports * (num_vis - 1) * iaq.nnmtxq_vi;
1360         }
1361         s->neq += s->nnmtxq + s->nnmrxq;
1362         s->niq += s->nnmrxq;
1363
1364         s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
1365             M_CXGBE, M_ZERO | M_WAITOK);
1366         s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
1367             M_CXGBE, M_ZERO | M_WAITOK);
1368 #endif
1369         MPASS(s->niq <= s->iqmap_sz);
1370         MPASS(s->neq <= s->eqmap_sz);
1371
1372         s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
1373             M_ZERO | M_WAITOK);
1374         s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
1375             M_ZERO | M_WAITOK);
1376         s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
1377             M_ZERO | M_WAITOK);
1378         s->iqmap = malloc(s->iqmap_sz * sizeof(struct sge_iq *), M_CXGBE,
1379             M_ZERO | M_WAITOK);
1380         s->eqmap = malloc(s->eqmap_sz * sizeof(struct sge_eq *), M_CXGBE,
1381             M_ZERO | M_WAITOK);
1382
1383         sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
1384             M_ZERO | M_WAITOK);
1385
1386         t4_init_l2t(sc, M_WAITOK);
1387         t4_init_smt(sc, M_WAITOK);
1388         t4_init_tx_sched(sc);
1389         t4_init_atid_table(sc);
1390 #ifdef RATELIMIT
1391         t4_init_etid_table(sc);
1392 #endif
1393 #ifdef INET6
1394         t4_init_clip_table(sc);
1395 #endif
1396         if (sc->vres.key.size != 0)
1397                 sc->key_map = vmem_create("T4TLS key map", sc->vres.key.start,
1398                     sc->vres.key.size, 32, 0, M_FIRSTFIT | M_WAITOK);
1399
1400         /*
1401          * Second pass over the ports.  This time we know the number of rx and
1402          * tx queues that each port should get.
1403          */
1404         rqidx = tqidx = 0;
1405 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1406         ofld_tqidx = 0;
1407 #endif
1408 #ifdef TCP_OFFLOAD
1409         ofld_rqidx = 0;
1410 #endif
1411 #ifdef DEV_NETMAP
1412         nm_rqidx = nm_tqidx = 0;
1413 #endif
1414         for_each_port(sc, i) {
1415                 struct port_info *pi = sc->port[i];
1416                 struct vi_info *vi;
1417
1418                 if (pi == NULL)
1419                         continue;
1420
1421                 pi->nvi = num_vis;
1422                 for_each_vi(pi, j, vi) {
1423                         vi->pi = pi;
1424                         vi->adapter = sc;
1425                         vi->qsize_rxq = t4_qsize_rxq;
1426                         vi->qsize_txq = t4_qsize_txq;
1427
1428                         vi->first_rxq = rqidx;
1429                         vi->first_txq = tqidx;
1430                         vi->tmr_idx = t4_tmr_idx;
1431                         vi->pktc_idx = t4_pktc_idx;
1432                         vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
1433                         vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;
1434
1435                         rqidx += vi->nrxq;
1436                         tqidx += vi->ntxq;
1437
1438                         if (j == 0 && vi->ntxq > 1)
1439                                 vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
1440                         else
1441                                 vi->rsrv_noflowq = 0;
1442
1443 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1444                         vi->first_ofld_txq = ofld_tqidx;
1445                         vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;
1446                         ofld_tqidx += vi->nofldtxq;
1447 #endif
1448 #ifdef TCP_OFFLOAD
1449                         vi->ofld_tmr_idx = t4_tmr_idx_ofld;
1450                         vi->ofld_pktc_idx = t4_pktc_idx_ofld;
1451                         vi->first_ofld_rxq = ofld_rqidx;
1452                         vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;
1453
1454                         ofld_rqidx += vi->nofldrxq;
1455 #endif
1456 #ifdef DEV_NETMAP
1457                         vi->first_nm_rxq = nm_rqidx;
1458                         vi->first_nm_txq = nm_tqidx;
1459                         if (j == 0) {
1460                                 vi->nnmrxq = iaq.nnmrxq;
1461                                 vi->nnmtxq = iaq.nnmtxq;
1462                         } else {
1463                                 vi->nnmrxq = iaq.nnmrxq_vi;
1464                                 vi->nnmtxq = iaq.nnmtxq_vi;
1465                         }
1466                         nm_rqidx += vi->nnmrxq;
1467                         nm_tqidx += vi->nnmtxq;
1468 #endif
1469                 }
1470         }
1471
1472         rc = t4_setup_intr_handlers(sc);
1473         if (rc != 0) {
1474                 device_printf(dev,
1475                     "failed to setup interrupt handlers: %d\n", rc);
1476                 goto done;
1477         }
1478
1479         rc = bus_generic_probe(dev);
1480         if (rc != 0) {
1481                 device_printf(dev, "failed to probe child drivers: %d\n", rc);
1482                 goto done;
1483         }
1484
1485         /*
1486          * Ensure thread-safe mailbox access (in debug builds).
1487          *
1488          * So far this was the only thread accessing the mailbox but various
1489          * ifnets and sysctls are about to be created and their handlers/ioctls
1490          * will access the mailbox from different threads.
1491          */
1492         sc->flags |= CHK_MBOX_ACCESS;
1493
1494         rc = bus_generic_attach(dev);
1495         if (rc != 0) {
1496                 device_printf(dev,
1497                     "failed to attach all child ports: %d\n", rc);
1498                 goto done;
1499         }
1500
1501         device_printf(dev,
1502             "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
1503             sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
1504             sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
1505             (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
1506             sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
1507
1508         t4_set_desc(sc);
1509
1510         notify_siblings(dev, 0);
1511
1512 done:
1513         if (rc != 0 && sc->cdev) {
1514                 /* cdev was created and so cxgbetool works; recover that way. */
1515                 device_printf(dev,
1516                     "error during attach, adapter is now in recovery mode.\n");
1517                 rc = 0;
1518         }
1519
1520         if (rc != 0)
1521                 t4_detach_common(dev);
1522         else
1523                 t4_sysctls(sc);
1524
1525         return (rc);
1526 }
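/*
 * Worked example of the queue accounting above (all counts assumed for
 * illustration; they are not driver defaults).  For a 2-port adapter with
 * num_vis = 2, iaq.nrxq = iaq.ntxq = 4, and iaq.nrxq_vi = iaq.ntxq_vi = 1:
 *
 *   s->nrxq = 2*4 + 2*1*1 = 10
 *   s->ntxq = 2*4 + 2*1*1 = 10
 *   s->neq  = 10 + 10 + 2 = 22   (each rxq free list is an eq, plus one
 *                                 ctrl queue per port)
 *   s->niq  = 10 + 1 = 11        (the extra iq is the firmware event queue)
 *
 * TOE, ratelimit, and netmap queues add to neq/niq the same way when
 * compiled in.
 */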
1527
1528 static int
1529 t4_child_location_str(device_t bus, device_t dev, char *buf, size_t buflen)
1530 {
1531         struct adapter *sc;
1532         struct port_info *pi;
1533         int i;
1534
1535         sc = device_get_softc(bus);
1536         buf[0] = '\0';
1537         for_each_port(sc, i) {
1538                 pi = sc->port[i];
1539                 if (pi != NULL && pi->dev == dev) {
1540                         snprintf(buf, buflen, "port=%d", pi->port_id);
1541                         break;
1542                 }
1543         }
1544         return (0);
1545 }
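/*
 * Note: the "port=%d" string built above is this bus's child location
 * string; it is what bus enumeration tools (e.g. devinfo -v) display for
 * each port device (exact presentation depends on the tool).
 */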
1546
1547 static int
1548 t4_ready(device_t dev)
1549 {
1550         struct adapter *sc;
1551
1552         sc = device_get_softc(dev);
1553         if (sc->flags & FW_OK)
1554                 return (0);
1555         return (ENXIO);
1556 }
1557
1558 static int
1559 t4_read_port_device(device_t dev, int port, device_t *child)
1560 {
1561         struct adapter *sc;
1562         struct port_info *pi;
1563
1564         sc = device_get_softc(dev);
1565         if (port < 0 || port >= MAX_NPORTS)
1566                 return (EINVAL);
1567         pi = sc->port[port];
1568         if (pi == NULL || pi->dev == NULL)
1569                 return (ENXIO);
1570         *child = pi->dev;
1571         return (0);
1572 }
1573
1574 static int
1575 notify_siblings(device_t dev, int detaching)
1576 {
1577         device_t sibling;
1578         int error, i;
1579
1580         error = 0;
1581         for (i = 0; i < PCI_FUNCMAX; i++) {
1582                 if (i == pci_get_function(dev))
1583                         continue;
1584                 sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
1585                     pci_get_slot(dev), i);
1586                 if (sibling == NULL || !device_is_attached(sibling))
1587                         continue;
1588                 if (detaching)
1589                         error = T4_DETACH_CHILD(sibling);
1590                 else
1591                         (void)T4_ATTACH_CHILD(sibling);
1592                 if (error)
1593                         break;
1594         }
1595         return (error);
1596 }
1597
1598 /*
1599  * Idempotent
1600  */
1601 static int
1602 t4_detach(device_t dev)
1603 {
1604         struct adapter *sc;
1605         int rc;
1606
1607         sc = device_get_softc(dev);
1608
1609         rc = notify_siblings(dev, 1);
1610         if (rc) {
1611                 device_printf(dev,
1612                     "failed to detach sibling devices: %d\n", rc);
1613                 return (rc);
1614         }
1615
1616         return (t4_detach_common(dev));
1617 }
1618
1619 int
1620 t4_detach_common(device_t dev)
1621 {
1622         struct adapter *sc;
1623         struct port_info *pi;
1624         int i, rc;
1625
1626         sc = device_get_softc(dev);
1627
1628         if (sc->cdev) {
1629                 destroy_dev(sc->cdev);
1630                 sc->cdev = NULL;
1631         }
1632
1633         sx_xlock(&t4_list_lock);
1634         SLIST_REMOVE(&t4_list, sc, adapter, link);
1635         sx_xunlock(&t4_list_lock);
1636
1637         sc->flags &= ~CHK_MBOX_ACCESS;
1638         if (sc->flags & FULL_INIT_DONE) {
1639                 if (!(sc->flags & IS_VF))
1640                         t4_intr_disable(sc);
1641         }
1642
1643         if (device_is_attached(dev)) {
1644                 rc = bus_generic_detach(dev);
1645                 if (rc) {
1646                         device_printf(dev,
1647                             "failed to detach child devices: %d\n", rc);
1648                         return (rc);
1649                 }
1650         }
1651
1652 #ifdef TCP_OFFLOAD
1653         taskqueue_drain(taskqueue_thread, &sc->async_event_task);
1654 #endif
1655
1656         for (i = 0; i < sc->intr_count; i++)
1657                 t4_free_irq(sc, &sc->irq[i]);
1658
1659         if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1660                 t4_free_tx_sched(sc);
1661
1662         for (i = 0; i < MAX_NPORTS; i++) {
1663                 pi = sc->port[i];
1664                 if (pi) {
1665                         t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
1666                         if (pi->dev)
1667                                 device_delete_child(dev, pi->dev);
1668
1669                         mtx_destroy(&pi->pi_lock);
1670                         free(pi->vi, M_CXGBE);
1671                         free(pi, M_CXGBE);
1672                 }
1673         }
1674
1675         device_delete_children(dev);
1676
1677         if (sc->flags & FULL_INIT_DONE)
1678                 adapter_full_uninit(sc);
1679
1680         if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1681                 t4_fw_bye(sc, sc->mbox);
1682
1683         if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
1684                 pci_release_msi(dev);
1685
1686         if (sc->regs_res)
1687                 bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
1688                     sc->regs_res);
1689
1690         if (sc->udbs_res)
1691                 bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
1692                     sc->udbs_res);
1693
1694         if (sc->msix_res)
1695                 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
1696                     sc->msix_res);
1697
1698         if (sc->l2t)
1699                 t4_free_l2t(sc->l2t);
1700         if (sc->smt)
1701                 t4_free_smt(sc->smt);
1702         t4_free_atid_table(sc);
1703 #ifdef RATELIMIT
1704         t4_free_etid_table(sc);
1705 #endif
1706         if (sc->key_map)
1707                 vmem_destroy(sc->key_map);
1708 #ifdef INET6
1709         t4_destroy_clip_table(sc);
1710 #endif
1711
1712 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1713         free(sc->sge.ofld_txq, M_CXGBE);
1714 #endif
1715 #ifdef TCP_OFFLOAD
1716         free(sc->sge.ofld_rxq, M_CXGBE);
1717 #endif
1718 #ifdef DEV_NETMAP
1719         free(sc->sge.nm_rxq, M_CXGBE);
1720         free(sc->sge.nm_txq, M_CXGBE);
1721 #endif
1722         free(sc->irq, M_CXGBE);
1723         free(sc->sge.rxq, M_CXGBE);
1724         free(sc->sge.txq, M_CXGBE);
1725         free(sc->sge.ctrlq, M_CXGBE);
1726         free(sc->sge.iqmap, M_CXGBE);
1727         free(sc->sge.eqmap, M_CXGBE);
1728         free(sc->tids.ftid_tab, M_CXGBE);
1729         free(sc->tids.hpftid_tab, M_CXGBE);
1730         free_hftid_hash(&sc->tids);
1731         free(sc->tids.tid_tab, M_CXGBE);
1732         free(sc->tt.tls_rx_ports, M_CXGBE);
1733         t4_destroy_dma_tag(sc);
1734
1735         callout_drain(&sc->ktls_tick);
1736         callout_drain(&sc->sfl_callout);
1737         if (mtx_initialized(&sc->tids.ftid_lock)) {
1738                 mtx_destroy(&sc->tids.ftid_lock);
1739                 cv_destroy(&sc->tids.ftid_cv);
1740         }
1741         if (mtx_initialized(&sc->tids.atid_lock))
1742                 mtx_destroy(&sc->tids.atid_lock);
1743         if (mtx_initialized(&sc->ifp_lock))
1744                 mtx_destroy(&sc->ifp_lock);
1745
1746         if (rw_initialized(&sc->policy_lock)) {
1747                 rw_destroy(&sc->policy_lock);
1748 #ifdef TCP_OFFLOAD
1749                 if (sc->policy != NULL)
1750                         free_offload_policy(sc->policy);
1751 #endif
1752         }
1753
1754         for (i = 0; i < NUM_MEMWIN; i++) {
1755                 struct memwin *mw = &sc->memwin[i];
1756
1757                 if (rw_initialized(&mw->mw_lock))
1758                         rw_destroy(&mw->mw_lock);
1759         }
1760
1761         mtx_destroy(&sc->sfl_lock);
1762         mtx_destroy(&sc->reg_lock);
1763         mtx_destroy(&sc->sc_lock);
1764
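        /*
         * Zeroing the softc is what makes this function idempotent: a
         * repeat call sees NULL pointers (free(9) ignores NULL) and
         * uninitialized locks, so each teardown step above is skipped.
         */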
1765         bzero(sc, sizeof(*sc));
1766
1767         return (0);
1768 }
1769
1770 static int
1771 cxgbe_probe(device_t dev)
1772 {
1773         char buf[128];
1774         struct port_info *pi = device_get_softc(dev);
1775
1776         snprintf(buf, sizeof(buf), "port %d", pi->port_id);
1777         device_set_desc_copy(dev, buf);
1778
1779         return (BUS_PROBE_DEFAULT);
1780 }
1781
1782 #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
1783     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
1784     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS | \
1785     IFCAP_HWRXTSTMP | IFCAP_NOMAP)
1786 #define T4_CAP_ENABLE (T4_CAP)
1787
1788 static int
1789 cxgbe_vi_attach(device_t dev, struct vi_info *vi)
1790 {
1791         struct ifnet *ifp;
1792         struct sbuf *sb;
1793         struct pfil_head_args pa;
1794         struct adapter *sc = vi->adapter;
1795
1796         vi->xact_addr_filt = -1;
1797         callout_init(&vi->tick, 1);
1798         if (sc->flags & IS_VF || t4_tx_vm_wr != 0)
1799                 vi->flags |= TX_USES_VM_WR;
1800
1801         /* Allocate an ifnet and set it up */
1802         ifp = if_alloc_dev(IFT_ETHER, dev);
1803         if (ifp == NULL) {
1804                 device_printf(dev, "Cannot allocate ifnet\n");
1805                 return (ENOMEM);
1806         }
1807         vi->ifp = ifp;
1808         ifp->if_softc = vi;
1809
1810         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1811         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1812
1813         ifp->if_init = cxgbe_init;
1814         ifp->if_ioctl = cxgbe_ioctl;
1815         ifp->if_transmit = cxgbe_transmit;
1816         ifp->if_qflush = cxgbe_qflush;
1817         ifp->if_get_counter = cxgbe_get_counter;
1818 #if defined(KERN_TLS) || defined(RATELIMIT)
1819         ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc;
1820         ifp->if_snd_tag_modify = cxgbe_snd_tag_modify;
1821         ifp->if_snd_tag_query = cxgbe_snd_tag_query;
1822         ifp->if_snd_tag_free = cxgbe_snd_tag_free;
1823 #endif
1824 #ifdef RATELIMIT
1825         ifp->if_ratelimit_query = cxgbe_ratelimit_query;
1826 #endif
1827
1828         ifp->if_capabilities = T4_CAP;
1829         ifp->if_capenable = T4_CAP_ENABLE;
1830         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1831             CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1832         if (chip_id(sc) >= CHELSIO_T6) {
1833                 ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
1834                 ifp->if_capenable |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
1835                 ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP |
1836                     CSUM_INNER_IP6_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
1837                     CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_ENCAP_VXLAN;
1838         }
1839
1840 #ifdef TCP_OFFLOAD
1841         if (vi->nofldrxq != 0 && (sc->flags & KERN_TLS_OK) == 0)
1842                 ifp->if_capabilities |= IFCAP_TOE;
1843 #endif
1844 #ifdef RATELIMIT
1845         if (is_ethoffload(sc) && vi->nofldtxq != 0) {
1846                 ifp->if_capabilities |= IFCAP_TXRTLMT;
1847                 ifp->if_capenable |= IFCAP_TXRTLMT;
1848         }
1849 #endif
1850
1851         ifp->if_hw_tsomax = IP_MAXPACKET;
1852         if (vi->flags & TX_USES_VM_WR)
1853                 ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
1854         else
1855                 ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
1856 #ifdef RATELIMIT
1857         if (is_ethoffload(sc) && vi->nofldtxq != 0)
1858                 ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
1859 #endif
1860         ifp->if_hw_tsomaxsegsize = 65536;
1861 #ifdef KERN_TLS
1862         if (sc->flags & KERN_TLS_OK) {
1863                 ifp->if_capabilities |= IFCAP_TXTLS;
1864                 ifp->if_capenable |= IFCAP_TXTLS;
1865         }
1866 #endif
1867
1868         ether_ifattach(ifp, vi->hw_addr);
1869 #ifdef DEV_NETMAP
1870         if (vi->nnmrxq != 0)
1871                 cxgbe_nm_attach(vi);
1872 #endif
1873         sb = sbuf_new_auto();
1874         sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
1875 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1876         switch (ifp->if_capabilities & (IFCAP_TOE | IFCAP_TXRTLMT)) {
1877         case IFCAP_TOE:
1878                 sbuf_printf(sb, "; %d txq (TOE)", vi->nofldtxq);
1879                 break;
1880         case IFCAP_TOE | IFCAP_TXRTLMT:
1881                 sbuf_printf(sb, "; %d txq (TOE/ETHOFLD)", vi->nofldtxq);
1882                 break;
1883         case IFCAP_TXRTLMT:
1884                 sbuf_printf(sb, "; %d txq (ETHOFLD)", vi->nofldtxq);
1885                 break;
1886         }
1887 #endif
1888 #ifdef TCP_OFFLOAD
1889         if (ifp->if_capabilities & IFCAP_TOE)
1890                 sbuf_printf(sb, ", %d rxq (TOE)", vi->nofldrxq);
1891 #endif
1892 #ifdef DEV_NETMAP
1893         if (ifp->if_capabilities & IFCAP_NETMAP)
1894                 sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
1895                     vi->nnmtxq, vi->nnmrxq);
1896 #endif
1897         sbuf_finish(sb);
1898         device_printf(dev, "%s\n", sbuf_data(sb));
1899         sbuf_delete(sb);
1900
1901         vi_sysctls(vi);
1902
1903         pa.pa_version = PFIL_VERSION;
1904         pa.pa_flags = PFIL_IN;
1905         pa.pa_type = PFIL_TYPE_ETHERNET;
1906         pa.pa_headname = ifp->if_xname;
1907         vi->pfil = pfil_head_register(&pa);
1908
1909         return (0);
1910 }
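/*
 * Example of the summary printed above (queue counts assumed for
 * illustration): with 4 NIC queue pairs and TOE enabled, the console line
 * would read
 *
 *   cxgbe0: 4 txq, 4 rxq (NIC); 2 txq (TOE), 2 rxq (TOE)
 */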
1911
1912 static int
1913 cxgbe_attach(device_t dev)
1914 {
1915         struct port_info *pi = device_get_softc(dev);
1916         struct adapter *sc = pi->adapter;
1917         struct vi_info *vi;
1918         int i, rc;
1919
1920         callout_init_mtx(&pi->tick, &pi->pi_lock, 0);
1921
1922         rc = cxgbe_vi_attach(dev, &pi->vi[0]);
1923         if (rc)
1924                 return (rc);
1925
1926         for_each_vi(pi, i, vi) {
1927                 if (i == 0)
1928                         continue;
1929                 vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
1930                 if (vi->dev == NULL) {
1931                         device_printf(dev, "failed to add VI %d\n", i);
1932                         continue;
1933                 }
1934                 device_set_softc(vi->dev, vi);
1935         }
1936
1937         cxgbe_sysctls(pi);
1938
1939         bus_generic_attach(dev);
1940
1941         return (0);
1942 }
1943
1944 static void
1945 cxgbe_vi_detach(struct vi_info *vi)
1946 {
1947         struct ifnet *ifp = vi->ifp;
1948
1949         if (vi->pfil != NULL) {
1950                 pfil_head_unregister(vi->pfil);
1951                 vi->pfil = NULL;
1952         }
1953
1954         ether_ifdetach(ifp);
1955
1956         /* Let detach proceed even if these fail. */
1957 #ifdef DEV_NETMAP
1958         if (ifp->if_capabilities & IFCAP_NETMAP)
1959                 cxgbe_nm_detach(vi);
1960 #endif
1961         cxgbe_uninit_synchronized(vi);
1962         callout_drain(&vi->tick);
1963         vi_full_uninit(vi);
1964
1965         if_free(vi->ifp);
1966         vi->ifp = NULL;
1967 }
1968
1969 static int
1970 cxgbe_detach(device_t dev)
1971 {
1972         struct port_info *pi = device_get_softc(dev);
1973         struct adapter *sc = pi->adapter;
1974         int rc;
1975
1976         /* Detach the extra VIs first. */
1977         rc = bus_generic_detach(dev);
1978         if (rc)
1979                 return (rc);
1980         device_delete_children(dev);
1981
1982         doom_vi(sc, &pi->vi[0]);
1983
1984         if (pi->flags & HAS_TRACEQ) {
1985                 sc->traceq = -1;        /* cloner should not create ifnet */
1986                 t4_tracer_port_detach(sc);
1987         }
1988
1989         cxgbe_vi_detach(&pi->vi[0]);
1990         callout_drain(&pi->tick);
1991         ifmedia_removeall(&pi->media);
1992
1993         end_synchronized_op(sc, 0);
1994
1995         return (0);
1996 }
1997
1998 static void
1999 cxgbe_init(void *arg)
2000 {
2001         struct vi_info *vi = arg;
2002         struct adapter *sc = vi->adapter;
2003
2004         if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
2005                 return;
2006         cxgbe_init_synchronized(vi);
2007         end_synchronized_op(sc, 0);
2008 }
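/*
 * cxgbe_init() is the minimal instance of the synchronized-op pattern used
 * throughout this file: any path that touches the hardware brackets the
 * work with begin_synchronized_op()/end_synchronized_op().  The string
 * argument ("t4init" here) is the wmesg a sleeping thread shows while it
 * waits its turn.
 */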
2009
2010 static int
2011 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
2012 {
2013         int rc = 0, mtu, flags;
2014         struct vi_info *vi = ifp->if_softc;
2015         struct port_info *pi = vi->pi;
2016         struct adapter *sc = pi->adapter;
2017         struct ifreq *ifr = (struct ifreq *)data;
2018         uint32_t mask;
2019
2020         switch (cmd) {
2021         case SIOCSIFMTU:
2022                 mtu = ifr->ifr_mtu;
2023                 if (mtu < ETHERMIN || mtu > MAX_MTU)
2024                         return (EINVAL);
2025
2026                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
2027                 if (rc)
2028                         return (rc);
2029                 ifp->if_mtu = mtu;
2030                 if (vi->flags & VI_INIT_DONE) {
2031                         t4_update_fl_bufsize(ifp);
2032                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2033                                 rc = update_mac_settings(ifp, XGMAC_MTU);
2034                 }
2035                 end_synchronized_op(sc, 0);
2036                 break;
2037
2038         case SIOCSIFFLAGS:
2039                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4flg");
2040                 if (rc)
2041                         return (rc);
2042
2043                 if (ifp->if_flags & IFF_UP) {
2044                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2045                                 flags = vi->if_flags;
2046                                 if ((ifp->if_flags ^ flags) &
2047                                     (IFF_PROMISC | IFF_ALLMULTI)) {
2048                                         rc = update_mac_settings(ifp,
2049                                             XGMAC_PROMISC | XGMAC_ALLMULTI);
2050                                 }
2051                         } else {
2052                                 rc = cxgbe_init_synchronized(vi);
2053                         }
2054                         vi->if_flags = ifp->if_flags;
2055                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2056                         rc = cxgbe_uninit_synchronized(vi);
2057                 }
2058                 end_synchronized_op(sc, 0);
2059                 break;
2060
2061         case SIOCADDMULTI:
2062         case SIOCDELMULTI:
2063                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4multi");
2064                 if (rc)
2065                         return (rc);
2066                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2067                         rc = update_mac_settings(ifp, XGMAC_MCADDRS);
2068                 end_synchronized_op(sc, 0);
2069                 break;
2070
2071         case SIOCSIFCAP:
2072                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
2073                 if (rc)
2074                         return (rc);
2075
2076                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2077                 if (mask & IFCAP_TXCSUM) {
2078                         ifp->if_capenable ^= IFCAP_TXCSUM;
2079                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2080
2081                         if (IFCAP_TSO4 & ifp->if_capenable &&
2082                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2083                                 mask &= ~IFCAP_TSO4;
2084                                 ifp->if_capenable &= ~IFCAP_TSO4;
2085                                 if_printf(ifp,
2086                                     "tso4 disabled due to -txcsum.\n");
2087                         }
2088                 }
2089                 if (mask & IFCAP_TXCSUM_IPV6) {
2090                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2091                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2092
2093                         if (IFCAP_TSO6 & ifp->if_capenable &&
2094                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2095                                 mask &= ~IFCAP_TSO6;
2096                                 ifp->if_capenable &= ~IFCAP_TSO6;
2097                                 if_printf(ifp,
2098                                     "tso6 disabled due to -txcsum6.\n");
2099                         }
2100                 }
2101                 if (mask & IFCAP_RXCSUM)
2102                         ifp->if_capenable ^= IFCAP_RXCSUM;
2103                 if (mask & IFCAP_RXCSUM_IPV6)
2104                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2105
2106                 /*
2107                  * Note that we leave CSUM_TSO alone (it is always set).  The
2108                  * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
2109                  * sending a TSO request our way, so it's sufficient to toggle
2110                  * IFCAP_TSOx only.
2111                  */
2112                 if (mask & IFCAP_TSO4) {
2113                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2114                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2115                                 if_printf(ifp, "enable txcsum first.\n");
2116                                 rc = EAGAIN;
2117                                 goto fail;
2118                         }
2119                         ifp->if_capenable ^= IFCAP_TSO4;
2120                 }
2121                 if (mask & IFCAP_TSO6) {
2122                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2123                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2124                                 if_printf(ifp, "enable txcsum6 first.\n");
2125                                 rc = EAGAIN;
2126                                 goto fail;
2127                         }
2128                         ifp->if_capenable ^= IFCAP_TSO6;
2129                 }
2130                 if (mask & IFCAP_LRO) {
2131 #if defined(INET) || defined(INET6)
2132                         int i;
2133                         struct sge_rxq *rxq;
2134
2135                         ifp->if_capenable ^= IFCAP_LRO;
2136                         for_each_rxq(vi, i, rxq) {
2137                                 if (ifp->if_capenable & IFCAP_LRO)
2138                                         rxq->iq.flags |= IQ_LRO_ENABLED;
2139                                 else
2140                                         rxq->iq.flags &= ~IQ_LRO_ENABLED;
2141                         }
2142 #endif
2143                 }
2144 #ifdef TCP_OFFLOAD
2145                 if (mask & IFCAP_TOE) {
2146                         int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
2147
2148                         rc = toe_capability(vi, enable);
2149                         if (rc != 0)
2150                                 goto fail;
2151
2152                         ifp->if_capenable ^= mask;
2153                 }
2154 #endif
2155                 if (mask & IFCAP_VLAN_HWTAGGING) {
2156                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2157                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2158                                 rc = update_mac_settings(ifp, XGMAC_VLANEX);
2159                 }
2160                 if (mask & IFCAP_VLAN_MTU) {
2161                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2162
2163                         /* Need to find out how to disable auto-mtu-inflation */
2164                 }
2165                 if (mask & IFCAP_VLAN_HWTSO)
2166                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2167                 if (mask & IFCAP_VLAN_HWCSUM)
2168                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2169 #ifdef RATELIMIT
2170                 if (mask & IFCAP_TXRTLMT)
2171                         ifp->if_capenable ^= IFCAP_TXRTLMT;
2172 #endif
2173                 if (mask & IFCAP_HWRXTSTMP) {
2174                         int i;
2175                         struct sge_rxq *rxq;
2176
2177                         ifp->if_capenable ^= IFCAP_HWRXTSTMP;
2178                         for_each_rxq(vi, i, rxq) {
2179                                 if (ifp->if_capenable & IFCAP_HWRXTSTMP)
2180                                         rxq->iq.flags |= IQ_RX_TIMESTAMP;
2181                                 else
2182                                         rxq->iq.flags &= ~IQ_RX_TIMESTAMP;
2183                         }
2184                 }
2185                 if (mask & IFCAP_NOMAP)
2186                         ifp->if_capenable ^= IFCAP_NOMAP;
2187
2188 #ifdef KERN_TLS
2189                 if (mask & IFCAP_TXTLS)
2190                         ifp->if_capenable ^= (mask & IFCAP_TXTLS);
2191 #endif
2192                 if (mask & IFCAP_VXLAN_HWCSUM) {
2193                         ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM;
2194                         ifp->if_hwassist ^= CSUM_INNER_IP6_UDP |
2195                             CSUM_INNER_IP6_TCP | CSUM_INNER_IP |
2196                             CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP;
2197                 }
2198                 if (mask & IFCAP_VXLAN_HWTSO) {
2199                         ifp->if_capenable ^= IFCAP_VXLAN_HWTSO;
2200                         ifp->if_hwassist ^= CSUM_INNER_IP6_TSO |
2201                             CSUM_INNER_IP_TSO;
2202                 }
2203
2204 #ifdef VLAN_CAPABILITIES
2205                 VLAN_CAPABILITIES(ifp);
2206 #endif
2207 fail:
2208                 end_synchronized_op(sc, 0);
2209                 break;
2210
2211         case SIOCSIFMEDIA:
2212         case SIOCGIFMEDIA:
2213         case SIOCGIFXMEDIA:
2214                 ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
2215                 break;
2216
2217         case SIOCGI2C: {
2218                 struct ifi2creq i2c;
2219
2220                 rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2221                 if (rc != 0)
2222                         break;
2223                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
2224                         rc = EPERM;
2225                         break;
2226                 }
2227                 if (i2c.len > sizeof(i2c.data)) {
2228                         rc = EINVAL;
2229                         break;
2230                 }
2231                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
2232                 if (rc)
2233                         return (rc);
2234                 rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
2235                     i2c.offset, i2c.len, &i2c.data[0]);
2236                 end_synchronized_op(sc, 0);
2237                 if (rc == 0)
2238                         rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2239                 break;
2240         }
2241
2242         default:
2243                 rc = ether_ioctl(ifp, cmd, data);
2244         }
2245
2246         return (rc);
2247 }
2248
2249 static int
2250 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
2251 {
2252         struct vi_info *vi = ifp->if_softc;
2253         struct port_info *pi = vi->pi;
2254         struct adapter *sc;
2255         struct sge_txq *txq;
2256         void *items[1];
2257         int rc;
2258
2259         M_ASSERTPKTHDR(m);
2260         MPASS(m->m_nextpkt == NULL);    /* not quite ready for this yet */
2261 #if defined(KERN_TLS) || defined(RATELIMIT)
2262         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2263                 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2264 #endif
2265
2266         if (__predict_false(pi->link_cfg.link_ok == false)) {
2267                 m_freem(m);
2268                 return (ENETDOWN);
2269         }
2270
2271         rc = parse_pkt(&m, vi->flags & TX_USES_VM_WR);
2272         if (__predict_false(rc != 0)) {
2273                 MPASS(m == NULL);                       /* was freed already */
2274                 atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
2275                 return (rc);
2276         }
2277 #ifdef RATELIMIT
2278         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
2279                 if (m->m_pkthdr.snd_tag->type == IF_SND_TAG_TYPE_RATE_LIMIT)
2280                         return (ethofld_transmit(ifp, m));
2281         }
2282 #endif
2283
2284         /* Select a txq. */
2285         sc = vi->adapter;
2286         txq = &sc->sge.txq[vi->first_txq];
2287         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2288                 txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
2289                     vi->rsrv_noflowq);
2290
2291         items[0] = m;
2292         rc = mp_ring_enqueue(txq->r, items, 1, 256);
2293         if (__predict_false(rc != 0))
2294                 m_freem(m);
2295
2296         return (rc);
2297 }
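/*
 * Worked example of the txq selection above (values assumed): with
 * vi->ntxq = 4 and vi->rsrv_noflowq = 1, queue 0 is reserved for packets
 * without a flowid and hashed traffic spreads over queues 1-3:
 *
 *   flowid 7 -> first_txq + (7 % (4 - 1)) + 1 = first_txq + 2
 *   flowid 9 -> first_txq + (9 % 3) + 1       = first_txq + 1
 */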
2298
2299 static void
2300 cxgbe_qflush(struct ifnet *ifp)
2301 {
2302         struct vi_info *vi = ifp->if_softc;
2303         struct sge_txq *txq;
2304         int i;
2305
2306         /* queues do not exist if !VI_INIT_DONE. */
2307         if (vi->flags & VI_INIT_DONE) {
2308                 for_each_txq(vi, i, txq) {
2309                         TXQ_LOCK(txq);
2310                         txq->eq.flags |= EQ_QFLUSH;
2311                         TXQ_UNLOCK(txq);
2312                         while (!mp_ring_is_idle(txq->r)) {
2313                                 mp_ring_check_drainage(txq->r, 4096);
2314                                 pause("qflush", 1);
2315                         }
2316                         TXQ_LOCK(txq);
2317                         txq->eq.flags &= ~EQ_QFLUSH;
2318                         TXQ_UNLOCK(txq);
2319                 }
2320         }
2321         if_qflush(ifp);
2322 }
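/*
 * While EQ_QFLUSH is set, the tx handler discards frames instead of
 * transmitting them (the eq drain path in t4_sge.c checks this flag), so
 * the mp_ring_check_drainage() loop above empties the ring even when the
 * hardware cannot make progress.
 */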
2323
2324 static uint64_t
2325 vi_get_counter(struct ifnet *ifp, ift_counter c)
2326 {
2327         struct vi_info *vi = ifp->if_softc;
2328         struct fw_vi_stats_vf *s = &vi->stats;
2329
2330         vi_refresh_stats(vi->adapter, vi);
2331
2332         switch (c) {
2333         case IFCOUNTER_IPACKETS:
2334                 return (s->rx_bcast_frames + s->rx_mcast_frames +
2335                     s->rx_ucast_frames);
2336         case IFCOUNTER_IERRORS:
2337                 return (s->rx_err_frames);
2338         case IFCOUNTER_OPACKETS:
2339                 return (s->tx_bcast_frames + s->tx_mcast_frames +
2340                     s->tx_ucast_frames + s->tx_offload_frames);
2341         case IFCOUNTER_OERRORS:
2342                 return (s->tx_drop_frames);
2343         case IFCOUNTER_IBYTES:
2344                 return (s->rx_bcast_bytes + s->rx_mcast_bytes +
2345                     s->rx_ucast_bytes);
2346         case IFCOUNTER_OBYTES:
2347                 return (s->tx_bcast_bytes + s->tx_mcast_bytes +
2348                     s->tx_ucast_bytes + s->tx_offload_bytes);
2349         case IFCOUNTER_IMCASTS:
2350                 return (s->rx_mcast_frames);
2351         case IFCOUNTER_OMCASTS:
2352                 return (s->tx_mcast_frames);
2353         case IFCOUNTER_OQDROPS: {
2354                 uint64_t drops;
2355
2356                 drops = 0;
2357                 if (vi->flags & VI_INIT_DONE) {
2358                         int i;
2359                         struct sge_txq *txq;
2360
2361                         for_each_txq(vi, i, txq)
2362                                 drops += counter_u64_fetch(txq->r->dropped);
2363                 }
2364
2365                 return (drops);
2366
2367         }
2368
2369         default:
2370                 return (if_get_counter_default(ifp, c));
2371         }
2372 }
2373
2374 uint64_t
2375 cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
2376 {
2377         struct vi_info *vi = ifp->if_softc;
2378         struct port_info *pi = vi->pi;
2379         struct adapter *sc = pi->adapter;
2380         struct port_stats *s = &pi->stats;
2381
2382         if (pi->nvi > 1 || sc->flags & IS_VF)
2383                 return (vi_get_counter(ifp, c));
2384
2385         cxgbe_refresh_stats(sc, pi);
2386
2387         switch (c) {
2388         case IFCOUNTER_IPACKETS:
2389                 return (s->rx_frames);
2390
2391         case IFCOUNTER_IERRORS:
2392                 return (s->rx_jabber + s->rx_runt + s->rx_too_long +
2393                     s->rx_fcs_err + s->rx_len_err);
2394
2395         case IFCOUNTER_OPACKETS:
2396                 return (s->tx_frames);
2397
2398         case IFCOUNTER_OERRORS:
2399                 return (s->tx_error_frames);
2400
2401         case IFCOUNTER_IBYTES:
2402                 return (s->rx_octets);
2403
2404         case IFCOUNTER_OBYTES:
2405                 return (s->tx_octets);
2406
2407         case IFCOUNTER_IMCASTS:
2408                 return (s->rx_mcast_frames);
2409
2410         case IFCOUNTER_OMCASTS:
2411                 return (s->tx_mcast_frames);
2412
2413         case IFCOUNTER_IQDROPS:
2414                 return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
2415                     s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
2416                     s->rx_trunc3 + pi->tnl_cong_drops);
2417
2418         case IFCOUNTER_OQDROPS: {
2419                 uint64_t drops;
2420
2421                 drops = s->tx_drop;
2422                 if (vi->flags & VI_INIT_DONE) {
2423                         int i;
2424                         struct sge_txq *txq;
2425
2426                         for_each_txq(vi, i, txq)
2427                                 drops += counter_u64_fetch(txq->r->dropped);
2428                 }
2429
2430                 return (drops);
2431
2432         }
2433
2434         default:
2435                 return (if_get_counter_default(ifp, c));
2436         }
2437 }
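/*
 * Note: IFCOUNTER_OQDROPS above combines two drop sources: the tx_drop
 * statistic kept by the MAC and the software drops counted by each txq's
 * mp_ring when an enqueue fails.
 */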
2438
2439 #if defined(KERN_TLS) || defined(RATELIMIT)
2440 static int
2441 cxgbe_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
2442     struct m_snd_tag **pt)
2443 {
2444         int error;
2445
2446         switch (params->hdr.type) {
2447 #ifdef RATELIMIT
2448         case IF_SND_TAG_TYPE_RATE_LIMIT:
2449                 error = cxgbe_rate_tag_alloc(ifp, params, pt);
2450                 break;
2451 #endif
2452 #ifdef KERN_TLS
2453         case IF_SND_TAG_TYPE_TLS:
2454                 error = cxgbe_tls_tag_alloc(ifp, params, pt);
2455                 break;
2456 #endif
2457         default:
2458                 error = EOPNOTSUPP;
2459         }
2460         return (error);
2461 }
2462
2463 static int
2464 cxgbe_snd_tag_modify(struct m_snd_tag *mst,
2465     union if_snd_tag_modify_params *params)
2466 {
2467
2468         switch (mst->type) {
2469 #ifdef RATELIMIT
2470         case IF_SND_TAG_TYPE_RATE_LIMIT:
2471                 return (cxgbe_rate_tag_modify(mst, params));
2472 #endif
2473         default:
2474                 return (EOPNOTSUPP);
2475         }
2476 }
2477
2478 static int
2479 cxgbe_snd_tag_query(struct m_snd_tag *mst,
2480     union if_snd_tag_query_params *params)
2481 {
2482
2483         switch (mst->type) {
2484 #ifdef RATELIMIT
2485         case IF_SND_TAG_TYPE_RATE_LIMIT:
2486                 return (cxgbe_rate_tag_query(mst, params));
2487 #endif
2488         default:
2489                 return (EOPNOTSUPP);
2490         }
2491 }
2492
2493 static void
2494 cxgbe_snd_tag_free(struct m_snd_tag *mst)
2495 {
2496
2497         switch (mst->type) {
2498 #ifdef RATELIMIT
2499         case IF_SND_TAG_TYPE_RATE_LIMIT:
2500                 cxgbe_rate_tag_free(mst);
2501                 return;
2502 #endif
2503 #ifdef KERN_TLS
2504         case IF_SND_TAG_TYPE_TLS:
2505                 cxgbe_tls_tag_free(mst);
2506                 return;
2507 #endif
2508         default:
2509                 panic("shouldn't get here");
2510         }
2511 }
2512 #endif
2513
2514 /*
2515  * The kernel picks a media from the list we provided but we still validate
2516  * the request.
2517  */
2518 int
2519 cxgbe_media_change(struct ifnet *ifp)
2520 {
2521         struct vi_info *vi = ifp->if_softc;
2522         struct port_info *pi = vi->pi;
2523         struct ifmedia *ifm = &pi->media;
2524         struct link_config *lc = &pi->link_cfg;
2525         struct adapter *sc = pi->adapter;
2526         int rc;
2527
2528         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec");
2529         if (rc != 0)
2530                 return (rc);
2531         PORT_LOCK(pi);
2532         if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
2533                 /* ifconfig .. media autoselect */
2534                 if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
2535                         rc = ENOTSUP; /* AN not supported by transceiver */
2536                         goto done;
2537                 }
2538                 lc->requested_aneg = AUTONEG_ENABLE;
2539                 lc->requested_speed = 0;
2540                 lc->requested_fc |= PAUSE_AUTONEG;
2541         } else {
2542                 lc->requested_aneg = AUTONEG_DISABLE;
2543                 lc->requested_speed =
2544                     ifmedia_baudrate(ifm->ifm_media) / 1000000;
2545                 lc->requested_fc = 0;
2546                 if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
2547                         lc->requested_fc |= PAUSE_RX;
2548                 if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
2549                         lc->requested_fc |= PAUSE_TX;
2550         }
2551         if (pi->up_vis > 0) {
2552                 fixup_link_config(pi);
2553                 rc = apply_link_config(pi);
2554         }
2555 done:
2556         PORT_UNLOCK(pi);
2557         end_synchronized_op(sc, 0);
2558         return (rc);
2559 }
2560
2561 /*
2562  * Base media word (without ETHER, pause, link active, etc.) for the port at the
2563  * given speed.
2564  */
2565 static int
2566 port_mword(struct port_info *pi, uint32_t speed)
2567 {
2568
2569         MPASS(speed & M_FW_PORT_CAP32_SPEED);
2570         MPASS(powerof2(speed));
2571
2572         switch (pi->port_type) {
2573         case FW_PORT_TYPE_BT_SGMII:
2574         case FW_PORT_TYPE_BT_XFI:
2575         case FW_PORT_TYPE_BT_XAUI:
2576                 /* BaseT */
2577                 switch (speed) {
2578                 case FW_PORT_CAP32_SPEED_100M:
2579                         return (IFM_100_T);
2580                 case FW_PORT_CAP32_SPEED_1G:
2581                         return (IFM_1000_T);
2582                 case FW_PORT_CAP32_SPEED_10G:
2583                         return (IFM_10G_T);
2584                 }
2585                 break;
2586         case FW_PORT_TYPE_KX4:
2587                 if (speed == FW_PORT_CAP32_SPEED_10G)
2588                         return (IFM_10G_KX4);
2589                 break;
2590         case FW_PORT_TYPE_CX4:
2591                 if (speed == FW_PORT_CAP32_SPEED_10G)
2592                         return (IFM_10G_CX4);
2593                 break;
2594         case FW_PORT_TYPE_KX:
2595                 if (speed == FW_PORT_CAP32_SPEED_1G)
2596                         return (IFM_1000_KX);
2597                 break;
2598         case FW_PORT_TYPE_KR:
2599         case FW_PORT_TYPE_BP_AP:
2600         case FW_PORT_TYPE_BP4_AP:
2601         case FW_PORT_TYPE_BP40_BA:
2602         case FW_PORT_TYPE_KR4_100G:
2603         case FW_PORT_TYPE_KR_SFP28:
2604         case FW_PORT_TYPE_KR_XLAUI:
2605                 switch (speed) {
2606                 case FW_PORT_CAP32_SPEED_1G:
2607                         return (IFM_1000_KX);
2608                 case FW_PORT_CAP32_SPEED_10G:
2609                         return (IFM_10G_KR);
2610                 case FW_PORT_CAP32_SPEED_25G:
2611                         return (IFM_25G_KR);
2612                 case FW_PORT_CAP32_SPEED_40G:
2613                         return (IFM_40G_KR4);
2614                 case FW_PORT_CAP32_SPEED_50G:
2615                         return (IFM_50G_KR2);
2616                 case FW_PORT_CAP32_SPEED_100G:
2617                         return (IFM_100G_KR4);
2618                 }
2619                 break;
2620         case FW_PORT_TYPE_FIBER_XFI:
2621         case FW_PORT_TYPE_FIBER_XAUI:
2622         case FW_PORT_TYPE_SFP:
2623         case FW_PORT_TYPE_QSFP_10G:
2624         case FW_PORT_TYPE_QSA:
2625         case FW_PORT_TYPE_QSFP:
2626         case FW_PORT_TYPE_CR4_QSFP:
2627         case FW_PORT_TYPE_CR_QSFP:
2628         case FW_PORT_TYPE_CR2_QSFP:
2629         case FW_PORT_TYPE_SFP28:
2630                 /* Pluggable transceiver */
2631                 switch (pi->mod_type) {
2632                 case FW_PORT_MOD_TYPE_LR:
2633                         switch (speed) {
2634                         case FW_PORT_CAP32_SPEED_1G:
2635                                 return (IFM_1000_LX);
2636                         case FW_PORT_CAP32_SPEED_10G:
2637                                 return (IFM_10G_LR);
2638                         case FW_PORT_CAP32_SPEED_25G:
2639                                 return (IFM_25G_LR);
2640                         case FW_PORT_CAP32_SPEED_40G:
2641                                 return (IFM_40G_LR4);
2642                         case FW_PORT_CAP32_SPEED_50G:
2643                                 return (IFM_50G_LR2);
2644                         case FW_PORT_CAP32_SPEED_100G:
2645                                 return (IFM_100G_LR4);
2646                         }
2647                         break;
2648                 case FW_PORT_MOD_TYPE_SR:
2649                         switch (speed) {
2650                         case FW_PORT_CAP32_SPEED_1G:
2651                                 return (IFM_1000_SX);
2652                         case FW_PORT_CAP32_SPEED_10G:
2653                                 return (IFM_10G_SR);
2654                         case FW_PORT_CAP32_SPEED_25G:
2655                                 return (IFM_25G_SR);
2656                         case FW_PORT_CAP32_SPEED_40G:
2657                                 return (IFM_40G_SR4);
2658                         case FW_PORT_CAP32_SPEED_50G:
2659                                 return (IFM_50G_SR2);
2660                         case FW_PORT_CAP32_SPEED_100G:
2661                                 return (IFM_100G_SR4);
2662                         }
2663                         break;
2664                 case FW_PORT_MOD_TYPE_ER:
2665                         if (speed == FW_PORT_CAP32_SPEED_10G)
2666                                 return (IFM_10G_ER);
2667                         break;
2668                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
2669                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
2670                         switch (speed) {
2671                         case FW_PORT_CAP32_SPEED_1G:
2672                                 return (IFM_1000_CX);
2673                         case FW_PORT_CAP32_SPEED_10G:
2674                                 return (IFM_10G_TWINAX);
2675                         case FW_PORT_CAP32_SPEED_25G:
2676                                 return (IFM_25G_CR);
2677                         case FW_PORT_CAP32_SPEED_40G:
2678                                 return (IFM_40G_CR4);
2679                         case FW_PORT_CAP32_SPEED_50G:
2680                                 return (IFM_50G_CR2);
2681                         case FW_PORT_CAP32_SPEED_100G:
2682                                 return (IFM_100G_CR4);
2683                         }
2684                         break;
2685                 case FW_PORT_MOD_TYPE_LRM:
2686                         if (speed == FW_PORT_CAP32_SPEED_10G)
2687                                 return (IFM_10G_LRM);
2688                         break;
2689                 case FW_PORT_MOD_TYPE_NA:
2690                         MPASS(0);       /* Not pluggable? */
2691                         /* fall through */
2692                 case FW_PORT_MOD_TYPE_ERROR:
2693                 case FW_PORT_MOD_TYPE_UNKNOWN:
2694                 case FW_PORT_MOD_TYPE_NOTSUPPORTED:
2695                         break;
2696                 case FW_PORT_MOD_TYPE_NONE:
2697                         return (IFM_NONE);
2698                 }
2699                 break;
2700         case FW_PORT_TYPE_NONE:
2701                 return (IFM_NONE);
2702         }
2703
2704         return (IFM_UNKNOWN);
2705 }
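/*
 * Example mappings (for illustration): an SFP28 port with an SR module
 * linked at 25G yields IFM_25G_SR, while the same port with a passive
 * twinax cable yields IFM_25G_CR.  cxgbe_media_status() below ORs the
 * result into ifm_active.
 */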
2706
2707 void
2708 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2709 {
2710         struct vi_info *vi = ifp->if_softc;
2711         struct port_info *pi = vi->pi;
2712         struct adapter *sc = pi->adapter;
2713         struct link_config *lc = &pi->link_cfg;
2714
2715         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0)
2716                 return;
2717         PORT_LOCK(pi);
2718
2719         if (pi->up_vis == 0) {
2720                 /*
2721                  * If all the interfaces are administratively down the firmware
2722                  * does not report transceiver changes.  Refresh port info here
2723                  * so that ifconfig displays accurate ifmedia at all times.
2724                  * This is the only reason we have a synchronized op in this
2725                  * function.  Just PORT_LOCK would have been enough otherwise.
2726                  */
2727                 t4_update_port_info(pi);
2728                 build_medialist(pi);
2729         }
2730
2731         /* ifm_status */
2732         ifmr->ifm_status = IFM_AVALID;
2733         if (lc->link_ok == false)
2734                 goto done;
2735         ifmr->ifm_status |= IFM_ACTIVE;
2736
2737         /* ifm_active */
2738         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2739         ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
2740         if (lc->fc & PAUSE_RX)
2741                 ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2742         if (lc->fc & PAUSE_TX)
2743                 ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2744         ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
2745 done:
2746         PORT_UNLOCK(pi);
2747         end_synchronized_op(sc, 0);
2748 }
2749
2750 static int
2751 vcxgbe_probe(device_t dev)
2752 {
2753         char buf[128];
2754         struct vi_info *vi = device_get_softc(dev);
2755
2756         snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2757             vi - vi->pi->vi);
2758         device_set_desc_copy(dev, buf);
2759
2760         return (BUS_PROBE_DEFAULT);
2761 }
2762
2763 static int
2764 alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2765 {
2766         int func, index, rc;
2767         uint32_t param, val;
2768
2769         ASSERT_SYNCHRONIZED_OP(sc);
2770
2771         index = vi - pi->vi;
2772         MPASS(index > 0);       /* This function deals with _extra_ VIs only */
2773         KASSERT(index < nitems(vi_mac_funcs),
2774             ("%s: VI %s doesn't have a MAC func", __func__,
2775             device_get_nameunit(vi->dev)));
2776         func = vi_mac_funcs[index];
2777         rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2778             vi->hw_addr, &vi->rss_size, &vi->vfvld, &vi->vin, func, 0);
2779         if (rc < 0) {
2780                 device_printf(vi->dev, "failed to allocate virtual interface %d "
2781                     "for port %d: %d\n", index, pi->port_id, -rc);
2782                 return (-rc);
2783         }
2784         vi->viid = rc;
2785
2786         if (vi->rss_size == 1) {
2787                 /*
2788                  * This VI didn't get a slice of the RSS table.  Reduce the
2789                  * number of VIs being created (hw.cxgbe.num_vis) or modify the
2790                  * configuration file (nvi, rssnvi for this PF) if this is a
2791                  * problem.
2792                  */
2793                 device_printf(vi->dev, "RSS table not available.\n");
2794                 vi->rss_base = 0xffff;
2795
2796                 return (0);
2797         }
2798
2799         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2800             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2801             V_FW_PARAMS_PARAM_YZ(vi->viid);
2802         rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2803         if (rc)
2804                 vi->rss_base = 0xffff;
2805         else {
2806                 MPASS((val >> 16) == vi->rss_size);
2807                 vi->rss_base = val & 0xffff;
2808         }
2809
2810         return (0);
2811 }
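/*
 * Note: the FW_PARAMS_PARAM_DEV_RSSINFO value read above packs the VI's
 * RSS slice as (rss_size << 16) | rss_base.  For example (value assumed),
 * val = 0x00400080 describes a 64-entry slice starting at index 128 of
 * the RSS table, which is what the MPASS on the upper 16 bits verifies.
 */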
2812
2813 static int
2814 vcxgbe_attach(device_t dev)
2815 {
2816         struct vi_info *vi;
2817         struct port_info *pi;
2818         struct adapter *sc;
2819         int rc;
2820
2821         vi = device_get_softc(dev);
2822         pi = vi->pi;
2823         sc = pi->adapter;
2824
2825         rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
2826         if (rc)
2827                 return (rc);
2828         rc = alloc_extra_vi(sc, pi, vi);
2829         end_synchronized_op(sc, 0);
2830         if (rc)
2831                 return (rc);
2832
2833         rc = cxgbe_vi_attach(dev, vi);
2834         if (rc) {
2835                 t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2836                 return (rc);
2837         }
2838         return (0);
2839 }
2840
2841 static int
2842 vcxgbe_detach(device_t dev)
2843 {
2844         struct vi_info *vi;
2845         struct adapter *sc;
2846
2847         vi = device_get_softc(dev);
2848         sc = vi->adapter;
2849
2850         doom_vi(sc, vi);
2851
2852         cxgbe_vi_detach(vi);
2853         t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2854
2855         end_synchronized_op(sc, 0);
2856
2857         return (0);
2858 }
2859
2860 static struct callout fatal_callout;
2861
2862 static void
2863 delayed_panic(void *arg)
2864 {
2865         struct adapter *sc = arg;
2866
2867         panic("%s: panic on fatal error", device_get_nameunit(sc->dev));
2868 }
2869
2870 void
2871 t4_fatal_err(struct adapter *sc, bool fw_error)
2872 {
2873
2874         t4_shutdown_adapter(sc);
2875         log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n",
2876             device_get_nameunit(sc->dev));
2877         if (fw_error) {
2878                 ASSERT_SYNCHRONIZED_OP(sc);
2879                 sc->flags |= ADAP_ERR;
2880         } else {
2881                 ADAPTER_LOCK(sc);
2882                 sc->flags |= ADAP_ERR;
2883                 ADAPTER_UNLOCK(sc);
2884         }
2885 #ifdef TCP_OFFLOAD
2886         taskqueue_enqueue(taskqueue_thread, &sc->async_event_task);
2887 #endif
2888
2889         if (t4_panic_on_fatal_err) {
2890                 log(LOG_ALERT, "%s: panic on fatal error after 30s\n",
2891                     device_get_nameunit(sc->dev));
2892                 callout_reset(&fatal_callout, hz * 30, delayed_panic, sc);
2893         }
2894 }
2895
2896 void
2897 t4_add_adapter(struct adapter *sc)
2898 {
2899         sx_xlock(&t4_list_lock);
2900         SLIST_INSERT_HEAD(&t4_list, sc, link);
2901         sx_xunlock(&t4_list_lock);
2902 }
2903
2904 int
2905 t4_map_bars_0_and_4(struct adapter *sc)
2906 {
2907         sc->regs_rid = PCIR_BAR(0);
2908         sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2909             &sc->regs_rid, RF_ACTIVE);
2910         if (sc->regs_res == NULL) {
2911                 device_printf(sc->dev, "cannot map registers.\n");
2912                 return (ENXIO);
2913         }
2914         sc->bt = rman_get_bustag(sc->regs_res);
2915         sc->bh = rman_get_bushandle(sc->regs_res);
2916         sc->mmio_len = rman_get_size(sc->regs_res);
2917         setbit(&sc->doorbells, DOORBELL_KDB);
2918
2919         sc->msix_rid = PCIR_BAR(4);
2920         sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2921             &sc->msix_rid, RF_ACTIVE);
2922         if (sc->msix_res == NULL) {
2923                 device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2924                 return (ENXIO);
2925         }
2926
2927         return (0);
2928 }
2929
2930 int
2931 t4_map_bar_2(struct adapter *sc)
2932 {
2933
2934         /*
2935          * T4: only the iWARP driver uses the userspace doorbells.  There is
2936          * no need to map this BAR if RDMA is disabled.
2937          */
2938         if (is_t4(sc) && sc->rdmacaps == 0)
2939                 return (0);
2940
2941         sc->udbs_rid = PCIR_BAR(2);
2942         sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2943             &sc->udbs_rid, RF_ACTIVE);
2944         if (sc->udbs_res == NULL) {
2945                 device_printf(sc->dev, "cannot map doorbell BAR.\n");
2946                 return (ENXIO);
2947         }
2948         sc->udbs_base = rman_get_virtual(sc->udbs_res);
2949
2950         if (chip_id(sc) >= CHELSIO_T5) {
2951                 setbit(&sc->doorbells, DOORBELL_UDB);
2952 #if defined(__i386__) || defined(__amd64__)
2953                 if (t5_write_combine) {
2954                         int rc, mode;
2955
2956                         /*
2957                          * Enable write combining on BAR2.  This is the
2958                          * userspace doorbell BAR and is split into 128B
2959                          * (UDBS_SEG_SIZE) doorbell regions, each associated
2960                          * with an egress queue.  The first 64B has the doorbell
2961                          * and the second 64B can be used to submit a tx work
2962                          * request with an implicit doorbell.
2963                          */
2964
2965                         rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2966                             rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2967                         if (rc == 0) {
2968                                 clrbit(&sc->doorbells, DOORBELL_UDB);
2969                                 setbit(&sc->doorbells, DOORBELL_WCWR);
2970                                 setbit(&sc->doorbells, DOORBELL_UDBWC);
2971                         } else {
2972                                 device_printf(sc->dev,
2973                                     "couldn't enable write combining: %d\n",
2974                                     rc);
2975                         }
2976
2977                         mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2978                         t4_write_reg(sc, A_SGE_STAT_CFG,
2979                             V_STATSOURCE_T5(7) | mode);
2980                 }
2981 #endif
2982         }
2983         sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0;
2984
2985         return (0);
2986 }
2987
2988 struct memwin_init {
2989         uint32_t base;
2990         uint32_t aperture;
2991 };
2992
2993 static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2994         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2995         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2996         { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2997 };
2998
2999 static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
3000         { MEMWIN0_BASE, MEMWIN0_APERTURE },
3001         { MEMWIN1_BASE, MEMWIN1_APERTURE },
3002         { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
3003 };
3004
3005 static void
3006 setup_memwin(struct adapter *sc)
3007 {
3008         const struct memwin_init *mw_init;
3009         struct memwin *mw;
3010         int i;
3011         uint32_t bar0;
3012
3013         if (is_t4(sc)) {
3014                 /*
3015                  * Read low 32b of bar0 indirectly via the hardware backdoor
3016                  * mechanism.  Works from within PCI passthrough environments
3017                  * too, where rman_get_start() can return a different value.  We
3018                  * need to program the T4 memory window decoders with the actual
3019                  * addresses that will be coming across the PCIe link.
3020                  */
3021                 bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
3022                 bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
3023
3024                 mw_init = &t4_memwin[0];
3025         } else {
3026                 /* T5+ use the relative offset inside the PCIe BAR */
3027                 bar0 = 0;
3028
3029                 mw_init = &t5_memwin[0];
3030         }
3031
3032         for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
3033                 rw_init(&mw->mw_lock, "memory window access");
3034                 mw->mw_base = mw_init->base;
3035                 mw->mw_aperture = mw_init->aperture;
3036                 mw->mw_curpos = 0;
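                     /*
                      * Program the window's position on the PCIe bus and its
                      * size.  The hardware encodes the aperture as
                      * log2(size) - 10; a 64KB (2^16) window, for example, is
                      * written as 6.
                      */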
3037                 t4_write_reg(sc,
3038                     PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
3039                     (mw->mw_base + bar0) | V_BIR(0) |
3040                     V_WINDOW(ilog2(mw->mw_aperture) - 10));
3041                 rw_wlock(&mw->mw_lock);
3042                 position_memwin(sc, i, 0);
3043                 rw_wunlock(&mw->mw_lock);
3044         }
3045
3046         /* flush */
3047         t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
3048 }
3049
3050 /*
3051  * Positions the memory window at the given address in the card's address space.
3052  * There are some alignment requirements and the actual position may be at an
3053  * address prior to the requested address.  mw->mw_curpos always has the actual
3054  * position of the window.
3055  */
3056 static void
3057 position_memwin(struct adapter *sc, int idx, uint32_t addr)
3058 {
3059         struct memwin *mw;
3060         uint32_t pf;
3061         uint32_t reg;
3062
3063         MPASS(idx >= 0 && idx < NUM_MEMWIN);
3064         mw = &sc->memwin[idx];
3065         rw_assert(&mw->mw_lock, RA_WLOCKED);
3066
3067         if (is_t4(sc)) {
3068                 pf = 0;
3069                 mw->mw_curpos = addr & ~0xf;    /* start must be 16B aligned */
3070         } else {
3071                 pf = V_PFNUM(sc->pf);
3072                 mw->mw_curpos = addr & ~0x7f;   /* start must be 128B aligned */
3073         }
3074         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
3075         t4_write_reg(sc, reg, mw->mw_curpos | pf);
3076         t4_read_reg(sc, reg);   /* flush */
3077 }
3078
3079 int
3080 rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
3081     int len, int rw)
3082 {
3083         struct memwin *mw;
3084         uint32_t mw_end, v;
3085
3086         MPASS(idx >= 0 && idx < NUM_MEMWIN);
3087
3088         /* Memory can only be accessed in naturally aligned 4-byte units */
3089         if (addr & 3 || len & 3 || len <= 0)
3090                 return (EINVAL);
3091
3092         mw = &sc->memwin[idx];
3093         while (len > 0) {
3094                 rw_rlock(&mw->mw_lock);
3095                 mw_end = mw->mw_curpos + mw->mw_aperture;
3096                 if (addr >= mw_end || addr < mw->mw_curpos) {
3097                         /* Will need to reposition the window */
3098                         if (!rw_try_upgrade(&mw->mw_lock)) {
3099                                 rw_runlock(&mw->mw_lock);
3100                                 rw_wlock(&mw->mw_lock);
3101                         }
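                             /*
                              * Another thread may have repositioned the
                              * window while we slept on the write lock, but
                              * repositioning it again is harmless.
                              */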
3102                         rw_assert(&mw->mw_lock, RA_WLOCKED);
3103                         position_memwin(sc, idx, addr);
3104                         rw_downgrade(&mw->mw_lock);
3105                         mw_end = mw->mw_curpos + mw->mw_aperture;
3106                 }
3107                 rw_assert(&mw->mw_lock, RA_RLOCKED);
3108                 while (addr < mw_end && len > 0) {
3109                         if (rw == 0) {
3110                                 v = t4_read_reg(sc, mw->mw_base + addr -
3111                                     mw->mw_curpos);
3112                                 *val++ = le32toh(v);
3113                         } else {
3114                                 v = *val++;
3115                                 t4_write_reg(sc, mw->mw_base + addr -
3116                                     mw->mw_curpos, htole32(v));
3117                         }
3118                         addr += 4;
3119                         len -= 4;
3120                 }
3121                 rw_runlock(&mw->mw_lock);
3122         }
3123
3124         return (0);
3125 }
3126
3127 static void
3128 t4_init_atid_table(struct adapter *sc)
3129 {
3130         struct tid_info *t;
3131         int i;
3132
3133         t = &sc->tids;
3134         if (t->natids == 0)
3135                 return;
3136
3137         MPASS(t->atid_tab == NULL);
3138
3139         t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
3140             M_ZERO | M_WAITOK);
3141         mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
3142         t->afree = t->atid_tab;
3143         t->atids_in_use = 0;
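             /* Chain the entries together to form the initial free list. */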
3144         for (i = 1; i < t->natids; i++)
3145                 t->atid_tab[i - 1].next = &t->atid_tab[i];
3146         t->atid_tab[t->natids - 1].next = NULL;
3147 }
3148
3149 static void
3150 t4_free_atid_table(struct adapter *sc)
3151 {
3152         struct tid_info *t;
3153
3154         t = &sc->tids;
3155
3156         KASSERT(t->atids_in_use == 0,
3157             ("%s: %d atids still in use.", __func__, t->atids_in_use));
3158
3159         if (mtx_initialized(&t->atid_lock))
3160                 mtx_destroy(&t->atid_lock);
3161         free(t->atid_tab, M_CXGBE);
3162         t->atid_tab = NULL;
3163 }
3164
3165 int
3166 alloc_atid(struct adapter *sc, void *ctx)
3167 {
3168         struct tid_info *t = &sc->tids;
3169         int atid = -1;
3170
3171         mtx_lock(&t->atid_lock);
3172         if (t->afree) {
3173                 union aopen_entry *p = t->afree;
3174
3175                 atid = p - t->atid_tab;
3176                 MPASS(atid <= M_TID_TID);
3177                 t->afree = p->next;
3178                 p->data = ctx;
3179                 t->atids_in_use++;
3180         }
3181         mtx_unlock(&t->atid_lock);
3182         return (atid);
3183 }
3184
3185 void *
3186 lookup_atid(struct adapter *sc, int atid)
3187 {
3188         struct tid_info *t = &sc->tids;
3189
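             /* No lock needed; an atid's entry is stable while it is in use. */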
3190         return (t->atid_tab[atid].data);
3191 }
3192
3193 void
3194 free_atid(struct adapter *sc, int atid)
3195 {
3196         struct tid_info *t = &sc->tids;
3197         union aopen_entry *p = &t->atid_tab[atid];
3198
3199         mtx_lock(&t->atid_lock);
3200         p->next = t->afree;
3201         t->afree = p;
3202         t->atids_in_use--;
3203         mtx_unlock(&t->atid_lock);
3204 }
3205
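     /*
      * Sketch of the intended atid usage pattern (for illustration only):
      *
      *   atid = alloc_atid(sc, ctx);   // -1 means the table is full
      *   ...                           // lookup_atid(sc, atid) returns ctx
      *   free_atid(sc, atid);          // entry goes back on the free list
      */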
3206 static void
3207 queue_tid_release(struct adapter *sc, int tid)
3208 {
3209
3210         CXGBE_UNIMPLEMENTED("deferred tid release");
3211 }
3212
3213 void
3214 release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
3215 {
3216         struct wrqe *wr;
3217         struct cpl_tid_release *req;
3218
3219         wr = alloc_wrqe(sizeof(*req), ctrlq);
3220         if (wr == NULL) {
3221                 queue_tid_release(sc, tid);     /* defer */
3222                 return;
3223         }
3224         req = wrtod(wr);
3225
3226         INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
3227
3228         t4_wrq_tx(sc, wr);
3229 }
3230
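     /* qsort comparator: order card memory ranges by start address. */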
3231 static int
3232 t4_range_cmp(const void *a, const void *b)
3233 {
3234         return ((const struct t4_range *)a)->start -
3235                ((const struct t4_range *)b)->start;
3236 }
3237
3238 /*
3239  * Verify that the memory range specified by the addr/len pair is valid within
3240  * the card's address space.
3241  */
3242 static int
3243 validate_mem_range(struct adapter *sc, uint32_t addr, uint32_t len)
3244 {
3245         struct t4_range mem_ranges[4], *r, *next;
3246         uint32_t em, addr_len;
3247         int i, n, remaining;
3248
3249         /* Memory can only be accessed in naturally aligned 4-byte units */
3250         if (addr & 3 || len & 3 || len == 0)
3251                 return (EINVAL);
3252
3253         /* Enabled memories */
3254         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3255
3256         r = &mem_ranges[0];
3257         n = 0;
3258         bzero(r, sizeof(mem_ranges));
3259         if (em & F_EDRAM0_ENABLE) {
3260                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3261                 r->size = G_EDRAM0_SIZE(addr_len) << 20;
3262                 if (r->size > 0) {
3263                         r->start = G_EDRAM0_BASE(addr_len) << 20;
3264                         if (addr >= r->start &&
3265                             addr + len <= r->start + r->size)
3266                                 return (0);
3267                         r++;
3268                         n++;
3269                 }
3270         }
3271         if (em & F_EDRAM1_ENABLE) {
3272                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3273                 r->size = G_EDRAM1_SIZE(addr_len) << 20;
3274                 if (r->size > 0) {
3275                         r->start = G_EDRAM1_BASE(addr_len) << 20;
3276                         if (addr >= r->start &&
3277                             addr + len <= r->start + r->size)
3278                                 return (0);
3279                         r++;
3280                         n++;
3281                 }
3282         }
3283         if (em & F_EXT_MEM_ENABLE) {
3284                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3285                 r->size = G_EXT_MEM_SIZE(addr_len) << 20;
3286                 if (r->size > 0) {
3287                         r->start = G_EXT_MEM_BASE(addr_len) << 20;
3288                         if (addr >= r->start &&
3289                             addr + len <= r->start + r->size)
3290                                 return (0);
3291                         r++;
3292                         n++;
3293                 }
3294         }
3295         if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
3296                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3297                 r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
3298                 if (r->size > 0) {
3299                         r->start = G_EXT_MEM1_BASE(addr_len) << 20;
3300                         if (addr >= r->start &&
3301                             addr + len <= r->start + r->size)
3302                                 return (0);
3303                         r++;
3304                         n++;
3305                 }
3306         }
3307         MPASS(n <= nitems(mem_ranges));
3308
3309         if (n > 1) {
3310                 /* Sort and merge the ranges. */
3311                 qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
3312
3313                 /* Start from index 0 and examine the next n - 1 entries. */
3314                 r = &mem_ranges[0];
3315                 for (remaining = n - 1; remaining > 0; remaining--, r++) {
3316
3317                         MPASS(r->size > 0);     /* r is a valid entry. */
3318                         next = r + 1;
3319                         MPASS(next->size > 0);  /* and so is the next one. */
3320
3321                         while (r->start + r->size >= next->start) {
3322                                 /* Merge the next one into the current entry. */
3323                                 r->size = max(r->start + r->size,
3324                                     next->start + next->size) - r->start;
3325                                 n--;    /* One fewer entry in total. */
3326                                 if (--remaining == 0)
3327                                         goto done;      /* short circuit */
3328                                 next++;
3329                         }
3330                         if (next != r + 1) {
3331                                 /*
3332                                  * Some entries were merged into r and next
3333                                  * points to the first valid entry that couldn't
3334                                  * be merged.
3335                                  */
3336                                 MPASS(next->size > 0);  /* must be valid */
3337                                 memcpy(r + 1, next, remaining * sizeof(*r));
3338 #ifdef INVARIANTS
3339                                 /*
3340                                  * This is so that the size assertions in the
3341                                  * next iteration of the loop do the right
3342                                  * thing for entries that were pulled up and are
3343                                  * no longer valid.
3344                                  */
3345                                 MPASS(n < nitems(mem_ranges));
3346                                 bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
3347                                     sizeof(struct t4_range));
3348 #endif
3349                         }
3350                 }
3351 done:
3352                 /* Done merging the ranges. */
3353                 MPASS(n > 0);
3354                 r = &mem_ranges[0];
3355                 for (i = 0; i < n; i++, r++) {
3356                         if (addr >= r->start &&
3357                             addr + len <= r->start + r->size)
3358                                 return (0);
3359                 }
3360         }
3361
3362         return (EFAULT);
3363 }
3364
3365 static int
3366 fwmtype_to_hwmtype(int mtype)
3367 {
3368
3369         switch (mtype) {
3370         case FW_MEMTYPE_EDC0:
3371                 return (MEM_EDC0);
3372         case FW_MEMTYPE_EDC1:
3373                 return (MEM_EDC1);
3374         case FW_MEMTYPE_EXTMEM:
3375                 return (MEM_MC0);
3376         case FW_MEMTYPE_EXTMEM1:
3377                 return (MEM_MC1);
3378         default:
3379                 panic("%s: cannot translate fw mtype %d.", __func__, mtype);
3380         }
3381 }
3382
3383 /*
3384  * Verify that the memory range specified by the memtype/offset/len pair is
3385  * valid and lies entirely within the memtype specified.  The global address of
3386  * the start of the range is returned in addr.
3387  */
3388 static int
3389 validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, uint32_t len,
3390     uint32_t *addr)
3391 {
3392         uint32_t em, addr_len, maddr;
3393
3394         /* Memory can only be accessed in naturally aligned 4-byte units */
3395         if (off & 3 || len & 3 || len == 0)
3396                 return (EINVAL);
3397
3398         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3399         switch (fwmtype_to_hwmtype(mtype)) {
3400         case MEM_EDC0:
3401                 if (!(em & F_EDRAM0_ENABLE))
3402                         return (EINVAL);
3403                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3404                 maddr = G_EDRAM0_BASE(addr_len) << 20;
3405                 break;
3406         case MEM_EDC1:
3407                 if (!(em & F_EDRAM1_ENABLE))
3408                         return (EINVAL);
3409                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3410                 maddr = G_EDRAM1_BASE(addr_len) << 20;
3411                 break;
3412         case MEM_MC:
3413                 if (!(em & F_EXT_MEM_ENABLE))
3414                         return (EINVAL);
3415                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3416                 maddr = G_EXT_MEM_BASE(addr_len) << 20;
3417                 break;
3418         case MEM_MC1:
3419                 if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
3420                         return (EINVAL);
3421                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3422                 maddr = G_EXT_MEM1_BASE(addr_len) << 20;
3423                 break;
3424         default:
3425                 return (EINVAL);
3426         }
3427
3428         *addr = maddr + off;    /* global address */
3429         return (validate_mem_range(sc, *addr, len));
3430 }
3431
3432 static int
3433 fixup_devlog_params(struct adapter *sc)
3434 {
3435         struct devlog_params *dparams = &sc->params.devlog;
3436         int rc;
3437
3438         rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
3439             dparams->size, &dparams->addr);
3440
3441         return (rc);
3442 }
3443
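     /*
      * Total number of interrupts required: the firmware/error vectors
      * (T4_EXTRA_INTR) plus one per rx queue.  NIC and netmap rx queues on
      * the same VI share vectors, hence the max() rather than a sum.
      */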
3444 static void
3445 update_nirq(struct intrs_and_queues *iaq, int nports)
3446 {
3447
3448         iaq->nirq = T4_EXTRA_INTR;
3449         iaq->nirq += nports * max(iaq->nrxq, iaq->nnmrxq);
3450         iaq->nirq += nports * iaq->nofldrxq;
3451         iaq->nirq += nports * (iaq->num_vis - 1) *
3452             max(iaq->nrxq_vi, iaq->nnmrxq_vi);
3453         iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
3454 }
3455
3456 /*
3457  * Adjust requirements to fit the number of interrupts available.
3458  */
3459 static void
3460 calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
3461     int navail)
3462 {
3463         int old_nirq;
3464         const int nports = sc->params.nports;
3465
3466         MPASS(nports > 0);
3467         MPASS(navail > 0);
3468
3469         bzero(iaq, sizeof(*iaq));
3470         iaq->intr_type = itype;
3471         iaq->num_vis = t4_num_vis;
3472         iaq->ntxq = t4_ntxq;
3473         iaq->ntxq_vi = t4_ntxq_vi;
3474         iaq->nrxq = t4_nrxq;
3475         iaq->nrxq_vi = t4_nrxq_vi;
3476 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
3477         if (is_offload(sc) || is_ethoffload(sc)) {
3478                 iaq->nofldtxq = t4_nofldtxq;
3479                 iaq->nofldtxq_vi = t4_nofldtxq_vi;
3480         }
3481 #endif
3482 #ifdef TCP_OFFLOAD
3483         if (is_offload(sc)) {
3484                 iaq->nofldrxq = t4_nofldrxq;
3485                 iaq->nofldrxq_vi = t4_nofldrxq_vi;
3486         }
3487 #endif
3488 #ifdef DEV_NETMAP
3489         if (t4_native_netmap & NN_MAIN_VI) {
3490                 iaq->nnmtxq = t4_nnmtxq;
3491                 iaq->nnmrxq = t4_nnmrxq;
3492         }
3493         if (t4_native_netmap & NN_EXTRA_VI) {
3494                 iaq->nnmtxq_vi = t4_nnmtxq_vi;
3495                 iaq->nnmrxq_vi = t4_nnmrxq_vi;
3496         }
3497 #endif
3498
3499         update_nirq(iaq, nports);
3500         if (iaq->nirq <= navail &&
3501             (itype != INTR_MSI || powerof2(iaq->nirq))) {
3502                 /*
3503                  * This is the normal case -- there are enough interrupts for
3504                  * everything.
3505                  */
3506                 goto done;
3507         }
3508
3509         /*
3510          * If extra VIs have been configured, try reducing their count and see
3511          * if that works.
3512          */
3513         while (iaq->num_vis > 1) {
3514                 iaq->num_vis--;
3515                 update_nirq(iaq, nports);
3516                 if (iaq->nirq <= navail &&
3517                     (itype != INTR_MSI || powerof2(iaq->nirq))) {
3518                         device_printf(sc->dev, "virtual interfaces per port "
3519                             "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
3520                             "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
3521                             "itype %d, navail %u, nirq %d.\n",
3522                             iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
3523                             iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
3524                             itype, navail, iaq->nirq);
3525                         goto done;
3526                 }
3527         }
3528
3529         /*
3530          * Extra VIs will not be created.  Log a message if they were requested.
3531          */
3532         MPASS(iaq->num_vis == 1);
3533         iaq->ntxq_vi = iaq->nrxq_vi = 0;
3534         iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
3535         iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
3536         if (iaq->num_vis != t4_num_vis) {
3537                 device_printf(sc->dev, "extra virtual interfaces disabled.  "
3538                     "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
3539                     "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
3540                     iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
3541                     iaq->nnmrxq_vi, itype, navail, iaq->nirq);
3542         }
3543
3544         /*
3545          * Keep reducing the number of NIC rx queues to the next lower power of
3546          * 2 (for even RSS distribution) and halving the TOE rx queues and see
3547          * if that works.
3548          */
3549         do {
3550                 if (iaq->nrxq > 1) {
3551                         do {
3552                                 iaq->nrxq--;
3553                         } while (!powerof2(iaq->nrxq));
3554                         if (iaq->nnmrxq > iaq->nrxq)
3555                                 iaq->nnmrxq = iaq->nrxq;
3556                 }
3557                 if (iaq->nofldrxq > 1)
3558                         iaq->nofldrxq >>= 1;
3559
3560                 old_nirq = iaq->nirq;
3561                 update_nirq(iaq, nports);
3562                 if (iaq->nirq <= navail &&
3563                     (itype != INTR_MSI || powerof2(iaq->nirq))) {
3564                         device_printf(sc->dev, "running with reduced number of "
3565                             "rx queues because of shortage of interrupts.  "
3566                             "nrxq=%u, nofldrxq=%u.  "
3567                             "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
3568                             iaq->nofldrxq, itype, navail, iaq->nirq);
3569                         goto done;
3570                 }
3571         } while (old_nirq != iaq->nirq);
3572
3573         /* One interrupt for everything.  Ugh. */
3574         device_printf(sc->dev, "running with minimal number of queues.  "
3575             "itype %d, navail %u.\n", itype, navail);
3576         iaq->nirq = 1;
3577         iaq->nrxq = 1;
3578         iaq->ntxq = 1;
3579         if (iaq->nofldrxq > 0) {
3580                 iaq->nofldrxq = 1;
3581                 iaq->nofldtxq = 1;
3582         }
3583         iaq->nnmtxq = 0;
3584         iaq->nnmrxq = 0;
3585 done:
3586         MPASS(iaq->num_vis > 0);
3587         if (iaq->num_vis > 1) {
3588                 MPASS(iaq->nrxq_vi > 0);
3589                 MPASS(iaq->ntxq_vi > 0);
3590         }
3591         MPASS(iaq->nirq > 0);
3592         MPASS(iaq->nrxq > 0);
3593         MPASS(iaq->ntxq > 0);
3594         if (itype == INTR_MSI) {
3595                 MPASS(powerof2(iaq->nirq));
3596         }
3597 }
3598
3599 static int
3600 cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
3601 {
3602         int rc, itype, navail, nalloc;
3603
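             /* Try the allowed types from best to worst: MSI-X, MSI, INTx. */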
3604         for (itype = INTR_MSIX; itype; itype >>= 1) {
3605
3606                 if ((itype & t4_intr_types) == 0)
3607                         continue;       /* not allowed */
3608
3609                 if (itype == INTR_MSIX)
3610                         navail = pci_msix_count(sc->dev);
3611                 else if (itype == INTR_MSI)
3612                         navail = pci_msi_count(sc->dev);
3613                 else
3614                         navail = 1;
3615 restart:
3616                 if (navail == 0)
3617                         continue;
3618
3619                 calculate_iaq(sc, iaq, itype, navail);
3620                 nalloc = iaq->nirq;
3621                 rc = 0;
3622                 if (itype == INTR_MSIX)
3623                         rc = pci_alloc_msix(sc->dev, &nalloc);
3624                 else if (itype == INTR_MSI)
3625                         rc = pci_alloc_msi(sc->dev, &nalloc);
3626
3627                 if (rc == 0 && nalloc > 0) {
3628                         if (nalloc == iaq->nirq)
3629                                 return (0);
3630
3631                         /*
3632                          * Didn't get the number requested.  Use whatever number
3633                          * the kernel is willing to allocate.
3634                          */
3635                         device_printf(sc->dev, "fewer vectors than requested, "
3636                             "type=%d, req=%d, rcvd=%d; will downshift req.\n",
3637                             itype, iaq->nirq, nalloc);
3638                         pci_release_msi(sc->dev);
3639                         navail = nalloc;
3640                         goto restart;
3641                 }
3642
3643                 device_printf(sc->dev,
3644                     "failed to allocate vectors: type=%d, rc=%d, req=%d, rcvd=%d\n",
3645                     itype, rc, iaq->nirq, nalloc);
3646         }
3647
3648         device_printf(sc->dev,
3649             "failed to find a usable interrupt type.  "
3650             "allowed=%d, msi-x=%d, msi=%d, intx=1.\n", t4_intr_types,
3651             pci_msix_count(sc->dev), pci_msi_count(sc->dev));
3652
3653         return (ENXIO);
3654 }
3655
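     /*
      * Pack the compiled-in firmware version for a chip into the 32-bit
      * format used by fw_hdr.fw_ver.
      */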
3656 #define FW_VERSION(chip) ( \
3657     V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
3658     V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
3659     V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
3660     V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
3661 #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
3662
3663 /* Just enough of fw_hdr to cover all version info. */
3664 struct fw_h {
3665         __u8    ver;
3666         __u8    chip;
3667         __be16  len512;
3668         __be32  fw_ver;
3669         __be32  tp_microcode_ver;
3670         __u8    intfver_nic;
3671         __u8    intfver_vnic;
3672         __u8    intfver_ofld;
3673         __u8    intfver_ri;
3674         __u8    intfver_iscsipdu;
3675         __u8    intfver_iscsi;
3676         __u8    intfver_fcoepdu;
3677         __u8    intfver_fcoe;
3678 };
3679 /* Spot check a couple of fields. */
3680 CTASSERT(offsetof(struct fw_h, fw_ver) == offsetof(struct fw_hdr, fw_ver));
3681 CTASSERT(offsetof(struct fw_h, intfver_nic) == offsetof(struct fw_hdr, intfver_nic));
3682 CTASSERT(offsetof(struct fw_h, intfver_fcoe) == offsetof(struct fw_hdr, intfver_fcoe));
3683
3684 struct fw_info {
3685         uint8_t chip;
3686         char *kld_name;
3687         char *fw_mod_name;
3688         struct fw_h fw_h;
3689 } fw_info[] = {
3690         {
3691                 .chip = CHELSIO_T4,
3692                 .kld_name = "t4fw_cfg",
3693                 .fw_mod_name = "t4fw",
3694                 .fw_h = {
3695                         .chip = FW_HDR_CHIP_T4,
3696                         .fw_ver = htobe32(FW_VERSION(T4)),
3697                         .intfver_nic = FW_INTFVER(T4, NIC),
3698                         .intfver_vnic = FW_INTFVER(T4, VNIC),
3699                         .intfver_ofld = FW_INTFVER(T4, OFLD),
3700                         .intfver_ri = FW_INTFVER(T4, RI),
3701                         .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
3702                         .intfver_iscsi = FW_INTFVER(T4, ISCSI),
3703                         .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
3704                         .intfver_fcoe = FW_INTFVER(T4, FCOE),
3705                 },
3706         }, {
3707                 .chip = CHELSIO_T5,
3708                 .kld_name = "t5fw_cfg",
3709                 .fw_mod_name = "t5fw",
3710                 .fw_h = {
3711                         .chip = FW_HDR_CHIP_T5,
3712                         .fw_ver = htobe32(FW_VERSION(T5)),
3713                         .intfver_nic = FW_INTFVER(T5, NIC),
3714                         .intfver_vnic = FW_INTFVER(T5, VNIC),
3715                         .intfver_ofld = FW_INTFVER(T5, OFLD),
3716                         .intfver_ri = FW_INTFVER(T5, RI),
3717                         .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
3718                         .intfver_iscsi = FW_INTFVER(T5, ISCSI),
3719                         .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
3720                         .intfver_fcoe = FW_INTFVER(T5, FCOE),
3721                 },
3722         }, {
3723                 .chip = CHELSIO_T6,
3724                 .kld_name = "t6fw_cfg",
3725                 .fw_mod_name = "t6fw",
3726                 .fw_h = {
3727                         .chip = FW_HDR_CHIP_T6,
3728                         .fw_ver = htobe32(FW_VERSION(T6)),
3729                         .intfver_nic = FW_INTFVER(T6, NIC),
3730                         .intfver_vnic = FW_INTFVER(T6, VNIC),
3731                         .intfver_ofld = FW_INTFVER(T6, OFLD),
3732                         .intfver_ri = FW_INTFVER(T6, RI),
3733                         .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
3734                         .intfver_iscsi = FW_INTFVER(T6, ISCSI),
3735                         .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
3736                         .intfver_fcoe = FW_INTFVER(T6, FCOE),
3737                 },
3738         }
3739 };
3740
3741 static struct fw_info *
3742 find_fw_info(int chip)
3743 {
3744         int i;
3745
3746         for (i = 0; i < nitems(fw_info); i++) {
3747                 if (fw_info[i].chip == chip)
3748                         return (&fw_info[i]);
3749         }
3750         return (NULL);
3751 }
3752
3753 /*
3754  * Is the given firmware API compatible with the one the driver was compiled
3755  * with?
3756  */
3757 static int
3758 fw_compatible(const struct fw_h *hdr1, const struct fw_h *hdr2)
3759 {
3760
3761         /* short circuit if it's the exact same firmware version */
3762         if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3763                 return (1);
3764
3765         /*
3766          * XXX: Is this too conservative?  Perhaps I should limit this to the
3767          * features that are supported in the driver.
3768          */
3769 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3770         if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3771             SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3772             SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3773                 return (1);
3774 #undef SAME_INTF
3775
3776         return (0);
3777 }
3778
3779 static int
3780 load_fw_module(struct adapter *sc, const struct firmware **dcfg,
3781     const struct firmware **fw)
3782 {
3783         struct fw_info *fw_info;
3784
3785         *dcfg = NULL;
3786         if (fw != NULL)
3787                 *fw = NULL;
3788
3789         fw_info = find_fw_info(chip_id(sc));
3790         if (fw_info == NULL) {
3791                 device_printf(sc->dev,
3792                     "unable to look up firmware information for chip %d.\n",
3793                     chip_id(sc));
3794                 return (EINVAL);
3795         }
3796
3797         *dcfg = firmware_get(fw_info->kld_name);
3798         if (*dcfg != NULL) {
3799                 if (fw != NULL)
3800                         *fw = firmware_get(fw_info->fw_mod_name);
3801                 return (0);
3802         }
3803
3804         return (ENOENT);
3805 }
3806
3807 static void
3808 unload_fw_module(struct adapter *sc, const struct firmware *dcfg,
3809     const struct firmware *fw)
3810 {
3811
3812         if (fw != NULL)
3813                 firmware_put(fw, FIRMWARE_UNLOAD);
3814         if (dcfg != NULL)
3815                 firmware_put(dcfg, FIRMWARE_UNLOAD);
3816 }
3817
3818 /*
3819  * Return values:
3820  * 0 means no firmware install attempted.
3821  * ERESTART means a firmware install was attempted and was successful.
3822  * +ve errno means a firmware install was attempted but failed.
3823  */
3824 static int
3825 install_kld_firmware(struct adapter *sc, struct fw_h *card_fw,
3826     const struct fw_h *drv_fw, const char *reason, int *already)
3827 {
3828         const struct firmware *cfg, *fw;
3829         const uint32_t c = be32toh(card_fw->fw_ver);
3830         uint32_t d, k;
3831         int rc, fw_install;
3832         struct fw_h bundled_fw;
3833         bool load_attempted;
3834
3835         cfg = fw = NULL;
3836         load_attempted = false;
3837         fw_install = t4_fw_install < 0 ? -t4_fw_install : t4_fw_install;
3838
3839         memcpy(&bundled_fw, drv_fw, sizeof(bundled_fw));
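             /*
              * A negative hw.cxgbe.fw_install behaves like its absolute value
              * except that the firmware in the KLD, rather than the version
              * compiled into the driver, is used for the version checks.
              */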
3840         if (t4_fw_install < 0) {
3841                 rc = load_fw_module(sc, &cfg, &fw);
3842                 if (rc != 0 || fw == NULL) {
3843                         device_printf(sc->dev,
3844                             "failed to load firmware module: %d. cfg %p, fw %p;"
3845                             " will use compiled-in firmware version for "
3846                             "hw.cxgbe.fw_install checks.\n",
3847                             rc, cfg, fw);
3848                 } else {
3849                         memcpy(&bundled_fw, fw->data, sizeof(bundled_fw));
3850                 }
3851                 load_attempted = true;
3852         }
3853         d = be32toh(bundled_fw.fw_ver);
3854
3855         if (reason != NULL)
3856                 goto install;
3857
3858         if ((sc->flags & FW_OK) == 0) {
3859
3860                 if (c == 0xffffffff) {
3861                         reason = "missing";
3862                         goto install;
3863                 }
3864
3865                 rc = 0;
3866                 goto done;
3867         }
3868
3869         if (!fw_compatible(card_fw, &bundled_fw)) {
3870                 reason = "incompatible or unusable";
3871                 goto install;
3872         }
3873
3874         if (d > c) {
3875                 reason = "older than the version bundled with this driver";
3876                 goto install;
3877         }
3878
3879         if (fw_install == 2 && d != c) {
3880                 reason = "different than the version bundled with this driver";
3881                 goto install;
3882         }
3883
3884         /* No reason to do anything to the firmware already on the card. */
3885         rc = 0;
3886         goto done;
3887
3888 install:
3889         rc = 0;
3890         if ((*already)++)
3891                 goto done;
3892
3893         if (fw_install == 0) {
3894                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3895                     "but the driver is prohibited from installing a firmware "
3896                     "on the card.\n",
3897                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3898                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3899
3900                 goto done;
3901         }
3902
3903         /*
3904          * We'll attempt to install a firmware.  Load the module first (if it
3905          * hasn't been loaded already).
3906          */
3907         if (!load_attempted) {
3908                 rc = load_fw_module(sc, &cfg, &fw);
3909                 if (rc != 0 || fw == NULL) {
3910                         device_printf(sc->dev,
3911                             "failed to load firmware module: %d. cfg %p, fw %p\n",
3912                             rc, cfg, fw);
3913                         /* carry on */
3914                 }
3915         }
3916         if (fw == NULL) {
3917                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3918                     "but the driver cannot take corrective action because it "
3919                     "is unable to load the firmware module.\n",
3920                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3921                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3922                 rc = sc->flags & FW_OK ? 0 : ENOENT;
3923                 goto done;
3924         }
3925         k = be32toh(((const struct fw_hdr *)fw->data)->fw_ver);
3926         if (k != d) {
3927                 MPASS(t4_fw_install > 0);
3928                 device_printf(sc->dev,
3929                     "firmware in KLD (%u.%u.%u.%u) is not what the driver was "
3930                     "expecting (%u.%u.%u.%u) and will not be used.\n",
3931                     G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3932                     G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k),
3933                     G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3934                     G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3935                 rc = sc->flags & FW_OK ? 0 : EINVAL;
3936                 goto done;
3937         }
3938
3939         device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3940             "installing firmware %u.%u.%u.%u on card.\n",
3941             G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3942             G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3943             G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3944             G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3945
3946         rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3947         if (rc != 0) {
3948                 device_printf(sc->dev, "failed to install firmware: %d\n", rc);
3949         } else {
3950                 /* Installed successfully, update the cached header too. */
3951                 rc = ERESTART;
3952                 memcpy(card_fw, fw->data, sizeof(*card_fw));
3953         }
3954 done:
3955         unload_fw_module(sc, cfg, fw);
3956
3957         return (rc);
3958 }
3959
3960 /*
3961  * Establish contact with the firmware and attempt to become the master driver.
3962  *
3963  * A firmware will be installed on the card if needed (and if the driver is
3964  * allowed to do so).
3965  */
3966 static int
3967 contact_firmware(struct adapter *sc)
3968 {
3969         int rc, already = 0;
3970         enum dev_state state;
3971         struct fw_info *fw_info;
3972         struct fw_hdr *card_fw;         /* fw on the card */
3973         const struct fw_h *drv_fw;
3974
3975         fw_info = find_fw_info(chip_id(sc));
3976         if (fw_info == NULL) {
3977                 device_printf(sc->dev,
3978                     "unable to look up firmware information for chip %d.\n",
3979                     chip_id(sc));
3980                 return (EINVAL);
3981         }
3982         drv_fw = &fw_info->fw_h;
3983
3984         /* Read the header of the firmware on the card */
3985         card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
3986 restart:
3987         rc = -t4_get_fw_hdr(sc, card_fw);
3988         if (rc != 0) {
3989                 device_printf(sc->dev,
3990                     "unable to read firmware header from card's flash: %d\n",
3991                     rc);
3992                 goto done;
3993         }
3994
3995         rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw, NULL,
3996             &already);
3997         if (rc == ERESTART)
3998                 goto restart;
3999         if (rc != 0)
4000                 goto done;
4001
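             /*
              * On success t4_fw_hello returns the unit of the master PF, so a
              * return value equal to sc->pf means this driver is the master.
              */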
4002         rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
4003         if (rc < 0 || state == DEV_STATE_ERR) {
4004                 rc = -rc;
4005                 device_printf(sc->dev,
4006                     "failed to connect to the firmware: %d, %d.  "
4007                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4008 #if 0
4009                 if (install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
4010                     "not responding properly to HELLO", &already) == ERESTART)
4011                         goto restart;
4012 #endif
4013                 goto done;
4014         }
4015         MPASS(be32toh(card_fw->flags) & FW_HDR_FLAGS_RESET_HALT);
4016         sc->flags |= FW_OK;     /* The firmware responded to the FW_HELLO. */
4017
4018         if (rc == sc->pf) {
4019                 sc->flags |= MASTER_PF;
4020                 rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
4021                     NULL, &already);
4022                 if (rc == ERESTART)
4023                         rc = 0;
4024                 else if (rc != 0)
4025                         goto done;
4026         } else if (state == DEV_STATE_UNINIT) {
4027                 /*
4028                  * We didn't get to be the master, so we definitely won't be
4029                  * configuring the chip.  It's a bug if someone else hasn't
4030                  * configured it already.
4031                  */
4032                 device_printf(sc->dev, "couldn't be master(%d), "
4033                     "device not already initialized either(%d).  "
4034                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4035                 rc = EPROTO;
4036                 goto done;
4037         } else {
4038                 /*
4039                  * Some other PF is the master and has configured the chip.
4040                  * This is allowed but untested.
4041                  */
4042                 device_printf(sc->dev, "PF%d is master, device state %d.  "
4043                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4044                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", rc);
4045                 sc->cfcsum = 0;
4046                 rc = 0;
4047         }
4048 done:
4049         if (rc != 0 && sc->flags & FW_OK) {
4050                 t4_fw_bye(sc, sc->mbox);
4051                 sc->flags &= ~FW_OK;
4052         }
4053         free(card_fw, M_CXGBE);
4054         return (rc);
4055 }
4056
4057 static int
4058 copy_cfg_file_to_card(struct adapter *sc, char *cfg_file,
4059     uint32_t mtype, uint32_t moff)
4060 {
4061         struct fw_info *fw_info;
4062         const struct firmware *dcfg, *rcfg = NULL;
4063         const uint32_t *cfdata;
4064         uint32_t cflen, addr;
4065         int rc;
4066
4067         load_fw_module(sc, &dcfg, NULL);
4068
4069         /* Card-specific interpretation of "default". */
4070         if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
4071                 if (pci_get_device(sc->dev) == 0x440a)
4072                         snprintf(cfg_file, sizeof(t4_cfg_file), UWIRE_CF);
4073                 if (is_fpga(sc))
4074                         snprintf(cfg_file, sizeof(t4_cfg_file), FPGA_CF);
4075         }
4076
4077         if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
4078                 if (dcfg == NULL) {
4079                         device_printf(sc->dev,
4080                             "KLD with default config is not available.\n");
4081                         rc = ENOENT;
4082                         goto done;
4083                 }
4084                 cfdata = dcfg->data;
4085                 cflen = dcfg->datasize & ~3;
4086         } else {
4087                 char s[32];
4088
4089                 fw_info = find_fw_info(chip_id(sc));
4090                 if (fw_info == NULL) {
4091                         device_printf(sc->dev,
4092                             "unable to look up firmware information for chip %d.\n",
4093                             chip_id(sc));
4094                         rc = EINVAL;
4095                         goto done;
4096                 }
4097                 snprintf(s, sizeof(s), "%s_%s", fw_info->kld_name, cfg_file);
4098
4099                 rcfg = firmware_get(s);
4100                 if (rcfg == NULL) {
4101                         device_printf(sc->dev,
4102                             "unable to load module \"%s\" for configuration "
4103                             "profile \"%s\".\n", s, cfg_file);
4104                         rc = ENOENT;
4105                         goto done;
4106                 }
4107                 cfdata = rcfg->data;
4108                 cflen = rcfg->datasize & ~3;
4109         }
4110
4111         if (cflen > FLASH_CFG_MAX_SIZE) {
4112                 device_printf(sc->dev,
4113                     "config file too long (%d, max allowed is %d).\n",
4114                     cflen, FLASH_CFG_MAX_SIZE);
4115                 rc = EINVAL;
4116                 goto done;
4117         }
4118
4119         rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
4120         if (rc != 0) {
4121                 device_printf(sc->dev,
4122                     "%s: addr (%d/0x%x) or len %d is not valid: %d.\n",
4123                     __func__, mtype, moff, cflen, rc);
4124                 rc = EINVAL;
4125                 goto done;
4126         }
4127         write_via_memwin(sc, 2, addr, cfdata, cflen);
4128 done:
4129         if (rcfg != NULL)
4130                 firmware_put(rcfg, FIRMWARE_UNLOAD);
4131         unload_fw_module(sc, dcfg, NULL);
4132         return (rc);
4133 }
4134
4135 struct caps_allowed {
4136         uint16_t nbmcaps;
4137         uint16_t linkcaps;
4138         uint16_t switchcaps;
4139         uint16_t niccaps;
4140         uint16_t toecaps;
4141         uint16_t rdmacaps;
4142         uint16_t cryptocaps;
4143         uint16_t iscsicaps;
4144         uint16_t fcoecaps;
4145 };
4146
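     /*
      * Shorthand for constructing the parameter identifiers passed to
      * t4_query_params/t4_set_params.
      */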
4147 #define FW_PARAM_DEV(param) \
4148         (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
4149          V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
4150 #define FW_PARAM_PFVF(param) \
4151         (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
4152          V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
4153
4154 /*
4155  * Provide a configuration profile to the firmware and have it initialize the
4156  * chip accordingly.  This may involve uploading a configuration file to the
4157  * card.
4158  */
4159 static int
4160 apply_cfg_and_initialize(struct adapter *sc, char *cfg_file,
4161     const struct caps_allowed *caps_allowed)
4162 {
4163         int rc;
4164         struct fw_caps_config_cmd caps;
4165         uint32_t mtype, moff, finicsum, cfcsum, param, val;
4166
4167         rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
4168         if (rc != 0) {
4169                 device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
4170                 return (rc);
4171         }
4172
4173         bzero(&caps, sizeof(caps));
4174         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4175             F_FW_CMD_REQUEST | F_FW_CMD_READ);
4176         if (strncmp(cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0) {
4177                 mtype = 0;
4178                 moff = 0;
4179                 caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4180         } else if (strncmp(cfg_file, FLASH_CF, sizeof(t4_cfg_file)) == 0) {
4181                 mtype = FW_MEMTYPE_FLASH;
4182                 moff = t4_flash_cfg_addr(sc);
4183                 caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
4184                     V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
4185                     V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
4186                     FW_LEN16(caps));
4187         } else {
4188                 /*
4189                  * Ask the firmware where it wants us to upload the config file.
4190                  */
4191                 param = FW_PARAM_DEV(CF);
4192                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4193                 if (rc != 0) {
4194                         /* No support for config file?  Shouldn't happen. */
4195                         device_printf(sc->dev,
4196                             "failed to query config file location: %d.\n", rc);
4197                         goto done;
4198                 }
4199                 mtype = G_FW_PARAMS_PARAM_Y(val);
4200                 moff = G_FW_PARAMS_PARAM_Z(val) << 16;
4201                 caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
4202                     V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
4203                     V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
4204                     FW_LEN16(caps));
4205
4206                 rc = copy_cfg_file_to_card(sc, cfg_file, mtype, moff);
4207                 if (rc != 0) {
4208                         device_printf(sc->dev,
4209                             "failed to upload config file to card: %d.\n", rc);
4210                         goto done;
4211                 }
4212         }
4213         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4214         if (rc != 0) {
4215                 device_printf(sc->dev, "failed to pre-process config file: %d "
4216                     "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
4217                 goto done;
4218         }
4219
4220         finicsum = be32toh(caps.finicsum);
4221         cfcsum = be32toh(caps.cfcsum);  /* actual */
4222         if (finicsum != cfcsum) {
4223                 device_printf(sc->dev,
4224                     "WARNING: config file checksum mismatch: %08x %08x\n",
4225                     finicsum, cfcsum);
4226         }
4227         sc->cfcsum = cfcsum;
4228         snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", cfg_file);
4229
4230         /*
4231          * Let the firmware know what features will (not) be used so it can tune
4232          * things accordingly.
4233          */
4234 #define LIMIT_CAPS(x) do { \
4235         caps.x##caps &= htobe16(caps_allowed->x##caps); \
4236 } while (0)
4237         LIMIT_CAPS(nbm);
4238         LIMIT_CAPS(link);
4239         LIMIT_CAPS(switch);
4240         LIMIT_CAPS(nic);
4241         LIMIT_CAPS(toe);
4242         LIMIT_CAPS(rdma);
4243         LIMIT_CAPS(crypto);
4244         LIMIT_CAPS(iscsi);
4245         LIMIT_CAPS(fcoe);
4246 #undef LIMIT_CAPS
4247         if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
4248                 /*
4249                  * TOE and hashfilters are mutually exclusive.  It is a config
4250                  * file or firmware bug if both are reported as available.  Try
4251                  * to cope with the situation in non-debug builds by disabling
4252                  * TOE.
4253                  */
4254                 MPASS(caps.toecaps == 0);
4255
4256                 caps.toecaps = 0;
4257                 caps.rdmacaps = 0;
4258                 caps.iscsicaps = 0;
4259         }
4260
4261         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4262             F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
4263         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4264         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
4265         if (rc != 0) {
4266                 device_printf(sc->dev,
4267                     "failed to process config file: %d.\n", rc);
4268                 goto done;
4269         }
4270
4271         t4_tweak_chip_settings(sc);
4272         set_params__pre_init(sc);
4273
4274         /* get basic stuff going */
4275         rc = -t4_fw_initialize(sc, sc->mbox);
4276         if (rc != 0) {
4277                 device_printf(sc->dev, "fw_initialize failed: %d.\n", rc);
4278                 goto done;
4279         }
4280 done:
4281         return (rc);
4282 }
4283
4284 /*
4285  * Partition chip resources for use between various PFs, VFs, etc.
4286  */
4287 static int
4288 partition_resources(struct adapter *sc)
4289 {
4290         char cfg_file[sizeof(t4_cfg_file)];
4291         struct caps_allowed caps_allowed;
4292         int rc;
4293         bool fallback;
4294
4295         /* Only the master driver gets to configure the chip resources. */
4296         MPASS(sc->flags & MASTER_PF);
4297
4298 #define COPY_CAPS(x) do { \
4299         caps_allowed.x##caps = t4_##x##caps_allowed; \
4300 } while (0)
4301         bzero(&caps_allowed, sizeof(caps_allowed));
4302         COPY_CAPS(nbm);
4303         COPY_CAPS(link);
4304         COPY_CAPS(switch);
4305         COPY_CAPS(nic);
4306         COPY_CAPS(toe);
4307         COPY_CAPS(rdma);
4308         COPY_CAPS(crypto);
4309         COPY_CAPS(iscsi);
4310         COPY_CAPS(fcoe);
4311         fallback = sc->debug_flags & DF_DISABLE_CFG_RETRY ? false : true;
4312         snprintf(cfg_file, sizeof(cfg_file), "%s", t4_cfg_file);
4313 retry:
4314         rc = apply_cfg_and_initialize(sc, cfg_file, &caps_allowed);
4315         if (rc != 0 && fallback) {
4316                 device_printf(sc->dev,
4317                     "failed (%d) to configure card with \"%s\" profile, "
4318                     "will fall back to a basic configuration and retry.\n",
4319                     rc, cfg_file);
4320                 snprintf(cfg_file, sizeof(cfg_file), "%s", BUILTIN_CF);
4321                 bzero(&caps_allowed, sizeof(caps_allowed));
4322                 COPY_CAPS(switch);
4323                 caps_allowed.niccaps = FW_CAPS_CONFIG_NIC;
4324                 fallback = false;
4325                 goto retry;
4326         }
4327 #undef COPY_CAPS
4328         return (rc);
4329 }
4330
4331 /*
4332  * Retrieve parameters that are needed (or nice to have) very early.
4333  */
4334 static int
4335 get_params__pre_init(struct adapter *sc)
4336 {
4337         int rc;
4338         uint32_t param[2], val[2];
4339
4340         t4_get_version_info(sc);
4341
4342         snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
4343             G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
4344             G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
4345             G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
4346             G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
4347
4348         snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
4349             G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
4350             G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
4351             G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
4352             G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
4353
4354         snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
4355             G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
4356             G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
4357             G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
4358             G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
4359
4360         snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
4361             G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
4362             G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
4363             G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
4364             G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
4365
4366         param[0] = FW_PARAM_DEV(PORTVEC);
4367         param[1] = FW_PARAM_DEV(CCLK);
4368         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4369         if (rc != 0) {
4370                 device_printf(sc->dev,
4371                     "failed to query parameters (pre_init): %d.\n", rc);
4372                 return (rc);
4373         }
4374
4375         sc->params.portvec = val[0];
4376         sc->params.nports = bitcount32(val[0]);
4377         sc->params.vpd.cclk = val[1];
4378
4379         /* Read device log parameters. */
4380         rc = -t4_init_devlog_params(sc, 1);
4381         if (rc == 0)
4382                 fixup_devlog_params(sc);
4383         else {
4384                 device_printf(sc->dev,
4385                     "failed to get devlog parameters: %d.\n", rc);
4386                 rc = 0; /* devlog isn't critical for device operation */
4387         }
4388
4389         return (rc);
4390 }
4391
4392 /*
4393  * Any params that need to be set before FW_INITIALIZE.
4394  */
4395 static int
4396 set_params__pre_init(struct adapter *sc)
4397 {
4398         int rc = 0;
4399         uint32_t param, val;
4400
4401         if (chip_id(sc) >= CHELSIO_T6) {
4402                 param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT);
4403                 val = 1;
4404                 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4405                 /* firmwares < 1.20.1.0 do not have this param. */
4406                 if (rc == FW_EINVAL &&
4407                     sc->params.fw_vers < FW_VERSION32(1, 20, 1, 0)) {
4408                         rc = 0;
4409                 }
4410                 if (rc != 0) {
4411                         device_printf(sc->dev,
4412                             "failed to enable high priority filters :%d.\n",
4413                             rc);
4414                 }
4415         }
4416
4417         /* Enable opaque VIIDs with firmwares that support it. */
4418         param = FW_PARAM_DEV(OPAQUE_VIID_SMT_EXTN);
4419         val = 1;
4420         rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4421         if (rc == 0 && val == 1)
4422                 sc->params.viid_smt_extn_support = true;
4423         else
4424                 sc->params.viid_smt_extn_support = false;
4425
4426         return (rc);
4427 }
4428
4429 /*
4430  * Retrieve various parameters that are of interest to the driver.  The device
4431  * has been initialized by the firmware at this point.
4432  */
4433 static int
4434 get_params__post_init(struct adapter *sc)
4435 {
4436         int rc;
4437         uint32_t param[7], val[7];
4438         struct fw_caps_config_cmd caps;
4439
4440         param[0] = FW_PARAM_PFVF(IQFLINT_START);
4441         param[1] = FW_PARAM_PFVF(EQ_START);
4442         param[2] = FW_PARAM_PFVF(FILTER_START);
4443         param[3] = FW_PARAM_PFVF(FILTER_END);
4444         param[4] = FW_PARAM_PFVF(L2T_START);
4445         param[5] = FW_PARAM_PFVF(L2T_END);
4446         param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
4447             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
4448             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
4449         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
4450         if (rc != 0) {
4451                 device_printf(sc->dev,
4452                     "failed to query parameters (post_init): %d.\n", rc);
4453                 return (rc);
4454         }
4455
4456         sc->sge.iq_start = val[0];
4457         sc->sge.eq_start = val[1];
4458         if ((int)val[3] > (int)val[2]) {
4459                 sc->tids.ftid_base = val[2];
4460                 sc->tids.ftid_end = val[3];
4461                 sc->tids.nftids = val[3] - val[2] + 1;
4462         }
4463         sc->vres.l2t.start = val[4];
4464         sc->vres.l2t.size = val[5] - val[4] + 1;
4465         KASSERT(sc->vres.l2t.size <= L2T_SIZE,
4466             ("%s: L2 table size (%u) larger than expected (%u)",
4467             __func__, sc->vres.l2t.size, L2T_SIZE));
4468         sc->params.core_vdd = val[6];
4469
4470         param[0] = FW_PARAM_PFVF(IQFLINT_END);
4471         param[1] = FW_PARAM_PFVF(EQ_END);
4472         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4473         if (rc != 0) {
4474                 device_printf(sc->dev,
4475                     "failed to query parameters (post_init2): %d.\n", rc);
4476                 return (rc);
4477         }
4478         MPASS((int)val[0] >= sc->sge.iq_start);
4479         sc->sge.iqmap_sz = val[0] - sc->sge.iq_start + 1;
4480         MPASS((int)val[1] >= sc->sge.eq_start);
4481         sc->sge.eqmap_sz = val[1] - sc->sge.eq_start + 1;
4482
4483         if (chip_id(sc) >= CHELSIO_T6) {
4484
4485                 sc->tids.tid_base = t4_read_reg(sc,
4486                     A_LE_DB_ACTIVE_TABLE_START_INDEX);
4487
4488                 param[0] = FW_PARAM_PFVF(HPFILTER_START);
4489                 param[1] = FW_PARAM_PFVF(HPFILTER_END);
4490                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4491                 if (rc != 0) {
4492                         device_printf(sc->dev,
4493                            "failed to query hpfilter parameters: %d.\n", rc);
4494                         return (rc);
4495                 }
4496                 if ((int)val[1] > (int)val[0]) {
4497                         sc->tids.hpftid_base = val[0];
4498                         sc->tids.hpftid_end = val[1];
4499                         sc->tids.nhpftids = val[1] - val[0] + 1;
4500
4501                         /*
4502                          * These should go off if the layout changes and the
4503                          * driver needs to catch up.
4504                          */
4505                         MPASS(sc->tids.hpftid_base == 0);
4506                         MPASS(sc->tids.tid_base == sc->tids.nhpftids);
4507                 }
4508
4509                 param[0] = FW_PARAM_PFVF(RAWF_START);
4510                 param[1] = FW_PARAM_PFVF(RAWF_END);
4511                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4512                 if (rc != 0) {
4513                         device_printf(sc->dev,
4514                            "failed to query rawf parameters: %d.\n", rc);
4515                         return (rc);
4516                 }
4517                 if ((int)val[1] > (int)val[0]) {
4518                         sc->rawf_base = val[0];
4519                         sc->nrawf = val[1] - val[0] + 1;
4520                 }
4521         }
4522
4523         /*
4524          * MPSBGMAP is queried separately because only recent firmwares support
4525          * it as a parameter and we don't want the compound query above to fail
4526          * on older firmwares.
4527          */
4528         param[0] = FW_PARAM_DEV(MPSBGMAP);
4529         val[0] = 0;
4530         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4531         if (rc == 0)
4532                 sc->params.mps_bg_map = val[0];
4533         else
4534                 sc->params.mps_bg_map = 0;
4535
4536         /*
4537          * Determine whether the firmware supports the filter2 work request.
4538          * This is queried separately for the same reason as MPSBGMAP above.
4539          */
4540         param[0] = FW_PARAM_DEV(FILTER2_WR);
4541         val[0] = 0;
4542         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4543         if (rc == 0)
4544                 sc->params.filter2_wr_support = val[0] != 0;
4545         else
4546                 sc->params.filter2_wr_support = 0;
4547
4548         /*
4549          * Find out whether we're allowed to use the ULPTX MEMWRITE DSGL.
4550          * This is queried separately for the same reason as other params above.
4551          */
4552         param[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
4553         val[0] = 0;
4554         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4555         if (rc == 0)
4556                 sc->params.ulptx_memwrite_dsgl = val[0] != 0;
4557         else
4558                 sc->params.ulptx_memwrite_dsgl = false;
4559
4560         /* FW_RI_FR_NSMR_TPTE_WR support */
4561         param[0] = FW_PARAM_DEV(RI_FR_NSMR_TPTE_WR);
4562         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4563         if (rc == 0)
4564                 sc->params.fr_nsmr_tpte_wr_support = val[0] != 0;
4565         else
4566                 sc->params.fr_nsmr_tpte_wr_support = false;
4567
4568         param[0] = FW_PARAM_PFVF(MAX_PKTS_PER_ETH_TX_PKTS_WR);
4569         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4570         if (rc == 0)
4571                 sc->params.max_pkts_per_eth_tx_pkts_wr = val[0];
4572         else
4573                 sc->params.max_pkts_per_eth_tx_pkts_wr = 15;
4574
4575         /* get capabilites */
4576         bzero(&caps, sizeof(caps));
4577         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4578             F_FW_CMD_REQUEST | F_FW_CMD_READ);
4579         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4580         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4581         if (rc != 0) {
4582                 device_printf(sc->dev,
4583                     "failed to get card capabilities: %d.\n", rc);
4584                 return (rc);
4585         }
4586
4587 #define READ_CAPS(x) do { \
4588         sc->x = htobe16(caps.x); \
4589 } while (0)
4590         READ_CAPS(nbmcaps);
4591         READ_CAPS(linkcaps);
4592         READ_CAPS(switchcaps);
4593         READ_CAPS(niccaps);
4594         READ_CAPS(toecaps);
4595         READ_CAPS(rdmacaps);
4596         READ_CAPS(cryptocaps);
4597         READ_CAPS(iscsicaps);
4598         READ_CAPS(fcoecaps);
4599
4600         if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
4601                 MPASS(chip_id(sc) > CHELSIO_T4);
4602                 MPASS(sc->toecaps == 0);
4603                 sc->toecaps = 0;
4604
4605                 param[0] = FW_PARAM_DEV(NTID);
4606                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4607                 if (rc != 0) {
4608                         device_printf(sc->dev,
4609                             "failed to query HASHFILTER parameters: %d.\n", rc);
4610                         return (rc);
4611                 }
4612                 sc->tids.ntids = val[0];
4613                 if (sc->params.fw_vers < FW_VERSION32(1, 20, 5, 0)) {
4614                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4615                         sc->tids.ntids -= sc->tids.nhpftids;
4616                 }
4617                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4618                 sc->params.hash_filter = 1;
4619         }
4620         if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
4621                 param[0] = FW_PARAM_PFVF(ETHOFLD_START);
4622                 param[1] = FW_PARAM_PFVF(ETHOFLD_END);
4623                 param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4624                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
4625                 if (rc != 0) {
4626                         device_printf(sc->dev,
4627                             "failed to query NIC parameters: %d.\n", rc);
4628                         return (rc);
4629                 }
4630                 if ((int)val[1] > (int)val[0]) {
4631                         sc->tids.etid_base = val[0];
4632                         sc->tids.etid_end = val[1];
4633                         sc->tids.netids = val[1] - val[0] + 1;
4634                         sc->params.eo_wr_cred = val[2];
4635                         sc->params.ethoffload = 1;
4636                 }
4637         }
4638         if (sc->toecaps) {
4639                 /* query offload-related parameters */
4640                 param[0] = FW_PARAM_DEV(NTID);
4641                 param[1] = FW_PARAM_PFVF(SERVER_START);
4642                 param[2] = FW_PARAM_PFVF(SERVER_END);
4643                 param[3] = FW_PARAM_PFVF(TDDP_START);
4644                 param[4] = FW_PARAM_PFVF(TDDP_END);
4645                 param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4646                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4647                 if (rc != 0) {
4648                         device_printf(sc->dev,
4649                             "failed to query TOE parameters: %d.\n", rc);
4650                         return (rc);
4651                 }
4652                 sc->tids.ntids = val[0];
4653                 if (sc->params.fw_vers < FW_VERSION32(1, 20, 5, 0)) {
4654                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4655                         sc->tids.ntids -= sc->tids.nhpftids;
4656                 }
4657                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4658                 if ((int)val[2] > (int)val[1]) {
4659                         sc->tids.stid_base = val[1];
4660                         sc->tids.nstids = val[2] - val[1] + 1;
4661                 }
4662                 sc->vres.ddp.start = val[3];
4663                 sc->vres.ddp.size = val[4] - val[3] + 1;
4664                 sc->params.ofldq_wr_cred = val[5];
4665                 sc->params.offload = 1;
4666         } else {
4667                 /*
4668                  * The firmware attempts memfree TOE configuration for -SO cards
4669                  * and will report toecaps=0 if it runs out of resources (this
4670                  * depends on the config file).  It may not report 0 for other
4671                  * capabilities dependent on the TOE in this case.  Set them to
4672                  * 0 here so that the driver doesn't bother tracking resources
4673                  * that will never be used.
4674                  */
4675                 sc->iscsicaps = 0;
4676                 sc->rdmacaps = 0;
4677         }
4678         if (sc->rdmacaps) {
4679                 param[0] = FW_PARAM_PFVF(STAG_START);
4680                 param[1] = FW_PARAM_PFVF(STAG_END);
4681                 param[2] = FW_PARAM_PFVF(RQ_START);
4682                 param[3] = FW_PARAM_PFVF(RQ_END);
4683                 param[4] = FW_PARAM_PFVF(PBL_START);
4684                 param[5] = FW_PARAM_PFVF(PBL_END);
4685                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4686                 if (rc != 0) {
4687                         device_printf(sc->dev,
4688                             "failed to query RDMA parameters(1): %d.\n", rc);
4689                         return (rc);
4690                 }
4691                 sc->vres.stag.start = val[0];
4692                 sc->vres.stag.size = val[1] - val[0] + 1;
4693                 sc->vres.rq.start = val[2];
4694                 sc->vres.rq.size = val[3] - val[2] + 1;
4695                 sc->vres.pbl.start = val[4];
4696                 sc->vres.pbl.size = val[5] - val[4] + 1;
4697
4698                 param[0] = FW_PARAM_PFVF(SQRQ_START);
4699                 param[1] = FW_PARAM_PFVF(SQRQ_END);
4700                 param[2] = FW_PARAM_PFVF(CQ_START);
4701                 param[3] = FW_PARAM_PFVF(CQ_END);
4702                 param[4] = FW_PARAM_PFVF(OCQ_START);
4703                 param[5] = FW_PARAM_PFVF(OCQ_END);
4704                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4705                 if (rc != 0) {
4706                         device_printf(sc->dev,
4707                             "failed to query RDMA parameters(2): %d.\n", rc);
4708                         return (rc);
4709                 }
4710                 sc->vres.qp.start = val[0];
4711                 sc->vres.qp.size = val[1] - val[0] + 1;
4712                 sc->vres.cq.start = val[2];
4713                 sc->vres.cq.size = val[3] - val[2] + 1;
4714                 sc->vres.ocq.start = val[4];
4715                 sc->vres.ocq.size = val[5] - val[4] + 1;
4716
4717                 param[0] = FW_PARAM_PFVF(SRQ_START);
4718                 param[1] = FW_PARAM_PFVF(SRQ_END);
4719                 param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
4720                 param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
4721                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
4722                 if (rc != 0) {
4723                         device_printf(sc->dev,
4724                             "failed to query RDMA parameters(3): %d.\n", rc);
4725                         return (rc);
4726                 }
4727                 sc->vres.srq.start = val[0];
4728                 sc->vres.srq.size = val[1] - val[0] + 1;
4729                 sc->params.max_ordird_qp = val[2];
4730                 sc->params.max_ird_adapter = val[3];
4731         }
4732         if (sc->iscsicaps) {
4733                 param[0] = FW_PARAM_PFVF(ISCSI_START);
4734                 param[1] = FW_PARAM_PFVF(ISCSI_END);
4735                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4736                 if (rc != 0) {
4737                         device_printf(sc->dev,
4738                             "failed to query iSCSI parameters: %d.\n", rc);
4739                         return (rc);
4740                 }
4741                 sc->vres.iscsi.start = val[0];
4742                 sc->vres.iscsi.size = val[1] - val[0] + 1;
4743         }
4744         if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
4745                 param[0] = FW_PARAM_PFVF(TLS_START);
4746                 param[1] = FW_PARAM_PFVF(TLS_END);
4747                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4748                 if (rc != 0) {
4749                         device_printf(sc->dev,
4750                             "failed to query TLS parameters: %d.\n", rc);
4751                         return (rc);
4752                 }
4753                 sc->vres.key.start = val[0];
4754                 sc->vres.key.size = val[1] - val[0] + 1;
4755         }
4756
4757         t4_init_sge_params(sc);
4758
4759         /*
4760          * We've got the params we wanted to query via the firmware.  Now grab
4761          * some others directly from the chip.
4762          */
4763         rc = t4_read_chip_settings(sc);
4764
4765         return (rc);
4766 }
4767
4768 #ifdef KERN_TLS
4769 static void
4770 ktls_tick(void *arg)
4771 {
4772         struct adapter *sc;
4773         uint32_t tstamp;
4774
4775         sc = arg;
4776
4777         tstamp = tcp_ts_getticks();
4778         t4_write_reg(sc, A_TP_SYNC_TIME_HI, tstamp >> 1);
4779         t4_write_reg(sc, A_TP_SYNC_TIME_LO, tstamp << 31);
4780
4781         callout_schedule_sbt(&sc->ktls_tick, SBT_1MS, 0, C_HARDCLOCK);
4782 }
4783
4784 static void
4785 t4_enable_kern_tls(struct adapter *sc)
4786 {
4787         uint32_t m, v;
4788
4789         m = F_ENABLECBYP;
4790         v = F_ENABLECBYP;
4791         t4_set_reg_field(sc, A_TP_PARA_REG6, m, v);
4792
4793         m = F_CPL_FLAGS_UPDATE_EN | F_SEQ_UPDATE_EN;
4794         v = F_CPL_FLAGS_UPDATE_EN | F_SEQ_UPDATE_EN;
4795         t4_set_reg_field(sc, A_ULP_TX_CONFIG, m, v);
4796
4797         m = F_NICMODE;
4798         v = F_NICMODE;
4799         t4_set_reg_field(sc, A_TP_IN_CONFIG, m, v);
4800
4801         m = F_LOOKUPEVERYPKT;
4802         v = 0;
4803         t4_set_reg_field(sc, A_TP_INGRESS_CONFIG, m, v);
4804
4805         m = F_TXDEFERENABLE | F_DISABLEWINDOWPSH | F_DISABLESEPPSHFLAG;
4806         v = F_DISABLEWINDOWPSH;
4807         t4_set_reg_field(sc, A_TP_PC_CONFIG, m, v);
4808
4809         m = V_TIMESTAMPRESOLUTION(M_TIMESTAMPRESOLUTION);
4810         v = V_TIMESTAMPRESOLUTION(0x1f);
4811         t4_set_reg_field(sc, A_TP_TIMER_RESOLUTION, m, v);
4812
4813         sc->flags |= KERN_TLS_OK;
4814
4815         sc->tlst.inline_keys = t4_tls_inline_keys;
4816         sc->tlst.combo_wrs = t4_tls_combo_wrs;
4817 }
4818 #endif
4819
4820 static int
4821 set_params__post_init(struct adapter *sc)
4822 {
4823         uint32_t mask, param, val;
4824 #ifdef TCP_OFFLOAD
4825         int i, v, shift;
4826 #endif
4827
4828         /* ask for encapsulated CPLs */
4829         param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
4830         val = 1;
4831         (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4832
4833         /* Enable 32b port caps if the firmware supports it. */
4834         param = FW_PARAM_PFVF(PORT_CAPS32);
4835         val = 1;
4836         if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
4837                 sc->params.port_caps32 = 1;
4838
4839         /* Let filter + maskhash steer to a part of the VI's RSS region. */
4840         val = 1 << (G_MASKSIZE(t4_read_reg(sc, A_TP_RSS_CONFIG_TNL)) - 1);
4841         t4_set_reg_field(sc, A_TP_RSS_CONFIG_TNL, V_MASKFILTER(M_MASKFILTER),
4842             V_MASKFILTER(val - 1));
4843
4844         mask = F_DROPERRORANY | F_DROPERRORMAC | F_DROPERRORIPVER |
4845             F_DROPERRORFRAG | F_DROPERRORATTACK | F_DROPERRORETHHDRLEN |
4846             F_DROPERRORIPHDRLEN | F_DROPERRORTCPHDRLEN | F_DROPERRORPKTLEN |
4847             F_DROPERRORTCPOPT | F_DROPERRORCSUMIP | F_DROPERRORCSUM;
4848         val = 0;
4849         if (chip_id(sc) < CHELSIO_T6 && t4_attack_filter != 0) {
4850                 t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, F_ATTACKFILTERENABLE,
4851                     F_ATTACKFILTERENABLE);
4852                 val |= F_DROPERRORATTACK;
4853         }
4854         if (t4_drop_ip_fragments != 0) {
4855                 t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, F_FRAGMENTDROP,
4856                     F_FRAGMENTDROP);
4857                 val |= F_DROPERRORFRAG;
4858         }
4859         if (t4_drop_pkts_with_l2_errors != 0)
4860                 val |= F_DROPERRORMAC | F_DROPERRORETHHDRLEN;
4861         if (t4_drop_pkts_with_l3_errors != 0) {
4862                 val |= F_DROPERRORIPVER | F_DROPERRORIPHDRLEN |
4863                     F_DROPERRORCSUMIP;
4864         }
4865         if (t4_drop_pkts_with_l4_errors != 0) {
4866                 val |= F_DROPERRORTCPHDRLEN | F_DROPERRORPKTLEN |
4867                     F_DROPERRORTCPOPT | F_DROPERRORCSUM;
4868         }
4869         t4_set_reg_field(sc, A_TP_ERR_CONFIG, mask, val);
4870
4871 #ifdef TCP_OFFLOAD
4872         /*
4873          * Override the TOE timers with user provided tunables.  This is not the
4874          * recommended way to change the timers (the firmware config file is) so
4875          * these tunables are not documented.
4876          *
4877          * All the timer tunables are in microseconds.
4878          */
4879         if (t4_toe_keepalive_idle != 0) {
4880                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
4881                 v &= M_KEEPALIVEIDLE;
4882                 t4_set_reg_field(sc, A_TP_KEEP_IDLE,
4883                     V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
4884         }
4885         if (t4_toe_keepalive_interval != 0) {
4886                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
4887                 v &= M_KEEPALIVEINTVL;
4888                 t4_set_reg_field(sc, A_TP_KEEP_INTVL,
4889                     V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
4890         }
4891         if (t4_toe_keepalive_count != 0) {
4892                 v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
4893                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4894                     V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
4895                     V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
4896                     V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
4897         }
4898         if (t4_toe_rexmt_min != 0) {
4899                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
4900                 v &= M_RXTMIN;
4901                 t4_set_reg_field(sc, A_TP_RXT_MIN,
4902                     V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
4903         }
4904         if (t4_toe_rexmt_max != 0) {
4905                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
4906                 v &= M_RXTMAX;
4907                 t4_set_reg_field(sc, A_TP_RXT_MAX,
4908                     V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
4909         }
4910         if (t4_toe_rexmt_count != 0) {
4911                 v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
4912                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4913                     V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
4914                     V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
4915                     V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
4916         }
4917         for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
4918                 if (t4_toe_rexmt_backoff[i] != -1) {
4919                         v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
4920                         shift = (i & 3) << 3;
4921                         t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
4922                             M_TIMERBACKOFFINDEX0 << shift, v << shift);
4923                 }
4924         }
4925 #endif
4926
4927 #ifdef KERN_TLS
4928         if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
4929             sc->toecaps & FW_CAPS_CONFIG_TOE) {
4930                 if (t4_kern_tls != 0)
4931                         t4_enable_kern_tls(sc);
4932                 else {
4933                         /*
4934                          * Limit TOE connections to 2 reassembly
4935                          * "islands".  This is required for TOE TLS
4936                          * connections to downgrade to plain TOE
4937                          * connections if an unsupported TLS version
4938                          * or ciphersuite is used.
4939                          */
4940                         t4_tp_wr_bits_indirect(sc, A_TP_FRAG_CONFIG,
4941                             V_PASSMODE(M_PASSMODE), V_PASSMODE(2));
4942                 }
4943         }
4944 #endif
4945         return (0);
4946 }
4947
4948 #undef FW_PARAM_PFVF
4949 #undef FW_PARAM_DEV
4950
4951 static void
4952 t4_set_desc(struct adapter *sc)
4953 {
4954         char buf[128];
4955         struct adapter_params *p = &sc->params;
4956
4957         snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
4958
4959         device_set_desc_copy(sc->dev, buf);
4960 }
4961
4962 static inline void
4963 ifmedia_add4(struct ifmedia *ifm, int m)
4964 {
4965
4966         ifmedia_add(ifm, m, 0, NULL);
4967         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL);
4968         ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL);
4969         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
4970 }
4971
4972 /*
4973  * This is the selected media, which is not quite the same as the active media.
4974  * The media line in ifconfig is "media: Ethernet selected (active)" if selected
4975  * and active are not the same, and "media: Ethernet selected" otherwise.
4976  */
4977 static void
4978 set_current_media(struct port_info *pi)
4979 {
4980         struct link_config *lc;
4981         struct ifmedia *ifm;
4982         int mword;
4983         u_int speed;
4984
4985         PORT_LOCK_ASSERT_OWNED(pi);
4986
4987         /* Leave current media alone if it's already set to IFM_NONE. */
4988         ifm = &pi->media;
4989         if (ifm->ifm_cur != NULL &&
4990             IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
4991                 return;
4992
4993         lc = &pi->link_cfg;
4994         if (lc->requested_aneg != AUTONEG_DISABLE &&
4995             lc->pcaps & FW_PORT_CAP32_ANEG) {
4996                 ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
4997                 return;
4998         }
4999         mword = IFM_ETHER | IFM_FDX;
5000         if (lc->requested_fc & PAUSE_TX)
5001                 mword |= IFM_ETH_TXPAUSE;
5002         if (lc->requested_fc & PAUSE_RX)
5003                 mword |= IFM_ETH_RXPAUSE;
5004         if (lc->requested_speed == 0)
5005                 speed = port_top_speed(pi) * 1000;      /* Gbps -> Mbps */
5006         else
5007                 speed = lc->requested_speed;
5008         mword |= port_mword(pi, speed_to_fwcap(speed));
5009         ifmedia_set(ifm, mword);
5010 }
5011
5012 /*
5013  * Returns true if the ifmedia list for the port cannot change.
5014  */
5015 static bool
5016 fixed_ifmedia(struct port_info *pi)
5017 {
5018
5019         return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
5020             pi->port_type == FW_PORT_TYPE_BT_XFI ||
5021             pi->port_type == FW_PORT_TYPE_BT_XAUI ||
5022             pi->port_type == FW_PORT_TYPE_KX4 ||
5023             pi->port_type == FW_PORT_TYPE_KX ||
5024             pi->port_type == FW_PORT_TYPE_KR ||
5025             pi->port_type == FW_PORT_TYPE_BP_AP ||
5026             pi->port_type == FW_PORT_TYPE_BP4_AP ||
5027             pi->port_type == FW_PORT_TYPE_BP40_BA ||
5028             pi->port_type == FW_PORT_TYPE_KR4_100G ||
5029             pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
5030             pi->port_type == FW_PORT_TYPE_KR_XLAUI);
5031 }
5032
5033 static void
5034 build_medialist(struct port_info *pi)
5035 {
5036         uint32_t ss, speed;
5037         int unknown, mword, bit;
5038         struct link_config *lc;
5039         struct ifmedia *ifm;
5040
5041         PORT_LOCK_ASSERT_OWNED(pi);
5042
5043         if (pi->flags & FIXED_IFMEDIA)
5044                 return;
5045
5046         /*
5047          * Rebuild the ifmedia list.
5048          */
5049         ifm = &pi->media;
5050         ifmedia_removeall(ifm);
5051         lc = &pi->link_cfg;
5052         ss = G_FW_PORT_CAP32_SPEED(lc->pcaps); /* Supported Speeds */
5053         if (__predict_false(ss == 0)) { /* not supposed to happen. */
5054                 MPASS(ss != 0);
5055 no_media:
5056                 MPASS(LIST_EMPTY(&ifm->ifm_list));
5057                 ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL);
5058                 ifmedia_set(ifm, IFM_ETHER | IFM_NONE);
5059                 return;
5060         }
5061
5062         unknown = 0;
5063         for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
5064                 speed = 1 << bit;
5065                 MPASS(speed & M_FW_PORT_CAP32_SPEED);
5066                 if (ss & speed) {
5067                         mword = port_mword(pi, speed);
5068                         if (mword == IFM_NONE) {
5069                                 goto no_media;
5070                         } else if (mword == IFM_UNKNOWN)
5071                                 unknown++;
5072                         else
5073                                 ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword);
5074                 }
5075         }
5076         if (unknown > 0) /* Add one unknown for all unknown media types. */
5077                 ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
5078         if (lc->pcaps & FW_PORT_CAP32_ANEG)
5079                 ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);
5080
5081         set_current_media(pi);
5082 }
5083
5084 /*
5085  * Initialize the requested fields in the link config based on driver tunables.
5086  */
5087 static void
5088 init_link_config(struct port_info *pi)
5089 {
5090         struct link_config *lc = &pi->link_cfg;
5091
5092         PORT_LOCK_ASSERT_OWNED(pi);
5093
5094         lc->requested_speed = 0;
5095
5096         if (t4_autoneg == 0)
5097                 lc->requested_aneg = AUTONEG_DISABLE;
5098         else if (t4_autoneg == 1)
5099                 lc->requested_aneg = AUTONEG_ENABLE;
5100         else
5101                 lc->requested_aneg = AUTONEG_AUTO;
5102
5103         lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
5104             PAUSE_AUTONEG);
5105
5106         if (t4_fec & FEC_AUTO)
5107                 lc->requested_fec = FEC_AUTO;
5108         else if (t4_fec == 0)
5109                 lc->requested_fec = FEC_NONE;
5110         else {
5111                 /* -1 is handled by the FEC_AUTO block above and not here. */
5112                 lc->requested_fec = t4_fec &
5113                     (FEC_RS | FEC_BASER_RS | FEC_NONE | FEC_MODULE);
5114                 if (lc->requested_fec == 0)
5115                         lc->requested_fec = FEC_AUTO;
5116         }
5117 }
5118
5119 /*
5120  * Makes sure that all requested settings comply with what's supported by the
5121  * port.  Returns the number of settings that were invalid and had to be fixed.
5122  */
5123 static int
5124 fixup_link_config(struct port_info *pi)
5125 {
5126         int n = 0;
5127         struct link_config *lc = &pi->link_cfg;
5128         uint32_t fwspeed;
5129
5130         PORT_LOCK_ASSERT_OWNED(pi);
5131
5132         /* Speed (when not autonegotiating) */
5133         if (lc->requested_speed != 0) {
5134                 fwspeed = speed_to_fwcap(lc->requested_speed);
5135                 if ((fwspeed & lc->pcaps) == 0) {
5136                         n++;
5137                         lc->requested_speed = 0;
5138                 }
5139         }
5140
5141         /* Link autonegotiation */
5142         MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
5143             lc->requested_aneg == AUTONEG_DISABLE ||
5144             lc->requested_aneg == AUTONEG_AUTO);
5145         if (lc->requested_aneg == AUTONEG_ENABLE &&
5146             !(lc->pcaps & FW_PORT_CAP32_ANEG)) {
5147                 n++;
5148                 lc->requested_aneg = AUTONEG_AUTO;
5149         }
5150
5151         /* Flow control */
5152         MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
5153         if (lc->requested_fc & PAUSE_TX &&
5154             !(lc->pcaps & FW_PORT_CAP32_FC_TX)) {
5155                 n++;
5156                 lc->requested_fc &= ~PAUSE_TX;
5157         }
5158         if (lc->requested_fc & PAUSE_RX &&
5159             !(lc->pcaps & FW_PORT_CAP32_FC_RX)) {
5160                 n++;
5161                 lc->requested_fc &= ~PAUSE_RX;
5162         }
5163         if (!(lc->requested_fc & PAUSE_AUTONEG) &&
5164             !(lc->pcaps & FW_PORT_CAP32_FORCE_PAUSE)) {
5165                 n++;
5166                 lc->requested_fc |= PAUSE_AUTONEG;
5167         }
5168
5169         /* FEC */
5170         if ((lc->requested_fec & FEC_RS &&
5171             !(lc->pcaps & FW_PORT_CAP32_FEC_RS)) ||
5172             (lc->requested_fec & FEC_BASER_RS &&
5173             !(lc->pcaps & FW_PORT_CAP32_FEC_BASER_RS))) {
5174                 n++;
5175                 lc->requested_fec = FEC_AUTO;
5176         }
5177
5178         return (n);
5179 }
5180
5181 /*
5182  * Apply the requested L1 settings, which are expected to be valid, to the
5183  * hardware.
5184  */
5185 static int
5186 apply_link_config(struct port_info *pi)
5187 {
5188         struct adapter *sc = pi->adapter;
5189         struct link_config *lc = &pi->link_cfg;
5190         int rc;
5191
5192 #ifdef INVARIANTS
5193         ASSERT_SYNCHRONIZED_OP(sc);
5194         PORT_LOCK_ASSERT_OWNED(pi);
5195
5196         if (lc->requested_aneg == AUTONEG_ENABLE)
5197                 MPASS(lc->pcaps & FW_PORT_CAP32_ANEG);
5198         if (!(lc->requested_fc & PAUSE_AUTONEG))
5199                 MPASS(lc->pcaps & FW_PORT_CAP32_FORCE_PAUSE);
5200         if (lc->requested_fc & PAUSE_TX)
5201                 MPASS(lc->pcaps & FW_PORT_CAP32_FC_TX);
5202         if (lc->requested_fc & PAUSE_RX)
5203                 MPASS(lc->pcaps & FW_PORT_CAP32_FC_RX);
5204         if (lc->requested_fec & FEC_RS)
5205                 MPASS(lc->pcaps & FW_PORT_CAP32_FEC_RS);
5206         if (lc->requested_fec & FEC_BASER_RS)
5207                 MPASS(lc->pcaps & FW_PORT_CAP32_FEC_BASER_RS);
5208 #endif
5209         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
5210         if (rc != 0) {
5211                 /* Don't complain if the VF driver gets back an EPERM. */
5212                 if (!(sc->flags & IS_VF) || rc != FW_EPERM)
5213                         device_printf(pi->dev, "l1cfg failed: %d\n", rc);
5214         } else {
5215                 /*
5216                  * An L1_CFG will almost always result in a link-change event if
5217                  * the link is up, and the driver will refresh the actual
5218                  * fec/fc/etc. when the notification is processed.  If the link
5219                  * is down then the actual settings are meaningless.
5220                  *
5221                  * This takes care of the case where a change in the L1 settings
5222                  * may not result in a notification.
5223                  */
5224                 if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
5225                         lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
5226         }
5227         return (rc);
5228 }
5229
5230 #define FW_MAC_EXACT_CHUNK      7
5231 struct mcaddr_ctx {
5232         struct ifnet *ifp;
5233         const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
5234         uint64_t hash;
5235         int i;
5236         int del;
5237         int rc;
5238 };
5239
5240 static u_int
5241 add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
5242 {
5243         struct mcaddr_ctx *ctx = arg;
5244         struct vi_info *vi = ctx->ifp->if_softc;
5245         struct port_info *pi = vi->pi;
5246         struct adapter *sc = pi->adapter;
5247
5248         if (ctx->rc < 0)
5249                 return (0);
5250
5251         ctx->mcaddr[ctx->i] = LLADDR(sdl);
5252         MPASS(ETHER_IS_MULTICAST(ctx->mcaddr[ctx->i]));
5253         ctx->i++;
5254
5255         if (ctx->i == FW_MAC_EXACT_CHUNK) {
5256                 ctx->rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, ctx->del,
5257                     ctx->i, ctx->mcaddr, NULL, &ctx->hash, 0);
5258                 if (ctx->rc < 0) {
5259                         int j;
5260
5261                         for (j = 0; j < ctx->i; j++) {
5262                                 if_printf(ctx->ifp,
5263                                     "failed to add mc address"
5264                                     " %02x:%02x:%02x:"
5265                                     "%02x:%02x:%02x rc=%d\n",
5266                                     ctx->mcaddr[j][0], ctx->mcaddr[j][1],
5267                                     ctx->mcaddr[j][2], ctx->mcaddr[j][3],
5268                                     ctx->mcaddr[j][4], ctx->mcaddr[j][5],
5269                                     -ctx->rc);
5270                         }
5271                         return (0);
5272                 }
5273                 ctx->del = 0;
5274                 ctx->i = 0;
5275         }
5276
5277         return (1);
5278 }
5279
5280 /*
5281  * Program the port's XGMAC based on parameters in ifnet.  The caller also
5282  * indicates which parameters should be programmed (the rest are left alone).
5283  */
5284 int
5285 update_mac_settings(struct ifnet *ifp, int flags)
5286 {
5287         int rc = 0;
5288         struct vi_info *vi = ifp->if_softc;
5289         struct port_info *pi = vi->pi;
5290         struct adapter *sc = pi->adapter;
5291         int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
5292         uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
5293
5294         ASSERT_SYNCHRONIZED_OP(sc);
5295         KASSERT(flags, ("%s: not told what to update.", __func__));
5296
5297         if (flags & XGMAC_MTU)
5298                 mtu = ifp->if_mtu;
5299
5300         if (flags & XGMAC_PROMISC)
5301                 promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
5302
5303         if (flags & XGMAC_ALLMULTI)
5304                 allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
5305
5306         if (flags & XGMAC_VLANEX)
5307                 vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
5308
5309         if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
5310                 rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
5311                     allmulti, 1, vlanex, false);
5312                 if (rc) {
5313                         if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
5314                             rc);
5315                         return (rc);
5316                 }
5317         }
5318
5319         if (flags & XGMAC_UCADDR) {
5320                 uint8_t ucaddr[ETHER_ADDR_LEN];
5321
5322                 bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
5323                 rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
5324                     ucaddr, true, &vi->smt_idx);
5325                 if (rc < 0) {
5326                         rc = -rc;
5327                         if_printf(ifp, "change_mac failed: %d\n", rc);
5328                         return (rc);
5329                 } else {
5330                         vi->xact_addr_filt = rc;
5331                         rc = 0;
5332                 }
5333         }
5334
5335         if (flags & XGMAC_MCADDRS) {
5336                 struct epoch_tracker et;
5337                 struct mcaddr_ctx ctx;
5338                 int j;
5339
5340                 ctx.ifp = ifp;
5341                 ctx.hash = 0;
5342                 ctx.i = 0;
5343                 ctx.del = 1;
5344                 ctx.rc = 0;
5345                 /*
5346                  * Unlike other drivers, we accumulate list of pointers into
5347                  * interface address lists and we need to keep it safe even
5348                  * after if_foreach_llmaddr() returns, thus we must enter the
5349                  * network epoch.
5350                  */
5351                 NET_EPOCH_ENTER(et);
5352                 if_foreach_llmaddr(ifp, add_maddr, &ctx);
5353                 if (ctx.rc < 0) {
5354                         NET_EPOCH_EXIT(et);
5355                         rc = -ctx.rc;
5356                         return (rc);
5357                 }
5358                 if (ctx.i > 0) {
5359                         rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
5360                             ctx.del, ctx.i, ctx.mcaddr, NULL, &ctx.hash, 0);
5361                         NET_EPOCH_EXIT(et);
5362                         if (rc < 0) {
5363                                 rc = -rc;
5364                                 for (j = 0; j < ctx.i; j++) {
5365                                         if_printf(ifp,
5366                                             "failed to add mcast address"
5367                                             " %02x:%02x:%02x:"
5368                                             "%02x:%02x:%02x rc=%d\n",
5369                                             ctx.mcaddr[j][0], ctx.mcaddr[j][1],
5370                                             ctx.mcaddr[j][2], ctx.mcaddr[j][3],
5371                                             ctx.mcaddr[j][4], ctx.mcaddr[j][5],
5372                                             rc);
5373                                 }
5374                                 return (rc);
5375                         }
5376                         ctx.del = 0;
5377                 } else
5378                         NET_EPOCH_EXIT(et);
5379
5380                 rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, ctx.hash, 0);
5381                 if (rc != 0)
5382                         if_printf(ifp, "failed to set mcast address hash: %d\n",
5383                             rc);
5384                 if (ctx.del == 0) {
5385                         /* We clobbered the VXLAN entry if there was one. */
5386                         pi->vxlan_tcam_entry = false;
5387                 }
5388         }
5389
5390         if (IS_MAIN_VI(vi) && sc->vxlan_refcount > 0 &&
5391             pi->vxlan_tcam_entry == false) {
5392                 rc = t4_alloc_raw_mac_filt(sc, vi->viid, match_all_mac,
5393                     match_all_mac, sc->rawf_base + pi->port_id, 1, pi->port_id,
5394                     true);
5395                 if (rc < 0) {
5396                         rc = -rc;
5397                         if_printf(ifp, "failed to add VXLAN TCAM entry: %d.\n",
5398                             rc);
5399                 } else {
5400                         MPASS(rc == sc->rawf_base + pi->port_id);
5401                         rc = 0;
5402                         pi->vxlan_tcam_entry = true;
5403                 }
5404         }
5405
5406         return (rc);
5407 }
5408
5409 /*
5410  * {begin|end}_synchronized_op must be called from the same thread.
5411  */
5412 int
5413 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
5414     char *wmesg)
5415 {
5416         int rc, pri;
5417
5418 #ifdef WITNESS
5419         /* the caller thinks it's ok to sleep, but is it really? */
5420         if (flags & SLEEP_OK)
5421                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
5422                     "begin_synchronized_op");
5423 #endif
5424
5425         if (INTR_OK)
5426                 pri = PCATCH;
5427         else
5428                 pri = 0;
5429
5430         ADAPTER_LOCK(sc);
5431         for (;;) {
5432
5433                 if (vi && IS_DOOMED(vi)) {
5434                         rc = ENXIO;
5435                         goto done;
5436                 }
5437
5438                 if (!IS_BUSY(sc)) {
5439                         rc = 0;
5440                         break;
5441                 }
5442
5443                 if (!(flags & SLEEP_OK)) {
5444                         rc = EBUSY;
5445                         goto done;
5446                 }
5447
5448                 if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
5449                         rc = EINTR;
5450                         goto done;
5451                 }
5452         }
5453
5454         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
5455         SET_BUSY(sc);
5456 #ifdef INVARIANTS
5457         sc->last_op = wmesg;
5458         sc->last_op_thr = curthread;
5459         sc->last_op_flags = flags;
5460 #endif
5461
5462 done:
5463         if (!(flags & HOLD_LOCK) || rc)
5464                 ADAPTER_UNLOCK(sc);
5465
5466         return (rc);
5467 }
5468
5469 /*
5470  * Tell if_ioctl and if_init that the VI is going away.  This is
5471  * special variant of begin_synchronized_op and must be paired with a
5472  * call to end_synchronized_op.
5473  */
5474 void
5475 doom_vi(struct adapter *sc, struct vi_info *vi)
5476 {
5477
5478         ADAPTER_LOCK(sc);
5479         SET_DOOMED(vi);
5480         wakeup(&sc->flags);
5481         while (IS_BUSY(sc))
5482                 mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
5483         SET_BUSY(sc);
5484 #ifdef INVARIANTS
5485         sc->last_op = "t4detach";
5486         sc->last_op_thr = curthread;
5487         sc->last_op_flags = 0;
5488 #endif
5489         ADAPTER_UNLOCK(sc);
5490 }
5491
5492 /*
5493  * {begin|end}_synchronized_op must be called from the same thread.
5494  */
5495 void
5496 end_synchronized_op(struct adapter *sc, int flags)
5497 {
5498
5499         if (flags & LOCK_HELD)
5500                 ADAPTER_LOCK_ASSERT_OWNED(sc);
5501         else
5502                 ADAPTER_LOCK(sc);
5503
5504         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
5505         CLR_BUSY(sc);
5506         wakeup(&sc->flags);
5507         ADAPTER_UNLOCK(sc);
5508 }
5509
5510 static int
5511 cxgbe_init_synchronized(struct vi_info *vi)
5512 {
5513         struct port_info *pi = vi->pi;
5514         struct adapter *sc = pi->adapter;
5515         struct ifnet *ifp = vi->ifp;
5516         int rc = 0, i;
5517         struct sge_txq *txq;
5518
5519         ASSERT_SYNCHRONIZED_OP(sc);
5520
5521         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
5522                 return (0);     /* already running */
5523
5524         if (!(sc->flags & FULL_INIT_DONE) &&
5525             ((rc = adapter_full_init(sc)) != 0))
5526                 return (rc);    /* error message displayed already */
5527
5528         if (!(vi->flags & VI_INIT_DONE) &&
5529             ((rc = vi_full_init(vi)) != 0))
5530                 return (rc); /* error message displayed already */
5531
5532         rc = update_mac_settings(ifp, XGMAC_ALL);
5533         if (rc)
5534                 goto done;      /* error message displayed already */
5535
5536         PORT_LOCK(pi);
5537         if (pi->up_vis == 0) {
5538                 t4_update_port_info(pi);
5539                 fixup_link_config(pi);
5540                 build_medialist(pi);
5541                 apply_link_config(pi);
5542         }
5543
5544         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
5545         if (rc != 0) {
5546                 if_printf(ifp, "enable_vi failed: %d\n", rc);
5547                 PORT_UNLOCK(pi);
5548                 goto done;
5549         }
5550
5551         /*
5552          * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
5553          * if this changes.
5554          */
5555
5556         for_each_txq(vi, i, txq) {
5557                 TXQ_LOCK(txq);
5558                 txq->eq.flags |= EQ_ENABLED;
5559                 TXQ_UNLOCK(txq);
5560         }
5561
5562         /*
5563          * The first iq of the first port to come up is used for tracing.
5564          */
5565         if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
5566                 sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
5567                 t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
5568                     A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
5569                     V_QUEUENUMBER(sc->traceq));
5570                 pi->flags |= HAS_TRACEQ;
5571         }
5572
5573         /* all ok */
5574         pi->up_vis++;
5575         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5576
5577         if (pi->nvi > 1 || sc->flags & IS_VF)
5578                 callout_reset(&vi->tick, hz, vi_tick, vi);
5579         else
5580                 callout_reset(&pi->tick, hz, cxgbe_tick, pi);
5581         if (pi->link_cfg.link_ok)
5582                 t4_os_link_changed(pi);
5583         PORT_UNLOCK(pi);
5584 done:
5585         if (rc != 0)
5586                 cxgbe_uninit_synchronized(vi);
5587
5588         return (rc);
5589 }
5590
5591 /*
5592  * Idempotent.
5593  */
5594 static int
5595 cxgbe_uninit_synchronized(struct vi_info *vi)
5596 {
5597         struct port_info *pi = vi->pi;
5598         struct adapter *sc = pi->adapter;
5599         struct ifnet *ifp = vi->ifp;
5600         int rc, i;
5601         struct sge_txq *txq;
5602
5603         ASSERT_SYNCHRONIZED_OP(sc);
5604
5605         if (!(vi->flags & VI_INIT_DONE)) {
5606                 if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5607                         KASSERT(0, ("uninited VI is running"));
5608                         if_printf(ifp, "uninited VI with running ifnet.  "
5609                             "vi->flags 0x%016lx, if_flags 0x%08x, "
5610                             "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
5611                             ifp->if_drv_flags);
5612                 }
5613                 return (0);
5614         }
5615
5616         /*
5617          * Disable the VI so that all its data in either direction is discarded
5618          * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
5619          * tick) intact as the TP can deliver negative advice or data that it's
5620          * holding in its RAM (for an offloaded connection) even after the VI is
5621          * disabled.
5622          */
5623         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
5624         if (rc) {
5625                 if_printf(ifp, "disable_vi failed: %d\n", rc);
5626                 return (rc);
5627         }
5628
5629         for_each_txq(vi, i, txq) {
5630                 TXQ_LOCK(txq);
5631                 txq->eq.flags &= ~EQ_ENABLED;
5632                 TXQ_UNLOCK(txq);
5633         }
5634
5635         PORT_LOCK(pi);
5636         if (pi->nvi > 1 || sc->flags & IS_VF)
5637                 callout_stop(&vi->tick);
5638         else
5639                 callout_stop(&pi->tick);
5640         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5641                 PORT_UNLOCK(pi);
5642                 return (0);
5643         }
5644         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
5645         pi->up_vis--;
5646         if (pi->up_vis > 0) {
5647                 PORT_UNLOCK(pi);
5648                 return (0);
5649         }
5650
5651         pi->link_cfg.link_ok = false;
5652         pi->link_cfg.speed = 0;
5653         pi->link_cfg.link_down_rc = 255;
5654         t4_os_link_changed(pi);
5655         PORT_UNLOCK(pi);
5656
5657         return (0);
5658 }
5659
5660 /*
5661  * It is ok for this function to fail midway and return right away.  t4_detach
5662  * will walk the entire sc->irq list and clean up whatever is valid.
5663  */
5664 int
5665 t4_setup_intr_handlers(struct adapter *sc)
5666 {
5667         int rc, rid, p, q, v;
5668         char s[8];
5669         struct irq *irq;
5670         struct port_info *pi;
5671         struct vi_info *vi;
5672         struct sge *sge = &sc->sge;
5673         struct sge_rxq *rxq;
5674 #ifdef TCP_OFFLOAD
5675         struct sge_ofld_rxq *ofld_rxq;
5676 #endif
5677 #ifdef DEV_NETMAP
5678         struct sge_nm_rxq *nm_rxq;
5679 #endif
5680 #ifdef RSS
5681         int nbuckets = rss_getnumbuckets();
5682 #endif
5683
5684         /*
5685          * Setup interrupts.
5686          */
5687         irq = &sc->irq[0];
5688         rid = sc->intr_type == INTR_INTX ? 0 : 1;
5689         if (forwarding_intr_to_fwq(sc))
5690                 return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
5691
5692         /* Multiple interrupts. */
5693         if (sc->flags & IS_VF)
5694                 KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
5695                     ("%s: too few intr.", __func__));
5696         else
5697                 KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
5698                     ("%s: too few intr.", __func__));
5699
5700         /* The first one is always error intr on PFs */
5701         if (!(sc->flags & IS_VF)) {
5702                 rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
5703                 if (rc != 0)
5704                         return (rc);
5705                 irq++;
5706                 rid++;
5707         }
5708
5709         /* The second one is always the firmware event queue (first on VFs) */
5710         rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
5711         if (rc != 0)
5712                 return (rc);
5713         irq++;
5714         rid++;
5715
5716         for_each_port(sc, p) {
5717                 pi = sc->port[p];
5718                 for_each_vi(pi, v, vi) {
5719                         vi->first_intr = rid - 1;
5720
5721                         if (vi->nnmrxq > 0) {
5722                                 int n = max(vi->nrxq, vi->nnmrxq);
5723
5724                                 rxq = &sge->rxq[vi->first_rxq];
5725 #ifdef DEV_NETMAP
5726                                 nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
5727 #endif
5728                                 for (q = 0; q < n; q++) {
5729                                         snprintf(s, sizeof(s), "%x%c%x", p,
5730                                             'a' + v, q);
5731                                         if (q < vi->nrxq)
5732                                                 irq->rxq = rxq++;
5733 #ifdef DEV_NETMAP
5734                                         if (q < vi->nnmrxq)
5735                                                 irq->nm_rxq = nm_rxq++;
5736
5737                                         if (irq->nm_rxq != NULL &&
5738                                             irq->rxq == NULL) {
5739                                                 /* Netmap rx only */
5740                                                 rc = t4_alloc_irq(sc, irq, rid,
5741                                                     t4_nm_intr, irq->nm_rxq, s);
5742                                         }
5743                                         if (irq->nm_rxq != NULL &&
5744                                             irq->rxq != NULL) {
5745                                                 /* NIC and Netmap rx */
5746                                                 rc = t4_alloc_irq(sc, irq, rid,
5747                                                     t4_vi_intr, irq, s);
5748                                         }
5749 #endif
5750                                         if (irq->rxq != NULL &&
5751                                             irq->nm_rxq == NULL) {
5752                                                 /* NIC rx only */
5753                                                 rc = t4_alloc_irq(sc, irq, rid,
5754                                                     t4_intr, irq->rxq, s);
5755                                         }
5756                                         if (rc != 0)
5757                                                 return (rc);
5758 #ifdef RSS
5759                                         if (q < vi->nrxq) {
5760                                                 bus_bind_intr(sc->dev, irq->res,
5761                                                     rss_getcpu(q % nbuckets));
5762                                         }
5763 #endif
5764                                         irq++;
5765                                         rid++;
5766                                         vi->nintr++;
5767                                 }
5768                         } else {
5769                                 for_each_rxq(vi, q, rxq) {
5770                                         snprintf(s, sizeof(s), "%x%c%x", p,
5771                                             'a' + v, q);
5772                                         rc = t4_alloc_irq(sc, irq, rid,
5773                                             t4_intr, rxq, s);
5774                                         if (rc != 0)
5775                                                 return (rc);
5776 #ifdef RSS
5777                                         bus_bind_intr(sc->dev, irq->res,
5778                                             rss_getcpu(q % nbuckets));
5779 #endif
5780                                         irq++;
5781                                         rid++;
5782                                         vi->nintr++;
5783                                 }
5784                         }
5785 #ifdef TCP_OFFLOAD
5786                         for_each_ofld_rxq(vi, q, ofld_rxq) {
5787                                 snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
5788                                 rc = t4_alloc_irq(sc, irq, rid, t4_intr,
5789                                     ofld_rxq, s);
5790                                 if (rc != 0)
5791                                         return (rc);
5792                                 irq++;
5793                                 rid++;
5794                                 vi->nintr++;
5795                         }
5796 #endif
5797                 }
5798         }
5799         MPASS(irq == &sc->irq[sc->intr_count]);
5800
5801         return (0);
5802 }
5803
5804 int
5805 adapter_full_init(struct adapter *sc)
5806 {
5807         int rc, i;
5808 #ifdef RSS
5809         uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5810         uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5811 #endif
5812
5813         ASSERT_SYNCHRONIZED_OP(sc);
5814         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5815         KASSERT((sc->flags & FULL_INIT_DONE) == 0,
5816             ("%s: FULL_INIT_DONE already", __func__));
5817
5818         /*
5819          * queues that belong to the adapter (not any particular port).
5820          */
5821         rc = t4_setup_adapter_queues(sc);
5822         if (rc != 0)
5823                 goto done;
5824
5825         for (i = 0; i < nitems(sc->tq); i++) {
5826                 sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
5827                     taskqueue_thread_enqueue, &sc->tq[i]);
5828                 if (sc->tq[i] == NULL) {
5829                         device_printf(sc->dev,
5830                             "failed to allocate task queue %d\n", i);
5831                         rc = ENOMEM;
5832                         goto done;
5833                 }
5834                 taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
5835                     device_get_nameunit(sc->dev), i);
5836         }
5837 #ifdef RSS
5838         MPASS(RSS_KEYSIZE == 40);
5839         rss_getkey((void *)&raw_rss_key[0]);
5840         for (i = 0; i < nitems(rss_key); i++) {
5841                 rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
5842         }
5843         t4_write_rss_key(sc, &rss_key[0], -1, 1);
5844 #endif
5845
5846         if (!(sc->flags & IS_VF))
5847                 t4_intr_enable(sc);
5848 #ifdef KERN_TLS
5849         if (sc->flags & KERN_TLS_OK)
5850                 callout_reset_sbt(&sc->ktls_tick, SBT_1MS, 0, ktls_tick, sc,
5851                     C_HARDCLOCK);
5852 #endif
5853         sc->flags |= FULL_INIT_DONE;
5854 done:
5855         if (rc != 0)
5856                 adapter_full_uninit(sc);
5857
5858         return (rc);
5859 }
5860
5861 int
5862 adapter_full_uninit(struct adapter *sc)
5863 {
5864         int i;
5865
5866         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5867
5868         t4_teardown_adapter_queues(sc);
5869
5870         for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
5871                 taskqueue_free(sc->tq[i]);
5872                 sc->tq[i] = NULL;
5873         }
5874
5875         sc->flags &= ~FULL_INIT_DONE;
5876
5877         return (0);
5878 }
5879
5880 #ifdef RSS
5881 #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
5882     RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
5883     RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
5884     RSS_HASHTYPE_RSS_UDP_IPV6)
5885
5886 /* Translates kernel hash types to hardware. */
5887 static int
5888 hashconfig_to_hashen(int hashconfig)
5889 {
5890         int hashen = 0;
5891
5892         if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
5893                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
5894         if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
5895                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
5896         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
5897                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5898                     F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5899         }
5900         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
5901                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5902                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5903         }
5904         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
5905                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5906         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
5907                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5908
5909         return (hashen);
5910 }
5911
5912 /* Translates hardware hash types to kernel. */
5913 static int
5914 hashen_to_hashconfig(int hashen)
5915 {
5916         int hashconfig = 0;
5917
5918         if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
5919                 /*
5920                  * If UDP hashing was enabled it must have been enabled for
5921                  * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
5922                  * enabling any 4-tuple hash is nonsense configuration.
5923                  */
5924                 MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5925                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
5926
5927                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5928                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
5929                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5930                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
5931         }
5932         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5933                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
5934         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5935                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
5936         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
5937                 hashconfig |= RSS_HASHTYPE_RSS_IPV4;
5938         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
5939                 hashconfig |= RSS_HASHTYPE_RSS_IPV6;
5940
5941         return (hashconfig);
5942 }
5943 #endif
5944
5945 int
5946 vi_full_init(struct vi_info *vi)
5947 {
5948         struct adapter *sc = vi->adapter;
5949         struct ifnet *ifp = vi->ifp;
5950         uint16_t *rss;
5951         struct sge_rxq *rxq;
5952         int rc, i, j;
5953 #ifdef RSS
5954         int nbuckets = rss_getnumbuckets();
5955         int hashconfig = rss_gethashconfig();
5956         int extra;
5957 #endif
5958
5959         ASSERT_SYNCHRONIZED_OP(sc);
5960         KASSERT((vi->flags & VI_INIT_DONE) == 0,
5961             ("%s: VI_INIT_DONE already", __func__));
5962
5963         sysctl_ctx_init(&vi->ctx);
5964         vi->flags |= VI_SYSCTL_CTX;
5965
5966         /*
5967          * Allocate tx/rx/fl queues for this VI.
5968          */
5969         rc = t4_setup_vi_queues(vi);
5970         if (rc != 0)
5971                 goto done;      /* error message displayed already */
5972
5973         /*
5974          * Setup RSS for this VI.  Save a copy of the RSS table for later use.
5975          */
5976         if (vi->nrxq > vi->rss_size) {
5977                 if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
5978                     "some queues will never receive traffic.\n", vi->nrxq,
5979                     vi->rss_size);
5980         } else if (vi->rss_size % vi->nrxq) {
5981                 if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
5982                     "expect uneven traffic distribution.\n", vi->nrxq,
5983                     vi->rss_size);
5984         }
5985 #ifdef RSS
5986         if (vi->nrxq != nbuckets) {
5987                 if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
5988                     "performance will be impacted.\n", vi->nrxq, nbuckets);
5989         }
5990 #endif
5991         rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
5992         for (i = 0; i < vi->rss_size;) {
5993 #ifdef RSS
5994                 j = rss_get_indirection_to_bucket(i);
5995                 j %= vi->nrxq;
5996                 rxq = &sc->sge.rxq[vi->first_rxq + j];
5997                 rss[i++] = rxq->iq.abs_id;
5998 #else
5999                 for_each_rxq(vi, j, rxq) {
6000                         rss[i++] = rxq->iq.abs_id;
6001                         if (i == vi->rss_size)
6002                                 break;
6003                 }
6004 #endif
6005         }
6006
6007         rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
6008             vi->rss_size);
6009         if (rc != 0) {
6010                 free(rss, M_CXGBE);
6011                 if_printf(ifp, "rss_config failed: %d\n", rc);
6012                 goto done;
6013         }
6014
6015 #ifdef RSS
6016         vi->hashen = hashconfig_to_hashen(hashconfig);
6017
6018         /*
6019          * We may have had to enable some hashes even though the global config
6020          * wants them disabled.  This is a potential problem that must be
6021          * reported to the user.
6022          */
6023         extra = hashen_to_hashconfig(vi->hashen) ^ hashconfig;
6024
6025         /*
6026          * If we consider only the supported hash types, then the enabled hashes
6027          * are a superset of the requested hashes.  In other words, there cannot
6028          * be any supported hash that was requested but not enabled, but there
6029          * can be hashes that were not requested but had to be enabled.
6030          */
6031         extra &= SUPPORTED_RSS_HASHTYPES;
6032         MPASS((extra & hashconfig) == 0);
6033
6034         if (extra) {
6035                 if_printf(ifp,
6036                     "global RSS config (0x%x) cannot be accommodated.\n",
6037                     hashconfig);
6038         }
6039         if (extra & RSS_HASHTYPE_RSS_IPV4)
6040                 if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
6041         if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
6042                 if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
6043         if (extra & RSS_HASHTYPE_RSS_IPV6)
6044                 if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
6045         if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
6046                 if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
6047         if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
6048                 if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
6049         if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
6050                 if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
6051 #else
6052         vi->hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
6053             F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
6054             F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
6055             F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
6056 #endif
6057         rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, rss[0], 0, 0);
6058         if (rc != 0) {
6059                 free(rss, M_CXGBE);
6060                 if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
6061                 goto done;
6062         }
6063
6064         vi->rss = rss;
6065         vi->flags |= VI_INIT_DONE;
6066 done:
6067         if (rc != 0)
6068                 vi_full_uninit(vi);
6069
6070         return (rc);
6071 }
6072
6073 /*
6074  * Idempotent.
6075  */
6076 int
6077 vi_full_uninit(struct vi_info *vi)
6078 {
6079         struct port_info *pi = vi->pi;
6080         struct adapter *sc = pi->adapter;
6081         int i;
6082         struct sge_rxq *rxq;
6083         struct sge_txq *txq;
6084 #ifdef TCP_OFFLOAD
6085         struct sge_ofld_rxq *ofld_rxq;
6086 #endif
6087 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6088         struct sge_wrq *ofld_txq;
6089 #endif
6090
6091         if (vi->flags & VI_INIT_DONE) {
6092
6093                 /* Need to quiesce queues.  */
6094
6095                 /* XXX: Only for the first VI? */
6096                 if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
6097                         quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
6098
6099                 for_each_txq(vi, i, txq) {
6100                         quiesce_txq(sc, txq);
6101                 }
6102
6103 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6104                 for_each_ofld_txq(vi, i, ofld_txq) {
6105                         quiesce_wrq(sc, ofld_txq);
6106                 }
6107 #endif
6108
6109                 for_each_rxq(vi, i, rxq) {
6110                         quiesce_iq(sc, &rxq->iq);
6111                         quiesce_fl(sc, &rxq->fl);
6112                 }
6113
6114 #ifdef TCP_OFFLOAD
6115                 for_each_ofld_rxq(vi, i, ofld_rxq) {
6116                         quiesce_iq(sc, &ofld_rxq->iq);
6117                         quiesce_fl(sc, &ofld_rxq->fl);
6118                 }
6119 #endif
6120                 free(vi->rss, M_CXGBE);
6121                 free(vi->nm_rss, M_CXGBE);
6122         }
6123
6124         t4_teardown_vi_queues(vi);
6125         vi->flags &= ~VI_INIT_DONE;
6126
6127         return (0);
6128 }
6129
6130 static void
6131 quiesce_txq(struct adapter *sc, struct sge_txq *txq)
6132 {
6133         struct sge_eq *eq = &txq->eq;
6134         struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
6135
6136         (void) sc;      /* unused */
6137
6138 #ifdef INVARIANTS
6139         TXQ_LOCK(txq);
6140         MPASS((eq->flags & EQ_ENABLED) == 0);
6141         TXQ_UNLOCK(txq);
6142 #endif
6143
6144         /* Wait for the mp_ring to empty. */
6145         while (!mp_ring_is_idle(txq->r)) {
6146                 mp_ring_check_drainage(txq->r, 4096);
6147                 pause("rquiesce", 1);
6148         }
6149
6150         /* Then wait for the hardware to finish. */
6151         while (spg->cidx != htobe16(eq->pidx))
6152                 pause("equiesce", 1);
6153
6154         /* Finally, wait for the driver to reclaim all descriptors. */
6155         while (eq->cidx != eq->pidx)
6156                 pause("dquiesce", 1);
6157 }
6158
6159 static void
6160 quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
6161 {
6162
6163         /* XXXTX */
6164 }
6165
6166 static void
6167 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
6168 {
6169         (void) sc;      /* unused */
6170
6171         /* Synchronize with the interrupt handler */
6172         while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
6173                 pause("iqfree", 1);
6174 }
6175
6176 static void
6177 quiesce_fl(struct adapter *sc, struct sge_fl *fl)
6178 {
6179         mtx_lock(&sc->sfl_lock);
6180         FL_LOCK(fl);
6181         fl->flags |= FL_DOOMED;
6182         FL_UNLOCK(fl);
6183         callout_stop(&sc->sfl_callout);
6184         mtx_unlock(&sc->sfl_lock);
6185
6186         KASSERT((fl->flags & FL_STARVING) == 0,
6187             ("%s: still starving", __func__));
6188 }
6189
6190 static int
6191 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
6192     driver_intr_t *handler, void *arg, char *name)
6193 {
6194         int rc;
6195
6196         irq->rid = rid;
6197         irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
6198             RF_SHAREABLE | RF_ACTIVE);
6199         if (irq->res == NULL) {
6200                 device_printf(sc->dev,
6201                     "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
6202                 return (ENOMEM);
6203         }
6204
6205         rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
6206             NULL, handler, arg, &irq->tag);
6207         if (rc != 0) {
6208                 device_printf(sc->dev,
6209                     "failed to setup interrupt for rid %d, name %s: %d\n",
6210                     rid, name, rc);
6211         } else if (name)
6212                 bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
6213
6214         return (rc);
6215 }
6216
6217 static int
6218 t4_free_irq(struct adapter *sc, struct irq *irq)
6219 {
6220         if (irq->tag)
6221                 bus_teardown_intr(sc->dev, irq->res, irq->tag);
6222         if (irq->res)
6223                 bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
6224
6225         bzero(irq, sizeof(*irq));
6226
6227         return (0);
6228 }
6229
6230 static void
6231 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
6232 {
6233
6234         regs->version = chip_id(sc) | chip_rev(sc) << 10;
6235         t4_get_regs(sc, buf, regs->len);
6236 }
6237
6238 #define A_PL_INDIR_CMD  0x1f8
6239
6240 #define S_PL_AUTOINC    31
6241 #define M_PL_AUTOINC    0x1U
6242 #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC)
6243 #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
6244
6245 #define S_PL_VFID       20
6246 #define M_PL_VFID       0xffU
6247 #define V_PL_VFID(x)    ((x) << S_PL_VFID)
6248 #define G_PL_VFID(x)    (((x) >> S_PL_VFID) & M_PL_VFID)
6249
6250 #define S_PL_ADDR       0
6251 #define M_PL_ADDR       0xfffffU
6252 #define V_PL_ADDR(x)    ((x) << S_PL_ADDR)
6253 #define G_PL_ADDR(x)    (((x) >> S_PL_ADDR) & M_PL_ADDR)
6254
6255 #define A_PL_INDIR_DATA 0x1fc
6256
6257 static uint64_t
6258 read_vf_stat(struct adapter *sc, u_int vin, int reg)
6259 {
6260         u32 stats[2];
6261
6262         mtx_assert(&sc->reg_lock, MA_OWNED);
6263         if (sc->flags & IS_VF) {
6264                 stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
6265                 stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
6266         } else {
6267                 t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
6268                     V_PL_VFID(vin) | V_PL_ADDR(VF_MPS_REG(reg)));
6269                 stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
6270                 stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
6271         }
6272         return (((uint64_t)stats[1]) << 32 | stats[0]);
6273 }
6274
6275 static void
6276 t4_get_vi_stats(struct adapter *sc, u_int vin, struct fw_vi_stats_vf *stats)
6277 {
6278
6279 #define GET_STAT(name) \
6280         read_vf_stat(sc, vin, A_MPS_VF_STAT_##name##_L)
6281
6282         stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
6283         stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
6284         stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
6285         stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
6286         stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
6287         stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
6288         stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
6289         stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
6290         stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
6291         stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
6292         stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
6293         stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
6294         stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
6295         stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
6296         stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
6297         stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
6298
6299 #undef GET_STAT
6300 }
6301
6302 static void
6303 t4_clr_vi_stats(struct adapter *sc, u_int vin)
6304 {
6305         int reg;
6306
6307         t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(vin) |
6308             V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
6309         for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
6310              reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
6311                 t4_write_reg(sc, A_PL_INDIR_DATA, 0);
6312 }
6313
6314 static void
6315 vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
6316 {
6317         struct timeval tv;
6318         const struct timeval interval = {0, 250000};    /* 250ms */
6319
6320         if (!(vi->flags & VI_INIT_DONE))
6321                 return;
6322
6323         getmicrotime(&tv);
6324         timevalsub(&tv, &interval);
6325         if (timevalcmp(&tv, &vi->last_refreshed, <))
6326                 return;
6327
6328         mtx_lock(&sc->reg_lock);
6329         t4_get_vi_stats(sc, vi->vin, &vi->stats);
6330         getmicrotime(&vi->last_refreshed);
6331         mtx_unlock(&sc->reg_lock);
6332 }
6333
6334 static void
6335 cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
6336 {
6337         u_int i, v, tnl_cong_drops, chan_map;
6338         struct timeval tv;
6339         const struct timeval interval = {0, 250000};    /* 250ms */
6340
6341         getmicrotime(&tv);
6342         timevalsub(&tv, &interval);
6343         if (timevalcmp(&tv, &pi->last_refreshed, <))
6344                 return;
6345
6346         tnl_cong_drops = 0;
6347         t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
6348         chan_map = pi->rx_e_chan_map;
6349         while (chan_map) {
6350                 i = ffs(chan_map) - 1;
6351                 mtx_lock(&sc->reg_lock);
6352                 t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
6353                     A_TP_MIB_TNL_CNG_DROP_0 + i);
6354                 mtx_unlock(&sc->reg_lock);
6355                 tnl_cong_drops += v;
6356                 chan_map &= ~(1 << i);
6357         }
6358         pi->tnl_cong_drops = tnl_cong_drops;
6359         getmicrotime(&pi->last_refreshed);
6360 }
6361
6362 static void
6363 cxgbe_tick(void *arg)
6364 {
6365         struct port_info *pi = arg;
6366         struct adapter *sc = pi->adapter;
6367
6368         PORT_LOCK_ASSERT_OWNED(pi);
6369         cxgbe_refresh_stats(sc, pi);
6370
6371         callout_schedule(&pi->tick, hz);
6372 }
6373
6374 void
6375 vi_tick(void *arg)
6376 {
6377         struct vi_info *vi = arg;
6378         struct adapter *sc = vi->adapter;
6379
6380         vi_refresh_stats(sc, vi);
6381
6382         callout_schedule(&vi->tick, hz);
6383 }
6384
6385 /*
6386  * Should match fw_caps_config_<foo> enums in t4fw_interface.h
6387  */
6388 static char *caps_decoder[] = {
6389         "\20\001IPMI\002NCSI",                          /* 0: NBM */
6390         "\20\001PPP\002QFC\003DCBX",                    /* 1: link */
6391         "\20\001INGRESS\002EGRESS",                     /* 2: switch */
6392         "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"      /* 3: NIC */
6393             "\006HASHFILTER\007ETHOFLD",
6394         "\20\001TOE",                                   /* 4: TOE */
6395         "\20\001RDDP\002RDMAC",                         /* 5: RDMA */
6396         "\20\001INITIATOR_PDU\002TARGET_PDU"            /* 6: iSCSI */
6397             "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
6398             "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
6399             "\007T10DIF"
6400             "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
6401         "\20\001LOOKASIDE\002TLSKEYS",                  /* 7: Crypto */
6402         "\20\001INITIATOR\002TARGET\003CTRL_OFLD"       /* 8: FCoE */
6403                     "\004PO_INITIATOR\005PO_TARGET",
6404 };
6405
6406 void
6407 t4_sysctls(struct adapter *sc)
6408 {
6409         struct sysctl_ctx_list *ctx;
6410         struct sysctl_oid *oid;
6411         struct sysctl_oid_list *children, *c0;
6412         static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
6413
6414         ctx = device_get_sysctl_ctx(sc->dev);
6415
6416         /*
6417          * dev.t4nex.X.
6418          */
6419         oid = device_get_sysctl_tree(sc->dev);
6420         c0 = children = SYSCTL_CHILDREN(oid);
6421
6422         sc->sc_do_rxcopy = 1;
6423         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
6424             &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
6425
6426         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
6427             sc->params.nports, "# of ports");
6428
6429         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
6430             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, doorbells,
6431             (uintptr_t)&sc->doorbells, sysctl_bitfield_8b, "A",
6432             "available doorbells");
6433
6434         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
6435             sc->params.vpd.cclk, "core clock frequency (in KHz)");
6436
6437         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
6438             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
6439             sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val),
6440             sysctl_int_array, "A", "interrupt holdoff timer values (us)");
6441
6442         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
6443             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
6444             sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val),
6445             sysctl_int_array, "A", "interrupt holdoff packet counter values");
6446
6447         t4_sge_sysctls(sc, ctx, children);
6448
6449         sc->lro_timeout = 100;
6450         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
6451             &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
6452
6453         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
6454             &sc->debug_flags, 0, "flags to enable runtime debugging");
6455
6456         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
6457             CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
6458
6459         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
6460             CTLFLAG_RD, sc->fw_version, 0, "firmware version");
6461
6462         if (sc->flags & IS_VF)
6463                 return;
6464
6465         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
6466             NULL, chip_rev(sc), "chip hardware revision");
6467
6468         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
6469             CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
6470
6471         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
6472             CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
6473
6474         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
6475             CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
6476
6477         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
6478             CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");
6479
6480         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
6481             CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
6482
6483         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
6484             sc->er_version, 0, "expansion ROM version");
6485
6486         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
6487             sc->bs_version, 0, "bootstrap firmware version");
6488
6489         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
6490             NULL, sc->params.scfg_vers, "serial config version");
6491
6492         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
6493             NULL, sc->params.vpd_vers, "VPD version");
6494
6495         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
6496             CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
6497
6498         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
6499             sc->cfcsum, "config file checksum");
6500
6501 #define SYSCTL_CAP(name, n, text) \
6502         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
6503             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, caps_decoder[n], \
6504             (uintptr_t)&sc->name, sysctl_bitfield_16b, "A", \
6505             "available " text " capabilities")
6506
6507         SYSCTL_CAP(nbmcaps, 0, "NBM");
6508         SYSCTL_CAP(linkcaps, 1, "link");
6509         SYSCTL_CAP(switchcaps, 2, "switch");
6510         SYSCTL_CAP(niccaps, 3, "NIC");
6511         SYSCTL_CAP(toecaps, 4, "TCP offload");
6512         SYSCTL_CAP(rdmacaps, 5, "RDMA");
6513         SYSCTL_CAP(iscsicaps, 6, "iSCSI");
6514         SYSCTL_CAP(cryptocaps, 7, "crypto");
6515         SYSCTL_CAP(fcoecaps, 8, "FCoE");
6516 #undef SYSCTL_CAP
6517
6518         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
6519             NULL, sc->tids.nftids, "number of filters");
6520
6521         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
6522             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6523             sysctl_temperature, "I", "chip temperature (in Celsius)");
6524         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "reset_sensor",
6525             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6526             sysctl_reset_sensor, "I", "reset the chip's temperature sensor.");
6527
6528         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg",
6529             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6530             sysctl_loadavg, "A",
6531             "microprocessor load averages (debug firmwares only)");
6532
6533         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "core_vdd",
6534             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, sysctl_vdd,
6535             "I", "core Vdd (in mV)");
6536
6537         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus",
6538             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, LOCAL_CPUS,
6539             sysctl_cpus, "A", "local CPUs");
6540
6541         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus",
6542             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, INTR_CPUS,
6543             sysctl_cpus, "A", "preferred CPUs for interrupts");
6544
6545         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "swintr", CTLFLAG_RW,
6546             &sc->swintr, 0, "software triggered interrupts");
6547
6548         /*
6549          * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
6550          */
6551         oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
6552             CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL,
6553             "logs and miscellaneous information");
6554         children = SYSCTL_CHILDREN(oid);
6555
6556         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
6557             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6558             sysctl_cctrl, "A", "congestion control");
6559
6560         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
6561             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6562             sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
6563
6564         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
6565             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 1,
6566             sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
6567
6568         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
6569             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 2,
6570             sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
6571
6572         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
6573             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 3,
6574             sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
6575
6576         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
6577             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 4,
6578             sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
6579
6580         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
6581             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 5,
6582             sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
6583
6584         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
6585             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6586             sysctl_cim_la, "A", "CIM logic analyzer");
6587
6588         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
6589             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6590             sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
6591
6592         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
6593             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6594             0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
6595
6596         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
6597             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6598             1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
6599
6600         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
6601             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6602             2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
6603
6604         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
6605             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6606             3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
6607
6608         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
6609             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6610             4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
6611
6612         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
6613             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6614             5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
6615
6616         if (chip_id(sc) > CHELSIO_T4) {
6617                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
6618                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6619                     6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A",
6620                     "CIM OBQ 6 (SGE0-RX)");
6621
6622                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
6623                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6624                     7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A",
6625                     "CIM OBQ 7 (SGE1-RX)");
6626         }
6627
6628         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
6629             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6630             sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
6631
6632         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
6633             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6634             sysctl_cim_qcfg, "A", "CIM queue configuration");
6635
6636         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
6637             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6638             sysctl_cpl_stats, "A", "CPL statistics");
6639
6640         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
6641             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6642             sysctl_ddp_stats, "A", "non-TCP DDP statistics");
6643
6644         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tid_stats",
6645             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6646             sysctl_tid_stats, "A", "tid stats");
6647
6648         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
6649             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6650             sysctl_devlog, "A", "firmware's device log");
6651
6652         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
6653             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6654             sysctl_fcoe_stats, "A", "FCoE statistics");
6655
6656         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
6657             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6658             sysctl_hw_sched, "A", "hardware scheduler ");
6659
6660         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
6661             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6662             sysctl_l2t, "A", "hardware L2 table");
6663
6664         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt",
6665             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6666             sysctl_smt, "A", "hardware source MAC table");
6667
6668 #ifdef INET6
6669         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "clip",
6670             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6671             sysctl_clip, "A", "active CLIP table entries");
6672 #endif
6673
6674         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
6675             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6676             sysctl_lb_stats, "A", "loopback statistics");
6677
6678         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
6679             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6680             sysctl_meminfo, "A", "memory regions");
6681
6682         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
6683             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6684             chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
6685             "A", "MPS TCAM entries");
6686
6687         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
6688             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6689             sysctl_path_mtus, "A", "path MTUs");
6690
6691         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
6692             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6693             sysctl_pm_stats, "A", "PM statistics");
6694
6695         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
6696             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6697             sysctl_rdma_stats, "A", "RDMA statistics");
6698
6699         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
6700             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6701             sysctl_tcp_stats, "A", "TCP statistics");
6702
6703         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
6704             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6705             sysctl_tids, "A", "TID information");
6706
6707         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
6708             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6709             sysctl_tp_err_stats, "A", "TP error statistics");
6710
6711         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tnl_stats",
6712             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6713             sysctl_tnl_stats, "A", "TP tunnel statistics");
6714
6715         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
6716             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6717             sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask");
6718
6719         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
6720             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6721             sysctl_tp_la, "A", "TP logic analyzer");
6722
6723         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
6724             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6725             sysctl_tx_rate, "A", "Tx rate");
6726
6727         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
6728             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6729             sysctl_ulprx_la, "A", "ULPRX logic analyzer");
6730
6731         if (chip_id(sc) >= CHELSIO_T5) {
6732                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
6733                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6734                     sysctl_wcwr_stats, "A", "write combined work requests");
6735         }
6736
6737 #ifdef KERN_TLS
6738         if (sc->flags & KERN_TLS_OK) {
6739                 /*
6740                  * dev.t4nex.0.tls.
6741                  */
6742                 oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "tls",
6743                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "KERN_TLS parameters");
6744                 children = SYSCTL_CHILDREN(oid);
6745
6746                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "inline_keys",
6747                     CTLFLAG_RW, &sc->tlst.inline_keys, 0, "Always pass TLS "
6748                     "keys in work requests (1) or attempt to store TLS keys "
6749                     "in card memory.");
6750                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "combo_wrs",
6751                     CTLFLAG_RW, &sc->tlst.combo_wrs, 0, "Attempt to combine "
6752                     "TCB field updates with TLS record work requests.");
6753         }
6754 #endif
6755
6756 #ifdef TCP_OFFLOAD
6757         if (is_offload(sc)) {
6758                 int i;
6759                 char s[4];
6760
6761                 /*
6762                  * dev.t4nex.X.toe.
6763                  */
6764                 oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe",
6765                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE parameters");
6766                 children = SYSCTL_CHILDREN(oid);
6767
6768                 sc->tt.cong_algorithm = -1;
6769                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
6770                     CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
6771                     "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
6772                     "3 = highspeed)");
6773
6774                 sc->tt.sndbuf = -1;
6775                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
6776                     &sc->tt.sndbuf, 0, "hardware send buffer");
6777
6778                 sc->tt.ddp = 0;
6779                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp",
6780                     CTLFLAG_RW | CTLFLAG_SKIP, &sc->tt.ddp, 0, "");
6781                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_zcopy", CTLFLAG_RW,
6782                     &sc->tt.ddp, 0, "Enable zero-copy aio_read(2)");
6783
6784                 sc->tt.rx_coalesce = -1;
6785                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
6786                     CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
6787
6788                 sc->tt.tls = 0;
6789                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls", CTLTYPE_INT |
6790                     CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, sysctl_tls, "I",
6791                     "Inline TLS allowed");
6792
6793                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
6794                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6795                     sysctl_tls_rx_ports, "I",
6796                     "TCP ports that use inline TLS+TOE RX");
6797
6798                 sc->tt.tls_rx_timeout = t4_toe_tls_rx_timeout;
6799                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_timeout",
6800                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6801                     sysctl_tls_rx_timeout, "I",
6802                     "Timeout in seconds to downgrade TLS sockets to plain TOE");
6803
6804                 sc->tt.tx_align = -1;
6805                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
6806                     CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
6807
6808                 sc->tt.tx_zcopy = 0;
6809                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
6810                     CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
6811                     "Enable zero-copy aio_write(2)");
6812
6813                 sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
6814                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
6815                     "cop_managed_offloading", CTLFLAG_RW,
6816                     &sc->tt.cop_managed_offloading, 0,
6817                     "COP (Connection Offload Policy) controls all TOE offload");
6818
6819                 sc->tt.autorcvbuf_inc = 16 * 1024;
6820                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "autorcvbuf_inc",
6821                     CTLFLAG_RW, &sc->tt.autorcvbuf_inc, 0,
6822                     "autorcvbuf increment");
6823
6824                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
6825                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6826                     sysctl_tp_tick, "A", "TP timer tick (us)");
6827
6828                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
6829                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 1,
6830                     sysctl_tp_tick, "A", "TCP timestamp tick (us)");
6831
6832                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
6833                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 2,
6834                     sysctl_tp_tick, "A", "DACK tick (us)");
6835
6836                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
6837                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6838                     sysctl_tp_dack_timer, "IU", "DACK timer (us)");
6839
6840                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
6841                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6842                     A_TP_RXT_MIN, sysctl_tp_timer, "LU",
6843                     "Minimum retransmit interval (us)");
6844
6845                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
6846                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6847                     A_TP_RXT_MAX, sysctl_tp_timer, "LU",
6848                     "Maximum retransmit interval (us)");
6849
6850                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
6851                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6852                     A_TP_PERS_MIN, sysctl_tp_timer, "LU",
6853                     "Persist timer min (us)");
6854
6855                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
6856                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6857                     A_TP_PERS_MAX, sysctl_tp_timer, "LU",
6858                     "Persist timer max (us)");
6859
6860                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
6861                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6862                     A_TP_KEEP_IDLE, sysctl_tp_timer, "LU",
6863                     "Keepalive idle timer (us)");
6864
6865                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
6866                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6867                     A_TP_KEEP_INTVL, sysctl_tp_timer, "LU",
6868                     "Keepalive interval timer (us)");
6869
6870                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
6871                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6872                     A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)");
6873
6874                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
6875                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6876                     A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU",
6877                     "FINWAIT2 timer (us)");
6878
6879                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
6880                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6881                     S_SYNSHIFTMAX, sysctl_tp_shift_cnt, "IU",
6882                     "Number of SYN retransmissions before abort");
6883
6884                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
6885                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6886                     S_RXTSHIFTMAXR2, sysctl_tp_shift_cnt, "IU",
6887                     "Number of retransmissions before abort");
6888
6889                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
6890                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6891                     S_KEEPALIVEMAXR2, sysctl_tp_shift_cnt, "IU",
6892                     "Number of keepalive probes before abort");
6893
6894                 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
6895                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
6896                     "TOE retransmit backoffs");
6897                 children = SYSCTL_CHILDREN(oid);
6898                 for (i = 0; i < 16; i++) {
6899                         snprintf(s, sizeof(s), "%u", i);
6900                         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
6901                             CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6902                             i, sysctl_tp_backoff, "IU",
6903                             "TOE retransmit backoff");
6904                 }
6905         }
6906 #endif
6907 }
6908
6909 void
6910 vi_sysctls(struct vi_info *vi)
6911 {
6912         struct sysctl_ctx_list *ctx;
6913         struct sysctl_oid *oid;
6914         struct sysctl_oid_list *children;
6915
6916         ctx = device_get_sysctl_ctx(vi->dev);
6917
6918         /*
6919          * dev.v?(cxgbe|cxl).X.
6920          */
6921         oid = device_get_sysctl_tree(vi->dev);
6922         children = SYSCTL_CHILDREN(oid);
6923
6924         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
6925             vi->viid, "VI identifer");
6926         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
6927             &vi->nrxq, 0, "# of rx queues");
6928         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
6929             &vi->ntxq, 0, "# of tx queues");
6930         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
6931             &vi->first_rxq, 0, "index of first rx queue");
6932         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
6933             &vi->first_txq, 0, "index of first tx queue");
6934         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_base", CTLFLAG_RD, NULL,
6935             vi->rss_base, "start of RSS indirection table");
6936         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
6937             vi->rss_size, "size of RSS indirection table");
6938
6939         if (IS_MAIN_VI(vi)) {
6940                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
6941                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6942                     sysctl_noflowq, "IU",
6943                     "Reserve queue 0 for non-flowid packets");
6944         }
6945
6946         if (vi->adapter->flags & IS_VF) {
6947                 MPASS(vi->flags & TX_USES_VM_WR);
6948                 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_vm_wr", CTLFLAG_RD,
6949                     NULL, 1, "use VM work requests for transmit");
6950         } else {
6951                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_vm_wr",
6952                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6953                     sysctl_tx_vm_wr, "I", "use VM work requestes for transmit");
6954         }
6955
6956 #ifdef TCP_OFFLOAD
6957         if (vi->nofldrxq != 0) {
6958                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
6959                     &vi->nofldrxq, 0,
6960                     "# of rx queues for offloaded TCP connections");
6961                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
6962                     CTLFLAG_RD, &vi->first_ofld_rxq, 0,
6963                     "index of first TOE rx queue");
6964                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
6965                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6966                     sysctl_holdoff_tmr_idx_ofld, "I",
6967                     "holdoff timer index for TOE queues");
6968                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
6969                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6970                     sysctl_holdoff_pktc_idx_ofld, "I",
6971                     "holdoff packet counter index for TOE queues");
6972         }
6973 #endif
6974 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6975         if (vi->nofldtxq != 0) {
6976                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
6977                     &vi->nofldtxq, 0,
6978                     "# of tx queues for TOE/ETHOFLD");
6979                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
6980                     CTLFLAG_RD, &vi->first_ofld_txq, 0,
6981                     "index of first TOE/ETHOFLD tx queue");
6982         }
6983 #endif
6984 #ifdef DEV_NETMAP
6985         if (vi->nnmrxq != 0) {
6986                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
6987                     &vi->nnmrxq, 0, "# of netmap rx queues");
6988                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
6989                     &vi->nnmtxq, 0, "# of netmap tx queues");
6990                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
6991                     CTLFLAG_RD, &vi->first_nm_rxq, 0,
6992                     "index of first netmap rx queue");
6993                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
6994                     CTLFLAG_RD, &vi->first_nm_txq, 0,
6995                     "index of first netmap tx queue");
6996         }
6997 #endif
6998
6999         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
7000             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7001             sysctl_holdoff_tmr_idx, "I", "holdoff timer index");
7002         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
7003             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7004             sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index");
7005
7006         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
7007             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7008             sysctl_qsize_rxq, "I", "rx queue size");
7009         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
7010             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
7011             sysctl_qsize_txq, "I", "tx queue size");
7012 }
7013
7014 static void
7015 cxgbe_sysctls(struct port_info *pi)
7016 {
7017         struct sysctl_ctx_list *ctx;
7018         struct sysctl_oid *oid;
7019         struct sysctl_oid_list *children, *children2;
7020         struct adapter *sc = pi->adapter;
7021         int i;
7022         char name[16];
7023         static char *tc_flags = "\20\1USER\2SYNC\3ASYNC\4ERR";
7024
7025         ctx = device_get_sysctl_ctx(pi->dev);
7026
7027         /*
7028          * dev.cxgbe.X.
7029          */
7030         oid = device_get_sysctl_tree(pi->dev);
7031         children = SYSCTL_CHILDREN(oid);
7032
7033         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc",
7034             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0,
7035             sysctl_linkdnrc, "A", "reason why link is down");
7036         if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
7037                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
7038                     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0,
7039                     sysctl_btphy, "I", "PHY temperature (in Celsius)");
7040                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
7041                     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 1,
7042                     sysctl_btphy, "I", "PHY firmware version");
7043         }
7044
7045         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
7046             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7047             sysctl_pause_settings, "A",
7048             "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
7049         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
7050             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7051             sysctl_fec, "A",
7052             "FECs to use (bit 0 = RS, 1 = FC, 2 = none, 5 = auto, 6 = module)");
7053         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "module_fec",
7054             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0,
7055             sysctl_module_fec, "A", "FEC recommended by the cable/transceiver");
7056         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
7057             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7058             sysctl_autoneg, "I",
7059             "autonegotiation (-1 = not supported)");
7060
7061         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcaps", CTLFLAG_RD,
7062             &pi->link_cfg.pcaps, 0, "port capabilities");
7063         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "acaps", CTLFLAG_RD,
7064             &pi->link_cfg.acaps, 0, "advertised capabilities");
7065         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lpacaps", CTLFLAG_RD,
7066             &pi->link_cfg.lpacaps, 0, "link partner advertised capabilities");
7067
7068         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
7069             port_top_speed(pi), "max speed (in Gbps)");
7070         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
7071             pi->mps_bg_map, "MPS buffer group map");
7072         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
7073             NULL, pi->rx_e_chan_map, "TP rx e-channel map");
7074
7075         if (sc->flags & IS_VF)
7076                 return;
7077
7078         /*
7079          * dev.(cxgbe|cxl).X.tc.
7080          */
7081         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc",
7082             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
7083             "Tx scheduler traffic classes (cl_rl)");
7084         children2 = SYSCTL_CHILDREN(oid);
7085         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
7086             CTLFLAG_RW, &pi->sched_params->pktsize, 0,
7087             "pktsize for per-flow cl-rl (0 means up to the driver)");
7088         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
7089             CTLFLAG_RW, &pi->sched_params->burstsize, 0,
7090             "burstsize for per-flow cl-rl (0 means up to the driver)");
7091         for (i = 0; i < sc->chip_params->nsched_cls; i++) {
7092                 struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
7093
7094                 snprintf(name, sizeof(name), "%d", i);
7095                 children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
7096                     SYSCTL_CHILDREN(oid), OID_AUTO, name,
7097                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "traffic class"));
7098                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
7099                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, tc_flags,
7100                     (uintptr_t)&tc->flags, sysctl_bitfield_8b, "A", "flags");
7101                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
7102                     CTLFLAG_RD, &tc->refcount, 0, "references to this class");
7103                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
7104                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
7105                     (pi->port_id << 16) | i, sysctl_tc_params, "A",
7106                     "traffic class parameters");
7107         }
7108
7109         /*
7110          * dev.cxgbe.X.stats.
7111          */
7112         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats",
7113             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "port statistics");
7114         children = SYSCTL_CHILDREN(oid);
7115         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
7116             &pi->tx_parse_error, 0,
7117             "# of tx packets with invalid length or # of segments");
7118
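     /* These counters are read live from the MPS port stat registers. */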
7119 #define T4_REGSTAT(name, stat, desc) \
7120     SYSCTL_ADD_OID(ctx, children, OID_AUTO, #name, \
7121         CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, \
7122         (is_t4(sc) ? PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_##stat##_L) : \
7123         T5_PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_##stat##_L)), \
7124         sysctl_handle_t4_reg64, "QU", desc)
7125
7126 /* We get these from port_stats and they may be stale by up to 1s */
7127 #define T4_PORTSTAT(name, desc) \
7128         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
7129             &pi->stats.name, desc)
7130
7131         T4_REGSTAT(tx_octets, TX_PORT_BYTES, "# of octets in good frames");
7132         T4_REGSTAT(tx_frames, TX_PORT_FRAMES, "total # of good frames");
7133         T4_REGSTAT(tx_bcast_frames, TX_PORT_BCAST, "# of broadcast frames");
7134         T4_REGSTAT(tx_mcast_frames, TX_PORT_MCAST, "# of multicast frames");
7135         T4_REGSTAT(tx_ucast_frames, TX_PORT_UCAST, "# of unicast frames");
7136         T4_REGSTAT(tx_error_frames, TX_PORT_ERROR, "# of error frames");
7137         T4_REGSTAT(tx_frames_64, TX_PORT_64B, "# of tx frames in this range");
7138         T4_REGSTAT(tx_frames_65_127, TX_PORT_65B_127B, "# of tx frames in this range");
7139         T4_REGSTAT(tx_frames_128_255, TX_PORT_128B_255B, "# of tx frames in this range");
7140         T4_REGSTAT(tx_frames_256_511, TX_PORT_256B_511B, "# of tx frames in this range");
7141         T4_REGSTAT(tx_frames_512_1023, TX_PORT_512B_1023B, "# of tx frames in this range");
7142         T4_REGSTAT(tx_frames_1024_1518, TX_PORT_1024B_1518B, "# of tx frames in this range");
7143         T4_REGSTAT(tx_frames_1519_max, TX_PORT_1519B_MAX, "# of tx frames in this range");
7144         T4_REGSTAT(tx_drop, TX_PORT_DROP, "# of dropped tx frames");
7145         T4_REGSTAT(tx_pause, TX_PORT_PAUSE, "# of pause frames transmitted");
7146         T4_REGSTAT(tx_ppp0, TX_PORT_PPP0, "# of PPP prio 0 frames transmitted");
7147         T4_REGSTAT(tx_ppp1, TX_PORT_PPP1, "# of PPP prio 1 frames transmitted");
7148         T4_REGSTAT(tx_ppp2, TX_PORT_PPP2, "# of PPP prio 2 frames transmitted");
7149         T4_REGSTAT(tx_ppp3, TX_PORT_PPP3, "# of PPP prio 3 frames transmitted");
7150         T4_REGSTAT(tx_ppp4, TX_PORT_PPP4, "# of PPP prio 4 frames transmitted");
7151         T4_REGSTAT(tx_ppp5, TX_PORT_PPP5, "# of PPP prio 5 frames transmitted");
7152         T4_REGSTAT(tx_ppp6, TX_PORT_PPP6, "# of PPP prio 6 frames transmitted");
7153         T4_REGSTAT(tx_ppp7, TX_PORT_PPP7, "# of PPP prio 7 frames transmitted");
7154
7155         T4_REGSTAT(rx_octets, RX_PORT_BYTES, "# of octets in good frames");
7156         T4_REGSTAT(rx_frames, RX_PORT_FRAMES, "total # of good frames");
7157         T4_REGSTAT(rx_bcast_frames, RX_PORT_BCAST, "# of broadcast frames");
7158         T4_REGSTAT(rx_mcast_frames, RX_PORT_MCAST, "# of multicast frames");
7159         T4_REGSTAT(rx_ucast_frames, RX_PORT_UCAST, "# of unicast frames");
7160         T4_REGSTAT(rx_too_long, RX_PORT_MTU_ERROR, "# of frames exceeding MTU");
7161         T4_REGSTAT(rx_jabber, RX_PORT_MTU_CRC_ERROR, "# of jabber frames");
7162         if (is_t6(sc)) {
7163                 T4_PORTSTAT(rx_fcs_err,
7164                     "# of frames received with bad FCS since last link up");
7165         } else {
7166                 T4_REGSTAT(rx_fcs_err, RX_PORT_CRC_ERROR,
7167                     "# of frames received with bad FCS");
7168         }
7169         T4_REGSTAT(rx_len_err, RX_PORT_LEN_ERROR, "# of frames received with length error");
7170         T4_REGSTAT(rx_symbol_err, RX_PORT_SYM_ERROR, "symbol errors");
7171         T4_REGSTAT(rx_runt, RX_PORT_LESS_64B, "# of short frames received");
7172         T4_REGSTAT(rx_frames_64, RX_PORT_64B, "# of rx frames in this range");
7173         T4_REGSTAT(rx_frames_65_127, RX_PORT_65B_127B, "# of rx frames in this range");
7174         T4_REGSTAT(rx_frames_128_255, RX_PORT_128B_255B, "# of rx frames in this range");
7175         T4_REGSTAT(rx_frames_256_511, RX_PORT_256B_511B, "# of rx frames in this range");
7176         T4_REGSTAT(rx_frames_512_1023, RX_PORT_512B_1023B, "# of rx frames in this range");
7177         T4_REGSTAT(rx_frames_1024_1518, RX_PORT_1024B_1518B, "# of rx frames in this range");
7178         T4_REGSTAT(rx_frames_1519_max, RX_PORT_1519B_MAX, "# of rx frames in this range");
7179         T4_REGSTAT(rx_pause, RX_PORT_PAUSE, "# of pause frames received");
7180         T4_REGSTAT(rx_ppp0, RX_PORT_PPP0, "# of PPP prio 0 frames received");
7181         T4_REGSTAT(rx_ppp1, RX_PORT_PPP1, "# of PPP prio 1 frames received");
7182         T4_REGSTAT(rx_ppp2, RX_PORT_PPP2, "# of PPP prio 2 frames received");
7183         T4_REGSTAT(rx_ppp3, RX_PORT_PPP3, "# of PPP prio 3 frames received");
7184         T4_REGSTAT(rx_ppp4, RX_PORT_PPP4, "# of PPP prio 4 frames received");
7185         T4_REGSTAT(rx_ppp5, RX_PORT_PPP5, "# of PPP prio 5 frames received");
7186         T4_REGSTAT(rx_ppp6, RX_PORT_PPP6, "# of PPP prio 6 frames received");
7187         T4_REGSTAT(rx_ppp7, RX_PORT_PPP7, "# of PPP prio 7 frames received");
7188
7189         T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows");
7190         T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows");
7191         T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows");
7192         T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows");
7193         T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets");
7194         T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets");
7195         T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets");
7196         T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets");
7197
7198 #undef T4_REGSTAT
7199 #undef T4_PORTSTAT
7200
7201         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_toe_tls_records",
7202             CTLFLAG_RD, &pi->tx_toe_tls_records,
7203             "# of TOE TLS records transmitted");
7204         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_toe_tls_octets",
7205             CTLFLAG_RD, &pi->tx_toe_tls_octets,
7206             "# of payload octets in transmitted TOE TLS records");
7207         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_toe_tls_records",
7208             CTLFLAG_RD, &pi->rx_toe_tls_records,
7209             "# of TOE TLS records received");
7210         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_toe_tls_octets",
7211             CTLFLAG_RD, &pi->rx_toe_tls_octets,
7212             "# of payload octets in received TOE TLS records");
7213 }
7214
7215 static int
7216 sysctl_int_array(SYSCTL_HANDLER_ARGS)
7217 {
7218         int rc, *i, space = 0;
7219         struct sbuf sb;
7220
7221         sbuf_new_for_sysctl(&sb, NULL, 64, req);
7222         for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
7223                 if (space)
7224                         sbuf_printf(&sb, " ");
7225                 sbuf_printf(&sb, "%d", *i);
7226                 space = 1;
7227         }
7228         rc = sbuf_finish(&sb);
7229         sbuf_delete(&sb);
7230         return (rc);
7231 }
7232
7233 static int
7234 sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
7235 {
7236         int rc;
7237         struct sbuf *sb;
7238
7239         rc = sysctl_wire_old_buffer(req, 0);
7240         if (rc != 0)
7241                 return (rc);
7242
7243         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7244         if (sb == NULL)
7245                 return (ENOMEM);
7246
7247         sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
7248         rc = sbuf_finish(sb);
7249         sbuf_delete(sb);
7250
7251         return (rc);
7252 }
7253
7254 static int
7255 sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
7256 {
7257         int rc;
7258         struct sbuf *sb;
7259
7260         rc = sysctl_wire_old_buffer(req, 0);
7261         if (rc != 0)
7262                 return (rc);
7263
7264         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7265         if (sb == NULL)
7266                 return (ENOMEM);
7267
7268         sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
7269         rc = sbuf_finish(sb);
7270         sbuf_delete(sb);
7271
7272         return (rc);
7273 }
7274
7275 static int
7276 sysctl_btphy(SYSCTL_HANDLER_ARGS)
7277 {
7278         struct port_info *pi = arg1;
7279         int op = arg2;
7280         struct adapter *sc = pi->adapter;
7281         u_int v;
7282         int rc;
7283
7284         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
7285         if (rc)
7286                 return (rc);
7287         /*
              * XXX: magic numbers.  Both ops read the PHY's vendor-specific
              * MMD (0x1e): op 1 is the firmware version (reg 0x20) and op 0
              * the temperature (reg 0xc820, in units of 1/256 degC).
              */
7288         rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
7289             &v);
7290         end_synchronized_op(sc, 0);
7291         if (rc)
7292                 return (rc);
7293         if (op == 0)
7294                 v /= 256;
7295
7296         rc = sysctl_handle_int(oidp, &v, 0, req);
7297         return (rc);
7298 }
7299
7300 static int
7301 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
7302 {
7303         struct vi_info *vi = arg1;
7304         int rc, val;
7305
7306         val = vi->rsrv_noflowq;
7307         rc = sysctl_handle_int(oidp, &val, 0, req);
7308         if (rc != 0 || req->newptr == NULL)
7309                 return (rc);
7310
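             /* Reserving txq 0 makes sense only when there are other txqs. */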
7311         if ((val >= 1) && (vi->ntxq > 1))
7312                 vi->rsrv_noflowq = 1;
7313         else
7314                 vi->rsrv_noflowq = 0;
7315
7316         return (rc);
7317 }
7318
7319 static int
7320 sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS)
7321 {
7322         struct vi_info *vi = arg1;
7323         struct adapter *sc = vi->adapter;
7324         int rc, val, i;
7325
7326         MPASS(!(sc->flags & IS_VF));
7327
7328         val = vi->flags & TX_USES_VM_WR ? 1 : 0;
7329         rc = sysctl_handle_int(oidp, &val, 0, req);
7330         if (rc != 0 || req->newptr == NULL)
7331                 return (rc);
7332
7333         if (val != 0 && val != 1)
7334                 return (EINVAL);
7335
7336         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7337             "t4txvm");
7338         if (rc)
7339                 return (rc);
7340         if (vi->ifp->if_drv_flags & IFF_DRV_RUNNING) {
7341                 /*
7342                  * We don't want parse_pkt to run with one setting (VF or PF)
7343                  * and then eth_tx to see a different setting but still use
7344                  * stale information calculated by parse_pkt.
7345                  */
7346                 rc = EBUSY;
7347         } else {
7348                 struct port_info *pi = vi->pi;
7349                 struct sge_txq *txq;
7350                 uint32_t ctrl0;
7351                 uint8_t npkt = sc->params.max_pkts_per_eth_tx_pkts_wr;
7352
7353                 if (val) {
7354                         vi->flags |= TX_USES_VM_WR;
7355                         vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
7356                         ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
7357                             V_TXPKT_INTF(pi->tx_chan));
7358                         if (!(sc->flags & IS_VF))
7359                                 npkt--;
7360                 } else {
7361                         vi->flags &= ~TX_USES_VM_WR;
7362                         vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
7363                         ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
7364                             V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
7365                             V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
7366                 }
7367                 for_each_txq(vi, i, txq) {
7368                         txq->cpl_ctrl0 = ctrl0;
7369                         txq->txp.max_npkt = npkt;
7370                 }
7371         }
7372         end_synchronized_op(sc, LOCK_HELD);
7373         return (rc);
7374 }
7375
7376 static int
7377 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
7378 {
7379         struct vi_info *vi = arg1;
7380         struct adapter *sc = vi->adapter;
7381         int idx, rc, i;
7382         struct sge_rxq *rxq;
7383         uint8_t v;
7384
7385         idx = vi->tmr_idx;
7386
7387         rc = sysctl_handle_int(oidp, &idx, 0, req);
7388         if (rc != 0 || req->newptr == NULL)
7389                 return (rc);
7390
7391         if (idx < 0 || idx >= SGE_NTIMERS)
7392                 return (EINVAL);
7393
7394         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7395             "t4tmr");
7396         if (rc)
7397                 return (rc);
7398
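             /*
              * Build the new interrupt params and push them to every rxq,
              * atomically where the platform has an 8-bit release store.
              */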
7399         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
7400         for_each_rxq(vi, i, rxq) {
7401 #ifdef atomic_store_rel_8
7402                 atomic_store_rel_8(&rxq->iq.intr_params, v);
7403 #else
7404                 rxq->iq.intr_params = v;
7405 #endif
7406         }
7407         vi->tmr_idx = idx;
7408
7409         end_synchronized_op(sc, LOCK_HELD);
7410         return (0);
7411 }
7412
7413 static int
7414 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
7415 {
7416         struct vi_info *vi = arg1;
7417         struct adapter *sc = vi->adapter;
7418         int idx, rc;
7419
7420         idx = vi->pktc_idx;
7421
7422         rc = sysctl_handle_int(oidp, &idx, 0, req);
7423         if (rc != 0 || req->newptr == NULL)
7424                 return (rc);
7425
7426         if (idx < -1 || idx >= SGE_NCOUNTERS)
7427                 return (EINVAL);
7428
7429         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7430             "t4pktc");
7431         if (rc)
7432                 return (rc);
7433
7434         if (vi->flags & VI_INIT_DONE)
7435                 rc = EBUSY; /* cannot be changed once the queues are created */
7436         else
7437                 vi->pktc_idx = idx;
7438
7439         end_synchronized_op(sc, LOCK_HELD);
7440         return (rc);
7441 }
7442
7443 static int
7444 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
7445 {
7446         struct vi_info *vi = arg1;
7447         struct adapter *sc = vi->adapter;
7448         int qsize, rc;
7449
7450         qsize = vi->qsize_rxq;
7451
7452         rc = sysctl_handle_int(oidp, &qsize, 0, req);
7453         if (rc != 0 || req->newptr == NULL)
7454                 return (rc);
7455
7456         if (qsize < 128 || (qsize & 7))
7457                 return (EINVAL);
7458
7459         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7460             "t4rxqs");
7461         if (rc)
7462                 return (rc);
7463
7464         if (vi->flags & VI_INIT_DONE)
7465                 rc = EBUSY; /* cannot be changed once the queues are created */
7466         else
7467                 vi->qsize_rxq = qsize;
7468
7469         end_synchronized_op(sc, LOCK_HELD);
7470         return (rc);
7471 }
7472
7473 static int
7474 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
7475 {
7476         struct vi_info *vi = arg1;
7477         struct adapter *sc = vi->adapter;
7478         int qsize, rc;
7479
7480         qsize = vi->qsize_txq;
7481
7482         rc = sysctl_handle_int(oidp, &qsize, 0, req);
7483         if (rc != 0 || req->newptr == NULL)
7484                 return (rc);
7485
7486         if (qsize < 128 || qsize > 65536)
7487                 return (EINVAL);
7488
7489         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7490             "t4txqs");
7491         if (rc)
7492                 return (rc);
7493
7494         if (vi->flags & VI_INIT_DONE)
7495                 rc = EBUSY; /* cannot be changed once the queues are created */
7496         else
7497                 vi->qsize_txq = qsize;
7498
7499         end_synchronized_op(sc, LOCK_HELD);
7500         return (rc);
7501 }
7502
7503 static int
7504 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
7505 {
7506         struct port_info *pi = arg1;
7507         struct adapter *sc = pi->adapter;
7508         struct link_config *lc = &pi->link_cfg;
7509         int rc;
7510
7511         if (req->newptr == NULL) {
7512                 struct sbuf *sb;
7513                 static char *bits = "\20\1RX\2TX\3AUTO";
7514
7515                 rc = sysctl_wire_old_buffer(req, 0);
7516                 if (rc != 0)
7517                         return (rc);
7518
7519                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7520                 if (sb == NULL)
7521                         return (ENOMEM);
7522
7523                 if (lc->link_ok) {
7524                         sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
7525                             (lc->requested_fc & PAUSE_AUTONEG), bits);
7526                 } else {
7527                         sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
7528                             PAUSE_RX | PAUSE_AUTONEG), bits);
7529                 }
7530                 rc = sbuf_finish(sb);
7531                 sbuf_delete(sb);
7532         } else {
7533                 char s[2];
7534                 int n;
7535
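                     /* The value is a single digit: the 3-bit rx/tx/autoneg mask. */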
7536                 s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
7537                     PAUSE_AUTONEG));
7538                 s[1] = 0;
7539
7540                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7541                 if (rc != 0)
7542                         return (rc);
7543
7544                 if (s[1] != 0)
7545                         return (EINVAL);
7546                 if (s[0] < '0' || s[0] > '9')
7547                         return (EINVAL);        /* not a number */
7548                 n = s[0] - '0';
7549                 if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
7550                         return (EINVAL);        /* some other bit is set too */
7551
7552                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7553                     "t4PAUSE");
7554                 if (rc)
7555                         return (rc);
7556                 PORT_LOCK(pi);
7557                 lc->requested_fc = n;
7558                 fixup_link_config(pi);
7559                 if (pi->up_vis > 0)
7560                         rc = apply_link_config(pi);
7561                 set_current_media(pi);
7562                 PORT_UNLOCK(pi);
7563                 end_synchronized_op(sc, 0);
7564         }
7565
7566         return (rc);
7567 }
7568
7569 static int
7570 sysctl_fec(SYSCTL_HANDLER_ARGS)
7571 {
7572         struct port_info *pi = arg1;
7573         struct adapter *sc = pi->adapter;
7574         struct link_config *lc = &pi->link_cfg;
7575         int rc;
7576         int8_t old;
7577
7578         if (req->newptr == NULL) {
7579                 struct sbuf *sb;
7580                 static char *bits = "\20\1RS-FEC\2FC-FEC\3NO-FEC\4RSVD2"
7581                     "\5RSVD3\6auto\7module";
7582
7583                 rc = sysctl_wire_old_buffer(req, 0);
7584                 if (rc != 0)
7585                         return (rc);
7586
7587                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7588                 if (sb == NULL)
7589                         return (ENOMEM);
7590
7591                 /*
7592                  * Display the requested_fec when the link is down -- the actual
7593                  * FEC makes sense only when the link is up.
7594                  */
7595                 if (lc->link_ok) {
7596                         sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
7597                             (lc->requested_fec & (FEC_AUTO | FEC_MODULE)),
7598                             bits);
7599                 } else {
7600                         sbuf_printf(sb, "%b", lc->requested_fec, bits);
7601                 }
7602                 rc = sbuf_finish(sb);
7603                 sbuf_delete(sb);
7604         } else {
7605                 char s[8];
7606                 int n;
7607
7608                 snprintf(s, sizeof(s), "%d",
7609                     lc->requested_fec == FEC_AUTO ? -1 :
7610                     lc->requested_fec & (M_FW_PORT_CAP32_FEC | FEC_MODULE));
7611
7612                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7613                 if (rc != 0)
7614                         return (rc);
7615
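                     /* Any negative value (reported as -1) selects automatic FEC. */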
7616                 n = strtol(&s[0], NULL, 0);
7617                 if (n < 0 || n & FEC_AUTO)
7618                         n = FEC_AUTO;
7619                 else if (n & ~(M_FW_PORT_CAP32_FEC | FEC_MODULE))
7620                         return (EINVAL);        /* some other bit is set too */
7621
7622                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7623                     "t4fec");
7624                 if (rc)
7625                         return (rc);
7626                 PORT_LOCK(pi);
7627                 old = lc->requested_fec;
7628                 if (n == FEC_AUTO)
7629                         lc->requested_fec = FEC_AUTO;
7630                 else if (n == 0 || n == FEC_NONE)
7631                         lc->requested_fec = FEC_NONE;
7632                 else {
7633                         if ((lc->pcaps |
7634                             V_FW_PORT_CAP32_FEC(n & M_FW_PORT_CAP32_FEC)) !=
7635                             lc->pcaps) {
7636                                 rc = ENOTSUP;
7637                                 goto done;
7638                         }
7639                         lc->requested_fec = n & (M_FW_PORT_CAP32_FEC |
7640                             FEC_MODULE);
7641                 }
7642                 fixup_link_config(pi);
7643                 if (pi->up_vis > 0) {
7644                         rc = apply_link_config(pi);
7645                         if (rc != 0) {
7646                                 lc->requested_fec = old;
7647                                 if (rc == FW_EPROTO)
7648                                         rc = ENOTSUP;
7649                         }
7650                 }
7651 done:
7652                 PORT_UNLOCK(pi);
7653                 end_synchronized_op(sc, 0);
7654         }
7655
7656         return (rc);
7657 }
7658
7659 static int
7660 sysctl_module_fec(SYSCTL_HANDLER_ARGS)
7661 {
7662         struct port_info *pi = arg1;
7663         struct adapter *sc = pi->adapter;
7664         struct link_config *lc = &pi->link_cfg;
7665         int rc;
7666         int8_t fec;
7667         struct sbuf *sb;
7668         static char *bits = "\20\1RS-FEC\2FC-FEC\3NO-FEC\4RSVD2\5RSVD3";
7669
7670         rc = sysctl_wire_old_buffer(req, 0);
7671         if (rc != 0)
7672                 return (rc);
7673
7674         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7675         if (sb == NULL)
7676                 return (ENOMEM);
7677
7678         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mfec") != 0) {
                     sbuf_delete(sb);        /* don't leak the sbuf allocated above */
7679                 return (EBUSY);
             }
7680         PORT_LOCK(pi);
7681         if (pi->up_vis == 0) {
7682                 /*
7683                  * If all the interfaces are administratively down the firmware
7684                  * does not report transceiver changes.  Refresh port info here.
7685                  * This is the only reason we have a synchronized op in this
7686                  * function.  Just PORT_LOCK would have been enough otherwise.
7687                  */
7688                 t4_update_port_info(pi);
7689         }
7690
7691         fec = lc->fec_hint;
7692         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE ||
7693             !fec_supported(lc->pcaps)) {
7694                 sbuf_printf(sb, "n/a");
7695         } else {
7696                 if (fec == 0)
7697                         fec = FEC_NONE;
7698                 sbuf_printf(sb, "%b", fec & M_FW_PORT_CAP32_FEC, bits);
7699         }
7700         rc = sbuf_finish(sb);
7701         sbuf_delete(sb);
7702
7703         PORT_UNLOCK(pi);
7704         end_synchronized_op(sc, 0);
7705
7706         return (rc);
7707 }
7708
7709 static int
7710 sysctl_autoneg(SYSCTL_HANDLER_ARGS)
7711 {
7712         struct port_info *pi = arg1;
7713         struct adapter *sc = pi->adapter;
7714         struct link_config *lc = &pi->link_cfg;
7715         int rc, val;
7716
7717         if (lc->pcaps & FW_PORT_CAP32_ANEG)
7718                 val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
7719         else
7720                 val = -1;
7721         rc = sysctl_handle_int(oidp, &val, 0, req);
7722         if (rc != 0 || req->newptr == NULL)
7723                 return (rc);
7724         if (val == 0)
7725                 val = AUTONEG_DISABLE;
7726         else if (val == 1)
7727                 val = AUTONEG_ENABLE;
7728         else
7729                 val = AUTONEG_AUTO;
7730
7731         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7732             "t4aneg");
7733         if (rc)
7734                 return (rc);
7735         PORT_LOCK(pi);
7736         if (val == AUTONEG_ENABLE && !(lc->pcaps & FW_PORT_CAP32_ANEG)) {
7737                 rc = ENOTSUP;
7738                 goto done;
7739         }
7740         lc->requested_aneg = val;
7741         fixup_link_config(pi);
7742         if (pi->up_vis > 0)
7743                 rc = apply_link_config(pi);
7744         set_current_media(pi);
7745 done:
7746         PORT_UNLOCK(pi);
7747         end_synchronized_op(sc, 0);
7748         return (rc);
7749 }
7750
7751 static int
7752 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
7753 {
7754         struct adapter *sc = arg1;
7755         int reg = arg2;
7756         uint64_t val;
7757
7758         val = t4_read_reg64(sc, reg);
7759
7760         return (sysctl_handle_64(oidp, &val, 0, req));
7761 }
7762
7763 static int
7764 sysctl_temperature(SYSCTL_HANDLER_ARGS)
7765 {
7766         struct adapter *sc = arg1;
7767         int rc, t;
7768         uint32_t param, val;
7769
7770         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
7771         if (rc)
7772                 return (rc);
7773         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7774             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7775             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
7776         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7777         end_synchronized_op(sc, 0);
7778         if (rc)
7779                 return (rc);
7780
7781         /* unknown is returned as 0 but we display -1 in that case */
7782         t = val == 0 ? -1 : val;
7783
7784         rc = sysctl_handle_int(oidp, &t, 0, req);
7785         return (rc);
7786 }
7787
7788 static int
7789 sysctl_vdd(SYSCTL_HANDLER_ARGS)
7790 {
7791         struct adapter *sc = arg1;
7792         int rc;
7793         uint32_t param, val;
7794
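             /* Query the firmware only on first use; the result is cached. */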
7795         if (sc->params.core_vdd == 0) {
7796                 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
7797                     "t4vdd");
7798                 if (rc)
7799                         return (rc);
7800                 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7801                     V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7802                     V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
7803                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7804                 end_synchronized_op(sc, 0);
7805                 if (rc)
7806                         return (rc);
7807                 sc->params.core_vdd = val;
7808         }
7809
7810         return (sysctl_handle_int(oidp, &sc->params.core_vdd, 0, req));
7811 }
7812
7813 static int
7814 sysctl_reset_sensor(SYSCTL_HANDLER_ARGS)
7815 {
7816         struct adapter *sc = arg1;
7817         int rc, v;
7818         uint32_t param, val;
7819
7820         v = sc->sensor_resets;
7821         rc = sysctl_handle_int(oidp, &v, 0, req);
7822         if (rc != 0 || req->newptr == NULL || v <= 0)
7823                 return (rc);
7824
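             /* A positive value written here requests a sensor reset. */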
7825         if (sc->params.fw_vers < FW_VERSION32(1, 24, 7, 0) ||
7826             chip_id(sc) < CHELSIO_T5)
7827                 return (ENOTSUP);
7828
7829         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4srst");
7830         if (rc)
7831                 return (rc);
7832         param = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7833             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7834             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_RESET_TMP_SENSOR));
7835         val = 1;
7836         rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7837         end_synchronized_op(sc, 0);
7838         if (rc == 0)
7839                 sc->sensor_resets++;
7840         return (rc);
7841 }
7842
7843 static int
7844 sysctl_loadavg(SYSCTL_HANDLER_ARGS)
7845 {
7846         struct adapter *sc = arg1;
7847         struct sbuf *sb;
7848         int rc;
7849         uint32_t param, val;
7850
7851         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
7852         if (rc)
7853                 return (rc);
7854         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7855             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
7856         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7857         end_synchronized_op(sc, 0);
7858         if (rc)
7859                 return (rc);
7860
7861         rc = sysctl_wire_old_buffer(req, 0);
7862         if (rc != 0)
7863                 return (rc);
7864
7865         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7866         if (sb == NULL)
7867                 return (ENOMEM);
7868
7869         if (val == 0xffffffff) {
7870                 /* Only debug and custom firmwares report load averages. */
7871                 sbuf_printf(sb, "not available");
7872         } else {
7873                 sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
7874                     (val >> 16) & 0xff);
7875         }
7876         rc = sbuf_finish(sb);
7877         sbuf_delete(sb);
7878
7879         return (rc);
7880 }
7881
7882 static int
7883 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
7884 {
7885         struct adapter *sc = arg1;
7886         struct sbuf *sb;
7887         int rc, i;
7888         uint16_t incr[NMTUS][NCCTRL_WIN];
7889         static const char *dec_fac[] = {
7890                 "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
7891                 "0.9375"
7892         };
7893
7894         rc = sysctl_wire_old_buffer(req, 0);
7895         if (rc != 0)
7896                 return (rc);
7897
7898         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7899         if (sb == NULL)
7900                 return (ENOMEM);
7901
7902         t4_read_cong_tbl(sc, incr);
7903
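             /* Each congestion window prints its 16 per-MTU increments over two lines. */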
7904         for (i = 0; i < NCCTRL_WIN; ++i) {
7905                 sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
7906                     incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
7907                     incr[5][i], incr[6][i], incr[7][i]);
7908                 sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
7909                     incr[8][i], incr[9][i], incr[10][i], incr[11][i],
7910                     incr[12][i], incr[13][i], incr[14][i], incr[15][i],
7911                     sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
7912         }
7913
7914         rc = sbuf_finish(sb);
7915         sbuf_delete(sb);
7916
7917         return (rc);
7918 }
7919
7920 static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
7921         "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",   /* ibq's */
7922         "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */
7923         "SGE0-RX", "SGE1-RX"    /* additional obq's (T5 onwards) */
7924 };
7925
7926 static int
7927 sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
7928 {
7929         struct adapter *sc = arg1;
7930         struct sbuf *sb;
7931         int rc, i, n, qid = arg2;
7932         uint32_t *buf, *p;
7933         char *qtype;
7934         u_int cim_num_obq = sc->chip_params->cim_num_obq;
7935
7936         KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
7937             ("%s: bad qid %d\n", __func__, qid));
7938
7939         if (qid < CIM_NUM_IBQ) {
7940                 /* inbound queue */
7941                 qtype = "IBQ";
7942                 n = 4 * CIM_IBQ_SIZE;
7943                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7944                 rc = t4_read_cim_ibq(sc, qid, buf, n);
7945         } else {
7946                 /* outbound queue */
7947                 qtype = "OBQ";
7948                 qid -= CIM_NUM_IBQ;
7949                 n = 4 * cim_num_obq * CIM_OBQ_SIZE;
7950                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7951                 rc = t4_read_cim_obq(sc, qid, buf, n);
7952         }
7953
7954         if (rc < 0) {
7955                 rc = -rc;
7956                 goto done;
7957         }
7958         n = rc * sizeof(uint32_t);      /* rc has # of words actually read */
7959
7960         rc = sysctl_wire_old_buffer(req, 0);
7961         if (rc != 0)
7962                 goto done;
7963
7964         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7965         if (sb == NULL) {
7966                 rc = ENOMEM;
7967                 goto done;
7968         }
7969
7970         sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);
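             /* Hex dump the queue contents, four 32-bit words per line. */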
7971         for (i = 0, p = buf; i < n; i += 16, p += 4)
7972                 sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
7973                     p[2], p[3]);
7974
7975         rc = sbuf_finish(sb);
7976         sbuf_delete(sb);
7977 done:
7978         free(buf, M_CXGBE);
7979         return (rc);
7980 }
7981
7982 static void
7983 sbuf_cim_la4(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
7984 {
7985         uint32_t *p;
7986
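             /*
              * Each T4/T5 LA entry is 8 words; the layout depends on whether
              * the capture recorded the PC only (F_UPDBGLACAPTPCONLY).
              */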
7987         sbuf_printf(sb, "Status   Data      PC%s",
7988             cfg & F_UPDBGLACAPTPCONLY ? "" :
7989             "     LS0Stat  LS0Addr             LS0Data");
7990
7991         for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
7992                 if (cfg & F_UPDBGLACAPTPCONLY) {
7993                         sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
7994                             p[6], p[7]);
7995                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
7996                             (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
7997                             p[4] & 0xff, p[5] >> 8);
7998                         sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
7999                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
8000                             p[1] & 0xf, p[2] >> 4);
8001                 } else {
8002                         sbuf_printf(sb,
8003                             "\n  %02x   %x%07x %x%07x %08x %08x "
8004                             "%08x%08x%08x%08x",
8005                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
8006                             p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
8007                             p[6], p[7]);
8008                 }
8009         }
8010 }
8011
8012 static void
8013 sbuf_cim_la6(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
8014 {
8015         uint32_t *p;
8016
8017         sbuf_printf(sb, "Status   Inst    Data      PC%s",
8018             cfg & F_UPDBGLACAPTPCONLY ? "" :
8019             "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
8020
8021         for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
8022                 if (cfg & F_UPDBGLACAPTPCONLY) {
8023                         sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
8024                             p[3] & 0xff, p[2], p[1], p[0]);
8025                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
8026                             (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
8027                             p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
8028                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
8029                             (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
8030                             p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
8031                             p[6] >> 16);
8032                 } else {
8033                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
8034                             "%08x %08x %08x %08x %08x %08x",
8035                             (p[9] >> 16) & 0xff,
8036                             p[9] & 0xffff, p[8] >> 16,
8037                             p[8] & 0xffff, p[7] >> 16,
8038                             p[7] & 0xffff, p[6] >> 16,
8039                             p[2], p[1], p[0], p[5], p[4], p[3]);
8040                 }
8041         }
8042 }
8043
8044 static int
8045 sbuf_cim_la(struct adapter *sc, struct sbuf *sb, int flags)
8046 {
8047         uint32_t cfg, *buf;
8048         int rc;
8049
8050         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
8051         if (rc != 0)
8052                 return (rc);
8053
8054         MPASS(flags == M_WAITOK || flags == M_NOWAIT);
8055         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
8056             M_ZERO | flags);
8057         if (buf == NULL)
8058                 return (ENOMEM);
8059
8060         rc = -t4_cim_read_la(sc, buf, NULL);
8061         if (rc != 0)
8062                 goto done;
8063         if (chip_id(sc) < CHELSIO_T6)
8064                 sbuf_cim_la4(sc, sb, buf, cfg);
8065         else
8066                 sbuf_cim_la6(sc, sb, buf, cfg);
8067
8068 done:
8069         free(buf, M_CXGBE);
8070         return (rc);
8071 }
8072
8073 static int
8074 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
8075 {
8076         struct adapter *sc = arg1;
8077         struct sbuf *sb;
8078         int rc;
8079
8080         rc = sysctl_wire_old_buffer(req, 0);
8081         if (rc != 0)
8082                 return (rc);
8083         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8084         if (sb == NULL)
8085                 return (ENOMEM);
8086
8087         rc = sbuf_cim_la(sc, sb, M_WAITOK);
8088         if (rc == 0)
8089                 rc = sbuf_finish(sb);
8090         sbuf_delete(sb);
8091         return (rc);
8092 }
8093
8094 bool
8095 t4_os_dump_cimla(struct adapter *sc, int arg, bool verbose)
8096 {
8097         struct sbuf sb;
8098         int rc;
8099
8100         if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
8101                 return (false);
8102         rc = sbuf_cim_la(sc, &sb, M_NOWAIT);
8103         if (rc == 0) {
8104                 rc = sbuf_finish(&sb);
8105                 if (rc == 0) {
8106                         log(LOG_DEBUG, "%s: CIM LA dump follows.\n%s",
8107                                 device_get_nameunit(sc->dev), sbuf_data(&sb));
8108                 }
8109         }
8110         sbuf_delete(&sb);
8111         return (false);
8112 }
8113
8114 static int
8115 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
8116 {
8117         struct adapter *sc = arg1;
8118         u_int i;
8119         struct sbuf *sb;
8120         uint32_t *buf, *p;
8121         int rc;
8122
8123         rc = sysctl_wire_old_buffer(req, 0);
8124         if (rc != 0)
8125                 return (rc);
8126
8127         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8128         if (sb == NULL)
8129                 return (ENOMEM);
8130
8131         buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
8132             M_ZERO | M_WAITOK);
8133
8134         t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
8135         p = buf;
8136
8137         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
8138                 sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
8139                     p[1], p[0]);
8140         }
8141
8142         sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
8143         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
8144                 sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
8145                     (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
8146                     (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
8147                     (p[1] >> 2) | ((p[2] & 3) << 30),
8148                     (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
8149                     p[0] & 1);
8150         }
8151
8152         rc = sbuf_finish(sb);
8153         sbuf_delete(sb);
8154         free(buf, M_CXGBE);
8155         return (rc);
8156 }
8157
8158 static int
8159 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
8160 {
8161         struct adapter *sc = arg1;
8162         u_int i;
8163         struct sbuf *sb;
8164         uint32_t *buf, *p;
8165         int rc;
8166
8167         rc = sysctl_wire_old_buffer(req, 0);
8168         if (rc != 0)
8169                 return (rc);
8170
8171         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8172         if (sb == NULL)
8173                 return (ENOMEM);
8174
8175         buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
8176             M_ZERO | M_WAITOK);
8177
8178         t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
8179         p = buf;
8180
8181         sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
8182         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
8183                 sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
8184                     (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
8185                     p[4], p[3], p[2], p[1], p[0]);
8186         }
8187
8188         sbuf_printf(sb, "\n\nCntl ID               Data");
8189         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
8190                 sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
8191                     (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
8192         }
8193
8194         rc = sbuf_finish(sb);
8195         sbuf_delete(sb);
8196         free(buf, M_CXGBE);
8197         return (rc);
8198 }
8199
8200 static int
8201 sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
8202 {
8203         struct adapter *sc = arg1;
8204         struct sbuf *sb;
8205         int rc, i;
8206         uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
8207         uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
8208         uint16_t thres[CIM_NUM_IBQ];
8209         uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
8210         uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
8211         u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
8212
8213         cim_num_obq = sc->chip_params->cim_num_obq;
8214         if (is_t4(sc)) {
8215                 ibq_rdaddr = A_UP_IBQ_0_RDADDR;
8216                 obq_rdaddr = A_UP_OBQ_0_REALADDR;
8217         } else {
8218                 ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
8219                 obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
8220         }
8221         nq = CIM_NUM_IBQ + cim_num_obq;
8222
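             /* Read 4 words of pointer state per queue, then the OBQ write pointers. */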
8223         rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
8224         if (rc == 0)
8225                 rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
8226         if (rc != 0)
8227                 return (rc);
8228
8229         t4_read_cimq_cfg(sc, base, size, thres);
8230
8231         rc = sysctl_wire_old_buffer(req, 0);
8232         if (rc != 0)
8233                 return (rc);
8234
8235         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
8236         if (sb == NULL)
8237                 return (ENOMEM);
8238
8239         sbuf_printf(sb,
8240             "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
8241
8242         for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
8243                 sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
8244                     qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
8245                     G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
8246                     G_QUEREMFLITS(p[2]) * 16);
8247         for ( ; i < nq; i++, p += 4, wr += 2)
8248                 sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
8249                     base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
8250                     wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
8251                     G_QUEREMFLITS(p[2]) * 16);
8252
8253         rc = sbuf_finish(sb);
8254         sbuf_delete(sb);
8255
8256         return (rc);
8257 }
8258
8259 static int
8260 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
8261 {
8262         struct adapter *sc = arg1;
8263         struct sbuf *sb;
8264         int rc;
8265         struct tp_cpl_stats stats;
8266
8267         rc = sysctl_wire_old_buffer(req, 0);
8268         if (rc != 0)
8269                 return (rc);
8270
8271         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8272         if (sb == NULL)
8273                 return (ENOMEM);
8274
8275         mtx_lock(&sc->reg_lock);
8276         t4_tp_get_cpl_stats(sc, &stats, 0);
8277         mtx_unlock(&sc->reg_lock);
8278
8279         if (sc->chip_params->nchan > 2) {
8280                 sbuf_printf(sb, "                 channel 0  channel 1"
8281                     "  channel 2  channel 3");
8282                 sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
8283                     stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
8284                 sbuf_printf(sb, "\nCPL responses:  %10u %10u %10u %10u",
8285                     stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
8286         } else {
8287                 sbuf_printf(sb, "                 channel 0  channel 1");
8288                 sbuf_printf(sb, "\nCPL requests:   %10u %10u",
8289                     stats.req[0], stats.req[1]);
8290                 sbuf_printf(sb, "\nCPL responses:  %10u %10u",
8291                     stats.rsp[0], stats.rsp[1]);
8292         }
8293
8294         rc = sbuf_finish(sb);
8295         sbuf_delete(sb);
8296
8297         return (rc);
8298 }
8299
8300 static int
8301 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
8302 {
8303         struct adapter *sc = arg1;
8304         struct sbuf *sb;
8305         int rc;
8306         struct tp_usm_stats stats;
8307
8308         rc = sysctl_wire_old_buffer(req, 0);
8309         if (rc != 0)
8310                 return (rc);
8311
8312         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8313         if (sb == NULL)
8314                 return (ENOMEM);
8315
8316         mtx_lock(&sc->reg_lock);
8317         t4_get_usm_stats(sc, &stats, 1);
8318         mtx_unlock(&sc->reg_lock);
8319
8320         sbuf_printf(sb, "Frames: %u\n", stats.frames);
8321         sbuf_printf(sb, "Octets: %ju\n", stats.octets);
8322         sbuf_printf(sb, "Drops:  %u", stats.drops);
8323
8324         rc = sbuf_finish(sb);
8325         sbuf_delete(sb);
8326
8327         return (rc);
8328 }
8329
8330 static int
8331 sysctl_tid_stats(SYSCTL_HANDLER_ARGS)
8332 {
8333         struct adapter *sc = arg1;
8334         struct sbuf *sb;
8335         int rc;
8336         struct tp_tid_stats stats;
8337
8338         rc = sysctl_wire_old_buffer(req, 0);
8339         if (rc != 0)
8340                 return (rc);
8341
8342         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8343         if (sb == NULL)
8344                 return (ENOMEM);
8345
8346         mtx_lock(&sc->reg_lock);
8347         t4_tp_get_tid_stats(sc, &stats, 1);
8348         mtx_unlock(&sc->reg_lock);
8349
8350         sbuf_printf(sb, "Delete:     %u\n", stats.del);
8351         sbuf_printf(sb, "Invalidate: %u\n", stats.inv);
8352         sbuf_printf(sb, "Active:     %u\n", stats.act);
8353         sbuf_printf(sb, "Passive:    %u", stats.pas);
8354
8355         rc = sbuf_finish(sb);
8356         sbuf_delete(sb);
8357
8358         return (rc);
8359 }
8360
8361 static const char * const devlog_level_strings[] = {
8362         [FW_DEVLOG_LEVEL_EMERG]         = "EMERG",
8363         [FW_DEVLOG_LEVEL_CRIT]          = "CRIT",
8364         [FW_DEVLOG_LEVEL_ERR]           = "ERR",
8365         [FW_DEVLOG_LEVEL_NOTICE]        = "NOTICE",
8366         [FW_DEVLOG_LEVEL_INFO]          = "INFO",
8367         [FW_DEVLOG_LEVEL_DEBUG]         = "DEBUG"
8368 };
8369
8370 static const char * const devlog_facility_strings[] = {
8371         [FW_DEVLOG_FACILITY_CORE]       = "CORE",
8372         [FW_DEVLOG_FACILITY_CF]         = "CF",
8373         [FW_DEVLOG_FACILITY_SCHED]      = "SCHED",
8374         [FW_DEVLOG_FACILITY_TIMER]      = "TIMER",
8375         [FW_DEVLOG_FACILITY_RES]        = "RES",
8376         [FW_DEVLOG_FACILITY_HW]         = "HW",
8377         [FW_DEVLOG_FACILITY_FLR]        = "FLR",
8378         [FW_DEVLOG_FACILITY_DMAQ]       = "DMAQ",
8379         [FW_DEVLOG_FACILITY_PHY]        = "PHY",
8380         [FW_DEVLOG_FACILITY_MAC]        = "MAC",
8381         [FW_DEVLOG_FACILITY_PORT]       = "PORT",
8382         [FW_DEVLOG_FACILITY_VI]         = "VI",
8383         [FW_DEVLOG_FACILITY_FILTER]     = "FILTER",
8384         [FW_DEVLOG_FACILITY_ACL]        = "ACL",
8385         [FW_DEVLOG_FACILITY_TM]         = "TM",
8386         [FW_DEVLOG_FACILITY_QFC]        = "QFC",
8387         [FW_DEVLOG_FACILITY_DCB]        = "DCB",
8388         [FW_DEVLOG_FACILITY_ETH]        = "ETH",
8389         [FW_DEVLOG_FACILITY_OFLD]       = "OFLD",
8390         [FW_DEVLOG_FACILITY_RI]         = "RI",
8391         [FW_DEVLOG_FACILITY_ISCSI]      = "ISCSI",
8392         [FW_DEVLOG_FACILITY_FCOE]       = "FCOE",
8393         [FW_DEVLOG_FACILITY_FOISCSI]    = "FOISCSI",
8394         [FW_DEVLOG_FACILITY_FOFCOE]     = "FOFCOE",
8395         [FW_DEVLOG_FACILITY_CHNET]      = "CHNET",
8396 };
8397
8398 static int
8399 sbuf_devlog(struct adapter *sc, struct sbuf *sb, int flags)
8400 {
8401         int i, j, rc, nentries, first = 0;
8402         struct devlog_params *dparams = &sc->params.devlog;
8403         struct fw_devlog_e *buf, *e;
8404         uint64_t ftstamp = UINT64_MAX;
8405
8406         if (dparams->addr == 0)
8407                 return (ENXIO);
8408
8409         MPASS(flags == M_WAITOK || flags == M_NOWAIT);
8410         buf = malloc(dparams->size, M_CXGBE, M_ZERO | flags);
8411         if (buf == NULL)
8412                 return (ENOMEM);
8413
8414         rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
8415         if (rc != 0)
8416                 goto done;
8417
8418         nentries = dparams->size / sizeof(struct fw_devlog_e);
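             /* The log is a ring buffer; locate the oldest entry by timestamp. */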
8419         for (i = 0; i < nentries; i++) {
8420                 e = &buf[i];
8421
8422                 if (e->timestamp == 0)
8423                         break;  /* end */
8424
8425                 e->timestamp = be64toh(e->timestamp);
8426                 e->seqno = be32toh(e->seqno);
8427                 for (j = 0; j < 8; j++)
8428                         e->params[j] = be32toh(e->params[j]);
8429
8430                 if (e->timestamp < ftstamp) {
8431                         ftstamp = e->timestamp;
8432                         first = i;
8433                 }
8434         }
8435
8436         if (buf[first].timestamp == 0)
8437                 goto done;      /* nothing in the log */
8438
8439         sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
8440             "Seq#", "Tstamp", "Level", "Facility", "Message");
8441
8442         i = first;
8443         do {
8444                 e = &buf[i];
8445                 if (e->timestamp == 0)
8446                         break;  /* end */
8447
8448                 sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
8449                     e->seqno, e->timestamp,
8450                     (e->level < nitems(devlog_level_strings) ?
8451                         devlog_level_strings[e->level] : "UNKNOWN"),
8452                     (e->facility < nitems(devlog_facility_strings) ?
8453                         devlog_facility_strings[e->facility] : "UNKNOWN"));
8454                 sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
8455                     e->params[2], e->params[3], e->params[4],
8456                     e->params[5], e->params[6], e->params[7]);
8457
8458                 if (++i == nentries)
8459                         i = 0;
8460         } while (i != first);
8461 done:
8462         free(buf, M_CXGBE);
8463         return (rc);
8464 }
8465
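     /*
      * The sysctl handlers below share a common shape: wire the old buffer
      * so that writing the reply out cannot fault mid-handler, build the
      * reply in an sbuf created with sbuf_new_for_sysctl() (which drains
      * directly into the request, so the size passed in is just a hint),
      * then finish and delete the sbuf.
      */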
8466 static int
8467 sysctl_devlog(SYSCTL_HANDLER_ARGS)
8468 {
8469         struct adapter *sc = arg1;
8470         int rc;
8471         struct sbuf *sb;
8472
8473         rc = sysctl_wire_old_buffer(req, 0);
8474         if (rc != 0)
8475                 return (rc);
8476         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8477         if (sb == NULL)
8478                 return (ENOMEM);
8479
8480         rc = sbuf_devlog(sc, sb, M_WAITOK);
8481         if (rc == 0)
8482                 rc = sbuf_finish(sb);
8483         sbuf_delete(sb);
8484         return (rc);
8485 }
8486
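     /*
      * Dump the firmware devlog to the kernel message buffer.  This uses
      * M_NOWAIT, presumably so that it is safe to call from contexts (e.g.
      * error handling) where sleeping for memory is not an option; if the
      * allocation fails the dump is simply skipped.
      */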
8487 void
8488 t4_os_dump_devlog(struct adapter *sc)
8489 {
8490         int rc;
8491         struct sbuf sb;
8492
8493         if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
8494                 return;
8495         rc = sbuf_devlog(sc, &sb, M_NOWAIT);
8496         if (rc == 0) {
8497                 rc = sbuf_finish(&sb);
8498                 if (rc == 0) {
8499                         log(LOG_DEBUG, "%s: device log follows.\n%s",
8500                                 device_get_nameunit(sc->dev), sbuf_data(&sb));
8501                 }
8502         }
8503         sbuf_delete(&sb);
8504 }
8505
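     /*
      * Per-channel FCoE DDP statistics.  The reads happen under reg_lock
      * since t4_get_fcoe_stats() uses indirect TP register accesses, which
      * must be serialized against other indirect reads.
      */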
8506 static int
8507 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
8508 {
8509         struct adapter *sc = arg1;
8510         struct sbuf *sb;
8511         int rc;
8512         struct tp_fcoe_stats stats[MAX_NCHAN];
8513         int i, nchan = sc->chip_params->nchan;
8514
8515         rc = sysctl_wire_old_buffer(req, 0);
8516         if (rc != 0)
8517                 return (rc);
8518
8519         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8520         if (sb == NULL)
8521                 return (ENOMEM);
8522
8523         mtx_lock(&sc->reg_lock);
8524         for (i = 0; i < nchan; i++)
8525                 t4_get_fcoe_stats(sc, i, &stats[i], 1);
8526         mtx_unlock(&sc->reg_lock);
8527
8528         if (nchan > 2) {
8529                 sbuf_printf(sb, "                   channel 0        channel 1"
8530                     "        channel 2        channel 3");
8531                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
8532                     stats[0].octets_ddp, stats[1].octets_ddp,
8533                     stats[2].octets_ddp, stats[3].octets_ddp);
8534                 sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
8535                     stats[0].frames_ddp, stats[1].frames_ddp,
8536                     stats[2].frames_ddp, stats[3].frames_ddp);
8537                 sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
8538                     stats[0].frames_drop, stats[1].frames_drop,
8539                     stats[2].frames_drop, stats[3].frames_drop);
8540         } else {
8541                 sbuf_printf(sb, "                   channel 0        channel 1");
8542                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
8543                     stats[0].octets_ddp, stats[1].octets_ddp);
8544                 sbuf_printf(sb, "\nframesDDP:  %16u %16u",
8545                     stats[0].frames_ddp, stats[1].frames_ddp);
8546                 sbuf_printf(sb, "\nframesDrop: %16u %16u",
8547                     stats[0].frames_drop, stats[1].frames_drop);
8548         }
8549
8550         rc = sbuf_finish(sb);
8551         sbuf_delete(sb);
8552
8553         return (rc);
8554 }
8555
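     /*
      * Show the hardware TX scheduler setup: one row per scheduler with its
      * mode (flow vs. class pacing, per the TIMERMODE bits), the channel it
      * is bound to (2 bits per scheduler in the queue request map), its rate
      * in Kbps, and the class/flow IPG values; zeroes are reported as
      * "disabled".
      */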
8556 static int
8557 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
8558 {
8559         struct adapter *sc = arg1;
8560         struct sbuf *sb;
8561         int rc, i;
8562         unsigned int map, kbps, ipg, mode;
8563         unsigned int pace_tab[NTX_SCHED];
8564
8565         rc = sysctl_wire_old_buffer(req, 0);
8566         if (rc != 0)
8567                 return (rc);
8568
8569         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8570         if (sb == NULL)
8571                 return (ENOMEM);
8572
8573         map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
8574         mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
8575         t4_read_pace_tbl(sc, pace_tab);
8576
8577         sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
8578             "Class IPG (0.1 ns)   Flow IPG (us)");
8579
8580         for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
8581                 t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
8582                 sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
8583                     (mode & (1 << i)) ? "flow" : "class", map & 3);
8584                 if (kbps)
8585                         sbuf_printf(sb, "%9u     ", kbps);
8586                 else
8587                         sbuf_printf(sb, " disabled     ");
8588
8589                 if (ipg)
8590                         sbuf_printf(sb, "%13u        ", ipg);
8591                 else
8592                         sbuf_printf(sb, "     disabled        ");
8593
8594                 if (pace_tab[i])
8595                         sbuf_printf(sb, "%10u", pace_tab[i]);
8596                 else
8597                         sbuf_printf(sb, "  disabled");
8598         }
8599
8600         rc = sbuf_finish(sb);
8601         sbuf_delete(sb);
8602
8603         return (rc);
8604 }
8605
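     /*
      * Loopback port statistics, fetched two channels at a time.  Note that
      * the counters are then walked as a flat array of uint64_t starting at
      * the octets field, which relies on stat_name[] matching the field
      * order of struct lb_port_stats exactly.
      */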
8606 static int
8607 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
8608 {
8609         struct adapter *sc = arg1;
8610         struct sbuf *sb;
8611         int rc, i, j;
8612         uint64_t *p0, *p1;
8613         struct lb_port_stats s[2];
8614         static const char *stat_name[] = {
8615                 "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
8616                 "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
8617                 "Frames128To255:", "Frames256To511:", "Frames512To1023:",
8618                 "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
8619                 "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
8620                 "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
8621                 "BG2FramesTrunc:", "BG3FramesTrunc:"
8622         };
8623
8624         rc = sysctl_wire_old_buffer(req, 0);
8625         if (rc != 0)
8626                 return (rc);
8627
8628         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8629         if (sb == NULL)
8630                 return (ENOMEM);
8631
8632         memset(s, 0, sizeof(s));
8633
8634         for (i = 0; i < sc->chip_params->nchan; i += 2) {
8635                 t4_get_lb_stats(sc, i, &s[0]);
8636                 t4_get_lb_stats(sc, i + 1, &s[1]);
8637
8638                 p0 = &s[0].octets;
8639                 p1 = &s[1].octets;
8640                 sbuf_printf(sb, "%s                       Loopback %u"
8641                     "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
8642
8643                 for (j = 0; j < nitems(stat_name); j++)
8644                         sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
8645                                    *p0++, *p1++);
8646         }
8647
8648         rc = sbuf_finish(sb);
8649         sbuf_delete(sb);
8650
8651         return (rc);
8652 }
8653
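     /*
      * Reason the link last went down.  255 appears to be the "no reason
      * recorded" sentinel, reported as "n/a" just like a link that is up.
      */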
8654 static int
8655 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
8656 {
8657         int rc = 0;
8658         struct port_info *pi = arg1;
8659         struct link_config *lc = &pi->link_cfg;
8660         struct sbuf *sb;
8661
8662         rc = sysctl_wire_old_buffer(req, 0);
8663         if (rc != 0)
8664                 return (rc);
8665         sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
8666         if (sb == NULL)
8667                 return (ENOMEM);
8668
8669         if (lc->link_ok || lc->link_down_rc == 255)
8670                 sbuf_printf(sb, "n/a");
8671         else
8672                 sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
8673
8674         rc = sbuf_finish(sb);
8675         sbuf_delete(sb);
8676
8677         return (rc);
8678 }
8679
8680 struct mem_desc {
8681         unsigned int base;
8682         unsigned int limit;
8683         unsigned int idx;
8684 };
8685
8686 static int
8687 mem_desc_cmp(const void *a, const void *b)
8688 {
8689         return ((const struct mem_desc *)a)->base -
8690                ((const struct mem_desc *)b)->base;
8691 }
8692
8693 static void
8694 mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
8695     unsigned int to)
8696 {
8697         unsigned int size;
8698
8699         if (from == to)
8700                 return;
8701
8702         size = to - from + 1;
8703         if (size == 0)
8704                 return;
8705
8706         /* XXX: need humanize_number(3) in libkern for a more readable 'size' */
8707         sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size);
8708 }
8709
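     /*
      * Render the adapter's memory map.  The available memories (EDC0/1,
      * MC/MC0/MC1) come from the MA enable and BAR registers.  The hardware
      * regions are collected as base addresses (a few with explicit limits),
      * sorted, and each region without a limit borrows the next entry's
      * base - 1.  Holes between the available memories are appended as extra
      * entries so the preceding region's limit is computed correctly; their
      * idx lands past the end of region[] so the display loop skips them.
      */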
8710 static int
8711 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
8712 {
8713         struct adapter *sc = arg1;
8714         struct sbuf *sb;
8715         int rc, i, n;
8716         uint32_t lo, hi, used, alloc;
8717         static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
8718         static const char *region[] = {
8719                 "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
8720                 "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
8721                 "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
8722                 "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
8723                 "RQUDP region:", "PBL region:", "TXPBL region:",
8724                 "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
8725                 "On-chip queues:", "TLS keys:",
8726         };
8727         struct mem_desc avail[4];
8728         struct mem_desc mem[nitems(region) + 3];        /* up to 3 holes */
8729         struct mem_desc *md = mem;
8730
8731         rc = sysctl_wire_old_buffer(req, 0);
8732         if (rc != 0)
8733                 return (rc);
8734
8735         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8736         if (sb == NULL)
8737                 return (ENOMEM);
8738
8739         for (i = 0; i < nitems(mem); i++) {
8740                 mem[i].limit = 0;
8741                 mem[i].idx = i;
8742         }
8743
8744         /* Find and sort the populated memory ranges */
8745         i = 0;
8746         lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
8747         if (lo & F_EDRAM0_ENABLE) {
8748                 hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
8749                 avail[i].base = G_EDRAM0_BASE(hi) << 20;
8750                 avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
8751                 avail[i].idx = 0;
8752                 i++;
8753         }
8754         if (lo & F_EDRAM1_ENABLE) {
8755                 hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
8756                 avail[i].base = G_EDRAM1_BASE(hi) << 20;
8757                 avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
8758                 avail[i].idx = 1;
8759                 i++;
8760         }
8761         if (lo & F_EXT_MEM_ENABLE) {
8762                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
8763                 avail[i].base = G_EXT_MEM_BASE(hi) << 20;
8764                 avail[i].limit = avail[i].base +
8765                     (G_EXT_MEM_SIZE(hi) << 20);
8766                 avail[i].idx = is_t5(sc) ? 3 : 2;       /* Call it MC0 for T5 */
8767                 i++;
8768         }
8769         if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
8770                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
8771                 avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
8772                 avail[i].limit = avail[i].base +
8773                     (G_EXT_MEM1_SIZE(hi) << 20);
8774                 avail[i].idx = 4;
8775                 i++;
8776         }
8777         if (!i) {                                /* no memory available */
                     sbuf_delete(sb);        /* don't leak the sbuf on this early return */
8778                 return (0);
             }
8779         qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
8780
8781         (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
8782         (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
8783         (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
8784         (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
8785         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
8786         (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
8787         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
8788         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
8789         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
8790
8791         /* the next few have explicit upper bounds */
8792         md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
8793         md->limit = md->base - 1 +
8794                     t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
8795                     G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
8796         md++;
8797
8798         md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
8799         md->limit = md->base - 1 +
8800                     t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
8801                     G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
8802         md++;
8803
8804         if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8805                 if (chip_id(sc) <= CHELSIO_T5)
8806                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
8807                 else
8808                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
8809                 md->limit = 0;
8810         } else {
8811                 md->base = 0;
8812                 md->idx = nitems(region);  /* hide it */
8813         }
8814         md++;
8815
8816 #define ulp_region(reg) \
8817         md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
8818         (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
8819
8820         ulp_region(RX_ISCSI);
8821         ulp_region(RX_TDDP);
8822         ulp_region(TX_TPT);
8823         ulp_region(RX_STAG);
8824         ulp_region(RX_RQ);
8825         ulp_region(RX_RQUDP);
8826         ulp_region(RX_PBL);
8827         ulp_region(TX_PBL);
8828 #undef ulp_region
8829
8830         md->base = 0;
8831         md->idx = nitems(region);
8832         if (!is_t4(sc)) {
8833                 uint32_t size = 0;
8834                 uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
8835                 uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
8836
8837                 if (is_t5(sc)) {
8838                         if (sge_ctrl & F_VFIFO_ENABLE)
8839                                 size = G_DBVFIFO_SIZE(fifo_size);
8840                 } else
8841                         size = G_T6_DBVFIFO_SIZE(fifo_size);
8842
8843                 if (size) {
8844                         md->base = G_BASEADDR(t4_read_reg(sc,
8845                             A_SGE_DBVFIFO_BADDR));
8846                         md->limit = md->base + (size << 2) - 1;
8847                 }
8848         }
8849         md++;
8850
8851         md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
8852         md->limit = 0;
8853         md++;
8854         md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
8855         md->limit = 0;
8856         md++;
8857
8858         md->base = sc->vres.ocq.start;
8859         if (sc->vres.ocq.size)
8860                 md->limit = md->base + sc->vres.ocq.size - 1;
8861         else
8862                 md->idx = nitems(region);  /* hide it */
8863         md++;
8864
8865         md->base = sc->vres.key.start;
8866         if (sc->vres.key.size)
8867                 md->limit = md->base + sc->vres.key.size - 1;
8868         else
8869                 md->idx = nitems(region);  /* hide it */
8870         md++;
8871
8872         /* add any address-space holes; there can be up to 3 */
8873         for (n = 0; n < i - 1; n++)
8874                 if (avail[n].limit < avail[n + 1].base)
8875                         (md++)->base = avail[n].limit;
8876         if (avail[n].limit)
8877                 (md++)->base = avail[n].limit;
8878
8879         n = md - mem;
8880         qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
8881
8882         for (lo = 0; lo < i; lo++)
8883                 mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
8884                                 avail[lo].limit - 1);
8885
8886         sbuf_printf(sb, "\n");
8887         for (i = 0; i < n; i++) {
8888                 if (mem[i].idx >= nitems(region))
8889                         continue;                        /* skip holes */
8890                 if (!mem[i].limit)
8891                         mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
8892                 mem_region_show(sb, region[mem[i].idx], mem[i].base,
8893                                 mem[i].limit);
8894         }
8895
8896         sbuf_printf(sb, "\n");
8897         lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
8898         hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
8899         mem_region_show(sb, "uP RAM:", lo, hi);
8900
8901         lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
8902         hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
8903         mem_region_show(sb, "uP Extmem2:", lo, hi);
8904
8905         lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
8906         sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
8907                    G_PMRXMAXPAGE(lo),
8908                    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
8909                    (lo & F_PMRXNUMCHN) ? 2 : 1);
8910
8911         lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
8912         hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
8913         sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
8914                    G_PMTXMAXPAGE(lo),
8915                    hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
8916                    hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
8917         sbuf_printf(sb, "%u p-structs\n",
8918                    t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
8919
8920         for (i = 0; i < 4; i++) {
8921                 if (chip_id(sc) > CHELSIO_T5)
8922                         lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
8923                 else
8924                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
8925                 if (is_t5(sc)) {
8926                         used = G_T5_USED(lo);
8927                         alloc = G_T5_ALLOC(lo);
8928                 } else {
8929                         used = G_USED(lo);
8930                         alloc = G_ALLOC(lo);
8931                 }
8932                 /* For T6 these are MAC buffer groups */
8933                 sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
8934                     i, used, alloc);
8935         }
8936         for (i = 0; i < sc->chip_params->nchan; i++) {
8937                 if (chip_id(sc) > CHELSIO_T5)
8938                         lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
8939                 else
8940                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
8941                 if (is_t5(sc)) {
8942                         used = G_T5_USED(lo);
8943                         alloc = G_T5_ALLOC(lo);
8944                 } else {
8945                         used = G_USED(lo);
8946                         alloc = G_ALLOC(lo);
8947                 }
8948                 /* For T6 these are MAC buffer groups */
8949                 sbuf_printf(sb,
8950                     "\nLoopback %d using %u pages out of %u allocated",
8951                     i, used, alloc);
8952         }
8953
8954         rc = sbuf_finish(sb);
8955         sbuf_delete(sb);
8956
8957         return (rc);
8958 }
8959
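     /*
      * Decode a TCAM (x, y) register pair into an Ethernet address and mask.
      * As used here, y supplies the value bits and x | y the compare mask
      * (both bits clear means "don't care"), while any bit set in x & y
      * marks an unused entry -- which is why the callers skip those.  The
      * 48-bit address sits in the low six bytes of the big-endian form of y.
      */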
8960 static inline void
8961 tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
8962 {
8963         *mask = x | y;
8964         y = htobe64(y);
8965         memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
8966 }
8967
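     /*
      * Dump the MPS TCAM (the MAC filter table) on T4/T5, where the TCAM
      * contents can be read directly via the MPS_CLS_TCAM_{X,Y}_L registers.
      * For entries with replication enabled, the replication bitmap must be
      * fetched from the firmware with an FW_LDST command.
      */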
8968 static int
8969 sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
8970 {
8971         struct adapter *sc = arg1;
8972         struct sbuf *sb;
8973         int rc, i;
8974
8975         MPASS(chip_id(sc) <= CHELSIO_T5);
8976
8977         rc = sysctl_wire_old_buffer(req, 0);
8978         if (rc != 0)
8979                 return (rc);
8980
8981         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8982         if (sb == NULL)
8983                 return (ENOMEM);
8984
8985         sbuf_printf(sb,
8986             "Idx  Ethernet address     Mask     Vld Ports PF"
8987             "  VF              Replication             P0 P1 P2 P3  ML");
8988         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8989                 uint64_t tcamx, tcamy, mask;
8990                 uint32_t cls_lo, cls_hi;
8991                 uint8_t addr[ETHER_ADDR_LEN];
8992
8993                 tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
8994                 tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
8995                 if (tcamx & tcamy)
8996                         continue;
8997                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
8998                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8999                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
9000                 sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
9001                            "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
9002                            addr[3], addr[4], addr[5], (uintmax_t)mask,
9003                            (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
9004                            G_PORTMAP(cls_hi), G_PF(cls_lo),
9005                            (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
9006
9007                 if (cls_lo & F_REPLICATE) {
9008                         struct fw_ldst_cmd ldst_cmd;
9009
9010                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
9011                         ldst_cmd.op_to_addrspace =
9012                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
9013                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
9014                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
9015                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
9016                         ldst_cmd.u.mps.rplc.fid_idx =
9017                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
9018                                 V_FW_LDST_CMD_IDX(i));
9019
9020                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
9021                             "t4mps");
9022                         if (rc)
9023                                 break;
9024                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
9025                             sizeof(ldst_cmd), &ldst_cmd);
9026                         end_synchronized_op(sc, 0);
9027
9028                         if (rc != 0) {
9029                                 sbuf_printf(sb, "%36d", rc);
9030                                 rc = 0;
9031                         } else {
9032                                 sbuf_printf(sb, " %08x %08x %08x %08x",
9033                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
9034                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
9035                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
9036                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
9037                         }
9038                 } else
9039                         sbuf_printf(sb, "%36s", "");
9040
9041                 sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
9042                     G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
9043                     G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
9044         }
9045
9046         if (rc)
9047                 (void) sbuf_finish(sb);
9048         else
9049                 rc = sbuf_finish(sb);
9050         sbuf_delete(sb);
9051
9052         return (rc);
9053 }
9054
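     /*
      * T6 version of the MPS TCAM dump.  Here the TCAM is read indirectly: a
      * control word written to MPS_CLS_TCAM_DATA2_CTL selects the entry
      * (indices 256 and up live in a second TCAM bank) and CTLXYBITSEL picks
      * whether the y or x half is returned through the RDATA registers.  For
      * inner (encapsulated) lookup types the same data words carry a VNI and
      * VNI mask instead of a VLAN.
      */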
9055 static int
9056 sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
9057 {
9058         struct adapter *sc = arg1;
9059         struct sbuf *sb;
9060         int rc, i;
9061
9062         MPASS(chip_id(sc) > CHELSIO_T5);
9063
9064         rc = sysctl_wire_old_buffer(req, 0);
9065         if (rc != 0)
9066                 return (rc);
9067
9068         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9069         if (sb == NULL)
9070                 return (ENOMEM);
9071
9072         sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
9073             "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
9074             "                           Replication"
9075             "                                    P0 P1 P2 P3  ML\n");
9076
9077         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
9078                 uint8_t dip_hit, vlan_vld, lookup_type, port_num;
9079                 uint16_t ivlan;
9080                 uint64_t tcamx, tcamy, val, mask;
9081                 uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
9082                 uint8_t addr[ETHER_ADDR_LEN];
9083
9084                 ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
9085                 if (i < 256)
9086                         ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
9087                 else
9088                         ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
9089                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
9090                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
9091                 tcamy = G_DMACH(val) << 32;
9092                 tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
9093                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
9094                 lookup_type = G_DATALKPTYPE(data2);
9095                 port_num = G_DATAPORTNUM(data2);
9096                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
9097                         /* Inner header VNI */
9098                         vniy = ((data2 & F_DATAVIDH2) << 23) |
9099                                        (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
9100                         dip_hit = data2 & F_DATADIPHIT;
9101                         vlan_vld = 0;
9102                 } else {
9103                         vniy = 0;
9104                         dip_hit = 0;
9105                         vlan_vld = data2 & F_DATAVIDH2;
9106                         ivlan = G_VIDL(val);
9107                 }
9108
9109                 ctl |= V_CTLXYBITSEL(1);
9110                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
9111                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
9112                 tcamx = G_DMACH(val) << 32;
9113                 tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
9114                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
9115                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
9116                         /* Inner header VNI mask */
9117                         vnix = ((data2 & F_DATAVIDH2) << 23) |
9118                                (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
9119                 } else
9120                         vnix = 0;
9121
9122                 if (tcamx & tcamy)
9123                         continue;
9124                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
9125
9126                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
9127                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
9128
9129                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
9130                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
9131                             "%012jx %06x %06x    -    -   %3c"
9132                             "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
9133                             addr[1], addr[2], addr[3], addr[4], addr[5],
9134                             (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
9135                             port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
9136                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
9137                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
9138                 } else {
9139                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
9140                             "%012jx    -       -   ", i, addr[0], addr[1],
9141                             addr[2], addr[3], addr[4], addr[5],
9142                             (uintmax_t)mask);
9143
9144                         if (vlan_vld)
9145                                 sbuf_printf(sb, "%4u   Y     ", ivlan);
9146                         else
9147                                 sbuf_printf(sb, "  -    N     ");
9148
9149                         sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
9150                             lookup_type ? 'I' : 'O', port_num,
9151                             cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
9152                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
9153                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
9154                 }
9155
9157                 if (cls_lo & F_T6_REPLICATE) {
9158                         struct fw_ldst_cmd ldst_cmd;
9159
9160                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
9161                         ldst_cmd.op_to_addrspace =
9162                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
9163                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
9164                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
9165                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
9166                         ldst_cmd.u.mps.rplc.fid_idx =
9167                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
9168                                 V_FW_LDST_CMD_IDX(i));
9169
9170                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
9171                             "t6mps");
9172                         if (rc)
9173                                 break;
9174                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
9175                             sizeof(ldst_cmd), &ldst_cmd);
9176                         end_synchronized_op(sc, 0);
9177
9178                         if (rc != 0) {
9179                                 sbuf_printf(sb, "%72d", rc);
9180                                 rc = 0;
9181                         } else {
9182                                 sbuf_printf(sb, " %08x %08x %08x %08x"
9183                                     " %08x %08x %08x %08x",
9184                                     be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
9185                                     be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
9186                                     be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
9187                                     be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
9188                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
9189                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
9190                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
9191                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
9192                         }
9193                 } else
9194                         sbuf_printf(sb, "%72s", "");
9195
9196                 sbuf_printf(sb, "%4u%3u%3u%3u %#x",
9197                     G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
9198                     G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
9199                     (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
9200         }
9201
9202         if (rc)
9203                 (void) sbuf_finish(sb);
9204         else
9205                 rc = sbuf_finish(sb);
9206         sbuf_delete(sb);
9207
9208         return (rc);
9209 }
9210
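     /*
      * The hardware MTU table (NMTUS = 16 entries); offloaded connections
      * presumably derive their path MTU / MSS choices from this table.
      */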
9211 static int
9212 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
9213 {
9214         struct adapter *sc = arg1;
9215         struct sbuf *sb;
9216         int rc;
9217         uint16_t mtus[NMTUS];
9218
9219         rc = sysctl_wire_old_buffer(req, 0);
9220         if (rc != 0)
9221                 return (rc);
9222
9223         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9224         if (sb == NULL)
9225                 return (ENOMEM);
9226
9227         t4_read_mtu_tbl(sc, mtus, NULL);
9228
9229         sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
9230             mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
9231             mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
9232             mtus[14], mtus[15]);
9233
9234         rc = sbuf_finish(sb);
9235         sbuf_delete(sb);
9236
9237         return (rc);
9238 }
9239
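     /*
      * Payload memory manager statistics for the TX and RX pipelines.  The
      * first four counters exist on all chips; T6 adds wait/occupancy and
      * latency samples, which is why the T6-only block below starts at
      * index 4 and then jumps to index 6 (the NULL entry at index 5 is a
      * placeholder).
      */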
9240 static int
9241 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
9242 {
9243         struct adapter *sc = arg1;
9244         struct sbuf *sb;
9245         int rc, i;
9246         uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
9247         uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
9248         static const char *tx_stats[MAX_PM_NSTATS] = {
9249                 "Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
9250                 "Tx FIFO wait", NULL, "Tx latency"
9251         };
9252         static const char *rx_stats[MAX_PM_NSTATS] = {
9253                 "Read:", "Write bypass:", "Write mem:", "Flush:",
9254                 "Rx FIFO wait", NULL, "Rx latency"
9255         };
9256
9257         rc = sysctl_wire_old_buffer(req, 0);
9258         if (rc != 0)
9259                 return (rc);
9260
9261         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9262         if (sb == NULL)
9263                 return (ENOMEM);
9264
9265         t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
9266         t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
9267
9268         sbuf_printf(sb, "                Tx pcmds             Tx bytes");
9269         for (i = 0; i < 4; i++) {
9270                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9271                     tx_cyc[i]);
9272         }
9273
9274         sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
9275         for (i = 0; i < 4; i++) {
9276                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9277                     rx_cyc[i]);
9278         }
9279
9280         if (chip_id(sc) > CHELSIO_T5) {
9281                 sbuf_printf(sb,
9282                     "\n              Total wait      Total occupancy");
9283                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9284                     tx_cyc[i]);
9285                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9286                     rx_cyc[i]);
9287
9288                 i += 2;
9289                 MPASS(i < nitems(tx_stats));
9290
9291                 sbuf_printf(sb,
9292                     "\n                   Reads           Total wait");
9293                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9294                     tx_cyc[i]);
9295                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9296                     rx_cyc[i]);
9297         }
9298
9299         rc = sbuf_finish(sb);
9300         sbuf_delete(sb);
9301
9302         return (rc);
9303 }
9304
9305 static int
9306 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
9307 {
9308         struct adapter *sc = arg1;
9309         struct sbuf *sb;
9310         int rc;
9311         struct tp_rdma_stats stats;
9312
9313         rc = sysctl_wire_old_buffer(req, 0);
9314         if (rc != 0)
9315                 return (rc);
9316
9317         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9318         if (sb == NULL)
9319                 return (ENOMEM);
9320
9321         mtx_lock(&sc->reg_lock);
9322         t4_tp_get_rdma_stats(sc, &stats, 0);
9323         mtx_unlock(&sc->reg_lock);
9324
9325         sbuf_printf(sb, "NoRQEModDeferrals: %u\n", stats.rqe_dfr_mod);
9326         sbuf_printf(sb, "NoRQEPktDeferrals: %u", stats.rqe_dfr_pkt);
9327
9328         rc = sbuf_finish(sb);
9329         sbuf_delete(sb);
9330
9331         return (rc);
9332 }
9333
9334 static int
9335 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
9336 {
9337         struct adapter *sc = arg1;
9338         struct sbuf *sb;
9339         int rc;
9340         struct tp_tcp_stats v4, v6;
9341
9342         rc = sysctl_wire_old_buffer(req, 0);
9343         if (rc != 0)
9344                 return (rc);
9345
9346         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9347         if (sb == NULL)
9348                 return (ENOMEM);
9349
9350         mtx_lock(&sc->reg_lock);
9351         t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
9352         mtx_unlock(&sc->reg_lock);
9353
9354         sbuf_printf(sb,
9355             "                                IP                 IPv6\n");
9356         sbuf_printf(sb, "OutRsts:      %20u %20u\n",
9357             v4.tcp_out_rsts, v6.tcp_out_rsts);
9358         sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
9359             v4.tcp_in_segs, v6.tcp_in_segs);
9360         sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
9361             v4.tcp_out_segs, v6.tcp_out_segs);
9362         sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
9363             v4.tcp_retrans_segs, v6.tcp_retrans_segs);
9364
9365         rc = sbuf_finish(sb);
9366         sbuf_delete(sb);
9367
9368         return (rc);
9369 }
9370
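     /*
      * Summarize TID (hardware connection ID) usage.  With the LE hash
      * enabled the TID space is split in two: the low range ends at the
      * server start index and the hash region begins at the hash base, with
      * the register locations differing between T4/T5 and T6.
      */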
9371 static int
9372 sysctl_tids(SYSCTL_HANDLER_ARGS)
9373 {
9374         struct adapter *sc = arg1;
9375         struct sbuf *sb;
9376         int rc;
9377         struct tid_info *t = &sc->tids;
9378
9379         rc = sysctl_wire_old_buffer(req, 0);
9380         if (rc != 0)
9381                 return (rc);
9382
9383         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9384         if (sb == NULL)
9385                 return (ENOMEM);
9386
9387         if (t->natids) {
9388                 sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
9389                     t->atids_in_use);
9390         }
9391
9392         if (t->nhpftids) {
9393                 sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n",
9394                     t->hpftid_base, t->hpftid_end, t->hpftids_in_use);
9395         }
9396
9397         if (t->ntids) {
9398                 sbuf_printf(sb, "TID range: ");
9399                 if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
9400                         uint32_t b, hb;
9401
9402                         if (chip_id(sc) <= CHELSIO_T5) {
9403                                 b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
9404                                 hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
9405                         } else {
9406                                 b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
9407                                 hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
9408                         }
9409
9410                         if (b)
9411                                 sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1);
9412                         sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
9413                 } else {
9414                         sbuf_printf(sb, "%u-%u", t->tid_base, t->tid_base +
9415                             t->ntids - 1);
9416                 }
9417                 sbuf_printf(sb, ", in use: %u\n",
9418                     atomic_load_acq_int(&t->tids_in_use));
9419         }
9420
9421         if (t->nstids) {
9422                 sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
9423                     t->stid_base + t->nstids - 1, t->stids_in_use);
9424         }
9425
9426         if (t->nftids) {
9427                 sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base,
9428                     t->ftid_end, t->ftids_in_use);
9429         }
9430
9431         if (t->netids) {
9432                 sbuf_printf(sb, "ETID range: %u-%u, in use: %u\n", t->etid_base,
9433                     t->etid_base + t->netids - 1, t->etids_in_use);
9434         }
9435
9436         sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
9437             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
9438             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
9439
9440         rc = sbuf_finish(sb);
9441         sbuf_delete(sb);
9442
9443         return (rc);
9444 }
9445
9446 static int
9447 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
9448 {
9449         struct adapter *sc = arg1;
9450         struct sbuf *sb;
9451         int rc;
9452         struct tp_err_stats stats;
9453
9454         rc = sysctl_wire_old_buffer(req, 0);
9455         if (rc != 0)
9456                 return (rc);
9457
9458         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9459         if (sb == NULL)
9460                 return (ENOMEM);
9461
9462         mtx_lock(&sc->reg_lock);
9463         t4_tp_get_err_stats(sc, &stats, 0);
9464         mtx_unlock(&sc->reg_lock);
9465
9466         if (sc->chip_params->nchan > 2) {
9467                 sbuf_printf(sb, "                 channel 0  channel 1"
9468                     "  channel 2  channel 3\n");
9469                 sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
9470                     stats.mac_in_errs[0], stats.mac_in_errs[1],
9471                     stats.mac_in_errs[2], stats.mac_in_errs[3]);
9472                 sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
9473                     stats.hdr_in_errs[0], stats.hdr_in_errs[1],
9474                     stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
9475                 sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
9476                     stats.tcp_in_errs[0], stats.tcp_in_errs[1],
9477                     stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
9478                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
9479                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
9480                     stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
9481                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
9482                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
9483                     stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
9484                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
9485                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
9486                     stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
9487                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
9488                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
9489                     stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
9490                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
9491                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
9492                     stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
9493         } else {
9494                 sbuf_printf(sb, "                 channel 0  channel 1\n");
9495                 sbuf_printf(sb, "macInErrs:      %10u %10u\n",
9496                     stats.mac_in_errs[0], stats.mac_in_errs[1]);
9497                 sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
9498                     stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
9499                 sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
9500                     stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
9501                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
9502                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
9503                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
9504                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
9505                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
9506                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
9507                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
9508                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
9509                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
9510                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
9511         }
9512
9513         sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
9514             stats.ofld_no_neigh, stats.ofld_cong_defer);
9515
9516         rc = sbuf_finish(sb);
9517         sbuf_delete(sb);
9518
9519         return (rc);
9520 }
9521
9522 static int
9523 sysctl_tnl_stats(SYSCTL_HANDLER_ARGS)
9524 {
9525         struct adapter *sc = arg1;
9526         struct sbuf *sb;
9527         int rc;
9528         struct tp_tnl_stats stats;
9529
9530         rc = sysctl_wire_old_buffer(req, 0);
9531         if (rc != 0)
9532                 return (rc);
9533
9534         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9535         if (sb == NULL)
9536                 return (ENOMEM);
9537
9538         mtx_lock(&sc->reg_lock);
9539         t4_tp_get_tnl_stats(sc, &stats, 1);
9540         mtx_unlock(&sc->reg_lock);
9541
9542         if (sc->chip_params->nchan > 2) {
9543                 sbuf_printf(sb, "           channel 0  channel 1"
9544                     "  channel 2  channel 3\n");
9545                 sbuf_printf(sb, "OutPkts:  %10u %10u %10u %10u\n",
9546                     stats.out_pkt[0], stats.out_pkt[1],
9547                     stats.out_pkt[2], stats.out_pkt[3]);
9548                 sbuf_printf(sb, "InPkts:   %10u %10u %10u %10u",
9549                     stats.in_pkt[0], stats.in_pkt[1],
9550                     stats.in_pkt[2], stats.in_pkt[3]);
9551         } else {
9552                 sbuf_printf(sb, "           channel 0  channel 1\n");
9553                 sbuf_printf(sb, "OutPkts:  %10u %10u\n",
9554                     stats.out_pkt[0], stats.out_pkt[1]);
9555                 sbuf_printf(sb, "InPkts:   %10u %10u",
9556                     stats.in_pkt[0], stats.in_pkt[1]);
9557         }
9558
9559         rc = sbuf_finish(sb);
9560         sbuf_delete(sb);
9561
9562         return (rc);
9563 }
9564
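     /*
      * Read/write handler for the TP logic analyzer trigger mask.  The mask
      * occupies the top 16 bits of TP_DBG_LA_CONFIG; t4_set_reg_field() with
      * a 0xffff0000 mask leaves the low bits untouched.
      */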
9565 static int
9566 sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
9567 {
9568         struct adapter *sc = arg1;
9569         struct tp_params *tpp = &sc->params.tp;
9570         u_int mask;
9571         int rc;
9572
9573         mask = tpp->la_mask >> 16;
9574         rc = sysctl_handle_int(oidp, &mask, 0, req);
9575         if (rc != 0 || req->newptr == NULL)
9576                 return (rc);
9577         if (mask > 0xffff)
9578                 return (EINVAL);
9579         tpp->la_mask = mask << 16;
9580         t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
9581
9582         return (0);
9583 }
9584
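     /*
      * A field_desc table describes how to decode one 64-bit logic analyzer
      * sample: each entry names a field and gives its start bit and width,
      * with a NULL name terminating the table.  field_desc_show() walks the
      * table and wraps its output at roughly 79 columns.
      */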
9585 struct field_desc {
9586         const char *name;
9587         u_int start;
9588         u_int width;
9589 };
9590
9591 static void
9592 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
9593 {
9594         char buf[32];
9595         int line_size = 0;
9596
9597         while (f->name) {
9598                 uint64_t mask = (1ULL << f->width) - 1;
9599                 int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
9600                     ((uintmax_t)v >> f->start) & mask);
9601
9602                 if (line_size + len >= 79) {
9603                         line_size = 8;
9604                         sbuf_printf(sb, "\n        ");
9605                 }
9606                 sbuf_printf(sb, "%s ", buf);
9607                 line_size += len + 1;
9608                 f++;
9609         }
9610         sbuf_printf(sb, "\n");
9611 }
9612
9613 static const struct field_desc tp_la0[] = {
9614         { "RcfOpCodeOut", 60, 4 },
9615         { "State", 56, 4 },
9616         { "WcfState", 52, 4 },
9617         { "RcfOpcSrcOut", 50, 2 },
9618         { "CRxError", 49, 1 },
9619         { "ERxError", 48, 1 },
9620         { "SanityFailed", 47, 1 },
9621         { "SpuriousMsg", 46, 1 },
9622         { "FlushInputMsg", 45, 1 },
9623         { "FlushInputCpl", 44, 1 },
9624         { "RssUpBit", 43, 1 },
9625         { "RssFilterHit", 42, 1 },
9626         { "Tid", 32, 10 },
9627         { "InitTcb", 31, 1 },
9628         { "LineNumber", 24, 7 },
9629         { "Emsg", 23, 1 },
9630         { "EdataOut", 22, 1 },
9631         { "Cmsg", 21, 1 },
9632         { "CdataOut", 20, 1 },
9633         { "EreadPdu", 19, 1 },
9634         { "CreadPdu", 18, 1 },
9635         { "TunnelPkt", 17, 1 },
9636         { "RcfPeerFin", 16, 1 },
9637         { "RcfReasonOut", 12, 4 },
9638         { "TxCchannel", 10, 2 },
9639         { "RcfTxChannel", 8, 2 },
9640         { "RxEchannel", 6, 2 },
9641         { "RcfRxChannel", 5, 1 },
9642         { "RcfDataOutSrdy", 4, 1 },
9643         { "RxDvld", 3, 1 },
9644         { "RxOoDvld", 2, 1 },
9645         { "RxCongestion", 1, 1 },
9646         { "TxCongestion", 0, 1 },
9647         { NULL }
9648 };
9649
9650 static const struct field_desc tp_la1[] = {
9651         { "CplCmdIn", 56, 8 },
9652         { "CplCmdOut", 48, 8 },
9653         { "ESynOut", 47, 1 },
9654         { "EAckOut", 46, 1 },
9655         { "EFinOut", 45, 1 },
9656         { "ERstOut", 44, 1 },
9657         { "SynIn", 43, 1 },
9658         { "AckIn", 42, 1 },
9659         { "FinIn", 41, 1 },
9660         { "RstIn", 40, 1 },
9661         { "DataIn", 39, 1 },
9662         { "DataInVld", 38, 1 },
9663         { "PadIn", 37, 1 },
9664         { "RxBufEmpty", 36, 1 },
9665         { "RxDdp", 35, 1 },
9666         { "RxFbCongestion", 34, 1 },
9667         { "TxFbCongestion", 33, 1 },
9668         { "TxPktSumSrdy", 32, 1 },
9669         { "RcfUlpType", 28, 4 },
9670         { "Eread", 27, 1 },
9671         { "Ebypass", 26, 1 },
9672         { "Esave", 25, 1 },
9673         { "Static0", 24, 1 },
9674         { "Cread", 23, 1 },
9675         { "Cbypass", 22, 1 },
9676         { "Csave", 21, 1 },
9677         { "CPktOut", 20, 1 },
9678         { "RxPagePoolFull", 18, 2 },
9679         { "RxLpbkPkt", 17, 1 },
9680         { "TxLpbkPkt", 16, 1 },
9681         { "RxVfValid", 15, 1 },
9682         { "SynLearned", 14, 1 },
9683         { "SetDelEntry", 13, 1 },
9684         { "SetInvEntry", 12, 1 },
9685         { "CpcmdDvld", 11, 1 },
9686         { "CpcmdSave", 10, 1 },
9687         { "RxPstructsFull", 8, 2 },
9688         { "EpcmdDvld", 7, 1 },
9689         { "EpcmdFlush", 6, 1 },
9690         { "EpcmdTrimPrefix", 5, 1 },
9691         { "EpcmdTrimPostfix", 4, 1 },
9692         { "ERssIp4Pkt", 3, 1 },
9693         { "ERssIp6Pkt", 2, 1 },
9694         { "ERssTcpUdpPkt", 1, 1 },
9695         { "ERssFceFipPkt", 0, 1 },
9696         { NULL }
9697 };
9698
9699 static const struct field_desc tp_la2[] = {
9700         { "CplCmdIn", 56, 8 },
9701         { "MpsVfVld", 55, 1 },
9702         { "MpsPf", 52, 3 },
9703         { "MpsVf", 44, 8 },
9704         { "SynIn", 43, 1 },
9705         { "AckIn", 42, 1 },
9706         { "FinIn", 41, 1 },
9707         { "RstIn", 40, 1 },
9708         { "DataIn", 39, 1 },
9709         { "DataInVld", 38, 1 },
9710         { "PadIn", 37, 1 },
9711         { "RxBufEmpty", 36, 1 },
9712         { "RxDdp", 35, 1 },
9713         { "RxFbCongestion", 34, 1 },
9714         { "TxFbCongestion", 33, 1 },
9715         { "TxPktSumSrdy", 32, 1 },
9716         { "RcfUlpType", 28, 4 },
9717         { "Eread", 27, 1 },
9718         { "Ebypass", 26, 1 },
9719         { "Esave", 25, 1 },
9720         { "Static0", 24, 1 },
9721         { "Cread", 23, 1 },
9722         { "Cbypass", 22, 1 },
9723         { "Csave", 21, 1 },
9724         { "CPktOut", 20, 1 },
9725         { "RxPagePoolFull", 18, 2 },
9726         { "RxLpbkPkt", 17, 1 },
9727         { "TxLpbkPkt", 16, 1 },
9728         { "RxVfValid", 15, 1 },
9729         { "SynLearned", 14, 1 },
9730         { "SetDelEntry", 13, 1 },
9731         { "SetInvEntry", 12, 1 },
9732         { "CpcmdDvld", 11, 1 },
9733         { "CpcmdSave", 10, 1 },
9734         { "RxPstructsFull", 8, 2 },
9735         { "EpcmdDvld", 7, 1 },
9736         { "EpcmdFlush", 6, 1 },
9737         { "EpcmdTrimPrefix", 5, 1 },
9738         { "EpcmdTrimPostfix", 4, 1 },
9739         { "ERssIp4Pkt", 3, 1 },
9740         { "ERssIp6Pkt", 2, 1 },
9741         { "ERssTcpUdpPkt", 1, 1 },
9742         { "ERssFceFipPkt", 0, 1 },
9743         { NULL }
9744 };
9745
9746 static void
9747 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
9748 {
9749
9750         field_desc_show(sb, *p, tp_la0);
9751 }
9752
9753 static void
9754 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
9755 {
9756
9757         if (idx)
9758                 sbuf_printf(sb, "\n");
9759         field_desc_show(sb, p[0], tp_la0);
9760         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9761                 field_desc_show(sb, p[1], tp_la0);
9762 }
9763
9764 static void
9765 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
9766 {
9767
9768         if (idx)
9769                 sbuf_printf(sb, "\n");
9770         field_desc_show(sb, p[0], tp_la0);
9771         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9772                 field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
9773 }
9774
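     /*
      * Dump the TP logic analyzer.  DBGLAMODE selects the sample layout:
      * modes 2 and 3 consume two 64-bit words per entry (and in mode 3 the
      * second word is decoded as tp_la2 or tp_la1 depending on bit 17 of the
      * first), while the remaining modes use a single word decoded with
      * tp_la0.
      */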
9775 static int
9776 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
9777 {
9778         struct adapter *sc = arg1;
9779         struct sbuf *sb;
9780         uint64_t *buf, *p;
9781         int rc;
9782         u_int i, inc;
9783         void (*show_func)(struct sbuf *, uint64_t *, int);
9784
9785         rc = sysctl_wire_old_buffer(req, 0);
9786         if (rc != 0)
9787                 return (rc);
9788
9789         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9790         if (sb == NULL)
9791                 return (ENOMEM);
9792
9793         buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
9794
9795         t4_tp_read_la(sc, buf, NULL);
9796         p = buf;
9797
9798         switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
9799         case 2:
9800                 inc = 2;
9801                 show_func = tp_la_show2;
9802                 break;
9803         case 3:
9804                 inc = 2;
9805                 show_func = tp_la_show3;
9806                 break;
9807         default:
9808                 inc = 1;
9809                 show_func = tp_la_show;
9810         }
9811
9812         for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
9813                 (*show_func)(sb, p, i);
9814
9815         rc = sbuf_finish(sb);
9816         sbuf_delete(sb);
9817         free(buf, M_CXGBE);
9818         return (rc);
9819 }
9820
9821 static int
9822 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
9823 {
9824         struct adapter *sc = arg1;
9825         struct sbuf *sb;
9826         int rc;
9827         u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
9828
9829         rc = sysctl_wire_old_buffer(req, 0);
9830         if (rc != 0)
9831                 return (rc);
9832
9833         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9834         if (sb == NULL)
9835                 return (ENOMEM);
9836
9837         t4_get_chan_txrate(sc, nrate, orate);
9838
9839         if (sc->chip_params->nchan > 2) {
9840                 sbuf_printf(sb, "              channel 0   channel 1"
9841                     "   channel 2   channel 3\n");
9842                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
9843                     nrate[0], nrate[1], nrate[2], nrate[3]);
9844                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
9845                     orate[0], orate[1], orate[2], orate[3]);
9846         } else {
9847                 sbuf_printf(sb, "              channel 0   channel 1\n");
9848                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
9849                     nrate[0], nrate[1]);
9850                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
9851                     orate[0], orate[1]);
9852         }
9853
9854         rc = sbuf_finish(sb);
9855         sbuf_delete(sb);
9856
9857         return (rc);
9858 }
9859
9860 static int
9861 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
9862 {
9863         struct adapter *sc = arg1;
9864         struct sbuf *sb;
9865         uint32_t *buf, *p;
9866         int rc, i;
9867
9868         rc = sysctl_wire_old_buffer(req, 0);
9869         if (rc != 0)
9870                 return (rc);
9871
9872         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9873         if (sb == NULL)
9874                 return (ENOMEM);
9875
9876         buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
9877             M_ZERO | M_WAITOK);
9878
9879         t4_ulprx_read_la(sc, buf);
9880         p = buf;
9881
9882         sbuf_printf(sb, "      Pcmd        Type   Message"
9883             "                Data");
9884         for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
9885                 sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
9886                     p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
9887         }
9888
9889         rc = sbuf_finish(sb);
9890         sbuf_delete(sb);
9891         free(buf, M_CXGBE);
9892         return (rc);
9893 }
9894
9895 static int
9896 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
9897 {
9898         struct adapter *sc = arg1;
9899         struct sbuf *sb;
9900         int rc, v;
9901
9902         MPASS(chip_id(sc) >= CHELSIO_T5);
9903
9904         rc = sysctl_wire_old_buffer(req, 0);
9905         if (rc != 0)
9906                 return (rc);
9907
9908         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9909         if (sb == NULL)
9910                 return (ENOMEM);
9911
9912         v = t4_read_reg(sc, A_SGE_STAT_CFG);
9913         if (G_STATSOURCE_T5(v) == 7) {
9914                 int mode;
9915
9916                 mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
9917                 if (mode == 0) {
9918                         sbuf_printf(sb, "total %d, incomplete %d",
9919                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
9920                             t4_read_reg(sc, A_SGE_STAT_MATCH));
9921                 } else if (mode == 1) {
9922                         sbuf_printf(sb, "total %d, data overflow %d",
9923                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
9924                             t4_read_reg(sc, A_SGE_STAT_MATCH));
9925                 } else {
9926                         sbuf_printf(sb, "unknown mode %d", mode);
9927                 }
9928         }
9929         rc = sbuf_finish(sb);
9930         sbuf_delete(sb);
9931
9932         return (rc);
9933 }
9934
9935 static int
9936 sysctl_cpus(SYSCTL_HANDLER_ARGS)
9937 {
9938         struct adapter *sc = arg1;
9939         enum cpu_sets op = arg2;
9940         cpuset_t cpuset;
9941         struct sbuf *sb;
9942         int i, rc;
9943
9944         MPASS(op == LOCAL_CPUS || op == INTR_CPUS);
9945
9946         CPU_ZERO(&cpuset);
9947         rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset);
9948         if (rc != 0)
9949                 return (rc);
9950
9951         rc = sysctl_wire_old_buffer(req, 0);
9952         if (rc != 0)
9953                 return (rc);
9954
9955         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9956         if (sb == NULL)
9957                 return (ENOMEM);
9958
9959         CPU_FOREACH(i)
9960                 sbuf_printf(sb, "%d ", i);
9961         rc = sbuf_finish(sb);
9962         sbuf_delete(sb);
9963
9964         return (rc);
9965 }
9966
9967 #ifdef TCP_OFFLOAD
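/*
 * Enable/disable TLS offload for TOE connections.  Enabling requires the
 * TLSKEYS firmware capability, and the fl buffer size of every
 * initialized VI is recalculated to suit TLS receive.
 */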
9968 static int
9969 sysctl_tls(SYSCTL_HANDLER_ARGS)
9970 {
9971         struct adapter *sc = arg1;
9972         int i, j, v, rc;
9973         struct vi_info *vi;
9974
9975         v = sc->tt.tls;
9976         rc = sysctl_handle_int(oidp, &v, 0, req);
9977         if (rc != 0 || req->newptr == NULL)
9978                 return (rc);
9979
9980         if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
9981                 return (ENOTSUP);
9982
9983         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4stls");
9984         if (rc)
9985                 return (rc);
9986         sc->tt.tls = !!v;
9987         for_each_port(sc, i) {
9988                 for_each_vi(sc->port[i], j, vi) {
9989                         if (vi->flags & VI_INIT_DONE)
9990                                 t4_update_fl_bufsize(vi->ifp);
9991                 }
9992         }
9993         end_synchronized_op(sc, 0);
9994
9995         return (0);
9997 }
9998
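/*
 * List of TCP ports on which TLS RX offload is enabled.  A read returns
 * the current list (a single -1 when it is empty); a write replaces the
 * list, and writing a single -1 clears it.  Illustrative usage (the exact
 * sysctl path is assumed, not verified here):
 *   # sysctl dev.<nexus>.<n>.toe.tls_rx_ports=443
 */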
9999 static int
10000 sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
10001 {
10002         struct adapter *sc = arg1;
10003         int *old_ports, *new_ports;
10004         int i, new_count, rc;
10005
10006         if (req->newptr == NULL && req->oldptr == NULL)
10007                 return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
10008                     sizeof(sc->tt.tls_rx_ports[0])));
10009
10010         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
10011         if (rc)
10012                 return (rc);
10013
10014         if (sc->tt.num_tls_rx_ports == 0) {
10015                 i = -1;
10016                 rc = SYSCTL_OUT(req, &i, sizeof(i));
10017         } else
10018                 rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
10019                     sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
10020         if (rc == 0 && req->newptr != NULL) {
10021                 new_count = req->newlen / sizeof(new_ports[0]);
10022                 new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
10023                     M_WAITOK);
10024                 rc = SYSCTL_IN(req, new_ports, new_count *
10025                     sizeof(new_ports[0]));
10026                 if (rc)
10027                         goto err;
10028
10029                 /* Allow setting to a single '-1' to clear the list. */
10030                 if (new_count == 1 && new_ports[0] == -1) {
10031                         ADAPTER_LOCK(sc);
10032                         old_ports = sc->tt.tls_rx_ports;
10033                         sc->tt.tls_rx_ports = NULL;
10034                         sc->tt.num_tls_rx_ports = 0;
10035                         ADAPTER_UNLOCK(sc);
10036                         free(old_ports, M_CXGBE);
10037                 } else {
10038                         for (i = 0; i < new_count; i++) {
10039                                 if (new_ports[i] < 1 ||
10040                                     new_ports[i] > IPPORT_MAX) {
10041                                         rc = EINVAL;
10042                                         goto err;
10043                                 }
10044                         }
10045
10046                         ADAPTER_LOCK(sc);
10047                         old_ports = sc->tt.tls_rx_ports;
10048                         sc->tt.tls_rx_ports = new_ports;
10049                         sc->tt.num_tls_rx_ports = new_count;
10050                         ADAPTER_UNLOCK(sc);
10051                         free(old_ports, M_CXGBE);
10052                         new_ports = NULL;
10053                 }
10054         err:
10055                 free(new_ports, M_CXGBE);
10056         }
10057         end_synchronized_op(sc, 0);
10058         return (rc);
10059 }
10060
10061 static int
10062 sysctl_tls_rx_timeout(SYSCTL_HANDLER_ARGS)
10063 {
10064         struct adapter *sc = arg1;
10065         int v, rc;
10066
10067         v = sc->tt.tls_rx_timeout;
10068         rc = sysctl_handle_int(oidp, &v, 0, req);
10069         if (rc != 0 || req->newptr == NULL)
10070                 return (rc);
10071
10072         if (v < 0)
10073                 return (EINVAL);
10074
10075         if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
10076                 return (ENOTSUP);
10077
10078         sc->tt.tls_rx_timeout = v;
10079
10080         return (0);
10082 }
10083
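/*
 * Format val / factor as a decimal string, trimming trailing zeroes from
 * the fractional part; e.g. val = 2500, factor = 1000 yields "2.5".
 */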
10084 static void
10085 unit_conv(char *buf, size_t len, u_int val, u_int factor)
10086 {
10087         u_int rem = val % factor;
10088
10089         if (rem == 0)
10090                 snprintf(buf, len, "%u", val / factor);
10091         else {
10092                 while (rem % 10 == 0)
10093                         rem /= 10;
10094                 snprintf(buf, len, "%u.%u", val / factor, rem);
10095         }
10096 }
10097
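/*
 * Report one of the TP ticks in microseconds.  vpd.cclk is the core clock
 * in kHz, so cclk_ps is its period in picoseconds; the resolution field
 * selected by arg2 is a log2 multiplier applied to that period.
 */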
10098 static int
10099 sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
10100 {
10101         struct adapter *sc = arg1;
10102         char buf[16];
10103         u_int res, re;
10104         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
10105
10106         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
10107         switch (arg2) {
10108         case 0:
10109                 /* timer_tick */
10110                 re = G_TIMERRESOLUTION(res);
10111                 break;
10112         case 1:
10113                 /* TCP timestamp tick */
10114                 re = G_TIMESTAMPRESOLUTION(res);
10115                 break;
10116         case 2:
10117                 /* DACK tick */
10118                 re = G_DELAYEDACKRESOLUTION(res);
10119                 break;
10120         default:
10121                 return (EDOOFUS);
10122         }
10123
10124         unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
10125
10126         return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
10127 }
10128
10129 static int
10130 sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
10131 {
10132         struct adapter *sc = arg1;
10133         u_int res, dack_re, v;
10134         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
10135
10136         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
10137         dack_re = G_DELAYEDACKRESOLUTION(res);
10138         v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
10139
10140         return (sysctl_handle_int(oidp, &v, 0, req));
10141 }
10142
10143 static int
10144 sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
10145 {
10146         struct adapter *sc = arg1;
10147         int reg = arg2;
10148         u_int tre;
10149         u_long tp_tick_us, v;
10150         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
10151
10152         MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
10153             reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
10154             reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
10155             reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
10156
10157         tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
10158         tp_tick_us = (cclk_ps << tre) / 1000000;
10159
10160         if (reg == A_TP_INIT_SRTT)
10161                 v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
10162         else
10163                 v = tp_tick_us * t4_read_reg(sc, reg);
10164
10165         return (sysctl_handle_long(oidp, &v, 0, req));
10166 }
10167
10168 /*
10169  * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is
10170  * passed to this function.
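 * For example, idx 12 selects bits 15:12 of the register.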
10171  */
10172 static int
10173 sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
10174 {
10175         struct adapter *sc = arg1;
10176         int idx = arg2;
10177         u_int v;
10178
10179         MPASS(idx >= 0 && idx <= 24);
10180
10181         v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
10182
10183         return (sysctl_handle_int(oidp, &v, 0, req));
10184 }
10185
10186 static int
10187 sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
10188 {
10189         struct adapter *sc = arg1;
10190         int idx = arg2;
10191         u_int shift, v, r;
10192
10193         MPASS(idx >= 0 && idx < 16);
10194
10195         r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
10196         shift = (idx & 3) << 3;
10197         v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
10198
10199         return (sysctl_handle_int(oidp, &v, 0, req));
10200 }
10201
10202 static int
10203 sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
10204 {
10205         struct vi_info *vi = arg1;
10206         struct adapter *sc = vi->adapter;
10207         int idx, rc, i;
10208         struct sge_ofld_rxq *ofld_rxq;
10209         uint8_t v;
10210
10211         idx = vi->ofld_tmr_idx;
10212
10213         rc = sysctl_handle_int(oidp, &idx, 0, req);
10214         if (rc != 0 || req->newptr == NULL)
10215                 return (rc);
10216
10217         if (idx < 0 || idx >= SGE_NTIMERS)
10218                 return (EINVAL);
10219
10220         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
10221             "t4otmr");
10222         if (rc)
10223                 return (rc);
10224
10225         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
10226         for_each_ofld_rxq(vi, i, ofld_rxq) {
10227 #ifdef atomic_store_rel_8
10228                 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
10229 #else
10230                 ofld_rxq->iq.intr_params = v;
10231 #endif
10232         }
10233         vi->ofld_tmr_idx = idx;
10234
10235         end_synchronized_op(sc, LOCK_HELD);
10236         return (0);
10237 }
10238
10239 static int
10240 sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
10241 {
10242         struct vi_info *vi = arg1;
10243         struct adapter *sc = vi->adapter;
10244         int idx, rc;
10245
10246         idx = vi->ofld_pktc_idx;
10247
10248         rc = sysctl_handle_int(oidp, &idx, 0, req);
10249         if (rc != 0 || req->newptr == NULL)
10250                 return (rc);
10251
10252         if (idx < -1 || idx >= SGE_NCOUNTERS)
10253                 return (EINVAL);
10254
10255         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
10256             "t4opktc");
10257         if (rc)
10258                 return (rc);
10259
10260         if (vi->flags & VI_INIT_DONE)
10261                 rc = EBUSY; /* cannot be changed once the queues are created */
10262         else
10263                 vi->ofld_pktc_idx = idx;
10264
10265         end_synchronized_op(sc, LOCK_HELD);
10266         return (rc);
10267 }
10268 #endif
10269
10270 static int
10271 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
10272 {
10273         int rc;
10274
10275         if (cntxt->cid > M_CTXTQID)
10276                 return (EINVAL);
10277
10278         if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
10279             cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
10280                 return (EINVAL);
10281
10282         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
10283         if (rc)
10284                 return (rc);
10285
10286         if (sc->flags & FW_OK) {
10287                 rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
10288                     &cntxt->data[0]);
10289                 if (rc == 0)
10290                         goto done;
10291         }
10292
10293         /*
10294          * Read via firmware failed or wasn't even attempted.  Read directly via
10295          * the backdoor.
10296          */
10297         rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
10298 done:
10299         end_synchronized_op(sc, 0);
10300         return (rc);
10301 }
10302
10303 static int
10304 load_fw(struct adapter *sc, struct t4_data *fw)
10305 {
10306         int rc;
10307         uint8_t *fw_data;
10308
10309         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
10310         if (rc)
10311                 return (rc);
10312
10313         /*
10314          * The firmware, with the sole exception of the memory parity error
10315          * handler, runs from memory and not flash.  It is almost always safe to
10316          * install a new firmware on a running system.  Just set bit 1 in
10317          * hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
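         * For example, "sysctl dev.<nexus>.<n>.dflags=2" sets bit 1
         * (assuming no other debug flags are in use).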
10318          */
10319         if (sc->flags & FULL_INIT_DONE &&
10320             (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
10321                 rc = EBUSY;
10322                 goto done;
10323         }
10324
10325         fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
10326
10327         rc = copyin(fw->data, fw_data, fw->len);
10328         if (rc == 0)
10329                 rc = -t4_load_fw(sc, fw_data, fw->len);
10330
10331         free(fw_data, M_CXGBE);
10332 done:
10333         end_synchronized_op(sc, 0);
10334         return (rc);
10335 }
10336
10337 static int
10338 load_cfg(struct adapter *sc, struct t4_data *cfg)
10339 {
10340         int rc;
10341         uint8_t *cfg_data = NULL;
10342
10343         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
10344         if (rc)
10345                 return (rc);
10346
10347         if (cfg->len == 0) {
10348                 /* clear */
10349                 rc = -t4_load_cfg(sc, NULL, 0);
10350                 goto done;
10351         }
10352
10353         cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
10354
10355         rc = copyin(cfg->data, cfg_data, cfg->len);
10356         if (rc == 0)
10357                 rc = -t4_load_cfg(sc, cfg_data, cfg->len);
10358
10359         free(cfg_data, M_CXGBE);
10360 done:
10361         end_synchronized_op(sc, 0);
10362         return (rc);
10363 }
10364
10365 static int
10366 load_boot(struct adapter *sc, struct t4_bootrom *br)
10367 {
10368         int rc;
10369         uint8_t *br_data = NULL;
10370         u_int offset;
10371
10372         if (br->len > 1024 * 1024)
10373                 return (EFBIG);
10374
10375         if (br->pf_offset == 0) {
10376                 /* pfidx */
10377                 if (br->pfidx_addr > 7)
10378                         return (EINVAL);
10379                 offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
10380                     A_PCIE_PF_EXPROM_OFST)));
10381         } else if (br->pf_offset == 1) {
10382                 /* offset */
10383                 offset = G_OFFSET(br->pfidx_addr);
10384         } else {
10385                 return (EINVAL);
10386         }
10387
10388         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
10389         if (rc)
10390                 return (rc);
10391
10392         if (br->len == 0) {
10393                 /* clear */
10394                 rc = -t4_load_boot(sc, NULL, offset, 0);
10395                 goto done;
10396         }
10397
10398         br_data = malloc(br->len, M_CXGBE, M_WAITOK);
10399
10400         rc = copyin(br->data, br_data, br->len);
10401         if (rc == 0)
10402                 rc = -t4_load_boot(sc, br_data, offset, br->len);
10403
10404         free(br_data, M_CXGBE);
10405 done:
10406         end_synchronized_op(sc, 0);
10407         return (rc);
10408 }
10409
10410 static int
10411 load_bootcfg(struct adapter *sc, struct t4_data *bc)
10412 {
10413         int rc;
10414         uint8_t *bc_data = NULL;
10415
10416         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
10417         if (rc)
10418                 return (rc);
10419
10420         if (bc->len == 0) {
10421                 /* clear */
10422                 rc = -t4_load_bootcfg(sc, NULL, 0);
10423                 goto done;
10424         }
10425
10426         bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
10427
10428         rc = copyin(bc->data, bc_data, bc->len);
10429         if (rc == 0)
10430                 rc = -t4_load_bootcfg(sc, bc_data, bc->len);
10431
10432         free(bc_data, M_CXGBE);
10433 done:
10434         end_synchronized_op(sc, 0);
10435         return (rc);
10436 }
10437
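/*
 * Collect a cudbg (Chelsio unified debug) dump of the components selected
 * in dump->bitmap into a user buffer, optionally writing it to flash too.
 */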
10438 static int
10439 cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
10440 {
10441         int rc;
10442         struct cudbg_init *cudbg;
10443         void *handle, *buf;
10444
10445         /* buf is large, don't block if no memory is available */
10446         buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
10447         if (buf == NULL)
10448                 return (ENOMEM);
10449
10450         handle = cudbg_alloc_handle();
10451         if (handle == NULL) {
10452                 rc = ENOMEM;
10453                 goto done;
10454         }
10455
10456         cudbg = cudbg_get_init(handle);
10457         cudbg->adap = sc;
10458         cudbg->print = (cudbg_print_cb)printf;
10459
10460 #ifndef notyet
10461         device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
10462             __func__, dump->wr_flash, dump->len, dump->data);
10463 #endif
10464
10465         if (dump->wr_flash)
10466                 cudbg->use_flash = 1;
10467         MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
10468         memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));
10469
10470         rc = cudbg_collect(handle, buf, &dump->len);
10471         if (rc != 0)
10472                 goto done;
10473
10474         rc = copyout(buf, dump->data, dump->len);
10475 done:
10476         cudbg_free_handle(handle);
10477         free(buf, M_CXGBE);
10478         return (rc);
10479 }
10480
10481 static void
10482 free_offload_policy(struct t4_offload_policy *op)
10483 {
10484         struct offload_rule *r;
10485         int i;
10486
10487         if (op == NULL)
10488                 return;
10489
10490         r = &op->rule[0];
10491         for (i = 0; i < op->nrules; i++, r++) {
10492                 free(r->bpf_prog.bf_insns, M_CXGBE);
10493         }
10494         free(op->rule, M_CXGBE);
10495         free(op, M_CXGBE);
10496 }
10497
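/*
 * Replace the offload policy: copy the rules in from userspace, validate
 * each rule and its BPF program, and swap the new policy in under the
 * writer lock before freeing the old one.
 */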
10498 static int
10499 set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
10500 {
10501         int i, rc, len;
10502         struct t4_offload_policy *op, *old;
10503         struct bpf_program *bf;
10504         const struct offload_settings *s;
10505         struct offload_rule *r;
10506         void *u;
10507
10508         if (!is_offload(sc))
10509                 return (ENODEV);
10510
10511         if (uop->nrules == 0) {
10512                 /* Delete installed policies. */
10513                 op = NULL;
10514                 goto set_policy;
10515         } else if (uop->nrules > 256) { /* arbitrary */
10516                 return (E2BIG);
10517         }
10518
10519         /* Copy userspace offload policy to kernel */
10520         op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
10521         op->nrules = uop->nrules;
10522         len = op->nrules * sizeof(struct offload_rule);
10523         op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
10524         rc = copyin(uop->rule, op->rule, len);
10525         if (rc) {
10526                 free(op->rule, M_CXGBE);
10527                 free(op, M_CXGBE);
10528                 return (rc);
10529         }
10530
10531         r = &op->rule[0];
10532         for (i = 0; i < op->nrules; i++, r++) {
10533
10534                 /* Validate open_type */
10535                 if (r->open_type != OPEN_TYPE_LISTEN &&
10536                     r->open_type != OPEN_TYPE_ACTIVE &&
10537                     r->open_type != OPEN_TYPE_PASSIVE &&
10538                     r->open_type != OPEN_TYPE_DONTCARE) {
10539 error:
10540                         /*
                         * Rules 0 to i-1 have malloc'd filters that need
                         * to be freed.  Rule i onward still carries its
                         * userspace pointer (or NULL) and must be left
                         * alone.
10544                          */
10545                         op->nrules = i;
10546                         free_offload_policy(op);
10547                         return (rc);
10548                 }
10549
10550                 /* Validate settings */
10551                 s = &r->settings;
10552                 if ((s->offload != 0 && s->offload != 1) ||
10553                     s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
10554                     s->sched_class < -1 ||
10555                     s->sched_class >= sc->chip_params->nsched_cls) {
10556                         rc = EINVAL;
10557                         goto error;
10558                 }
10559
10560                 bf = &r->bpf_prog;
10561                 u = bf->bf_insns;       /* userspace ptr */
10562                 bf->bf_insns = NULL;
10563                 if (bf->bf_len == 0) {
10564                         /* legal, matches everything */
10565                         continue;
10566                 }
10567                 len = bf->bf_len * sizeof(*bf->bf_insns);
10568                 bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
                rc = copyin(u, bf->bf_insns, len);
                if (rc != 0) {
                        /* Free rule i's program here; the error path
                         * frees only rules 0 to i-1. */
                        free(bf->bf_insns, M_CXGBE);
                        bf->bf_insns = NULL;
                        goto error;
                }

                if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
                        rc = EINVAL;
                        free(bf->bf_insns, M_CXGBE);
                        bf->bf_insns = NULL;
                        goto error;
                }
10577         }
10578 set_policy:
10579         rw_wlock(&sc->policy_lock);
10580         old = sc->policy;
10581         sc->policy = op;
10582         rw_wunlock(&sc->policy_lock);
10583         free_offload_policy(old);
10584
10585         return (0);
10586 }
10587
10588 #define MAX_READ_BUF_SIZE (128 * 1024)
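/*
 * Copy a validated range of adapter memory out to userspace through
 * memory window 2, at most MAX_READ_BUF_SIZE bytes per chunk.
 */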
10589 static int
10590 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
10591 {
10592         uint32_t addr, remaining, n;
10593         uint32_t *buf;
10594         int rc;
10595         uint8_t *dst;
10596
10597         rc = validate_mem_range(sc, mr->addr, mr->len);
10598         if (rc != 0)
10599                 return (rc);
10600
10601         buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
10602         addr = mr->addr;
10603         remaining = mr->len;
10604         dst = (void *)mr->data;
10605
10606         while (remaining) {
10607                 n = min(remaining, MAX_READ_BUF_SIZE);
10608                 read_via_memwin(sc, 2, addr, buf, n);
10609
10610                 rc = copyout(buf, dst, n);
10611                 if (rc != 0)
10612                         break;
10613
10614                 dst += n;
10615                 remaining -= n;
10616                 addr += n;
10617         }
10618
10619         free(buf, M_CXGBE);
10620         return (rc);
10621 }
10622 #undef MAX_READ_BUF_SIZE
10623
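/*
 * Read up to sizeof(i2cd->data) bytes from an i2c device (typically a
 * port's transceiver module) via a firmware mailbox command.
 */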
10624 static int
10625 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
10626 {
10627         int rc;
10628
10629         if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
10630                 return (EINVAL);
10631
10632         if (i2cd->len > sizeof(i2cd->data))
10633                 return (EFBIG);
10634
10635         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
10636         if (rc)
10637                 return (rc);
10638         rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
10639             i2cd->offset, i2cd->len, &i2cd->data[0]);
10640         end_synchronized_op(sc, 0);
10641
10642         return (rc);
10643 }
10644
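/*
 * Clear the MAC, VI, TP, and software queue statistics for every VI on
 * the given port.
 */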
10645 static int
10646 clear_stats(struct adapter *sc, u_int port_id)
10647 {
10648         int i, v, chan_map;
10649         struct port_info *pi;
10650         struct vi_info *vi;
10651         struct sge_rxq *rxq;
10652         struct sge_txq *txq;
10653         struct sge_wrq *wrq;
10654 #ifdef TCP_OFFLOAD
10655         struct sge_ofld_rxq *ofld_rxq;
10656 #endif
10657
10658         if (port_id >= sc->params.nports)
10659                 return (EINVAL);
10660         pi = sc->port[port_id];
10661         if (pi == NULL)
10662                 return (EIO);
10663
10664         /* MAC stats */
10665         t4_clr_port_stats(sc, pi->tx_chan);
10666         if (is_t6(sc)) {
10667                 if (pi->fcs_reg != -1)
10668                         pi->fcs_base = t4_read_reg64(sc, pi->fcs_reg);
10669                 else
10670                         pi->stats.rx_fcs_err = 0;
10671         }
10672         pi->tx_parse_error = 0;
10673         pi->tnl_cong_drops = 0;
10674         mtx_lock(&sc->reg_lock);
10675         for_each_vi(pi, v, vi) {
10676                 if (vi->flags & VI_INIT_DONE)
10677                         t4_clr_vi_stats(sc, vi->vin);
10678         }
10679         chan_map = pi->rx_e_chan_map;
10680         v = 0;  /* reuse */
10681         while (chan_map) {
10682                 i = ffs(chan_map) - 1;
10683                 t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
10684                     1, A_TP_MIB_TNL_CNG_DROP_0 + i);
10685                 chan_map &= ~(1 << i);
10686         }
10687         mtx_unlock(&sc->reg_lock);
10688
10689         /*
10690          * Since this command accepts a port, clear stats for
10691          * all VIs on this port.
10692          */
10693         for_each_vi(pi, v, vi) {
10694                 if (vi->flags & VI_INIT_DONE) {
10695
10696                         for_each_rxq(vi, i, rxq) {
10697 #if defined(INET) || defined(INET6)
10698                                 rxq->lro.lro_queued = 0;
10699                                 rxq->lro.lro_flushed = 0;
10700 #endif
10701                                 rxq->rxcsum = 0;
10702                                 rxq->vlan_extraction = 0;
10703                                 rxq->vxlan_rxcsum = 0;
10704
10705                                 rxq->fl.cl_allocated = 0;
10706                                 rxq->fl.cl_recycled = 0;
10707                                 rxq->fl.cl_fast_recycled = 0;
10708                         }
10709
10710                         for_each_txq(vi, i, txq) {
10711                                 txq->txcsum = 0;
10712                                 txq->tso_wrs = 0;
10713                                 txq->vlan_insertion = 0;
10714                                 txq->imm_wrs = 0;
10715                                 txq->sgl_wrs = 0;
10716                                 txq->txpkt_wrs = 0;
10717                                 txq->txpkts0_wrs = 0;
10718                                 txq->txpkts1_wrs = 0;
10719                                 txq->txpkts0_pkts = 0;
10720                                 txq->txpkts1_pkts = 0;
10721                                 txq->raw_wrs = 0;
10722                                 txq->vxlan_tso_wrs = 0;
10723                                 txq->vxlan_txcsum = 0;
10724                                 txq->kern_tls_records = 0;
10725                                 txq->kern_tls_short = 0;
10726                                 txq->kern_tls_partial = 0;
10727                                 txq->kern_tls_full = 0;
10728                                 txq->kern_tls_octets = 0;
10729                                 txq->kern_tls_waste = 0;
10730                                 txq->kern_tls_options = 0;
10731                                 txq->kern_tls_header = 0;
10732                                 txq->kern_tls_fin = 0;
10733                                 txq->kern_tls_fin_short = 0;
10734                                 txq->kern_tls_cbc = 0;
10735                                 txq->kern_tls_gcm = 0;
10736                                 mp_ring_reset_stats(txq->r);
10737                         }
10738
10739 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10740                         for_each_ofld_txq(vi, i, wrq) {
10741                                 wrq->tx_wrs_direct = 0;
10742                                 wrq->tx_wrs_copied = 0;
10743                         }
10744 #endif
10745 #ifdef TCP_OFFLOAD
10746                         for_each_ofld_rxq(vi, i, ofld_rxq) {
10747                                 ofld_rxq->fl.cl_allocated = 0;
10748                                 ofld_rxq->fl.cl_recycled = 0;
10749                                 ofld_rxq->fl.cl_fast_recycled = 0;
10750                         }
10751 #endif
10752
10753                         if (IS_MAIN_VI(vi)) {
10754                                 wrq = &sc->sge.ctrlq[pi->port_id];
10755                                 wrq->tx_wrs_direct = 0;
10756                                 wrq->tx_wrs_copied = 0;
10757                         }
10758                 }
10759         }
10760
10761         return (0);
10762 }
10763
10764 int
10765 t4_os_find_pci_capability(struct adapter *sc, int cap)
10766 {
10767         int i;
10768
10769         return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
10770 }
10771
10772 int
10773 t4_os_pci_save_state(struct adapter *sc)
10774 {
10775         device_t dev;
10776         struct pci_devinfo *dinfo;
10777
10778         dev = sc->dev;
10779         dinfo = device_get_ivars(dev);
10780
10781         pci_cfg_save(dev, dinfo, 0);
10782         return (0);
10783 }
10784
10785 int
10786 t4_os_pci_restore_state(struct adapter *sc)
10787 {
10788         device_t dev;
10789         struct pci_devinfo *dinfo;
10790
10791         dev = sc->dev;
10792         dinfo = device_get_ivars(dev);
10793
10794         pci_cfg_restore(dev, dinfo);
10795         return (0);
10796 }
10797
10798 void
10799 t4_os_portmod_changed(struct port_info *pi)
10800 {
10801         struct adapter *sc = pi->adapter;
10802         struct vi_info *vi;
10803         struct ifnet *ifp;
10804         static const char *mod_str[] = {
10805                 NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
10806         };
10807
10808         KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
10809             ("%s: port_type %u", __func__, pi->port_type));
10810
10811         vi = &pi->vi[0];
10812         if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
10813                 PORT_LOCK(pi);
10814                 build_medialist(pi);
10815                 if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
10816                         fixup_link_config(pi);
10817                         apply_link_config(pi);
10818                 }
10819                 PORT_UNLOCK(pi);
10820                 end_synchronized_op(sc, LOCK_HELD);
10821         }
10822
10823         ifp = vi->ifp;
10824         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
10825                 if_printf(ifp, "transceiver unplugged.\n");
10826         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
10827                 if_printf(ifp, "unknown transceiver inserted.\n");
10828         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
10829                 if_printf(ifp, "unsupported transceiver inserted.\n");
10830         else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
10831                 if_printf(ifp, "%dGbps %s transceiver inserted.\n",
10832                     port_top_speed(pi), mod_str[pi->mod_type]);
10833         } else {
10834                 if_printf(ifp, "transceiver (type %d) inserted.\n",
10835                     pi->mod_type);
10836         }
10837 }
10838
10839 void
10840 t4_os_link_changed(struct port_info *pi)
10841 {
10842         struct vi_info *vi;
10843         struct ifnet *ifp;
10844         struct link_config *lc = &pi->link_cfg;
10845         struct adapter *sc = pi->adapter;
10846         int v;
10847
10848         PORT_LOCK_ASSERT_OWNED(pi);
10849
10850         if (is_t6(sc)) {
10851                 if (lc->link_ok) {
10852                         if (lc->speed > 25000 ||
10853                             (lc->speed == 25000 && lc->fec == FEC_RS)) {
10854                                 pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
10855                                     A_MAC_PORT_AFRAMECHECKSEQUENCEERRORS);
10856                         } else {
10857                                 pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
10858                                     A_MAC_PORT_MTIP_1G10G_RX_CRCERRORS);
10859                         }
10860                         pi->fcs_base = t4_read_reg64(sc, pi->fcs_reg);
10861                         pi->stats.rx_fcs_err = 0;
10862                 } else {
10863                         pi->fcs_reg = -1;
10864                 }
10865         } else {
10866                 MPASS(pi->fcs_reg != -1);
10867                 MPASS(pi->fcs_base == 0);
10868         }
10869
10870         for_each_vi(pi, v, vi) {
10871                 ifp = vi->ifp;
10872                 if (ifp == NULL)
10873                         continue;
10874
10875                 if (lc->link_ok) {
10876                         ifp->if_baudrate = IF_Mbps(lc->speed);
10877                         if_link_state_change(ifp, LINK_STATE_UP);
10878                 } else {
10879                         if_link_state_change(ifp, LINK_STATE_DOWN);
10880                 }
10881         }
10882 }
10883
10884 void
10885 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
10886 {
10887         struct adapter *sc;
10888
10889         sx_slock(&t4_list_lock);
10890         SLIST_FOREACH(sc, &t4_list, link) {
10891                 /*
10892                  * func should not make any assumptions about what state sc is
10893                  * in - the only guarantee is that sc->sc_lock is a valid lock.
10894                  */
10895                 func(sc, arg);
10896         }
10897         sx_sunlock(&t4_list_lock);
10898 }
10899
10900 static int
10901 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
10902     struct thread *td)
10903 {
10904         int rc;
10905         struct adapter *sc = dev->si_drv1;
10906
10907         rc = priv_check(td, PRIV_DRIVER);
10908         if (rc != 0)
10909                 return (rc);
10910
10911         switch (cmd) {
10912         case CHELSIO_T4_GETREG: {
10913                 struct t4_reg *edata = (struct t4_reg *)data;
10914
10915                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10916                         return (EFAULT);
10917
10918                 if (edata->size == 4)
10919                         edata->val = t4_read_reg(sc, edata->addr);
10920                 else if (edata->size == 8)
10921                         edata->val = t4_read_reg64(sc, edata->addr);
10922                 else
10923                         return (EINVAL);
10924
10925                 break;
10926         }
10927         case CHELSIO_T4_SETREG: {
10928                 struct t4_reg *edata = (struct t4_reg *)data;
10929
10930                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10931                         return (EFAULT);
10932
10933                 if (edata->size == 4) {
10934                         if (edata->val & 0xffffffff00000000)
10935                                 return (EINVAL);
10936                         t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
10937                 } else if (edata->size == 8)
10938                         t4_write_reg64(sc, edata->addr, edata->val);
10939                 else
10940                         return (EINVAL);
10941                 break;
10942         }
10943         case CHELSIO_T4_REGDUMP: {
10944                 struct t4_regdump *regs = (struct t4_regdump *)data;
10945                 int reglen = t4_get_regs_len(sc);
10946                 uint8_t *buf;
10947
10948                 if (regs->len < reglen) {
10949                         regs->len = reglen; /* hint to the caller */
10950                         return (ENOBUFS);
10951                 }
10952
10953                 regs->len = reglen;
10954                 buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
10955                 get_regs(sc, regs, buf);
10956                 rc = copyout(buf, regs->data, reglen);
10957                 free(buf, M_CXGBE);
10958                 break;
10959         }
10960         case CHELSIO_T4_GET_FILTER_MODE:
10961                 rc = get_filter_mode(sc, (uint32_t *)data);
10962                 break;
10963         case CHELSIO_T4_SET_FILTER_MODE:
10964                 rc = set_filter_mode(sc, *(uint32_t *)data);
10965                 break;
10966         case CHELSIO_T4_GET_FILTER:
10967                 rc = get_filter(sc, (struct t4_filter *)data);
10968                 break;
10969         case CHELSIO_T4_SET_FILTER:
10970                 rc = set_filter(sc, (struct t4_filter *)data);
10971                 break;
10972         case CHELSIO_T4_DEL_FILTER:
10973                 rc = del_filter(sc, (struct t4_filter *)data);
10974                 break;
10975         case CHELSIO_T4_GET_SGE_CONTEXT:
10976                 rc = get_sge_context(sc, (struct t4_sge_context *)data);
10977                 break;
10978         case CHELSIO_T4_LOAD_FW:
10979                 rc = load_fw(sc, (struct t4_data *)data);
10980                 break;
10981         case CHELSIO_T4_GET_MEM:
10982                 rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
10983                 break;
10984         case CHELSIO_T4_GET_I2C:
10985                 rc = read_i2c(sc, (struct t4_i2c_data *)data);
10986                 break;
10987         case CHELSIO_T4_CLEAR_STATS:
10988                 rc = clear_stats(sc, *(uint32_t *)data);
10989                 break;
10990         case CHELSIO_T4_SCHED_CLASS:
10991                 rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
10992                 break;
10993         case CHELSIO_T4_SCHED_QUEUE:
10994                 rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
10995                 break;
10996         case CHELSIO_T4_GET_TRACER:
10997                 rc = t4_get_tracer(sc, (struct t4_tracer *)data);
10998                 break;
10999         case CHELSIO_T4_SET_TRACER:
11000                 rc = t4_set_tracer(sc, (struct t4_tracer *)data);
11001                 break;
11002         case CHELSIO_T4_LOAD_CFG:
11003                 rc = load_cfg(sc, (struct t4_data *)data);
11004                 break;
11005         case CHELSIO_T4_LOAD_BOOT:
11006                 rc = load_boot(sc, (struct t4_bootrom *)data);
11007                 break;
11008         case CHELSIO_T4_LOAD_BOOTCFG:
11009                 rc = load_bootcfg(sc, (struct t4_data *)data);
11010                 break;
11011         case CHELSIO_T4_CUDBG_DUMP:
11012                 rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
11013                 break;
11014         case CHELSIO_T4_SET_OFLD_POLICY:
11015                 rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
11016                 break;
11017         default:
11018                 rc = ENOTTY;
11019         }
11020
11021         return (rc);
11022 }
11023
11024 #ifdef TCP_OFFLOAD
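/*
 * Enable or disable TOE on a VI.  Called with the synchronized op held;
 * enabling initializes the VI's queues (and the main VI's) and activates
 * the TOM ULD on first use, while disabling only drops the port's ULD
 * refcount.
 */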
11025 static int
11026 toe_capability(struct vi_info *vi, int enable)
11027 {
11028         int rc;
11029         struct port_info *pi = vi->pi;
11030         struct adapter *sc = pi->adapter;
11031
11032         ASSERT_SYNCHRONIZED_OP(sc);
11033
11034         if (!is_offload(sc))
11035                 return (ENODEV);
11036
11037         if (enable) {
11038                 if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
11039                         /* TOE is already enabled. */
11040                         return (0);
11041                 }
11042
11043                 /*
11044                  * We need the port's queues around so that we're able to send
11045                  * and receive CPLs to/from the TOE even if the ifnet for this
11046                  * port has never been UP'd administratively.
11047                  */
11048                 if (!(vi->flags & VI_INIT_DONE)) {
11049                         rc = vi_full_init(vi);
11050                         if (rc)
11051                                 return (rc);
11052                 }
11053                 if (!(pi->vi[0].flags & VI_INIT_DONE)) {
11054                         rc = vi_full_init(&pi->vi[0]);
11055                         if (rc)
11056                                 return (rc);
11057                 }
11058
11059                 if (isset(&sc->offload_map, pi->port_id)) {
11060                         /* TOE is enabled on another VI of this port. */
11061                         pi->uld_vis++;
11062                         return (0);
11063                 }
11064
11065                 if (!uld_active(sc, ULD_TOM)) {
11066                         rc = t4_activate_uld(sc, ULD_TOM);
11067                         if (rc == EAGAIN) {
11068                                 log(LOG_WARNING,
11069                                     "You must kldload t4_tom.ko before trying "
11070                                     "to enable TOE on a cxgbe interface.\n");
11071                         }
11072                         if (rc != 0)
11073                                 return (rc);
11074                         KASSERT(sc->tom_softc != NULL,
11075                             ("%s: TOM activated but softc NULL", __func__));
11076                         KASSERT(uld_active(sc, ULD_TOM),
11077                             ("%s: TOM activated but flag not set", __func__));
11078                 }
11079
11080                 /* Activate iWARP and iSCSI too, if the modules are loaded. */
11081                 if (!uld_active(sc, ULD_IWARP))
11082                         (void) t4_activate_uld(sc, ULD_IWARP);
11083                 if (!uld_active(sc, ULD_ISCSI))
11084                         (void) t4_activate_uld(sc, ULD_ISCSI);
11085
11086                 pi->uld_vis++;
11087                 setbit(&sc->offload_map, pi->port_id);
11088         } else {
11089                 pi->uld_vis--;
11090
11091                 if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
11092                         return (0);
11093
11094                 KASSERT(uld_active(sc, ULD_TOM),
11095                     ("%s: TOM never initialized?", __func__));
11096                 clrbit(&sc->offload_map, pi->port_id);
11097         }
11098
11099         return (0);
11100 }
11101
11102 /*
11103  * Add an upper layer driver to the global list.
11104  */
11105 int
11106 t4_register_uld(struct uld_info *ui)
11107 {
11108         int rc = 0;
11109         struct uld_info *u;
11110
11111         sx_xlock(&t4_uld_list_lock);
        SLIST_FOREACH(u, &t4_uld_list, link) {
                if (u->uld_id == ui->uld_id) {
                        rc = EEXIST;
                        goto done;
                }
        }
11118
11119         SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
11120         ui->refcount = 0;
11121 done:
11122         sx_xunlock(&t4_uld_list_lock);
11123         return (rc);
11124 }
11125
11126 int
11127 t4_unregister_uld(struct uld_info *ui)
11128 {
11129         int rc = EINVAL;
11130         struct uld_info *u;
11131
11132         sx_xlock(&t4_uld_list_lock);
11133
        SLIST_FOREACH(u, &t4_uld_list, link) {
                if (u == ui) {
                        if (ui->refcount > 0) {
                                rc = EBUSY;
                                goto done;
                        }

                        SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
                        rc = 0;
                        goto done;
                }
        }
11146 done:
11147         sx_xunlock(&t4_uld_list_lock);
11148         return (rc);
11149 }
11150
11151 int
11152 t4_activate_uld(struct adapter *sc, int id)
11153 {
11154         int rc;
11155         struct uld_info *ui;
11156
11157         ASSERT_SYNCHRONIZED_OP(sc);
11158
11159         if (id < 0 || id > ULD_MAX)
11160                 return (EINVAL);
        rc = EAGAIN;    /* kldload the module with this ULD and try again. */
11162
11163         sx_slock(&t4_uld_list_lock);
11164
11165         SLIST_FOREACH(ui, &t4_uld_list, link) {
11166                 if (ui->uld_id == id) {
11167                         if (!(sc->flags & FULL_INIT_DONE)) {
11168                                 rc = adapter_full_init(sc);
11169                                 if (rc != 0)
11170                                         break;
11171                         }
11172
11173                         rc = ui->activate(sc);
11174                         if (rc == 0) {
11175                                 setbit(&sc->active_ulds, id);
11176                                 ui->refcount++;
11177                         }
11178                         break;
11179                 }
11180         }
11181
11182         sx_sunlock(&t4_uld_list_lock);
11183
11184         return (rc);
11185 }
11186
11187 int
11188 t4_deactivate_uld(struct adapter *sc, int id)
11189 {
11190         int rc;
11191         struct uld_info *ui;
11192
11193         ASSERT_SYNCHRONIZED_OP(sc);
11194
11195         if (id < 0 || id > ULD_MAX)
11196                 return (EINVAL);
11197         rc = ENXIO;
11198
11199         sx_slock(&t4_uld_list_lock);
11200
11201         SLIST_FOREACH(ui, &t4_uld_list, link) {
11202                 if (ui->uld_id == id) {
11203                         rc = ui->deactivate(sc);
11204                         if (rc == 0) {
11205                                 clrbit(&sc->active_ulds, id);
11206                                 ui->refcount--;
11207                         }
11208                         break;
11209                 }
11210         }
11211
11212         sx_sunlock(&t4_uld_list_lock);
11213
11214         return (rc);
11215 }
11216
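/*
 * Task that forwards an adapter-wide async event notification to the
 * iWARP ULD, if one is registered.
 */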
11217 static void
11218 t4_async_event(void *arg, int n)
11219 {
11220         struct uld_info *ui;
11221         struct adapter *sc = (struct adapter *)arg;
11222
11223         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4async") != 0)
11224                 return;
11225         sx_slock(&t4_uld_list_lock);
11226         SLIST_FOREACH(ui, &t4_uld_list, link) {
11227                 if (ui->uld_id == ULD_IWARP) {
11228                         ui->async_event(sc);
11229                         break;
11230                 }
11231         }
11232         sx_sunlock(&t4_uld_list_lock);
11233         end_synchronized_op(sc, 0);
11234 }
11235
11236 int
11237 uld_active(struct adapter *sc, int uld_id)
11238 {
11239
11240         MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
11241
11242         return (isset(&sc->active_ulds, uld_id));
11243 }
11244 #endif
11245
11246 /*
11247  * t  = ptr to tunable.
11248  * nc = number of CPUs.
11249  * c  = compiled in default for that tunable.
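 *
 * e.g. *t == 0 with nc == 8 and c == 16 yields *t = min(8, 16) = 8; a
 * negative *t requests min(nc, -*t) queues instead of the default.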
11250  */
11251 static void
11252 calculate_nqueues(int *t, int nc, const int c)
11253 {
11254         int nq;
11255
11256         if (*t > 0)
11257                 return;
11258         nq = *t < 0 ? -*t : c;
11259         *t = min(nc, nq);
11260 }
11261
11262 /*
11263  * Come up with reasonable defaults for some of the tunables, provided they're
11264  * not set by the user (in which case we'll use the values as is).
11265  */
11266 static void
11267 tweak_tunables(void)
11268 {
11269         int nc = mp_ncpus;      /* our snapshot of the number of CPUs */
11270
11271         if (t4_ntxq < 1) {
11272 #ifdef RSS
11273                 t4_ntxq = rss_getnumbuckets();
11274 #else
11275                 calculate_nqueues(&t4_ntxq, nc, NTXQ);
11276 #endif
11277         }
11278
11279         calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);
11280
11281         if (t4_nrxq < 1) {
11282 #ifdef RSS
11283                 t4_nrxq = rss_getnumbuckets();
11284 #else
11285                 calculate_nqueues(&t4_nrxq, nc, NRXQ);
11286 #endif
11287         }
11288
11289         calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);
11290
11291 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
11292         calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
11293         calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
11294 #endif
11295 #ifdef TCP_OFFLOAD
11296         calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
11297         calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);
11298 #endif
11299
11300 #if defined(TCP_OFFLOAD) || defined(KERN_TLS)
11301         if (t4_toecaps_allowed == -1)
11302                 t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
11303 #else
11304         if (t4_toecaps_allowed == -1)
11305                 t4_toecaps_allowed = 0;
11306 #endif
11307
11308 #ifdef TCP_OFFLOAD
11309         if (t4_rdmacaps_allowed == -1) {
11310                 t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
11311                     FW_CAPS_CONFIG_RDMA_RDMAC;
11312         }
11313
11314         if (t4_iscsicaps_allowed == -1) {
11315                 t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
11316                     FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
11317                     FW_CAPS_CONFIG_ISCSI_T10DIF;
11318         }
11319
11320         if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
11321                 t4_tmr_idx_ofld = TMR_IDX_OFLD;
11322
11323         if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
11324                 t4_pktc_idx_ofld = PKTC_IDX_OFLD;
11325
11326         if (t4_toe_tls_rx_timeout < 0)
11327                 t4_toe_tls_rx_timeout = 0;
11328 #else
11329         if (t4_rdmacaps_allowed == -1)
11330                 t4_rdmacaps_allowed = 0;
11331
11332         if (t4_iscsicaps_allowed == -1)
11333                 t4_iscsicaps_allowed = 0;
11334 #endif
11335
11336 #ifdef DEV_NETMAP
11337         calculate_nqueues(&t4_nnmtxq, nc, NNMTXQ);
11338         calculate_nqueues(&t4_nnmrxq, nc, NNMRXQ);
11339         calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
11340         calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
11341 #endif
11342
11343         if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
11344                 t4_tmr_idx = TMR_IDX;
11345
11346         if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
11347                 t4_pktc_idx = PKTC_IDX;
11348
11349         if (t4_qsize_txq < 128)
11350                 t4_qsize_txq = 128;
11351
11352         if (t4_qsize_rxq < 128)
11353                 t4_qsize_rxq = 128;
11354         while (t4_qsize_rxq & 7)
11355                 t4_qsize_rxq++;
11356
11357         t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
11358
11359         /*
11360          * Number of VIs to create per-port.  The first VI is the "main" regular
11361          * VI for the port.  The rest are additional virtual interfaces on the
11362          * same physical port.  Note that the main VI does not have native
11363          * netmap support but the extra VIs do.
11364          *
11365          * Limit the number of VIs per port to the number of available
11366          * MAC addresses per port.
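         *
         * e.g. t4_num_vis = 2 gives each port one extra VI with native
         * netmap support in addition to the main VI.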
11367          */
11368         if (t4_num_vis < 1)
11369                 t4_num_vis = 1;
11370         if (t4_num_vis > nitems(vi_mac_funcs)) {
11371                 t4_num_vis = nitems(vi_mac_funcs);
11372                 printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
11373         }
11374
11375         if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
11376                 pcie_relaxed_ordering = 1;
11377 #if defined(__i386__) || defined(__amd64__)
11378                 if (cpu_vendor_id == CPU_VENDOR_INTEL)
11379                         pcie_relaxed_ordering = 0;
11380 #endif
11381         }
11382 }
11383
11384 #ifdef DDB
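/*
 * DDB helper: position PCIe memory window 2 over the tid's TCB, dump all
 * 128 bytes of it, and restore the window to what it was.
 */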
11385 static void
11386 t4_dump_tcb(struct adapter *sc, int tid)
11387 {
11388         uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
11389
11390         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
11391         save = t4_read_reg(sc, reg);
11392         base = sc->memwin[2].mw_base;
11393
11394         /* Dump TCB for the tid */
11395         tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
11396         tcb_addr += tid * TCB_SIZE;
11397
11398         if (is_t4(sc)) {
11399                 pf = 0;
11400                 win_pos = tcb_addr & ~0xf;      /* start must be 16B aligned */
11401         } else {
11402                 pf = V_PFNUM(sc->pf);
11403                 win_pos = tcb_addr & ~0x7f;     /* start must be 128B aligned */
11404         }
11405         t4_write_reg(sc, reg, win_pos | pf);
11406         t4_read_reg(sc, reg);
11407
11408         off = tcb_addr - win_pos;
11409         for (i = 0; i < 4; i++) {
11410                 uint32_t buf[8];
11411                 for (j = 0; j < 8; j++, off += 4)
11412                         buf[j] = htonl(t4_read_reg(sc, base + off));
11413
11414                 db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
11415                     buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
11416                     buf[7]);
11417         }
11418
11419         t4_write_reg(sc, reg, save);
11420         t4_read_reg(sc, reg);
11421 }
11422
11423 static void
11424 t4_dump_devlog(struct adapter *sc)
11425 {
11426         struct devlog_params *dparams = &sc->params.devlog;
11427         struct fw_devlog_e e;
11428         int i, first, j, m, nentries, rc;
11429         uint64_t ftstamp = UINT64_MAX;
11430
11431         if (dparams->start == 0) {
11432                 db_printf("devlog params not valid\n");
11433                 return;
11434         }
11435
11436         nentries = dparams->size / sizeof(struct fw_devlog_e);
11437         m = fwmtype_to_hwmtype(dparams->memtype);
11438
11439         /* Find the first entry. */
11440         first = -1;
11441         for (i = 0; i < nentries && !db_pager_quit; i++) {
11442                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
11443                     sizeof(e), (void *)&e);
11444                 if (rc != 0)
11445                         break;
11446
11447                 if (e.timestamp == 0)
11448                         break;
11449
11450                 e.timestamp = be64toh(e.timestamp);
11451                 if (e.timestamp < ftstamp) {
11452                         ftstamp = e.timestamp;
11453                         first = i;
11454                 }
11455         }
11456
11457         if (first == -1)
11458                 return;
11459
11460         i = first;
11461         do {
11462                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
11463                     sizeof(e), (void *)&e);
11464                 if (rc != 0)
11465                         return;
11466
11467                 if (e.timestamp == 0)
11468                         return;
11469
11470                 e.timestamp = be64toh(e.timestamp);
11471                 e.seqno = be32toh(e.seqno);
11472                 for (j = 0; j < 8; j++)
11473                         e.params[j] = be32toh(e.params[j]);
11474
11475                 db_printf("%10d  %15ju  %8s  %8s  ",
11476                     e.seqno, e.timestamp,
11477                     (e.level < nitems(devlog_level_strings) ?
11478                         devlog_level_strings[e.level] : "UNKNOWN"),
11479                     (e.facility < nitems(devlog_facility_strings) ?
11480                         devlog_facility_strings[e.facility] : "UNKNOWN"));
11481                 db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
11482                     e.params[3], e.params[4], e.params[5], e.params[6],
11483                     e.params[7]);
11484
11485                 if (++i == nentries)
11486                         i = 0;
11487         } while (i != first && !db_pager_quit);
11488 }
11489
11490 static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
11491 _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
11492
11493 DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
11494 {
11495         device_t dev;
11496         int t;
11497         bool valid;
11498
11499         valid = false;
11500         t = db_read_token();
11501         if (t == tIDENT) {
11502                 dev = device_lookup_by_name(db_tok_string);
11503                 valid = true;
11504         }
11505         db_skip_to_eol();
11506         if (!valid) {
11507                 db_printf("usage: show t4 devlog <nexus>\n");
11508                 return;
11509         }
11510
11511         if (dev == NULL) {
11512                 db_printf("device not found\n");
11513                 return;
11514         }
11515
11516         t4_dump_devlog(device_get_softc(dev));
11517 }
11518
11519 DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
11520 {
11521         device_t dev;
11522         int radix, tid, t;
11523         bool valid;
11524
11525         valid = false;
11526         radix = db_radix;
11527         db_radix = 10;
11528         t = db_read_token();
11529         if (t == tIDENT) {
11530                 dev = device_lookup_by_name(db_tok_string);
11531                 t = db_read_token();
11532                 if (t == tNUMBER) {
11533                         tid = db_tok_number;
11534                         valid = true;
11535                 }
11536         }       
11537         db_radix = radix;
11538         db_skip_to_eol();
11539         if (!valid) {
11540                 db_printf("usage: show t4 tcb <nexus> <tid>\n");
11541                 return;
11542         }
11543
11544         if (dev == NULL) {
11545                 db_printf("device not found\n");
11546                 return;
11547         }
11548         if (tid < 0) {
11549                 db_printf("invalid tid\n");
11550                 return;
11551         }
11552
11553         t4_dump_tcb(device_get_softc(dev), tid);
11554 }
11555 #endif
11556
11557 static eventhandler_tag vxlan_start_evtag;
11558 static eventhandler_tag vxlan_stop_evtag;
11559
11560 struct vxlan_evargs {
11561         struct ifnet *ifp;
11562         uint16_t port;
11563 };
11564
11565 static void
11566 t4_vxlan_start(struct adapter *sc, void *arg)
11567 {
11568         struct vxlan_evargs *v = arg;
11569         struct port_info *pi;
11570         uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
11571         int i, rc;
11572
11573         if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
11574                 return;
11575         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxst") != 0)
11576                 return;
11577
11578         if (sc->vxlan_refcount == 0) {
11579                 sc->vxlan_port = v->port;
11580                 sc->vxlan_refcount = 1;
11581                 t4_write_reg(sc, A_MPS_RX_VXLAN_TYPE,
11582                     V_VXLAN(v->port) | F_VXLAN_EN);
11583                 for_each_port(sc, i) {
11584                         pi = sc->port[i];
11585                         if (pi->vxlan_tcam_entry == true)
11586                                 continue;
11587                         rc = t4_alloc_raw_mac_filt(sc, pi->vi[0].viid,
11588                             match_all_mac, match_all_mac,
11589                             sc->rawf_base + pi->port_id, 1, pi->port_id, true);
11590                         if (rc < 0) {
11591                                 rc = -rc;
11592                                 log(LOG_ERR,
11593                                     "%s: failed to add VXLAN TCAM entry: %d.\n",
11594                                     device_get_name(pi->vi[0].dev), rc);
11595                         } else {
11596                                 MPASS(rc == sc->rawf_base + pi->port_id);
11597                                 rc = 0;
11598                                 pi->vxlan_tcam_entry = true;
11599                         }
11600                 }
11601         } else if (sc->vxlan_port == v->port) {
11602                 sc->vxlan_refcount++;
11603         } else {
11604                 log(LOG_ERR, "%s: VXLAN already configured on port  %d; "
11605                     "ignoring attempt to configure it on port %d\n",
11606                     device_get_nameunit(sc->dev), sc->vxlan_port, v->port);
11607         }
11608         end_synchronized_op(sc, 0);
11609 }
11610
11611 static void
11612 t4_vxlan_stop(struct adapter *sc, void *arg)
11613 {
11614         struct vxlan_evargs *v = arg;
11615
11616         if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
11617                 return;
11618         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxsp") != 0)
11619                 return;
11620
11621         /*
11622          * VXLANs may have been configured before the driver was loaded so we
11623          * may see more stops than starts.  This is not handled cleanly but at
11624          * least we keep the refcount sane.
11625          */
11626         if (sc->vxlan_port != v->port)
11627                 goto done;
11628         if (sc->vxlan_refcount == 0) {
11629                 log(LOG_ERR,
11630                     "%s: VXLAN operation on port %d was stopped earlier; "
11631                     "ignoring attempt to stop it again.\n",
11632                     device_get_nameunit(sc->dev), sc->vxlan_port);
11633         } else if (--sc->vxlan_refcount == 0) {
11634                 t4_set_reg_field(sc, A_MPS_RX_VXLAN_TYPE, F_VXLAN_EN, 0);
11635         }
11636 done:
11637         end_synchronized_op(sc, 0);
11638 }
11639
11640 static void
11641 t4_vxlan_start_handler(void *arg __unused, struct ifnet *ifp,
11642     sa_family_t family, u_int port)
11643 {
11644         struct vxlan_evargs v;
11645
11646         MPASS(family == AF_INET || family == AF_INET6);
11647         v.ifp = ifp;
11648         v.port = port;
11649
11650         t4_iterate(t4_vxlan_start, &v);
11651 }
11652
11653 static void
11654 t4_vxlan_stop_handler(void *arg __unused, struct ifnet *ifp, sa_family_t family,
11655     u_int port)
11656 {
11657         struct vxlan_evargs v;
11658
11659         MPASS(family == AF_INET || family == AF_INET6);
11660         v.ifp = ifp;
11661         v.port = port;
11662
11663         t4_iterate(t4_vxlan_stop, &v);
11664 }
11665
11666
11667 static struct sx mlu;   /* mod load unload */
11668 SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
11669
11670 static int
11671 mod_event(module_t mod, int cmd, void *arg)
11672 {
11673         int rc = 0;
11674         static int loaded = 0;
11675
11676         switch (cmd) {
11677         case MOD_LOAD:
11678                 sx_xlock(&mlu);
11679                 if (loaded++ == 0) {
11680                         t4_sge_modload();
11681                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
11682                             t4_filter_rpl, CPL_COOKIE_FILTER);
11683                         t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
11684                             do_l2t_write_rpl, CPL_COOKIE_FILTER);
11685                         t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
11686                             t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
11687                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
11688                             t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
11689                         t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
11690                             t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
11691                         t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
11692                         t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
11693                         t4_register_cpl_handler(CPL_SMT_WRITE_RPL,
11694                             do_smt_write_rpl);
11695                         sx_init(&t4_list_lock, "T4/T5 adapters");
11696                         SLIST_INIT(&t4_list);
11697                         callout_init(&fatal_callout, 1);
11698 #ifdef TCP_OFFLOAD
11699                         sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
11700                         SLIST_INIT(&t4_uld_list);
11701 #endif
11702 #ifdef INET6
11703                         t4_clip_modload();
11704 #endif
11705 #ifdef KERN_TLS
11706                         t6_ktls_modload();
11707 #endif
11708                         t4_tracer_modload();
11709                         tweak_tunables();
11710                         vxlan_start_evtag =
11711                             EVENTHANDLER_REGISTER(vxlan_start,
11712                                 t4_vxlan_start_handler, NULL,
11713                                 EVENTHANDLER_PRI_ANY);
11714                         vxlan_stop_evtag =
11715                             EVENTHANDLER_REGISTER(vxlan_stop,
11716                                 t4_vxlan_stop_handler, NULL,
11717                                 EVENTHANDLER_PRI_ANY);
11718                 }
11719                 sx_xunlock(&mlu);
11720                 break;
11721
11722         case MOD_UNLOAD:
11723                 sx_xlock(&mlu);
11724                 if (--loaded == 0) {
11725                         int tries;
11726
11727                         sx_slock(&t4_list_lock);
11728                         if (!SLIST_EMPTY(&t4_list)) {
11729                                 rc = EBUSY;
11730                                 sx_sunlock(&t4_list_lock);
11731                                 goto done_unload;
11732                         }
11733 #ifdef TCP_OFFLOAD
11734                         sx_slock(&t4_uld_list_lock);
11735                         if (!SLIST_EMPTY(&t4_uld_list)) {
11736                                 rc = EBUSY;
11737                                 sx_sunlock(&t4_uld_list_lock);
11738                                 sx_sunlock(&t4_list_lock);
11739                                 goto done_unload;
11740                         }
11741 #endif
11742                         tries = 0;
11743                         while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
11744                                 uprintf("%ju clusters with custom free routine "
11745                                     "still is use.\n", t4_sge_extfree_refs());
11746                                 pause("t4unload", 2 * hz);
11747                         }
11748 #ifdef TCP_OFFLOAD
11749                         sx_sunlock(&t4_uld_list_lock);
11750 #endif
11751                         sx_sunlock(&t4_list_lock);
11752
11753                         if (t4_sge_extfree_refs() == 0) {
11754                                 EVENTHANDLER_DEREGISTER(vxlan_start,
11755                                     vxlan_start_evtag);
11756                                 EVENTHANDLER_DEREGISTER(vxlan_stop,
11757                                     vxlan_stop_evtag);
11758                                 t4_tracer_modunload();
11759 #ifdef KERN_TLS
11760                                 t6_ktls_modunload();
11761 #endif
11762 #ifdef INET6
11763                                 t4_clip_modunload();
11764 #endif
11765 #ifdef TCP_OFFLOAD
11766                                 sx_destroy(&t4_uld_list_lock);
11767 #endif
11768                                 sx_destroy(&t4_list_lock);
11769                                 t4_sge_modunload();
11770                                 loaded = 0;
11771                         } else {
11772                                 rc = EBUSY;
11773                                 loaded++;       /* undo earlier decrement */
11774                         }
11775                 }
11776 done_unload:
11777                 sx_xunlock(&mlu);
11778                 break;
11779         }
11780
11781         return (rc);
11782 }
11783
11784 static devclass_t t4_devclass, t5_devclass, t6_devclass;
11785 static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
11786 static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;
11787
11788 DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
11789 MODULE_VERSION(t4nex, 1);
11790 MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
11791 #ifdef DEV_NETMAP
11792 MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
11793 #endif /* DEV_NETMAP */
11794
11795 DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
11796 MODULE_VERSION(t5nex, 1);
11797 MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
11798 #ifdef DEV_NETMAP
11799 MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
11800 #endif /* DEV_NETMAP */
11801
11802 DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
11803 MODULE_VERSION(t6nex, 1);
11804 MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
11805 #ifdef DEV_NETMAP
11806 MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
11807 #endif /* DEV_NETMAP */
11808
11809 DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
11810 MODULE_VERSION(cxgbe, 1);
11811
11812 DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
11813 MODULE_VERSION(cxl, 1);
11814
11815 DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
11816 MODULE_VERSION(cc, 1);
11817
11818 DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
11819 MODULE_VERSION(vcxgbe, 1);
11820
11821 DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
11822 MODULE_VERSION(vcxl, 1);
11823
11824 DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
11825 MODULE_VERSION(vcc, 1);