]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/cxgbe/t4_main.c
MFV: r367652
[FreeBSD/FreeBSD.git] / sys / dev / cxgbe / t4_main.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 Chelsio Communications, Inc.
5  * All rights reserved.
6  * Written by: Navdeep Parhar <np@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_ddb.h"
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36 #include "opt_kern_tls.h"
37 #include "opt_ratelimit.h"
38 #include "opt_rss.h"
39
40 #include <sys/param.h>
41 #include <sys/conf.h>
42 #include <sys/priv.h>
43 #include <sys/kernel.h>
44 #include <sys/bus.h>
45 #include <sys/eventhandler.h>
46 #include <sys/module.h>
47 #include <sys/malloc.h>
48 #include <sys/queue.h>
49 #include <sys/taskqueue.h>
50 #include <sys/pciio.h>
51 #include <dev/pci/pcireg.h>
52 #include <dev/pci/pcivar.h>
53 #include <dev/pci/pci_private.h>
54 #include <sys/firmware.h>
55 #include <sys/sbuf.h>
56 #include <sys/smp.h>
57 #include <sys/socket.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <net/ethernet.h>
61 #include <net/if.h>
62 #include <net/if_types.h>
63 #include <net/if_dl.h>
64 #include <net/if_vlan_var.h>
65 #ifdef RSS
66 #include <net/rss_config.h>
67 #endif
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #ifdef KERN_TLS
71 #include <netinet/tcp_seq.h>
72 #endif
73 #if defined(__i386__) || defined(__amd64__)
74 #include <machine/md_var.h>
75 #include <machine/cputypes.h>
76 #include <vm/vm.h>
77 #include <vm/pmap.h>
78 #endif
79 #ifdef DDB
80 #include <ddb/ddb.h>
81 #include <ddb/db_lex.h>
82 #endif
83
84 #include "common/common.h"
85 #include "common/t4_msg.h"
86 #include "common/t4_regs.h"
87 #include "common/t4_regs_values.h"
88 #include "cudbg/cudbg.h"
89 #include "t4_clip.h"
90 #include "t4_ioctl.h"
91 #include "t4_l2t.h"
92 #include "t4_mp_ring.h"
93 #include "t4_if.h"
94 #include "t4_smt.h"
95
96 /* T4 bus driver interface */
97 static int t4_probe(device_t);
98 static int t4_attach(device_t);
99 static int t4_detach(device_t);
100 static int t4_child_location_str(device_t, device_t, char *, size_t);
101 static int t4_ready(device_t);
102 static int t4_read_port_device(device_t, int, device_t *);
/*
 * Method table for the T4 nexus ("t4nex") driver.  The T5 and T6 method
 * tables in this file reuse every handler listed here except device_probe.
 */
103 static device_method_t t4_methods[] = {
104         DEVMETHOD(device_probe,         t4_probe),
105         DEVMETHOD(device_attach,        t4_attach),
106         DEVMETHOD(device_detach,        t4_detach),
107
        /* Bus interface: location string for child devices. */
108         DEVMETHOD(bus_child_location_str, t4_child_location_str),
109
        /* NOTE(review): these look like the t4_if.h interface methods -- confirm. */
110         DEVMETHOD(t4_is_main_ready,     t4_ready),
111         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
112
113         DEVMETHOD_END
114 };
/* T4 nexus driver: class name "t4nex", instance size sizeof(struct adapter). */
115 static driver_t t4_driver = {
116         .name = "t4nex",
117         .methods = t4_methods,
118         .size = sizeof(struct adapter),
119 };
120
121
122 /* T4 port (cxgbe) interface */
123 static int cxgbe_probe(device_t);
124 static int cxgbe_attach(device_t);
125 static int cxgbe_detach(device_t);
/*
 * Method table shared by the port drivers in this file (cxgbe, cxl and cc).
 * NOTE(review): this table is not static, presumably because it is referenced
 * from outside this file -- confirm before changing its linkage.
 */
126 device_method_t cxgbe_methods[] = {
127         DEVMETHOD(device_probe,         cxgbe_probe),
128         DEVMETHOD(device_attach,        cxgbe_attach),
129         DEVMETHOD(device_detach,        cxgbe_detach),
        /* Same terminator macro as the nexus method tables in this file. */
130         DEVMETHOD_END
131 };
/* T4 port driver: class name "cxgbe", instance size sizeof(struct port_info). */
132 static driver_t cxgbe_driver = {
133         .name = "cxgbe",
134         .methods = cxgbe_methods,
135         .size = sizeof(struct port_info),
136 };
137
138 /* T4 VI (vcxgbe) interface */
139 static int vcxgbe_probe(device_t);
140 static int vcxgbe_attach(device_t);
141 static int vcxgbe_detach(device_t);
/* Method table shared by the VI drivers in this file (vcxgbe, vcxl and vcc). */
142 static device_method_t vcxgbe_methods[] = {
143         DEVMETHOD(device_probe,         vcxgbe_probe),
144         DEVMETHOD(device_attach,        vcxgbe_attach),
145         DEVMETHOD(device_detach,        vcxgbe_detach),
        /* Same terminator macro as the nexus method tables in this file. */
146         DEVMETHOD_END
147 };
/* T4 VI driver: class name "vcxgbe", instance size sizeof(struct vi_info). */
148 static driver_t vcxgbe_driver = {
149         .name = "vcxgbe",
150         .methods = vcxgbe_methods,
151         .size = sizeof(struct vi_info),
152 };
153
154 static d_ioctl_t t4_ioctl;
155
/*
 * Character device switch named "t4nex".  ioctl is the only entry point
 * populated here; every other cdevsw member is left at its default.
 */
156 static struct cdevsw t4_cdevsw = {
157        .d_version = D_VERSION,
158        .d_ioctl = t4_ioctl,
159        .d_name = "t4nex",
160 };
161
162 /* T5 bus driver interface */
163 static int t5_probe(device_t);
/*
 * Method table for the T5 nexus ("t5nex") driver.  Only device_probe is
 * T5-specific; all other entries are the T4 handlers.
 */
164 static device_method_t t5_methods[] = {
165         DEVMETHOD(device_probe,         t5_probe),
166         DEVMETHOD(device_attach,        t4_attach),
167         DEVMETHOD(device_detach,        t4_detach),
168
169         DEVMETHOD(bus_child_location_str, t4_child_location_str),
170
171         DEVMETHOD(t4_is_main_ready,     t4_ready),
172         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
173
174         DEVMETHOD_END
175 };
/* T5 nexus driver: class name "t5nex", instance size sizeof(struct adapter). */
176 static driver_t t5_driver = {
177         .name = "t5nex",
178         .methods = t5_methods,
179         .size = sizeof(struct adapter),
180 };
181
182
183 /* T5 port (cxl) interface */
/* T5 port driver: class name "cxl"; uses the cxgbe method table. */
184 static driver_t cxl_driver = {
185         .name = "cxl",
186         .methods = cxgbe_methods,
187         .size = sizeof(struct port_info),
188 };
189
190 /* T5 VI (vcxl) interface */
/* T5 VI driver: class name "vcxl"; uses the vcxgbe method table. */
191 static driver_t vcxl_driver = {
192         .name = "vcxl",
193         .methods = vcxgbe_methods,
194         .size = sizeof(struct vi_info),
195 };
196
197 /* T6 bus driver interface */
198 static int t6_probe(device_t);
/*
 * Method table for the T6 nexus ("t6nex") driver.  Only device_probe is
 * T6-specific; all other entries are the T4 handlers.
 */
199 static device_method_t t6_methods[] = {
200         DEVMETHOD(device_probe,         t6_probe),
201         DEVMETHOD(device_attach,        t4_attach),
202         DEVMETHOD(device_detach,        t4_detach),
203
204         DEVMETHOD(bus_child_location_str, t4_child_location_str),
205
206         DEVMETHOD(t4_is_main_ready,     t4_ready),
207         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
208
209         DEVMETHOD_END
210 };
/* T6 nexus driver: class name "t6nex", instance size sizeof(struct adapter). */
211 static driver_t t6_driver = {
212         .name = "t6nex",
213         .methods = t6_methods,
214         .size = sizeof(struct adapter),
215 };
216
217
218 /* T6 port (cc) interface */
/* T6 port driver: class name "cc"; uses the cxgbe method table. */
219 static driver_t cc_driver = {
220         .name = "cc",
221         .methods = cxgbe_methods,
222         .size = sizeof(struct port_info),
223 };
224
225 /* T6 VI (vcc) interface */
/* T6 VI driver: class name "vcc"; uses the vcxgbe method table. */
226 static driver_t vcc_driver = {
227         .name = "vcc",
228         .methods = vcxgbe_methods,
229         .size = sizeof(struct vi_info),
230 };
231
232 /* ifnet interface */
233 static void cxgbe_init(void *);
234 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
235 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
236 static void cxgbe_qflush(struct ifnet *);
237 #if defined(KERN_TLS) || defined(RATELIMIT)
238 static int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *,
239     struct m_snd_tag **);
240 static int cxgbe_snd_tag_modify(struct m_snd_tag *,
241     union if_snd_tag_modify_params *);
242 static int cxgbe_snd_tag_query(struct m_snd_tag *,
243     union if_snd_tag_query_params *);
244 static void cxgbe_snd_tag_free(struct m_snd_tag *);
245 #endif
246
/*
 * malloc(9) type for this driver.
 * NOTE(review): the description string names only T4/T5 although this file
 * also defines T6 drivers -- confirm whether it should be updated.
 */
247 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");
248
249 /*
250  * Correct lock order when you need to acquire multiple locks is t4_list_lock,
251  * then ADAPTER_LOCK, then t4_uld_list_lock.
252  */
/*
 * List of adapters and, with TCP_OFFLOAD, list of ULDs.
 * NOTE(review): each list is presumably protected by the sx lock declared
 * just before it -- confirm against the code that walks them.
 */
253 static struct sx t4_list_lock;
254 SLIST_HEAD(, adapter) t4_list;
255 #ifdef TCP_OFFLOAD
256 static struct sx t4_uld_list_lock;
257 SLIST_HEAD(, uld_info) t4_uld_list;
258 #endif
259
260 /*
261  * Tunables.  See tweak_tunables() too.
262  *
263  * Each tunable is set to a default value here if it's known at compile-time.
264  * Otherwise it is set to -n as an indication to tweak_tunables() that it should
265  * provide a reasonable default (up to n) when the driver is loaded.
266  *
267  * Tunables applicable to all chips (T4 and later) are under hw.cxgbe.  Those
268  * specific to T5 and later are under hw.cxl.
269  */
/* hw.cxgbe: parent node for most tunables defined in this file. */
270 SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
271     "cxgbe(4) parameters");
/* hw.cxl: parent node for the T5+ tunables. */
272 SYSCTL_NODE(_hw, OID_AUTO, cxl, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
273     "cxgbe(4) T5+ parameters");
/* hw.cxgbe.toe: parent node for the TOE tunables. */
274 SYSCTL_NODE(_hw_cxgbe, OID_AUTO, toe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
275     "cxgbe(4) TOE parameters");
276
277 /*
278  * Number of queues for tx and rx, NIC and offload.
279  */
/*
 * Queue counts start out as -n; per the "Tunables" comment earlier in this
 * file that tells tweak_tunables() to choose a default of up to n.
 */
280 #define NTXQ 16
281 int t4_ntxq = -NTXQ;
282 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq, CTLFLAG_RDTUN, &t4_ntxq, 0,
283     "Number of TX queues per port");
284 TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);      /* Old name, undocumented */
285
286 #define NRXQ 8
287 int t4_nrxq = -NRXQ;
288 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq, CTLFLAG_RDTUN, &t4_nrxq, 0,
289     "Number of RX queues per port");
290 TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);      /* Old name, undocumented */
291
/* Per-VI counterparts of the two per-port counts above. */
292 #define NTXQ_VI 1
293 static int t4_ntxq_vi = -NTXQ_VI;
294 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq_vi, CTLFLAG_RDTUN, &t4_ntxq_vi, 0,
295     "Number of TX queues per VI");
296
297 #define NRXQ_VI 1
298 static int t4_nrxq_vi = -NRXQ_VI;
299 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq_vi, CTLFLAG_RDTUN, &t4_nrxq_vi, 0,
300     "Number of RX queues per VI");
301
/* Off (0) by default. */
302 static int t4_rsrv_noflowq = 0;
303 SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
304     0, "Reserve TX queue 0 of each VI for non-flowid packets");
305
306 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
/*
 * Offload queue counts, compiled in only with TCP_OFFLOAD or RATELIMIT.  The
 * -n defaults follow the same convention as the NIC queue counts.
 */
307 #define NOFLDTXQ 8
308 static int t4_nofldtxq = -NOFLDTXQ;
309 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq, CTLFLAG_RDTUN, &t4_nofldtxq, 0,
310     "Number of offload TX queues per port");
311
312 #define NOFLDRXQ 2
313 static int t4_nofldrxq = -NOFLDRXQ;
314 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq, CTLFLAG_RDTUN, &t4_nofldrxq, 0,
315     "Number of offload RX queues per port");
316
317 #define NOFLDTXQ_VI 1
318 static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
319 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq_vi, CTLFLAG_RDTUN, &t4_nofldtxq_vi, 0,
320     "Number of offload TX queues per VI");
321
322 #define NOFLDRXQ_VI 1
323 static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
324 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq_vi, CTLFLAG_RDTUN, &t4_nofldrxq_vi, 0,
325     "Number of offload RX queues per VI");
326
/* Holdoff defaults for offload queues (compile-time constants, not -n). */
327 #define TMR_IDX_OFLD 1
328 int t4_tmr_idx_ofld = TMR_IDX_OFLD;
329 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_ofld, CTLFLAG_RDTUN,
330     &t4_tmr_idx_ofld, 0, "Holdoff timer index for offload queues");
331
/* NOTE(review): -1 presumably means "no packet-count holdoff" -- confirm. */
332 #define PKTC_IDX_OFLD (-1)
333 int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
334 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_ofld, CTLFLAG_RDTUN,
335     &t4_pktc_idx_ofld, 0, "holdoff packet counter index for offload queues");
336
/*
 * TOE keepalive and retransmit tunables, all under hw.cxgbe.toe.  Every one
 * defaults to 0, which the per-knob comments define as "chip/fw default".
 */
337 /* 0 means chip/fw default, non-zero number is value in microseconds */
338 static u_long t4_toe_keepalive_idle = 0;
339 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_idle, CTLFLAG_RDTUN,
340     &t4_toe_keepalive_idle, 0, "TOE keepalive idle timer (us)");
341
342 /* 0 means chip/fw default, non-zero number is value in microseconds */
343 static u_long t4_toe_keepalive_interval = 0;
344 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_interval, CTLFLAG_RDTUN,
345     &t4_toe_keepalive_interval, 0, "TOE keepalive interval timer (us)");
346
347 /* 0 means chip/fw default, non-zero number is # of keepalives before abort */
348 static int t4_toe_keepalive_count = 0;
349 SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, keepalive_count, CTLFLAG_RDTUN,
350     &t4_toe_keepalive_count, 0, "Number of TOE keepalive probes before abort");
351
352 /* 0 means chip/fw default, non-zero number is value in microseconds */
353 static u_long t4_toe_rexmt_min = 0;
354 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_min, CTLFLAG_RDTUN,
355     &t4_toe_rexmt_min, 0, "Minimum TOE retransmit interval (us)");
356
357 /* 0 means chip/fw default, non-zero number is value in microseconds */
358 static u_long t4_toe_rexmt_max = 0;
359 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_max, CTLFLAG_RDTUN,
360     &t4_toe_rexmt_max, 0, "Maximum TOE retransmit interval (us)");
361
362 /* 0 means chip/fw default, non-zero number is # of rexmt before abort */
363 static int t4_toe_rexmt_count = 0;
364 SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, rexmt_count, CTLFLAG_RDTUN,
365     &t4_toe_rexmt_count, 0, "Number of TOE retransmissions before abort");
366
367 /* -1 means chip/fw default, other values are raw backoff values to use */
368 static int t4_toe_rexmt_backoff[16] = {
369         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
370 };
/*
 * One tunable per array slot under hw.cxgbe.toe.rexmt_backoff; the OID name
 * is the array index (0..15).  The descriptions are intentionally empty.
 */
371 SYSCTL_NODE(_hw_cxgbe_toe, OID_AUTO, rexmt_backoff,
372     CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
373     "cxgbe(4) TOE retransmit backoff values");
374 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 0, CTLFLAG_RDTUN,
375     &t4_toe_rexmt_backoff[0], 0, "");
376 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 1, CTLFLAG_RDTUN,
377     &t4_toe_rexmt_backoff[1], 0, "");
378 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 2, CTLFLAG_RDTUN,
379     &t4_toe_rexmt_backoff[2], 0, "");
380 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 3, CTLFLAG_RDTUN,
381     &t4_toe_rexmt_backoff[3], 0, "");
382 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 4, CTLFLAG_RDTUN,
383     &t4_toe_rexmt_backoff[4], 0, "");
384 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 5, CTLFLAG_RDTUN,
385     &t4_toe_rexmt_backoff[5], 0, "");
386 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 6, CTLFLAG_RDTUN,
387     &t4_toe_rexmt_backoff[6], 0, "");
388 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 7, CTLFLAG_RDTUN,
389     &t4_toe_rexmt_backoff[7], 0, "");
390 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 8, CTLFLAG_RDTUN,
391     &t4_toe_rexmt_backoff[8], 0, "");
392 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 9, CTLFLAG_RDTUN,
393     &t4_toe_rexmt_backoff[9], 0, "");
394 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 10, CTLFLAG_RDTUN,
395     &t4_toe_rexmt_backoff[10], 0, "");
396 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 11, CTLFLAG_RDTUN,
397     &t4_toe_rexmt_backoff[11], 0, "");
398 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 12, CTLFLAG_RDTUN,
399     &t4_toe_rexmt_backoff[12], 0, "");
400 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 13, CTLFLAG_RDTUN,
401     &t4_toe_rexmt_backoff[13], 0, "");
402 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
403     &t4_toe_rexmt_backoff[14], 0, "");
404 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
405     &t4_toe_rexmt_backoff[15], 0, "");
406 #endif
407
408 #ifdef DEV_NETMAP
/*
 * Netmap tunables (DEV_NETMAP only).  Native netmap defaults to the extra
 * VIs only; the queue counts use the -n "let tweak_tunables() decide"
 * convention described in the "Tunables" comment earlier in this file.
 */
409 #define NN_MAIN_VI      (1 << 0)        /* Native netmap on the main VI */
410 #define NN_EXTRA_VI     (1 << 1)        /* Native netmap on the extra VI(s) */
411 static int t4_native_netmap = NN_EXTRA_VI;
412 SYSCTL_INT(_hw_cxgbe, OID_AUTO, native_netmap, CTLFLAG_RDTUN, &t4_native_netmap,
413     0, "Native netmap support.  bit 0 = main VI, bit 1 = extra VIs");
414
415 #define NNMTXQ 8
416 static int t4_nnmtxq = -NNMTXQ;
417 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq, CTLFLAG_RDTUN, &t4_nnmtxq, 0,
418     "Number of netmap TX queues");
419
420 #define NNMRXQ 8
421 static int t4_nnmrxq = -NNMRXQ;
422 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq, CTLFLAG_RDTUN, &t4_nnmrxq, 0,
423     "Number of netmap RX queues");
424
425 #define NNMTXQ_VI 2
426 static int t4_nnmtxq_vi = -NNMTXQ_VI;
427 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq_vi, CTLFLAG_RDTUN, &t4_nnmtxq_vi, 0,
428     "Number of netmap TX queues per VI");
429
430 #define NNMRXQ_VI 2
431 static int t4_nnmrxq_vi = -NNMRXQ_VI;
432 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq_vi, CTLFLAG_RDTUN, &t4_nnmrxq_vi, 0,
433     "Number of netmap RX queues per VI");
434 #endif
435
436 /*
437  * Holdoff parameters for ports.
438  */
/* Each knob also accepts its older "_10G" tunable name via TUNABLE_INT. */
439 #define TMR_IDX 1
440 int t4_tmr_idx = TMR_IDX;
441 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx, CTLFLAG_RDTUN, &t4_tmr_idx,
442     0, "Holdoff timer index");
443 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);     /* Old name */
444
/* NOTE(review): -1 presumably means "no packet-count holdoff" -- confirm. */
445 #define PKTC_IDX (-1)
446 int t4_pktc_idx = PKTC_IDX;
447 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx, CTLFLAG_RDTUN, &t4_pktc_idx,
448     0, "Holdoff packet counter index");
449 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);     /* Old name */
450
451 /*
452  * Size (# of entries) of each tx and rx queue.
453  */
/*
 * Both variables are unsigned int, so they are exported with SYSCTL_UINT to
 * keep the sysctl type in agreement with the storage type.
 */
454 unsigned int t4_qsize_txq = TX_EQ_QSIZE;
455 SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, &t4_qsize_txq, 0,
456     "Number of descriptors in each TX queue");
457
458 unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
459 SYSCTL_UINT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, &t4_qsize_rxq, 0,
460     "Number of descriptors in each RX queue");
461
462 /*
463  * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
464  */
/* All three interrupt types are allowed by default. */
465 int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
466 SYSCTL_INT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &t4_intr_types,
467     0, "Interrupt types allowed (bit 0 = INTx, 1 = MSI, 2 = MSI-X)");
468
469 /*
470  * Configuration file.  All the _CF names here are special.
471  */
472 #define DEFAULT_CF      "default"
473 #define BUILTIN_CF      "built-in"
474 #define FLASH_CF        "flash"
475 #define UWIRE_CF        "uwire"
476 #define FPGA_CF         "fpga"
/* 32-byte buffer: at most 31 characters plus the terminating NUL. */
477 static char t4_cfg_file[32] = DEFAULT_CF;
478 SYSCTL_STRING(_hw_cxgbe, OID_AUTO, config_file, CTLFLAG_RDTUN, t4_cfg_file,
479     sizeof(t4_cfg_file), "Firmware configuration file");
480
481 /*
482  * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
483  * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
484  * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
485  *            mark or when signalled to do so, 0 to never emit PAUSE.
486  * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
487  *                 negotiated settings will override rx_pause/tx_pause.
488  *                 Otherwise rx_pause/tx_pause are applied forcibly.
489  */
/* Default: rx_pause, tx_pause and pause_autoneg are all set. */
490 static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
491 SYSCTL_INT(_hw_cxgbe, OID_AUTO, pause_settings, CTLFLAG_RDTUN,
492     &t4_pause_settings, 0,
493     "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
494
495 /*
496  * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
497  * -1 to run with the firmware default.  Same as FEC_AUTO (bit 5)
498  *  0 to disable FEC.
499  */
/*
 * NOTE(review): the comment above calls bit 1 "BASER" while the sysctl
 * description below calls it "BASER_RS" -- confirm which label is preferred.
 */
500 static int t4_fec = -1;
501 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fec, CTLFLAG_RDTUN, &t4_fec, 0,
502     "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
503
504 /*
505  * Link autonegotiation.
506  * -1 to run with the firmware default.
507  *  0 to disable.
508  *  1 to enable.
509  */
510 static int t4_autoneg = -1;
511 SYSCTL_INT(_hw_cxgbe, OID_AUTO, autoneg, CTLFLAG_RDTUN, &t4_autoneg, 0,
512     "Link autonegotiation");
513
514 /*
515  * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
516  * encouraged respectively).  '-n' is the same as 'n' except the firmware
517  * version used in the checks is read from the firmware bundled with the driver.
518  */
/* Default is 1 (allowed).  The sysctl description does not mention '-n'. */
519 static int t4_fw_install = 1;
520 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fw_install, CTLFLAG_RDTUN, &t4_fw_install, 0,
521     "Firmware auto-install (0 = prohibited, 1 = allowed, 2 = encouraged)");
522
523 /*
524  * ASIC features that will be used.  Disable the ones you don't want so that the
525  * chip resources aren't wasted on features that will not be used.
526  */
/*
 * Each *caps_allowed value below is a mask.  Defaults are 0 (nothing
 * allowed), an explicit set of FW_CAPS_CONFIG_* bits, or -1.
 * NOTE(review): -1 presumably allows every capability bit -- confirm
 * against the code that applies these masks.
 */
527 static int t4_nbmcaps_allowed = 0;
528 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nbmcaps_allowed, CTLFLAG_RDTUN,
529     &t4_nbmcaps_allowed, 0, "Default NBM capabilities");
530
531 static int t4_linkcaps_allowed = 0;     /* No DCBX, PPP, etc. by default */
532 SYSCTL_INT(_hw_cxgbe, OID_AUTO, linkcaps_allowed, CTLFLAG_RDTUN,
533     &t4_linkcaps_allowed, 0, "Default link capabilities");
534
535 static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
536     FW_CAPS_CONFIG_SWITCH_EGRESS;
537 SYSCTL_INT(_hw_cxgbe, OID_AUTO, switchcaps_allowed, CTLFLAG_RDTUN,
538     &t4_switchcaps_allowed, 0, "Default switch capabilities");
539
/* The ETHOFLD NIC capability is part of the default only with RATELIMIT. */
540 #ifdef RATELIMIT
541 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
542         FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
543 #else
544 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
545         FW_CAPS_CONFIG_NIC_HASHFILTER;
546 #endif
547 SYSCTL_INT(_hw_cxgbe, OID_AUTO, niccaps_allowed, CTLFLAG_RDTUN,
548     &t4_niccaps_allowed, 0, "Default NIC capabilities");
549
550 static int t4_toecaps_allowed = -1;
551 SYSCTL_INT(_hw_cxgbe, OID_AUTO, toecaps_allowed, CTLFLAG_RDTUN,
552     &t4_toecaps_allowed, 0, "Default TCP offload capabilities");
553
554 static int t4_rdmacaps_allowed = -1;
555 SYSCTL_INT(_hw_cxgbe, OID_AUTO, rdmacaps_allowed, CTLFLAG_RDTUN,
556     &t4_rdmacaps_allowed, 0, "Default RDMA capabilities");
557
558 static int t4_cryptocaps_allowed = -1;
559 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cryptocaps_allowed, CTLFLAG_RDTUN,
560     &t4_cryptocaps_allowed, 0, "Default crypto capabilities");
561
562 static int t4_iscsicaps_allowed = -1;
563 SYSCTL_INT(_hw_cxgbe, OID_AUTO, iscsicaps_allowed, CTLFLAG_RDTUN,
564     &t4_iscsicaps_allowed, 0, "Default iSCSI capabilities");
565
566 static int t4_fcoecaps_allowed = 0;
567 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fcoecaps_allowed, CTLFLAG_RDTUN,
568     &t4_fcoecaps_allowed, 0, "Default FCoE capabilities");
569
/* Registered under hw.cxl rather than hw.cxgbe; off by default. */
570 static int t5_write_combine = 0;
571 SYSCTL_INT(_hw_cxl, OID_AUTO, write_combine, CTLFLAG_RDTUN, &t5_write_combine,
572     0, "Use WC instead of UC for BAR2");
573
574 static int t4_num_vis = 1;
575 SYSCTL_INT(_hw_cxgbe, OID_AUTO, num_vis, CTLFLAG_RDTUN, &t4_num_vis, 0,
576     "Number of VIs per port");
577
578 /*
579  * PCIe Relaxed Ordering.
580  * -1: driver should figure out a good value.
581  * 0: disable RO.
582  * 1: enable RO.
583  * 2: leave RO alone.
584  */
/* The sysctl description lists 0/1/2 only; the default, -1, is not in it. */
585 static int pcie_relaxed_ordering = -1;
586 SYSCTL_INT(_hw_cxgbe, OID_AUTO, pcie_relaxed_ordering, CTLFLAG_RDTUN,
587     &pcie_relaxed_ordering, 0,
588     "PCIe Relaxed Ordering: 0 = disable, 1 = enable, 2 = leave alone");
589
590 static int t4_panic_on_fatal_err = 0;
591 SYSCTL_INT(_hw_cxgbe, OID_AUTO, panic_on_fatal_err, CTLFLAG_RDTUN,
592     &t4_panic_on_fatal_err, 0, "panic on fatal errors");
593
/*
 * Declared CTLFLAG_RWTUN, whereas the other tunables in this part of the file
 * are CTLFLAG_RDTUN.
 */
594 static int t4_tx_vm_wr = 0;
595 SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_vm_wr, CTLFLAG_RWTUN, &t4_tx_vm_wr, 0,
596     "Use VM work requests to transmit packets.");
597
598 /*
599  * Set to non-zero to enable the attack filter.  A packet that matches any of
600  * these conditions will get dropped on ingress:
601  * 1) IP && source address == destination address.
602  * 2) TCP/IP && source address is not a unicast address.
603  * 3) TCP/IP && destination address is not a unicast address.
604  * 4) IP && source address is loopback (127.x.y.z).
605  * 5) IP && destination address is loopback (127.x.y.z).
606  * 6) IPv6 && source address == destination address.
607  * 7) IPv6 && source address is not a unicast address.
608  * 8) IPv6 && source address is loopback (::1/128).
609  * 9) IPv6 && destination address is loopback (::1/128).
610  * 10) IPv6 && source address is unspecified (::/128).
611  * 11) IPv6 && destination address is unspecified (::/128).
612  * 12) TCP/IPv6 && source address is multicast (ff00::/8).
613  * 13) TCP/IPv6 && destination address is multicast (ff00::/8).
614  */
615 static int t4_attack_filter = 0;
616 SYSCTL_INT(_hw_cxgbe, OID_AUTO, attack_filter, CTLFLAG_RDTUN,
617     &t4_attack_filter, 0, "Drop suspicious traffic");
618
/*
 * Ingress drop knobs.  Of the four below, only the L2-error drop is enabled
 * by default.
 */
619 static int t4_drop_ip_fragments = 0;
620 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_ip_fragments, CTLFLAG_RDTUN,
621     &t4_drop_ip_fragments, 0, "Drop IP fragments");
622
623 static int t4_drop_pkts_with_l2_errors = 1;
624 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l2_errors, CTLFLAG_RDTUN,
625     &t4_drop_pkts_with_l2_errors, 0,
626     "Drop all frames with Layer 2 length or checksum errors");
627
628 static int t4_drop_pkts_with_l3_errors = 0;
629 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l3_errors, CTLFLAG_RDTUN,
630     &t4_drop_pkts_with_l3_errors, 0,
631     "Drop all frames with IP version, length, or checksum errors");
632
633 static int t4_drop_pkts_with_l4_errors = 0;
634 SYSCTL_INT(_hw_cxgbe, OID_AUTO, drop_pkts_with_l4_errors, CTLFLAG_RDTUN,
635     &t4_drop_pkts_with_l4_errors, 0,
636     "Drop all frames with Layer 4 length, checksum, or other errors");
637
638 #ifdef TCP_OFFLOAD
639 /*
640  * TOE tunables.
641  */
/* Off by default.  Lives directly under hw.cxgbe, not under hw.cxgbe.toe. */
642 static int t4_cop_managed_offloading = 0;
643 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cop_managed_offloading, CTLFLAG_RDTUN,
644     &t4_cop_managed_offloading, 0,
645     "COP (Connection Offload Policy) controls all TOE offload");
646 #endif
647
648 #ifdef KERN_TLS
649 /*
650  * This enables KERN_TLS for all adapters if set.
651  */
652 static int t4_kern_tls = 0;
653 SYSCTL_INT(_hw_cxgbe, OID_AUTO, kern_tls, CTLFLAG_RDTUN, &t4_kern_tls, 0,
654     "Enable KERN_TLS mode for all supported adapters");
655
/* hw.cxgbe.tls: parent node for the two KERN_TLS knobs that follow. */
656 SYSCTL_NODE(_hw_cxgbe, OID_AUTO, tls, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
657     "cxgbe(4) KERN_TLS parameters");
658
/* Default 0: per the description, try to store keys in card memory. */
659 static int t4_tls_inline_keys = 0;
660 SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, inline_keys, CTLFLAG_RDTUN,
661     &t4_tls_inline_keys, 0,
662     "Always pass TLS keys in work requests (1) or attempt to store TLS keys "
663     "in card memory.");
664
/* Off by default. */
665 static int t4_tls_combo_wrs = 0;
666 SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, combo_wrs, CTLFLAG_RDTUN, &t4_tls_combo_wrs,
667     0, "Attempt to combine TCB field updates with TLS record work requests.");
668 #endif
669
670 /* Functions used by VIs to obtain unique MAC addresses for each VI. */
/*
 * Seven FW_VI_FUNC_* values.
 * NOTE(review): presumably indexed by a VI's position within its port, which
 * would cap the usable VIs per port at the array length -- confirm at the
 * point of use.
 */
671 static int vi_mac_funcs[] = {
672         FW_VI_FUNC_ETH,
673         FW_VI_FUNC_OFLD,
674         FW_VI_FUNC_IWARP,
675         FW_VI_FUNC_OPENISCSI,
676         FW_VI_FUNC_OPENFCOE,
677         FW_VI_FUNC_FOISCSI,
678         FW_VI_FUNC_FOFCOE,
679 };
680
/*
 * Interrupt type plus the queue counts for ports and for extra VIs.
 * NOTE(review): presumably filled in by cfg_itype_and_nqueues(), whose
 * prototype takes a pointer to this struct -- confirm.
 */
681 struct intrs_and_queues {
682         uint16_t intr_type;     /* INTx, MSI, or MSI-X */
683         uint16_t num_vis;       /* number of VIs for each port */
684         uint16_t nirq;          /* Total # of vectors */
685         uint16_t ntxq;          /* # of NIC txq's for each port */
686         uint16_t nrxq;          /* # of NIC rxq's for each port */
687         uint16_t nofldtxq;      /* # of TOE/ETHOFLD txq's for each port */
688         uint16_t nofldrxq;      /* # of TOE rxq's for each port */
689         uint16_t nnmtxq;        /* # of netmap txq's */
690         uint16_t nnmrxq;        /* # of netmap rxq's */
691
692         /* The vcxgbe/vcxl interfaces use these and not the ones above. */
693         uint16_t ntxq_vi;       /* # of NIC txq's */
694         uint16_t nrxq_vi;       /* # of NIC rxq's */
695         uint16_t nofldtxq_vi;   /* # of TOE txq's */
696         uint16_t nofldrxq_vi;   /* # of TOE rxq's */
697         uint16_t nnmtxq_vi;     /* # of netmap txq's */
698         uint16_t nnmrxq_vi;     /* # of netmap rxq's */
699 };
700
701 static void setup_memwin(struct adapter *);
702 static void position_memwin(struct adapter *, int, uint32_t);
703 static int validate_mem_range(struct adapter *, uint32_t, uint32_t);
704 static int fwmtype_to_hwmtype(int);
705 static int validate_mt_off_len(struct adapter *, int, uint32_t, uint32_t,
706     uint32_t *);
707 static int fixup_devlog_params(struct adapter *);
708 static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
709 static int contact_firmware(struct adapter *);
710 static int partition_resources(struct adapter *);
711 static int get_params__pre_init(struct adapter *);
712 static int set_params__pre_init(struct adapter *);
713 static int get_params__post_init(struct adapter *);
714 static int set_params__post_init(struct adapter *);
715 static void t4_set_desc(struct adapter *);
716 static bool fixed_ifmedia(struct port_info *);
717 static void build_medialist(struct port_info *);
718 static void init_link_config(struct port_info *);
719 static int fixup_link_config(struct port_info *);
720 static int apply_link_config(struct port_info *);
721 static int cxgbe_init_synchronized(struct vi_info *);
722 static int cxgbe_uninit_synchronized(struct vi_info *);
723 static void quiesce_txq(struct adapter *, struct sge_txq *);
724 static void quiesce_wrq(struct adapter *, struct sge_wrq *);
725 static void quiesce_iq(struct adapter *, struct sge_iq *);
726 static void quiesce_fl(struct adapter *, struct sge_fl *);
727 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
728     driver_intr_t *, void *, char *);
729 static int t4_free_irq(struct adapter *, struct irq *);
730 static void t4_init_atid_table(struct adapter *);
731 static void t4_free_atid_table(struct adapter *);
732 static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
733 static void vi_refresh_stats(struct adapter *, struct vi_info *);
734 static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
735 static void cxgbe_tick(void *);
736 static void cxgbe_sysctls(struct port_info *);
737 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
738 static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
739 static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
740 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
741 static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
742 static int sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS);
743 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
744 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
745 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
746 static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
747 static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
748 static int sysctl_fec(SYSCTL_HANDLER_ARGS);
749 static int sysctl_module_fec(SYSCTL_HANDLER_ARGS);
750 static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
751 static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
752 static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
753 static int sysctl_vdd(SYSCTL_HANDLER_ARGS);
754 static int sysctl_reset_sensor(SYSCTL_HANDLER_ARGS);
755 static int sysctl_loadavg(SYSCTL_HANDLER_ARGS);
756 static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
757 static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
758 static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
759 static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
760 static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
761 static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
762 static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
763 static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
764 static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
765 static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
766 static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
767 static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
768 static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
769 static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
770 static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
771 static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
772 static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
773 static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
774 static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
775 static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
776 static int sysctl_tids(SYSCTL_HANDLER_ARGS);
777 static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
778 static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
779 static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
780 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
781 static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
782 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
783 static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
784 #ifdef TCP_OFFLOAD
785 static int sysctl_tls(SYSCTL_HANDLER_ARGS);
786 static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
787 static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
788 static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
789 static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
790 static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
791 static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
792 static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
793 static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
794 #endif
795 static int get_sge_context(struct adapter *, struct t4_sge_context *);
796 static int load_fw(struct adapter *, struct t4_data *);
797 static int load_cfg(struct adapter *, struct t4_data *);
798 static int load_boot(struct adapter *, struct t4_bootrom *);
799 static int load_bootcfg(struct adapter *, struct t4_data *);
800 static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
801 static void free_offload_policy(struct t4_offload_policy *);
802 static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
803 static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
804 static int read_i2c(struct adapter *, struct t4_i2c_data *);
805 static int clear_stats(struct adapter *, u_int);
806 #ifdef TCP_OFFLOAD
807 static int toe_capability(struct vi_info *, int);
808 static void t4_async_event(void *, int);
809 #endif
810 static int mod_event(module_t, int, void *);
811 static int notify_siblings(device_t, int);
812
/*
 * PCI device IDs recognized by the t4, t5 and t6 probe routines, each paired
 * with the description that the probe routine hands to device_set_desc().
 * The descriptions are string literals and are never written through, so they
 * are held via pointers to const.
 */
struct {
	uint16_t device;
	const char *desc;
} t4_pciids[] = {
	{0xa000, "Chelsio Terminator 4 FPGA"},
	{0x4400, "Chelsio T440-dbg"},
	{0x4401, "Chelsio T420-CR"},
	{0x4402, "Chelsio T422-CR"},
	{0x4403, "Chelsio T440-CR"},
	{0x4404, "Chelsio T420-BCH"},
	{0x4405, "Chelsio T440-BCH"},
	{0x4406, "Chelsio T440-CH"},
	{0x4407, "Chelsio T420-SO"},
	{0x4408, "Chelsio T420-CX"},
	{0x4409, "Chelsio T420-BT"},
	{0x440a, "Chelsio T404-BT"},
	{0x440e, "Chelsio T440-LP-CR"},
}, t5_pciids[] = {
	{0xb000, "Chelsio Terminator 5 FPGA"},
	{0x5400, "Chelsio T580-dbg"},
	{0x5401, "Chelsio T520-CR"},		/* 2 x 10G */
	{0x5402, "Chelsio T522-CR"},		/* 2 x 10G, 2 X 1G */
	{0x5403, "Chelsio T540-CR"},		/* 4 x 10G */
	{0x5407, "Chelsio T520-SO"},		/* 2 x 10G, nomem */
	{0x5409, "Chelsio T520-BT"},		/* 2 x 10GBaseT */
	{0x540a, "Chelsio T504-BT"},		/* 4 x 1G */
	{0x540d, "Chelsio T580-CR"},		/* 2 x 40G */
	{0x540e, "Chelsio T540-LP-CR"},		/* 4 x 10G */
	{0x5410, "Chelsio T580-LP-CR"},		/* 2 x 40G */
	{0x5411, "Chelsio T520-LL-CR"},		/* 2 x 10G */
	{0x5412, "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
	{0x5414, "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
	{0x5415, "Chelsio T502-BT"},		/* 2 x 1G */
	{0x5418, "Chelsio T540-BT"},		/* 4 x 10GBaseT */
	{0x5419, "Chelsio T540-LP-BT"},		/* 4 x 10GBaseT */
	{0x541a, "Chelsio T540-SO-BT"},		/* 4 x 10GBaseT, nomem */
	{0x541b, "Chelsio T540-SO-CR"},		/* 4 x 10G, nomem */

	/* Custom */
	{0x5483, "Custom T540-CR"},
	{0x5484, "Custom T540-BT"},
}, t6_pciids[] = {
	{0xc006, "Chelsio Terminator 6 FPGA"},	/* T6 PE10K6 FPGA (PF0) */
	{0x6400, "Chelsio T6-DBG-25"},		/* 2 x 10/25G, debug */
	{0x6401, "Chelsio T6225-CR"},		/* 2 x 10/25G */
	{0x6402, "Chelsio T6225-SO-CR"},	/* 2 x 10/25G, nomem */
	{0x6403, "Chelsio T6425-CR"},		/* 4 x 10/25G */
	{0x6404, "Chelsio T6425-SO-CR"},	/* 4 x 10/25G, nomem */
	{0x6405, "Chelsio T6225-OCP-SO"},	/* 2 x 10/25G, nomem */
	{0x6406, "Chelsio T62100-OCP-SO"},	/* 2 x 40/50/100G, nomem */
	{0x6407, "Chelsio T62100-LP-CR"},	/* 2 x 40/50/100G */
	{0x6408, "Chelsio T62100-SO-CR"},	/* 2 x 40/50/100G, nomem */
	{0x6409, "Chelsio T6210-BT"},		/* 2 x 10GBASE-T */
	{0x640d, "Chelsio T62100-CR"},		/* 2 x 40/50/100G */
	{0x6410, "Chelsio T6-DBG-100"},		/* 2 x 40/50/100G, debug */
	{0x6411, "Chelsio T6225-LL-CR"},	/* 2 x 10/25G */
	{0x6414, "Chelsio T61100-OCP-SO"},	/* 1 x 40/50/100G, nomem */
	{0x6415, "Chelsio T6201-BT"},		/* 2 x 1000BASE-T */

	/* Custom */
	{0x6480, "Custom T6225-CR"},
	{0x6481, "Custom T62100-CR"},
	{0x6482, "Custom T6225-CR"},
	{0x6483, "Custom T62100-CR"},
	{0x6484, "Custom T64100-CR"},
	{0x6485, "Custom T6240-SO"},
	{0x6486, "Custom T6225-SO-CR"},
	{0x6487, "Custom T6225-CR"},
};
882
883 #ifdef TCP_OFFLOAD
884 /*
885  * service_iq_fl() has an iq and needs the fl, so the offsets of iq and fl
886  * are required (at compile time) to match between sge_rxq and sge_ofld_rxq.
887  */
888 CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
889 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
890 #endif
/* struct cluster_metadata must never grow larger than CL_METADATA_SIZE. */
891 CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);
892
893 static int
894 t4_probe(device_t dev)
895 {
896         int i;
897         uint16_t v = pci_get_vendor(dev);
898         uint16_t d = pci_get_device(dev);
899         uint8_t f = pci_get_function(dev);
900
901         if (v != PCI_VENDOR_ID_CHELSIO)
902                 return (ENXIO);
903
904         /* Attach only to PF0 of the FPGA */
905         if (d == 0xa000 && f != 0)
906                 return (ENXIO);
907
908         for (i = 0; i < nitems(t4_pciids); i++) {
909                 if (d == t4_pciids[i].device) {
910                         device_set_desc(dev, t4_pciids[i].desc);
911                         return (BUS_PROBE_DEFAULT);
912                 }
913         }
914
915         return (ENXIO);
916 }
917
918 static int
919 t5_probe(device_t dev)
920 {
921         int i;
922         uint16_t v = pci_get_vendor(dev);
923         uint16_t d = pci_get_device(dev);
924         uint8_t f = pci_get_function(dev);
925
926         if (v != PCI_VENDOR_ID_CHELSIO)
927                 return (ENXIO);
928
929         /* Attach only to PF0 of the FPGA */
930         if (d == 0xb000 && f != 0)
931                 return (ENXIO);
932
933         for (i = 0; i < nitems(t5_pciids); i++) {
934                 if (d == t5_pciids[i].device) {
935                         device_set_desc(dev, t5_pciids[i].desc);
936                         return (BUS_PROBE_DEFAULT);
937                 }
938         }
939
940         return (ENXIO);
941 }
942
943 static int
944 t6_probe(device_t dev)
945 {
946         int i;
947         uint16_t v = pci_get_vendor(dev);
948         uint16_t d = pci_get_device(dev);
949
950         if (v != PCI_VENDOR_ID_CHELSIO)
951                 return (ENXIO);
952
953         for (i = 0; i < nitems(t6_pciids); i++) {
954                 if (d == t6_pciids[i].device) {
955                         device_set_desc(dev, t6_pciids[i].desc);
956                         return (BUS_PROBE_DEFAULT);
957                 }
958         }
959
960         return (ENXIO);
961 }
962
963 static void
964 t5_attribute_workaround(device_t dev)
965 {
966         device_t root_port;
967         uint32_t v;
968
969         /*
970          * The T5 chips do not properly echo the No Snoop and Relaxed
971          * Ordering attributes when replying to a TLP from a Root
972          * Port.  As a workaround, find the parent Root Port and
973          * disable No Snoop and Relaxed Ordering.  Note that this
974          * affects all devices under this root port.
975          */
976         root_port = pci_find_pcie_root_port(dev);
977         if (root_port == NULL) {
978                 device_printf(dev, "Unable to find parent root port\n");
979                 return;
980         }
981
982         v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
983             PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
984         if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
985             0)
986                 device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
987                     device_get_nameunit(root_port));
988 }
989
990 static const struct devnames devnames[] = {
991         {
992                 .nexus_name = "t4nex",
993                 .ifnet_name = "cxgbe",
994                 .vi_ifnet_name = "vcxgbe",
995                 .pf03_drv_name = "t4iov",
996                 .vf_nexus_name = "t4vf",
997                 .vf_ifnet_name = "cxgbev"
998         }, {
999                 .nexus_name = "t5nex",
1000                 .ifnet_name = "cxl",
1001                 .vi_ifnet_name = "vcxl",
1002                 .pf03_drv_name = "t5iov",
1003                 .vf_nexus_name = "t5vf",
1004                 .vf_ifnet_name = "cxlv"
1005         }, {
1006                 .nexus_name = "t6nex",
1007                 .ifnet_name = "cc",
1008                 .vi_ifnet_name = "vcc",
1009                 .pf03_drv_name = "t6iov",
1010                 .vf_nexus_name = "t6vf",
1011                 .vf_ifnet_name = "ccv"
1012         }
1013 };
1014
1015 void
1016 t4_init_devnames(struct adapter *sc)
1017 {
1018         int id;
1019
1020         id = chip_id(sc);
1021         if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
1022                 sc->names = &devnames[id - CHELSIO_T4];
1023         else {
1024                 device_printf(sc->dev, "chip id %d is not supported.\n", id);
1025                 sc->names = NULL;
1026         }
1027 }
1028
1029 static int
1030 t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
1031 {
1032         const char *parent, *name;
1033         long value;
1034         int line, unit;
1035
1036         line = 0;
1037         parent = device_get_nameunit(sc->dev);
1038         name = sc->names->ifnet_name;
1039         while (resource_find_dev(&line, name, &unit, "at", parent) == 0) {
1040                 if (resource_long_value(name, unit, "port", &value) == 0 &&
1041                     value == pi->port_id)
1042                         return (unit);
1043         }
1044         return (-1);
1045 }
1046
1047 static int
1048 t4_attach(device_t dev)
1049 {
1050         struct adapter *sc;
1051         int rc = 0, i, j, rqidx, tqidx, nports;
1052         struct make_dev_args mda;
1053         struct intrs_and_queues iaq;
1054         struct sge *s;
1055         uint32_t *buf;
1056 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1057         int ofld_tqidx;
1058 #endif
1059 #ifdef TCP_OFFLOAD
1060         int ofld_rqidx;
1061 #endif
1062 #ifdef DEV_NETMAP
1063         int nm_rqidx, nm_tqidx;
1064 #endif
1065         int num_vis;
1066
1067         sc = device_get_softc(dev);
1068         sc->dev = dev;
1069         TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);
1070
1071         if ((pci_get_device(dev) & 0xff00) == 0x5400)
1072                 t5_attribute_workaround(dev);
1073         pci_enable_busmaster(dev);
1074         if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
1075                 uint32_t v;
1076
1077                 pci_set_max_read_req(dev, 4096);
1078                 v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
1079                 sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
1080                 if (pcie_relaxed_ordering == 0 &&
1081                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
1082                         v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
1083                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
1084                 } else if (pcie_relaxed_ordering == 1 &&
1085                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
1086                         v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
1087                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
1088                 }
1089         }
1090
1091         sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
1092         sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
1093         sc->traceq = -1;
1094         mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
1095         snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
1096             device_get_nameunit(dev));
1097
1098         snprintf(sc->lockname, sizeof(sc->lockname), "%s",
1099             device_get_nameunit(dev));
1100         mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
1101         t4_add_adapter(sc);
1102
1103         mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
1104         TAILQ_INIT(&sc->sfl);
1105         callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
1106
1107         mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);
1108
1109         sc->policy = NULL;
1110         rw_init(&sc->policy_lock, "connection offload policy");
1111
1112         callout_init(&sc->ktls_tick, 1);
1113
1114 #ifdef TCP_OFFLOAD
1115         TASK_INIT(&sc->async_event_task, 0, t4_async_event, sc);
1116 #endif
1117
1118         refcount_init(&sc->vxlan_refcount, 0);
1119
1120         rc = t4_map_bars_0_and_4(sc);
1121         if (rc != 0)
1122                 goto done; /* error message displayed already */
1123
1124         memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
1125
1126         /* Prepare the adapter for operation. */
1127         buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
1128         rc = -t4_prep_adapter(sc, buf);
1129         free(buf, M_CXGBE);
1130         if (rc != 0) {
1131                 device_printf(dev, "failed to prepare adapter: %d.\n", rc);
1132                 goto done;
1133         }
1134
1135         /*
1136          * This is the real PF# to which we're attaching.  Works from within PCI
1137          * passthrough environments too, where pci_get_function() could return a
1138          * different PF# depending on the passthrough configuration.  We need to
1139          * use the real PF# in all our communication with the firmware.
1140          */
1141         j = t4_read_reg(sc, A_PL_WHOAMI);
1142         sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
1143         sc->mbox = sc->pf;
1144
1145         t4_init_devnames(sc);
1146         if (sc->names == NULL) {
1147                 rc = ENOTSUP;
1148                 goto done; /* error message displayed already */
1149         }
1150
1151         /*
1152          * Do this really early, with the memory windows set up even before the
1153          * character device.  The userland tool's register i/o and mem read
1154          * will work even in "recovery mode".
1155          */
1156         setup_memwin(sc);
1157         if (t4_init_devlog_params(sc, 0) == 0)
1158                 fixup_devlog_params(sc);
1159         make_dev_args_init(&mda);
1160         mda.mda_devsw = &t4_cdevsw;
1161         mda.mda_uid = UID_ROOT;
1162         mda.mda_gid = GID_WHEEL;
1163         mda.mda_mode = 0600;
1164         mda.mda_si_drv1 = sc;
1165         rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
1166         if (rc != 0)
1167                 device_printf(dev, "failed to create nexus char device: %d.\n",
1168                     rc);
1169
1170         /* Go no further if recovery mode has been requested. */
1171         if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
1172                 device_printf(dev, "recovery mode.\n");
1173                 goto done;
1174         }
1175
1176 #if defined(__i386__)
1177         if ((cpu_feature & CPUID_CX8) == 0) {
1178                 device_printf(dev, "64 bit atomics not available.\n");
1179                 rc = ENOTSUP;
1180                 goto done;
1181         }
1182 #endif
1183
1184         /* Contact the firmware and try to become the master driver. */
1185         rc = contact_firmware(sc);
1186         if (rc != 0)
1187                 goto done; /* error message displayed already */
1188         MPASS(sc->flags & FW_OK);
1189
1190         rc = get_params__pre_init(sc);
1191         if (rc != 0)
1192                 goto done; /* error message displayed already */
1193
1194         if (sc->flags & MASTER_PF) {
1195                 rc = partition_resources(sc);
1196                 if (rc != 0)
1197                         goto done; /* error message displayed already */
1198                 t4_intr_clear(sc);
1199         }
1200
1201         rc = get_params__post_init(sc);
1202         if (rc != 0)
1203                 goto done; /* error message displayed already */
1204
1205         rc = set_params__post_init(sc);
1206         if (rc != 0)
1207                 goto done; /* error message displayed already */
1208
1209         rc = t4_map_bar_2(sc);
1210         if (rc != 0)
1211                 goto done; /* error message displayed already */
1212
1213         rc = t4_create_dma_tag(sc);
1214         if (rc != 0)
1215                 goto done; /* error message displayed already */
1216
1217         /*
1218          * First pass over all the ports - allocate VIs and initialize some
1219          * basic parameters like mac address, port type, etc.
1220          */
1221         for_each_port(sc, i) {
1222                 struct port_info *pi;
1223
1224                 pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
1225                 sc->port[i] = pi;
1226
1227                 /* These must be set before t4_port_init */
1228                 pi->adapter = sc;
1229                 pi->port_id = i;
1230                 /*
1231                  * XXX: vi[0] is special so we can't delay this allocation until
1232                  * pi->nvi's final value is known.
1233                  */
1234                 pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
1235                     M_ZERO | M_WAITOK);
1236
1237                 /*
1238                  * Allocate the "main" VI and initialize parameters
1239                  * like mac addr.
1240                  */
1241                 rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
1242                 if (rc != 0) {
1243                         device_printf(dev, "unable to initialize port %d: %d\n",
1244                             i, rc);
1245                         free(pi->vi, M_CXGBE);
1246                         free(pi, M_CXGBE);
1247                         sc->port[i] = NULL;
1248                         goto done;
1249                 }
1250
1251                 snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
1252                     device_get_nameunit(dev), i);
1253                 mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
1254                 sc->chan_map[pi->tx_chan] = i;
1255
1256                 /*
1257                  * The MPS counter for FCS errors doesn't work correctly on the
1258                  * T6 so we use the MAC counter here.  Which MAC is in use
1259                  * depends on the link settings which will be known when the
1260                  * link comes up.
1261                  */
1262                 if (is_t6(sc)) {
1263                         pi->fcs_reg = -1;
1264                 } else if (is_t4(sc)) {
1265                         pi->fcs_reg = PORT_REG(pi->tx_chan,
1266                             A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L);
1267                 } else {
1268                         pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
1269                             A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L);
1270                 }
1271                 pi->fcs_base = 0;
1272
1273                 /* All VIs on this port share this media. */
1274                 ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
1275                     cxgbe_media_status);
1276
1277                 PORT_LOCK(pi);
1278                 init_link_config(pi);
1279                 fixup_link_config(pi);
1280                 build_medialist(pi);
1281                 if (fixed_ifmedia(pi))
1282                         pi->flags |= FIXED_IFMEDIA;
1283                 PORT_UNLOCK(pi);
1284
1285                 pi->dev = device_add_child(dev, sc->names->ifnet_name,
1286                     t4_ifnet_unit(sc, pi));
1287                 if (pi->dev == NULL) {
1288                         device_printf(dev,
1289                             "failed to add device for port %d.\n", i);
1290                         rc = ENXIO;
1291                         goto done;
1292                 }
1293                 pi->vi[0].dev = pi->dev;
1294                 device_set_softc(pi->dev, pi);
1295         }
1296
1297         /*
1298          * Interrupt type, # of interrupts, # of rx/tx queues, etc.
1299          */
1300         nports = sc->params.nports;
1301         rc = cfg_itype_and_nqueues(sc, &iaq);
1302         if (rc != 0)
1303                 goto done; /* error message displayed already */
1304
1305         num_vis = iaq.num_vis;
1306         sc->intr_type = iaq.intr_type;
1307         sc->intr_count = iaq.nirq;
1308
1309         s = &sc->sge;
1310         s->nrxq = nports * iaq.nrxq;
1311         s->ntxq = nports * iaq.ntxq;
1312         if (num_vis > 1) {
1313                 s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
1314                 s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
1315         }
1316         s->neq = s->ntxq + s->nrxq;     /* the free list in an rxq is an eq */
1317         s->neq += nports;               /* ctrl queues: 1 per port */
1318         s->niq = s->nrxq + 1;           /* 1 extra for firmware event queue */
1319 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1320         if (is_offload(sc) || is_ethoffload(sc)) {
1321                 s->nofldtxq = nports * iaq.nofldtxq;
1322                 if (num_vis > 1)
1323                         s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
1324                 s->neq += s->nofldtxq;
1325
1326                 s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
1327                     M_CXGBE, M_ZERO | M_WAITOK);
1328         }
1329 #endif
1330 #ifdef TCP_OFFLOAD
1331         if (is_offload(sc)) {
1332                 s->nofldrxq = nports * iaq.nofldrxq;
1333                 if (num_vis > 1)
1334                         s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
1335                 s->neq += s->nofldrxq;  /* free list */
1336                 s->niq += s->nofldrxq;
1337
1338                 s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
1339                     M_CXGBE, M_ZERO | M_WAITOK);
1340         }
1341 #endif
1342 #ifdef DEV_NETMAP
1343         s->nnmrxq = 0;
1344         s->nnmtxq = 0;
1345         if (t4_native_netmap & NN_MAIN_VI) {
1346                 s->nnmrxq += nports * iaq.nnmrxq;
1347                 s->nnmtxq += nports * iaq.nnmtxq;
1348         }
1349         if (num_vis > 1 && t4_native_netmap & NN_EXTRA_VI) {
1350                 s->nnmrxq += nports * (num_vis - 1) * iaq.nnmrxq_vi;
1351                 s->nnmtxq += nports * (num_vis - 1) * iaq.nnmtxq_vi;
1352         }
1353         s->neq += s->nnmtxq + s->nnmrxq;
1354         s->niq += s->nnmrxq;
1355
1356         s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
1357             M_CXGBE, M_ZERO | M_WAITOK);
1358         s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
1359             M_CXGBE, M_ZERO | M_WAITOK);
1360 #endif
1361         MPASS(s->niq <= s->iqmap_sz);
1362         MPASS(s->neq <= s->eqmap_sz);
1363
1364         s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
1365             M_ZERO | M_WAITOK);
1366         s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
1367             M_ZERO | M_WAITOK);
1368         s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
1369             M_ZERO | M_WAITOK);
1370         s->iqmap = malloc(s->iqmap_sz * sizeof(struct sge_iq *), M_CXGBE,
1371             M_ZERO | M_WAITOK);
1372         s->eqmap = malloc(s->eqmap_sz * sizeof(struct sge_eq *), M_CXGBE,
1373             M_ZERO | M_WAITOK);
1374
1375         sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
1376             M_ZERO | M_WAITOK);
1377
1378         t4_init_l2t(sc, M_WAITOK);
1379         t4_init_smt(sc, M_WAITOK);
1380         t4_init_tx_sched(sc);
1381         t4_init_atid_table(sc);
1382 #ifdef RATELIMIT
1383         t4_init_etid_table(sc);
1384 #endif
1385 #ifdef INET6
1386         t4_init_clip_table(sc);
1387 #endif
1388         if (sc->vres.key.size != 0)
1389                 sc->key_map = vmem_create("T4TLS key map", sc->vres.key.start,
1390                     sc->vres.key.size, 32, 0, M_FIRSTFIT | M_WAITOK);
1391
1392         /*
1393          * Second pass over the ports.  This time we know the number of rx and
1394          * tx queues that each port should get.
1395          */
1396         rqidx = tqidx = 0;
1397 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1398         ofld_tqidx = 0;
1399 #endif
1400 #ifdef TCP_OFFLOAD
1401         ofld_rqidx = 0;
1402 #endif
1403 #ifdef DEV_NETMAP
1404         nm_rqidx = nm_tqidx = 0;
1405 #endif
1406         for_each_port(sc, i) {
1407                 struct port_info *pi = sc->port[i];
1408                 struct vi_info *vi;
1409
1410                 if (pi == NULL)
1411                         continue;
1412
1413                 pi->nvi = num_vis;
1414                 for_each_vi(pi, j, vi) {
1415                         vi->pi = pi;
1416                         vi->adapter = sc;
1417                         vi->qsize_rxq = t4_qsize_rxq;
1418                         vi->qsize_txq = t4_qsize_txq;
1419
1420                         vi->first_rxq = rqidx;
1421                         vi->first_txq = tqidx;
1422                         vi->tmr_idx = t4_tmr_idx;
1423                         vi->pktc_idx = t4_pktc_idx;
1424                         vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
1425                         vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;
1426
1427                         rqidx += vi->nrxq;
1428                         tqidx += vi->ntxq;
1429
1430                         if (j == 0 && vi->ntxq > 1)
1431                                 vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
1432                         else
1433                                 vi->rsrv_noflowq = 0;
1434
1435 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1436                         vi->first_ofld_txq = ofld_tqidx;
1437                         vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;
1438                         ofld_tqidx += vi->nofldtxq;
1439 #endif
1440 #ifdef TCP_OFFLOAD
1441                         vi->ofld_tmr_idx = t4_tmr_idx_ofld;
1442                         vi->ofld_pktc_idx = t4_pktc_idx_ofld;
1443                         vi->first_ofld_rxq = ofld_rqidx;
1444                         vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;
1445
1446                         ofld_rqidx += vi->nofldrxq;
1447 #endif
1448 #ifdef DEV_NETMAP
1449                         vi->first_nm_rxq = nm_rqidx;
1450                         vi->first_nm_txq = nm_tqidx;
1451                         if (j == 0) {
1452                                 vi->nnmrxq = iaq.nnmrxq;
1453                                 vi->nnmtxq = iaq.nnmtxq;
1454                         } else {
1455                                 vi->nnmrxq = iaq.nnmrxq_vi;
1456                                 vi->nnmtxq = iaq.nnmtxq_vi;
1457                         }
1458                         nm_rqidx += vi->nnmrxq;
1459                         nm_tqidx += vi->nnmtxq;
1460 #endif
1461                 }
1462         }
1463
1464         rc = t4_setup_intr_handlers(sc);
1465         if (rc != 0) {
1466                 device_printf(dev,
1467                     "failed to setup interrupt handlers: %d\n", rc);
1468                 goto done;
1469         }
1470
1471         rc = bus_generic_probe(dev);
1472         if (rc != 0) {
1473                 device_printf(dev, "failed to probe child drivers: %d\n", rc);
1474                 goto done;
1475         }
1476
1477         /*
1478          * Ensure thread-safe mailbox access (in debug builds).
1479          *
1480          * So far this was the only thread accessing the mailbox but various
1481          * ifnets and sysctls are about to be created and their handlers/ioctls
1482          * will access the mailbox from different threads.
1483          */
1484         sc->flags |= CHK_MBOX_ACCESS;
1485
1486         rc = bus_generic_attach(dev);
1487         if (rc != 0) {
1488                 device_printf(dev,
1489                     "failed to attach all child ports: %d\n", rc);
1490                 goto done;
1491         }
1492
1493         device_printf(dev,
1494             "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
1495             sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
1496             sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
1497             (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
1498             sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
1499
1500         t4_set_desc(sc);
1501
1502         notify_siblings(dev, 0);
1503
1504 done:
1505         if (rc != 0 && sc->cdev) {
1506                 /* cdev was created and so cxgbetool works; recover that way. */
1507                 device_printf(dev,
1508                     "error during attach, adapter is now in recovery mode.\n");
1509                 rc = 0;
1510         }
1511
1512         if (rc != 0)
1513                 t4_detach_common(dev);
1514         else
1515                 t4_sysctls(sc);
1516
1517         return (rc);
1518 }
1519
1520 static int
1521 t4_child_location_str(device_t bus, device_t dev, char *buf, size_t buflen)
1522 {
1523         struct adapter *sc;
1524         struct port_info *pi;
1525         int i;
1526
1527         sc = device_get_softc(bus);
1528         buf[0] = '\0';
1529         for_each_port(sc, i) {
1530                 pi = sc->port[i];
1531                 if (pi != NULL && pi->dev == dev) {
1532                         snprintf(buf, buflen, "port=%d", pi->port_id);
1533                         break;
1534                 }
1535         }
1536         return (0);
1537 }
1538
1539 static int
1540 t4_ready(device_t dev)
1541 {
1542         struct adapter *sc;
1543
1544         sc = device_get_softc(dev);
1545         if (sc->flags & FW_OK)
1546                 return (0);
1547         return (ENXIO);
1548 }
1549
1550 static int
1551 t4_read_port_device(device_t dev, int port, device_t *child)
1552 {
1553         struct adapter *sc;
1554         struct port_info *pi;
1555
1556         sc = device_get_softc(dev);
1557         if (port < 0 || port >= MAX_NPORTS)
1558                 return (EINVAL);
1559         pi = sc->port[port];
1560         if (pi == NULL || pi->dev == NULL)
1561                 return (ENXIO);
1562         *child = pi->dev;
1563         return (0);
1564 }
1565
1566 static int
1567 notify_siblings(device_t dev, int detaching)
1568 {
1569         device_t sibling;
1570         int error, i;
1571
1572         error = 0;
1573         for (i = 0; i < PCI_FUNCMAX; i++) {
1574                 if (i == pci_get_function(dev))
1575                         continue;
1576                 sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
1577                     pci_get_slot(dev), i);
1578                 if (sibling == NULL || !device_is_attached(sibling))
1579                         continue;
1580                 if (detaching)
1581                         error = T4_DETACH_CHILD(sibling);
1582                 else
1583                         (void)T4_ATTACH_CHILD(sibling);
1584                 if (error)
1585                         break;
1586         }
1587         return (error);
1588 }
1589
1590 /*
1591  * Idempotent
1592  */
1593 static int
1594 t4_detach(device_t dev)
1595 {
1596         struct adapter *sc;
1597         int rc;
1598
1599         sc = device_get_softc(dev);
1600
1601         rc = notify_siblings(dev, 1);
1602         if (rc) {
1603                 device_printf(dev,
1604                     "failed to detach sibling devices: %d\n", rc);
1605                 return (rc);
1606         }
1607
1608         return (t4_detach_common(dev));
1609 }
1610
1611 int
1612 t4_detach_common(device_t dev)
1613 {
1614         struct adapter *sc;
1615         struct port_info *pi;
1616         int i, rc;
1617
1618         sc = device_get_softc(dev);
1619
1620         if (sc->cdev) {
1621                 destroy_dev(sc->cdev);
1622                 sc->cdev = NULL;
1623         }
1624
1625         sx_xlock(&t4_list_lock);
1626         SLIST_REMOVE(&t4_list, sc, adapter, link);
1627         sx_xunlock(&t4_list_lock);
1628
1629         sc->flags &= ~CHK_MBOX_ACCESS;
1630         if (sc->flags & FULL_INIT_DONE) {
1631                 if (!(sc->flags & IS_VF))
1632                         t4_intr_disable(sc);
1633         }
1634
1635         if (device_is_attached(dev)) {
1636                 rc = bus_generic_detach(dev);
1637                 if (rc) {
1638                         device_printf(dev,
1639                             "failed to detach child devices: %d\n", rc);
1640                         return (rc);
1641                 }
1642         }
1643
1644 #ifdef TCP_OFFLOAD
1645         taskqueue_drain(taskqueue_thread, &sc->async_event_task);
1646 #endif
1647
1648         for (i = 0; i < sc->intr_count; i++)
1649                 t4_free_irq(sc, &sc->irq[i]);
1650
1651         if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1652                 t4_free_tx_sched(sc);
1653
1654         for (i = 0; i < MAX_NPORTS; i++) {
1655                 pi = sc->port[i];
1656                 if (pi) {
1657                         t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
1658                         if (pi->dev)
1659                                 device_delete_child(dev, pi->dev);
1660
1661                         mtx_destroy(&pi->pi_lock);
1662                         free(pi->vi, M_CXGBE);
1663                         free(pi, M_CXGBE);
1664                 }
1665         }
1666
1667         device_delete_children(dev);
1668
1669         if (sc->flags & FULL_INIT_DONE)
1670                 adapter_full_uninit(sc);
1671
1672         if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1673                 t4_fw_bye(sc, sc->mbox);
1674
1675         if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
1676                 pci_release_msi(dev);
1677
1678         if (sc->regs_res)
1679                 bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
1680                     sc->regs_res);
1681
1682         if (sc->udbs_res)
1683                 bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
1684                     sc->udbs_res);
1685
1686         if (sc->msix_res)
1687                 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
1688                     sc->msix_res);
1689
1690         if (sc->l2t)
1691                 t4_free_l2t(sc->l2t);
1692         if (sc->smt)
1693                 t4_free_smt(sc->smt);
1694         t4_free_atid_table(sc);
1695 #ifdef RATELIMIT
1696         t4_free_etid_table(sc);
1697 #endif
1698         if (sc->key_map)
1699                 vmem_destroy(sc->key_map);
1700 #ifdef INET6
1701         t4_destroy_clip_table(sc);
1702 #endif
1703
1704 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1705         free(sc->sge.ofld_txq, M_CXGBE);
1706 #endif
1707 #ifdef TCP_OFFLOAD
1708         free(sc->sge.ofld_rxq, M_CXGBE);
1709 #endif
1710 #ifdef DEV_NETMAP
1711         free(sc->sge.nm_rxq, M_CXGBE);
1712         free(sc->sge.nm_txq, M_CXGBE);
1713 #endif
1714         free(sc->irq, M_CXGBE);
1715         free(sc->sge.rxq, M_CXGBE);
1716         free(sc->sge.txq, M_CXGBE);
1717         free(sc->sge.ctrlq, M_CXGBE);
1718         free(sc->sge.iqmap, M_CXGBE);
1719         free(sc->sge.eqmap, M_CXGBE);
1720         free(sc->tids.ftid_tab, M_CXGBE);
1721         free(sc->tids.hpftid_tab, M_CXGBE);
1722         free_hftid_hash(&sc->tids);
1723         free(sc->tids.tid_tab, M_CXGBE);
1724         free(sc->tt.tls_rx_ports, M_CXGBE);
1725         t4_destroy_dma_tag(sc);
1726
1727         callout_drain(&sc->ktls_tick);
1728         callout_drain(&sc->sfl_callout);
1729         if (mtx_initialized(&sc->tids.ftid_lock)) {
1730                 mtx_destroy(&sc->tids.ftid_lock);
1731                 cv_destroy(&sc->tids.ftid_cv);
1732         }
1733         if (mtx_initialized(&sc->tids.atid_lock))
1734                 mtx_destroy(&sc->tids.atid_lock);
1735         if (mtx_initialized(&sc->ifp_lock))
1736                 mtx_destroy(&sc->ifp_lock);
1737
1738         if (rw_initialized(&sc->policy_lock)) {
1739                 rw_destroy(&sc->policy_lock);
1740 #ifdef TCP_OFFLOAD
1741                 if (sc->policy != NULL)
1742                         free_offload_policy(sc->policy);
1743 #endif
1744         }
1745
1746         for (i = 0; i < NUM_MEMWIN; i++) {
1747                 struct memwin *mw = &sc->memwin[i];
1748
1749                 if (rw_initialized(&mw->mw_lock))
1750                         rw_destroy(&mw->mw_lock);
1751         }
1752
1753         mtx_destroy(&sc->sfl_lock);
1754         mtx_destroy(&sc->reg_lock);
1755         mtx_destroy(&sc->sc_lock);
1756
1757         bzero(sc, sizeof(*sc));
1758
1759         return (0);
1760 }
1761
1762 static int
1763 cxgbe_probe(device_t dev)
1764 {
1765         char buf[128];
1766         struct port_info *pi = device_get_softc(dev);
1767
1768         snprintf(buf, sizeof(buf), "port %d", pi->port_id);
1769         device_set_desc_copy(dev, buf);
1770
1771         return (BUS_PROBE_DEFAULT);
1772 }
1773
/*
 * T4_CAP is the base set of interface capabilities assigned to
 * if_capabilities in cxgbe_vi_attach(); T4_CAP_ENABLE is the set assigned
 * to if_capenable there (currently all of T4_CAP).
 */
#define T4_CAP \
    (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | \
    IFCAP_VLAN_HWTSO | IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_TSO | \
    IFCAP_LRO | IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS | \
    IFCAP_HWRXTSTMP | IFCAP_NOMAP)
#define T4_CAP_ENABLE (T4_CAP)
1779
1780 static int
1781 cxgbe_vi_attach(device_t dev, struct vi_info *vi)
1782 {
1783         struct ifnet *ifp;
1784         struct sbuf *sb;
1785         struct pfil_head_args pa;
1786         struct adapter *sc = vi->adapter;
1787
1788         vi->xact_addr_filt = -1;
1789         callout_init(&vi->tick, 1);
1790         if (sc->flags & IS_VF || t4_tx_vm_wr != 0)
1791                 vi->flags |= TX_USES_VM_WR;
1792
1793         /* Allocate an ifnet and set it up */
1794         ifp = if_alloc_dev(IFT_ETHER, dev);
1795         if (ifp == NULL) {
1796                 device_printf(dev, "Cannot allocate ifnet\n");
1797                 return (ENOMEM);
1798         }
1799         vi->ifp = ifp;
1800         ifp->if_softc = vi;
1801
1802         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1803         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1804
1805         ifp->if_init = cxgbe_init;
1806         ifp->if_ioctl = cxgbe_ioctl;
1807         ifp->if_transmit = cxgbe_transmit;
1808         ifp->if_qflush = cxgbe_qflush;
1809         ifp->if_get_counter = cxgbe_get_counter;
1810 #if defined(KERN_TLS) || defined(RATELIMIT)
1811         ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc;
1812         ifp->if_snd_tag_modify = cxgbe_snd_tag_modify;
1813         ifp->if_snd_tag_query = cxgbe_snd_tag_query;
1814         ifp->if_snd_tag_free = cxgbe_snd_tag_free;
1815 #endif
1816 #ifdef RATELIMIT
1817         ifp->if_ratelimit_query = cxgbe_ratelimit_query;
1818 #endif
1819
1820         ifp->if_capabilities = T4_CAP;
1821         ifp->if_capenable = T4_CAP_ENABLE;
1822         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1823             CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1824         if (chip_id(sc) >= CHELSIO_T6) {
1825                 ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
1826                 ifp->if_capenable |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
1827                 ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP |
1828                     CSUM_INNER_IP6_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
1829                     CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_ENCAP_VXLAN;
1830         }
1831
1832 #ifdef TCP_OFFLOAD
1833         if (vi->nofldrxq != 0 && (sc->flags & KERN_TLS_OK) == 0)
1834                 ifp->if_capabilities |= IFCAP_TOE;
1835 #endif
1836 #ifdef RATELIMIT
1837         if (is_ethoffload(sc) && vi->nofldtxq != 0) {
1838                 ifp->if_capabilities |= IFCAP_TXRTLMT;
1839                 ifp->if_capenable |= IFCAP_TXRTLMT;
1840         }
1841 #endif
1842
1843         ifp->if_hw_tsomax = IP_MAXPACKET;
1844         if (vi->flags & TX_USES_VM_WR)
1845                 ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
1846         else
1847                 ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
1848 #ifdef RATELIMIT
1849         if (is_ethoffload(sc) && vi->nofldtxq != 0)
1850                 ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
1851 #endif
1852         ifp->if_hw_tsomaxsegsize = 65536;
1853 #ifdef KERN_TLS
1854         if (sc->flags & KERN_TLS_OK) {
1855                 ifp->if_capabilities |= IFCAP_TXTLS;
1856                 ifp->if_capenable |= IFCAP_TXTLS;
1857         }
1858 #endif
1859
1860         ether_ifattach(ifp, vi->hw_addr);
1861 #ifdef DEV_NETMAP
1862         if (vi->nnmrxq != 0)
1863                 cxgbe_nm_attach(vi);
1864 #endif
1865         sb = sbuf_new_auto();
1866         sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
1867 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1868         switch (ifp->if_capabilities & (IFCAP_TOE | IFCAP_TXRTLMT)) {
1869         case IFCAP_TOE:
1870                 sbuf_printf(sb, "; %d txq (TOE)", vi->nofldtxq);
1871                 break;
1872         case IFCAP_TOE | IFCAP_TXRTLMT:
1873                 sbuf_printf(sb, "; %d txq (TOE/ETHOFLD)", vi->nofldtxq);
1874                 break;
1875         case IFCAP_TXRTLMT:
1876                 sbuf_printf(sb, "; %d txq (ETHOFLD)", vi->nofldtxq);
1877                 break;
1878         }
1879 #endif
1880 #ifdef TCP_OFFLOAD
1881         if (ifp->if_capabilities & IFCAP_TOE)
1882                 sbuf_printf(sb, ", %d rxq (TOE)", vi->nofldrxq);
1883 #endif
1884 #ifdef DEV_NETMAP
1885         if (ifp->if_capabilities & IFCAP_NETMAP)
1886                 sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
1887                     vi->nnmtxq, vi->nnmrxq);
1888 #endif
1889         sbuf_finish(sb);
1890         device_printf(dev, "%s\n", sbuf_data(sb));
1891         sbuf_delete(sb);
1892
1893         vi_sysctls(vi);
1894
1895         pa.pa_version = PFIL_VERSION;
1896         pa.pa_flags = PFIL_IN;
1897         pa.pa_type = PFIL_TYPE_ETHERNET;
1898         pa.pa_headname = ifp->if_xname;
1899         vi->pfil = pfil_head_register(&pa);
1900
1901         return (0);
1902 }
1903
1904 static int
1905 cxgbe_attach(device_t dev)
1906 {
1907         struct port_info *pi = device_get_softc(dev);
1908         struct adapter *sc = pi->adapter;
1909         struct vi_info *vi;
1910         int i, rc;
1911
1912         callout_init_mtx(&pi->tick, &pi->pi_lock, 0);
1913
1914         rc = cxgbe_vi_attach(dev, &pi->vi[0]);
1915         if (rc)
1916                 return (rc);
1917
1918         for_each_vi(pi, i, vi) {
1919                 if (i == 0)
1920                         continue;
1921                 vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
1922                 if (vi->dev == NULL) {
1923                         device_printf(dev, "failed to add VI %d\n", i);
1924                         continue;
1925                 }
1926                 device_set_softc(vi->dev, vi);
1927         }
1928
1929         cxgbe_sysctls(pi);
1930
1931         bus_generic_attach(dev);
1932
1933         return (0);
1934 }
1935
1936 static void
1937 cxgbe_vi_detach(struct vi_info *vi)
1938 {
1939         struct ifnet *ifp = vi->ifp;
1940
1941         if (vi->pfil != NULL) {
1942                 pfil_head_unregister(vi->pfil);
1943                 vi->pfil = NULL;
1944         }
1945
1946         ether_ifdetach(ifp);
1947
1948         /* Let detach proceed even if these fail. */
1949 #ifdef DEV_NETMAP
1950         if (ifp->if_capabilities & IFCAP_NETMAP)
1951                 cxgbe_nm_detach(vi);
1952 #endif
1953         cxgbe_uninit_synchronized(vi);
1954         callout_drain(&vi->tick);
1955         vi_full_uninit(vi);
1956
1957         if_free(vi->ifp);
1958         vi->ifp = NULL;
1959 }
1960
1961 static int
1962 cxgbe_detach(device_t dev)
1963 {
1964         struct port_info *pi = device_get_softc(dev);
1965         struct adapter *sc = pi->adapter;
1966         int rc;
1967
1968         /* Detach the extra VIs first. */
1969         rc = bus_generic_detach(dev);
1970         if (rc)
1971                 return (rc);
1972         device_delete_children(dev);
1973
1974         doom_vi(sc, &pi->vi[0]);
1975
1976         if (pi->flags & HAS_TRACEQ) {
1977                 sc->traceq = -1;        /* cloner should not create ifnet */
1978                 t4_tracer_port_detach(sc);
1979         }
1980
1981         cxgbe_vi_detach(&pi->vi[0]);
1982         callout_drain(&pi->tick);
1983         ifmedia_removeall(&pi->media);
1984
1985         end_synchronized_op(sc, 0);
1986
1987         return (0);
1988 }
1989
1990 static void
1991 cxgbe_init(void *arg)
1992 {
1993         struct vi_info *vi = arg;
1994         struct adapter *sc = vi->adapter;
1995
1996         if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
1997                 return;
1998         cxgbe_init_synchronized(vi);
1999         end_synchronized_op(sc, 0);
2000 }
2001
2002 static int
2003 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
2004 {
2005         int rc = 0, mtu, flags;
2006         struct vi_info *vi = ifp->if_softc;
2007         struct port_info *pi = vi->pi;
2008         struct adapter *sc = pi->adapter;
2009         struct ifreq *ifr = (struct ifreq *)data;
2010         uint32_t mask;
2011
2012         switch (cmd) {
2013         case SIOCSIFMTU:
2014                 mtu = ifr->ifr_mtu;
2015                 if (mtu < ETHERMIN || mtu > MAX_MTU)
2016                         return (EINVAL);
2017
2018                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
2019                 if (rc)
2020                         return (rc);
2021                 ifp->if_mtu = mtu;
2022                 if (vi->flags & VI_INIT_DONE) {
2023                         t4_update_fl_bufsize(ifp);
2024                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2025                                 rc = update_mac_settings(ifp, XGMAC_MTU);
2026                 }
2027                 end_synchronized_op(sc, 0);
2028                 break;
2029
2030         case SIOCSIFFLAGS:
2031                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4flg");
2032                 if (rc)
2033                         return (rc);
2034
2035                 if (ifp->if_flags & IFF_UP) {
2036                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2037                                 flags = vi->if_flags;
2038                                 if ((ifp->if_flags ^ flags) &
2039                                     (IFF_PROMISC | IFF_ALLMULTI)) {
2040                                         rc = update_mac_settings(ifp,
2041                                             XGMAC_PROMISC | XGMAC_ALLMULTI);
2042                                 }
2043                         } else {
2044                                 rc = cxgbe_init_synchronized(vi);
2045                         }
2046                         vi->if_flags = ifp->if_flags;
2047                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2048                         rc = cxgbe_uninit_synchronized(vi);
2049                 }
2050                 end_synchronized_op(sc, 0);
2051                 break;
2052
2053         case SIOCADDMULTI:
2054         case SIOCDELMULTI:
2055                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4multi");
2056                 if (rc)
2057                         return (rc);
2058                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2059                         rc = update_mac_settings(ifp, XGMAC_MCADDRS);
2060                 end_synchronized_op(sc, 0);
2061                 break;
2062
2063         case SIOCSIFCAP:
2064                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
2065                 if (rc)
2066                         return (rc);
2067
2068                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2069                 if (mask & IFCAP_TXCSUM) {
2070                         ifp->if_capenable ^= IFCAP_TXCSUM;
2071                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2072
2073                         if (IFCAP_TSO4 & ifp->if_capenable &&
2074                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2075                                 mask &= ~IFCAP_TSO4;
2076                                 ifp->if_capenable &= ~IFCAP_TSO4;
2077                                 if_printf(ifp,
2078                                     "tso4 disabled due to -txcsum.\n");
2079                         }
2080                 }
2081                 if (mask & IFCAP_TXCSUM_IPV6) {
2082                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2083                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2084
2085                         if (IFCAP_TSO6 & ifp->if_capenable &&
2086                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2087                                 mask &= ~IFCAP_TSO6;
2088                                 ifp->if_capenable &= ~IFCAP_TSO6;
2089                                 if_printf(ifp,
2090                                     "tso6 disabled due to -txcsum6.\n");
2091                         }
2092                 }
2093                 if (mask & IFCAP_RXCSUM)
2094                         ifp->if_capenable ^= IFCAP_RXCSUM;
2095                 if (mask & IFCAP_RXCSUM_IPV6)
2096                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2097
2098                 /*
2099                  * Note that we leave CSUM_TSO alone (it is always set).  The
2100                  * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
2101                  * sending a TSO request our way, so it's sufficient to toggle
2102                  * IFCAP_TSOx only.
2103                  */
2104                 if (mask & IFCAP_TSO4) {
2105                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2106                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
2107                                 if_printf(ifp, "enable txcsum first.\n");
2108                                 rc = EAGAIN;
2109                                 goto fail;
2110                         }
2111                         ifp->if_capenable ^= IFCAP_TSO4;
2112                 }
2113                 if (mask & IFCAP_TSO6) {
2114                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2115                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2116                                 if_printf(ifp, "enable txcsum6 first.\n");
2117                                 rc = EAGAIN;
2118                                 goto fail;
2119                         }
2120                         ifp->if_capenable ^= IFCAP_TSO6;
2121                 }
2122                 if (mask & IFCAP_LRO) {
2123 #if defined(INET) || defined(INET6)
2124                         int i;
2125                         struct sge_rxq *rxq;
2126
2127                         ifp->if_capenable ^= IFCAP_LRO;
2128                         for_each_rxq(vi, i, rxq) {
2129                                 if (ifp->if_capenable & IFCAP_LRO)
2130                                         rxq->iq.flags |= IQ_LRO_ENABLED;
2131                                 else
2132                                         rxq->iq.flags &= ~IQ_LRO_ENABLED;
2133                         }
2134 #endif
2135                 }
2136 #ifdef TCP_OFFLOAD
2137                 if (mask & IFCAP_TOE) {
2138                         int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
2139
2140                         rc = toe_capability(vi, enable);
2141                         if (rc != 0)
2142                                 goto fail;
2143
2144                         ifp->if_capenable ^= mask;
2145                 }
2146 #endif
2147                 if (mask & IFCAP_VLAN_HWTAGGING) {
2148                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2149                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2150                                 rc = update_mac_settings(ifp, XGMAC_VLANEX);
2151                 }
2152                 if (mask & IFCAP_VLAN_MTU) {
2153                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
2154
2155                         /* Need to find out how to disable auto-mtu-inflation */
2156                 }
2157                 if (mask & IFCAP_VLAN_HWTSO)
2158                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2159                 if (mask & IFCAP_VLAN_HWCSUM)
2160                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2161 #ifdef RATELIMIT
2162                 if (mask & IFCAP_TXRTLMT)
2163                         ifp->if_capenable ^= IFCAP_TXRTLMT;
2164 #endif
2165                 if (mask & IFCAP_HWRXTSTMP) {
2166                         int i;
2167                         struct sge_rxq *rxq;
2168
2169                         ifp->if_capenable ^= IFCAP_HWRXTSTMP;
2170                         for_each_rxq(vi, i, rxq) {
2171                                 if (ifp->if_capenable & IFCAP_HWRXTSTMP)
2172                                         rxq->iq.flags |= IQ_RX_TIMESTAMP;
2173                                 else
2174                                         rxq->iq.flags &= ~IQ_RX_TIMESTAMP;
2175                         }
2176                 }
2177                 if (mask & IFCAP_NOMAP)
2178                         ifp->if_capenable ^= IFCAP_NOMAP;
2179
2180 #ifdef KERN_TLS
2181                 if (mask & IFCAP_TXTLS)
2182                         ifp->if_capenable ^= (mask & IFCAP_TXTLS);
2183 #endif
2184                 if (mask & IFCAP_VXLAN_HWCSUM) {
2185                         ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM;
2186                         ifp->if_hwassist ^= CSUM_INNER_IP6_UDP |
2187                             CSUM_INNER_IP6_TCP | CSUM_INNER_IP |
2188                             CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP;
2189                 }
2190                 if (mask & IFCAP_VXLAN_HWTSO) {
2191                         ifp->if_capenable ^= IFCAP_VXLAN_HWTSO;
2192                         ifp->if_hwassist ^= CSUM_INNER_IP6_TSO |
2193                             CSUM_INNER_IP_TSO;
2194                 }
2195
2196 #ifdef VLAN_CAPABILITIES
2197                 VLAN_CAPABILITIES(ifp);
2198 #endif
2199 fail:
2200                 end_synchronized_op(sc, 0);
2201                 break;
2202
2203         case SIOCSIFMEDIA:
2204         case SIOCGIFMEDIA:
2205         case SIOCGIFXMEDIA:
2206                 ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
2207                 break;
2208
2209         case SIOCGI2C: {
2210                 struct ifi2creq i2c;
2211
2212                 rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
2213                 if (rc != 0)
2214                         break;
2215                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
2216                         rc = EPERM;
2217                         break;
2218                 }
2219                 if (i2c.len > sizeof(i2c.data)) {
2220                         rc = EINVAL;
2221                         break;
2222                 }
2223                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
2224                 if (rc)
2225                         return (rc);
2226                 rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
2227                     i2c.offset, i2c.len, &i2c.data[0]);
2228                 end_synchronized_op(sc, 0);
2229                 if (rc == 0)
2230                         rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2231                 break;
2232         }
2233
2234         default:
2235                 rc = ether_ioctl(ifp, cmd, data);
2236         }
2237
2238         return (rc);
2239 }
2240
2241 static int
2242 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
2243 {
2244         struct vi_info *vi = ifp->if_softc;
2245         struct port_info *pi = vi->pi;
2246         struct adapter *sc;
2247         struct sge_txq *txq;
2248         void *items[1];
2249         int rc;
2250
2251         M_ASSERTPKTHDR(m);
2252         MPASS(m->m_nextpkt == NULL);    /* not quite ready for this yet */
2253 #if defined(KERN_TLS) || defined(RATELIMIT)
2254         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
2255                 MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
2256 #endif
2257
2258         if (__predict_false(pi->link_cfg.link_ok == false)) {
2259                 m_freem(m);
2260                 return (ENETDOWN);
2261         }
2262
2263         rc = parse_pkt(&m, vi->flags & TX_USES_VM_WR);
2264         if (__predict_false(rc != 0)) {
2265                 MPASS(m == NULL);                       /* was freed already */
2266                 atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
2267                 return (rc);
2268         }
2269 #ifdef RATELIMIT
2270         if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
2271                 if (m->m_pkthdr.snd_tag->type == IF_SND_TAG_TYPE_RATE_LIMIT)
2272                         return (ethofld_transmit(ifp, m));
2273         }
2274 #endif
2275
2276         /* Select a txq. */
2277         sc = vi->adapter;
2278         txq = &sc->sge.txq[vi->first_txq];
2279         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2280                 txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
2281                     vi->rsrv_noflowq);
2282
2283         items[0] = m;
2284         rc = mp_ring_enqueue(txq->r, items, 1, 256);
2285         if (__predict_false(rc != 0))
2286                 m_freem(m);
2287
2288         return (rc);
2289 }
2290
2291 static void
2292 cxgbe_qflush(struct ifnet *ifp)
2293 {
2294         struct vi_info *vi = ifp->if_softc;
2295         struct sge_txq *txq;
2296         int i;
2297
2298         /* queues do not exist if !VI_INIT_DONE. */
2299         if (vi->flags & VI_INIT_DONE) {
2300                 for_each_txq(vi, i, txq) {
2301                         TXQ_LOCK(txq);
2302                         txq->eq.flags |= EQ_QFLUSH;
2303                         TXQ_UNLOCK(txq);
2304                         while (!mp_ring_is_idle(txq->r)) {
2305                                 mp_ring_check_drainage(txq->r, 4096);
2306                                 pause("qflush", 1);
2307                         }
2308                         TXQ_LOCK(txq);
2309                         txq->eq.flags &= ~EQ_QFLUSH;
2310                         TXQ_UNLOCK(txq);
2311                 }
2312         }
2313         if_qflush(ifp);
2314 }
2315
2316 static uint64_t
2317 vi_get_counter(struct ifnet *ifp, ift_counter c)
2318 {
2319         struct vi_info *vi = ifp->if_softc;
2320         struct fw_vi_stats_vf *s = &vi->stats;
2321
2322         vi_refresh_stats(vi->adapter, vi);
2323
2324         switch (c) {
2325         case IFCOUNTER_IPACKETS:
2326                 return (s->rx_bcast_frames + s->rx_mcast_frames +
2327                     s->rx_ucast_frames);
2328         case IFCOUNTER_IERRORS:
2329                 return (s->rx_err_frames);
2330         case IFCOUNTER_OPACKETS:
2331                 return (s->tx_bcast_frames + s->tx_mcast_frames +
2332                     s->tx_ucast_frames + s->tx_offload_frames);
2333         case IFCOUNTER_OERRORS:
2334                 return (s->tx_drop_frames);
2335         case IFCOUNTER_IBYTES:
2336                 return (s->rx_bcast_bytes + s->rx_mcast_bytes +
2337                     s->rx_ucast_bytes);
2338         case IFCOUNTER_OBYTES:
2339                 return (s->tx_bcast_bytes + s->tx_mcast_bytes +
2340                     s->tx_ucast_bytes + s->tx_offload_bytes);
2341         case IFCOUNTER_IMCASTS:
2342                 return (s->rx_mcast_frames);
2343         case IFCOUNTER_OMCASTS:
2344                 return (s->tx_mcast_frames);
2345         case IFCOUNTER_OQDROPS: {
2346                 uint64_t drops;
2347
2348                 drops = 0;
2349                 if (vi->flags & VI_INIT_DONE) {
2350                         int i;
2351                         struct sge_txq *txq;
2352
2353                         for_each_txq(vi, i, txq)
2354                                 drops += counter_u64_fetch(txq->r->dropped);
2355                 }
2356
2357                 return (drops);
2358
2359         }
2360
2361         default:
2362                 return (if_get_counter_default(ifp, c));
2363         }
2364 }
2365
2366 uint64_t
2367 cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
2368 {
2369         struct vi_info *vi = ifp->if_softc;
2370         struct port_info *pi = vi->pi;
2371         struct adapter *sc = pi->adapter;
2372         struct port_stats *s = &pi->stats;
2373
2374         if (pi->nvi > 1 || sc->flags & IS_VF)
2375                 return (vi_get_counter(ifp, c));
2376
2377         cxgbe_refresh_stats(sc, pi);
2378
2379         switch (c) {
2380         case IFCOUNTER_IPACKETS:
2381                 return (s->rx_frames);
2382
2383         case IFCOUNTER_IERRORS:
2384                 return (s->rx_jabber + s->rx_runt + s->rx_too_long +
2385                     s->rx_fcs_err + s->rx_len_err);
2386
2387         case IFCOUNTER_OPACKETS:
2388                 return (s->tx_frames);
2389
2390         case IFCOUNTER_OERRORS:
2391                 return (s->tx_error_frames);
2392
2393         case IFCOUNTER_IBYTES:
2394                 return (s->rx_octets);
2395
2396         case IFCOUNTER_OBYTES:
2397                 return (s->tx_octets);
2398
2399         case IFCOUNTER_IMCASTS:
2400                 return (s->rx_mcast_frames);
2401
2402         case IFCOUNTER_OMCASTS:
2403                 return (s->tx_mcast_frames);
2404
2405         case IFCOUNTER_IQDROPS:
2406                 return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
2407                     s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
2408                     s->rx_trunc3 + pi->tnl_cong_drops);
2409
2410         case IFCOUNTER_OQDROPS: {
2411                 uint64_t drops;
2412
2413                 drops = s->tx_drop;
2414                 if (vi->flags & VI_INIT_DONE) {
2415                         int i;
2416                         struct sge_txq *txq;
2417
2418                         for_each_txq(vi, i, txq)
2419                                 drops += counter_u64_fetch(txq->r->dropped);
2420                 }
2421
2422                 return (drops);
2423
2424         }
2425
2426         default:
2427                 return (if_get_counter_default(ifp, c));
2428         }
2429 }
2430
2431 #if defined(KERN_TLS) || defined(RATELIMIT)
2432 static int
2433 cxgbe_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
2434     struct m_snd_tag **pt)
2435 {
2436         int error;
2437
2438         switch (params->hdr.type) {
2439 #ifdef RATELIMIT
2440         case IF_SND_TAG_TYPE_RATE_LIMIT:
2441                 error = cxgbe_rate_tag_alloc(ifp, params, pt);
2442                 break;
2443 #endif
2444 #ifdef KERN_TLS
2445         case IF_SND_TAG_TYPE_TLS:
2446                 error = cxgbe_tls_tag_alloc(ifp, params, pt);
2447                 break;
2448 #endif
2449         default:
2450                 error = EOPNOTSUPP;
2451         }
2452         return (error);
2453 }
2454
2455 static int
2456 cxgbe_snd_tag_modify(struct m_snd_tag *mst,
2457     union if_snd_tag_modify_params *params)
2458 {
2459
2460         switch (mst->type) {
2461 #ifdef RATELIMIT
2462         case IF_SND_TAG_TYPE_RATE_LIMIT:
2463                 return (cxgbe_rate_tag_modify(mst, params));
2464 #endif
2465         default:
2466                 return (EOPNOTSUPP);
2467         }
2468 }
2469
2470 static int
2471 cxgbe_snd_tag_query(struct m_snd_tag *mst,
2472     union if_snd_tag_query_params *params)
2473 {
2474
2475         switch (mst->type) {
2476 #ifdef RATELIMIT
2477         case IF_SND_TAG_TYPE_RATE_LIMIT:
2478                 return (cxgbe_rate_tag_query(mst, params));
2479 #endif
2480         default:
2481                 return (EOPNOTSUPP);
2482         }
2483 }
2484
/*
 * Send tag free entry point.  Hands the tag to the free routine that matches
 * its type.  A tag of any other type is a programming error and panics.
 */
static void
cxgbe_snd_tag_free(struct m_snd_tag *mst)
{

#ifdef RATELIMIT
        if (mst->type == IF_SND_TAG_TYPE_RATE_LIMIT) {
                cxgbe_rate_tag_free(mst);
                return;
        }
#endif
#ifdef KERN_TLS
        if (mst->type == IF_SND_TAG_TYPE_TLS) {
                cxgbe_tls_tag_free(mst);
                return;
        }
#endif
        panic("shouldn't get here");
}
2504 #endif
2505
2506 /*
2507  * The kernel picks a media from the list we had provided but we still validate
2508  * the requeste.
2509  */
2510 int
2511 cxgbe_media_change(struct ifnet *ifp)
2512 {
2513         struct vi_info *vi = ifp->if_softc;
2514         struct port_info *pi = vi->pi;
2515         struct ifmedia *ifm = &pi->media;
2516         struct link_config *lc = &pi->link_cfg;
2517         struct adapter *sc = pi->adapter;
2518         int rc;
2519
2520         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec");
2521         if (rc != 0)
2522                 return (rc);
2523         PORT_LOCK(pi);
2524         if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
2525                 /* ifconfig .. media autoselect */
2526                 if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) {
2527                         rc = ENOTSUP; /* AN not supported by transceiver */
2528                         goto done;
2529                 }
2530                 lc->requested_aneg = AUTONEG_ENABLE;
2531                 lc->requested_speed = 0;
2532                 lc->requested_fc |= PAUSE_AUTONEG;
2533         } else {
2534                 lc->requested_aneg = AUTONEG_DISABLE;
2535                 lc->requested_speed =
2536                     ifmedia_baudrate(ifm->ifm_media) / 1000000;
2537                 lc->requested_fc = 0;
2538                 if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
2539                         lc->requested_fc |= PAUSE_RX;
2540                 if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
2541                         lc->requested_fc |= PAUSE_TX;
2542         }
2543         if (pi->up_vis > 0) {
2544                 fixup_link_config(pi);
2545                 rc = apply_link_config(pi);
2546         }
2547 done:
2548         PORT_UNLOCK(pi);
2549         end_synchronized_op(sc, 0);
2550         return (rc);
2551 }
2552
2553 /*
2554  * Base media word (without ETHER, pause, link active, etc.) for the port at the
2555  * given speed.
2556  */
2557 static int
2558 port_mword(struct port_info *pi, uint32_t speed)
2559 {
2560
2561         MPASS(speed & M_FW_PORT_CAP32_SPEED);
2562         MPASS(powerof2(speed));
2563
2564         switch(pi->port_type) {
2565         case FW_PORT_TYPE_BT_SGMII:
2566         case FW_PORT_TYPE_BT_XFI:
2567         case FW_PORT_TYPE_BT_XAUI:
2568                 /* BaseT */
2569                 switch (speed) {
2570                 case FW_PORT_CAP32_SPEED_100M:
2571                         return (IFM_100_T);
2572                 case FW_PORT_CAP32_SPEED_1G:
2573                         return (IFM_1000_T);
2574                 case FW_PORT_CAP32_SPEED_10G:
2575                         return (IFM_10G_T);
2576                 }
2577                 break;
2578         case FW_PORT_TYPE_KX4:
2579                 if (speed == FW_PORT_CAP32_SPEED_10G)
2580                         return (IFM_10G_KX4);
2581                 break;
2582         case FW_PORT_TYPE_CX4:
2583                 if (speed == FW_PORT_CAP32_SPEED_10G)
2584                         return (IFM_10G_CX4);
2585                 break;
2586         case FW_PORT_TYPE_KX:
2587                 if (speed == FW_PORT_CAP32_SPEED_1G)
2588                         return (IFM_1000_KX);
2589                 break;
2590         case FW_PORT_TYPE_KR:
2591         case FW_PORT_TYPE_BP_AP:
2592         case FW_PORT_TYPE_BP4_AP:
2593         case FW_PORT_TYPE_BP40_BA:
2594         case FW_PORT_TYPE_KR4_100G:
2595         case FW_PORT_TYPE_KR_SFP28:
2596         case FW_PORT_TYPE_KR_XLAUI:
2597                 switch (speed) {
2598                 case FW_PORT_CAP32_SPEED_1G:
2599                         return (IFM_1000_KX);
2600                 case FW_PORT_CAP32_SPEED_10G:
2601                         return (IFM_10G_KR);
2602                 case FW_PORT_CAP32_SPEED_25G:
2603                         return (IFM_25G_KR);
2604                 case FW_PORT_CAP32_SPEED_40G:
2605                         return (IFM_40G_KR4);
2606                 case FW_PORT_CAP32_SPEED_50G:
2607                         return (IFM_50G_KR2);
2608                 case FW_PORT_CAP32_SPEED_100G:
2609                         return (IFM_100G_KR4);
2610                 }
2611                 break;
2612         case FW_PORT_TYPE_FIBER_XFI:
2613         case FW_PORT_TYPE_FIBER_XAUI:
2614         case FW_PORT_TYPE_SFP:
2615         case FW_PORT_TYPE_QSFP_10G:
2616         case FW_PORT_TYPE_QSA:
2617         case FW_PORT_TYPE_QSFP:
2618         case FW_PORT_TYPE_CR4_QSFP:
2619         case FW_PORT_TYPE_CR_QSFP:
2620         case FW_PORT_TYPE_CR2_QSFP:
2621         case FW_PORT_TYPE_SFP28:
2622                 /* Pluggable transceiver */
2623                 switch (pi->mod_type) {
2624                 case FW_PORT_MOD_TYPE_LR:
2625                         switch (speed) {
2626                         case FW_PORT_CAP32_SPEED_1G:
2627                                 return (IFM_1000_LX);
2628                         case FW_PORT_CAP32_SPEED_10G:
2629                                 return (IFM_10G_LR);
2630                         case FW_PORT_CAP32_SPEED_25G:
2631                                 return (IFM_25G_LR);
2632                         case FW_PORT_CAP32_SPEED_40G:
2633                                 return (IFM_40G_LR4);
2634                         case FW_PORT_CAP32_SPEED_50G:
2635                                 return (IFM_50G_LR2);
2636                         case FW_PORT_CAP32_SPEED_100G:
2637                                 return (IFM_100G_LR4);
2638                         }
2639                         break;
2640                 case FW_PORT_MOD_TYPE_SR:
2641                         switch (speed) {
2642                         case FW_PORT_CAP32_SPEED_1G:
2643                                 return (IFM_1000_SX);
2644                         case FW_PORT_CAP32_SPEED_10G:
2645                                 return (IFM_10G_SR);
2646                         case FW_PORT_CAP32_SPEED_25G:
2647                                 return (IFM_25G_SR);
2648                         case FW_PORT_CAP32_SPEED_40G:
2649                                 return (IFM_40G_SR4);
2650                         case FW_PORT_CAP32_SPEED_50G:
2651                                 return (IFM_50G_SR2);
2652                         case FW_PORT_CAP32_SPEED_100G:
2653                                 return (IFM_100G_SR4);
2654                         }
2655                         break;
2656                 case FW_PORT_MOD_TYPE_ER:
2657                         if (speed == FW_PORT_CAP32_SPEED_10G)
2658                                 return (IFM_10G_ER);
2659                         break;
2660                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
2661                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
2662                         switch (speed) {
2663                         case FW_PORT_CAP32_SPEED_1G:
2664                                 return (IFM_1000_CX);
2665                         case FW_PORT_CAP32_SPEED_10G:
2666                                 return (IFM_10G_TWINAX);
2667                         case FW_PORT_CAP32_SPEED_25G:
2668                                 return (IFM_25G_CR);
2669                         case FW_PORT_CAP32_SPEED_40G:
2670                                 return (IFM_40G_CR4);
2671                         case FW_PORT_CAP32_SPEED_50G:
2672                                 return (IFM_50G_CR2);
2673                         case FW_PORT_CAP32_SPEED_100G:
2674                                 return (IFM_100G_CR4);
2675                         }
2676                         break;
2677                 case FW_PORT_MOD_TYPE_LRM:
2678                         if (speed == FW_PORT_CAP32_SPEED_10G)
2679                                 return (IFM_10G_LRM);
2680                         break;
2681                 case FW_PORT_MOD_TYPE_NA:
2682                         MPASS(0);       /* Not pluggable? */
2683                         /* fall throough */
2684                 case FW_PORT_MOD_TYPE_ERROR:
2685                 case FW_PORT_MOD_TYPE_UNKNOWN:
2686                 case FW_PORT_MOD_TYPE_NOTSUPPORTED:
2687                         break;
2688                 case FW_PORT_MOD_TYPE_NONE:
2689                         return (IFM_NONE);
2690                 }
2691                 break;
2692         case FW_PORT_TYPE_NONE:
2693                 return (IFM_NONE);
2694         }
2695
2696         return (IFM_UNKNOWN);
2697 }
2698
2699 void
2700 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2701 {
2702         struct vi_info *vi = ifp->if_softc;
2703         struct port_info *pi = vi->pi;
2704         struct adapter *sc = pi->adapter;
2705         struct link_config *lc = &pi->link_cfg;
2706
2707         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0)
2708                 return;
2709         PORT_LOCK(pi);
2710
2711         if (pi->up_vis == 0) {
2712                 /*
2713                  * If all the interfaces are administratively down the firmware
2714                  * does not report transceiver changes.  Refresh port info here
2715                  * so that ifconfig displays accurate ifmedia at all times.
2716                  * This is the only reason we have a synchronized op in this
2717                  * function.  Just PORT_LOCK would have been enough otherwise.
2718                  */
2719                 t4_update_port_info(pi);
2720                 build_medialist(pi);
2721         }
2722
2723         /* ifm_status */
2724         ifmr->ifm_status = IFM_AVALID;
2725         if (lc->link_ok == false)
2726                 goto done;
2727         ifmr->ifm_status |= IFM_ACTIVE;
2728
2729         /* ifm_active */
2730         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2731         ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
2732         if (lc->fc & PAUSE_RX)
2733                 ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2734         if (lc->fc & PAUSE_TX)
2735                 ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2736         ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
2737 done:
2738         PORT_UNLOCK(pi);
2739         end_synchronized_op(sc, 0);
2740 }
2741
2742 static int
2743 vcxgbe_probe(device_t dev)
2744 {
2745         char buf[128];
2746         struct vi_info *vi = device_get_softc(dev);
2747
2748         snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2749             vi - vi->pi->vi);
2750         device_set_desc_copy(dev, buf);
2751
2752         return (BUS_PROBE_DEFAULT);
2753 }
2754
2755 static int
2756 alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2757 {
2758         int func, index, rc;
2759         uint32_t param, val;
2760
2761         ASSERT_SYNCHRONIZED_OP(sc);
2762
2763         index = vi - pi->vi;
2764         MPASS(index > 0);       /* This function deals with _extra_ VIs only */
2765         KASSERT(index < nitems(vi_mac_funcs),
2766             ("%s: VI %s doesn't have a MAC func", __func__,
2767             device_get_nameunit(vi->dev)));
2768         func = vi_mac_funcs[index];
2769         rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2770             vi->hw_addr, &vi->rss_size, &vi->vfvld, &vi->vin, func, 0);
2771         if (rc < 0) {
2772                 device_printf(vi->dev, "failed to allocate virtual interface %d"
2773                     "for port %d: %d\n", index, pi->port_id, -rc);
2774                 return (-rc);
2775         }
2776         vi->viid = rc;
2777
2778         if (vi->rss_size == 1) {
2779                 /*
2780                  * This VI didn't get a slice of the RSS table.  Reduce the
2781                  * number of VIs being created (hw.cxgbe.num_vis) or modify the
2782                  * configuration file (nvi, rssnvi for this PF) if this is a
2783                  * problem.
2784                  */
2785                 device_printf(vi->dev, "RSS table not available.\n");
2786                 vi->rss_base = 0xffff;
2787
2788                 return (0);
2789         }
2790
2791         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2792             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2793             V_FW_PARAMS_PARAM_YZ(vi->viid);
2794         rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2795         if (rc)
2796                 vi->rss_base = 0xffff;
2797         else {
2798                 MPASS((val >> 16) == vi->rss_size);
2799                 vi->rss_base = val & 0xffff;
2800         }
2801
2802         return (0);
2803 }
2804
2805 static int
2806 vcxgbe_attach(device_t dev)
2807 {
2808         struct vi_info *vi;
2809         struct port_info *pi;
2810         struct adapter *sc;
2811         int rc;
2812
2813         vi = device_get_softc(dev);
2814         pi = vi->pi;
2815         sc = pi->adapter;
2816
2817         rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
2818         if (rc)
2819                 return (rc);
2820         rc = alloc_extra_vi(sc, pi, vi);
2821         end_synchronized_op(sc, 0);
2822         if (rc)
2823                 return (rc);
2824
2825         rc = cxgbe_vi_attach(dev, vi);
2826         if (rc) {
2827                 t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2828                 return (rc);
2829         }
2830         return (0);
2831 }
2832
2833 static int
2834 vcxgbe_detach(device_t dev)
2835 {
2836         struct vi_info *vi;
2837         struct adapter *sc;
2838
2839         vi = device_get_softc(dev);
2840         sc = vi->adapter;
2841
2842         doom_vi(sc, vi);
2843
2844         cxgbe_vi_detach(vi);
2845         t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2846
2847         end_synchronized_op(sc, 0);
2848
2849         return (0);
2850 }
2851
2852 static struct callout fatal_callout;
2853
2854 static void
2855 delayed_panic(void *arg)
2856 {
2857         struct adapter *sc = arg;
2858
2859         panic("%s: panic on fatal error", device_get_nameunit(sc->dev));
2860 }
2861
2862 void
2863 t4_fatal_err(struct adapter *sc, bool fw_error)
2864 {
2865
2866         t4_shutdown_adapter(sc);
2867         log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n",
2868             device_get_nameunit(sc->dev));
2869         if (fw_error) {
2870                 ASSERT_SYNCHRONIZED_OP(sc);
2871                 sc->flags |= ADAP_ERR;
2872         } else {
2873                 ADAPTER_LOCK(sc);
2874                 sc->flags |= ADAP_ERR;
2875                 ADAPTER_UNLOCK(sc);
2876         }
2877 #ifdef TCP_OFFLOAD
2878         taskqueue_enqueue(taskqueue_thread, &sc->async_event_task);
2879 #endif
2880
2881         if (t4_panic_on_fatal_err) {
2882                 log(LOG_ALERT, "%s: panic on fatal error after 30s",
2883                     device_get_nameunit(sc->dev));
2884                 callout_reset(&fatal_callout, hz * 30, delayed_panic, sc);
2885         }
2886 }
2887
2888 void
2889 t4_add_adapter(struct adapter *sc)
2890 {
2891         sx_xlock(&t4_list_lock);
2892         SLIST_INSERT_HEAD(&t4_list, sc, link);
2893         sx_xunlock(&t4_list_lock);
2894 }
2895
2896 int
2897 t4_map_bars_0_and_4(struct adapter *sc)
2898 {
2899         sc->regs_rid = PCIR_BAR(0);
2900         sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2901             &sc->regs_rid, RF_ACTIVE);
2902         if (sc->regs_res == NULL) {
2903                 device_printf(sc->dev, "cannot map registers.\n");
2904                 return (ENXIO);
2905         }
2906         sc->bt = rman_get_bustag(sc->regs_res);
2907         sc->bh = rman_get_bushandle(sc->regs_res);
2908         sc->mmio_len = rman_get_size(sc->regs_res);
2909         setbit(&sc->doorbells, DOORBELL_KDB);
2910
2911         sc->msix_rid = PCIR_BAR(4);
2912         sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2913             &sc->msix_rid, RF_ACTIVE);
2914         if (sc->msix_res == NULL) {
2915                 device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2916                 return (ENXIO);
2917         }
2918
2919         return (0);
2920 }
2921
2922 int
2923 t4_map_bar_2(struct adapter *sc)
2924 {
2925
2926         /*
2927          * T4: only iWARP driver uses the userspace doorbells.  There is no need
2928          * to map it if RDMA is disabled.
2929          */
2930         if (is_t4(sc) && sc->rdmacaps == 0)
2931                 return (0);
2932
2933         sc->udbs_rid = PCIR_BAR(2);
2934         sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2935             &sc->udbs_rid, RF_ACTIVE);
2936         if (sc->udbs_res == NULL) {
2937                 device_printf(sc->dev, "cannot map doorbell BAR.\n");
2938                 return (ENXIO);
2939         }
2940         sc->udbs_base = rman_get_virtual(sc->udbs_res);
2941
2942         if (chip_id(sc) >= CHELSIO_T5) {
2943                 setbit(&sc->doorbells, DOORBELL_UDB);
2944 #if defined(__i386__) || defined(__amd64__)
2945                 if (t5_write_combine) {
2946                         int rc, mode;
2947
2948                         /*
2949                          * Enable write combining on BAR2.  This is the
2950                          * userspace doorbell BAR and is split into 128B
2951                          * (UDBS_SEG_SIZE) doorbell regions, each associated
2952                          * with an egress queue.  The first 64B has the doorbell
2953                          * and the second 64B can be used to submit a tx work
2954                          * request with an implicit doorbell.
2955                          */
2956
2957                         rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2958                             rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2959                         if (rc == 0) {
2960                                 clrbit(&sc->doorbells, DOORBELL_UDB);
2961                                 setbit(&sc->doorbells, DOORBELL_WCWR);
2962                                 setbit(&sc->doorbells, DOORBELL_UDBWC);
2963                         } else {
2964                                 device_printf(sc->dev,
2965                                     "couldn't enable write combining: %d\n",
2966                                     rc);
2967                         }
2968
2969                         mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2970                         t4_write_reg(sc, A_SGE_STAT_CFG,
2971                             V_STATSOURCE_T5(7) | mode);
2972                 }
2973 #endif
2974         }
2975         sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0;
2976
2977         return (0);
2978 }
2979
2980 struct memwin_init {
2981         uint32_t base;
2982         uint32_t aperture;
2983 };
2984
2985 static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2986         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2987         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2988         { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2989 };
2990
2991 static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
2992         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2993         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2994         { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
2995 };
2996
2997 static void
2998 setup_memwin(struct adapter *sc)
2999 {
3000         const struct memwin_init *mw_init;
3001         struct memwin *mw;
3002         int i;
3003         uint32_t bar0;
3004
3005         if (is_t4(sc)) {
3006                 /*
3007                  * Read low 32b of bar0 indirectly via the hardware backdoor
3008                  * mechanism.  Works from within PCI passthrough environments
3009                  * too, where rman_get_start() can return a different value.  We
3010                  * need to program the T4 memory window decoders with the actual
3011                  * addresses that will be coming across the PCIe link.
3012                  */
3013                 bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
3014                 bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
3015
3016                 mw_init = &t4_memwin[0];
3017         } else {
3018                 /* T5+ use the relative offset inside the PCIe BAR */
3019                 bar0 = 0;
3020
3021                 mw_init = &t5_memwin[0];
3022         }
3023
3024         for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
3025                 rw_init(&mw->mw_lock, "memory window access");
3026                 mw->mw_base = mw_init->base;
3027                 mw->mw_aperture = mw_init->aperture;
3028                 mw->mw_curpos = 0;
3029                 t4_write_reg(sc,
3030                     PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
3031                     (mw->mw_base + bar0) | V_BIR(0) |
3032                     V_WINDOW(ilog2(mw->mw_aperture) - 10));
3033                 rw_wlock(&mw->mw_lock);
3034                 position_memwin(sc, i, 0);
3035                 rw_wunlock(&mw->mw_lock);
3036         }
3037
3038         /* flush */
3039         t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
3040 }
3041
3042 /*
3043  * Positions the memory window at the given address in the card's address space.
3044  * There are some alignment requirements and the actual position may be at an
3045  * address prior to the requested address.  mw->mw_curpos always has the actual
3046  * position of the window.
3047  */
3048 static void
3049 position_memwin(struct adapter *sc, int idx, uint32_t addr)
3050 {
3051         struct memwin *mw;
3052         uint32_t pf;
3053         uint32_t reg;
3054
3055         MPASS(idx >= 0 && idx < NUM_MEMWIN);
3056         mw = &sc->memwin[idx];
3057         rw_assert(&mw->mw_lock, RA_WLOCKED);
3058
3059         if (is_t4(sc)) {
3060                 pf = 0;
3061                 mw->mw_curpos = addr & ~0xf;    /* start must be 16B aligned */
3062         } else {
3063                 pf = V_PFNUM(sc->pf);
3064                 mw->mw_curpos = addr & ~0x7f;   /* start must be 128B aligned */
3065         }
3066         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
3067         t4_write_reg(sc, reg, mw->mw_curpos | pf);
3068         t4_read_reg(sc, reg);   /* flush */
3069 }
3070
3071 int
3072 rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
3073     int len, int rw)
3074 {
3075         struct memwin *mw;
3076         uint32_t mw_end, v;
3077
3078         MPASS(idx >= 0 && idx < NUM_MEMWIN);
3079
3080         /* Memory can only be accessed in naturally aligned 4 byte units */
3081         if (addr & 3 || len & 3 || len <= 0)
3082                 return (EINVAL);
3083
3084         mw = &sc->memwin[idx];
3085         while (len > 0) {
3086                 rw_rlock(&mw->mw_lock);
3087                 mw_end = mw->mw_curpos + mw->mw_aperture;
3088                 if (addr >= mw_end || addr < mw->mw_curpos) {
3089                         /* Will need to reposition the window */
3090                         if (!rw_try_upgrade(&mw->mw_lock)) {
3091                                 rw_runlock(&mw->mw_lock);
3092                                 rw_wlock(&mw->mw_lock);
3093                         }
3094                         rw_assert(&mw->mw_lock, RA_WLOCKED);
3095                         position_memwin(sc, idx, addr);
3096                         rw_downgrade(&mw->mw_lock);
3097                         mw_end = mw->mw_curpos + mw->mw_aperture;
3098                 }
3099                 rw_assert(&mw->mw_lock, RA_RLOCKED);
3100                 while (addr < mw_end && len > 0) {
3101                         if (rw == 0) {
3102                                 v = t4_read_reg(sc, mw->mw_base + addr -
3103                                     mw->mw_curpos);
3104                                 *val++ = le32toh(v);
3105                         } else {
3106                                 v = *val++;
3107                                 t4_write_reg(sc, mw->mw_base + addr -
3108                                     mw->mw_curpos, htole32(v));
3109                         }
3110                         addr += 4;
3111                         len -= 4;
3112                 }
3113                 rw_runlock(&mw->mw_lock);
3114         }
3115
3116         return (0);
3117 }
3118
3119 static void
3120 t4_init_atid_table(struct adapter *sc)
3121 {
3122         struct tid_info *t;
3123         int i;
3124
3125         t = &sc->tids;
3126         if (t->natids == 0)
3127                 return;
3128
3129         MPASS(t->atid_tab == NULL);
3130
3131         t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
3132             M_ZERO | M_WAITOK);
3133         mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
3134         t->afree = t->atid_tab;
3135         t->atids_in_use = 0;
3136         for (i = 1; i < t->natids; i++)
3137                 t->atid_tab[i - 1].next = &t->atid_tab[i];
3138         t->atid_tab[t->natids - 1].next = NULL;
3139 }
3140
3141 static void
3142 t4_free_atid_table(struct adapter *sc)
3143 {
3144         struct tid_info *t;
3145
3146         t = &sc->tids;
3147
3148         KASSERT(t->atids_in_use == 0,
3149             ("%s: %d atids still in use.", __func__, t->atids_in_use));
3150
3151         if (mtx_initialized(&t->atid_lock))
3152                 mtx_destroy(&t->atid_lock);
3153         free(t->atid_tab, M_CXGBE);
3154         t->atid_tab = NULL;
3155 }
3156
3157 int
3158 alloc_atid(struct adapter *sc, void *ctx)
3159 {
3160         struct tid_info *t = &sc->tids;
3161         int atid = -1;
3162
3163         mtx_lock(&t->atid_lock);
3164         if (t->afree) {
3165                 union aopen_entry *p = t->afree;
3166
3167                 atid = p - t->atid_tab;
3168                 MPASS(atid <= M_TID_TID);
3169                 t->afree = p->next;
3170                 p->data = ctx;
3171                 t->atids_in_use++;
3172         }
3173         mtx_unlock(&t->atid_lock);
3174         return (atid);
3175 }
3176
3177 void *
3178 lookup_atid(struct adapter *sc, int atid)
3179 {
3180         struct tid_info *t = &sc->tids;
3181
3182         return (t->atid_tab[atid].data);
3183 }
3184
3185 void
3186 free_atid(struct adapter *sc, int atid)
3187 {
3188         struct tid_info *t = &sc->tids;
3189         union aopen_entry *p = &t->atid_tab[atid];
3190
3191         mtx_lock(&t->atid_lock);
3192         p->next = t->afree;
3193         t->afree = p;
3194         t->atids_in_use--;
3195         mtx_unlock(&t->atid_lock);
3196 }
3197
/*
 * Placeholder for deferring the release of a tid.  Deferred release is not
 * implemented; this only invokes CXGBE_UNIMPLEMENTED.
 */
static void
queue_tid_release(struct adapter *sc, int tid)
{
        CXGBE_UNIMPLEMENTED("deferred tid release");
}
3204
3205 void
3206 release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
3207 {
3208         struct wrqe *wr;
3209         struct cpl_tid_release *req;
3210
3211         wr = alloc_wrqe(sizeof(*req), ctrlq);
3212         if (wr == NULL) {
3213                 queue_tid_release(sc, tid);     /* defer */
3214                 return;
3215         }
3216         req = wrtod(wr);
3217
3218         INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
3219
3220         t4_wrq_tx(sc, wr);
3221 }
3222
3223 static int
3224 t4_range_cmp(const void *a, const void *b)
3225 {
3226         return ((const struct t4_range *)a)->start -
3227                ((const struct t4_range *)b)->start;
3228 }
3229
3230 /*
3231  * Verify that the memory range specified by the addr/len pair is valid within
3232  * the card's address space.
3233  */
3234 static int
3235 validate_mem_range(struct adapter *sc, uint32_t addr, uint32_t len)
3236 {
3237         struct t4_range mem_ranges[4], *r, *next;
3238         uint32_t em, addr_len;
3239         int i, n, remaining;
3240
3241         /* Memory can only be accessed in naturally aligned 4 byte units */
3242         if (addr & 3 || len & 3 || len == 0)
3243                 return (EINVAL);
3244
3245         /* Enabled memories */
3246         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3247
3248         r = &mem_ranges[0];
3249         n = 0;
3250         bzero(r, sizeof(mem_ranges));
3251         if (em & F_EDRAM0_ENABLE) {
3252                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3253                 r->size = G_EDRAM0_SIZE(addr_len) << 20;
3254                 if (r->size > 0) {
3255                         r->start = G_EDRAM0_BASE(addr_len) << 20;
3256                         if (addr >= r->start &&
3257                             addr + len <= r->start + r->size)
3258                                 return (0);
3259                         r++;
3260                         n++;
3261                 }
3262         }
3263         if (em & F_EDRAM1_ENABLE) {
3264                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3265                 r->size = G_EDRAM1_SIZE(addr_len) << 20;
3266                 if (r->size > 0) {
3267                         r->start = G_EDRAM1_BASE(addr_len) << 20;
3268                         if (addr >= r->start &&
3269                             addr + len <= r->start + r->size)
3270                                 return (0);
3271                         r++;
3272                         n++;
3273                 }
3274         }
3275         if (em & F_EXT_MEM_ENABLE) {
3276                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3277                 r->size = G_EXT_MEM_SIZE(addr_len) << 20;
3278                 if (r->size > 0) {
3279                         r->start = G_EXT_MEM_BASE(addr_len) << 20;
3280                         if (addr >= r->start &&
3281                             addr + len <= r->start + r->size)
3282                                 return (0);
3283                         r++;
3284                         n++;
3285                 }
3286         }
3287         if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
3288                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3289                 r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
3290                 if (r->size > 0) {
3291                         r->start = G_EXT_MEM1_BASE(addr_len) << 20;
3292                         if (addr >= r->start &&
3293                             addr + len <= r->start + r->size)
3294                                 return (0);
3295                         r++;
3296                         n++;
3297                 }
3298         }
3299         MPASS(n <= nitems(mem_ranges));
3300
3301         if (n > 1) {
3302                 /* Sort and merge the ranges. */
3303                 qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
3304
3305                 /* Start from index 0 and examine the next n - 1 entries. */
3306                 r = &mem_ranges[0];
3307                 for (remaining = n - 1; remaining > 0; remaining--, r++) {
3308
3309                         MPASS(r->size > 0);     /* r is a valid entry. */
3310                         next = r + 1;
3311                         MPASS(next->size > 0);  /* and so is the next one. */
3312
3313                         while (r->start + r->size >= next->start) {
3314                                 /* Merge the next one into the current entry. */
3315                                 r->size = max(r->start + r->size,
3316                                     next->start + next->size) - r->start;
3317                                 n--;    /* One fewer entry in total. */
3318                                 if (--remaining == 0)
3319                                         goto done;      /* short circuit */
3320                                 next++;
3321                         }
3322                         if (next != r + 1) {
3323                                 /*
3324                                  * Some entries were merged into r and next
3325                                  * points to the first valid entry that couldn't
3326                                  * be merged.
3327                                  */
3328                                 MPASS(next->size > 0);  /* must be valid */
3329                                 memcpy(r + 1, next, remaining * sizeof(*r));
3330 #ifdef INVARIANTS
3331                                 /*
3332                                  * This so that the foo->size assertion in the
3333                                  * next iteration of the loop do the right
3334                                  * thing for entries that were pulled up and are
3335                                  * no longer valid.
3336                                  */
3337                                 MPASS(n < nitems(mem_ranges));
3338                                 bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
3339                                     sizeof(struct t4_range));
3340 #endif
3341                         }
3342                 }
3343 done:
3344                 /* Done merging the ranges. */
3345                 MPASS(n > 0);
3346                 r = &mem_ranges[0];
3347                 for (i = 0; i < n; i++, r++) {
3348                         if (addr >= r->start &&
3349                             addr + len <= r->start + r->size)
3350                                 return (0);
3351                 }
3352         }
3353
3354         return (EFAULT);
3355 }
3356
3357 static int
3358 fwmtype_to_hwmtype(int mtype)
3359 {
3360
3361         switch (mtype) {
3362         case FW_MEMTYPE_EDC0:
3363                 return (MEM_EDC0);
3364         case FW_MEMTYPE_EDC1:
3365                 return (MEM_EDC1);
3366         case FW_MEMTYPE_EXTMEM:
3367                 return (MEM_MC0);
3368         case FW_MEMTYPE_EXTMEM1:
3369                 return (MEM_MC1);
3370         default:
3371                 panic("%s: cannot translate fw mtype %d.", __func__, mtype);
3372         }
3373 }
3374
3375 /*
3376  * Verify that the memory range specified by the memtype/offset/len pair is
3377  * valid and lies entirely within the memtype specified.  The global address of
3378  * the start of the range is returned in addr.
3379  */
3380 static int
3381 validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, uint32_t len,
3382     uint32_t *addr)
3383 {
3384         uint32_t em, addr_len, maddr;
3385
3386         /* Memory can only be accessed in naturally aligned 4 byte units */
3387         if (off & 3 || len & 3 || len == 0)
3388                 return (EINVAL);
3389
3390         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3391         switch (fwmtype_to_hwmtype(mtype)) {
3392         case MEM_EDC0:
3393                 if (!(em & F_EDRAM0_ENABLE))
3394                         return (EINVAL);
3395                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3396                 maddr = G_EDRAM0_BASE(addr_len) << 20;
3397                 break;
3398         case MEM_EDC1:
3399                 if (!(em & F_EDRAM1_ENABLE))
3400                         return (EINVAL);
3401                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3402                 maddr = G_EDRAM1_BASE(addr_len) << 20;
3403                 break;
3404         case MEM_MC:
3405                 if (!(em & F_EXT_MEM_ENABLE))
3406                         return (EINVAL);
3407                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3408                 maddr = G_EXT_MEM_BASE(addr_len) << 20;
3409                 break;
3410         case MEM_MC1:
3411                 if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
3412                         return (EINVAL);
3413                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3414                 maddr = G_EXT_MEM1_BASE(addr_len) << 20;
3415                 break;
3416         default:
3417                 return (EINVAL);
3418         }
3419
3420         *addr = maddr + off;    /* global address */
3421         return (validate_mem_range(sc, *addr, len));
3422 }
3423
3424 static int
3425 fixup_devlog_params(struct adapter *sc)
3426 {
3427         struct devlog_params *dparams = &sc->params.devlog;
3428         int rc;
3429
3430         rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
3431             dparams->size, &dparams->addr);
3432
3433         return (rc);
3434 }
3435
3436 static void
3437 update_nirq(struct intrs_and_queues *iaq, int nports)
3438 {
3439
3440         iaq->nirq = T4_EXTRA_INTR;
3441         iaq->nirq += nports * max(iaq->nrxq, iaq->nnmrxq);
3442         iaq->nirq += nports * iaq->nofldrxq;
3443         iaq->nirq += nports * (iaq->num_vis - 1) *
3444             max(iaq->nrxq_vi, iaq->nnmrxq_vi);
3445         iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
3446 }
3447
/*
 * Adjust requirements to fit the number of interrupts available.
 *
 * iaq is zeroed and then populated from the t4_* tunables for the given
 * interrupt type.  If the resulting nirq does not fit in navail (or, for
 * INTR_MSI, is not a power of 2) the requirements are reduced in stages:
 *   1. fewer extra VIs per port, down to none;
 *   2. fewer NIC rx queues (next lower power of 2) and half the TOE rx
 *      queues, repeated until nirq stops changing;
 *   3. a single interrupt with one NIC rx/tx queue (and one offload rx/tx
 *      queue if offload rx queues were in use) and no netmap queues on the
 *      main VI.
 * Each reduction that ends up being used is logged with device_printf.
 */
static void
calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
    int navail)
{
        int old_nirq;
        const int nports = sc->params.nports;

        MPASS(nports > 0);
        MPASS(navail > 0);

        /* Start with what the tunables ask for. */
        bzero(iaq, sizeof(*iaq));
        iaq->intr_type = itype;
        iaq->num_vis = t4_num_vis;
        iaq->ntxq = t4_ntxq;
        iaq->ntxq_vi = t4_ntxq_vi;
        iaq->nrxq = t4_nrxq;
        iaq->nrxq_vi = t4_nrxq_vi;
#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
        if (is_offload(sc) || is_ethoffload(sc)) {
                iaq->nofldtxq = t4_nofldtxq;
                iaq->nofldtxq_vi = t4_nofldtxq_vi;
        }
#endif
#ifdef TCP_OFFLOAD
        if (is_offload(sc)) {
                iaq->nofldrxq = t4_nofldrxq;
                iaq->nofldrxq_vi = t4_nofldrxq_vi;
        }
#endif
#ifdef DEV_NETMAP
        if (t4_native_netmap & NN_MAIN_VI) {
                iaq->nnmtxq = t4_nnmtxq;
                iaq->nnmrxq = t4_nnmrxq;
        }
        if (t4_native_netmap & NN_EXTRA_VI) {
                iaq->nnmtxq_vi = t4_nnmtxq_vi;
                iaq->nnmrxq_vi = t4_nnmrxq_vi;
        }
#endif

        update_nirq(iaq, nports);
        if (iaq->nirq <= navail &&
            (itype != INTR_MSI || powerof2(iaq->nirq))) {
                /*
                 * This is the normal case -- there are enough interrupts for
                 * everything.
                 */
                goto done;
        }

        /*
         * If extra VIs have been configured try reducing their count and see if
         * that works.
         */
        while (iaq->num_vis > 1) {
                iaq->num_vis--;
                update_nirq(iaq, nports);
                if (iaq->nirq <= navail &&
                    (itype != INTR_MSI || powerof2(iaq->nirq))) {
                        device_printf(sc->dev, "virtual interfaces per port "
                            "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
                            "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
                            "itype %d, navail %u, nirq %d.\n",
                            iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
                            iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
                            itype, navail, iaq->nirq);
                        goto done;
                }
        }

        /*
         * Extra VIs will not be created.  Log a message if they were requested.
         */
        MPASS(iaq->num_vis == 1);
        /* The per-extra-VI queue counts are cleared before the message below. */
        iaq->ntxq_vi = iaq->nrxq_vi = 0;
        iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
        iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
        if (iaq->num_vis != t4_num_vis) {
                device_printf(sc->dev, "extra virtual interfaces disabled.  "
                    "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
                    "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
                    iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
                    iaq->nnmrxq_vi, itype, navail, iaq->nirq);
        }

        /*
         * Keep reducing the number of NIC rx queues to the next lower power of
         * 2 (for even RSS distribution) and halving the TOE rx queues and see
         * if that works.
         */
        do {
                if (iaq->nrxq > 1) {
                        do {
                                iaq->nrxq--;
                        } while (!powerof2(iaq->nrxq));
                        /* Never more netmap rx queues than NIC rx queues. */
                        if (iaq->nnmrxq > iaq->nrxq)
                                iaq->nnmrxq = iaq->nrxq;
                }
                if (iaq->nofldrxq > 1)
                        iaq->nofldrxq >>= 1;

                /* The loop ends once a pass no longer changes nirq. */
                old_nirq = iaq->nirq;
                update_nirq(iaq, nports);
                if (iaq->nirq <= navail &&
                    (itype != INTR_MSI || powerof2(iaq->nirq))) {
                        device_printf(sc->dev, "running with reduced number of "
                            "rx queues because of shortage of interrupts.  "
                            "nrxq=%u, nofldrxq=%u.  "
                            "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
                            iaq->nofldrxq, itype, navail, iaq->nirq);
                        goto done;
                }
        } while (old_nirq != iaq->nirq);

        /* One interrupt for everything.  Ugh. */
        device_printf(sc->dev, "running with minimal number of queues.  "
            "itype %d, navail %u.\n", itype, navail);
        iaq->nirq = 1;
        iaq->nrxq = 1;
        iaq->ntxq = 1;
        if (iaq->nofldrxq > 0) {
                iaq->nofldrxq = 1;
                iaq->nofldtxq = 1;
        }
        iaq->nnmtxq = 0;
        iaq->nnmrxq = 0;
done:
        /* Sanity checks on whatever configuration was settled on. */
        MPASS(iaq->num_vis > 0);
        if (iaq->num_vis > 1) {
                MPASS(iaq->nrxq_vi > 0);
                MPASS(iaq->ntxq_vi > 0);
        }
        MPASS(iaq->nirq > 0);
        MPASS(iaq->nrxq > 0);
        MPASS(iaq->ntxq > 0);
        if (itype == INTR_MSI) {
                MPASS(powerof2(iaq->nirq));
        }
}
3590
3591 static int
3592 cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
3593 {
3594         int rc, itype, navail, nalloc;
3595
3596         for (itype = INTR_MSIX; itype; itype >>= 1) {
3597
3598                 if ((itype & t4_intr_types) == 0)
3599                         continue;       /* not allowed */
3600
3601                 if (itype == INTR_MSIX)
3602                         navail = pci_msix_count(sc->dev);
3603                 else if (itype == INTR_MSI)
3604                         navail = pci_msi_count(sc->dev);
3605                 else
3606                         navail = 1;
3607 restart:
3608                 if (navail == 0)
3609                         continue;
3610
3611                 calculate_iaq(sc, iaq, itype, navail);
3612                 nalloc = iaq->nirq;
3613                 rc = 0;
3614                 if (itype == INTR_MSIX)
3615                         rc = pci_alloc_msix(sc->dev, &nalloc);
3616                 else if (itype == INTR_MSI)
3617                         rc = pci_alloc_msi(sc->dev, &nalloc);
3618
3619                 if (rc == 0 && nalloc > 0) {
3620                         if (nalloc == iaq->nirq)
3621                                 return (0);
3622
3623                         /*
3624                          * Didn't get the number requested.  Use whatever number
3625                          * the kernel is willing to allocate.
3626                          */
3627                         device_printf(sc->dev, "fewer vectors than requested, "
3628                             "type=%d, req=%d, rcvd=%d; will downshift req.\n",
3629                             itype, iaq->nirq, nalloc);
3630                         pci_release_msi(sc->dev);
3631                         navail = nalloc;
3632                         goto restart;
3633                 }
3634
3635                 device_printf(sc->dev,
3636                     "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
3637                     itype, rc, iaq->nirq, nalloc);
3638         }
3639
3640         device_printf(sc->dev,
3641             "failed to find a usable interrupt type.  "
3642             "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
3643             pci_msix_count(sc->dev), pci_msi_count(sc->dev));
3644
3645         return (ENXIO);
3646 }
3647
/*
 * FW_VERSION(chip) combines the chip##FW_VERSION_{MAJOR,MINOR,MICRO,BUILD}
 * constants into a single firmware version word using the
 * V_FW_HDR_FW_VER_* field macros.  FW_INTFVER(chip, intf) expands to the
 * chip##FW_HDR_INTFVER_##intf constant.  Both rely on token pasting, so the
 * chip argument must be the bare prefix (T4, T5, T6).
 */
#define FW_VERSION(chip) ( \
    V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
    V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
    V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
    V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
#define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
3654
/*
 * Just enough of fw_hdr to cover all version info.
 *
 * This is used as a short prefix of struct fw_hdr: code in this file casts
 * a struct fw_hdr pointer to struct fw_h and copies the first
 * sizeof(struct fw_h) bytes of a firmware image into one.  The field order
 * therefore has to match struct fw_hdr.
 */
struct fw_h {
        __u8    ver;
        __u8    chip;
        __be16  len512;
        __be32  fw_ver;
        __be32  tp_microcode_ver;
        __u8    intfver_nic;
        __u8    intfver_vnic;
        __u8    intfver_ofld;
        __u8    intfver_ri;
        __u8    intfver_iscsipdu;
        __u8    intfver_iscsi;
        __u8    intfver_fcoepdu;
        __u8    intfver_fcoe;
};
/* Spot check a couple of fields. */
CTASSERT(offsetof(struct fw_h, fw_ver) == offsetof(struct fw_hdr, fw_ver));
CTASSERT(offsetof(struct fw_h, intfver_nic) == offsetof(struct fw_hdr, intfver_nic));
CTASSERT(offsetof(struct fw_h, intfver_fcoe) == offsetof(struct fw_hdr, intfver_fcoe));
3675
/*
 * Per-chip firmware information, looked up by chip id with find_fw_info().
 *
 *   chip        - chip id (CHELSIO_T4/T5/T6) that the entry is for.
 *   kld_name    - name given to firmware_get() for the default configuration.
 *   fw_mod_name - name given to firmware_get() for the firmware itself.
 *   fw_h        - version information the driver was compiled with; fw_ver is
 *                 stored big-endian (htobe32).
 */
struct fw_info {
        uint8_t chip;
        char *kld_name;
        char *fw_mod_name;
        struct fw_h fw_h;
} fw_info[] = {
        {
                .chip = CHELSIO_T4,
                .kld_name = "t4fw_cfg",
                .fw_mod_name = "t4fw",
                .fw_h = {
                        .chip = FW_HDR_CHIP_T4,
                        .fw_ver = htobe32(FW_VERSION(T4)),
                        .intfver_nic = FW_INTFVER(T4, NIC),
                        .intfver_vnic = FW_INTFVER(T4, VNIC),
                        .intfver_ofld = FW_INTFVER(T4, OFLD),
                        .intfver_ri = FW_INTFVER(T4, RI),
                        .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
                        .intfver_iscsi = FW_INTFVER(T4, ISCSI),
                        .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
                        .intfver_fcoe = FW_INTFVER(T4, FCOE),
                },
        }, {
                .chip = CHELSIO_T5,
                .kld_name = "t5fw_cfg",
                .fw_mod_name = "t5fw",
                .fw_h = {
                        .chip = FW_HDR_CHIP_T5,
                        .fw_ver = htobe32(FW_VERSION(T5)),
                        .intfver_nic = FW_INTFVER(T5, NIC),
                        .intfver_vnic = FW_INTFVER(T5, VNIC),
                        .intfver_ofld = FW_INTFVER(T5, OFLD),
                        .intfver_ri = FW_INTFVER(T5, RI),
                        .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
                        .intfver_iscsi = FW_INTFVER(T5, ISCSI),
                        .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
                        .intfver_fcoe = FW_INTFVER(T5, FCOE),
                },
        }, {
                .chip = CHELSIO_T6,
                .kld_name = "t6fw_cfg",
                .fw_mod_name = "t6fw",
                .fw_h = {
                        .chip = FW_HDR_CHIP_T6,
                        .fw_ver = htobe32(FW_VERSION(T6)),
                        .intfver_nic = FW_INTFVER(T6, NIC),
                        .intfver_vnic = FW_INTFVER(T6, VNIC),
                        .intfver_ofld = FW_INTFVER(T6, OFLD),
                        .intfver_ri = FW_INTFVER(T6, RI),
                        .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
                        .intfver_iscsi = FW_INTFVER(T6, ISCSI),
                        .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
                        .intfver_fcoe = FW_INTFVER(T6, FCOE),
                },
        }
};
3732
3733 static struct fw_info *
3734 find_fw_info(int chip)
3735 {
3736         int i;
3737
3738         for (i = 0; i < nitems(fw_info); i++) {
3739                 if (fw_info[i].chip == chip)
3740                         return (&fw_info[i]);
3741         }
3742         return (NULL);
3743 }
3744
3745 /*
3746  * Is the given firmware API compatible with the one the driver was compiled
3747  * with?
3748  */
3749 static int
3750 fw_compatible(const struct fw_h *hdr1, const struct fw_h *hdr2)
3751 {
3752
3753         /* short circuit if it's the exact same firmware version */
3754         if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3755                 return (1);
3756
3757         /*
3758          * XXX: Is this too conservative?  Perhaps I should limit this to the
3759          * features that are supported in the driver.
3760          */
3761 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3762         if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3763             SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3764             SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3765                 return (1);
3766 #undef SAME_INTF
3767
3768         return (0);
3769 }
3770
3771 static int
3772 load_fw_module(struct adapter *sc, const struct firmware **dcfg,
3773     const struct firmware **fw)
3774 {
3775         struct fw_info *fw_info;
3776
3777         *dcfg = NULL;
3778         if (fw != NULL)
3779                 *fw = NULL;
3780
3781         fw_info = find_fw_info(chip_id(sc));
3782         if (fw_info == NULL) {
3783                 device_printf(sc->dev,
3784                     "unable to look up firmware information for chip %d.\n",
3785                     chip_id(sc));
3786                 return (EINVAL);
3787         }
3788
3789         *dcfg = firmware_get(fw_info->kld_name);
3790         if (*dcfg != NULL) {
3791                 if (fw != NULL)
3792                         *fw = firmware_get(fw_info->fw_mod_name);
3793                 return (0);
3794         }
3795
3796         return (ENOENT);
3797 }
3798
3799 static void
3800 unload_fw_module(struct adapter *sc, const struct firmware *dcfg,
3801     const struct firmware *fw)
3802 {
3803
3804         if (fw != NULL)
3805                 firmware_put(fw, FIRMWARE_UNLOAD);
3806         if (dcfg != NULL)
3807                 firmware_put(dcfg, FIRMWARE_UNLOAD);
3808 }
3809
3810 /*
3811  * Return values:
3812  * 0 means no firmware install attempted.
3813  * ERESTART means a firmware install was attempted and was successful.
3814  * +ve errno means a firmware install was attempted but failed.
3815  */
3816 static int
3817 install_kld_firmware(struct adapter *sc, struct fw_h *card_fw,
3818     const struct fw_h *drv_fw, const char *reason, int *already)
3819 {
3820         const struct firmware *cfg, *fw;
3821         const uint32_t c = be32toh(card_fw->fw_ver);
3822         uint32_t d, k;
3823         int rc, fw_install;
3824         struct fw_h bundled_fw;
3825         bool load_attempted;
3826
3827         cfg = fw = NULL;
3828         load_attempted = false;
3829         fw_install = t4_fw_install < 0 ? -t4_fw_install : t4_fw_install;
3830
3831         memcpy(&bundled_fw, drv_fw, sizeof(bundled_fw));
3832         if (t4_fw_install < 0) {
3833                 rc = load_fw_module(sc, &cfg, &fw);
3834                 if (rc != 0 || fw == NULL) {
3835                         device_printf(sc->dev,
3836                             "failed to load firmware module: %d. cfg %p, fw %p;"
3837                             " will use compiled-in firmware version for"
3838                             "hw.cxgbe.fw_install checks.\n",
3839                             rc, cfg, fw);
3840                 } else {
3841                         memcpy(&bundled_fw, fw->data, sizeof(bundled_fw));
3842                 }
3843                 load_attempted = true;
3844         }
3845         d = be32toh(bundled_fw.fw_ver);
3846
3847         if (reason != NULL)
3848                 goto install;
3849
3850         if ((sc->flags & FW_OK) == 0) {
3851
3852                 if (c == 0xffffffff) {
3853                         reason = "missing";
3854                         goto install;
3855                 }
3856
3857                 rc = 0;
3858                 goto done;
3859         }
3860
3861         if (!fw_compatible(card_fw, &bundled_fw)) {
3862                 reason = "incompatible or unusable";
3863                 goto install;
3864         }
3865
3866         if (d > c) {
3867                 reason = "older than the version bundled with this driver";
3868                 goto install;
3869         }
3870
3871         if (fw_install == 2 && d != c) {
3872                 reason = "different than the version bundled with this driver";
3873                 goto install;
3874         }
3875
3876         /* No reason to do anything to the firmware already on the card. */
3877         rc = 0;
3878         goto done;
3879
3880 install:
3881         rc = 0;
3882         if ((*already)++)
3883                 goto done;
3884
3885         if (fw_install == 0) {
3886                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3887                     "but the driver is prohibited from installing a firmware "
3888                     "on the card.\n",
3889                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3890                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3891
3892                 goto done;
3893         }
3894
3895         /*
3896          * We'll attempt to install a firmware.  Load the module first (if it
3897          * hasn't been loaded already).
3898          */
3899         if (!load_attempted) {
3900                 rc = load_fw_module(sc, &cfg, &fw);
3901                 if (rc != 0 || fw == NULL) {
3902                         device_printf(sc->dev,
3903                             "failed to load firmware module: %d. cfg %p, fw %p\n",
3904                             rc, cfg, fw);
3905                         /* carry on */
3906                 }
3907         }
3908         if (fw == NULL) {
3909                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3910                     "but the driver cannot take corrective action because it "
3911                     "is unable to load the firmware module.\n",
3912                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3913                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3914                 rc = sc->flags & FW_OK ? 0 : ENOENT;
3915                 goto done;
3916         }
3917         k = be32toh(((const struct fw_hdr *)fw->data)->fw_ver);
3918         if (k != d) {
3919                 MPASS(t4_fw_install > 0);
3920                 device_printf(sc->dev,
3921                     "firmware in KLD (%u.%u.%u.%u) is not what the driver was "
3922                     "expecting (%u.%u.%u.%u) and will not be used.\n",
3923                     G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3924                     G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k),
3925                     G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3926                     G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3927                 rc = sc->flags & FW_OK ? 0 : EINVAL;
3928                 goto done;
3929         }
3930
3931         device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3932             "installing firmware %u.%u.%u.%u on card.\n",
3933             G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3934             G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3935             G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3936             G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3937
3938         rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3939         if (rc != 0) {
3940                 device_printf(sc->dev, "failed to install firmware: %d\n", rc);
3941         } else {
3942                 /* Installed successfully, update the cached header too. */
3943                 rc = ERESTART;
3944                 memcpy(card_fw, fw->data, sizeof(*card_fw));
3945         }
3946 done:
3947         unload_fw_module(sc, cfg, fw);
3948
3949         return (rc);
3950 }
3951
3952 /*
3953  * Establish contact with the firmware and attempt to become the master driver.
3954  *
3955  * A firmware will be installed to the card if needed (if the driver is allowed
3956  * to do so).
3957  */
3958 static int
3959 contact_firmware(struct adapter *sc)
3960 {
3961         int rc, already = 0;
3962         enum dev_state state;
3963         struct fw_info *fw_info;
3964         struct fw_hdr *card_fw;         /* fw on the card */
3965         const struct fw_h *drv_fw;
3966
3967         fw_info = find_fw_info(chip_id(sc));
3968         if (fw_info == NULL) {
3969                 device_printf(sc->dev,
3970                     "unable to look up firmware information for chip %d.\n",
3971                     chip_id(sc));
3972                 return (EINVAL);
3973         }
3974         drv_fw = &fw_info->fw_h;
3975
3976         /* Read the header of the firmware on the card */
3977         card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
3978 restart:
3979         rc = -t4_get_fw_hdr(sc, card_fw);
3980         if (rc != 0) {
3981                 device_printf(sc->dev,
3982                     "unable to read firmware header from card's flash: %d\n",
3983                     rc);
3984                 goto done;
3985         }
3986
3987         rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw, NULL,
3988             &already);
3989         if (rc == ERESTART)
3990                 goto restart;
3991         if (rc != 0)
3992                 goto done;
3993
3994         rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
3995         if (rc < 0 || state == DEV_STATE_ERR) {
3996                 rc = -rc;
3997                 device_printf(sc->dev,
3998                     "failed to connect to the firmware: %d, %d.  "
3999                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4000 #if 0
4001                 if (install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
4002                     "not responding properly to HELLO", &already) == ERESTART)
4003                         goto restart;
4004 #endif
4005                 goto done;
4006         }
4007         MPASS(be32toh(card_fw->flags) & FW_HDR_FLAGS_RESET_HALT);
4008         sc->flags |= FW_OK;     /* The firmware responded to the FW_HELLO. */
4009
4010         if (rc == sc->pf) {
4011                 sc->flags |= MASTER_PF;
4012                 rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
4013                     NULL, &already);
4014                 if (rc == ERESTART)
4015                         rc = 0;
4016                 else if (rc != 0)
4017                         goto done;
4018         } else if (state == DEV_STATE_UNINIT) {
4019                 /*
4020                  * We didn't get to be the master so we definitely won't be
4021                  * configuring the chip.  It's a bug if someone else hasn't
4022                  * configured it already.
4023                  */
4024                 device_printf(sc->dev, "couldn't be master(%d), "
4025                     "device not already initialized either(%d).  "
4026                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4027                 rc = EPROTO;
4028                 goto done;
4029         } else {
4030                 /*
4031                  * Some other PF is the master and has configured the chip.
4032                  * This is allowed but untested.
4033                  */
4034                 device_printf(sc->dev, "PF%d is master, device state %d.  "
4035                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
4036                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", rc);
4037                 sc->cfcsum = 0;
4038                 rc = 0;
4039         }
4040 done:
4041         if (rc != 0 && sc->flags & FW_OK) {
4042                 t4_fw_bye(sc, sc->mbox);
4043                 sc->flags &= ~FW_OK;
4044         }
4045         free(card_fw, M_CXGBE);
4046         return (rc);
4047 }
4048
4049 static int
4050 copy_cfg_file_to_card(struct adapter *sc, char *cfg_file,
4051     uint32_t mtype, uint32_t moff)
4052 {
4053         struct fw_info *fw_info;
4054         const struct firmware *dcfg, *rcfg = NULL;
4055         const uint32_t *cfdata;
4056         uint32_t cflen, addr;
4057         int rc;
4058
4059         load_fw_module(sc, &dcfg, NULL);
4060
4061         /* Card specific interpretation of "default". */
4062         if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
4063                 if (pci_get_device(sc->dev) == 0x440a)
4064                         snprintf(cfg_file, sizeof(t4_cfg_file), UWIRE_CF);
4065                 if (is_fpga(sc))
4066                         snprintf(cfg_file, sizeof(t4_cfg_file), FPGA_CF);
4067         }
4068
4069         if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
4070                 if (dcfg == NULL) {
4071                         device_printf(sc->dev,
4072                             "KLD with default config is not available.\n");
4073                         rc = ENOENT;
4074                         goto done;
4075                 }
4076                 cfdata = dcfg->data;
4077                 cflen = dcfg->datasize & ~3;
4078         } else {
4079                 char s[32];
4080
4081                 fw_info = find_fw_info(chip_id(sc));
4082                 if (fw_info == NULL) {
4083                         device_printf(sc->dev,
4084                             "unable to look up firmware information for chip %d.\n",
4085                             chip_id(sc));
4086                         rc = EINVAL;
4087                         goto done;
4088                 }
4089                 snprintf(s, sizeof(s), "%s_%s", fw_info->kld_name, cfg_file);
4090
4091                 rcfg = firmware_get(s);
4092                 if (rcfg == NULL) {
4093                         device_printf(sc->dev,
4094                             "unable to load module \"%s\" for configuration "
4095                             "profile \"%s\".\n", s, cfg_file);
4096                         rc = ENOENT;
4097                         goto done;
4098                 }
4099                 cfdata = rcfg->data;
4100                 cflen = rcfg->datasize & ~3;
4101         }
4102
4103         if (cflen > FLASH_CFG_MAX_SIZE) {
4104                 device_printf(sc->dev,
4105                     "config file too long (%d, max allowed is %d).\n",
4106                     cflen, FLASH_CFG_MAX_SIZE);
4107                 rc = EINVAL;
4108                 goto done;
4109         }
4110
4111         rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
4112         if (rc != 0) {
4113                 device_printf(sc->dev,
4114                     "%s: addr (%d/0x%x) or len %d is not valid: %d.\n",
4115                     __func__, mtype, moff, cflen, rc);
4116                 rc = EINVAL;
4117                 goto done;
4118         }
4119         write_via_memwin(sc, 2, addr, cfdata, cflen);
4120 done:
4121         if (rcfg != NULL)
4122                 firmware_put(rcfg, FIRMWARE_UNLOAD);
4123         unload_fw_module(sc, dcfg, NULL);
4124         return (rc);
4125 }
4126
/*
 * Per-category capability masks.  Each field is ANDed (after conversion to
 * big-endian) into the like-named field of the firmware's caps config
 * command, so a cleared bit withholds that capability from the firmware.
 */
struct caps_allowed {
        uint16_t nbmcaps;       /* limit for caps.nbmcaps */
        uint16_t linkcaps;      /* limit for caps.linkcaps */
        uint16_t switchcaps;    /* limit for caps.switchcaps */
        uint16_t niccaps;       /* limit for caps.niccaps */
        uint16_t toecaps;       /* limit for caps.toecaps */
        uint16_t rdmacaps;      /* limit for caps.rdmacaps */
        uint16_t cryptocaps;    /* limit for caps.cryptocaps */
        uint16_t iscsicaps;     /* limit for caps.iscsicaps */
        uint16_t fcoecaps;      /* limit for caps.fcoecaps */
};
4138
/*
 * Shorthand for building firmware parameter identifiers: the mnemonic (DEV or
 * PFVF) is combined with the FW_PARAMS_PARAM_{DEV,PFVF}_<param> index, so
 * callers pass only the trailing part of the constant's name.
 */
#define FW_PARAM_DEV(param)                                             \
        (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |                         \
            V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
#define FW_PARAM_PFVF(param)                                            \
        (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) |                        \
            V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
4145
4146 /*
4147  * Provide a configuration profile to the firmware and have it initialize the
4148  * chip accordingly.  This may involve uploading a configuration file to the
4149  * card.
4150  */
4151 static int
4152 apply_cfg_and_initialize(struct adapter *sc, char *cfg_file,
4153     const struct caps_allowed *caps_allowed)
4154 {
4155         int rc;
4156         struct fw_caps_config_cmd caps;
4157         uint32_t mtype, moff, finicsum, cfcsum, param, val;
4158
4159         rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
4160         if (rc != 0) {
4161                 device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
4162                 return (rc);
4163         }
4164
4165         bzero(&caps, sizeof(caps));
4166         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4167             F_FW_CMD_REQUEST | F_FW_CMD_READ);
4168         if (strncmp(cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0) {
4169                 mtype = 0;
4170                 moff = 0;
4171                 caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4172         } else if (strncmp(cfg_file, FLASH_CF, sizeof(t4_cfg_file)) == 0) {
4173                 mtype = FW_MEMTYPE_FLASH;
4174                 moff = t4_flash_cfg_addr(sc);
4175                 caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
4176                     V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
4177                     V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
4178                     FW_LEN16(caps));
4179         } else {
4180                 /*
4181                  * Ask the firmware where it wants us to upload the config file.
4182                  */
4183                 param = FW_PARAM_DEV(CF);
4184                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4185                 if (rc != 0) {
4186                         /* No support for config file?  Shouldn't happen. */
4187                         device_printf(sc->dev,
4188                             "failed to query config file location: %d.\n", rc);
4189                         goto done;
4190                 }
4191                 mtype = G_FW_PARAMS_PARAM_Y(val);
4192                 moff = G_FW_PARAMS_PARAM_Z(val) << 16;
4193                 caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
4194                     V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
4195                     V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
4196                     FW_LEN16(caps));
4197
4198                 rc = copy_cfg_file_to_card(sc, cfg_file, mtype, moff);
4199                 if (rc != 0) {
4200                         device_printf(sc->dev,
4201                             "failed to upload config file to card: %d.\n", rc);
4202                         goto done;
4203                 }
4204         }
4205         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4206         if (rc != 0) {
4207                 device_printf(sc->dev, "failed to pre-process config file: %d "
4208                     "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
4209                 goto done;
4210         }
4211
4212         finicsum = be32toh(caps.finicsum);
4213         cfcsum = be32toh(caps.cfcsum);  /* actual */
4214         if (finicsum != cfcsum) {
4215                 device_printf(sc->dev,
4216                     "WARNING: config file checksum mismatch: %08x %08x\n",
4217                     finicsum, cfcsum);
4218         }
4219         sc->cfcsum = cfcsum;
4220         snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", cfg_file);
4221
4222         /*
4223          * Let the firmware know what features will (not) be used so it can tune
4224          * things accordingly.
4225          */
4226 #define LIMIT_CAPS(x) do { \
4227         caps.x##caps &= htobe16(caps_allowed->x##caps); \
4228 } while (0)
4229         LIMIT_CAPS(nbm);
4230         LIMIT_CAPS(link);
4231         LIMIT_CAPS(switch);
4232         LIMIT_CAPS(nic);
4233         LIMIT_CAPS(toe);
4234         LIMIT_CAPS(rdma);
4235         LIMIT_CAPS(crypto);
4236         LIMIT_CAPS(iscsi);
4237         LIMIT_CAPS(fcoe);
4238 #undef LIMIT_CAPS
4239         if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
4240                 /*
4241                  * TOE and hashfilters are mutually exclusive.  It is a config
4242                  * file or firmware bug if both are reported as available.  Try
4243                  * to cope with the situation in non-debug builds by disabling
4244                  * TOE.
4245                  */
4246                 MPASS(caps.toecaps == 0);
4247
4248                 caps.toecaps = 0;
4249                 caps.rdmacaps = 0;
4250                 caps.iscsicaps = 0;
4251         }
4252
4253         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4254             F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
4255         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4256         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
4257         if (rc != 0) {
4258                 device_printf(sc->dev,
4259                     "failed to process config file: %d.\n", rc);
4260                 goto done;
4261         }
4262
4263         t4_tweak_chip_settings(sc);
4264         set_params__pre_init(sc);
4265
4266         /* get basic stuff going */
4267         rc = -t4_fw_initialize(sc, sc->mbox);
4268         if (rc != 0) {
4269                 device_printf(sc->dev, "fw_initialize failed: %d.\n", rc);
4270                 goto done;
4271         }
4272 done:
4273         return (rc);
4274 }
4275
4276 /*
4277  * Partition chip resources for use between various PFs, VFs, etc.
4278  */
4279 static int
4280 partition_resources(struct adapter *sc)
4281 {
4282         char cfg_file[sizeof(t4_cfg_file)];
4283         struct caps_allowed caps_allowed;
4284         int rc;
4285         bool fallback;
4286
4287         /* Only the master driver gets to configure the chip resources. */
4288         MPASS(sc->flags & MASTER_PF);
4289
4290 #define COPY_CAPS(x) do { \
4291         caps_allowed.x##caps = t4_##x##caps_allowed; \
4292 } while (0)
4293         bzero(&caps_allowed, sizeof(caps_allowed));
4294         COPY_CAPS(nbm);
4295         COPY_CAPS(link);
4296         COPY_CAPS(switch);
4297         COPY_CAPS(nic);
4298         COPY_CAPS(toe);
4299         COPY_CAPS(rdma);
4300         COPY_CAPS(crypto);
4301         COPY_CAPS(iscsi);
4302         COPY_CAPS(fcoe);
4303         fallback = sc->debug_flags & DF_DISABLE_CFG_RETRY ? false : true;
4304         snprintf(cfg_file, sizeof(cfg_file), "%s", t4_cfg_file);
4305 retry:
4306         rc = apply_cfg_and_initialize(sc, cfg_file, &caps_allowed);
4307         if (rc != 0 && fallback) {
4308                 device_printf(sc->dev,
4309                     "failed (%d) to configure card with \"%s\" profile, "
4310                     "will fall back to a basic configuration and retry.\n",
4311                     rc, cfg_file);
4312                 snprintf(cfg_file, sizeof(cfg_file), "%s", BUILTIN_CF);
4313                 bzero(&caps_allowed, sizeof(caps_allowed));
4314                 COPY_CAPS(switch);
4315                 caps_allowed.niccaps = FW_CAPS_CONFIG_NIC;
4316                 fallback = false;
4317                 goto retry;
4318         }
4319 #undef COPY_CAPS
4320         return (rc);
4321 }
4322
4323 /*
4324  * Retrieve parameters that are needed (or nice to have) very early.
4325  */
4326 static int
4327 get_params__pre_init(struct adapter *sc)
4328 {
4329         int rc;
4330         uint32_t param[2], val[2];
4331
4332         t4_get_version_info(sc);
4333
4334         snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
4335             G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
4336             G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
4337             G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
4338             G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
4339
4340         snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
4341             G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
4342             G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
4343             G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
4344             G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
4345
4346         snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
4347             G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
4348             G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
4349             G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
4350             G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
4351
4352         snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
4353             G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
4354             G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
4355             G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
4356             G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
4357
4358         param[0] = FW_PARAM_DEV(PORTVEC);
4359         param[1] = FW_PARAM_DEV(CCLK);
4360         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4361         if (rc != 0) {
4362                 device_printf(sc->dev,
4363                     "failed to query parameters (pre_init): %d.\n", rc);
4364                 return (rc);
4365         }
4366
4367         sc->params.portvec = val[0];
4368         sc->params.nports = bitcount32(val[0]);
4369         sc->params.vpd.cclk = val[1];
4370
4371         /* Read device log parameters. */
4372         rc = -t4_init_devlog_params(sc, 1);
4373         if (rc == 0)
4374                 fixup_devlog_params(sc);
4375         else {
4376                 device_printf(sc->dev,
4377                     "failed to get devlog parameters: %d.\n", rc);
4378                 rc = 0; /* devlog isn't critical for device operation */
4379         }
4380
4381         return (rc);
4382 }
4383
4384 /*
4385  * Any params that need to be set before FW_INITIALIZE.
4386  */
4387 static int
4388 set_params__pre_init(struct adapter *sc)
4389 {
4390         int rc = 0;
4391         uint32_t param, val;
4392
4393         if (chip_id(sc) >= CHELSIO_T6) {
4394                 param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT);
4395                 val = 1;
4396                 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4397                 /* firmwares < 1.20.1.0 do not have this param. */
4398                 if (rc == FW_EINVAL &&
4399                     sc->params.fw_vers < FW_VERSION32(1, 20, 1, 0)) {
4400                         rc = 0;
4401                 }
4402                 if (rc != 0) {
4403                         device_printf(sc->dev,
4404                             "failed to enable high priority filters :%d.\n",
4405                             rc);
4406                 }
4407         }
4408
4409         /* Enable opaque VIIDs with firmwares that support it. */
4410         param = FW_PARAM_DEV(OPAQUE_VIID_SMT_EXTN);
4411         val = 1;
4412         rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4413         if (rc == 0 && val == 1)
4414                 sc->params.viid_smt_extn_support = true;
4415         else
4416                 sc->params.viid_smt_extn_support = false;
4417
4418         return (rc);
4419 }
4420
4421 /*
4422  * Retrieve various parameters that are of interest to the driver.  The device
4423  * has been initialized by the firmware at this point.
4424  */
4425 static int
4426 get_params__post_init(struct adapter *sc)
4427 {
4428         int rc;
4429         uint32_t param[7], val[7];
4430         struct fw_caps_config_cmd caps;
4431
4432         param[0] = FW_PARAM_PFVF(IQFLINT_START);
4433         param[1] = FW_PARAM_PFVF(EQ_START);
4434         param[2] = FW_PARAM_PFVF(FILTER_START);
4435         param[3] = FW_PARAM_PFVF(FILTER_END);
4436         param[4] = FW_PARAM_PFVF(L2T_START);
4437         param[5] = FW_PARAM_PFVF(L2T_END);
4438         param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
4439             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
4440             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
4441         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
4442         if (rc != 0) {
4443                 device_printf(sc->dev,
4444                     "failed to query parameters (post_init): %d.\n", rc);
4445                 return (rc);
4446         }
4447
4448         sc->sge.iq_start = val[0];
4449         sc->sge.eq_start = val[1];
4450         if ((int)val[3] > (int)val[2]) {
4451                 sc->tids.ftid_base = val[2];
4452                 sc->tids.ftid_end = val[3];
4453                 sc->tids.nftids = val[3] - val[2] + 1;
4454         }
4455         sc->vres.l2t.start = val[4];
4456         sc->vres.l2t.size = val[5] - val[4] + 1;
4457         KASSERT(sc->vres.l2t.size <= L2T_SIZE,
4458             ("%s: L2 table size (%u) larger than expected (%u)",
4459             __func__, sc->vres.l2t.size, L2T_SIZE));
4460         sc->params.core_vdd = val[6];
4461
4462         param[0] = FW_PARAM_PFVF(IQFLINT_END);
4463         param[1] = FW_PARAM_PFVF(EQ_END);
4464         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4465         if (rc != 0) {
4466                 device_printf(sc->dev,
4467                     "failed to query parameters (post_init2): %d.\n", rc);
4468                 return (rc);
4469         }
4470         MPASS((int)val[0] >= sc->sge.iq_start);
4471         sc->sge.iqmap_sz = val[0] - sc->sge.iq_start + 1;
4472         MPASS((int)val[1] >= sc->sge.eq_start);
4473         sc->sge.eqmap_sz = val[1] - sc->sge.eq_start + 1;
4474
4475         if (chip_id(sc) >= CHELSIO_T6) {
4476
4477                 sc->tids.tid_base = t4_read_reg(sc,
4478                     A_LE_DB_ACTIVE_TABLE_START_INDEX);
4479
4480                 param[0] = FW_PARAM_PFVF(HPFILTER_START);
4481                 param[1] = FW_PARAM_PFVF(HPFILTER_END);
4482                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4483                 if (rc != 0) {
4484                         device_printf(sc->dev,
4485                            "failed to query hpfilter parameters: %d.\n", rc);
4486                         return (rc);
4487                 }
4488                 if ((int)val[1] > (int)val[0]) {
4489                         sc->tids.hpftid_base = val[0];
4490                         sc->tids.hpftid_end = val[1];
4491                         sc->tids.nhpftids = val[1] - val[0] + 1;
4492
4493                         /*
4494                          * These should go off if the layout changes and the
4495                          * driver needs to catch up.
4496                          */
4497                         MPASS(sc->tids.hpftid_base == 0);
4498                         MPASS(sc->tids.tid_base == sc->tids.nhpftids);
4499                 }
4500
4501                 param[0] = FW_PARAM_PFVF(RAWF_START);
4502                 param[1] = FW_PARAM_PFVF(RAWF_END);
4503                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4504                 if (rc != 0) {
4505                         device_printf(sc->dev,
4506                            "failed to query rawf parameters: %d.\n", rc);
4507                         return (rc);
4508                 }
4509                 if ((int)val[1] > (int)val[0]) {
4510                         sc->rawf_base = val[0];
4511                         sc->nrawf = val[1] - val[0] + 1;
4512                 }
4513         }
4514
4515         /*
4516          * MPSBGMAP is queried separately because only recent firmwares support
4517          * it as a parameter and we don't want the compound query above to fail
4518          * on older firmwares.
4519          */
4520         param[0] = FW_PARAM_DEV(MPSBGMAP);
4521         val[0] = 0;
4522         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4523         if (rc == 0)
4524                 sc->params.mps_bg_map = val[0];
4525         else
4526                 sc->params.mps_bg_map = 0;
4527
4528         /*
4529          * Determine whether the firmware supports the filter2 work request.
4530          * This is queried separately for the same reason as MPSBGMAP above.
4531          */
4532         param[0] = FW_PARAM_DEV(FILTER2_WR);
4533         val[0] = 0;
4534         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4535         if (rc == 0)
4536                 sc->params.filter2_wr_support = val[0] != 0;
4537         else
4538                 sc->params.filter2_wr_support = 0;
4539
4540         /*
4541          * Find out whether we're allowed to use the ULPTX MEMWRITE DSGL.
4542          * This is queried separately for the same reason as other params above.
4543          */
4544         param[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
4545         val[0] = 0;
4546         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4547         if (rc == 0)
4548                 sc->params.ulptx_memwrite_dsgl = val[0] != 0;
4549         else
4550                 sc->params.ulptx_memwrite_dsgl = false;
4551
4552         /* FW_RI_FR_NSMR_TPTE_WR support */
4553         param[0] = FW_PARAM_DEV(RI_FR_NSMR_TPTE_WR);
4554         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4555         if (rc == 0)
4556                 sc->params.fr_nsmr_tpte_wr_support = val[0] != 0;
4557         else
4558                 sc->params.fr_nsmr_tpte_wr_support = false;
4559
4560         param[0] = FW_PARAM_PFVF(MAX_PKTS_PER_ETH_TX_PKTS_WR);
4561         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4562         if (rc == 0)
4563                 sc->params.max_pkts_per_eth_tx_pkts_wr = val[0];
4564         else
4565                 sc->params.max_pkts_per_eth_tx_pkts_wr = 15;
4566
4567         /* get capabilites */
4568         bzero(&caps, sizeof(caps));
4569         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4570             F_FW_CMD_REQUEST | F_FW_CMD_READ);
4571         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4572         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4573         if (rc != 0) {
4574                 device_printf(sc->dev,
4575                     "failed to get card capabilities: %d.\n", rc);
4576                 return (rc);
4577         }
4578
4579 #define READ_CAPS(x) do { \
4580         sc->x = htobe16(caps.x); \
4581 } while (0)
4582         READ_CAPS(nbmcaps);
4583         READ_CAPS(linkcaps);
4584         READ_CAPS(switchcaps);
4585         READ_CAPS(niccaps);
4586         READ_CAPS(toecaps);
4587         READ_CAPS(rdmacaps);
4588         READ_CAPS(cryptocaps);
4589         READ_CAPS(iscsicaps);
4590         READ_CAPS(fcoecaps);
4591
4592         if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
4593                 MPASS(chip_id(sc) > CHELSIO_T4);
4594                 MPASS(sc->toecaps == 0);
4595                 sc->toecaps = 0;
4596
4597                 param[0] = FW_PARAM_DEV(NTID);
4598                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4599                 if (rc != 0) {
4600                         device_printf(sc->dev,
4601                             "failed to query HASHFILTER parameters: %d.\n", rc);
4602                         return (rc);
4603                 }
4604                 sc->tids.ntids = val[0];
4605                 if (sc->params.fw_vers < FW_VERSION32(1, 20, 5, 0)) {
4606                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4607                         sc->tids.ntids -= sc->tids.nhpftids;
4608                 }
4609                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4610                 sc->params.hash_filter = 1;
4611         }
4612         if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
4613                 param[0] = FW_PARAM_PFVF(ETHOFLD_START);
4614                 param[1] = FW_PARAM_PFVF(ETHOFLD_END);
4615                 param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4616                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
4617                 if (rc != 0) {
4618                         device_printf(sc->dev,
4619                             "failed to query NIC parameters: %d.\n", rc);
4620                         return (rc);
4621                 }
4622                 if ((int)val[1] > (int)val[0]) {
4623                         sc->tids.etid_base = val[0];
4624                         sc->tids.etid_end = val[1];
4625                         sc->tids.netids = val[1] - val[0] + 1;
4626                         sc->params.eo_wr_cred = val[2];
4627                         sc->params.ethoffload = 1;
4628                 }
4629         }
4630         if (sc->toecaps) {
4631                 /* query offload-related parameters */
4632                 param[0] = FW_PARAM_DEV(NTID);
4633                 param[1] = FW_PARAM_PFVF(SERVER_START);
4634                 param[2] = FW_PARAM_PFVF(SERVER_END);
4635                 param[3] = FW_PARAM_PFVF(TDDP_START);
4636                 param[4] = FW_PARAM_PFVF(TDDP_END);
4637                 param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4638                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4639                 if (rc != 0) {
4640                         device_printf(sc->dev,
4641                             "failed to query TOE parameters: %d.\n", rc);
4642                         return (rc);
4643                 }
4644                 sc->tids.ntids = val[0];
4645                 if (sc->params.fw_vers < FW_VERSION32(1, 20, 5, 0)) {
4646                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4647                         sc->tids.ntids -= sc->tids.nhpftids;
4648                 }
4649                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4650                 if ((int)val[2] > (int)val[1]) {
4651                         sc->tids.stid_base = val[1];
4652                         sc->tids.nstids = val[2] - val[1] + 1;
4653                 }
4654                 sc->vres.ddp.start = val[3];
4655                 sc->vres.ddp.size = val[4] - val[3] + 1;
4656                 sc->params.ofldq_wr_cred = val[5];
4657                 sc->params.offload = 1;
4658         } else {
4659                 /*
4660                  * The firmware attempts memfree TOE configuration for -SO cards
4661                  * and will report toecaps=0 if it runs out of resources (this
4662                  * depends on the config file).  It may not report 0 for other
4663                  * capabilities dependent on the TOE in this case.  Set them to
4664                  * 0 here so that the driver doesn't bother tracking resources
4665                  * that will never be used.
4666                  */
4667                 sc->iscsicaps = 0;
4668                 sc->rdmacaps = 0;
4669         }
4670         if (sc->rdmacaps) {
4671                 param[0] = FW_PARAM_PFVF(STAG_START);
4672                 param[1] = FW_PARAM_PFVF(STAG_END);
4673                 param[2] = FW_PARAM_PFVF(RQ_START);
4674                 param[3] = FW_PARAM_PFVF(RQ_END);
4675                 param[4] = FW_PARAM_PFVF(PBL_START);
4676                 param[5] = FW_PARAM_PFVF(PBL_END);
4677                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4678                 if (rc != 0) {
4679                         device_printf(sc->dev,
4680                             "failed to query RDMA parameters(1): %d.\n", rc);
4681                         return (rc);
4682                 }
4683                 sc->vres.stag.start = val[0];
4684                 sc->vres.stag.size = val[1] - val[0] + 1;
4685                 sc->vres.rq.start = val[2];
4686                 sc->vres.rq.size = val[3] - val[2] + 1;
4687                 sc->vres.pbl.start = val[4];
4688                 sc->vres.pbl.size = val[5] - val[4] + 1;
4689
4690                 param[0] = FW_PARAM_PFVF(SQRQ_START);
4691                 param[1] = FW_PARAM_PFVF(SQRQ_END);
4692                 param[2] = FW_PARAM_PFVF(CQ_START);
4693                 param[3] = FW_PARAM_PFVF(CQ_END);
4694                 param[4] = FW_PARAM_PFVF(OCQ_START);
4695                 param[5] = FW_PARAM_PFVF(OCQ_END);
4696                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4697                 if (rc != 0) {
4698                         device_printf(sc->dev,
4699                             "failed to query RDMA parameters(2): %d.\n", rc);
4700                         return (rc);
4701                 }
4702                 sc->vres.qp.start = val[0];
4703                 sc->vres.qp.size = val[1] - val[0] + 1;
4704                 sc->vres.cq.start = val[2];
4705                 sc->vres.cq.size = val[3] - val[2] + 1;
4706                 sc->vres.ocq.start = val[4];
4707                 sc->vres.ocq.size = val[5] - val[4] + 1;
4708
4709                 param[0] = FW_PARAM_PFVF(SRQ_START);
4710                 param[1] = FW_PARAM_PFVF(SRQ_END);
4711                 param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
4712                 param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
4713                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
4714                 if (rc != 0) {
4715                         device_printf(sc->dev,
4716                             "failed to query RDMA parameters(3): %d.\n", rc);
4717                         return (rc);
4718                 }
4719                 sc->vres.srq.start = val[0];
4720                 sc->vres.srq.size = val[1] - val[0] + 1;
4721                 sc->params.max_ordird_qp = val[2];
4722                 sc->params.max_ird_adapter = val[3];
4723         }
4724         if (sc->iscsicaps) {
4725                 param[0] = FW_PARAM_PFVF(ISCSI_START);
4726                 param[1] = FW_PARAM_PFVF(ISCSI_END);
4727                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4728                 if (rc != 0) {
4729                         device_printf(sc->dev,
4730                             "failed to query iSCSI parameters: %d.\n", rc);
4731                         return (rc);
4732                 }
4733                 sc->vres.iscsi.start = val[0];
4734                 sc->vres.iscsi.size = val[1] - val[0] + 1;
4735         }
4736         if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
4737                 param[0] = FW_PARAM_PFVF(TLS_START);
4738                 param[1] = FW_PARAM_PFVF(TLS_END);
4739                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4740                 if (rc != 0) {
4741                         device_printf(sc->dev,
4742                             "failed to query TLS parameters: %d.\n", rc);
4743                         return (rc);
4744                 }
4745                 sc->vres.key.start = val[0];
4746                 sc->vres.key.size = val[1] - val[0] + 1;
4747         }
4748
4749         t4_init_sge_params(sc);
4750
4751         /*
4752          * We've got the params we wanted to query via the firmware.  Now grab
4753          * some others directly from the chip.
4754          */
4755         rc = t4_read_chip_settings(sc);
4756
4757         return (rc);
4758 }
4759
4760 #ifdef KERN_TLS
4761 static void
4762 ktls_tick(void *arg)
4763 {
4764         struct adapter *sc;
4765         uint32_t tstamp;
4766
4767         sc = arg;
4768
4769         tstamp = tcp_ts_getticks();
4770         t4_write_reg(sc, A_TP_SYNC_TIME_HI, tstamp >> 1);
4771         t4_write_reg(sc, A_TP_SYNC_TIME_LO, tstamp << 31);
4772
4773         callout_schedule_sbt(&sc->ktls_tick, SBT_1MS, 0, C_HARDCLOCK);
4774 }
4775
4776 static void
4777 t4_enable_kern_tls(struct adapter *sc)
4778 {
4779         uint32_t m, v;
4780
4781         m = F_ENABLECBYP;
4782         v = F_ENABLECBYP;
4783         t4_set_reg_field(sc, A_TP_PARA_REG6, m, v);
4784
4785         m = F_CPL_FLAGS_UPDATE_EN | F_SEQ_UPDATE_EN;
4786         v = F_CPL_FLAGS_UPDATE_EN | F_SEQ_UPDATE_EN;
4787         t4_set_reg_field(sc, A_ULP_TX_CONFIG, m, v);
4788
4789         m = F_NICMODE;
4790         v = F_NICMODE;
4791         t4_set_reg_field(sc, A_TP_IN_CONFIG, m, v);
4792
4793         m = F_LOOKUPEVERYPKT;
4794         v = 0;
4795         t4_set_reg_field(sc, A_TP_INGRESS_CONFIG, m, v);
4796
4797         m = F_TXDEFERENABLE | F_DISABLEWINDOWPSH | F_DISABLESEPPSHFLAG;
4798         v = F_DISABLEWINDOWPSH;
4799         t4_set_reg_field(sc, A_TP_PC_CONFIG, m, v);
4800
4801         m = V_TIMESTAMPRESOLUTION(M_TIMESTAMPRESOLUTION);
4802         v = V_TIMESTAMPRESOLUTION(0x1f);
4803         t4_set_reg_field(sc, A_TP_TIMER_RESOLUTION, m, v);
4804
4805         sc->flags |= KERN_TLS_OK;
4806
4807         sc->tlst.inline_keys = t4_tls_inline_keys;
4808         sc->tlst.combo_wrs = t4_tls_combo_wrs;
4809 }
4810 #endif
4811
4812 static int
4813 set_params__post_init(struct adapter *sc)
4814 {
4815         uint32_t mask, param, val;
4816 #ifdef TCP_OFFLOAD
4817         int i, v, shift;
4818 #endif
4819
4820         /* ask for encapsulated CPLs */
4821         param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
4822         val = 1;
4823         (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4824
4825         /* Enable 32b port caps if the firmware supports it. */
4826         param = FW_PARAM_PFVF(PORT_CAPS32);
4827         val = 1;
4828         if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
4829                 sc->params.port_caps32 = 1;
4830
4831         /* Let filter + maskhash steer to a part of the VI's RSS region. */
4832         val = 1 << (G_MASKSIZE(t4_read_reg(sc, A_TP_RSS_CONFIG_TNL)) - 1);
4833         t4_set_reg_field(sc, A_TP_RSS_CONFIG_TNL, V_MASKFILTER(M_MASKFILTER),
4834             V_MASKFILTER(val - 1));
4835
4836         mask = F_DROPERRORANY | F_DROPERRORMAC | F_DROPERRORIPVER |
4837             F_DROPERRORFRAG | F_DROPERRORATTACK | F_DROPERRORETHHDRLEN |
4838             F_DROPERRORIPHDRLEN | F_DROPERRORTCPHDRLEN | F_DROPERRORPKTLEN |
4839             F_DROPERRORTCPOPT | F_DROPERRORCSUMIP | F_DROPERRORCSUM;
4840         val = 0;
4841         if (chip_id(sc) < CHELSIO_T6 && t4_attack_filter != 0) {
4842                 t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, F_ATTACKFILTERENABLE,
4843                     F_ATTACKFILTERENABLE);
4844                 val |= F_DROPERRORATTACK;
4845         }
4846         if (t4_drop_ip_fragments != 0) {
4847                 t4_set_reg_field(sc, A_TP_GLOBAL_CONFIG, F_FRAGMENTDROP,
4848                     F_FRAGMENTDROP);
4849                 val |= F_DROPERRORFRAG;
4850         }
4851         if (t4_drop_pkts_with_l2_errors != 0)
4852                 val |= F_DROPERRORMAC | F_DROPERRORETHHDRLEN;
4853         if (t4_drop_pkts_with_l3_errors != 0) {
4854                 val |= F_DROPERRORIPVER | F_DROPERRORIPHDRLEN |
4855                     F_DROPERRORCSUMIP;
4856         }
4857         if (t4_drop_pkts_with_l4_errors != 0) {
4858                 val |= F_DROPERRORTCPHDRLEN | F_DROPERRORPKTLEN |
4859                     F_DROPERRORTCPOPT | F_DROPERRORCSUM;
4860         }
4861         t4_set_reg_field(sc, A_TP_ERR_CONFIG, mask, val);
4862
4863 #ifdef TCP_OFFLOAD
4864         /*
4865          * Override the TOE timers with user provided tunables.  This is not the
4866          * recommended way to change the timers (the firmware config file is) so
4867          * these tunables are not documented.
4868          *
4869          * All the timer tunables are in microseconds.
4870          */
4871         if (t4_toe_keepalive_idle != 0) {
4872                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
4873                 v &= M_KEEPALIVEIDLE;
4874                 t4_set_reg_field(sc, A_TP_KEEP_IDLE,
4875                     V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
4876         }
4877         if (t4_toe_keepalive_interval != 0) {
4878                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
4879                 v &= M_KEEPALIVEINTVL;
4880                 t4_set_reg_field(sc, A_TP_KEEP_INTVL,
4881                     V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
4882         }
4883         if (t4_toe_keepalive_count != 0) {
4884                 v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
4885                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4886                     V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
4887                     V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
4888                     V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
4889         }
4890         if (t4_toe_rexmt_min != 0) {
4891                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
4892                 v &= M_RXTMIN;
4893                 t4_set_reg_field(sc, A_TP_RXT_MIN,
4894                     V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
4895         }
4896         if (t4_toe_rexmt_max != 0) {
4897                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
4898                 v &= M_RXTMAX;
4899                 t4_set_reg_field(sc, A_TP_RXT_MAX,
4900                     V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
4901         }
4902         if (t4_toe_rexmt_count != 0) {
4903                 v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
4904                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4905                     V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
4906                     V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
4907                     V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
4908         }
4909         for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
4910                 if (t4_toe_rexmt_backoff[i] != -1) {
4911                         v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
4912                         shift = (i & 3) << 3;
4913                         t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
4914                             M_TIMERBACKOFFINDEX0 << shift, v << shift);
4915                 }
4916         }
4917 #endif
4918
4919 #ifdef KERN_TLS
4920         if (t4_kern_tls != 0 && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS &&
4921             sc->toecaps & FW_CAPS_CONFIG_TOE)
4922                 t4_enable_kern_tls(sc);
4923 #endif
4924         return (0);
4925 }
4926
4927 #undef FW_PARAM_PFVF
4928 #undef FW_PARAM_DEV
4929
4930 static void
4931 t4_set_desc(struct adapter *sc)
4932 {
4933         char buf[128];
4934         struct adapter_params *p = &sc->params;
4935
4936         snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
4937
4938         device_set_desc_copy(sc->dev, buf);
4939 }
4940
4941 static inline void
4942 ifmedia_add4(struct ifmedia *ifm, int m)
4943 {
4944
4945         ifmedia_add(ifm, m, 0, NULL);
4946         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL);
4947         ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL);
4948         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
4949 }
4950
4951 /*
4952  * This is the selected media, which is not quite the same as the active media.
4953  * The media line in ifconfig is "media: Ethernet selected (active)" if selected
4954  * and active are not the same, and "media: Ethernet selected" otherwise.
4955  */
4956 static void
4957 set_current_media(struct port_info *pi)
4958 {
4959         struct link_config *lc;
4960         struct ifmedia *ifm;
4961         int mword;
4962         u_int speed;
4963
4964         PORT_LOCK_ASSERT_OWNED(pi);
4965
4966         /* Leave current media alone if it's already set to IFM_NONE. */
4967         ifm = &pi->media;
4968         if (ifm->ifm_cur != NULL &&
4969             IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
4970                 return;
4971
4972         lc = &pi->link_cfg;
4973         if (lc->requested_aneg != AUTONEG_DISABLE &&
4974             lc->pcaps & FW_PORT_CAP32_ANEG) {
4975                 ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
4976                 return;
4977         }
4978         mword = IFM_ETHER | IFM_FDX;
4979         if (lc->requested_fc & PAUSE_TX)
4980                 mword |= IFM_ETH_TXPAUSE;
4981         if (lc->requested_fc & PAUSE_RX)
4982                 mword |= IFM_ETH_RXPAUSE;
4983         if (lc->requested_speed == 0)
4984                 speed = port_top_speed(pi) * 1000;      /* Gbps -> Mbps */
4985         else
4986                 speed = lc->requested_speed;
4987         mword |= port_mword(pi, speed_to_fwcap(speed));
4988         ifmedia_set(ifm, mword);
4989 }
4990
4991 /*
4992  * Returns true if the ifmedia list for the port cannot change.
4993  */
4994 static bool
4995 fixed_ifmedia(struct port_info *pi)
4996 {
4997
4998         return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
4999             pi->port_type == FW_PORT_TYPE_BT_XFI ||
5000             pi->port_type == FW_PORT_TYPE_BT_XAUI ||
5001             pi->port_type == FW_PORT_TYPE_KX4 ||
5002             pi->port_type == FW_PORT_TYPE_KX ||
5003             pi->port_type == FW_PORT_TYPE_KR ||
5004             pi->port_type == FW_PORT_TYPE_BP_AP ||
5005             pi->port_type == FW_PORT_TYPE_BP4_AP ||
5006             pi->port_type == FW_PORT_TYPE_BP40_BA ||
5007             pi->port_type == FW_PORT_TYPE_KR4_100G ||
5008             pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
5009             pi->port_type == FW_PORT_TYPE_KR_XLAUI);
5010 }
5011
5012 static void
5013 build_medialist(struct port_info *pi)
5014 {
5015         uint32_t ss, speed;
5016         int unknown, mword, bit;
5017         struct link_config *lc;
5018         struct ifmedia *ifm;
5019
5020         PORT_LOCK_ASSERT_OWNED(pi);
5021
5022         if (pi->flags & FIXED_IFMEDIA)
5023                 return;
5024
5025         /*
5026          * Rebuild the ifmedia list.
5027          */
5028         ifm = &pi->media;
5029         ifmedia_removeall(ifm);
5030         lc = &pi->link_cfg;
5031         ss = G_FW_PORT_CAP32_SPEED(lc->pcaps); /* Supported Speeds */
5032         if (__predict_false(ss == 0)) { /* not supposed to happen. */
5033                 MPASS(ss != 0);
5034 no_media:
5035                 MPASS(LIST_EMPTY(&ifm->ifm_list));
5036                 ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL);
5037                 ifmedia_set(ifm, IFM_ETHER | IFM_NONE);
5038                 return;
5039         }
5040
5041         unknown = 0;
5042         for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
5043                 speed = 1 << bit;
5044                 MPASS(speed & M_FW_PORT_CAP32_SPEED);
5045                 if (ss & speed) {
5046                         mword = port_mword(pi, speed);
5047                         if (mword == IFM_NONE) {
5048                                 goto no_media;
5049                         } else if (mword == IFM_UNKNOWN)
5050                                 unknown++;
5051                         else
5052                                 ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword);
5053                 }
5054         }
5055         if (unknown > 0) /* Add one unknown for all unknown media types. */
5056                 ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
5057         if (lc->pcaps & FW_PORT_CAP32_ANEG)
5058                 ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);
5059
5060         set_current_media(pi);
5061 }
5062
5063 /*
5064  * Initialize the requested fields in the link config based on driver tunables.
5065  */
5066 static void
5067 init_link_config(struct port_info *pi)
5068 {
5069         struct link_config *lc = &pi->link_cfg;
5070
5071         PORT_LOCK_ASSERT_OWNED(pi);
5072
5073         lc->requested_speed = 0;
5074
5075         if (t4_autoneg == 0)
5076                 lc->requested_aneg = AUTONEG_DISABLE;
5077         else if (t4_autoneg == 1)
5078                 lc->requested_aneg = AUTONEG_ENABLE;
5079         else
5080                 lc->requested_aneg = AUTONEG_AUTO;
5081
5082         lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
5083             PAUSE_AUTONEG);
5084
5085         if (t4_fec & FEC_AUTO)
5086                 lc->requested_fec = FEC_AUTO;
5087         else if (t4_fec == 0)
5088                 lc->requested_fec = FEC_NONE;
5089         else {
5090                 /* -1 is handled by the FEC_AUTO block above and not here. */
5091                 lc->requested_fec = t4_fec &
5092                     (FEC_RS | FEC_BASER_RS | FEC_NONE | FEC_MODULE);
5093                 if (lc->requested_fec == 0)
5094                         lc->requested_fec = FEC_AUTO;
5095         }
5096 }
5097
5098 /*
5099  * Makes sure that all requested settings comply with what's supported by the
5100  * port.  Returns the number of settings that were invalid and had to be fixed.
5101  */
5102 static int
5103 fixup_link_config(struct port_info *pi)
5104 {
5105         int n = 0;
5106         struct link_config *lc = &pi->link_cfg;
5107         uint32_t fwspeed;
5108
5109         PORT_LOCK_ASSERT_OWNED(pi);
5110
5111         /* Speed (when not autonegotiating) */
5112         if (lc->requested_speed != 0) {
5113                 fwspeed = speed_to_fwcap(lc->requested_speed);
5114                 if ((fwspeed & lc->pcaps) == 0) {
5115                         n++;
5116                         lc->requested_speed = 0;
5117                 }
5118         }
5119
5120         /* Link autonegotiation */
5121         MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
5122             lc->requested_aneg == AUTONEG_DISABLE ||
5123             lc->requested_aneg == AUTONEG_AUTO);
5124         if (lc->requested_aneg == AUTONEG_ENABLE &&
5125             !(lc->pcaps & FW_PORT_CAP32_ANEG)) {
5126                 n++;
5127                 lc->requested_aneg = AUTONEG_AUTO;
5128         }
5129
5130         /* Flow control */
5131         MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
5132         if (lc->requested_fc & PAUSE_TX &&
5133             !(lc->pcaps & FW_PORT_CAP32_FC_TX)) {
5134                 n++;
5135                 lc->requested_fc &= ~PAUSE_TX;
5136         }
5137         if (lc->requested_fc & PAUSE_RX &&
5138             !(lc->pcaps & FW_PORT_CAP32_FC_RX)) {
5139                 n++;
5140                 lc->requested_fc &= ~PAUSE_RX;
5141         }
5142         if (!(lc->requested_fc & PAUSE_AUTONEG) &&
5143             !(lc->pcaps & FW_PORT_CAP32_FORCE_PAUSE)) {
5144                 n++;
5145                 lc->requested_fc |= PAUSE_AUTONEG;
5146         }
5147
5148         /* FEC */
5149         if ((lc->requested_fec & FEC_RS &&
5150             !(lc->pcaps & FW_PORT_CAP32_FEC_RS)) ||
5151             (lc->requested_fec & FEC_BASER_RS &&
5152             !(lc->pcaps & FW_PORT_CAP32_FEC_BASER_RS))) {
5153                 n++;
5154                 lc->requested_fec = FEC_AUTO;
5155         }
5156
5157         return (n);
5158 }
5159
5160 /*
5161  * Apply the requested L1 settings, which are expected to be valid, to the
5162  * hardware.
5163  */
5164 static int
5165 apply_link_config(struct port_info *pi)
5166 {
5167         struct adapter *sc = pi->adapter;
5168         struct link_config *lc = &pi->link_cfg;
5169         int rc;
5170
5171 #ifdef INVARIANTS
5172         ASSERT_SYNCHRONIZED_OP(sc);
5173         PORT_LOCK_ASSERT_OWNED(pi);
5174
5175         if (lc->requested_aneg == AUTONEG_ENABLE)
5176                 MPASS(lc->pcaps & FW_PORT_CAP32_ANEG);
5177         if (!(lc->requested_fc & PAUSE_AUTONEG))
5178                 MPASS(lc->pcaps & FW_PORT_CAP32_FORCE_PAUSE);
5179         if (lc->requested_fc & PAUSE_TX)
5180                 MPASS(lc->pcaps & FW_PORT_CAP32_FC_TX);
5181         if (lc->requested_fc & PAUSE_RX)
5182                 MPASS(lc->pcaps & FW_PORT_CAP32_FC_RX);
5183         if (lc->requested_fec & FEC_RS)
5184                 MPASS(lc->pcaps & FW_PORT_CAP32_FEC_RS);
5185         if (lc->requested_fec & FEC_BASER_RS)
5186                 MPASS(lc->pcaps & FW_PORT_CAP32_FEC_BASER_RS);
5187 #endif
5188         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
5189         if (rc != 0) {
5190                 /* Don't complain if the VF driver gets back an EPERM. */
5191                 if (!(sc->flags & IS_VF) || rc != FW_EPERM)
5192                         device_printf(pi->dev, "l1cfg failed: %d\n", rc);
5193         } else {
5194                 /*
5195                  * An L1_CFG will almost always result in a link-change event if
5196                  * the link is up, and the driver will refresh the actual
5197                  * fec/fc/etc. when the notification is processed.  If the link
5198                  * is down then the actual settings are meaningless.
5199                  *
5200                  * This takes care of the case where a change in the L1 settings
5201                  * may not result in a notification.
5202                  */
5203                 if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
5204                         lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
5205         }
5206         return (rc);
5207 }
5208
5209 #define FW_MAC_EXACT_CHUNK      7
5210 struct mcaddr_ctx {
5211         struct ifnet *ifp;
5212         const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
5213         uint64_t hash;
5214         int i;
5215         int del;
5216         int rc;
5217 };
5218
5219 static u_int
5220 add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
5221 {
5222         struct mcaddr_ctx *ctx = arg;
5223         struct vi_info *vi = ctx->ifp->if_softc;
5224         struct port_info *pi = vi->pi;
5225         struct adapter *sc = pi->adapter;
5226
5227         if (ctx->rc < 0)
5228                 return (0);
5229
5230         ctx->mcaddr[ctx->i] = LLADDR(sdl);
5231         MPASS(ETHER_IS_MULTICAST(ctx->mcaddr[ctx->i]));
5232         ctx->i++;
5233
5234         if (ctx->i == FW_MAC_EXACT_CHUNK) {
5235                 ctx->rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, ctx->del,
5236                     ctx->i, ctx->mcaddr, NULL, &ctx->hash, 0);
5237                 if (ctx->rc < 0) {
5238                         int j;
5239
5240                         for (j = 0; j < ctx->i; j++) {
5241                                 if_printf(ctx->ifp,
5242                                     "failed to add mc address"
5243                                     " %02x:%02x:%02x:"
5244                                     "%02x:%02x:%02x rc=%d\n",
5245                                     ctx->mcaddr[j][0], ctx->mcaddr[j][1],
5246                                     ctx->mcaddr[j][2], ctx->mcaddr[j][3],
5247                                     ctx->mcaddr[j][4], ctx->mcaddr[j][5],
5248                                     -ctx->rc);
5249                         }
5250                         return (0);
5251                 }
5252                 ctx->del = 0;
5253                 ctx->i = 0;
5254         }
5255
5256         return (1);
5257 }
5258
5259 /*
5260  * Program the port's XGMAC based on parameters in ifnet.  The caller also
5261  * indicates which parameters should be programmed (the rest are left alone).
5262  */
5263 int
5264 update_mac_settings(struct ifnet *ifp, int flags)
5265 {
5266         int rc = 0;
5267         struct vi_info *vi = ifp->if_softc;
5268         struct port_info *pi = vi->pi;
5269         struct adapter *sc = pi->adapter;
5270         int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
5271         uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
5272
5273         ASSERT_SYNCHRONIZED_OP(sc);
5274         KASSERT(flags, ("%s: not told what to update.", __func__));
5275
5276         if (flags & XGMAC_MTU)
5277                 mtu = ifp->if_mtu;
5278
5279         if (flags & XGMAC_PROMISC)
5280                 promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
5281
5282         if (flags & XGMAC_ALLMULTI)
5283                 allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
5284
5285         if (flags & XGMAC_VLANEX)
5286                 vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
5287
5288         if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
5289                 rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
5290                     allmulti, 1, vlanex, false);
5291                 if (rc) {
5292                         if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
5293                             rc);
5294                         return (rc);
5295                 }
5296         }
5297
5298         if (flags & XGMAC_UCADDR) {
5299                 uint8_t ucaddr[ETHER_ADDR_LEN];
5300
5301                 bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
5302                 rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
5303                     ucaddr, true, &vi->smt_idx);
5304                 if (rc < 0) {
5305                         rc = -rc;
5306                         if_printf(ifp, "change_mac failed: %d\n", rc);
5307                         return (rc);
5308                 } else {
5309                         vi->xact_addr_filt = rc;
5310                         rc = 0;
5311                 }
5312         }
5313
5314         if (flags & XGMAC_MCADDRS) {
5315                 struct epoch_tracker et;
5316                 struct mcaddr_ctx ctx;
5317                 int j;
5318
5319                 ctx.ifp = ifp;
5320                 ctx.hash = 0;
5321                 ctx.i = 0;
5322                 ctx.del = 1;
5323                 ctx.rc = 0;
5324                 /*
5325                  * Unlike other drivers, we accumulate list of pointers into
5326                  * interface address lists and we need to keep it safe even
5327                  * after if_foreach_llmaddr() returns, thus we must enter the
5328                  * network epoch.
5329                  */
5330                 NET_EPOCH_ENTER(et);
5331                 if_foreach_llmaddr(ifp, add_maddr, &ctx);
5332                 if (ctx.rc < 0) {
5333                         NET_EPOCH_EXIT(et);
5334                         rc = -ctx.rc;
5335                         return (rc);
5336                 }
5337                 if (ctx.i > 0) {
5338                         rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
5339                             ctx.del, ctx.i, ctx.mcaddr, NULL, &ctx.hash, 0);
5340                         NET_EPOCH_EXIT(et);
5341                         if (rc < 0) {
5342                                 rc = -rc;
5343                                 for (j = 0; j < ctx.i; j++) {
5344                                         if_printf(ifp,
5345                                             "failed to add mcast address"
5346                                             " %02x:%02x:%02x:"
5347                                             "%02x:%02x:%02x rc=%d\n",
5348                                             ctx.mcaddr[j][0], ctx.mcaddr[j][1],
5349                                             ctx.mcaddr[j][2], ctx.mcaddr[j][3],
5350                                             ctx.mcaddr[j][4], ctx.mcaddr[j][5],
5351                                             rc);
5352                                 }
5353                                 return (rc);
5354                         }
5355                         ctx.del = 0;
5356                 } else
5357                         NET_EPOCH_EXIT(et);
5358
5359                 rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, ctx.hash, 0);
5360                 if (rc != 0)
5361                         if_printf(ifp, "failed to set mcast address hash: %d\n",
5362                             rc);
5363                 if (ctx.del == 0) {
5364                         /* We clobbered the VXLAN entry if there was one. */
5365                         pi->vxlan_tcam_entry = false;
5366                 }
5367         }
5368
5369         if (IS_MAIN_VI(vi) && sc->vxlan_refcount > 0 &&
5370             pi->vxlan_tcam_entry == false) {
5371                 rc = t4_alloc_raw_mac_filt(sc, vi->viid, match_all_mac,
5372                     match_all_mac, sc->rawf_base + pi->port_id, 1, pi->port_id,
5373                     true);
5374                 if (rc < 0) {
5375                         rc = -rc;
5376                         if_printf(ifp, "failed to add VXLAN TCAM entry: %d.\n",
5377                             rc);
5378                 } else {
5379                         MPASS(rc == sc->rawf_base + pi->port_id);
5380                         rc = 0;
5381                         pi->vxlan_tcam_entry = true;
5382                 }
5383         }
5384
5385         return (rc);
5386 }
5387
5388 /*
5389  * {begin|end}_synchronized_op must be called from the same thread.
5390  */
5391 int
5392 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
5393     char *wmesg)
5394 {
5395         int rc, pri;
5396
5397 #ifdef WITNESS
5398         /* the caller thinks it's ok to sleep, but is it really? */
5399         if (flags & SLEEP_OK)
5400                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
5401                     "begin_synchronized_op");
5402 #endif
5403
5404         if (INTR_OK)
5405                 pri = PCATCH;
5406         else
5407                 pri = 0;
5408
5409         ADAPTER_LOCK(sc);
5410         for (;;) {
5411
5412                 if (vi && IS_DOOMED(vi)) {
5413                         rc = ENXIO;
5414                         goto done;
5415                 }
5416
5417                 if (!IS_BUSY(sc)) {
5418                         rc = 0;
5419                         break;
5420                 }
5421
5422                 if (!(flags & SLEEP_OK)) {
5423                         rc = EBUSY;
5424                         goto done;
5425                 }
5426
5427                 if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
5428                         rc = EINTR;
5429                         goto done;
5430                 }
5431         }
5432
5433         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
5434         SET_BUSY(sc);
5435 #ifdef INVARIANTS
5436         sc->last_op = wmesg;
5437         sc->last_op_thr = curthread;
5438         sc->last_op_flags = flags;
5439 #endif
5440
5441 done:
5442         if (!(flags & HOLD_LOCK) || rc)
5443                 ADAPTER_UNLOCK(sc);
5444
5445         return (rc);
5446 }
5447
5448 /*
5449  * Tell if_ioctl and if_init that the VI is going away.  This is
5450  * special variant of begin_synchronized_op and must be paired with a
5451  * call to end_synchronized_op.
5452  */
5453 void
5454 doom_vi(struct adapter *sc, struct vi_info *vi)
5455 {
5456
5457         ADAPTER_LOCK(sc);
5458         SET_DOOMED(vi);
5459         wakeup(&sc->flags);
5460         while (IS_BUSY(sc))
5461                 mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
5462         SET_BUSY(sc);
5463 #ifdef INVARIANTS
5464         sc->last_op = "t4detach";
5465         sc->last_op_thr = curthread;
5466         sc->last_op_flags = 0;
5467 #endif
5468         ADAPTER_UNLOCK(sc);
5469 }
5470
5471 /*
5472  * {begin|end}_synchronized_op must be called from the same thread.
5473  */
5474 void
5475 end_synchronized_op(struct adapter *sc, int flags)
5476 {
5477
5478         if (flags & LOCK_HELD)
5479                 ADAPTER_LOCK_ASSERT_OWNED(sc);
5480         else
5481                 ADAPTER_LOCK(sc);
5482
5483         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
5484         CLR_BUSY(sc);
5485         wakeup(&sc->flags);
5486         ADAPTER_UNLOCK(sc);
5487 }
5488
5489 static int
5490 cxgbe_init_synchronized(struct vi_info *vi)
5491 {
5492         struct port_info *pi = vi->pi;
5493         struct adapter *sc = pi->adapter;
5494         struct ifnet *ifp = vi->ifp;
5495         int rc = 0, i;
5496         struct sge_txq *txq;
5497
5498         ASSERT_SYNCHRONIZED_OP(sc);
5499
5500         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
5501                 return (0);     /* already running */
5502
5503         if (!(sc->flags & FULL_INIT_DONE) &&
5504             ((rc = adapter_full_init(sc)) != 0))
5505                 return (rc);    /* error message displayed already */
5506
5507         if (!(vi->flags & VI_INIT_DONE) &&
5508             ((rc = vi_full_init(vi)) != 0))
5509                 return (rc); /* error message displayed already */
5510
5511         rc = update_mac_settings(ifp, XGMAC_ALL);
5512         if (rc)
5513                 goto done;      /* error message displayed already */
5514
5515         PORT_LOCK(pi);
5516         if (pi->up_vis == 0) {
5517                 t4_update_port_info(pi);
5518                 fixup_link_config(pi);
5519                 build_medialist(pi);
5520                 apply_link_config(pi);
5521         }
5522
5523         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
5524         if (rc != 0) {
5525                 if_printf(ifp, "enable_vi failed: %d\n", rc);
5526                 PORT_UNLOCK(pi);
5527                 goto done;
5528         }
5529
5530         /*
5531          * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
5532          * if this changes.
5533          */
5534
5535         for_each_txq(vi, i, txq) {
5536                 TXQ_LOCK(txq);
5537                 txq->eq.flags |= EQ_ENABLED;
5538                 TXQ_UNLOCK(txq);
5539         }
5540
5541         /*
5542          * The first iq of the first port to come up is used for tracing.
5543          */
5544         if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
5545                 sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
5546                 t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
5547                     A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
5548                     V_QUEUENUMBER(sc->traceq));
5549                 pi->flags |= HAS_TRACEQ;
5550         }
5551
5552         /* all ok */
5553         pi->up_vis++;
5554         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5555
5556         if (pi->nvi > 1 || sc->flags & IS_VF)
5557                 callout_reset(&vi->tick, hz, vi_tick, vi);
5558         else
5559                 callout_reset(&pi->tick, hz, cxgbe_tick, pi);
5560         if (pi->link_cfg.link_ok)
5561                 t4_os_link_changed(pi);
5562         PORT_UNLOCK(pi);
5563 done:
5564         if (rc != 0)
5565                 cxgbe_uninit_synchronized(vi);
5566
5567         return (rc);
5568 }
5569
5570 /*
5571  * Idempotent.
5572  */
5573 static int
5574 cxgbe_uninit_synchronized(struct vi_info *vi)
5575 {
5576         struct port_info *pi = vi->pi;
5577         struct adapter *sc = pi->adapter;
5578         struct ifnet *ifp = vi->ifp;
5579         int rc, i;
5580         struct sge_txq *txq;
5581
5582         ASSERT_SYNCHRONIZED_OP(sc);
5583
5584         if (!(vi->flags & VI_INIT_DONE)) {
5585                 if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5586                         KASSERT(0, ("uninited VI is running"));
5587                         if_printf(ifp, "uninited VI with running ifnet.  "
5588                             "vi->flags 0x%016lx, if_flags 0x%08x, "
5589                             "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
5590                             ifp->if_drv_flags);
5591                 }
5592                 return (0);
5593         }
5594
5595         /*
5596          * Disable the VI so that all its data in either direction is discarded
5597          * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
5598          * tick) intact as the TP can deliver negative advice or data that it's
5599          * holding in its RAM (for an offloaded connection) even after the VI is
5600          * disabled.
5601          */
5602         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
5603         if (rc) {
5604                 if_printf(ifp, "disable_vi failed: %d\n", rc);
5605                 return (rc);
5606         }
5607
5608         for_each_txq(vi, i, txq) {
5609                 TXQ_LOCK(txq);
5610                 txq->eq.flags &= ~EQ_ENABLED;
5611                 TXQ_UNLOCK(txq);
5612         }
5613
5614         PORT_LOCK(pi);
5615         if (pi->nvi > 1 || sc->flags & IS_VF)
5616                 callout_stop(&vi->tick);
5617         else
5618                 callout_stop(&pi->tick);
5619         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5620                 PORT_UNLOCK(pi);
5621                 return (0);
5622         }
5623         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
5624         pi->up_vis--;
5625         if (pi->up_vis > 0) {
5626                 PORT_UNLOCK(pi);
5627                 return (0);
5628         }
5629
5630         pi->link_cfg.link_ok = false;
5631         pi->link_cfg.speed = 0;
5632         pi->link_cfg.link_down_rc = 255;
5633         t4_os_link_changed(pi);
5634         PORT_UNLOCK(pi);
5635
5636         return (0);
5637 }
5638
/*
 * Allocates and sets up every interrupt used by the adapter, consuming the
 * entries of sc->irq[] in order.
 *
 * If forwarding_intr_to_fwq(sc) is true a single vector handled by
 * t4_intr_all is set up and nothing else.  Otherwise the vectors are handed
 * out in this order: the error interrupt (PFs only), the firmware event queue,
 * and then one vector per rx queue for every VI of every port (NIC and/or
 * netmap rx queues first, followed by the offload rx queues when TCP_OFFLOAD
 * is compiled in).  vi->first_intr and vi->nintr are updated along the way.
 *
 * Returns 0 on success, or the error returned by t4_alloc_irq.
 *
 * It is ok for this function to fail midway and return right away.  t4_detach
 * will walk the entire sc->irq list and clean up whatever is valid.
 */
int
t4_setup_intr_handlers(struct adapter *sc)
{
	int rc, rid, p, q, v;
	char s[8];	/* interrupt description, see the snprintf calls below */
	struct irq *irq;
	struct port_info *pi;
	struct vi_info *vi;
	struct sge *sge = &sc->sge;
	struct sge_rxq *rxq;
#ifdef TCP_OFFLOAD
	struct sge_ofld_rxq *ofld_rxq;
#endif
#ifdef DEV_NETMAP
	struct sge_nm_rxq *nm_rxq;
#endif
#ifdef RSS
	int nbuckets = rss_getnumbuckets();
#endif

	/*
	 * Setup interrupts.  The first resource id is 0 for INTx and 1 for
	 * every other interrupt type.
	 */
	irq = &sc->irq[0];
	rid = sc->intr_type == INTR_INTX ? 0 : 1;
	if (forwarding_intr_to_fwq(sc))
		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));

	/* Multiple interrupts. */
	if (sc->flags & IS_VF)
		KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
		    ("%s: too few intr.", __func__));
	else
		KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
		    ("%s: too few intr.", __func__));

	/* The first one is always error intr on PFs */
	if (!(sc->flags & IS_VF)) {
		rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
		if (rc != 0)
			return (rc);
		irq++;
		rid++;
	}

	/* The second one is always the firmware event queue (first on VFs) */
	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
	if (rc != 0)
		return (rc);
	irq++;
	rid++;

	/*
	 * rc is 0 from here on: every failure above and below returns
	 * immediately.
	 */
	for_each_port(sc, p) {
		pi = sc->port[p];
		for_each_vi(pi, v, vi) {
			/*
			 * NOTE(review): recorded as rid - 1; presumably a
			 * 0-based vector index given that rids start at 1 in
			 * the multi-vector case -- confirm against users of
			 * first_intr.
			 */
			vi->first_intr = rid - 1;

			if (vi->nnmrxq > 0) {
				/*
				 * This VI has netmap rx queues.  The i-th NIC
				 * rxq and the i-th netmap rxq share a vector,
				 * so max(nrxq, nnmrxq) vectors are needed.
				 */
				int n = max(vi->nrxq, vi->nnmrxq);

				rxq = &sge->rxq[vi->first_rxq];
#ifdef DEV_NETMAP
				nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
#endif
				for (q = 0; q < n; q++) {
					/*
					 * Description is <port, hex><VI as a
					 * lowercase letter><queue, hex>.
					 */
					snprintf(s, sizeof(s), "%x%c%x", p,
					    'a' + v, q);
					/*
					 * NOTE(review): the NULL tests below
					 * assume the irq entry starts out
					 * zeroed -- confirm at the allocation
					 * site of sc->irq.
					 */
					if (q < vi->nrxq)
						irq->rxq = rxq++;
#ifdef DEV_NETMAP
					if (q < vi->nnmrxq)
						irq->nm_rxq = nm_rxq++;

					if (irq->nm_rxq != NULL &&
					    irq->rxq == NULL) {
						/* Netmap rx only */
						rc = t4_alloc_irq(sc, irq, rid,
						    t4_nm_intr, irq->nm_rxq, s);
					}
					if (irq->nm_rxq != NULL &&
					    irq->rxq != NULL) {
						/* NIC and Netmap rx */
						rc = t4_alloc_irq(sc, irq, rid,
						    t4_vi_intr, irq, s);
					}
#endif
					if (irq->rxq != NULL &&
					    irq->nm_rxq == NULL) {
						/* NIC rx only */
						rc = t4_alloc_irq(sc, irq, rid,
						    t4_intr, irq->rxq, s);
					}
					if (rc != 0)
						return (rc);
#ifdef RSS
					/*
					 * Only vectors that serve a NIC rxq
					 * are bound to an RSS CPU.  The return
					 * value of bus_bind_intr is ignored.
					 */
					if (q < vi->nrxq) {
						bus_bind_intr(sc->dev, irq->res,
						    rss_getcpu(q % nbuckets));
					}
#endif
					irq++;
					rid++;
					vi->nintr++;
				}
			} else {
				/* No netmap queues: one vector per NIC rxq. */
				for_each_rxq(vi, q, rxq) {
					snprintf(s, sizeof(s), "%x%c%x", p,
					    'a' + v, q);
					rc = t4_alloc_irq(sc, irq, rid,
					    t4_intr, rxq, s);
					if (rc != 0)
						return (rc);
#ifdef RSS
					bus_bind_intr(sc->dev, irq->res,
					    rss_getcpu(q % nbuckets));
#endif
					irq++;
					rid++;
					vi->nintr++;
				}
			}
#ifdef TCP_OFFLOAD
			/*
			 * Offload rx queues get their own vectors; the VI
			 * letter in the description is uppercase for these.
			 */
			for_each_ofld_rxq(vi, q, ofld_rxq) {
				snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
				rc = t4_alloc_irq(sc, irq, rid, t4_intr,
				    ofld_rxq, s);
				if (rc != 0)
					return (rc);
				irq++;
				rid++;
				vi->nintr++;
			}
#endif
		}
	}
	/* Every one of the sc->intr_count entries must have been consumed. */
	MPASS(irq == &sc->irq[sc->intr_count]);

	return (0);
}
5782
5783 int
5784 adapter_full_init(struct adapter *sc)
5785 {
5786         int rc, i;
5787 #ifdef RSS
5788         uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5789         uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5790 #endif
5791
5792         ASSERT_SYNCHRONIZED_OP(sc);
5793         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5794         KASSERT((sc->flags & FULL_INIT_DONE) == 0,
5795             ("%s: FULL_INIT_DONE already", __func__));
5796
5797         /*
5798          * queues that belong to the adapter (not any particular port).
5799          */
5800         rc = t4_setup_adapter_queues(sc);
5801         if (rc != 0)
5802                 goto done;
5803
5804         for (i = 0; i < nitems(sc->tq); i++) {
5805                 sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
5806                     taskqueue_thread_enqueue, &sc->tq[i]);
5807                 if (sc->tq[i] == NULL) {
5808                         device_printf(sc->dev,
5809                             "failed to allocate task queue %d\n", i);
5810                         rc = ENOMEM;
5811                         goto done;
5812                 }
5813                 taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
5814                     device_get_nameunit(sc->dev), i);
5815         }
5816 #ifdef RSS
5817         MPASS(RSS_KEYSIZE == 40);
5818         rss_getkey((void *)&raw_rss_key[0]);
5819         for (i = 0; i < nitems(rss_key); i++) {
5820                 rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
5821         }
5822         t4_write_rss_key(sc, &rss_key[0], -1, 1);
5823 #endif
5824
5825         if (!(sc->flags & IS_VF))
5826                 t4_intr_enable(sc);
5827 #ifdef KERN_TLS
5828         if (sc->flags & KERN_TLS_OK)
5829                 callout_reset_sbt(&sc->ktls_tick, SBT_1MS, 0, ktls_tick, sc,
5830                     C_HARDCLOCK);
5831 #endif
5832         sc->flags |= FULL_INIT_DONE;
5833 done:
5834         if (rc != 0)
5835                 adapter_full_uninit(sc);
5836
5837         return (rc);
5838 }
5839
5840 int
5841 adapter_full_uninit(struct adapter *sc)
5842 {
5843         int i;
5844
5845         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5846
5847         t4_teardown_adapter_queues(sc);
5848
5849         for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
5850                 taskqueue_free(sc->tq[i]);
5851                 sc->tq[i] = NULL;
5852         }
5853
5854         sc->flags &= ~FULL_INIT_DONE;
5855
5856         return (0);
5857 }
5858
5859 #ifdef RSS
/*
 * Kernel RSS hash types that this driver translates to and from the hardware
 * hash-enable bits: 2-tuple IPv4/IPv6 and 4-tuple TCP/UDP over IPv4/IPv6.
 */
#define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
    RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
    RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
    RSS_HASHTYPE_RSS_UDP_IPV6)
5864
5865 /* Translates kernel hash types to hardware. */
5866 static int
5867 hashconfig_to_hashen(int hashconfig)
5868 {
5869         int hashen = 0;
5870
5871         if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
5872                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
5873         if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
5874                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
5875         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
5876                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5877                     F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5878         }
5879         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
5880                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5881                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5882         }
5883         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
5884                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5885         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
5886                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5887
5888         return (hashen);
5889 }
5890
5891 /* Translates hardware hash types to kernel. */
5892 static int
5893 hashen_to_hashconfig(int hashen)
5894 {
5895         int hashconfig = 0;
5896
5897         if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
5898                 /*
5899                  * If UDP hashing was enabled it must have been enabled for
5900                  * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
5901                  * enabling any 4-tuple hash is nonsense configuration.
5902                  */
5903                 MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5904                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
5905
5906                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5907                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
5908                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5909                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
5910         }
5911         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5912                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
5913         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5914                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
5915         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
5916                 hashconfig |= RSS_HASHTYPE_RSS_IPV4;
5917         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
5918                 hashconfig |= RSS_HASHTYPE_RSS_IPV6;
5919
5920         return (hashconfig);
5921 }
5922 #endif
5923
5924 int
5925 vi_full_init(struct vi_info *vi)
5926 {
5927         struct adapter *sc = vi->adapter;
5928         struct ifnet *ifp = vi->ifp;
5929         uint16_t *rss;
5930         struct sge_rxq *rxq;
5931         int rc, i, j;
5932 #ifdef RSS
5933         int nbuckets = rss_getnumbuckets();
5934         int hashconfig = rss_gethashconfig();
5935         int extra;
5936 #endif
5937
5938         ASSERT_SYNCHRONIZED_OP(sc);
5939         KASSERT((vi->flags & VI_INIT_DONE) == 0,
5940             ("%s: VI_INIT_DONE already", __func__));
5941
5942         sysctl_ctx_init(&vi->ctx);
5943         vi->flags |= VI_SYSCTL_CTX;
5944
5945         /*
5946          * Allocate tx/rx/fl queues for this VI.
5947          */
5948         rc = t4_setup_vi_queues(vi);
5949         if (rc != 0)
5950                 goto done;      /* error message displayed already */
5951
5952         /*
5953          * Setup RSS for this VI.  Save a copy of the RSS table for later use.
5954          */
5955         if (vi->nrxq > vi->rss_size) {
5956                 if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
5957                     "some queues will never receive traffic.\n", vi->nrxq,
5958                     vi->rss_size);
5959         } else if (vi->rss_size % vi->nrxq) {
5960                 if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
5961                     "expect uneven traffic distribution.\n", vi->nrxq,
5962                     vi->rss_size);
5963         }
5964 #ifdef RSS
5965         if (vi->nrxq != nbuckets) {
5966                 if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
5967                     "performance will be impacted.\n", vi->nrxq, nbuckets);
5968         }
5969 #endif
5970         rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
5971         for (i = 0; i < vi->rss_size;) {
5972 #ifdef RSS
5973                 j = rss_get_indirection_to_bucket(i);
5974                 j %= vi->nrxq;
5975                 rxq = &sc->sge.rxq[vi->first_rxq + j];
5976                 rss[i++] = rxq->iq.abs_id;
5977 #else
5978                 for_each_rxq(vi, j, rxq) {
5979                         rss[i++] = rxq->iq.abs_id;
5980                         if (i == vi->rss_size)
5981                                 break;
5982                 }
5983 #endif
5984         }
5985
5986         rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
5987             vi->rss_size);
5988         if (rc != 0) {
5989                 free(rss, M_CXGBE);
5990                 if_printf(ifp, "rss_config failed: %d\n", rc);
5991                 goto done;
5992         }
5993
5994 #ifdef RSS
5995         vi->hashen = hashconfig_to_hashen(hashconfig);
5996
5997         /*
5998          * We may have had to enable some hashes even though the global config
5999          * wants them disabled.  This is a potential problem that must be
6000          * reported to the user.
6001          */
6002         extra = hashen_to_hashconfig(vi->hashen) ^ hashconfig;
6003
6004         /*
6005          * If we consider only the supported hash types, then the enabled hashes
6006          * are a superset of the requested hashes.  In other words, there cannot
6007          * be any supported hash that was requested but not enabled, but there
6008          * can be hashes that were not requested but had to be enabled.
6009          */
6010         extra &= SUPPORTED_RSS_HASHTYPES;
6011         MPASS((extra & hashconfig) == 0);
6012
6013         if (extra) {
6014                 if_printf(ifp,
6015                     "global RSS config (0x%x) cannot be accommodated.\n",
6016                     hashconfig);
6017         }
6018         if (extra & RSS_HASHTYPE_RSS_IPV4)
6019                 if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
6020         if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
6021                 if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
6022         if (extra & RSS_HASHTYPE_RSS_IPV6)
6023                 if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
6024         if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
6025                 if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
6026         if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
6027                 if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
6028         if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
6029                 if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
6030 #else
6031         vi->hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
6032             F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
6033             F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
6034             F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
6035 #endif
6036         rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, rss[0], 0, 0);
6037         if (rc != 0) {
6038                 free(rss, M_CXGBE);
6039                 if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
6040                 goto done;
6041         }
6042
6043         vi->rss = rss;
6044         vi->flags |= VI_INIT_DONE;
6045 done:
6046         if (rc != 0)
6047                 vi_full_uninit(vi);
6048
6049         return (rc);
6050 }
6051
6052 /*
6053  * Idempotent.
6054  */
6055 int
6056 vi_full_uninit(struct vi_info *vi)
6057 {
6058         struct port_info *pi = vi->pi;
6059         struct adapter *sc = pi->adapter;
6060         int i;
6061         struct sge_rxq *rxq;
6062         struct sge_txq *txq;
6063 #ifdef TCP_OFFLOAD
6064         struct sge_ofld_rxq *ofld_rxq;
6065 #endif
6066 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6067         struct sge_wrq *ofld_txq;
6068 #endif
6069
6070         if (vi->flags & VI_INIT_DONE) {
6071
6072                 /* Need to quiesce queues.  */
6073
6074                 /* XXX: Only for the first VI? */
6075                 if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
6076                         quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
6077
6078                 for_each_txq(vi, i, txq) {
6079                         quiesce_txq(sc, txq);
6080                 }
6081
6082 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6083                 for_each_ofld_txq(vi, i, ofld_txq) {
6084                         quiesce_wrq(sc, ofld_txq);
6085                 }
6086 #endif
6087
6088                 for_each_rxq(vi, i, rxq) {
6089                         quiesce_iq(sc, &rxq->iq);
6090                         quiesce_fl(sc, &rxq->fl);
6091                 }
6092
6093 #ifdef TCP_OFFLOAD
6094                 for_each_ofld_rxq(vi, i, ofld_rxq) {
6095                         quiesce_iq(sc, &ofld_rxq->iq);
6096                         quiesce_fl(sc, &ofld_rxq->fl);
6097                 }
6098 #endif
6099                 free(vi->rss, M_CXGBE);
6100                 free(vi->nm_rss, M_CXGBE);
6101         }
6102
6103         t4_teardown_vi_queues(vi);
6104         vi->flags &= ~VI_INIT_DONE;
6105
6106         return (0);
6107 }
6108
6109 static void
6110 quiesce_txq(struct adapter *sc, struct sge_txq *txq)
6111 {
6112         struct sge_eq *eq = &txq->eq;
6113         struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
6114
6115         (void) sc;      /* unused */
6116
6117 #ifdef INVARIANTS
6118         TXQ_LOCK(txq);
6119         MPASS((eq->flags & EQ_ENABLED) == 0);
6120         TXQ_UNLOCK(txq);
6121 #endif
6122
6123         /* Wait for the mp_ring to empty. */
6124         while (!mp_ring_is_idle(txq->r)) {
6125                 mp_ring_check_drainage(txq->r, 4096);
6126                 pause("rquiesce", 1);
6127         }
6128
6129         /* Then wait for the hardware to finish. */
6130         while (spg->cidx != htobe16(eq->pidx))
6131                 pause("equiesce", 1);
6132
6133         /* Finally, wait for the driver to reclaim all descriptors. */
6134         while (eq->cidx != eq->pidx)
6135                 pause("dquiesce", 1);
6136 }
6137
/*
 * Placeholder for quiescing a work request queue.  Does nothing at present.
 */
static void
quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
{
	(void) sc;	/* unused */
	(void) wrq;	/* unused */

	/* XXXTX */
}
6144
6145 static void
6146 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
6147 {
6148         (void) sc;      /* unused */
6149
6150         /* Synchronize with the interrupt handler */
6151         while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
6152                 pause("iqfree", 1);
6153 }
6154
/*
 * Marks a free list as doomed and stops the adapter's sfl callout.  The flag
 * is set with both sc->sfl_lock and the free list's own lock held.
 *
 * NOTE(review): FL_DOOMED presumably tells the starving-free-list callout to
 * leave this free list alone -- confirm against the sfl callout handler.
 */
static void
quiesce_fl(struct adapter *sc, struct sge_fl *fl)
{
	/* sfl_lock is taken first, then the free list lock. */
	mtx_lock(&sc->sfl_lock);
	FL_LOCK(fl);
	fl->flags |= FL_DOOMED;
	FL_UNLOCK(fl);
	callout_stop(&sc->sfl_callout);
	mtx_unlock(&sc->sfl_lock);

	/* The free list must not be marked starving at this point. */
	KASSERT((fl->flags & FL_STARVING) == 0,
	    ("%s: still starving", __func__));
}
6168
6169 static int
6170 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
6171     driver_intr_t *handler, void *arg, char *name)
6172 {
6173         int rc;
6174
6175         irq->rid = rid;
6176         irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
6177             RF_SHAREABLE | RF_ACTIVE);
6178         if (irq->res == NULL) {
6179                 device_printf(sc->dev,
6180                     "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
6181                 return (ENOMEM);
6182         }
6183
6184         rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
6185             NULL, handler, arg, &irq->tag);
6186         if (rc != 0) {
6187                 device_printf(sc->dev,
6188                     "failed to setup interrupt for rid %d, name %s: %d\n",
6189                     rid, name, rc);
6190         } else if (name)
6191                 bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
6192
6193         return (rc);
6194 }
6195
6196 static int
6197 t4_free_irq(struct adapter *sc, struct irq *irq)
6198 {
6199         if (irq->tag)
6200                 bus_teardown_intr(sc->dev, irq->res, irq->tag);
6201         if (irq->res)
6202                 bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
6203
6204         bzero(irq, sizeof(*irq));
6205
6206         return (0);
6207 }
6208
6209 static void
6210 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
6211 {
6212
6213         regs->version = chip_id(sc) | chip_rev(sc) << 10;
6214         t4_get_regs(sc, buf, regs->len);
6215 }
6216
/*
 * PL indirect access registers.  A command written to A_PL_INDIR_CMD selects
 * the VF (VFID) and the register address (ADDR); the data is then read or
 * written through A_PL_INDIR_DATA.  AUTOINC is set by the users in this file
 * that access consecutive registers through repeated data accesses.
 *
 * S_ is the field's bit position, M_ its mask (before shifting), V_ builds
 * the field from a value, and G_ extracts the field from a register value.
 *
 * The V_ macros convert their argument to uint32_t before shifting so that
 * V_PL_AUTOINC(1) does not shift a signed int into its sign bit.
 */
#define A_PL_INDIR_CMD  0x1f8

#define S_PL_AUTOINC    31
#define M_PL_AUTOINC    0x1U
#define V_PL_AUTOINC(x) ((uint32_t)(x) << S_PL_AUTOINC)
#define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)

#define S_PL_VFID       20
#define M_PL_VFID       0xffU
#define V_PL_VFID(x)    ((uint32_t)(x) << S_PL_VFID)
#define G_PL_VFID(x)    (((x) >> S_PL_VFID) & M_PL_VFID)

#define S_PL_ADDR       0
#define M_PL_ADDR       0xfffffU
#define V_PL_ADDR(x)    ((uint32_t)(x) << S_PL_ADDR)
#define G_PL_ADDR(x)    (((x) >> S_PL_ADDR) & M_PL_ADDR)

#define A_PL_INDIR_DATA 0x1fc
6235
6236 static uint64_t
6237 read_vf_stat(struct adapter *sc, u_int vin, int reg)
6238 {
6239         u32 stats[2];
6240
6241         mtx_assert(&sc->reg_lock, MA_OWNED);
6242         if (sc->flags & IS_VF) {
6243                 stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
6244                 stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
6245         } else {
6246                 t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
6247                     V_PL_VFID(vin) | V_PL_ADDR(VF_MPS_REG(reg)));
6248                 stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
6249                 stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
6250         }
6251         return (((uint64_t)stats[1]) << 32 | stats[0]);
6252 }
6253
6254 static void
6255 t4_get_vi_stats(struct adapter *sc, u_int vin, struct fw_vi_stats_vf *stats)
6256 {
6257
6258 #define GET_STAT(name) \
6259         read_vf_stat(sc, vin, A_MPS_VF_STAT_##name##_L)
6260
6261         stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
6262         stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
6263         stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
6264         stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
6265         stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
6266         stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
6267         stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
6268         stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
6269         stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
6270         stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
6271         stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
6272         stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
6273         stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
6274         stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
6275         stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
6276         stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
6277
6278 #undef GET_STAT
6279 }
6280
6281 static void
6282 t4_clr_vi_stats(struct adapter *sc, u_int vin)
6283 {
6284         int reg;
6285
6286         t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(vin) |
6287             V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
6288         for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
6289              reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
6290                 t4_write_reg(sc, A_PL_INDIR_DATA, 0);
6291 }
6292
6293 static void
6294 vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
6295 {
6296         struct timeval tv;
6297         const struct timeval interval = {0, 250000};    /* 250ms */
6298
6299         if (!(vi->flags & VI_INIT_DONE))
6300                 return;
6301
6302         getmicrotime(&tv);
6303         timevalsub(&tv, &interval);
6304         if (timevalcmp(&tv, &vi->last_refreshed, <))
6305                 return;
6306
6307         mtx_lock(&sc->reg_lock);
6308         t4_get_vi_stats(sc, vi->vin, &vi->stats);
6309         getmicrotime(&vi->last_refreshed);
6310         mtx_unlock(&sc->reg_lock);
6311 }
6312
6313 static void
6314 cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
6315 {
6316         u_int i, v, tnl_cong_drops, chan_map;
6317         struct timeval tv;
6318         const struct timeval interval = {0, 250000};    /* 250ms */
6319
6320         getmicrotime(&tv);
6321         timevalsub(&tv, &interval);
6322         if (timevalcmp(&tv, &pi->last_refreshed, <))
6323                 return;
6324
6325         tnl_cong_drops = 0;
6326         t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
6327         chan_map = pi->rx_e_chan_map;
6328         while (chan_map) {
6329                 i = ffs(chan_map) - 1;
6330                 mtx_lock(&sc->reg_lock);
6331                 t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
6332                     A_TP_MIB_TNL_CNG_DROP_0 + i);
6333                 mtx_unlock(&sc->reg_lock);
6334                 tnl_cong_drops += v;
6335                 chan_map &= ~(1 << i);
6336         }
6337         pi->tnl_cong_drops = tnl_cong_drops;
6338         getmicrotime(&pi->last_refreshed);
6339 }
6340
6341 static void
6342 cxgbe_tick(void *arg)
6343 {
6344         struct port_info *pi = arg;
6345         struct adapter *sc = pi->adapter;
6346
6347         PORT_LOCK_ASSERT_OWNED(pi);
6348         cxgbe_refresh_stats(sc, pi);
6349
6350         callout_schedule(&pi->tick, hz);
6351 }
6352
6353 void
6354 vi_tick(void *arg)
6355 {
6356         struct vi_info *vi = arg;
6357         struct adapter *sc = vi->adapter;
6358
6359         vi_refresh_stats(sc, vi);
6360
6361         callout_schedule(&vi->tick, hz);
6362 }
6363
/*
 * Bit-name tables for the firmware capability words, indexed by capability
 * class (see the trailing comment on each entry).  Each string starts with
 * \20 and is followed by <bit number, 1-based, as an octal escape><name>
 * pairs.
 *
 * NOTE(review): this looks like the kernel printf(9) "%b" format with \20
 * selecting hexadecimal output -- confirm against sysctl_bitfield_16b, which
 * is the consumer of these strings.
 *
 * Should match fw_caps_config_<foo> enums in t4fw_interface.h
 */
static char *caps_decoder[] = {
	"\20\001IPMI\002NCSI",				/* 0: NBM */
	"\20\001PPP\002QFC\003DCBX",			/* 1: link */
	"\20\001INGRESS\002EGRESS",			/* 2: switch */
	"\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"	/* 3: NIC */
	    "\006HASHFILTER\007ETHOFLD",
	"\20\001TOE",					/* 4: TOE */
	"\20\001RDDP\002RDMAC",				/* 5: RDMA */
	"\20\001INITIATOR_PDU\002TARGET_PDU"		/* 6: iSCSI */
	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
	    "\007T10DIF"
	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
	"\20\001LOOKASIDE\002TLSKEYS",			/* 7: Crypto */
	"\20\001INITIATOR\002TARGET\003CTRL_OFLD"	/* 8: FCoE */
		    "\004PO_INITIATOR\005PO_TARGET",
};
6384
6385 void
6386 t4_sysctls(struct adapter *sc)
6387 {
6388         struct sysctl_ctx_list *ctx;
6389         struct sysctl_oid *oid;
6390         struct sysctl_oid_list *children, *c0;
6391         static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
6392
6393         ctx = device_get_sysctl_ctx(sc->dev);
6394
6395         /*
6396          * dev.t4nex.X.
6397          */
6398         oid = device_get_sysctl_tree(sc->dev);
6399         c0 = children = SYSCTL_CHILDREN(oid);
6400
6401         sc->sc_do_rxcopy = 1;
6402         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
6403             &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
6404
6405         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
6406             sc->params.nports, "# of ports");
6407
6408         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
6409             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, doorbells,
6410             (uintptr_t)&sc->doorbells, sysctl_bitfield_8b, "A",
6411             "available doorbells");
6412
6413         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
6414             sc->params.vpd.cclk, "core clock frequency (in KHz)");
6415
6416         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
6417             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
6418             sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val),
6419             sysctl_int_array, "A", "interrupt holdoff timer values (us)");
6420
6421         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
6422             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
6423             sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val),
6424             sysctl_int_array, "A", "interrupt holdoff packet counter values");
6425
6426         t4_sge_sysctls(sc, ctx, children);
6427
6428         sc->lro_timeout = 100;
6429         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
6430             &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
6431
6432         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
6433             &sc->debug_flags, 0, "flags to enable runtime debugging");
6434
6435         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
6436             CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
6437
6438         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
6439             CTLFLAG_RD, sc->fw_version, 0, "firmware version");
6440
6441         if (sc->flags & IS_VF)
6442                 return;
6443
6444         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
6445             NULL, chip_rev(sc), "chip hardware revision");
6446
6447         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
6448             CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
6449
6450         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
6451             CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
6452
6453         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
6454             CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
6455
6456         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
6457             CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");
6458
6459         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
6460             CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
6461
6462         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
6463             sc->er_version, 0, "expansion ROM version");
6464
6465         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
6466             sc->bs_version, 0, "bootstrap firmware version");
6467
6468         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
6469             NULL, sc->params.scfg_vers, "serial config version");
6470
6471         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
6472             NULL, sc->params.vpd_vers, "VPD version");
6473
6474         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
6475             CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
6476
6477         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
6478             sc->cfcsum, "config file checksum");
6479
6480 #define SYSCTL_CAP(name, n, text) \
6481         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
6482             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, caps_decoder[n], \
6483             (uintptr_t)&sc->name, sysctl_bitfield_16b, "A", \
6484             "available " text " capabilities")
6485
6486         SYSCTL_CAP(nbmcaps, 0, "NBM");
6487         SYSCTL_CAP(linkcaps, 1, "link");
6488         SYSCTL_CAP(switchcaps, 2, "switch");
6489         SYSCTL_CAP(niccaps, 3, "NIC");
6490         SYSCTL_CAP(toecaps, 4, "TCP offload");
6491         SYSCTL_CAP(rdmacaps, 5, "RDMA");
6492         SYSCTL_CAP(iscsicaps, 6, "iSCSI");
6493         SYSCTL_CAP(cryptocaps, 7, "crypto");
6494         SYSCTL_CAP(fcoecaps, 8, "FCoE");
6495 #undef SYSCTL_CAP
6496
6497         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
6498             NULL, sc->tids.nftids, "number of filters");
6499
6500         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
6501             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6502             sysctl_temperature, "I", "chip temperature (in Celsius)");
6503         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "reset_sensor",
6504             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6505             sysctl_reset_sensor, "I", "reset the chip's temperature sensor.");
6506
6507         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg",
6508             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6509             sysctl_loadavg, "A",
6510             "microprocessor load averages (debug firmwares only)");
6511
6512         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "core_vdd",
6513             CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, sysctl_vdd,
6514             "I", "core Vdd (in mV)");
6515
6516         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus",
6517             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, LOCAL_CPUS,
6518             sysctl_cpus, "A", "local CPUs");
6519
6520         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus",
6521             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, INTR_CPUS,
6522             sysctl_cpus, "A", "preferred CPUs for interrupts");
6523
6524         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "swintr", CTLFLAG_RW,
6525             &sc->swintr, 0, "software triggered interrupts");
6526
6527         /*
6528          * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
6529          */
6530         oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
6531             CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL,
6532             "logs and miscellaneous information");
6533         children = SYSCTL_CHILDREN(oid);
6534
6535         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
6536             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6537             sysctl_cctrl, "A", "congestion control");
6538
6539         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
6540             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6541             sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
6542
6543         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
6544             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 1,
6545             sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
6546
6547         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
6548             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 2,
6549             sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
6550
6551         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
6552             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 3,
6553             sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
6554
6555         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
6556             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 4,
6557             sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
6558
6559         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
6560             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 5,
6561             sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
6562
6563         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
6564             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6565             sysctl_cim_la, "A", "CIM logic analyzer");
6566
6567         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
6568             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6569             sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
6570
6571         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
6572             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6573             0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
6574
6575         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
6576             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6577             1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
6578
6579         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
6580             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6581             2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
6582
6583         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
6584             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6585             3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
6586
6587         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
6588             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6589             4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
6590
6591         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
6592             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6593             5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
6594
6595         if (chip_id(sc) > CHELSIO_T4) {
6596                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
6597                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6598                     6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A",
6599                     "CIM OBQ 6 (SGE0-RX)");
6600
6601                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
6602                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6603                     7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A",
6604                     "CIM OBQ 7 (SGE1-RX)");
6605         }
6606
6607         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
6608             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6609             sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
6610
6611         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
6612             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6613             sysctl_cim_qcfg, "A", "CIM queue configuration");
6614
6615         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
6616             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6617             sysctl_cpl_stats, "A", "CPL statistics");
6618
6619         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
6620             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6621             sysctl_ddp_stats, "A", "non-TCP DDP statistics");
6622
6623         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
6624             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6625             sysctl_devlog, "A", "firmware's device log");
6626
6627         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
6628             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6629             sysctl_fcoe_stats, "A", "FCoE statistics");
6630
6631         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
6632             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6633             sysctl_hw_sched, "A", "hardware scheduler ");
6634
6635         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
6636             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6637             sysctl_l2t, "A", "hardware L2 table");
6638
6639         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt",
6640             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6641             sysctl_smt, "A", "hardware source MAC table");
6642
6643 #ifdef INET6
6644         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "clip",
6645             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6646             sysctl_clip, "A", "active CLIP table entries");
6647 #endif
6648
6649         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
6650             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6651             sysctl_lb_stats, "A", "loopback statistics");
6652
6653         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
6654             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6655             sysctl_meminfo, "A", "memory regions");
6656
6657         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
6658             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6659             chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
6660             "A", "MPS TCAM entries");
6661
6662         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
6663             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6664             sysctl_path_mtus, "A", "path MTUs");
6665
6666         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
6667             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6668             sysctl_pm_stats, "A", "PM statistics");
6669
6670         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
6671             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6672             sysctl_rdma_stats, "A", "RDMA statistics");
6673
6674         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
6675             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6676             sysctl_tcp_stats, "A", "TCP statistics");
6677
6678         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
6679             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6680             sysctl_tids, "A", "TID information");
6681
6682         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
6683             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6684             sysctl_tp_err_stats, "A", "TP error statistics");
6685
6686         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
6687             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6688             sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask");
6689
6690         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
6691             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6692             sysctl_tp_la, "A", "TP logic analyzer");
6693
6694         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
6695             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6696             sysctl_tx_rate, "A", "Tx rate");
6697
6698         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
6699             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6700             sysctl_ulprx_la, "A", "ULPRX logic analyzer");
6701
6702         if (chip_id(sc) >= CHELSIO_T5) {
6703                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
6704                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6705                     sysctl_wcwr_stats, "A", "write combined work requests");
6706         }
6707
6708 #ifdef KERN_TLS
6709         if (sc->flags & KERN_TLS_OK) {
6710                 /*
6711                  * dev.t4nex.0.tls.
6712                  */
6713                 oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "tls",
6714                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "KERN_TLS parameters");
6715                 children = SYSCTL_CHILDREN(oid);
6716
6717                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "inline_keys",
6718                     CTLFLAG_RW, &sc->tlst.inline_keys, 0, "Always pass TLS "
6719                     "keys in work requests (1) or attempt to store TLS keys "
6720                     "in card memory.");
6721                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "combo_wrs",
6722                     CTLFLAG_RW, &sc->tlst.combo_wrs, 0, "Attempt to combine "
6723                     "TCB field updates with TLS record work requests.");
6724         }
6725 #endif
6726
6727 #ifdef TCP_OFFLOAD
6728         if (is_offload(sc)) {
6729                 int i;
6730                 char s[4];
6731
6732                 /*
6733                  * dev.t4nex.X.toe.
6734                  */
6735                 oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe",
6736                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TOE parameters");
6737                 children = SYSCTL_CHILDREN(oid);
6738
6739                 sc->tt.cong_algorithm = -1;
6740                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
6741                     CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
6742                     "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
6743                     "3 = highspeed)");
6744
6745                 sc->tt.sndbuf = -1;
6746                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
6747                     &sc->tt.sndbuf, 0, "hardware send buffer");
6748
6749                 sc->tt.ddp = 0;
6750                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp",
6751                     CTLFLAG_RW | CTLFLAG_SKIP, &sc->tt.ddp, 0, "");
6752                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_zcopy", CTLFLAG_RW,
6753                     &sc->tt.ddp, 0, "Enable zero-copy aio_read(2)");
6754
6755                 sc->tt.rx_coalesce = -1;
6756                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
6757                     CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
6758
6759                 sc->tt.tls = 0;
6760                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls", CTLTYPE_INT |
6761                     CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, sysctl_tls, "I",
6762                     "Inline TLS allowed");
6763
6764                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
6765                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
6766                     sysctl_tls_rx_ports, "I",
6767                     "TCP ports that use inline TLS+TOE RX");
6768
6769                 sc->tt.tx_align = -1;
6770                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
6771                     CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
6772
6773                 sc->tt.tx_zcopy = 0;
6774                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
6775                     CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
6776                     "Enable zero-copy aio_write(2)");
6777
6778                 sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
6779                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
6780                     "cop_managed_offloading", CTLFLAG_RW,
6781                     &sc->tt.cop_managed_offloading, 0,
6782                     "COP (Connection Offload Policy) controls all TOE offload");
6783
6784                 sc->tt.autorcvbuf_inc = 16 * 1024;
6785                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "autorcvbuf_inc",
6786                     CTLFLAG_RW, &sc->tt.autorcvbuf_inc, 0,
6787                     "autorcvbuf increment");
6788
6789                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
6790                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6791                     sysctl_tp_tick, "A", "TP timer tick (us)");
6792
6793                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
6794                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 1,
6795                     sysctl_tp_tick, "A", "TCP timestamp tick (us)");
6796
6797                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
6798                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 2,
6799                     sysctl_tp_tick, "A", "DACK tick (us)");
6800
6801                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
6802                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
6803                     sysctl_tp_dack_timer, "IU", "DACK timer (us)");
6804
6805                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
6806                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6807                     A_TP_RXT_MIN, sysctl_tp_timer, "LU",
6808                     "Minimum retransmit interval (us)");
6809
6810                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
6811                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6812                     A_TP_RXT_MAX, sysctl_tp_timer, "LU",
6813                     "Maximum retransmit interval (us)");
6814
6815                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
6816                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6817                     A_TP_PERS_MIN, sysctl_tp_timer, "LU",
6818                     "Persist timer min (us)");
6819
6820                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
6821                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6822                     A_TP_PERS_MAX, sysctl_tp_timer, "LU",
6823                     "Persist timer max (us)");
6824
6825                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
6826                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6827                     A_TP_KEEP_IDLE, sysctl_tp_timer, "LU",
6828                     "Keepalive idle timer (us)");
6829
6830                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
6831                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6832                     A_TP_KEEP_INTVL, sysctl_tp_timer, "LU",
6833                     "Keepalive interval timer (us)");
6834
6835                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
6836                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6837                     A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)");
6838
6839                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
6840                     CTLTYPE_ULONG | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6841                     A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU",
6842                     "FINWAIT2 timer (us)");
6843
6844                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
6845                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6846                     S_SYNSHIFTMAX, sysctl_tp_shift_cnt, "IU",
6847                     "Number of SYN retransmissions before abort");
6848
6849                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
6850                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6851                     S_RXTSHIFTMAXR2, sysctl_tp_shift_cnt, "IU",
6852                     "Number of retransmissions before abort");
6853
6854                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
6855                     CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6856                     S_KEEPALIVEMAXR2, sysctl_tp_shift_cnt, "IU",
6857                     "Number of keepalive probes before abort");
6858
6859                 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
6860                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
6861                     "TOE retransmit backoffs");
6862                 children = SYSCTL_CHILDREN(oid);
6863                 for (i = 0; i < 16; i++) {
6864                         snprintf(s, sizeof(s), "%u", i);
6865                         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
6866                             CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
6867                             i, sysctl_tp_backoff, "IU",
6868                             "TOE retransmit backoff");
6869                 }
6870         }
6871 #endif
6872 }
6873
6874 void
6875 vi_sysctls(struct vi_info *vi)
6876 {
6877         struct sysctl_ctx_list *ctx;
6878         struct sysctl_oid *oid;
6879         struct sysctl_oid_list *children;
6880
6881         ctx = device_get_sysctl_ctx(vi->dev);
6882
6883         /*
6884          * dev.v?(cxgbe|cxl).X.
6885          */
6886         oid = device_get_sysctl_tree(vi->dev);
6887         children = SYSCTL_CHILDREN(oid);
6888
6889         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
6890             vi->viid, "VI identifer");
6891         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
6892             &vi->nrxq, 0, "# of rx queues");
6893         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
6894             &vi->ntxq, 0, "# of tx queues");
6895         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
6896             &vi->first_rxq, 0, "index of first rx queue");
6897         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
6898             &vi->first_txq, 0, "index of first tx queue");
6899         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_base", CTLFLAG_RD, NULL,
6900             vi->rss_base, "start of RSS indirection table");
6901         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
6902             vi->rss_size, "size of RSS indirection table");
6903
6904         if (IS_MAIN_VI(vi)) {
6905                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
6906                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6907                     sysctl_noflowq, "IU",
6908                     "Reserve queue 0 for non-flowid packets");
6909         }
6910
6911         if (vi->adapter->flags & IS_VF) {
6912                 MPASS(vi->flags & TX_USES_VM_WR);
6913                 SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_vm_wr", CTLFLAG_RD,
6914                     NULL, 1, "use VM work requests for transmit");
6915         } else {
6916                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_vm_wr",
6917                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6918                     sysctl_tx_vm_wr, "I", "use VM work requestes for transmit");
6919         }
6920
6921 #ifdef TCP_OFFLOAD
6922         if (vi->nofldrxq != 0) {
6923                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
6924                     &vi->nofldrxq, 0,
6925                     "# of rx queues for offloaded TCP connections");
6926                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
6927                     CTLFLAG_RD, &vi->first_ofld_rxq, 0,
6928                     "index of first TOE rx queue");
6929                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
6930                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6931                     sysctl_holdoff_tmr_idx_ofld, "I",
6932                     "holdoff timer index for TOE queues");
6933                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
6934                     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6935                     sysctl_holdoff_pktc_idx_ofld, "I",
6936                     "holdoff packet counter index for TOE queues");
6937         }
6938 #endif
6939 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6940         if (vi->nofldtxq != 0) {
6941                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
6942                     &vi->nofldtxq, 0,
6943                     "# of tx queues for TOE/ETHOFLD");
6944                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
6945                     CTLFLAG_RD, &vi->first_ofld_txq, 0,
6946                     "index of first TOE/ETHOFLD tx queue");
6947         }
6948 #endif
6949 #ifdef DEV_NETMAP
6950         if (vi->nnmrxq != 0) {
6951                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
6952                     &vi->nnmrxq, 0, "# of netmap rx queues");
6953                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
6954                     &vi->nnmtxq, 0, "# of netmap tx queues");
6955                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
6956                     CTLFLAG_RD, &vi->first_nm_rxq, 0,
6957                     "index of first netmap rx queue");
6958                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
6959                     CTLFLAG_RD, &vi->first_nm_txq, 0,
6960                     "index of first netmap tx queue");
6961         }
6962 #endif
6963
6964         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
6965             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6966             sysctl_holdoff_tmr_idx, "I", "holdoff timer index");
6967         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
6968             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6969             sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index");
6970
6971         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
6972             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6973             sysctl_qsize_rxq, "I", "rx queue size");
6974         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
6975             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, vi, 0,
6976             sysctl_qsize_txq, "I", "tx queue size");
6977 }
6978
6979 static void
6980 cxgbe_sysctls(struct port_info *pi)
6981 {
6982         struct sysctl_ctx_list *ctx;
6983         struct sysctl_oid *oid;
6984         struct sysctl_oid_list *children, *children2;
6985         struct adapter *sc = pi->adapter;
6986         int i;
6987         char name[16];
6988         static char *tc_flags = {"\20\1USER\2SYNC\3ASYNC\4ERR"};
6989
6990         ctx = device_get_sysctl_ctx(pi->dev);
6991
6992         /*
6993          * dev.cxgbe.X.
6994          */
6995         oid = device_get_sysctl_tree(pi->dev);
6996         children = SYSCTL_CHILDREN(oid);
6997
6998         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc",
6999             CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0,
7000             sysctl_linkdnrc, "A", "reason why link is down");
7001         if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
7002                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
7003                     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 0,
7004                     sysctl_btphy, "I", "PHY temperature (in Celsius)");
7005                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
7006                     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, pi, 1,
7007                     sysctl_btphy, "I", "PHY firmware version");
7008         }
7009
7010         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
7011             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7012             sysctl_pause_settings, "A",
7013             "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
7014         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
7015             CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7016             sysctl_fec, "A",
7017             "FECs to use (bit 0 = RS, 1 = FC, 2 = none, 5 = auto, 6 = module)");
7018         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "module_fec",
7019             CTLTYPE_STRING | CTLFLAG_MPSAFE, pi, 0, sysctl_module_fec, "A",
7020             "FEC recommended by the cable/transceiver");
7021         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
7022             CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, pi, 0,
7023             sysctl_autoneg, "I",
7024             "autonegotiation (-1 = not supported)");
7025
7026         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcaps", CTLFLAG_RD,
7027             &pi->link_cfg.pcaps, 0, "port capabilities");
7028         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "acaps", CTLFLAG_RD,
7029             &pi->link_cfg.acaps, 0, "advertised capabilities");
7030         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lpacaps", CTLFLAG_RD,
7031             &pi->link_cfg.lpacaps, 0, "link partner advertised capabilities");
7032
7033         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
7034             port_top_speed(pi), "max speed (in Gbps)");
7035         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
7036             pi->mps_bg_map, "MPS buffer group map");
7037         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
7038             NULL, pi->rx_e_chan_map, "TP rx e-channel map");
7039
7040         if (sc->flags & IS_VF)
7041                 return;
7042
7043         /*
7044          * dev.(cxgbe|cxl).X.tc.
7045          */
7046         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc",
7047             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
7048             "Tx scheduler traffic classes (cl_rl)");
7049         children2 = SYSCTL_CHILDREN(oid);
7050         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
7051             CTLFLAG_RW, &pi->sched_params->pktsize, 0,
7052             "pktsize for per-flow cl-rl (0 means up to the driver )");
7053         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
7054             CTLFLAG_RW, &pi->sched_params->burstsize, 0,
7055             "burstsize for per-flow cl-rl (0 means up to the driver)");
7056         for (i = 0; i < sc->chip_params->nsched_cls; i++) {
7057                 struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
7058
7059                 snprintf(name, sizeof(name), "%d", i);
7060                 children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
7061                     SYSCTL_CHILDREN(oid), OID_AUTO, name,
7062                     CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "traffic class"));
7063                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
7064                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, tc_flags,
7065                     (uintptr_t)&tc->flags, sysctl_bitfield_8b, "A", "flags");
7066                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
7067                     CTLFLAG_RD, &tc->refcount, 0, "references to this class");
7068                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
7069                     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc,
7070                     (pi->port_id << 16) | i, sysctl_tc_params, "A",
7071                     "traffic class parameters");
7072         }
7073
7074         /*
7075          * dev.cxgbe.X.stats.
7076          */
7077         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats",
7078             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "port statistics");
7079         children = SYSCTL_CHILDREN(oid);
7080         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
7081             &pi->tx_parse_error, 0,
7082             "# of tx packets with invalid length or # of segments");
7083
7084 #define T4_REGSTAT(name, stat, desc) \
7085     SYSCTL_ADD_OID(ctx, children, OID_AUTO, #name, \
7086         CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, \
7087         (is_t4(sc) ? PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_##stat##_L) : \
7088         T5_PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_##stat##_L)), \
7089         sysctl_handle_t4_reg64, "QU", desc)
7090
7091 /* We get these from port_stats and they may be stale by up to 1s */
7092 #define T4_PORTSTAT(name, desc) \
7093         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
7094             &pi->stats.name, desc)
7095
7096         T4_REGSTAT(tx_octets, TX_PORT_BYTES, "# of octets in good frames");
7097         T4_REGSTAT(tx_frames, TX_PORT_FRAMES, "total # of good frames");
7098         T4_REGSTAT(tx_bcast_frames, TX_PORT_BCAST, "# of broadcast frames");
7099         T4_REGSTAT(tx_mcast_frames, TX_PORT_MCAST, "# of multicast frames");
7100         T4_REGSTAT(tx_ucast_frames, TX_PORT_UCAST, "# of unicast frames");
7101         T4_REGSTAT(tx_error_frames, TX_PORT_ERROR, "# of error frames");
7102         T4_REGSTAT(tx_frames_64, TX_PORT_64B, "# of tx frames in this range");
7103         T4_REGSTAT(tx_frames_65_127, TX_PORT_65B_127B, "# of tx frames in this range");
7104         T4_REGSTAT(tx_frames_128_255, TX_PORT_128B_255B, "# of tx frames in this range");
7105         T4_REGSTAT(tx_frames_256_511, TX_PORT_256B_511B, "# of tx frames in this range");
7106         T4_REGSTAT(tx_frames_512_1023, TX_PORT_512B_1023B, "# of tx frames in this range");
7107         T4_REGSTAT(tx_frames_1024_1518, TX_PORT_1024B_1518B, "# of tx frames in this range");
7108         T4_REGSTAT(tx_frames_1519_max, TX_PORT_1519B_MAX, "# of tx frames in this range");
7109         T4_REGSTAT(tx_drop, TX_PORT_DROP, "# of dropped tx frames");
7110         T4_REGSTAT(tx_pause, TX_PORT_PAUSE, "# of pause frames transmitted");
7111         T4_REGSTAT(tx_ppp0, TX_PORT_PPP0, "# of PPP prio 0 frames transmitted");
7112         T4_REGSTAT(tx_ppp1, TX_PORT_PPP1, "# of PPP prio 1 frames transmitted");
7113         T4_REGSTAT(tx_ppp2, TX_PORT_PPP2, "# of PPP prio 2 frames transmitted");
7114         T4_REGSTAT(tx_ppp3, TX_PORT_PPP3, "# of PPP prio 3 frames transmitted");
7115         T4_REGSTAT(tx_ppp4, TX_PORT_PPP4, "# of PPP prio 4 frames transmitted");
7116         T4_REGSTAT(tx_ppp5, TX_PORT_PPP5, "# of PPP prio 5 frames transmitted");
7117         T4_REGSTAT(tx_ppp6, TX_PORT_PPP6, "# of PPP prio 6 frames transmitted");
7118         T4_REGSTAT(tx_ppp7, TX_PORT_PPP7, "# of PPP prio 7 frames transmitted");
7119
7120         T4_REGSTAT(rx_octets, RX_PORT_BYTES, "# of octets in good frames");
7121         T4_REGSTAT(rx_frames, RX_PORT_FRAMES, "total # of good frames");
7122         T4_REGSTAT(rx_bcast_frames, RX_PORT_BCAST, "# of broadcast frames");
7123         T4_REGSTAT(rx_mcast_frames, RX_PORT_MCAST, "# of multicast frames");
7124         T4_REGSTAT(rx_ucast_frames, RX_PORT_UCAST, "# of unicast frames");
7125         T4_REGSTAT(rx_too_long, RX_PORT_MTU_ERROR, "# of frames exceeding MTU");
7126         T4_REGSTAT(rx_jabber, RX_PORT_MTU_CRC_ERROR, "# of jabber frames");
7127         if (is_t6(sc)) {
7128                 T4_PORTSTAT(rx_fcs_err,
7129                     "# of frames received with bad FCS since last link up");
7130         } else {
7131                 T4_REGSTAT(rx_fcs_err, RX_PORT_CRC_ERROR,
7132                     "# of frames received with bad FCS");
7133         }
7134         T4_REGSTAT(rx_len_err, RX_PORT_LEN_ERROR, "# of frames received with length error");
7135         T4_REGSTAT(rx_symbol_err, RX_PORT_SYM_ERROR, "symbol errors");
7136         T4_REGSTAT(rx_runt, RX_PORT_LESS_64B, "# of short frames received");
7137         T4_REGSTAT(rx_frames_64, RX_PORT_64B, "# of rx frames in this range");
7138         T4_REGSTAT(rx_frames_65_127, RX_PORT_65B_127B, "# of rx frames in this range");
7139         T4_REGSTAT(rx_frames_128_255, RX_PORT_128B_255B, "# of rx frames in this range");
7140         T4_REGSTAT(rx_frames_256_511, RX_PORT_256B_511B, "# of rx frames in this range");
7141         T4_REGSTAT(rx_frames_512_1023, RX_PORT_512B_1023B, "# of rx frames in this range");
7142         T4_REGSTAT(rx_frames_1024_1518, RX_PORT_1024B_1518B, "# of rx frames in this range");
7143         T4_REGSTAT(rx_frames_1519_max, RX_PORT_1519B_MAX, "# of rx frames in this range");
7144         T4_REGSTAT(rx_pause, RX_PORT_PAUSE, "# of pause frames received");
7145         T4_REGSTAT(rx_ppp0, RX_PORT_PPP0, "# of PPP prio 0 frames received");
7146         T4_REGSTAT(rx_ppp1, RX_PORT_PPP1, "# of PPP prio 1 frames received");
7147         T4_REGSTAT(rx_ppp2, RX_PORT_PPP2, "# of PPP prio 2 frames received");
7148         T4_REGSTAT(rx_ppp3, RX_PORT_PPP3, "# of PPP prio 3 frames received");
7149         T4_REGSTAT(rx_ppp4, RX_PORT_PPP4, "# of PPP prio 4 frames received");
7150         T4_REGSTAT(rx_ppp5, RX_PORT_PPP5, "# of PPP prio 5 frames received");
7151         T4_REGSTAT(rx_ppp6, RX_PORT_PPP6, "# of PPP prio 6 frames received");
7152         T4_REGSTAT(rx_ppp7, RX_PORT_PPP7, "# of PPP prio 7 frames received");
7153
7154         T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows");
7155         T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows");
7156         T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows");
7157         T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows");
7158         T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets");
7159         T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets");
7160         T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets");
7161         T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets");
7162
7163 #undef T4_REGSTAT
7164 #undef T4_PORTSTAT
7165
7166         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_toe_tls_records",
7167             CTLFLAG_RD, &pi->tx_toe_tls_records,
7168             "# of TOE TLS records transmitted");
7169         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_toe_tls_octets",
7170             CTLFLAG_RD, &pi->tx_toe_tls_octets,
7171             "# of payload octets in transmitted TOE TLS records");
7172         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_toe_tls_records",
7173             CTLFLAG_RD, &pi->rx_toe_tls_records,
7174             "# of TOE TLS records received");
7175         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_toe_tls_octets",
7176             CTLFLAG_RD, &pi->rx_toe_tls_octets,
7177             "# of payload octets in received TOE TLS records");
7178 }
7179
7180 static int
7181 sysctl_int_array(SYSCTL_HANDLER_ARGS)
7182 {
7183         int rc, *i, space = 0;
7184         struct sbuf sb;
7185
7186         sbuf_new_for_sysctl(&sb, NULL, 64, req);
7187         for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
7188                 if (space)
7189                         sbuf_printf(&sb, " ");
7190                 sbuf_printf(&sb, "%d", *i);
7191                 space = 1;
7192         }
7193         rc = sbuf_finish(&sb);
7194         sbuf_delete(&sb);
7195         return (rc);
7196 }
7197
7198 static int
7199 sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
7200 {
7201         int rc;
7202         struct sbuf *sb;
7203
7204         rc = sysctl_wire_old_buffer(req, 0);
7205         if (rc != 0)
7206                 return(rc);
7207
7208         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7209         if (sb == NULL)
7210                 return (ENOMEM);
7211
7212         sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
7213         rc = sbuf_finish(sb);
7214         sbuf_delete(sb);
7215
7216         return (rc);
7217 }
7218
7219 static int
7220 sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
7221 {
7222         int rc;
7223         struct sbuf *sb;
7224
7225         rc = sysctl_wire_old_buffer(req, 0);
7226         if (rc != 0)
7227                 return(rc);
7228
7229         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7230         if (sb == NULL)
7231                 return (ENOMEM);
7232
7233         sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
7234         rc = sbuf_finish(sb);
7235         sbuf_delete(sb);
7236
7237         return (rc);
7238 }
7239
7240 static int
7241 sysctl_btphy(SYSCTL_HANDLER_ARGS)
7242 {
7243         struct port_info *pi = arg1;
7244         int op = arg2;
7245         struct adapter *sc = pi->adapter;
7246         u_int v;
7247         int rc;
7248
7249         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
7250         if (rc)
7251                 return (rc);
7252         /* XXX: magic numbers */
7253         rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
7254             &v);
7255         end_synchronized_op(sc, 0);
7256         if (rc)
7257                 return (rc);
7258         if (op == 0)
7259                 v /= 256;
7260
7261         rc = sysctl_handle_int(oidp, &v, 0, req);
7262         return (rc);
7263 }
7264
7265 static int
7266 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
7267 {
7268         struct vi_info *vi = arg1;
7269         int rc, val;
7270
7271         val = vi->rsrv_noflowq;
7272         rc = sysctl_handle_int(oidp, &val, 0, req);
7273         if (rc != 0 || req->newptr == NULL)
7274                 return (rc);
7275
7276         if ((val >= 1) && (vi->ntxq > 1))
7277                 vi->rsrv_noflowq = 1;
7278         else
7279                 vi->rsrv_noflowq = 0;
7280
7281         return (rc);
7282 }
7283
7284 static int
7285 sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS)
7286 {
7287         struct vi_info *vi = arg1;
7288         struct adapter *sc = vi->adapter;
7289         int rc, val, i;
7290
7291         MPASS(!(sc->flags & IS_VF));
7292
7293         val = vi->flags & TX_USES_VM_WR ? 1 : 0;
7294         rc = sysctl_handle_int(oidp, &val, 0, req);
7295         if (rc != 0 || req->newptr == NULL)
7296                 return (rc);
7297
7298         if (val != 0 && val != 1)
7299                 return (EINVAL);
7300
7301         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7302             "t4txvm");
7303         if (rc)
7304                 return (rc);
7305         if (vi->ifp->if_drv_flags & IFF_DRV_RUNNING) {
7306                 /*
7307                  * We don't want parse_pkt to run with one setting (VF or PF)
7308                  * and then eth_tx to see a different setting but still use
7309                  * stale information calculated by parse_pkt.
7310                  */
7311                 rc = EBUSY;
7312         } else {
7313                 struct port_info *pi = vi->pi;
7314                 struct sge_txq *txq;
7315                 uint32_t ctrl0;
7316                 uint8_t npkt = sc->params.max_pkts_per_eth_tx_pkts_wr;
7317
7318                 if (val) {
7319                         vi->flags |= TX_USES_VM_WR;
7320                         vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
7321                         ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
7322                             V_TXPKT_INTF(pi->tx_chan));
7323                         if (!(sc->flags & IS_VF))
7324                                 npkt--;
7325                 } else {
7326                         vi->flags &= ~TX_USES_VM_WR;
7327                         vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
7328                         ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
7329                             V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
7330                             V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
7331                 }
7332                 for_each_txq(vi, i, txq) {
7333                         txq->cpl_ctrl0 = ctrl0;
7334                         txq->txp.max_npkt = npkt;
7335                 }
7336         }
7337         end_synchronized_op(sc, LOCK_HELD);
7338         return (rc);
7339 }
7340
7341 static int
7342 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
7343 {
7344         struct vi_info *vi = arg1;
7345         struct adapter *sc = vi->adapter;
7346         int idx, rc, i;
7347         struct sge_rxq *rxq;
7348         uint8_t v;
7349
7350         idx = vi->tmr_idx;
7351
7352         rc = sysctl_handle_int(oidp, &idx, 0, req);
7353         if (rc != 0 || req->newptr == NULL)
7354                 return (rc);
7355
7356         if (idx < 0 || idx >= SGE_NTIMERS)
7357                 return (EINVAL);
7358
7359         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7360             "t4tmr");
7361         if (rc)
7362                 return (rc);
7363
7364         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
7365         for_each_rxq(vi, i, rxq) {
7366 #ifdef atomic_store_rel_8
7367                 atomic_store_rel_8(&rxq->iq.intr_params, v);
7368 #else
7369                 rxq->iq.intr_params = v;
7370 #endif
7371         }
7372         vi->tmr_idx = idx;
7373
7374         end_synchronized_op(sc, LOCK_HELD);
7375         return (0);
7376 }
7377
7378 static int
7379 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
7380 {
7381         struct vi_info *vi = arg1;
7382         struct adapter *sc = vi->adapter;
7383         int idx, rc;
7384
7385         idx = vi->pktc_idx;
7386
7387         rc = sysctl_handle_int(oidp, &idx, 0, req);
7388         if (rc != 0 || req->newptr == NULL)
7389                 return (rc);
7390
7391         if (idx < -1 || idx >= SGE_NCOUNTERS)
7392                 return (EINVAL);
7393
7394         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7395             "t4pktc");
7396         if (rc)
7397                 return (rc);
7398
7399         if (vi->flags & VI_INIT_DONE)
7400                 rc = EBUSY; /* cannot be changed once the queues are created */
7401         else
7402                 vi->pktc_idx = idx;
7403
7404         end_synchronized_op(sc, LOCK_HELD);
7405         return (rc);
7406 }
7407
7408 static int
7409 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
7410 {
7411         struct vi_info *vi = arg1;
7412         struct adapter *sc = vi->adapter;
7413         int qsize, rc;
7414
7415         qsize = vi->qsize_rxq;
7416
7417         rc = sysctl_handle_int(oidp, &qsize, 0, req);
7418         if (rc != 0 || req->newptr == NULL)
7419                 return (rc);
7420
7421         if (qsize < 128 || (qsize & 7))
7422                 return (EINVAL);
7423
7424         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7425             "t4rxqs");
7426         if (rc)
7427                 return (rc);
7428
7429         if (vi->flags & VI_INIT_DONE)
7430                 rc = EBUSY; /* cannot be changed once the queues are created */
7431         else
7432                 vi->qsize_rxq = qsize;
7433
7434         end_synchronized_op(sc, LOCK_HELD);
7435         return (rc);
7436 }
7437
7438 static int
7439 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
7440 {
7441         struct vi_info *vi = arg1;
7442         struct adapter *sc = vi->adapter;
7443         int qsize, rc;
7444
7445         qsize = vi->qsize_txq;
7446
7447         rc = sysctl_handle_int(oidp, &qsize, 0, req);
7448         if (rc != 0 || req->newptr == NULL)
7449                 return (rc);
7450
7451         if (qsize < 128 || qsize > 65536)
7452                 return (EINVAL);
7453
7454         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
7455             "t4txqs");
7456         if (rc)
7457                 return (rc);
7458
7459         if (vi->flags & VI_INIT_DONE)
7460                 rc = EBUSY; /* cannot be changed once the queues are created */
7461         else
7462                 vi->qsize_txq = qsize;
7463
7464         end_synchronized_op(sc, LOCK_HELD);
7465         return (rc);
7466 }
7467
7468 static int
7469 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
7470 {
7471         struct port_info *pi = arg1;
7472         struct adapter *sc = pi->adapter;
7473         struct link_config *lc = &pi->link_cfg;
7474         int rc;
7475
7476         if (req->newptr == NULL) {
7477                 struct sbuf *sb;
7478                 static char *bits = "\20\1RX\2TX\3AUTO";
7479
7480                 rc = sysctl_wire_old_buffer(req, 0);
7481                 if (rc != 0)
7482                         return(rc);
7483
7484                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7485                 if (sb == NULL)
7486                         return (ENOMEM);
7487
7488                 if (lc->link_ok) {
7489                         sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
7490                             (lc->requested_fc & PAUSE_AUTONEG), bits);
7491                 } else {
7492                         sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
7493                             PAUSE_RX | PAUSE_AUTONEG), bits);
7494                 }
7495                 rc = sbuf_finish(sb);
7496                 sbuf_delete(sb);
7497         } else {
7498                 char s[2];
7499                 int n;
7500
7501                 s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
7502                     PAUSE_AUTONEG));
7503                 s[1] = 0;
7504
7505                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7506                 if (rc != 0)
7507                         return(rc);
7508
7509                 if (s[1] != 0)
7510                         return (EINVAL);
7511                 if (s[0] < '0' || s[0] > '9')
7512                         return (EINVAL);        /* not a number */
7513                 n = s[0] - '0';
7514                 if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
7515                         return (EINVAL);        /* some other bit is set too */
7516
7517                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7518                     "t4PAUSE");
7519                 if (rc)
7520                         return (rc);
7521                 PORT_LOCK(pi);
7522                 lc->requested_fc = n;
7523                 fixup_link_config(pi);
7524                 if (pi->up_vis > 0)
7525                         rc = apply_link_config(pi);
7526                 set_current_media(pi);
7527                 PORT_UNLOCK(pi);
7528                 end_synchronized_op(sc, 0);
7529         }
7530
7531         return (rc);
7532 }
7533
7534 static int
7535 sysctl_fec(SYSCTL_HANDLER_ARGS)
7536 {
7537         struct port_info *pi = arg1;
7538         struct adapter *sc = pi->adapter;
7539         struct link_config *lc = &pi->link_cfg;
7540         int rc;
7541         int8_t old;
7542
7543         if (req->newptr == NULL) {
7544                 struct sbuf *sb;
7545                 static char *bits = "\20\1RS-FEC\2FC-FEC\3NO-FEC\4RSVD2"
7546                     "\5RSVD3\6auto\7module";
7547
7548                 rc = sysctl_wire_old_buffer(req, 0);
7549                 if (rc != 0)
7550                         return(rc);
7551
7552                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7553                 if (sb == NULL)
7554                         return (ENOMEM);
7555
7556                 /*
7557                  * Display the requested_fec when the link is down -- the actual
7558                  * FEC makes sense only when the link is up.
7559                  */
7560                 if (lc->link_ok) {
7561                         sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
7562                             (lc->requested_fec & (FEC_AUTO | FEC_MODULE)),
7563                             bits);
7564                 } else {
7565                         sbuf_printf(sb, "%b", lc->requested_fec, bits);
7566                 }
7567                 rc = sbuf_finish(sb);
7568                 sbuf_delete(sb);
7569         } else {
7570                 char s[8];
7571                 int n;
7572
7573                 snprintf(s, sizeof(s), "%d",
7574                     lc->requested_fec == FEC_AUTO ? -1 :
7575                     lc->requested_fec & (M_FW_PORT_CAP32_FEC | FEC_MODULE));
7576
7577                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
7578                 if (rc != 0)
7579                         return(rc);
7580
7581                 n = strtol(&s[0], NULL, 0);
7582                 if (n < 0 || n & FEC_AUTO)
7583                         n = FEC_AUTO;
7584                 else if (n & ~(M_FW_PORT_CAP32_FEC | FEC_MODULE))
7585                         return (EINVAL);/* some other bit is set too */
7586
7587                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7588                     "t4fec");
7589                 if (rc)
7590                         return (rc);
7591                 PORT_LOCK(pi);
7592                 old = lc->requested_fec;
7593                 if (n == FEC_AUTO)
7594                         lc->requested_fec = FEC_AUTO;
7595                 else if (n == 0 || n == FEC_NONE)
7596                         lc->requested_fec = FEC_NONE;
7597                 else {
7598                         if ((lc->pcaps |
7599                             V_FW_PORT_CAP32_FEC(n & M_FW_PORT_CAP32_FEC)) !=
7600                             lc->pcaps) {
7601                                 rc = ENOTSUP;
7602                                 goto done;
7603                         }
7604                         lc->requested_fec = n & (M_FW_PORT_CAP32_FEC |
7605                             FEC_MODULE);
7606                 }
7607                 fixup_link_config(pi);
7608                 if (pi->up_vis > 0) {
7609                         rc = apply_link_config(pi);
7610                         if (rc != 0) {
7611                                 lc->requested_fec = old;
7612                                 if (rc == FW_EPROTO)
7613                                         rc = ENOTSUP;
7614                         }
7615                 }
7616 done:
7617                 PORT_UNLOCK(pi);
7618                 end_synchronized_op(sc, 0);
7619         }
7620
7621         return (rc);
7622 }
7623
7624 static int
7625 sysctl_module_fec(SYSCTL_HANDLER_ARGS)
7626 {
7627         struct port_info *pi = arg1;
7628         struct adapter *sc = pi->adapter;
7629         struct link_config *lc = &pi->link_cfg;
7630         int rc;
7631         int8_t fec;
7632         struct sbuf *sb;
7633         static char *bits = "\20\1RS-FEC\2FC-FEC\3NO-FEC\4RSVD2\5RSVD3";
7634
7635         rc = sysctl_wire_old_buffer(req, 0);
7636         if (rc != 0)
7637                 return (rc);
7638
7639         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
7640         if (sb == NULL)
7641                 return (ENOMEM);
7642
7643         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mfec") != 0)
7644                 return (EBUSY);
7645         PORT_LOCK(pi);
7646         if (pi->up_vis == 0) {
7647                 /*
7648                  * If all the interfaces are administratively down the firmware
7649                  * does not report transceiver changes.  Refresh port info here.
7650                  * This is the only reason we have a synchronized op in this
7651                  * function.  Just PORT_LOCK would have been enough otherwise.
7652                  */
7653                 t4_update_port_info(pi);
7654         }
7655
7656         fec = lc->fec_hint;
7657         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE ||
7658             !fec_supported(lc->pcaps)) {
7659                 sbuf_printf(sb, "n/a");
7660         } else {
7661                 if (fec == 0)
7662                         fec = FEC_NONE;
7663                 sbuf_printf(sb, "%b", fec & M_FW_PORT_CAP32_FEC, bits);
7664         }
7665         rc = sbuf_finish(sb);
7666         sbuf_delete(sb);
7667
7668         PORT_UNLOCK(pi);
7669         end_synchronized_op(sc, 0);
7670
7671         return (rc);
7672 }
7673
7674 static int
7675 sysctl_autoneg(SYSCTL_HANDLER_ARGS)
7676 {
7677         struct port_info *pi = arg1;
7678         struct adapter *sc = pi->adapter;
7679         struct link_config *lc = &pi->link_cfg;
7680         int rc, val;
7681
7682         if (lc->pcaps & FW_PORT_CAP32_ANEG)
7683                 val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
7684         else
7685                 val = -1;
7686         rc = sysctl_handle_int(oidp, &val, 0, req);
7687         if (rc != 0 || req->newptr == NULL)
7688                 return (rc);
7689         if (val == 0)
7690                 val = AUTONEG_DISABLE;
7691         else if (val == 1)
7692                 val = AUTONEG_ENABLE;
7693         else
7694                 val = AUTONEG_AUTO;
7695
7696         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7697             "t4aneg");
7698         if (rc)
7699                 return (rc);
7700         PORT_LOCK(pi);
7701         if (val == AUTONEG_ENABLE && !(lc->pcaps & FW_PORT_CAP32_ANEG)) {
7702                 rc = ENOTSUP;
7703                 goto done;
7704         }
7705         lc->requested_aneg = val;
7706         fixup_link_config(pi);
7707         if (pi->up_vis > 0)
7708                 rc = apply_link_config(pi);
7709         set_current_media(pi);
7710 done:
7711         PORT_UNLOCK(pi);
7712         end_synchronized_op(sc, 0);
7713         return (rc);
7714 }
7715
7716 static int
7717 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
7718 {
7719         struct adapter *sc = arg1;
7720         int reg = arg2;
7721         uint64_t val;
7722
7723         val = t4_read_reg64(sc, reg);
7724
7725         return (sysctl_handle_64(oidp, &val, 0, req));
7726 }
7727
7728 static int
7729 sysctl_temperature(SYSCTL_HANDLER_ARGS)
7730 {
7731         struct adapter *sc = arg1;
7732         int rc, t;
7733         uint32_t param, val;
7734
7735         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
7736         if (rc)
7737                 return (rc);
7738         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7739             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7740             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
7741         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7742         end_synchronized_op(sc, 0);
7743         if (rc)
7744                 return (rc);
7745
7746         /* unknown is returned as 0 but we display -1 in that case */
7747         t = val == 0 ? -1 : val;
7748
7749         rc = sysctl_handle_int(oidp, &t, 0, req);
7750         return (rc);
7751 }
7752
7753 static int
7754 sysctl_vdd(SYSCTL_HANDLER_ARGS)
7755 {
7756         struct adapter *sc = arg1;
7757         int rc;
7758         uint32_t param, val;
7759
7760         if (sc->params.core_vdd == 0) {
7761                 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
7762                     "t4vdd");
7763                 if (rc)
7764                         return (rc);
7765                 param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7766                     V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7767                     V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
7768                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7769                 end_synchronized_op(sc, 0);
7770                 if (rc)
7771                         return (rc);
7772                 sc->params.core_vdd = val;
7773         }
7774
7775         return (sysctl_handle_int(oidp, &sc->params.core_vdd, 0, req));
7776 }
7777
7778 static int
7779 sysctl_reset_sensor(SYSCTL_HANDLER_ARGS)
7780 {
7781         struct adapter *sc = arg1;
7782         int rc, v;
7783         uint32_t param, val;
7784
7785         v = sc->sensor_resets;
7786         rc = sysctl_handle_int(oidp, &v, 0, req);
7787         if (rc != 0 || req->newptr == NULL || v <= 0)
7788                 return (rc);
7789
7790         if (sc->params.fw_vers < FW_VERSION32(1, 24, 7, 0) ||
7791             chip_id(sc) < CHELSIO_T5)
7792                 return (ENOTSUP);
7793
7794         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4srst");
7795         if (rc)
7796                 return (rc);
7797         param = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7798             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7799             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_RESET_TMP_SENSOR));
7800         val = 1;
7801         rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7802         end_synchronized_op(sc, 0);
7803         if (rc == 0)
7804                 sc->sensor_resets++;
7805         return (rc);
7806 }
7807
7808 static int
7809 sysctl_loadavg(SYSCTL_HANDLER_ARGS)
7810 {
7811         struct adapter *sc = arg1;
7812         struct sbuf *sb;
7813         int rc;
7814         uint32_t param, val;
7815
7816         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
7817         if (rc)
7818                 return (rc);
7819         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7820             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
7821         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7822         end_synchronized_op(sc, 0);
7823         if (rc)
7824                 return (rc);
7825
7826         rc = sysctl_wire_old_buffer(req, 0);
7827         if (rc != 0)
7828                 return (rc);
7829
7830         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7831         if (sb == NULL)
7832                 return (ENOMEM);
7833
7834         if (val == 0xffffffff) {
7835                 /* Only debug and custom firmwares report load averages. */
7836                 sbuf_printf(sb, "not available");
7837         } else {
7838                 sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
7839                     (val >> 16) & 0xff);
7840         }
7841         rc = sbuf_finish(sb);
7842         sbuf_delete(sb);
7843
7844         return (rc);
7845 }
7846
7847 static int
7848 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
7849 {
7850         struct adapter *sc = arg1;
7851         struct sbuf *sb;
7852         int rc, i;
7853         uint16_t incr[NMTUS][NCCTRL_WIN];
7854         static const char *dec_fac[] = {
7855                 "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
7856                 "0.9375"
7857         };
7858
7859         rc = sysctl_wire_old_buffer(req, 0);
7860         if (rc != 0)
7861                 return (rc);
7862
7863         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7864         if (sb == NULL)
7865                 return (ENOMEM);
7866
7867         t4_read_cong_tbl(sc, incr);
7868
7869         for (i = 0; i < NCCTRL_WIN; ++i) {
7870                 sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
7871                     incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
7872                     incr[5][i], incr[6][i], incr[7][i]);
7873                 sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
7874                     incr[8][i], incr[9][i], incr[10][i], incr[11][i],
7875                     incr[12][i], incr[13][i], incr[14][i], incr[15][i],
7876                     sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
7877         }
7878
7879         rc = sbuf_finish(sb);
7880         sbuf_delete(sb);
7881
7882         return (rc);
7883 }
7884
7885 static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
7886         "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",   /* ibq's */
7887         "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */
7888         "SGE0-RX", "SGE1-RX"    /* additional obq's (T5 onwards) */
7889 };
7890
7891 static int
7892 sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
7893 {
7894         struct adapter *sc = arg1;
7895         struct sbuf *sb;
7896         int rc, i, n, qid = arg2;
7897         uint32_t *buf, *p;
7898         char *qtype;
7899         u_int cim_num_obq = sc->chip_params->cim_num_obq;
7900
7901         KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
7902             ("%s: bad qid %d\n", __func__, qid));
7903
7904         if (qid < CIM_NUM_IBQ) {
7905                 /* inbound queue */
7906                 qtype = "IBQ";
7907                 n = 4 * CIM_IBQ_SIZE;
7908                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7909                 rc = t4_read_cim_ibq(sc, qid, buf, n);
7910         } else {
7911                 /* outbound queue */
7912                 qtype = "OBQ";
7913                 qid -= CIM_NUM_IBQ;
7914                 n = 4 * cim_num_obq * CIM_OBQ_SIZE;
7915                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7916                 rc = t4_read_cim_obq(sc, qid, buf, n);
7917         }
7918
7919         if (rc < 0) {
7920                 rc = -rc;
7921                 goto done;
7922         }
7923         n = rc * sizeof(uint32_t);      /* rc has # of words actually read */
7924
7925         rc = sysctl_wire_old_buffer(req, 0);
7926         if (rc != 0)
7927                 goto done;
7928
7929         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7930         if (sb == NULL) {
7931                 rc = ENOMEM;
7932                 goto done;
7933         }
7934
7935         sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]);
7936         for (i = 0, p = buf; i < n; i += 16, p += 4)
7937                 sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
7938                     p[2], p[3]);
7939
7940         rc = sbuf_finish(sb);
7941         sbuf_delete(sb);
7942 done:
7943         free(buf, M_CXGBE);
7944         return (rc);
7945 }
7946
7947 static void
7948 sbuf_cim_la4(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
7949 {
7950         uint32_t *p;
7951
7952         sbuf_printf(sb, "Status   Data      PC%s",
7953             cfg & F_UPDBGLACAPTPCONLY ? "" :
7954             "     LS0Stat  LS0Addr             LS0Data");
7955
7956         for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
7957                 if (cfg & F_UPDBGLACAPTPCONLY) {
7958                         sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
7959                             p[6], p[7]);
7960                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
7961                             (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
7962                             p[4] & 0xff, p[5] >> 8);
7963                         sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
7964                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7965                             p[1] & 0xf, p[2] >> 4);
7966                 } else {
7967                         sbuf_printf(sb,
7968                             "\n  %02x   %x%07x %x%07x %08x %08x "
7969                             "%08x%08x%08x%08x",
7970                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7971                             p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
7972                             p[6], p[7]);
7973                 }
7974         }
7975 }
7976
7977 static void
7978 sbuf_cim_la6(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
7979 {
7980         uint32_t *p;
7981
7982         sbuf_printf(sb, "Status   Inst    Data      PC%s",
7983             cfg & F_UPDBGLACAPTPCONLY ? "" :
7984             "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
7985
7986         for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
7987                 if (cfg & F_UPDBGLACAPTPCONLY) {
7988                         sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
7989                             p[3] & 0xff, p[2], p[1], p[0]);
7990                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
7991                             (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
7992                             p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
7993                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
7994                             (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
7995                             p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
7996                             p[6] >> 16);
7997                 } else {
7998                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
7999                             "%08x %08x %08x %08x %08x %08x",
8000                             (p[9] >> 16) & 0xff,
8001                             p[9] & 0xffff, p[8] >> 16,
8002                             p[8] & 0xffff, p[7] >> 16,
8003                             p[7] & 0xffff, p[6] >> 16,
8004                             p[2], p[1], p[0], p[5], p[4], p[3]);
8005                 }
8006         }
8007 }
8008
8009 static int
8010 sbuf_cim_la(struct adapter *sc, struct sbuf *sb, int flags)
8011 {
8012         uint32_t cfg, *buf;
8013         int rc;
8014
8015         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
8016         if (rc != 0)
8017                 return (rc);
8018
8019         MPASS(flags == M_WAITOK || flags == M_NOWAIT);
8020         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
8021             M_ZERO | flags);
8022         if (buf == NULL)
8023                 return (ENOMEM);
8024
8025         rc = -t4_cim_read_la(sc, buf, NULL);
8026         if (rc != 0)
8027                 goto done;
8028         if (chip_id(sc) < CHELSIO_T6)
8029                 sbuf_cim_la4(sc, sb, buf, cfg);
8030         else
8031                 sbuf_cim_la6(sc, sb, buf, cfg);
8032
8033 done:
8034         free(buf, M_CXGBE);
8035         return (rc);
8036 }
8037
8038 static int
8039 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
8040 {
8041         struct adapter *sc = arg1;
8042         struct sbuf *sb;
8043         int rc;
8044
8045         rc = sysctl_wire_old_buffer(req, 0);
8046         if (rc != 0)
8047                 return (rc);
8048         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8049         if (sb == NULL)
8050                 return (ENOMEM);
8051
8052         rc = sbuf_cim_la(sc, sb, M_WAITOK);
8053         if (rc == 0)
8054                 rc = sbuf_finish(sb);
8055         sbuf_delete(sb);
8056         return (rc);
8057 }
8058
8059 bool
8060 t4_os_dump_cimla(struct adapter *sc, int arg, bool verbose)
8061 {
8062         struct sbuf sb;
8063         int rc;
8064
8065         if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
8066                 return (false);
8067         rc = sbuf_cim_la(sc, &sb, M_NOWAIT);
8068         if (rc == 0) {
8069                 rc = sbuf_finish(&sb);
8070                 if (rc == 0) {
8071                         log(LOG_DEBUG, "%s: CIM LA dump follows.\n%s",
8072                                 device_get_nameunit(sc->dev), sbuf_data(&sb));
8073                 }
8074         }
8075         sbuf_delete(&sb);
8076         return (false);
8077 }
8078
8079 static int
8080 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
8081 {
8082         struct adapter *sc = arg1;
8083         u_int i;
8084         struct sbuf *sb;
8085         uint32_t *buf, *p;
8086         int rc;
8087
8088         rc = sysctl_wire_old_buffer(req, 0);
8089         if (rc != 0)
8090                 return (rc);
8091
8092         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8093         if (sb == NULL)
8094                 return (ENOMEM);
8095
8096         buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
8097             M_ZERO | M_WAITOK);
8098
8099         t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
8100         p = buf;
8101
8102         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
8103                 sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
8104                     p[1], p[0]);
8105         }
8106
8107         sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
8108         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
8109                 sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
8110                     (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
8111                     (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
8112                     (p[1] >> 2) | ((p[2] & 3) << 30),
8113                     (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
8114                     p[0] & 1);
8115         }
8116
8117         rc = sbuf_finish(sb);
8118         sbuf_delete(sb);
8119         free(buf, M_CXGBE);
8120         return (rc);
8121 }
8122
8123 static int
8124 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
8125 {
8126         struct adapter *sc = arg1;
8127         u_int i;
8128         struct sbuf *sb;
8129         uint32_t *buf, *p;
8130         int rc;
8131
8132         rc = sysctl_wire_old_buffer(req, 0);
8133         if (rc != 0)
8134                 return (rc);
8135
8136         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8137         if (sb == NULL)
8138                 return (ENOMEM);
8139
8140         buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
8141             M_ZERO | M_WAITOK);
8142
8143         t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
8144         p = buf;
8145
8146         sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
8147         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
8148                 sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
8149                     (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
8150                     p[4], p[3], p[2], p[1], p[0]);
8151         }
8152
8153         sbuf_printf(sb, "\n\nCntl ID               Data");
8154         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
8155                 sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
8156                     (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
8157         }
8158
8159         rc = sbuf_finish(sb);
8160         sbuf_delete(sb);
8161         free(buf, M_CXGBE);
8162         return (rc);
8163 }
8164
8165 static int
8166 sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
8167 {
8168         struct adapter *sc = arg1;
8169         struct sbuf *sb;
8170         int rc, i;
8171         uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
8172         uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
8173         uint16_t thres[CIM_NUM_IBQ];
8174         uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
8175         uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
8176         u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
8177
8178         cim_num_obq = sc->chip_params->cim_num_obq;
8179         if (is_t4(sc)) {
8180                 ibq_rdaddr = A_UP_IBQ_0_RDADDR;
8181                 obq_rdaddr = A_UP_OBQ_0_REALADDR;
8182         } else {
8183                 ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
8184                 obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
8185         }
8186         nq = CIM_NUM_IBQ + cim_num_obq;
8187
8188         rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
8189         if (rc == 0)
8190                 rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
8191         if (rc != 0)
8192                 return (rc);
8193
8194         t4_read_cimq_cfg(sc, base, size, thres);
8195
8196         rc = sysctl_wire_old_buffer(req, 0);
8197         if (rc != 0)
8198                 return (rc);
8199
8200         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
8201         if (sb == NULL)
8202                 return (ENOMEM);
8203
8204         sbuf_printf(sb,
8205             "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
8206
8207         for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
8208                 sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
8209                     qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
8210                     G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
8211                     G_QUEREMFLITS(p[2]) * 16);
8212         for ( ; i < nq; i++, p += 4, wr += 2)
8213                 sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
8214                     base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
8215                     wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
8216                     G_QUEREMFLITS(p[2]) * 16);
8217
8218         rc = sbuf_finish(sb);
8219         sbuf_delete(sb);
8220
8221         return (rc);
8222 }
8223
8224 static int
8225 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
8226 {
8227         struct adapter *sc = arg1;
8228         struct sbuf *sb;
8229         int rc;
8230         struct tp_cpl_stats stats;
8231
8232         rc = sysctl_wire_old_buffer(req, 0);
8233         if (rc != 0)
8234                 return (rc);
8235
8236         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8237         if (sb == NULL)
8238                 return (ENOMEM);
8239
8240         mtx_lock(&sc->reg_lock);
8241         t4_tp_get_cpl_stats(sc, &stats, 0);
8242         mtx_unlock(&sc->reg_lock);
8243
8244         if (sc->chip_params->nchan > 2) {
8245                 sbuf_printf(sb, "                 channel 0  channel 1"
8246                     "  channel 2  channel 3");
8247                 sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
8248                     stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
8249                 sbuf_printf(sb, "\nCPL responses:   %10u %10u %10u %10u",
8250                     stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
8251         } else {
8252                 sbuf_printf(sb, "                 channel 0  channel 1");
8253                 sbuf_printf(sb, "\nCPL requests:   %10u %10u",
8254                     stats.req[0], stats.req[1]);
8255                 sbuf_printf(sb, "\nCPL responses:   %10u %10u",
8256                     stats.rsp[0], stats.rsp[1]);
8257         }
8258
8259         rc = sbuf_finish(sb);
8260         sbuf_delete(sb);
8261
8262         return (rc);
8263 }
8264
8265 static int
8266 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
8267 {
8268         struct adapter *sc = arg1;
8269         struct sbuf *sb;
8270         int rc;
8271         struct tp_usm_stats stats;
8272
8273         rc = sysctl_wire_old_buffer(req, 0);
8274         if (rc != 0)
8275                 return(rc);
8276
8277         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8278         if (sb == NULL)
8279                 return (ENOMEM);
8280
8281         t4_get_usm_stats(sc, &stats, 1);
8282
8283         sbuf_printf(sb, "Frames: %u\n", stats.frames);
8284         sbuf_printf(sb, "Octets: %ju\n", stats.octets);
8285         sbuf_printf(sb, "Drops:  %u", stats.drops);
8286
8287         rc = sbuf_finish(sb);
8288         sbuf_delete(sb);
8289
8290         return (rc);
8291 }
8292
8293 static const char * const devlog_level_strings[] = {
8294         [FW_DEVLOG_LEVEL_EMERG]         = "EMERG",
8295         [FW_DEVLOG_LEVEL_CRIT]          = "CRIT",
8296         [FW_DEVLOG_LEVEL_ERR]           = "ERR",
8297         [FW_DEVLOG_LEVEL_NOTICE]        = "NOTICE",
8298         [FW_DEVLOG_LEVEL_INFO]          = "INFO",
8299         [FW_DEVLOG_LEVEL_DEBUG]         = "DEBUG"
8300 };
8301
8302 static const char * const devlog_facility_strings[] = {
8303         [FW_DEVLOG_FACILITY_CORE]       = "CORE",
8304         [FW_DEVLOG_FACILITY_CF]         = "CF",
8305         [FW_DEVLOG_FACILITY_SCHED]      = "SCHED",
8306         [FW_DEVLOG_FACILITY_TIMER]      = "TIMER",
8307         [FW_DEVLOG_FACILITY_RES]        = "RES",
8308         [FW_DEVLOG_FACILITY_HW]         = "HW",
8309         [FW_DEVLOG_FACILITY_FLR]        = "FLR",
8310         [FW_DEVLOG_FACILITY_DMAQ]       = "DMAQ",
8311         [FW_DEVLOG_FACILITY_PHY]        = "PHY",
8312         [FW_DEVLOG_FACILITY_MAC]        = "MAC",
8313         [FW_DEVLOG_FACILITY_PORT]       = "PORT",
8314         [FW_DEVLOG_FACILITY_VI]         = "VI",
8315         [FW_DEVLOG_FACILITY_FILTER]     = "FILTER",
8316         [FW_DEVLOG_FACILITY_ACL]        = "ACL",
8317         [FW_DEVLOG_FACILITY_TM]         = "TM",
8318         [FW_DEVLOG_FACILITY_QFC]        = "QFC",
8319         [FW_DEVLOG_FACILITY_DCB]        = "DCB",
8320         [FW_DEVLOG_FACILITY_ETH]        = "ETH",
8321         [FW_DEVLOG_FACILITY_OFLD]       = "OFLD",
8322         [FW_DEVLOG_FACILITY_RI]         = "RI",
8323         [FW_DEVLOG_FACILITY_ISCSI]      = "ISCSI",
8324         [FW_DEVLOG_FACILITY_FCOE]       = "FCOE",
8325         [FW_DEVLOG_FACILITY_FOISCSI]    = "FOISCSI",
8326         [FW_DEVLOG_FACILITY_FOFCOE]     = "FOFCOE",
8327         [FW_DEVLOG_FACILITY_CHNET]      = "CHNET",
8328 };
8329
8330 static int
8331 sbuf_devlog(struct adapter *sc, struct sbuf *sb, int flags)
8332 {
8333         int i, j, rc, nentries, first = 0;
8334         struct devlog_params *dparams = &sc->params.devlog;
8335         struct fw_devlog_e *buf, *e;
8336         uint64_t ftstamp = UINT64_MAX;
8337
8338         if (dparams->addr == 0)
8339                 return (ENXIO);
8340
8341         MPASS(flags == M_WAITOK || flags == M_NOWAIT);
8342         buf = malloc(dparams->size, M_CXGBE, M_ZERO | flags);
8343         if (buf == NULL)
8344                 return (ENOMEM);
8345
8346         rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
8347         if (rc != 0)
8348                 goto done;
8349
8350         nentries = dparams->size / sizeof(struct fw_devlog_e);
8351         for (i = 0; i < nentries; i++) {
8352                 e = &buf[i];
8353
8354                 if (e->timestamp == 0)
8355                         break;  /* end */
8356
8357                 e->timestamp = be64toh(e->timestamp);
8358                 e->seqno = be32toh(e->seqno);
8359                 for (j = 0; j < 8; j++)
8360                         e->params[j] = be32toh(e->params[j]);
8361
8362                 if (e->timestamp < ftstamp) {
8363                         ftstamp = e->timestamp;
8364                         first = i;
8365                 }
8366         }
8367
8368         if (buf[first].timestamp == 0)
8369                 goto done;      /* nothing in the log */
8370
8371         sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
8372             "Seq#", "Tstamp", "Level", "Facility", "Message");
8373
8374         i = first;
8375         do {
8376                 e = &buf[i];
8377                 if (e->timestamp == 0)
8378                         break;  /* end */
8379
8380                 sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
8381                     e->seqno, e->timestamp,
8382                     (e->level < nitems(devlog_level_strings) ?
8383                         devlog_level_strings[e->level] : "UNKNOWN"),
8384                     (e->facility < nitems(devlog_facility_strings) ?
8385                         devlog_facility_strings[e->facility] : "UNKNOWN"));
8386                 sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
8387                     e->params[2], e->params[3], e->params[4],
8388                     e->params[5], e->params[6], e->params[7]);
8389
8390                 if (++i == nentries)
8391                         i = 0;
8392         } while (i != first);
8393 done:
8394         free(buf, M_CXGBE);
8395         return (rc);
8396 }
8397
8398 static int
8399 sysctl_devlog(SYSCTL_HANDLER_ARGS)
8400 {
8401         struct adapter *sc = arg1;
8402         int rc;
8403         struct sbuf *sb;
8404
8405         rc = sysctl_wire_old_buffer(req, 0);
8406         if (rc != 0)
8407                 return (rc);
8408         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8409         if (sb == NULL)
8410                 return (ENOMEM);
8411
8412         rc = sbuf_devlog(sc, sb, M_WAITOK);
8413         if (rc == 0)
8414                 rc = sbuf_finish(sb);
8415         sbuf_delete(sb);
8416         return (rc);
8417 }
8418
8419 void
8420 t4_os_dump_devlog(struct adapter *sc)
8421 {
8422         int rc;
8423         struct sbuf sb;
8424
8425         if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
8426                 return;
8427         rc = sbuf_devlog(sc, &sb, M_NOWAIT);
8428         if (rc == 0) {
8429                 rc = sbuf_finish(&sb);
8430                 if (rc == 0) {
8431                         log(LOG_DEBUG, "%s: device log follows.\n%s",
8432                                 device_get_nameunit(sc->dev), sbuf_data(&sb));
8433                 }
8434         }
8435         sbuf_delete(&sb);
8436 }
8437
8438 static int
8439 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
8440 {
8441         struct adapter *sc = arg1;
8442         struct sbuf *sb;
8443         int rc;
8444         struct tp_fcoe_stats stats[MAX_NCHAN];
8445         int i, nchan = sc->chip_params->nchan;
8446
8447         rc = sysctl_wire_old_buffer(req, 0);
8448         if (rc != 0)
8449                 return (rc);
8450
8451         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8452         if (sb == NULL)
8453                 return (ENOMEM);
8454
8455         for (i = 0; i < nchan; i++)
8456                 t4_get_fcoe_stats(sc, i, &stats[i], 1);
8457
8458         if (nchan > 2) {
8459                 sbuf_printf(sb, "                   channel 0        channel 1"
8460                     "        channel 2        channel 3");
8461                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
8462                     stats[0].octets_ddp, stats[1].octets_ddp,
8463                     stats[2].octets_ddp, stats[3].octets_ddp);
8464                 sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
8465                     stats[0].frames_ddp, stats[1].frames_ddp,
8466                     stats[2].frames_ddp, stats[3].frames_ddp);
8467                 sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
8468                     stats[0].frames_drop, stats[1].frames_drop,
8469                     stats[2].frames_drop, stats[3].frames_drop);
8470         } else {
8471                 sbuf_printf(sb, "                   channel 0        channel 1");
8472                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
8473                     stats[0].octets_ddp, stats[1].octets_ddp);
8474                 sbuf_printf(sb, "\nframesDDP:  %16u %16u",
8475                     stats[0].frames_ddp, stats[1].frames_ddp);
8476                 sbuf_printf(sb, "\nframesDrop: %16u %16u",
8477                     stats[0].frames_drop, stats[1].frames_drop);
8478         }
8479
8480         rc = sbuf_finish(sb);
8481         sbuf_delete(sb);
8482
8483         return (rc);
8484 }
8485
8486 static int
8487 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
8488 {
8489         struct adapter *sc = arg1;
8490         struct sbuf *sb;
8491         int rc, i;
8492         unsigned int map, kbps, ipg, mode;
8493         unsigned int pace_tab[NTX_SCHED];
8494
8495         rc = sysctl_wire_old_buffer(req, 0);
8496         if (rc != 0)
8497                 return (rc);
8498
8499         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8500         if (sb == NULL)
8501                 return (ENOMEM);
8502
8503         map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
8504         mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
8505         t4_read_pace_tbl(sc, pace_tab);
8506
8507         sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
8508             "Class IPG (0.1 ns)   Flow IPG (us)");
8509
8510         for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
8511                 t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
8512                 sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
8513                     (mode & (1 << i)) ? "flow" : "class", map & 3);
8514                 if (kbps)
8515                         sbuf_printf(sb, "%9u     ", kbps);
8516                 else
8517                         sbuf_printf(sb, " disabled     ");
8518
8519                 if (ipg)
8520                         sbuf_printf(sb, "%13u        ", ipg);
8521                 else
8522                         sbuf_printf(sb, "     disabled        ");
8523
8524                 if (pace_tab[i])
8525                         sbuf_printf(sb, "%10u", pace_tab[i]);
8526                 else
8527                         sbuf_printf(sb, "  disabled");
8528         }
8529
8530         rc = sbuf_finish(sb);
8531         sbuf_delete(sb);
8532
8533         return (rc);
8534 }
8535
8536 static int
8537 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
8538 {
8539         struct adapter *sc = arg1;
8540         struct sbuf *sb;
8541         int rc, i, j;
8542         uint64_t *p0, *p1;
8543         struct lb_port_stats s[2];
8544         static const char *stat_name[] = {
8545                 "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
8546                 "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
8547                 "Frames128To255:", "Frames256To511:", "Frames512To1023:",
8548                 "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
8549                 "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
8550                 "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
8551                 "BG2FramesTrunc:", "BG3FramesTrunc:"
8552         };
8553
8554         rc = sysctl_wire_old_buffer(req, 0);
8555         if (rc != 0)
8556                 return (rc);
8557
8558         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8559         if (sb == NULL)
8560                 return (ENOMEM);
8561
8562         memset(s, 0, sizeof(s));
8563
8564         for (i = 0; i < sc->chip_params->nchan; i += 2) {
8565                 t4_get_lb_stats(sc, i, &s[0]);
8566                 t4_get_lb_stats(sc, i + 1, &s[1]);
8567
8568                 p0 = &s[0].octets;
8569                 p1 = &s[1].octets;
8570                 sbuf_printf(sb, "%s                       Loopback %u"
8571                     "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
8572
8573                 for (j = 0; j < nitems(stat_name); j++)
8574                         sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
8575                                    *p0++, *p1++);
8576         }
8577
8578         rc = sbuf_finish(sb);
8579         sbuf_delete(sb);
8580
8581         return (rc);
8582 }
8583
8584 static int
8585 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
8586 {
8587         int rc = 0;
8588         struct port_info *pi = arg1;
8589         struct link_config *lc = &pi->link_cfg;
8590         struct sbuf *sb;
8591
8592         rc = sysctl_wire_old_buffer(req, 0);
8593         if (rc != 0)
8594                 return(rc);
8595         sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
8596         if (sb == NULL)
8597                 return (ENOMEM);
8598
8599         if (lc->link_ok || lc->link_down_rc == 255)
8600                 sbuf_printf(sb, "n/a");
8601         else
8602                 sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
8603
8604         rc = sbuf_finish(sb);
8605         sbuf_delete(sb);
8606
8607         return (rc);
8608 }
8609
/*
 * One address range of adapter memory.
 */
struct mem_desc {
        unsigned int base;      /* first address of the range */
        unsigned int limit;     /* upper bound of the range */
        unsigned int idx;       /* index of the range's name in a name table */
};

/*
 * qsort(3)-style comparator that orders mem_desc entries by ascending base.
 *
 * The bases are unsigned, so they are compared explicitly.  Returning their
 * difference would be converted to int and come out with the wrong sign
 * whenever the two bases are more than INT_MAX apart.
 *
 * Returns a negative value, zero or a positive value when a's base is less
 * than, equal to or greater than b's base.
 */
static int
mem_desc_cmp(const void *a, const void *b)
{
        const unsigned int lhs = ((const struct mem_desc *)a)->base;
        const unsigned int rhs = ((const struct mem_desc *)b)->base;

        return ((lhs > rhs) - (lhs < rhs));
}
8622
/*
 * Appends one "name from-to [size]" line for the inclusive address range
 * [from, to] to sb.
 *
 * Nothing is printed when from equals to, or when the computed size
 * (to - from + 1) wraps around to zero.
 */
static void
mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
    unsigned int to)
{
        /* Unsigned arithmetic: this wraps to 0 for the full 32-bit range. */
        const unsigned int nbytes = to - from + 1;

        if (from == to || nbytes == 0)
                return;

        /* XXX: need humanize_number(3) in libkern for a more readable 'size' */
        sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, nbytes);
}
8639
8640 static int
8641 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
8642 {
8643         struct adapter *sc = arg1;
8644         struct sbuf *sb;
8645         int rc, i, n;
8646         uint32_t lo, hi, used, alloc;
8647         static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
8648         static const char *region[] = {
8649                 "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
8650                 "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
8651                 "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
8652                 "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
8653                 "RQUDP region:", "PBL region:", "TXPBL region:",
8654                 "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
8655                 "On-chip queues:", "TLS keys:",
8656         };
8657         struct mem_desc avail[4];
8658         struct mem_desc mem[nitems(region) + 3];        /* up to 3 holes */
8659         struct mem_desc *md = mem;
8660
8661         rc = sysctl_wire_old_buffer(req, 0);
8662         if (rc != 0)
8663                 return (rc);
8664
8665         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8666         if (sb == NULL)
8667                 return (ENOMEM);
8668
8669         for (i = 0; i < nitems(mem); i++) {
8670                 mem[i].limit = 0;
8671                 mem[i].idx = i;
8672         }
8673
8674         /* Find and sort the populated memory ranges */
8675         i = 0;
8676         lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
8677         if (lo & F_EDRAM0_ENABLE) {
8678                 hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
8679                 avail[i].base = G_EDRAM0_BASE(hi) << 20;
8680                 avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
8681                 avail[i].idx = 0;
8682                 i++;
8683         }
8684         if (lo & F_EDRAM1_ENABLE) {
8685                 hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
8686                 avail[i].base = G_EDRAM1_BASE(hi) << 20;
8687                 avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
8688                 avail[i].idx = 1;
8689                 i++;
8690         }
8691         if (lo & F_EXT_MEM_ENABLE) {
8692                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
8693                 avail[i].base = G_EXT_MEM_BASE(hi) << 20;
8694                 avail[i].limit = avail[i].base +
8695                     (G_EXT_MEM_SIZE(hi) << 20);
8696                 avail[i].idx = is_t5(sc) ? 3 : 2;       /* Call it MC0 for T5 */
8697                 i++;
8698         }
8699         if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
8700                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
8701                 avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
8702                 avail[i].limit = avail[i].base +
8703                     (G_EXT_MEM1_SIZE(hi) << 20);
8704                 avail[i].idx = 4;
8705                 i++;
8706         }
8707         if (!i)                                    /* no memory available */
8708                 return 0;
8709         qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
8710
8711         (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
8712         (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
8713         (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
8714         (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
8715         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
8716         (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
8717         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
8718         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
8719         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
8720
8721         /* the next few have explicit upper bounds */
8722         md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
8723         md->limit = md->base - 1 +
8724                     t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
8725                     G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
8726         md++;
8727
8728         md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
8729         md->limit = md->base - 1 +
8730                     t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
8731                     G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
8732         md++;
8733
8734         if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8735                 if (chip_id(sc) <= CHELSIO_T5)
8736                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
8737                 else
8738                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
8739                 md->limit = 0;
8740         } else {
8741                 md->base = 0;
8742                 md->idx = nitems(region);  /* hide it */
8743         }
8744         md++;
8745
8746 #define ulp_region(reg) \
8747         md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
8748         (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
8749
8750         ulp_region(RX_ISCSI);
8751         ulp_region(RX_TDDP);
8752         ulp_region(TX_TPT);
8753         ulp_region(RX_STAG);
8754         ulp_region(RX_RQ);
8755         ulp_region(RX_RQUDP);
8756         ulp_region(RX_PBL);
8757         ulp_region(TX_PBL);
8758 #undef ulp_region
8759
8760         md->base = 0;
8761         md->idx = nitems(region);
8762         if (!is_t4(sc)) {
8763                 uint32_t size = 0;
8764                 uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
8765                 uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
8766
8767                 if (is_t5(sc)) {
8768                         if (sge_ctrl & F_VFIFO_ENABLE)
8769                                 size = G_DBVFIFO_SIZE(fifo_size);
8770                 } else
8771                         size = G_T6_DBVFIFO_SIZE(fifo_size);
8772
8773                 if (size) {
8774                         md->base = G_BASEADDR(t4_read_reg(sc,
8775                             A_SGE_DBVFIFO_BADDR));
8776                         md->limit = md->base + (size << 2) - 1;
8777                 }
8778         }
8779         md++;
8780
8781         md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
8782         md->limit = 0;
8783         md++;
8784         md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
8785         md->limit = 0;
8786         md++;
8787
8788         md->base = sc->vres.ocq.start;
8789         if (sc->vres.ocq.size)
8790                 md->limit = md->base + sc->vres.ocq.size - 1;
8791         else
8792                 md->idx = nitems(region);  /* hide it */
8793         md++;
8794
8795         md->base = sc->vres.key.start;
8796         if (sc->vres.key.size)
8797                 md->limit = md->base + sc->vres.key.size - 1;
8798         else
8799                 md->idx = nitems(region);  /* hide it */
8800         md++;
8801
8802         /* add any address-space holes, there can be up to 3 */
8803         for (n = 0; n < i - 1; n++)
8804                 if (avail[n].limit < avail[n + 1].base)
8805                         (md++)->base = avail[n].limit;
8806         if (avail[n].limit)
8807                 (md++)->base = avail[n].limit;
8808
8809         n = md - mem;
8810         qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
8811
8812         for (lo = 0; lo < i; lo++)
8813                 mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
8814                                 avail[lo].limit - 1);
8815
8816         sbuf_printf(sb, "\n");
8817         for (i = 0; i < n; i++) {
8818                 if (mem[i].idx >= nitems(region))
8819                         continue;                        /* skip holes */
8820                 if (!mem[i].limit)
8821                         mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
8822                 mem_region_show(sb, region[mem[i].idx], mem[i].base,
8823                                 mem[i].limit);
8824         }
8825
8826         sbuf_printf(sb, "\n");
8827         lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
8828         hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
8829         mem_region_show(sb, "uP RAM:", lo, hi);
8830
8831         lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
8832         hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
8833         mem_region_show(sb, "uP Extmem2:", lo, hi);
8834
8835         lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
8836         sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
8837                    G_PMRXMAXPAGE(lo),
8838                    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
8839                    (lo & F_PMRXNUMCHN) ? 2 : 1);
8840
8841         lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
8842         hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
8843         sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
8844                    G_PMTXMAXPAGE(lo),
8845                    hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
8846                    hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
8847         sbuf_printf(sb, "%u p-structs\n",
8848                    t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
8849
8850         for (i = 0; i < 4; i++) {
8851                 if (chip_id(sc) > CHELSIO_T5)
8852                         lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
8853                 else
8854                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
8855                 if (is_t5(sc)) {
8856                         used = G_T5_USED(lo);
8857                         alloc = G_T5_ALLOC(lo);
8858                 } else {
8859                         used = G_USED(lo);
8860                         alloc = G_ALLOC(lo);
8861                 }
8862                 /* For T6 these are MAC buffer groups */
8863                 sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
8864                     i, used, alloc);
8865         }
8866         for (i = 0; i < sc->chip_params->nchan; i++) {
8867                 if (chip_id(sc) > CHELSIO_T5)
8868                         lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
8869                 else
8870                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
8871                 if (is_t5(sc)) {
8872                         used = G_T5_USED(lo);
8873                         alloc = G_T5_ALLOC(lo);
8874                 } else {
8875                         used = G_USED(lo);
8876                         alloc = G_ALLOC(lo);
8877                 }
8878                 /* For T6 these are MAC buffer groups */
8879                 sbuf_printf(sb,
8880                     "\nLoopback %d using %u pages out of %u allocated",
8881                     i, used, alloc);
8882         }
8883
8884         rc = sbuf_finish(sb);
8885         sbuf_delete(sb);
8886
8887         return (rc);
8888 }
8889
/*
 * Turn an MPS TCAM (x, y) word pair into an Ethernet address and a mask.
 *
 * The mask is simply x | y.  The address is taken from y: viewed as a
 * big-endian 64-bit quantity, its first two bytes are skipped and the next
 * ETHER_ADDR_LEN bytes are stored in addr[], most significant byte first.
 */
static inline void
tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
{
        const unsigned int skip = 2;    /* leading big-endian bytes of y not copied */
        unsigned int n;

        *mask = x | y;

        /* Byte (skip + n) of big-endian y lives (7 - skip - n) bytes up. */
        for (n = 0; n < ETHER_ADDR_LEN; n++)
                addr[n] = (uint8_t)(y >> (8 * (sizeof(y) - 1 - skip - n)));
}
8897
8898 static int
8899 sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
8900 {
8901         struct adapter *sc = arg1;
8902         struct sbuf *sb;
8903         int rc, i;
8904
8905         MPASS(chip_id(sc) <= CHELSIO_T5);
8906
8907         rc = sysctl_wire_old_buffer(req, 0);
8908         if (rc != 0)
8909                 return (rc);
8910
8911         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8912         if (sb == NULL)
8913                 return (ENOMEM);
8914
8915         sbuf_printf(sb,
8916             "Idx  Ethernet address     Mask     Vld Ports PF"
8917             "  VF              Replication             P0 P1 P2 P3  ML");
8918         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8919                 uint64_t tcamx, tcamy, mask;
8920                 uint32_t cls_lo, cls_hi;
8921                 uint8_t addr[ETHER_ADDR_LEN];
8922
8923                 tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
8924                 tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
8925                 if (tcamx & tcamy)
8926                         continue;
8927                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
8928                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8929                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
8930                 sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
8931                            "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
8932                            addr[3], addr[4], addr[5], (uintmax_t)mask,
8933                            (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
8934                            G_PORTMAP(cls_hi), G_PF(cls_lo),
8935                            (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
8936
8937                 if (cls_lo & F_REPLICATE) {
8938                         struct fw_ldst_cmd ldst_cmd;
8939
8940                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
8941                         ldst_cmd.op_to_addrspace =
8942                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
8943                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
8944                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
8945                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
8946                         ldst_cmd.u.mps.rplc.fid_idx =
8947                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
8948                                 V_FW_LDST_CMD_IDX(i));
8949
8950                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
8951                             "t4mps");
8952                         if (rc)
8953                                 break;
8954                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
8955                             sizeof(ldst_cmd), &ldst_cmd);
8956                         end_synchronized_op(sc, 0);
8957
8958                         if (rc != 0) {
8959                                 sbuf_printf(sb, "%36d", rc);
8960                                 rc = 0;
8961                         } else {
8962                                 sbuf_printf(sb, " %08x %08x %08x %08x",
8963                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
8964                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
8965                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
8966                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
8967                         }
8968                 } else
8969                         sbuf_printf(sb, "%36s", "");
8970
8971                 sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
8972                     G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
8973                     G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
8974         }
8975
8976         if (rc)
8977                 (void) sbuf_finish(sb);
8978         else
8979                 rc = sbuf_finish(sb);
8980         sbuf_delete(sb);
8981
8982         return (rc);
8983 }
8984
8985 static int
8986 sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
8987 {
8988         struct adapter *sc = arg1;
8989         struct sbuf *sb;
8990         int rc, i;
8991
8992         MPASS(chip_id(sc) > CHELSIO_T5);
8993
8994         rc = sysctl_wire_old_buffer(req, 0);
8995         if (rc != 0)
8996                 return (rc);
8997
8998         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8999         if (sb == NULL)
9000                 return (ENOMEM);
9001
9002         sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
9003             "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
9004             "                           Replication"
9005             "                                    P0 P1 P2 P3  ML\n");
9006
9007         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
9008                 uint8_t dip_hit, vlan_vld, lookup_type, port_num;
9009                 uint16_t ivlan;
9010                 uint64_t tcamx, tcamy, val, mask;
9011                 uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
9012                 uint8_t addr[ETHER_ADDR_LEN];
9013
9014                 ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
9015                 if (i < 256)
9016                         ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
9017                 else
9018                         ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
9019                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
9020                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
9021                 tcamy = G_DMACH(val) << 32;
9022                 tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
9023                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
9024                 lookup_type = G_DATALKPTYPE(data2);
9025                 port_num = G_DATAPORTNUM(data2);
9026                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
9027                         /* Inner header VNI */
9028                         vniy = ((data2 & F_DATAVIDH2) << 23) |
9029                                        (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
9030                         dip_hit = data2 & F_DATADIPHIT;
9031                         vlan_vld = 0;
9032                 } else {
9033                         vniy = 0;
9034                         dip_hit = 0;
9035                         vlan_vld = data2 & F_DATAVIDH2;
9036                         ivlan = G_VIDL(val);
9037                 }
9038
9039                 ctl |= V_CTLXYBITSEL(1);
9040                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
9041                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
9042                 tcamx = G_DMACH(val) << 32;
9043                 tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
9044                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
9045                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
9046                         /* Inner header VNI mask */
9047                         vnix = ((data2 & F_DATAVIDH2) << 23) |
9048                                (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
9049                 } else
9050                         vnix = 0;
9051
9052                 if (tcamx & tcamy)
9053                         continue;
9054                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
9055
9056                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
9057                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
9058
9059                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
9060                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
9061                             "%012jx %06x %06x    -    -   %3c"
9062                             "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
9063                             addr[1], addr[2], addr[3], addr[4], addr[5],
9064                             (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
9065                             port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
9066                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
9067                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
9068                 } else {
9069                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
9070                             "%012jx    -       -   ", i, addr[0], addr[1],
9071                             addr[2], addr[3], addr[4], addr[5],
9072                             (uintmax_t)mask);
9073
9074                         if (vlan_vld)
9075                                 sbuf_printf(sb, "%4u   Y     ", ivlan);
9076                         else
9077                                 sbuf_printf(sb, "  -    N     ");
9078
9079                         sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
9080                             lookup_type ? 'I' : 'O', port_num,
9081                             cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
9082                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
9083                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
9084                 }
9085
9086
9087                 if (cls_lo & F_T6_REPLICATE) {
9088                         struct fw_ldst_cmd ldst_cmd;
9089
9090                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
9091                         ldst_cmd.op_to_addrspace =
9092                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
9093                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
9094                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
9095                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
9096                         ldst_cmd.u.mps.rplc.fid_idx =
9097                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
9098                                 V_FW_LDST_CMD_IDX(i));
9099
9100                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
9101                             "t6mps");
9102                         if (rc)
9103                                 break;
9104                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
9105                             sizeof(ldst_cmd), &ldst_cmd);
9106                         end_synchronized_op(sc, 0);
9107
9108                         if (rc != 0) {
9109                                 sbuf_printf(sb, "%72d", rc);
9110                                 rc = 0;
9111                         } else {
9112                                 sbuf_printf(sb, " %08x %08x %08x %08x"
9113                                     " %08x %08x %08x %08x",
9114                                     be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
9115                                     be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
9116                                     be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
9117                                     be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
9118                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
9119                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
9120                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
9121                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
9122                         }
9123                 } else
9124                         sbuf_printf(sb, "%72s", "");
9125
9126                 sbuf_printf(sb, "%4u%3u%3u%3u %#x",
9127                     G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
9128                     G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
9129                     (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
9130         }
9131
9132         if (rc)
9133                 (void) sbuf_finish(sb);
9134         else
9135                 rc = sbuf_finish(sb);
9136         sbuf_delete(sb);
9137
9138         return (rc);
9139 }
9140
9141 static int
9142 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
9143 {
9144         struct adapter *sc = arg1;
9145         struct sbuf *sb;
9146         int rc;
9147         uint16_t mtus[NMTUS];
9148
9149         rc = sysctl_wire_old_buffer(req, 0);
9150         if (rc != 0)
9151                 return (rc);
9152
9153         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9154         if (sb == NULL)
9155                 return (ENOMEM);
9156
9157         t4_read_mtu_tbl(sc, mtus, NULL);
9158
9159         sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
9160             mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
9161             mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
9162             mtus[14], mtus[15]);
9163
9164         rc = sbuf_finish(sb);
9165         sbuf_delete(sb);
9166
9167         return (rc);
9168 }
9169
9170 static int
9171 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
9172 {
9173         struct adapter *sc = arg1;
9174         struct sbuf *sb;
9175         int rc, i;
9176         uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
9177         uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
9178         static const char *tx_stats[MAX_PM_NSTATS] = {
9179                 "Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
9180                 "Tx FIFO wait", NULL, "Tx latency"
9181         };
9182         static const char *rx_stats[MAX_PM_NSTATS] = {
9183                 "Read:", "Write bypass:", "Write mem:", "Flush:",
9184                 "Rx FIFO wait", NULL, "Rx latency"
9185         };
9186
9187         rc = sysctl_wire_old_buffer(req, 0);
9188         if (rc != 0)
9189                 return (rc);
9190
9191         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9192         if (sb == NULL)
9193                 return (ENOMEM);
9194
9195         t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
9196         t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
9197
9198         sbuf_printf(sb, "                Tx pcmds             Tx bytes");
9199         for (i = 0; i < 4; i++) {
9200                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9201                     tx_cyc[i]);
9202         }
9203
9204         sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
9205         for (i = 0; i < 4; i++) {
9206                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9207                     rx_cyc[i]);
9208         }
9209
9210         if (chip_id(sc) > CHELSIO_T5) {
9211                 sbuf_printf(sb,
9212                     "\n              Total wait      Total occupancy");
9213                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9214                     tx_cyc[i]);
9215                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9216                     rx_cyc[i]);
9217
9218                 i += 2;
9219                 MPASS(i < nitems(tx_stats));
9220
9221                 sbuf_printf(sb,
9222                     "\n                   Reads           Total wait");
9223                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
9224                     tx_cyc[i]);
9225                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
9226                     rx_cyc[i]);
9227         }
9228
9229         rc = sbuf_finish(sb);
9230         sbuf_delete(sb);
9231
9232         return (rc);
9233 }
9234
9235 static int
9236 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
9237 {
9238         struct adapter *sc = arg1;
9239         struct sbuf *sb;
9240         int rc;
9241         struct tp_rdma_stats stats;
9242
9243         rc = sysctl_wire_old_buffer(req, 0);
9244         if (rc != 0)
9245                 return (rc);
9246
9247         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9248         if (sb == NULL)
9249                 return (ENOMEM);
9250
9251         mtx_lock(&sc->reg_lock);
9252         t4_tp_get_rdma_stats(sc, &stats, 0);
9253         mtx_unlock(&sc->reg_lock);
9254
9255         sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
9256         sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
9257
9258         rc = sbuf_finish(sb);
9259         sbuf_delete(sb);
9260
9261         return (rc);
9262 }
9263
9264 static int
9265 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
9266 {
9267         struct adapter *sc = arg1;
9268         struct sbuf *sb;
9269         int rc;
9270         struct tp_tcp_stats v4, v6;
9271
9272         rc = sysctl_wire_old_buffer(req, 0);
9273         if (rc != 0)
9274                 return (rc);
9275
9276         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9277         if (sb == NULL)
9278                 return (ENOMEM);
9279
9280         mtx_lock(&sc->reg_lock);
9281         t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
9282         mtx_unlock(&sc->reg_lock);
9283
9284         sbuf_printf(sb,
9285             "                                IP                 IPv6\n");
9286         sbuf_printf(sb, "OutRsts:      %20u %20u\n",
9287             v4.tcp_out_rsts, v6.tcp_out_rsts);
9288         sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
9289             v4.tcp_in_segs, v6.tcp_in_segs);
9290         sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
9291             v4.tcp_out_segs, v6.tcp_out_segs);
9292         sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
9293             v4.tcp_retrans_segs, v6.tcp_retrans_segs);
9294
9295         rc = sbuf_finish(sb);
9296         sbuf_delete(sb);
9297
9298         return (rc);
9299 }
9300
9301 static int
9302 sysctl_tids(SYSCTL_HANDLER_ARGS)
9303 {
9304         struct adapter *sc = arg1;
9305         struct sbuf *sb;
9306         int rc;
9307         struct tid_info *t = &sc->tids;
9308
9309         rc = sysctl_wire_old_buffer(req, 0);
9310         if (rc != 0)
9311                 return (rc);
9312
9313         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9314         if (sb == NULL)
9315                 return (ENOMEM);
9316
9317         if (t->natids) {
9318                 sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
9319                     t->atids_in_use);
9320         }
9321
9322         if (t->nhpftids) {
9323                 sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n",
9324                     t->hpftid_base, t->hpftid_end, t->hpftids_in_use);
9325         }
9326
9327         if (t->ntids) {
9328                 sbuf_printf(sb, "TID range: ");
9329                 if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
9330                         uint32_t b, hb;
9331
9332                         if (chip_id(sc) <= CHELSIO_T5) {
9333                                 b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
9334                                 hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
9335                         } else {
9336                                 b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
9337                                 hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
9338                         }
9339
9340                         if (b)
9341                                 sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1);
9342                         sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
9343                 } else {
9344                         sbuf_printf(sb, "%u-%u", t->tid_base, t->tid_base +
9345                             t->ntids - 1);
9346                 }
9347                 sbuf_printf(sb, ", in use: %u\n",
9348                     atomic_load_acq_int(&t->tids_in_use));
9349         }
9350
9351         if (t->nstids) {
9352                 sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
9353                     t->stid_base + t->nstids - 1, t->stids_in_use);
9354         }
9355
9356         if (t->nftids) {
9357                 sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base,
9358                     t->ftid_end, t->ftids_in_use);
9359         }
9360
9361         if (t->netids) {
9362                 sbuf_printf(sb, "ETID range: %u-%u, in use: %u\n", t->etid_base,
9363                     t->etid_base + t->netids - 1, t->etids_in_use);
9364         }
9365
9366         sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
9367             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
9368             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
9369
9370         rc = sbuf_finish(sb);
9371         sbuf_delete(sb);
9372
9373         return (rc);
9374 }
9375
9376 static int
9377 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
9378 {
9379         struct adapter *sc = arg1;
9380         struct sbuf *sb;
9381         int rc;
9382         struct tp_err_stats stats;
9383
9384         rc = sysctl_wire_old_buffer(req, 0);
9385         if (rc != 0)
9386                 return (rc);
9387
9388         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9389         if (sb == NULL)
9390                 return (ENOMEM);
9391
9392         mtx_lock(&sc->reg_lock);
9393         t4_tp_get_err_stats(sc, &stats, 0);
9394         mtx_unlock(&sc->reg_lock);
9395
9396         if (sc->chip_params->nchan > 2) {
9397                 sbuf_printf(sb, "                 channel 0  channel 1"
9398                     "  channel 2  channel 3\n");
9399                 sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
9400                     stats.mac_in_errs[0], stats.mac_in_errs[1],
9401                     stats.mac_in_errs[2], stats.mac_in_errs[3]);
9402                 sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
9403                     stats.hdr_in_errs[0], stats.hdr_in_errs[1],
9404                     stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
9405                 sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
9406                     stats.tcp_in_errs[0], stats.tcp_in_errs[1],
9407                     stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
9408                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
9409                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
9410                     stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
9411                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
9412                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
9413                     stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
9414                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
9415                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
9416                     stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
9417                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
9418                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
9419                     stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
9420                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
9421                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
9422                     stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
9423         } else {
9424                 sbuf_printf(sb, "                 channel 0  channel 1\n");
9425                 sbuf_printf(sb, "macInErrs:      %10u %10u\n",
9426                     stats.mac_in_errs[0], stats.mac_in_errs[1]);
9427                 sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
9428                     stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
9429                 sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
9430                     stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
9431                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
9432                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
9433                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
9434                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
9435                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
9436                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
9437                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
9438                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
9439                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
9440                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
9441         }
9442
9443         sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
9444             stats.ofld_no_neigh, stats.ofld_cong_defer);
9445
9446         rc = sbuf_finish(sb);
9447         sbuf_delete(sb);
9448
9449         return (rc);
9450 }
9451
9452 static int
9453 sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
9454 {
9455         struct adapter *sc = arg1;
9456         struct tp_params *tpp = &sc->params.tp;
9457         u_int mask;
9458         int rc;
9459
9460         mask = tpp->la_mask >> 16;
9461         rc = sysctl_handle_int(oidp, &mask, 0, req);
9462         if (rc != 0 || req->newptr == NULL)
9463                 return (rc);
9464         if (mask > 0xffff)
9465                 return (EINVAL);
9466         tpp->la_mask = mask << 16;
9467         t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
9468
9469         return (0);
9470 }
9471
/*
 * Describes one bit field inside a 64-bit word for field_desc_show().  Tables
 * of these are terminated by an entry whose name is NULL.
 */
struct field_desc {
        const char *name;       /* label printed in front of the value */
        u_int start;            /* position of the field's lowest bit */
        u_int width;            /* number of bits in the field */
};
9477
9478 static void
9479 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
9480 {
9481         char buf[32];
9482         int line_size = 0;
9483
9484         while (f->name) {
9485                 uint64_t mask = (1ULL << f->width) - 1;
9486                 int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
9487                     ((uintmax_t)v >> f->start) & mask);
9488
9489                 if (line_size + len >= 79) {
9490                         line_size = 8;
9491                         sbuf_printf(sb, "\n        ");
9492                 }
9493                 sbuf_printf(sb, "%s ", buf);
9494                 line_size += len + 1;
9495                 f++;
9496         }
9497         sbuf_printf(sb, "\n");
9498 }
9499
/*
 * Bit layout ({ name, lowest bit, width }) used to decode a TP logic analyzer
 * word.  tp_la_show() applies it to every word; tp_la_show2() applies it to
 * both words of a pair and tp_la_show3() to the first word of a pair.  The
 * table is terminated by a NULL name.
 */
static const struct field_desc tp_la0[] = {
        { "RcfOpCodeOut", 60, 4 },
        { "State", 56, 4 },
        { "WcfState", 52, 4 },
        { "RcfOpcSrcOut", 50, 2 },
        { "CRxError", 49, 1 },
        { "ERxError", 48, 1 },
        { "SanityFailed", 47, 1 },
        { "SpuriousMsg", 46, 1 },
        { "FlushInputMsg", 45, 1 },
        { "FlushInputCpl", 44, 1 },
        { "RssUpBit", 43, 1 },
        { "RssFilterHit", 42, 1 },
        { "Tid", 32, 10 },
        { "InitTcb", 31, 1 },
        { "LineNumber", 24, 7 },
        { "Emsg", 23, 1 },
        { "EdataOut", 22, 1 },
        { "Cmsg", 21, 1 },
        { "CdataOut", 20, 1 },
        { "EreadPdu", 19, 1 },
        { "CreadPdu", 18, 1 },
        { "TunnelPkt", 17, 1 },
        { "RcfPeerFin", 16, 1 },
        { "RcfReasonOut", 12, 4 },
        { "TxCchannel", 10, 2 },
        { "RcfTxChannel", 8, 2 },
        { "RxEchannel", 6, 2 },
        { "RcfRxChannel", 5, 1 },
        { "RcfDataOutSrdy", 4, 1 },
        { "RxDvld", 3, 1 },
        { "RxOoDvld", 2, 1 },
        { "RxCongestion", 1, 1 },
        { "TxCongestion", 0, 1 },
        { NULL }
};
9536
/*
 * Bit layout ({ name, lowest bit, width }) that tp_la_show3() uses for the
 * second word of a pair when bit 17 of the first word (TunnelPkt in tp_la0)
 * is clear.  Terminated by a NULL name.
 */
static const struct field_desc tp_la1[] = {
        { "CplCmdIn", 56, 8 },
        { "CplCmdOut", 48, 8 },
        { "ESynOut", 47, 1 },
        { "EAckOut", 46, 1 },
        { "EFinOut", 45, 1 },
        { "ERstOut", 44, 1 },
        { "SynIn", 43, 1 },
        { "AckIn", 42, 1 },
        { "FinIn", 41, 1 },
        { "RstIn", 40, 1 },
        { "DataIn", 39, 1 },
        { "DataInVld", 38, 1 },
        { "PadIn", 37, 1 },
        { "RxBufEmpty", 36, 1 },
        { "RxDdp", 35, 1 },
        { "RxFbCongestion", 34, 1 },
        { "TxFbCongestion", 33, 1 },
        { "TxPktSumSrdy", 32, 1 },
        { "RcfUlpType", 28, 4 },
        { "Eread", 27, 1 },
        { "Ebypass", 26, 1 },
        { "Esave", 25, 1 },
        { "Static0", 24, 1 },
        { "Cread", 23, 1 },
        { "Cbypass", 22, 1 },
        { "Csave", 21, 1 },
        { "CPktOut", 20, 1 },
        { "RxPagePoolFull", 18, 2 },
        { "RxLpbkPkt", 17, 1 },
        { "TxLpbkPkt", 16, 1 },
        { "RxVfValid", 15, 1 },
        { "SynLearned", 14, 1 },
        { "SetDelEntry", 13, 1 },
        { "SetInvEntry", 12, 1 },
        { "CpcmdDvld", 11, 1 },
        { "CpcmdSave", 10, 1 },
        { "RxPstructsFull", 8, 2 },
        { "EpcmdDvld", 7, 1 },
        { "EpcmdFlush", 6, 1 },
        { "EpcmdTrimPrefix", 5, 1 },
        { "EpcmdTrimPostfix", 4, 1 },
        { "ERssIp4Pkt", 3, 1 },
        { "ERssIp6Pkt", 2, 1 },
        { "ERssTcpUdpPkt", 1, 1 },
        { "ERssFceFipPkt", 0, 1 },
        { NULL }
};
9585
/*
 * Bit layout ({ name, lowest bit, width }) that tp_la_show3() uses for the
 * second word of a pair when bit 17 of the first word (TunnelPkt in tp_la0)
 * is set.  It matches tp_la1 from bit 43 down; the bits above that carry
 * the MpsVfVld/MpsPf/MpsVf fields instead of CplCmdOut and the E*Out flags.
 * Terminated by a NULL name.
 */
static const struct field_desc tp_la2[] = {
        { "CplCmdIn", 56, 8 },
        { "MpsVfVld", 55, 1 },
        { "MpsPf", 52, 3 },
        { "MpsVf", 44, 8 },
        { "SynIn", 43, 1 },
        { "AckIn", 42, 1 },
        { "FinIn", 41, 1 },
        { "RstIn", 40, 1 },
        { "DataIn", 39, 1 },
        { "DataInVld", 38, 1 },
        { "PadIn", 37, 1 },
        { "RxBufEmpty", 36, 1 },
        { "RxDdp", 35, 1 },
        { "RxFbCongestion", 34, 1 },
        { "TxFbCongestion", 33, 1 },
        { "TxPktSumSrdy", 32, 1 },
        { "RcfUlpType", 28, 4 },
        { "Eread", 27, 1 },
        { "Ebypass", 26, 1 },
        { "Esave", 25, 1 },
        { "Static0", 24, 1 },
        { "Cread", 23, 1 },
        { "Cbypass", 22, 1 },
        { "Csave", 21, 1 },
        { "CPktOut", 20, 1 },
        { "RxPagePoolFull", 18, 2 },
        { "RxLpbkPkt", 17, 1 },
        { "TxLpbkPkt", 16, 1 },
        { "RxVfValid", 15, 1 },
        { "SynLearned", 14, 1 },
        { "SetDelEntry", 13, 1 },
        { "SetInvEntry", 12, 1 },
        { "CpcmdDvld", 11, 1 },
        { "CpcmdSave", 10, 1 },
        { "RxPstructsFull", 8, 2 },
        { "EpcmdDvld", 7, 1 },
        { "EpcmdFlush", 6, 1 },
        { "EpcmdTrimPrefix", 5, 1 },
        { "EpcmdTrimPostfix", 4, 1 },
        { "ERssIp4Pkt", 3, 1 },
        { "ERssIp6Pkt", 2, 1 },
        { "ERssTcpUdpPkt", 1, 1 },
        { "ERssFceFipPkt", 0, 1 },
        { NULL }
};
9632
9633 static void
9634 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
9635 {
9636
9637         field_desc_show(sb, *p, tp_la0);
9638 }
9639
9640 static void
9641 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
9642 {
9643
9644         if (idx)
9645                 sbuf_printf(sb, "\n");
9646         field_desc_show(sb, p[0], tp_la0);
9647         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9648                 field_desc_show(sb, p[1], tp_la0);
9649 }
9650
9651 static void
9652 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
9653 {
9654
9655         if (idx)
9656                 sbuf_printf(sb, "\n");
9657         field_desc_show(sb, p[0], tp_la0);
9658         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
9659                 field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
9660 }
9661
9662 static int
9663 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
9664 {
9665         struct adapter *sc = arg1;
9666         struct sbuf *sb;
9667         uint64_t *buf, *p;
9668         int rc;
9669         u_int i, inc;
9670         void (*show_func)(struct sbuf *, uint64_t *, int);
9671
9672         rc = sysctl_wire_old_buffer(req, 0);
9673         if (rc != 0)
9674                 return (rc);
9675
9676         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9677         if (sb == NULL)
9678                 return (ENOMEM);
9679
9680         buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
9681
9682         t4_tp_read_la(sc, buf, NULL);
9683         p = buf;
9684
9685         switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
9686         case 2:
9687                 inc = 2;
9688                 show_func = tp_la_show2;
9689                 break;
9690         case 3:
9691                 inc = 2;
9692                 show_func = tp_la_show3;
9693                 break;
9694         default:
9695                 inc = 1;
9696                 show_func = tp_la_show;
9697         }
9698
9699         for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
9700                 (*show_func)(sb, p, i);
9701
9702         rc = sbuf_finish(sb);
9703         sbuf_delete(sb);
9704         free(buf, M_CXGBE);
9705         return (rc);
9706 }
9707
9708 static int
9709 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
9710 {
9711         struct adapter *sc = arg1;
9712         struct sbuf *sb;
9713         int rc;
9714         u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
9715
9716         rc = sysctl_wire_old_buffer(req, 0);
9717         if (rc != 0)
9718                 return (rc);
9719
9720         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9721         if (sb == NULL)
9722                 return (ENOMEM);
9723
9724         t4_get_chan_txrate(sc, nrate, orate);
9725
9726         if (sc->chip_params->nchan > 2) {
9727                 sbuf_printf(sb, "              channel 0   channel 1"
9728                     "   channel 2   channel 3\n");
9729                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
9730                     nrate[0], nrate[1], nrate[2], nrate[3]);
9731                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
9732                     orate[0], orate[1], orate[2], orate[3]);
9733         } else {
9734                 sbuf_printf(sb, "              channel 0   channel 1\n");
9735                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
9736                     nrate[0], nrate[1]);
9737                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
9738                     orate[0], orate[1]);
9739         }
9740
9741         rc = sbuf_finish(sb);
9742         sbuf_delete(sb);
9743
9744         return (rc);
9745 }
9746
9747 static int
9748 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
9749 {
9750         struct adapter *sc = arg1;
9751         struct sbuf *sb;
9752         uint32_t *buf, *p;
9753         int rc, i;
9754
9755         rc = sysctl_wire_old_buffer(req, 0);
9756         if (rc != 0)
9757                 return (rc);
9758
9759         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9760         if (sb == NULL)
9761                 return (ENOMEM);
9762
9763         buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
9764             M_ZERO | M_WAITOK);
9765
9766         t4_ulprx_read_la(sc, buf);
9767         p = buf;
9768
9769         sbuf_printf(sb, "      Pcmd        Type   Message"
9770             "                Data");
9771         for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
9772                 sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
9773                     p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
9774         }
9775
9776         rc = sbuf_finish(sb);
9777         sbuf_delete(sb);
9778         free(buf, M_CXGBE);
9779         return (rc);
9780 }
9781
9782 static int
9783 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
9784 {
9785         struct adapter *sc = arg1;
9786         struct sbuf *sb;
9787         int rc, v;
9788
9789         MPASS(chip_id(sc) >= CHELSIO_T5);
9790
9791         rc = sysctl_wire_old_buffer(req, 0);
9792         if (rc != 0)
9793                 return (rc);
9794
9795         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9796         if (sb == NULL)
9797                 return (ENOMEM);
9798
9799         v = t4_read_reg(sc, A_SGE_STAT_CFG);
9800         if (G_STATSOURCE_T5(v) == 7) {
9801                 int mode;
9802
9803                 mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
9804                 if (mode == 0) {
9805                         sbuf_printf(sb, "total %d, incomplete %d",
9806                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
9807                             t4_read_reg(sc, A_SGE_STAT_MATCH));
9808                 } else if (mode == 1) {
9809                         sbuf_printf(sb, "total %d, data overflow %d",
9810                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
9811                             t4_read_reg(sc, A_SGE_STAT_MATCH));
9812                 } else {
9813                         sbuf_printf(sb, "unknown mode %d", mode);
9814                 }
9815         }
9816         rc = sbuf_finish(sb);
9817         sbuf_delete(sb);
9818
9819         return (rc);
9820 }
9821
9822 static int
9823 sysctl_cpus(SYSCTL_HANDLER_ARGS)
9824 {
9825         struct adapter *sc = arg1;
9826         enum cpu_sets op = arg2;
9827         cpuset_t cpuset;
9828         struct sbuf *sb;
9829         int i, rc;
9830
9831         MPASS(op == LOCAL_CPUS || op == INTR_CPUS);
9832
9833         CPU_ZERO(&cpuset);
9834         rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset);
9835         if (rc != 0)
9836                 return (rc);
9837
9838         rc = sysctl_wire_old_buffer(req, 0);
9839         if (rc != 0)
9840                 return (rc);
9841
9842         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9843         if (sb == NULL)
9844                 return (ENOMEM);
9845
9846         CPU_FOREACH(i)
9847                 sbuf_printf(sb, "%d ", i);
9848         rc = sbuf_finish(sb);
9849         sbuf_delete(sb);
9850
9851         return (rc);
9852 }
9853
9854 #ifdef TCP_OFFLOAD
9855 static int
9856 sysctl_tls(SYSCTL_HANDLER_ARGS)
9857 {
9858         struct adapter *sc = arg1;
9859         int i, j, v, rc;
9860         struct vi_info *vi;
9861
9862         v = sc->tt.tls;
9863         rc = sysctl_handle_int(oidp, &v, 0, req);
9864         if (rc != 0 || req->newptr == NULL)
9865                 return (rc);
9866
9867         if (v != 0 && !(sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS))
9868                 return (ENOTSUP);
9869
9870         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4stls");
9871         if (rc)
9872                 return (rc);
9873         sc->tt.tls = !!v;
9874         for_each_port(sc, i) {
9875                 for_each_vi(sc->port[i], j, vi) {
9876                         if (vi->flags & VI_INIT_DONE)
9877                                 t4_update_fl_bufsize(vi->ifp);
9878                 }
9879         }
9880         end_synchronized_op(sc, 0);
9881
9882         return (0);
9883
9884 }
9885
9886 static int
9887 sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
9888 {
9889         struct adapter *sc = arg1;
9890         int *old_ports, *new_ports;
9891         int i, new_count, rc;
9892
9893         if (req->newptr == NULL && req->oldptr == NULL)
9894                 return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
9895                     sizeof(sc->tt.tls_rx_ports[0])));
9896
9897         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
9898         if (rc)
9899                 return (rc);
9900
9901         if (sc->tt.num_tls_rx_ports == 0) {
9902                 i = -1;
9903                 rc = SYSCTL_OUT(req, &i, sizeof(i));
9904         } else
9905                 rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
9906                     sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
9907         if (rc == 0 && req->newptr != NULL) {
9908                 new_count = req->newlen / sizeof(new_ports[0]);
9909                 new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
9910                     M_WAITOK);
9911                 rc = SYSCTL_IN(req, new_ports, new_count *
9912                     sizeof(new_ports[0]));
9913                 if (rc)
9914                         goto err;
9915
9916                 /* Allow setting to a single '-1' to clear the list. */
9917                 if (new_count == 1 && new_ports[0] == -1) {
9918                         ADAPTER_LOCK(sc);
9919                         old_ports = sc->tt.tls_rx_ports;
9920                         sc->tt.tls_rx_ports = NULL;
9921                         sc->tt.num_tls_rx_ports = 0;
9922                         ADAPTER_UNLOCK(sc);
9923                         free(old_ports, M_CXGBE);
9924                 } else {
9925                         for (i = 0; i < new_count; i++) {
9926                                 if (new_ports[i] < 1 ||
9927                                     new_ports[i] > IPPORT_MAX) {
9928                                         rc = EINVAL;
9929                                         goto err;
9930                                 }
9931                         }
9932
9933                         ADAPTER_LOCK(sc);
9934                         old_ports = sc->tt.tls_rx_ports;
9935                         sc->tt.tls_rx_ports = new_ports;
9936                         sc->tt.num_tls_rx_ports = new_count;
9937                         ADAPTER_UNLOCK(sc);
9938                         free(old_ports, M_CXGBE);
9939                         new_ports = NULL;
9940                 }
9941         err:
9942                 free(new_ports, M_CXGBE);
9943         }
9944         end_synchronized_op(sc, 0);
9945         return (rc);
9946 }
9947
/*
 * Write val / factor into buf as a decimal number, e.g. val 1050000 with
 * factor 1000000 yields "1.05".  No fractional part is written when val is
 * an exact multiple of factor.
 *
 * buf    - output buffer, NUL-terminated on return when len > 0
 * len    - size of buf in bytes
 * val    - value to convert
 * factor - divisor (must be non-zero)
 *
 * The fraction is produced one digit at a time by long division so that
 * leading zeros are kept; printing the remainder as an integer turned 1.05
 * into "1.5".  Digits stop when the remainder is exhausted or buf is full.
 * If the integer part does not leave room for '.' and at least one digit,
 * only the (possibly truncated) integer part is written.
 */
static void
unit_conv(char *buf, size_t len, u_int val, u_int factor)
{
        uint64_t rem = val % factor;    /* 64-bit so that rem * 10 cannot wrap */
        int n;

        n = snprintf(buf, len, "%u", val / factor);
        if (rem == 0 || n < 0 || (size_t)n + 3 > len)
                return;

        buf[n++] = '.';
        while (rem != 0 && (size_t)n + 1 < len) {
                rem *= 10;
                buf[n++] = (char)('0' + rem / factor);
                rem %= factor;
        }
        buf[n] = '\0';
}
9961
9962 static int
9963 sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
9964 {
9965         struct adapter *sc = arg1;
9966         char buf[16];
9967         u_int res, re;
9968         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9969
9970         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
9971         switch (arg2) {
9972         case 0:
9973                 /* timer_tick */
9974                 re = G_TIMERRESOLUTION(res);
9975                 break;
9976         case 1:
9977                 /* TCP timestamp tick */
9978                 re = G_TIMESTAMPRESOLUTION(res);
9979                 break;
9980         case 2:
9981                 /* DACK tick */
9982                 re = G_DELAYEDACKRESOLUTION(res);
9983                 break;
9984         default:
9985                 return (EDOOFUS);
9986         }
9987
9988         unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
9989
9990         return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
9991 }
9992
9993 static int
9994 sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
9995 {
9996         struct adapter *sc = arg1;
9997         u_int res, dack_re, v;
9998         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9999
10000         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
10001         dack_re = G_DELAYEDACKRESOLUTION(res);
10002         v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
10003
10004         return (sysctl_handle_int(oidp, &v, 0, req));
10005 }
10006
10007 static int
10008 sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
10009 {
10010         struct adapter *sc = arg1;
10011         int reg = arg2;
10012         u_int tre;
10013         u_long tp_tick_us, v;
10014         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
10015
10016         MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
10017             reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
10018             reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
10019             reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
10020
10021         tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
10022         tp_tick_us = (cclk_ps << tre) / 1000000;
10023
10024         if (reg == A_TP_INIT_SRTT)
10025                 v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
10026         else
10027                 v = tp_tick_us * t4_read_reg(sc, reg);
10028
10029         return (sysctl_handle_long(oidp, &v, 0, req));
10030 }
10031
10032 /*
10033  * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is
10034  * passed to this function.
10035  */
10036 static int
10037 sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
10038 {
10039         struct adapter *sc = arg1;
10040         int idx = arg2;
10041         u_int v;
10042
10043         MPASS(idx >= 0 && idx <= 24);
10044
10045         v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
10046
10047         return (sysctl_handle_int(oidp, &v, 0, req));
10048 }
10049
10050 static int
10051 sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
10052 {
10053         struct adapter *sc = arg1;
10054         int idx = arg2;
10055         u_int shift, v, r;
10056
10057         MPASS(idx >= 0 && idx < 16);
10058
10059         r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
10060         shift = (idx & 3) << 3;
10061         v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
10062
10063         return (sysctl_handle_int(oidp, &v, 0, req));
10064 }
10065
10066 static int
10067 sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
10068 {
10069         struct vi_info *vi = arg1;
10070         struct adapter *sc = vi->adapter;
10071         int idx, rc, i;
10072         struct sge_ofld_rxq *ofld_rxq;
10073         uint8_t v;
10074
10075         idx = vi->ofld_tmr_idx;
10076
10077         rc = sysctl_handle_int(oidp, &idx, 0, req);
10078         if (rc != 0 || req->newptr == NULL)
10079                 return (rc);
10080
10081         if (idx < 0 || idx >= SGE_NTIMERS)
10082                 return (EINVAL);
10083
10084         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
10085             "t4otmr");
10086         if (rc)
10087                 return (rc);
10088
10089         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
10090         for_each_ofld_rxq(vi, i, ofld_rxq) {
10091 #ifdef atomic_store_rel_8
10092                 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
10093 #else
10094                 ofld_rxq->iq.intr_params = v;
10095 #endif
10096         }
10097         vi->ofld_tmr_idx = idx;
10098
10099         end_synchronized_op(sc, LOCK_HELD);
10100         return (0);
10101 }
10102
10103 static int
10104 sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
10105 {
10106         struct vi_info *vi = arg1;
10107         struct adapter *sc = vi->adapter;
10108         int idx, rc;
10109
10110         idx = vi->ofld_pktc_idx;
10111
10112         rc = sysctl_handle_int(oidp, &idx, 0, req);
10113         if (rc != 0 || req->newptr == NULL)
10114                 return (rc);
10115
10116         if (idx < -1 || idx >= SGE_NCOUNTERS)
10117                 return (EINVAL);
10118
10119         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
10120             "t4opktc");
10121         if (rc)
10122                 return (rc);
10123
10124         if (vi->flags & VI_INIT_DONE)
10125                 rc = EBUSY; /* cannot be changed once the queues are created */
10126         else
10127                 vi->ofld_pktc_idx = idx;
10128
10129         end_synchronized_op(sc, LOCK_HELD);
10130         return (rc);
10131 }
10132 #endif
10133
10134 static int
10135 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
10136 {
10137         int rc;
10138
10139         if (cntxt->cid > M_CTXTQID)
10140                 return (EINVAL);
10141
10142         if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
10143             cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
10144                 return (EINVAL);
10145
10146         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
10147         if (rc)
10148                 return (rc);
10149
10150         if (sc->flags & FW_OK) {
10151                 rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
10152                     &cntxt->data[0]);
10153                 if (rc == 0)
10154                         goto done;
10155         }
10156
10157         /*
10158          * Read via firmware failed or wasn't even attempted.  Read directly via
10159          * the backdoor.
10160          */
10161         rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
10162 done:
10163         end_synchronized_op(sc, 0);
10164         return (rc);
10165 }
10166
10167 static int
10168 load_fw(struct adapter *sc, struct t4_data *fw)
10169 {
10170         int rc;
10171         uint8_t *fw_data;
10172
10173         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
10174         if (rc)
10175                 return (rc);
10176
10177         /*
10178          * The firmware, with the sole exception of the memory parity error
10179          * handler, runs from memory and not flash.  It is almost always safe to
10180          * install a new firmware on a running system.  Just set bit 1 in
10181          * hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
10182          */
10183         if (sc->flags & FULL_INIT_DONE &&
10184             (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
10185                 rc = EBUSY;
10186                 goto done;
10187         }
10188
10189         fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
10190
10191         rc = copyin(fw->data, fw_data, fw->len);
10192         if (rc == 0)
10193                 rc = -t4_load_fw(sc, fw_data, fw->len);
10194
10195         free(fw_data, M_CXGBE);
10196 done:
10197         end_synchronized_op(sc, 0);
10198         return (rc);
10199 }
10200
10201 static int
10202 load_cfg(struct adapter *sc, struct t4_data *cfg)
10203 {
10204         int rc;
10205         uint8_t *cfg_data = NULL;
10206
10207         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
10208         if (rc)
10209                 return (rc);
10210
10211         if (cfg->len == 0) {
10212                 /* clear */
10213                 rc = -t4_load_cfg(sc, NULL, 0);
10214                 goto done;
10215         }
10216
10217         cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
10218
10219         rc = copyin(cfg->data, cfg_data, cfg->len);
10220         if (rc == 0)
10221                 rc = -t4_load_cfg(sc, cfg_data, cfg->len);
10222
10223         free(cfg_data, M_CXGBE);
10224 done:
10225         end_synchronized_op(sc, 0);
10226         return (rc);
10227 }
10228
10229 static int
10230 load_boot(struct adapter *sc, struct t4_bootrom *br)
10231 {
10232         int rc;
10233         uint8_t *br_data = NULL;
10234         u_int offset;
10235
10236         if (br->len > 1024 * 1024)
10237                 return (EFBIG);
10238
10239         if (br->pf_offset == 0) {
10240                 /* pfidx */
10241                 if (br->pfidx_addr > 7)
10242                         return (EINVAL);
10243                 offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
10244                     A_PCIE_PF_EXPROM_OFST)));
10245         } else if (br->pf_offset == 1) {
10246                 /* offset */
10247                 offset = G_OFFSET(br->pfidx_addr);
10248         } else {
10249                 return (EINVAL);
10250         }
10251
10252         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
10253         if (rc)
10254                 return (rc);
10255
10256         if (br->len == 0) {
10257                 /* clear */
10258                 rc = -t4_load_boot(sc, NULL, offset, 0);
10259                 goto done;
10260         }
10261
10262         br_data = malloc(br->len, M_CXGBE, M_WAITOK);
10263
10264         rc = copyin(br->data, br_data, br->len);
10265         if (rc == 0)
10266                 rc = -t4_load_boot(sc, br_data, offset, br->len);
10267
10268         free(br_data, M_CXGBE);
10269 done:
10270         end_synchronized_op(sc, 0);
10271         return (rc);
10272 }
10273
10274 static int
10275 load_bootcfg(struct adapter *sc, struct t4_data *bc)
10276 {
10277         int rc;
10278         uint8_t *bc_data = NULL;
10279
10280         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
10281         if (rc)
10282                 return (rc);
10283
10284         if (bc->len == 0) {
10285                 /* clear */
10286                 rc = -t4_load_bootcfg(sc, NULL, 0);
10287                 goto done;
10288         }
10289
10290         bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
10291
10292         rc = copyin(bc->data, bc_data, bc->len);
10293         if (rc == 0)
10294                 rc = -t4_load_bootcfg(sc, bc_data, bc->len);
10295
10296         free(bc_data, M_CXGBE);
10297 done:
10298         end_synchronized_op(sc, 0);
10299         return (rc);
10300 }
10301
10302 static int
10303 cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
10304 {
10305         int rc;
10306         struct cudbg_init *cudbg;
10307         void *handle, *buf;
10308
10309         /* buf is large, don't block if no memory is available */
10310         buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
10311         if (buf == NULL)
10312                 return (ENOMEM);
10313
10314         handle = cudbg_alloc_handle();
10315         if (handle == NULL) {
10316                 rc = ENOMEM;
10317                 goto done;
10318         }
10319
10320         cudbg = cudbg_get_init(handle);
10321         cudbg->adap = sc;
10322         cudbg->print = (cudbg_print_cb)printf;
10323
10324 #ifndef notyet
10325         device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
10326             __func__, dump->wr_flash, dump->len, dump->data);
10327 #endif
10328
10329         if (dump->wr_flash)
10330                 cudbg->use_flash = 1;
10331         MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
10332         memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));
10333
10334         rc = cudbg_collect(handle, buf, &dump->len);
10335         if (rc != 0)
10336                 goto done;
10337
10338         rc = copyout(buf, dump->data, dump->len);
10339 done:
10340         cudbg_free_handle(handle);
10341         free(buf, M_CXGBE);
10342         return (rc);
10343 }
10344
10345 static void
10346 free_offload_policy(struct t4_offload_policy *op)
10347 {
10348         struct offload_rule *r;
10349         int i;
10350
10351         if (op == NULL)
10352                 return;
10353
10354         r = &op->rule[0];
10355         for (i = 0; i < op->nrules; i++, r++) {
10356                 free(r->bpf_prog.bf_insns, M_CXGBE);
10357         }
10358         free(op->rule, M_CXGBE);
10359         free(op, M_CXGBE);
10360 }
10361
10362 static int
10363 set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
10364 {
10365         int i, rc, len;
10366         struct t4_offload_policy *op, *old;
10367         struct bpf_program *bf;
10368         const struct offload_settings *s;
10369         struct offload_rule *r;
10370         void *u;
10371
10372         if (!is_offload(sc))
10373                 return (ENODEV);
10374
10375         if (uop->nrules == 0) {
10376                 /* Delete installed policies. */
10377                 op = NULL;
10378                 goto set_policy;
10379         } else if (uop->nrules > 256) { /* arbitrary */
10380                 return (E2BIG);
10381         }
10382
10383         /* Copy userspace offload policy to kernel */
10384         op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
10385         op->nrules = uop->nrules;
10386         len = op->nrules * sizeof(struct offload_rule);
10387         op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
10388         rc = copyin(uop->rule, op->rule, len);
10389         if (rc) {
10390                 free(op->rule, M_CXGBE);
10391                 free(op, M_CXGBE);
10392                 return (rc);
10393         }
10394
10395         r = &op->rule[0];
10396         for (i = 0; i < op->nrules; i++, r++) {
10397
10398                 /* Validate open_type */
10399                 if (r->open_type != OPEN_TYPE_LISTEN &&
10400                     r->open_type != OPEN_TYPE_ACTIVE &&
10401                     r->open_type != OPEN_TYPE_PASSIVE &&
10402                     r->open_type != OPEN_TYPE_DONTCARE) {
10403 error:
10404                         /*
10405                          * Rules 0 to i have malloc'd filters that need to be
10406                          * freed.  Rules i+1 to nrules have userspace pointers
10407                          * and should be left alone.
10408                          */
10409                         op->nrules = i;
10410                         free_offload_policy(op);
10411                         return (rc);
10412                 }
10413
10414                 /* Validate settings */
10415                 s = &r->settings;
10416                 if ((s->offload != 0 && s->offload != 1) ||
10417                     s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
10418                     s->sched_class < -1 ||
10419                     s->sched_class >= sc->chip_params->nsched_cls) {
10420                         rc = EINVAL;
10421                         goto error;
10422                 }
10423
10424                 bf = &r->bpf_prog;
10425                 u = bf->bf_insns;       /* userspace ptr */
10426                 bf->bf_insns = NULL;
10427                 if (bf->bf_len == 0) {
10428                         /* legal, matches everything */
10429                         continue;
10430                 }
10431                 len = bf->bf_len * sizeof(*bf->bf_insns);
10432                 bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
10433                 rc = copyin(u, bf->bf_insns, len);
10434                 if (rc != 0)
10435                         goto error;
10436
10437                 if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
10438                         rc = EINVAL;
10439                         goto error;
10440                 }
10441         }
10442 set_policy:
10443         rw_wlock(&sc->policy_lock);
10444         old = sc->policy;
10445         sc->policy = op;
10446         rw_wunlock(&sc->policy_lock);
10447         free_offload_policy(old);
10448
10449         return (0);
10450 }
10451
10452 #define MAX_READ_BUF_SIZE (128 * 1024)
10453 static int
10454 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
10455 {
10456         uint32_t addr, remaining, n;
10457         uint32_t *buf;
10458         int rc;
10459         uint8_t *dst;
10460
10461         rc = validate_mem_range(sc, mr->addr, mr->len);
10462         if (rc != 0)
10463                 return (rc);
10464
10465         buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
10466         addr = mr->addr;
10467         remaining = mr->len;
10468         dst = (void *)mr->data;
10469
10470         while (remaining) {
10471                 n = min(remaining, MAX_READ_BUF_SIZE);
10472                 read_via_memwin(sc, 2, addr, buf, n);
10473
10474                 rc = copyout(buf, dst, n);
10475                 if (rc != 0)
10476                         break;
10477
10478                 dst += n;
10479                 remaining -= n;
10480                 addr += n;
10481         }
10482
10483         free(buf, M_CXGBE);
10484         return (rc);
10485 }
10486 #undef MAX_READ_BUF_SIZE
10487
10488 static int
10489 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
10490 {
10491         int rc;
10492
10493         if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
10494                 return (EINVAL);
10495
10496         if (i2cd->len > sizeof(i2cd->data))
10497                 return (EFBIG);
10498
10499         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
10500         if (rc)
10501                 return (rc);
10502         rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
10503             i2cd->offset, i2cd->len, &i2cd->data[0]);
10504         end_synchronized_op(sc, 0);
10505
10506         return (rc);
10507 }
10508
10509 static int
10510 clear_stats(struct adapter *sc, u_int port_id)
10511 {
10512         int i, v, chan_map;
10513         struct port_info *pi;
10514         struct vi_info *vi;
10515         struct sge_rxq *rxq;
10516         struct sge_txq *txq;
10517         struct sge_wrq *wrq;
10518 #ifdef TCP_OFFLOAD
10519         struct sge_ofld_rxq *ofld_rxq;
10520 #endif
10521
10522         if (port_id >= sc->params.nports)
10523                 return (EINVAL);
10524         pi = sc->port[port_id];
10525         if (pi == NULL)
10526                 return (EIO);
10527
10528         /* MAC stats */
10529         t4_clr_port_stats(sc, pi->tx_chan);
10530         if (is_t6(sc)) {
10531                 if (pi->fcs_reg != -1)
10532                         pi->fcs_base = t4_read_reg64(sc, pi->fcs_reg);
10533                 else
10534                         pi->stats.rx_fcs_err = 0;
10535         }
10536         pi->tx_parse_error = 0;
10537         pi->tnl_cong_drops = 0;
10538         mtx_lock(&sc->reg_lock);
10539         for_each_vi(pi, v, vi) {
10540                 if (vi->flags & VI_INIT_DONE)
10541                         t4_clr_vi_stats(sc, vi->vin);
10542         }
10543         chan_map = pi->rx_e_chan_map;
10544         v = 0;  /* reuse */
10545         while (chan_map) {
10546                 i = ffs(chan_map) - 1;
10547                 t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
10548                     1, A_TP_MIB_TNL_CNG_DROP_0 + i);
10549                 chan_map &= ~(1 << i);
10550         }
10551         mtx_unlock(&sc->reg_lock);
10552
10553         /*
10554          * Since this command accepts a port, clear stats for
10555          * all VIs on this port.
10556          */
10557         for_each_vi(pi, v, vi) {
10558                 if (vi->flags & VI_INIT_DONE) {
10559
10560                         for_each_rxq(vi, i, rxq) {
10561 #if defined(INET) || defined(INET6)
10562                                 rxq->lro.lro_queued = 0;
10563                                 rxq->lro.lro_flushed = 0;
10564 #endif
10565                                 rxq->rxcsum = 0;
10566                                 rxq->vlan_extraction = 0;
10567                                 rxq->vxlan_rxcsum = 0;
10568
10569                                 rxq->fl.cl_allocated = 0;
10570                                 rxq->fl.cl_recycled = 0;
10571                                 rxq->fl.cl_fast_recycled = 0;
10572                         }
10573
10574                         for_each_txq(vi, i, txq) {
10575                                 txq->txcsum = 0;
10576                                 txq->tso_wrs = 0;
10577                                 txq->vlan_insertion = 0;
10578                                 txq->imm_wrs = 0;
10579                                 txq->sgl_wrs = 0;
10580                                 txq->txpkt_wrs = 0;
10581                                 txq->txpkts0_wrs = 0;
10582                                 txq->txpkts1_wrs = 0;
10583                                 txq->txpkts0_pkts = 0;
10584                                 txq->txpkts1_pkts = 0;
10585                                 txq->raw_wrs = 0;
10586                                 txq->vxlan_tso_wrs = 0;
10587                                 txq->vxlan_txcsum = 0;
10588                                 txq->kern_tls_records = 0;
10589                                 txq->kern_tls_short = 0;
10590                                 txq->kern_tls_partial = 0;
10591                                 txq->kern_tls_full = 0;
10592                                 txq->kern_tls_octets = 0;
10593                                 txq->kern_tls_waste = 0;
10594                                 txq->kern_tls_options = 0;
10595                                 txq->kern_tls_header = 0;
10596                                 txq->kern_tls_fin = 0;
10597                                 txq->kern_tls_fin_short = 0;
10598                                 txq->kern_tls_cbc = 0;
10599                                 txq->kern_tls_gcm = 0;
10600                                 mp_ring_reset_stats(txq->r);
10601                         }
10602
10603 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10604                         for_each_ofld_txq(vi, i, wrq) {
10605                                 wrq->tx_wrs_direct = 0;
10606                                 wrq->tx_wrs_copied = 0;
10607                         }
10608 #endif
10609 #ifdef TCP_OFFLOAD
10610                         for_each_ofld_rxq(vi, i, ofld_rxq) {
10611                                 ofld_rxq->fl.cl_allocated = 0;
10612                                 ofld_rxq->fl.cl_recycled = 0;
10613                                 ofld_rxq->fl.cl_fast_recycled = 0;
10614                         }
10615 #endif
10616
10617                         if (IS_MAIN_VI(vi)) {
10618                                 wrq = &sc->sge.ctrlq[pi->port_id];
10619                                 wrq->tx_wrs_direct = 0;
10620                                 wrq->tx_wrs_copied = 0;
10621                         }
10622                 }
10623         }
10624
10625         return (0);
10626 }
10627
10628 int
10629 t4_os_find_pci_capability(struct adapter *sc, int cap)
10630 {
10631         int i;
10632
10633         return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
10634 }
10635
10636 int
10637 t4_os_pci_save_state(struct adapter *sc)
10638 {
10639         device_t dev;
10640         struct pci_devinfo *dinfo;
10641
10642         dev = sc->dev;
10643         dinfo = device_get_ivars(dev);
10644
10645         pci_cfg_save(dev, dinfo, 0);
10646         return (0);
10647 }
10648
10649 int
10650 t4_os_pci_restore_state(struct adapter *sc)
10651 {
10652         device_t dev;
10653         struct pci_devinfo *dinfo;
10654
10655         dev = sc->dev;
10656         dinfo = device_get_ivars(dev);
10657
10658         pci_cfg_restore(dev, dinfo);
10659         return (0);
10660 }
10661
10662 void
10663 t4_os_portmod_changed(struct port_info *pi)
10664 {
10665         struct adapter *sc = pi->adapter;
10666         struct vi_info *vi;
10667         struct ifnet *ifp;
10668         static const char *mod_str[] = {
10669                 NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
10670         };
10671
10672         KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
10673             ("%s: port_type %u", __func__, pi->port_type));
10674
10675         vi = &pi->vi[0];
10676         if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
10677                 PORT_LOCK(pi);
10678                 build_medialist(pi);
10679                 if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
10680                         fixup_link_config(pi);
10681                         apply_link_config(pi);
10682                 }
10683                 PORT_UNLOCK(pi);
10684                 end_synchronized_op(sc, LOCK_HELD);
10685         }
10686
10687         ifp = vi->ifp;
10688         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
10689                 if_printf(ifp, "transceiver unplugged.\n");
10690         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
10691                 if_printf(ifp, "unknown transceiver inserted.\n");
10692         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
10693                 if_printf(ifp, "unsupported transceiver inserted.\n");
10694         else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
10695                 if_printf(ifp, "%dGbps %s transceiver inserted.\n",
10696                     port_top_speed(pi), mod_str[pi->mod_type]);
10697         } else {
10698                 if_printf(ifp, "transceiver (type %d) inserted.\n",
10699                     pi->mod_type);
10700         }
10701 }
10702
10703 void
10704 t4_os_link_changed(struct port_info *pi)
10705 {
10706         struct vi_info *vi;
10707         struct ifnet *ifp;
10708         struct link_config *lc = &pi->link_cfg;
10709         struct adapter *sc = pi->adapter;
10710         int v;
10711
10712         PORT_LOCK_ASSERT_OWNED(pi);
10713
10714         if (is_t6(sc)) {
10715                 if (lc->link_ok) {
10716                         if (lc->speed > 25000 ||
10717                             (lc->speed == 25000 && lc->fec == FEC_RS)) {
10718                                 pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
10719                                     A_MAC_PORT_AFRAMECHECKSEQUENCEERRORS);
10720                         } else {
10721                                 pi->fcs_reg = T5_PORT_REG(pi->tx_chan,
10722                                     A_MAC_PORT_MTIP_1G10G_RX_CRCERRORS);
10723                         }
10724                         pi->fcs_base = t4_read_reg64(sc, pi->fcs_reg);
10725                         pi->stats.rx_fcs_err = 0;
10726                 } else {
10727                         pi->fcs_reg = -1;
10728                 }
10729         } else {
10730                 MPASS(pi->fcs_reg != -1);
10731                 MPASS(pi->fcs_base == 0);
10732         }
10733
10734         for_each_vi(pi, v, vi) {
10735                 ifp = vi->ifp;
10736                 if (ifp == NULL)
10737                         continue;
10738
10739                 if (lc->link_ok) {
10740                         ifp->if_baudrate = IF_Mbps(lc->speed);
10741                         if_link_state_change(ifp, LINK_STATE_UP);
10742                 } else {
10743                         if_link_state_change(ifp, LINK_STATE_DOWN);
10744                 }
10745         }
10746 }
10747
10748 void
10749 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
10750 {
10751         struct adapter *sc;
10752
10753         sx_slock(&t4_list_lock);
10754         SLIST_FOREACH(sc, &t4_list, link) {
10755                 /*
10756                  * func should not make any assumptions about what state sc is
10757                  * in - the only guarantee is that sc->sc_lock is a valid lock.
10758                  */
10759                 func(sc, arg);
10760         }
10761         sx_sunlock(&t4_list_lock);
10762 }
10763
10764 static int
10765 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
10766     struct thread *td)
10767 {
10768         int rc;
10769         struct adapter *sc = dev->si_drv1;
10770
10771         rc = priv_check(td, PRIV_DRIVER);
10772         if (rc != 0)
10773                 return (rc);
10774
10775         switch (cmd) {
10776         case CHELSIO_T4_GETREG: {
10777                 struct t4_reg *edata = (struct t4_reg *)data;
10778
10779                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10780                         return (EFAULT);
10781
10782                 if (edata->size == 4)
10783                         edata->val = t4_read_reg(sc, edata->addr);
10784                 else if (edata->size == 8)
10785                         edata->val = t4_read_reg64(sc, edata->addr);
10786                 else
10787                         return (EINVAL);
10788
10789                 break;
10790         }
10791         case CHELSIO_T4_SETREG: {
10792                 struct t4_reg *edata = (struct t4_reg *)data;
10793
10794                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
10795                         return (EFAULT);
10796
10797                 if (edata->size == 4) {
10798                         if (edata->val & 0xffffffff00000000)
10799                                 return (EINVAL);
10800                         t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
10801                 } else if (edata->size == 8)
10802                         t4_write_reg64(sc, edata->addr, edata->val);
10803                 else
10804                         return (EINVAL);
10805                 break;
10806         }
10807         case CHELSIO_T4_REGDUMP: {
10808                 struct t4_regdump *regs = (struct t4_regdump *)data;
10809                 int reglen = t4_get_regs_len(sc);
10810                 uint8_t *buf;
10811
10812                 if (regs->len < reglen) {
10813                         regs->len = reglen; /* hint to the caller */
10814                         return (ENOBUFS);
10815                 }
10816
10817                 regs->len = reglen;
10818                 buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
10819                 get_regs(sc, regs, buf);
10820                 rc = copyout(buf, regs->data, reglen);
10821                 free(buf, M_CXGBE);
10822                 break;
10823         }
10824         case CHELSIO_T4_GET_FILTER_MODE:
10825                 rc = get_filter_mode(sc, (uint32_t *)data);
10826                 break;
10827         case CHELSIO_T4_SET_FILTER_MODE:
10828                 rc = set_filter_mode(sc, *(uint32_t *)data);
10829                 break;
10830         case CHELSIO_T4_GET_FILTER:
10831                 rc = get_filter(sc, (struct t4_filter *)data);
10832                 break;
10833         case CHELSIO_T4_SET_FILTER:
10834                 rc = set_filter(sc, (struct t4_filter *)data);
10835                 break;
10836         case CHELSIO_T4_DEL_FILTER:
10837                 rc = del_filter(sc, (struct t4_filter *)data);
10838                 break;
10839         case CHELSIO_T4_GET_SGE_CONTEXT:
10840                 rc = get_sge_context(sc, (struct t4_sge_context *)data);
10841                 break;
10842         case CHELSIO_T4_LOAD_FW:
10843                 rc = load_fw(sc, (struct t4_data *)data);
10844                 break;
10845         case CHELSIO_T4_GET_MEM:
10846                 rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
10847                 break;
10848         case CHELSIO_T4_GET_I2C:
10849                 rc = read_i2c(sc, (struct t4_i2c_data *)data);
10850                 break;
10851         case CHELSIO_T4_CLEAR_STATS:
10852                 rc = clear_stats(sc, *(uint32_t *)data);
10853                 break;
10854         case CHELSIO_T4_SCHED_CLASS:
10855                 rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
10856                 break;
10857         case CHELSIO_T4_SCHED_QUEUE:
10858                 rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
10859                 break;
10860         case CHELSIO_T4_GET_TRACER:
10861                 rc = t4_get_tracer(sc, (struct t4_tracer *)data);
10862                 break;
10863         case CHELSIO_T4_SET_TRACER:
10864                 rc = t4_set_tracer(sc, (struct t4_tracer *)data);
10865                 break;
10866         case CHELSIO_T4_LOAD_CFG:
10867                 rc = load_cfg(sc, (struct t4_data *)data);
10868                 break;
10869         case CHELSIO_T4_LOAD_BOOT:
10870                 rc = load_boot(sc, (struct t4_bootrom *)data);
10871                 break;
10872         case CHELSIO_T4_LOAD_BOOTCFG:
10873                 rc = load_bootcfg(sc, (struct t4_data *)data);
10874                 break;
10875         case CHELSIO_T4_CUDBG_DUMP:
10876                 rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
10877                 break;
10878         case CHELSIO_T4_SET_OFLD_POLICY:
10879                 rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
10880                 break;
10881         default:
10882                 rc = ENOTTY;
10883         }
10884
10885         return (rc);
10886 }
10887
10888 #ifdef TCP_OFFLOAD
10889 static int
10890 toe_capability(struct vi_info *vi, int enable)
10891 {
10892         int rc;
10893         struct port_info *pi = vi->pi;
10894         struct adapter *sc = pi->adapter;
10895
10896         ASSERT_SYNCHRONIZED_OP(sc);
10897
10898         if (!is_offload(sc))
10899                 return (ENODEV);
10900
10901         if (enable) {
10902                 if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
10903                         /* TOE is already enabled. */
10904                         return (0);
10905                 }
10906
10907                 /*
10908                  * We need the port's queues around so that we're able to send
10909                  * and receive CPLs to/from the TOE even if the ifnet for this
10910                  * port has never been UP'd administratively.
10911                  */
10912                 if (!(vi->flags & VI_INIT_DONE)) {
10913                         rc = vi_full_init(vi);
10914                         if (rc)
10915                                 return (rc);
10916                 }
10917                 if (!(pi->vi[0].flags & VI_INIT_DONE)) {
10918                         rc = vi_full_init(&pi->vi[0]);
10919                         if (rc)
10920                                 return (rc);
10921                 }
10922
10923                 if (isset(&sc->offload_map, pi->port_id)) {
10924                         /* TOE is enabled on another VI of this port. */
10925                         pi->uld_vis++;
10926                         return (0);
10927                 }
10928
10929                 if (!uld_active(sc, ULD_TOM)) {
10930                         rc = t4_activate_uld(sc, ULD_TOM);
10931                         if (rc == EAGAIN) {
10932                                 log(LOG_WARNING,
10933                                     "You must kldload t4_tom.ko before trying "
10934                                     "to enable TOE on a cxgbe interface.\n");
10935                         }
10936                         if (rc != 0)
10937                                 return (rc);
10938                         KASSERT(sc->tom_softc != NULL,
10939                             ("%s: TOM activated but softc NULL", __func__));
10940                         KASSERT(uld_active(sc, ULD_TOM),
10941                             ("%s: TOM activated but flag not set", __func__));
10942                 }
10943
10944                 /* Activate iWARP and iSCSI too, if the modules are loaded. */
10945                 if (!uld_active(sc, ULD_IWARP))
10946                         (void) t4_activate_uld(sc, ULD_IWARP);
10947                 if (!uld_active(sc, ULD_ISCSI))
10948                         (void) t4_activate_uld(sc, ULD_ISCSI);
10949
10950                 pi->uld_vis++;
10951                 setbit(&sc->offload_map, pi->port_id);
10952         } else {
10953                 pi->uld_vis--;
10954
10955                 if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
10956                         return (0);
10957
10958                 KASSERT(uld_active(sc, ULD_TOM),
10959                     ("%s: TOM never initialized?", __func__));
10960                 clrbit(&sc->offload_map, pi->port_id);
10961         }
10962
10963         return (0);
10964 }
10965
10966 /*
10967  * Add an upper layer driver to the global list.
10968  */
10969 int
10970 t4_register_uld(struct uld_info *ui)
10971 {
10972         int rc = 0;
10973         struct uld_info *u;
10974
10975         sx_xlock(&t4_uld_list_lock);
10976         SLIST_FOREACH(u, &t4_uld_list, link) {
10977             if (u->uld_id == ui->uld_id) {
10978                     rc = EEXIST;
10979                     goto done;
10980             }
10981         }
10982
10983         SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
10984         ui->refcount = 0;
10985 done:
10986         sx_xunlock(&t4_uld_list_lock);
10987         return (rc);
10988 }
10989
10990 int
10991 t4_unregister_uld(struct uld_info *ui)
10992 {
10993         int rc = EINVAL;
10994         struct uld_info *u;
10995
10996         sx_xlock(&t4_uld_list_lock);
10997
10998         SLIST_FOREACH(u, &t4_uld_list, link) {
10999             if (u == ui) {
11000                     if (ui->refcount > 0) {
11001                             rc = EBUSY;
11002                             goto done;
11003                     }
11004
11005                     SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
11006                     rc = 0;
11007                     goto done;
11008             }
11009         }
11010 done:
11011         sx_xunlock(&t4_uld_list_lock);
11012         return (rc);
11013 }
11014
11015 int
11016 t4_activate_uld(struct adapter *sc, int id)
11017 {
11018         int rc;
11019         struct uld_info *ui;
11020
11021         ASSERT_SYNCHRONIZED_OP(sc);
11022
11023         if (id < 0 || id > ULD_MAX)
11024                 return (EINVAL);
11025         rc = EAGAIN;    /* kldoad the module with this ULD and try again. */
11026
11027         sx_slock(&t4_uld_list_lock);
11028
11029         SLIST_FOREACH(ui, &t4_uld_list, link) {
11030                 if (ui->uld_id == id) {
11031                         if (!(sc->flags & FULL_INIT_DONE)) {
11032                                 rc = adapter_full_init(sc);
11033                                 if (rc != 0)
11034                                         break;
11035                         }
11036
11037                         rc = ui->activate(sc);
11038                         if (rc == 0) {
11039                                 setbit(&sc->active_ulds, id);
11040                                 ui->refcount++;
11041                         }
11042                         break;
11043                 }
11044         }
11045
11046         sx_sunlock(&t4_uld_list_lock);
11047
11048         return (rc);
11049 }
11050
11051 int
11052 t4_deactivate_uld(struct adapter *sc, int id)
11053 {
11054         int rc;
11055         struct uld_info *ui;
11056
11057         ASSERT_SYNCHRONIZED_OP(sc);
11058
11059         if (id < 0 || id > ULD_MAX)
11060                 return (EINVAL);
11061         rc = ENXIO;
11062
11063         sx_slock(&t4_uld_list_lock);
11064
11065         SLIST_FOREACH(ui, &t4_uld_list, link) {
11066                 if (ui->uld_id == id) {
11067                         rc = ui->deactivate(sc);
11068                         if (rc == 0) {
11069                                 clrbit(&sc->active_ulds, id);
11070                                 ui->refcount--;
11071                         }
11072                         break;
11073                 }
11074         }
11075
11076         sx_sunlock(&t4_uld_list_lock);
11077
11078         return (rc);
11079 }
11080
11081 static void
11082 t4_async_event(void *arg, int n)
11083 {
11084         struct uld_info *ui;
11085         struct adapter *sc = (struct adapter *)arg;
11086
11087         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4async") != 0)
11088                 return;
11089         sx_slock(&t4_uld_list_lock);
11090         SLIST_FOREACH(ui, &t4_uld_list, link) {
11091                 if (ui->uld_id == ULD_IWARP) {
11092                         ui->async_event(sc);
11093                         break;
11094                 }
11095         }
11096         sx_sunlock(&t4_uld_list_lock);
11097         end_synchronized_op(sc, 0);
11098 }
11099
11100 int
11101 uld_active(struct adapter *sc, int uld_id)
11102 {
11103
11104         MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
11105
11106         return (isset(&sc->active_ulds, uld_id));
11107 }
11108 #endif
11109
/*
 * Turn a queue-count tunable into the number of queues to use.
 *
 * t  = ptr to tunable.
 * nc = number of CPUs (expected to be positive).
 * c  = compiled in default for that tunable.
 *
 * A positive *t is taken as is.  A negative *t asks for -*t queues and a zero
 * *t asks for the compiled in default; either request is capped at nc and the
 * result is stored back in *t.
 */
static void
calculate_nqueues(int *t, int nc, const int c)
{
        int nq;

        if (*t > 0)
                return;

        if (*t == 0) {
                nq = c;
        } else if (*t <= -nc) {
                /*
                 * The request is for nc queues or more and will be capped at
                 * nc anyway.  Settling it here also means that INT_MIN is
                 * never negated, which would overflow.
                 */
                nq = nc;
        } else {
                nq = -*t;
        }

        /* Unsigned comparison, like the u_int min() this replaces. */
        *t = (unsigned int)nc < (unsigned int)nq ? nc : nq;
}
11125
11126 /*
11127  * Come up with reasonable defaults for some of the tunables, provided they're
11128  * not set by the user (in which case we'll use the values as is).
11129  */
11130 static void
11131 tweak_tunables(void)
11132 {
11133         int nc = mp_ncpus;      /* our snapshot of the number of CPUs */
11134
11135         if (t4_ntxq < 1) {
11136 #ifdef RSS
11137                 t4_ntxq = rss_getnumbuckets();
11138 #else
11139                 calculate_nqueues(&t4_ntxq, nc, NTXQ);
11140 #endif
11141         }
11142
11143         calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);
11144
11145         if (t4_nrxq < 1) {
11146 #ifdef RSS
11147                 t4_nrxq = rss_getnumbuckets();
11148 #else
11149                 calculate_nqueues(&t4_nrxq, nc, NRXQ);
11150 #endif
11151         }
11152
11153         calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);
11154
11155 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
11156         calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
11157         calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
11158 #endif
11159 #ifdef TCP_OFFLOAD
11160         calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
11161         calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);
11162 #endif
11163
11164 #if defined(TCP_OFFLOAD) || defined(KERN_TLS)
11165         if (t4_toecaps_allowed == -1)
11166                 t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
11167 #else
11168         if (t4_toecaps_allowed == -1)
11169                 t4_toecaps_allowed = 0;
11170 #endif
11171
11172 #ifdef TCP_OFFLOAD
11173         if (t4_rdmacaps_allowed == -1) {
11174                 t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
11175                     FW_CAPS_CONFIG_RDMA_RDMAC;
11176         }
11177
11178         if (t4_iscsicaps_allowed == -1) {
11179                 t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
11180                     FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
11181                     FW_CAPS_CONFIG_ISCSI_T10DIF;
11182         }
11183
11184         if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
11185                 t4_tmr_idx_ofld = TMR_IDX_OFLD;
11186
11187         if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
11188                 t4_pktc_idx_ofld = PKTC_IDX_OFLD;
11189 #else
11190         if (t4_rdmacaps_allowed == -1)
11191                 t4_rdmacaps_allowed = 0;
11192
11193         if (t4_iscsicaps_allowed == -1)
11194                 t4_iscsicaps_allowed = 0;
11195 #endif
11196
11197 #ifdef DEV_NETMAP
11198         calculate_nqueues(&t4_nnmtxq, nc, NNMTXQ);
11199         calculate_nqueues(&t4_nnmrxq, nc, NNMRXQ);
11200         calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
11201         calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
11202 #endif
11203
11204         if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
11205                 t4_tmr_idx = TMR_IDX;
11206
11207         if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
11208                 t4_pktc_idx = PKTC_IDX;
11209
11210         if (t4_qsize_txq < 128)
11211                 t4_qsize_txq = 128;
11212
11213         if (t4_qsize_rxq < 128)
11214                 t4_qsize_rxq = 128;
11215         while (t4_qsize_rxq & 7)
11216                 t4_qsize_rxq++;
11217
11218         t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
11219
11220         /*
11221          * Number of VIs to create per-port.  The first VI is the "main" regular
11222          * VI for the port.  The rest are additional virtual interfaces on the
11223          * same physical port.  Note that the main VI does not have native
11224          * netmap support but the extra VIs do.
11225          *
11226          * Limit the number of VIs per port to the number of available
11227          * MAC addresses per port.
11228          */
11229         if (t4_num_vis < 1)
11230                 t4_num_vis = 1;
11231         if (t4_num_vis > nitems(vi_mac_funcs)) {
11232                 t4_num_vis = nitems(vi_mac_funcs);
11233                 printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
11234         }
11235
11236         if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
11237                 pcie_relaxed_ordering = 1;
11238 #if defined(__i386__) || defined(__amd64__)
11239                 if (cpu_vendor_id == CPU_VENDOR_INTEL)
11240                         pcie_relaxed_ordering = 0;
11241 #endif
11242         }
11243 }
11244
11245 #ifdef DDB
11246 static void
11247 t4_dump_tcb(struct adapter *sc, int tid)
11248 {
11249         uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
11250
11251         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
11252         save = t4_read_reg(sc, reg);
11253         base = sc->memwin[2].mw_base;
11254
11255         /* Dump TCB for the tid */
11256         tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
11257         tcb_addr += tid * TCB_SIZE;
11258
11259         if (is_t4(sc)) {
11260                 pf = 0;
11261                 win_pos = tcb_addr & ~0xf;      /* start must be 16B aligned */
11262         } else {
11263                 pf = V_PFNUM(sc->pf);
11264                 win_pos = tcb_addr & ~0x7f;     /* start must be 128B aligned */
11265         }
11266         t4_write_reg(sc, reg, win_pos | pf);
11267         t4_read_reg(sc, reg);
11268
11269         off = tcb_addr - win_pos;
11270         for (i = 0; i < 4; i++) {
11271                 uint32_t buf[8];
11272                 for (j = 0; j < 8; j++, off += 4)
11273                         buf[j] = htonl(t4_read_reg(sc, base + off));
11274
11275                 db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
11276                     buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
11277                     buf[7]);
11278         }
11279
11280         t4_write_reg(sc, reg, save);
11281         t4_read_reg(sc, reg);
11282 }
11283
11284 static void
11285 t4_dump_devlog(struct adapter *sc)
11286 {
11287         struct devlog_params *dparams = &sc->params.devlog;
11288         struct fw_devlog_e e;
11289         int i, first, j, m, nentries, rc;
11290         uint64_t ftstamp = UINT64_MAX;
11291
11292         if (dparams->start == 0) {
11293                 db_printf("devlog params not valid\n");
11294                 return;
11295         }
11296
11297         nentries = dparams->size / sizeof(struct fw_devlog_e);
11298         m = fwmtype_to_hwmtype(dparams->memtype);
11299
11300         /* Find the first entry. */
11301         first = -1;
11302         for (i = 0; i < nentries && !db_pager_quit; i++) {
11303                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
11304                     sizeof(e), (void *)&e);
11305                 if (rc != 0)
11306                         break;
11307
11308                 if (e.timestamp == 0)
11309                         break;
11310
11311                 e.timestamp = be64toh(e.timestamp);
11312                 if (e.timestamp < ftstamp) {
11313                         ftstamp = e.timestamp;
11314                         first = i;
11315                 }
11316         }
11317
11318         if (first == -1)
11319                 return;
11320
11321         i = first;
11322         do {
11323                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
11324                     sizeof(e), (void *)&e);
11325                 if (rc != 0)
11326                         return;
11327
11328                 if (e.timestamp == 0)
11329                         return;
11330
11331                 e.timestamp = be64toh(e.timestamp);
11332                 e.seqno = be32toh(e.seqno);
11333                 for (j = 0; j < 8; j++)
11334                         e.params[j] = be32toh(e.params[j]);
11335
11336                 db_printf("%10d  %15ju  %8s  %8s  ",
11337                     e.seqno, e.timestamp,
11338                     (e.level < nitems(devlog_level_strings) ?
11339                         devlog_level_strings[e.level] : "UNKNOWN"),
11340                     (e.facility < nitems(devlog_facility_strings) ?
11341                         devlog_facility_strings[e.facility] : "UNKNOWN"));
11342                 db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
11343                     e.params[3], e.params[4], e.params[5], e.params[6],
11344                     e.params[7]);
11345
11346                 if (++i == nentries)
11347                         i = 0;
11348         } while (i != first && !db_pager_quit);
11349 }
11350
/*
 * Command table for the "show t4 ..." family of ddb commands.  The DB_FUNC
 * definitions that follow ("devlog" and "tcb") name this table as theirs.
 */
11351 static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
/* Makes "t4" a sub-command of ddb's "show" that dispatches via db_t4_table. */
11352 _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
11353
11354 DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
11355 {
11356         device_t dev;
11357         int t;
11358         bool valid;
11359
11360         valid = false;
11361         t = db_read_token();
11362         if (t == tIDENT) {
11363                 dev = device_lookup_by_name(db_tok_string);
11364                 valid = true;
11365         }
11366         db_skip_to_eol();
11367         if (!valid) {
11368                 db_printf("usage: show t4 devlog <nexus>\n");
11369                 return;
11370         }
11371
11372         if (dev == NULL) {
11373                 db_printf("device not found\n");
11374                 return;
11375         }
11376
11377         t4_dump_devlog(device_get_softc(dev));
11378 }
11379
11380 DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
11381 {
11382         device_t dev;
11383         int radix, tid, t;
11384         bool valid;
11385
11386         valid = false;
11387         radix = db_radix;
11388         db_radix = 10;
11389         t = db_read_token();
11390         if (t == tIDENT) {
11391                 dev = device_lookup_by_name(db_tok_string);
11392                 t = db_read_token();
11393                 if (t == tNUMBER) {
11394                         tid = db_tok_number;
11395                         valid = true;
11396                 }
11397         }       
11398         db_radix = radix;
11399         db_skip_to_eol();
11400         if (!valid) {
11401                 db_printf("usage: show t4 tcb <nexus> <tid>\n");
11402                 return;
11403         }
11404
11405         if (dev == NULL) {
11406                 db_printf("device not found\n");
11407                 return;
11408         }
11409         if (tid < 0) {
11410                 db_printf("invalid tid\n");
11411                 return;
11412         }
11413
11414         t4_dump_tcb(device_get_softc(dev), tid);
11415 }
11416 #endif
11417
/*
 * Tags returned when the vxlan_start and vxlan_stop event handlers are
 * registered in mod_event(); they are needed again to deregister them.
 */
11418 static eventhandler_tag vxlan_start_evtag;
11419 static eventhandler_tag vxlan_stop_evtag;
11420
/*
 * Arguments of a vxlan_start/vxlan_stop event, bundled so that they can be
 * handed to the per-adapter callbacks through a single pointer.
 */
11421 struct vxlan_evargs {
11422         struct ifnet *ifp;      /* ifnet passed to the event handler */
11423         uint16_t port;          /* VXLAN port passed to the event handler */
11424 };
11425
11426 static void
11427 t4_vxlan_start(struct adapter *sc, void *arg)
11428 {
11429         struct vxlan_evargs *v = arg;
11430         struct port_info *pi;
11431         uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
11432         int i, rc;
11433
11434         if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
11435                 return;
11436         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxst") != 0)
11437                 return;
11438
11439         if (sc->vxlan_refcount == 0) {
11440                 sc->vxlan_port = v->port;
11441                 sc->vxlan_refcount = 1;
11442                 t4_write_reg(sc, A_MPS_RX_VXLAN_TYPE,
11443                     V_VXLAN(v->port) | F_VXLAN_EN);
11444                 for_each_port(sc, i) {
11445                         pi = sc->port[i];
11446                         if (pi->vxlan_tcam_entry == true)
11447                                 continue;
11448                         rc = t4_alloc_raw_mac_filt(sc, pi->vi[0].viid,
11449                             match_all_mac, match_all_mac,
11450                             sc->rawf_base + pi->port_id, 1, pi->port_id, true);
11451                         if (rc < 0) {
11452                                 rc = -rc;
11453                                 log(LOG_ERR,
11454                                     "%s: failed to add VXLAN TCAM entry: %d.\n",
11455                                     device_get_name(pi->vi[0].dev), rc);
11456                         } else {
11457                                 MPASS(rc == sc->rawf_base + pi->port_id);
11458                                 rc = 0;
11459                                 pi->vxlan_tcam_entry = true;
11460                         }
11461                 }
11462         } else if (sc->vxlan_port == v->port) {
11463                 sc->vxlan_refcount++;
11464         } else {
11465                 log(LOG_ERR, "%s: VXLAN already configured on port  %d; "
11466                     "ignoring attempt to configure it on port %d\n",
11467                     device_get_nameunit(sc->dev), sc->vxlan_port, v->port);
11468         }
11469         end_synchronized_op(sc, 0);
11470 }
11471
11472 static void
11473 t4_vxlan_stop(struct adapter *sc, void *arg)
11474 {
11475         struct vxlan_evargs *v = arg;
11476
11477         if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
11478                 return;
11479         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxsp") != 0)
11480                 return;
11481
11482         /*
11483          * VXLANs may have been configured before the driver was loaded so we
11484          * may see more stops than starts.  This is not handled cleanly but at
11485          * least we keep the refcount sane.
11486          */
11487         if (sc->vxlan_port != v->port)
11488                 goto done;
11489         if (sc->vxlan_refcount == 0) {
11490                 log(LOG_ERR,
11491                     "%s: VXLAN operation on port %d was stopped earlier; "
11492                     "ignoring attempt to stop it again.\n",
11493                     device_get_nameunit(sc->dev), sc->vxlan_port);
11494         } else if (--sc->vxlan_refcount == 0) {
11495                 t4_set_reg_field(sc, A_MPS_RX_VXLAN_TYPE, F_VXLAN_EN, 0);
11496         }
11497 done:
11498         end_synchronized_op(sc, 0);
11499 }
11500
11501 static void
11502 t4_vxlan_start_handler(void *arg __unused, struct ifnet *ifp,
11503     sa_family_t family, u_int port)
11504 {
11505         struct vxlan_evargs v;
11506
11507         MPASS(family == AF_INET || family == AF_INET6);
11508         v.ifp = ifp;
11509         v.port = port;
11510
11511         t4_iterate(t4_vxlan_start, &v);
11512 }
11513
11514 static void
11515 t4_vxlan_stop_handler(void *arg __unused, struct ifnet *ifp, sa_family_t family,
11516     u_int port)
11517 {
11518         struct vxlan_evargs v;
11519
11520         MPASS(family == AF_INET || family == AF_INET6);
11521         v.ifp = ifp;
11522         v.port = port;
11523
11524         t4_iterate(t4_vxlan_stop, &v);
11525 }
11526
11527
/*
 * Held exclusively by mod_event() around its MOD_LOAD and MOD_UNLOAD work.
 * mod_event() is the module event handler of t4nex, t5nex, and t6nex alike,
 * so this lock also protects the 'loaded' count that they share.
 */
11528 static struct sx mlu;   /* mod load unload */
11529 SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
11530
11531 static int
11532 mod_event(module_t mod, int cmd, void *arg)
11533 {
11534         int rc = 0;
11535         static int loaded = 0;
11536
11537         switch (cmd) {
11538         case MOD_LOAD:
11539                 sx_xlock(&mlu);
11540                 if (loaded++ == 0) {
11541                         t4_sge_modload();
11542                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
11543                             t4_filter_rpl, CPL_COOKIE_FILTER);
11544                         t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
11545                             do_l2t_write_rpl, CPL_COOKIE_FILTER);
11546                         t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
11547                             t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
11548                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
11549                             t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
11550                         t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
11551                             t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
11552                         t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
11553                         t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
11554                         t4_register_cpl_handler(CPL_SMT_WRITE_RPL,
11555                             do_smt_write_rpl);
11556                         sx_init(&t4_list_lock, "T4/T5 adapters");
11557                         SLIST_INIT(&t4_list);
11558                         callout_init(&fatal_callout, 1);
11559 #ifdef TCP_OFFLOAD
11560                         sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
11561                         SLIST_INIT(&t4_uld_list);
11562 #endif
11563 #ifdef INET6
11564                         t4_clip_modload();
11565 #endif
11566 #ifdef KERN_TLS
11567                         t6_ktls_modload();
11568 #endif
11569                         t4_tracer_modload();
11570                         tweak_tunables();
11571                         vxlan_start_evtag =
11572                             EVENTHANDLER_REGISTER(vxlan_start,
11573                                 t4_vxlan_start_handler, NULL,
11574                                 EVENTHANDLER_PRI_ANY);
11575                         vxlan_stop_evtag =
11576                             EVENTHANDLER_REGISTER(vxlan_stop,
11577                                 t4_vxlan_stop_handler, NULL,
11578                                 EVENTHANDLER_PRI_ANY);
11579                 }
11580                 sx_xunlock(&mlu);
11581                 break;
11582
11583         case MOD_UNLOAD:
11584                 sx_xlock(&mlu);
11585                 if (--loaded == 0) {
11586                         int tries;
11587
11588                         sx_slock(&t4_list_lock);
11589                         if (!SLIST_EMPTY(&t4_list)) {
11590                                 rc = EBUSY;
11591                                 sx_sunlock(&t4_list_lock);
11592                                 goto done_unload;
11593                         }
11594 #ifdef TCP_OFFLOAD
11595                         sx_slock(&t4_uld_list_lock);
11596                         if (!SLIST_EMPTY(&t4_uld_list)) {
11597                                 rc = EBUSY;
11598                                 sx_sunlock(&t4_uld_list_lock);
11599                                 sx_sunlock(&t4_list_lock);
11600                                 goto done_unload;
11601                         }
11602 #endif
11603                         tries = 0;
11604                         while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
11605                                 uprintf("%ju clusters with custom free routine "
11606                                     "still is use.\n", t4_sge_extfree_refs());
11607                                 pause("t4unload", 2 * hz);
11608                         }
11609 #ifdef TCP_OFFLOAD
11610                         sx_sunlock(&t4_uld_list_lock);
11611 #endif
11612                         sx_sunlock(&t4_list_lock);
11613
11614                         if (t4_sge_extfree_refs() == 0) {
11615                                 EVENTHANDLER_DEREGISTER(vxlan_start,
11616                                     vxlan_start_evtag);
11617                                 EVENTHANDLER_DEREGISTER(vxlan_stop,
11618                                     vxlan_stop_evtag);
11619                                 t4_tracer_modunload();
11620 #ifdef KERN_TLS
11621                                 t6_ktls_modunload();
11622 #endif
11623 #ifdef INET6
11624                                 t4_clip_modunload();
11625 #endif
11626 #ifdef TCP_OFFLOAD
11627                                 sx_destroy(&t4_uld_list_lock);
11628 #endif
11629                                 sx_destroy(&t4_list_lock);
11630                                 t4_sge_modunload();
11631                                 loaded = 0;
11632                         } else {
11633                                 rc = EBUSY;
11634                                 loaded++;       /* undo earlier decrement */
11635                         }
11636                 }
11637 done_unload:
11638                 sx_xunlock(&mlu);
11639                 break;
11640         }
11641
11642         return (rc);
11643 }
11644
/* One devclass per driver declared below. */
11645 static devclass_t t4_devclass, t5_devclass, t6_devclass;
11646 static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
11647 static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;
11648
/*
 * Nexus drivers (t4nex, t5nex, t6nex) sit on the pci bus.  All three use
 * mod_event() as their module event handler, and each depends on the
 * firmware module and, when DEV_NETMAP is defined, on netmap.
 */
11649 DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
11650 MODULE_VERSION(t4nex, 1);
11651 MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
11652 #ifdef DEV_NETMAP
11653 MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
11654 #endif /* DEV_NETMAP */
11655
11656 DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
11657 MODULE_VERSION(t5nex, 1);
11658 MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
11659 #ifdef DEV_NETMAP
11660 MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
11661 #endif /* DEV_NETMAP */
11662
11663 DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
11664 MODULE_VERSION(t6nex, 1);
11665 MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
11666 #ifdef DEV_NETMAP
11667 MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
11668 #endif /* DEV_NETMAP */
11669
/*
 * Drivers whose parent is a nexus: cxgbe on t4nex, cxl on t5nex, cc on
 * t6nex.  They have no module event handler of their own.
 */
11670 DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
11671 MODULE_VERSION(cxgbe, 1);
11672
11673 DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
11674 MODULE_VERSION(cxl, 1);
11675
11676 DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
11677 MODULE_VERSION(cc, 1);
11678
/*
 * Drivers whose parent is one of the drivers above: vcxgbe on cxgbe, vcxl on
 * cxl, vcc on cc.  NOTE(review): presumably these are the extra virtual
 * interfaces of a port -- confirm against the vcxgbe/vcxl/vcc driver code.
 */
11679 DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
11680 MODULE_VERSION(vcxgbe, 1);
11681
11682 DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
11683 MODULE_VERSION(vcxl, 1);
11684
11685 DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
11686 MODULE_VERSION(vcc, 1);