]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/cxgbe/t4_main.c
MFV r344364:
[FreeBSD/FreeBSD.git] / sys / dev / cxgbe / t4_main.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 Chelsio Communications, Inc.
5  * All rights reserved.
6  * Written by: Navdeep Parhar <np@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include "opt_ddb.h"
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36 #include "opt_ratelimit.h"
37 #include "opt_rss.h"
38
39 #include <sys/param.h>
40 #include <sys/conf.h>
41 #include <sys/priv.h>
42 #include <sys/kernel.h>
43 #include <sys/bus.h>
44 #include <sys/module.h>
45 #include <sys/malloc.h>
46 #include <sys/queue.h>
47 #include <sys/taskqueue.h>
48 #include <sys/pciio.h>
49 #include <dev/pci/pcireg.h>
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pci_private.h>
52 #include <sys/firmware.h>
53 #include <sys/sbuf.h>
54 #include <sys/smp.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <net/ethernet.h>
59 #include <net/if.h>
60 #include <net/if_types.h>
61 #include <net/if_dl.h>
62 #include <net/if_vlan_var.h>
63 #ifdef RSS
64 #include <net/rss_config.h>
65 #endif
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #if defined(__i386__) || defined(__amd64__)
69 #include <machine/md_var.h>
70 #include <machine/cputypes.h>
71 #include <vm/vm.h>
72 #include <vm/pmap.h>
73 #endif
74 #include <crypto/rijndael/rijndael.h>
75 #ifdef DDB
76 #include <ddb/ddb.h>
77 #include <ddb/db_lex.h>
78 #endif
79
80 #include "common/common.h"
81 #include "common/t4_msg.h"
82 #include "common/t4_regs.h"
83 #include "common/t4_regs_values.h"
84 #include "cudbg/cudbg.h"
85 #include "t4_clip.h"
86 #include "t4_ioctl.h"
87 #include "t4_l2t.h"
88 #include "t4_mp_ring.h"
89 #include "t4_if.h"
90 #include "t4_smt.h"
91
92 /* T4 bus driver interface */
93 static int t4_probe(device_t);
94 static int t4_attach(device_t);
95 static int t4_detach(device_t);
96 static int t4_child_location_str(device_t, device_t, char *, size_t);
97 static int t4_ready(device_t);
98 static int t4_read_port_device(device_t, int, device_t *);
99 static device_method_t t4_methods[] = {
100         DEVMETHOD(device_probe,         t4_probe),
101         DEVMETHOD(device_attach,        t4_attach),
102         DEVMETHOD(device_detach,        t4_detach),
103
104         DEVMETHOD(bus_child_location_str, t4_child_location_str),
105
106         DEVMETHOD(t4_is_main_ready,     t4_ready),
107         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
108
109         DEVMETHOD_END
110 };
111 static driver_t t4_driver = {
112         "t4nex",
113         t4_methods,
114         sizeof(struct adapter)
115 };
116
117
118 /* T4 port (cxgbe) interface */
119 static int cxgbe_probe(device_t);
120 static int cxgbe_attach(device_t);
121 static int cxgbe_detach(device_t);
122 device_method_t cxgbe_methods[] = {
123         DEVMETHOD(device_probe,         cxgbe_probe),
124         DEVMETHOD(device_attach,        cxgbe_attach),
125         DEVMETHOD(device_detach,        cxgbe_detach),
126         { 0, 0 }
127 };
128 static driver_t cxgbe_driver = {
129         "cxgbe",
130         cxgbe_methods,
131         sizeof(struct port_info)
132 };
133
134 /* T4 VI (vcxgbe) interface */
135 static int vcxgbe_probe(device_t);
136 static int vcxgbe_attach(device_t);
137 static int vcxgbe_detach(device_t);
138 static device_method_t vcxgbe_methods[] = {
139         DEVMETHOD(device_probe,         vcxgbe_probe),
140         DEVMETHOD(device_attach,        vcxgbe_attach),
141         DEVMETHOD(device_detach,        vcxgbe_detach),
142         { 0, 0 }
143 };
144 static driver_t vcxgbe_driver = {
145         "vcxgbe",
146         vcxgbe_methods,
147         sizeof(struct vi_info)
148 };
149
150 static d_ioctl_t t4_ioctl;
151
152 static struct cdevsw t4_cdevsw = {
153        .d_version = D_VERSION,
154        .d_ioctl = t4_ioctl,
155        .d_name = "t4nex",
156 };
157
158 /* T5 bus driver interface */
159 static int t5_probe(device_t);
160 static device_method_t t5_methods[] = {
161         DEVMETHOD(device_probe,         t5_probe),
162         DEVMETHOD(device_attach,        t4_attach),
163         DEVMETHOD(device_detach,        t4_detach),
164
165         DEVMETHOD(bus_child_location_str, t4_child_location_str),
166
167         DEVMETHOD(t4_is_main_ready,     t4_ready),
168         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
169
170         DEVMETHOD_END
171 };
172 static driver_t t5_driver = {
173         "t5nex",
174         t5_methods,
175         sizeof(struct adapter)
176 };
177
178
179 /* T5 port (cxl) interface */
180 static driver_t cxl_driver = {
181         "cxl",
182         cxgbe_methods,
183         sizeof(struct port_info)
184 };
185
186 /* T5 VI (vcxl) interface */
187 static driver_t vcxl_driver = {
188         "vcxl",
189         vcxgbe_methods,
190         sizeof(struct vi_info)
191 };
192
193 /* T6 bus driver interface */
194 static int t6_probe(device_t);
195 static device_method_t t6_methods[] = {
196         DEVMETHOD(device_probe,         t6_probe),
197         DEVMETHOD(device_attach,        t4_attach),
198         DEVMETHOD(device_detach,        t4_detach),
199
200         DEVMETHOD(bus_child_location_str, t4_child_location_str),
201
202         DEVMETHOD(t4_is_main_ready,     t4_ready),
203         DEVMETHOD(t4_read_port_device,  t4_read_port_device),
204
205         DEVMETHOD_END
206 };
207 static driver_t t6_driver = {
208         "t6nex",
209         t6_methods,
210         sizeof(struct adapter)
211 };
212
213
214 /* T6 port (cc) interface */
215 static driver_t cc_driver = {
216         "cc",
217         cxgbe_methods,
218         sizeof(struct port_info)
219 };
220
221 /* T6 VI (vcc) interface */
222 static driver_t vcc_driver = {
223         "vcc",
224         vcxgbe_methods,
225         sizeof(struct vi_info)
226 };
227
228 /* ifnet interface */
229 static void cxgbe_init(void *);
230 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
231 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
232 static void cxgbe_qflush(struct ifnet *);
233
234 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");
235
236 /*
237  * Correct lock order when you need to acquire multiple locks is t4_list_lock,
238  * then ADAPTER_LOCK, then t4_uld_list_lock.
239  */
240 static struct sx t4_list_lock;
241 SLIST_HEAD(, adapter) t4_list;
242 #ifdef TCP_OFFLOAD
243 static struct sx t4_uld_list_lock;
244 SLIST_HEAD(, uld_info) t4_uld_list;
245 #endif
246
247 /*
248  * Tunables.  See tweak_tunables() too.
249  *
250  * Each tunable is set to a default value here if it's known at compile-time.
251  * Otherwise it is set to -n as an indication to tweak_tunables() that it should
252  * provide a reasonable default (up to n) when the driver is loaded.
253  *
254  * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
255  * T5 are under hw.cxl.
256  */
257 SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe(4) parameters");
258 SYSCTL_NODE(_hw, OID_AUTO, cxl, CTLFLAG_RD, 0, "cxgbe(4) T5+ parameters");
259 SYSCTL_NODE(_hw_cxgbe, OID_AUTO, toe, CTLFLAG_RD, 0, "cxgbe(4) TOE parameters");
260
261 /*
262  * Number of queues for tx and rx, NIC and offload.
263  */
264 #define NTXQ 16
265 int t4_ntxq = -NTXQ;
266 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq, CTLFLAG_RDTUN, &t4_ntxq, 0,
267     "Number of TX queues per port");
268 TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq);      /* Old name, undocumented */
269
270 #define NRXQ 8
271 int t4_nrxq = -NRXQ;
272 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq, CTLFLAG_RDTUN, &t4_nrxq, 0,
273     "Number of RX queues per port");
274 TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq);      /* Old name, undocumented */
275
276 #define NTXQ_VI 1
277 static int t4_ntxq_vi = -NTXQ_VI;
278 SYSCTL_INT(_hw_cxgbe, OID_AUTO, ntxq_vi, CTLFLAG_RDTUN, &t4_ntxq_vi, 0,
279     "Number of TX queues per VI");
280
281 #define NRXQ_VI 1
282 static int t4_nrxq_vi = -NRXQ_VI;
283 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nrxq_vi, CTLFLAG_RDTUN, &t4_nrxq_vi, 0,
284     "Number of RX queues per VI");
285
286 static int t4_rsrv_noflowq = 0;
287 SYSCTL_INT(_hw_cxgbe, OID_AUTO, rsrv_noflowq, CTLFLAG_RDTUN, &t4_rsrv_noflowq,
288     0, "Reserve TX queue 0 of each VI for non-flowid packets");
289
290 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
291 #define NOFLDTXQ 8
292 static int t4_nofldtxq = -NOFLDTXQ;
293 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq, CTLFLAG_RDTUN, &t4_nofldtxq, 0,
294     "Number of offload TX queues per port");
295
296 #define NOFLDRXQ 2
297 static int t4_nofldrxq = -NOFLDRXQ;
298 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq, CTLFLAG_RDTUN, &t4_nofldrxq, 0,
299     "Number of offload RX queues per port");
300
301 #define NOFLDTXQ_VI 1
302 static int t4_nofldtxq_vi = -NOFLDTXQ_VI;
303 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq_vi, CTLFLAG_RDTUN, &t4_nofldtxq_vi, 0,
304     "Number of offload TX queues per VI");
305
306 #define NOFLDRXQ_VI 1
307 static int t4_nofldrxq_vi = -NOFLDRXQ_VI;
308 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq_vi, CTLFLAG_RDTUN, &t4_nofldrxq_vi, 0,
309     "Number of offload RX queues per VI");
310
311 #define TMR_IDX_OFLD 1
312 int t4_tmr_idx_ofld = TMR_IDX_OFLD;
313 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx_ofld, CTLFLAG_RDTUN,
314     &t4_tmr_idx_ofld, 0, "Holdoff timer index for offload queues");
315
316 #define PKTC_IDX_OFLD (-1)
317 int t4_pktc_idx_ofld = PKTC_IDX_OFLD;
318 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx_ofld, CTLFLAG_RDTUN,
319     &t4_pktc_idx_ofld, 0, "holdoff packet counter index for offload queues");
320
321 /* 0 means chip/fw default, non-zero number is value in microseconds */
322 static u_long t4_toe_keepalive_idle = 0;
323 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_idle, CTLFLAG_RDTUN,
324     &t4_toe_keepalive_idle, 0, "TOE keepalive idle timer (us)");
325
326 /* 0 means chip/fw default, non-zero number is value in microseconds */
327 static u_long t4_toe_keepalive_interval = 0;
328 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, keepalive_interval, CTLFLAG_RDTUN,
329     &t4_toe_keepalive_interval, 0, "TOE keepalive interval timer (us)");
330
331 /* 0 means chip/fw default, non-zero number is # of keepalives before abort */
332 static int t4_toe_keepalive_count = 0;
333 SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, keepalive_count, CTLFLAG_RDTUN,
334     &t4_toe_keepalive_count, 0, "Number of TOE keepalive probes before abort");
335
336 /* 0 means chip/fw default, non-zero number is value in microseconds */
337 static u_long t4_toe_rexmt_min = 0;
338 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_min, CTLFLAG_RDTUN,
339     &t4_toe_rexmt_min, 0, "Minimum TOE retransmit interval (us)");
340
341 /* 0 means chip/fw default, non-zero number is value in microseconds */
342 static u_long t4_toe_rexmt_max = 0;
343 SYSCTL_ULONG(_hw_cxgbe_toe, OID_AUTO, rexmt_max, CTLFLAG_RDTUN,
344     &t4_toe_rexmt_max, 0, "Maximum TOE retransmit interval (us)");
345
346 /* 0 means chip/fw default, non-zero number is # of rexmt before abort */
347 static int t4_toe_rexmt_count = 0;
348 SYSCTL_INT(_hw_cxgbe_toe, OID_AUTO, rexmt_count, CTLFLAG_RDTUN,
349     &t4_toe_rexmt_count, 0, "Number of TOE retransmissions before abort");
350
351 /* -1 means chip/fw default, other values are raw backoff values to use */
352 static int t4_toe_rexmt_backoff[16] = {
353         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
354 };
355 SYSCTL_NODE(_hw_cxgbe_toe, OID_AUTO, rexmt_backoff, CTLFLAG_RD, 0,
356     "cxgbe(4) TOE retransmit backoff values");
357 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 0, CTLFLAG_RDTUN,
358     &t4_toe_rexmt_backoff[0], 0, "");
359 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 1, CTLFLAG_RDTUN,
360     &t4_toe_rexmt_backoff[1], 0, "");
361 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 2, CTLFLAG_RDTUN,
362     &t4_toe_rexmt_backoff[2], 0, "");
363 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 3, CTLFLAG_RDTUN,
364     &t4_toe_rexmt_backoff[3], 0, "");
365 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 4, CTLFLAG_RDTUN,
366     &t4_toe_rexmt_backoff[4], 0, "");
367 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 5, CTLFLAG_RDTUN,
368     &t4_toe_rexmt_backoff[5], 0, "");
369 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 6, CTLFLAG_RDTUN,
370     &t4_toe_rexmt_backoff[6], 0, "");
371 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 7, CTLFLAG_RDTUN,
372     &t4_toe_rexmt_backoff[7], 0, "");
373 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 8, CTLFLAG_RDTUN,
374     &t4_toe_rexmt_backoff[8], 0, "");
375 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 9, CTLFLAG_RDTUN,
376     &t4_toe_rexmt_backoff[9], 0, "");
377 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 10, CTLFLAG_RDTUN,
378     &t4_toe_rexmt_backoff[10], 0, "");
379 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 11, CTLFLAG_RDTUN,
380     &t4_toe_rexmt_backoff[11], 0, "");
381 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 12, CTLFLAG_RDTUN,
382     &t4_toe_rexmt_backoff[12], 0, "");
383 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 13, CTLFLAG_RDTUN,
384     &t4_toe_rexmt_backoff[13], 0, "");
385 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 14, CTLFLAG_RDTUN,
386     &t4_toe_rexmt_backoff[14], 0, "");
387 SYSCTL_INT(_hw_cxgbe_toe_rexmt_backoff, OID_AUTO, 15, CTLFLAG_RDTUN,
388     &t4_toe_rexmt_backoff[15], 0, "");
389 #endif
390
#ifdef DEV_NETMAP
/*
 * Netmap queue counts per VI.  A negative initial value -n asks
 * tweak_tunables() for a default of at most n.
 */
#define NNMTXQ_VI 2
static int t4_nnmtxq_vi = -NNMTXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmtxq_vi, CTLFLAG_RDTUN,
    &t4_nnmtxq_vi, 0, "Number of netmap TX queues per VI");

#define NNMRXQ_VI 2
static int t4_nnmrxq_vi = -NNMRXQ_VI;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nnmrxq_vi, CTLFLAG_RDTUN,
    &t4_nnmrxq_vi, 0, "Number of netmap RX queues per VI");
#endif
402
403 /*
404  * Holdoff parameters for ports.
405  */
406 #define TMR_IDX 1
407 int t4_tmr_idx = TMR_IDX;
408 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_timer_idx, CTLFLAG_RDTUN, &t4_tmr_idx,
409     0, "Holdoff timer index");
410 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx);     /* Old name */
411
412 #define PKTC_IDX (-1)
413 int t4_pktc_idx = PKTC_IDX;
414 SYSCTL_INT(_hw_cxgbe, OID_AUTO, holdoff_pktc_idx, CTLFLAG_RDTUN, &t4_pktc_idx,
415     0, "Holdoff packet counter index");
416 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx);     /* Old name */
417
418 /*
419  * Size (# of entries) of each tx and rx queue.
420  */
421 unsigned int t4_qsize_txq = TX_EQ_QSIZE;
422 SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_txq, CTLFLAG_RDTUN, &t4_qsize_txq, 0,
423     "Number of descriptors in each TX queue");
424
425 unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
426 SYSCTL_INT(_hw_cxgbe, OID_AUTO, qsize_rxq, CTLFLAG_RDTUN, &t4_qsize_rxq, 0,
427     "Number of descriptors in each RX queue");
428
429 /*
430  * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
431  */
432 int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
433 SYSCTL_INT(_hw_cxgbe, OID_AUTO, interrupt_types, CTLFLAG_RDTUN, &t4_intr_types,
434     0, "Interrupt types allowed (bit 0 = INTx, 1 = MSI, 2 = MSI-X)");
435
436 /*
437  * Configuration file.  All the _CF names here are special.
438  */
439 #define DEFAULT_CF      "default"
440 #define BUILTIN_CF      "built-in"
441 #define FLASH_CF        "flash"
442 #define UWIRE_CF        "uwire"
443 #define FPGA_CF         "fpga"
444 static char t4_cfg_file[32] = DEFAULT_CF;
445 SYSCTL_STRING(_hw_cxgbe, OID_AUTO, config_file, CTLFLAG_RDTUN, t4_cfg_file,
446     sizeof(t4_cfg_file), "Firmware configuration file");
447
448 /*
449  * PAUSE settings (bit 0, 1, 2 = rx_pause, tx_pause, pause_autoneg respectively).
450  * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
451  * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
452  *            mark or when signalled to do so, 0 to never emit PAUSE.
453  * pause_autoneg = 1 means PAUSE will be negotiated if possible and the
454  *                 negotiated settings will override rx_pause/tx_pause.
455  *                 Otherwise rx_pause/tx_pause are applied forcibly.
456  */
457 static int t4_pause_settings = PAUSE_RX | PAUSE_TX | PAUSE_AUTONEG;
458 SYSCTL_INT(_hw_cxgbe, OID_AUTO, pause_settings, CTLFLAG_RDTUN,
459     &t4_pause_settings, 0,
460     "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
461
462 /*
463  * Forward Error Correction settings (bit 0, 1 = RS, BASER respectively).
464  * -1 to run with the firmware default.  Same as FEC_AUTO (bit 5)
465  *  0 to disable FEC.
466  */
467 static int t4_fec = -1;
468 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fec, CTLFLAG_RDTUN, &t4_fec, 0,
469     "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
470
471 /*
472  * Link autonegotiation.
473  * -1 to run with the firmware default.
474  *  0 to disable.
475  *  1 to enable.
476  */
477 static int t4_autoneg = -1;
478 SYSCTL_INT(_hw_cxgbe, OID_AUTO, autoneg, CTLFLAG_RDTUN, &t4_autoneg, 0,
479     "Link autonegotiation");
480
481 /*
482  * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
483  * encouraged respectively).  '-n' is the same as 'n' except the firmware
484  * version used in the checks is read from the firmware bundled with the driver.
485  */
486 static int t4_fw_install = 1;
487 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fw_install, CTLFLAG_RDTUN, &t4_fw_install, 0,
488     "Firmware auto-install (0 = prohibited, 1 = allowed, 2 = encouraged)");
489
490 /*
491  * ASIC features that will be used.  Disable the ones you don't want so that the
492  * chip resources aren't wasted on features that will not be used.
493  */
494 static int t4_nbmcaps_allowed = 0;
495 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nbmcaps_allowed, CTLFLAG_RDTUN,
496     &t4_nbmcaps_allowed, 0, "Default NBM capabilities");
497
498 static int t4_linkcaps_allowed = 0;     /* No DCBX, PPP, etc. by default */
499 SYSCTL_INT(_hw_cxgbe, OID_AUTO, linkcaps_allowed, CTLFLAG_RDTUN,
500     &t4_linkcaps_allowed, 0, "Default link capabilities");
501
502 static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS |
503     FW_CAPS_CONFIG_SWITCH_EGRESS;
504 SYSCTL_INT(_hw_cxgbe, OID_AUTO, switchcaps_allowed, CTLFLAG_RDTUN,
505     &t4_switchcaps_allowed, 0, "Default switch capabilities");
506
507 #ifdef RATELIMIT
508 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
509         FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD;
510 #else
511 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC |
512         FW_CAPS_CONFIG_NIC_HASHFILTER;
513 #endif
514 SYSCTL_INT(_hw_cxgbe, OID_AUTO, niccaps_allowed, CTLFLAG_RDTUN,
515     &t4_niccaps_allowed, 0, "Default NIC capabilities");
516
517 static int t4_toecaps_allowed = -1;
518 SYSCTL_INT(_hw_cxgbe, OID_AUTO, toecaps_allowed, CTLFLAG_RDTUN,
519     &t4_toecaps_allowed, 0, "Default TCP offload capabilities");
520
521 static int t4_rdmacaps_allowed = -1;
522 SYSCTL_INT(_hw_cxgbe, OID_AUTO, rdmacaps_allowed, CTLFLAG_RDTUN,
523     &t4_rdmacaps_allowed, 0, "Default RDMA capabilities");
524
525 static int t4_cryptocaps_allowed = -1;
526 SYSCTL_INT(_hw_cxgbe, OID_AUTO, cryptocaps_allowed, CTLFLAG_RDTUN,
527     &t4_cryptocaps_allowed, 0, "Default crypto capabilities");
528
529 static int t4_iscsicaps_allowed = -1;
530 SYSCTL_INT(_hw_cxgbe, OID_AUTO, iscsicaps_allowed, CTLFLAG_RDTUN,
531     &t4_iscsicaps_allowed, 0, "Default iSCSI capabilities");
532
533 static int t4_fcoecaps_allowed = 0;
534 SYSCTL_INT(_hw_cxgbe, OID_AUTO, fcoecaps_allowed, CTLFLAG_RDTUN,
535     &t4_fcoecaps_allowed, 0, "Default FCoE capabilities");
536
537 static int t5_write_combine = 0;
538 SYSCTL_INT(_hw_cxl, OID_AUTO, write_combine, CTLFLAG_RDTUN, &t5_write_combine,
539     0, "Use WC instead of UC for BAR2");
540
541 static int t4_num_vis = 1;
542 SYSCTL_INT(_hw_cxgbe, OID_AUTO, num_vis, CTLFLAG_RDTUN, &t4_num_vis, 0,
543     "Number of VIs per port");
544
545 /*
546  * PCIe Relaxed Ordering.
547  * -1: driver should figure out a good value.
548  * 0: disable RO.
549  * 1: enable RO.
550  * 2: leave RO alone.
551  */
552 static int pcie_relaxed_ordering = -1;
553 SYSCTL_INT(_hw_cxgbe, OID_AUTO, pcie_relaxed_ordering, CTLFLAG_RDTUN,
554     &pcie_relaxed_ordering, 0,
555     "PCIe Relaxed Ordering: 0 = disable, 1 = enable, 2 = leave alone");
556
557 static int t4_panic_on_fatal_err = 0;
558 SYSCTL_INT(_hw_cxgbe, OID_AUTO, panic_on_fatal_err, CTLFLAG_RDTUN,
559     &t4_panic_on_fatal_err, 0, "panic on fatal errors");
560
#ifdef TCP_OFFLOAD
/*
 * TOE tunables.
 */

/* hw.cxgbe.cop_managed_offloading: off (0) by default. */
static int t4_cop_managed_offloading = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cop_managed_offloading, CTLFLAG_RDTUN,
    &t4_cop_managed_offloading, 0,
    "COP (Connection Offload Policy) controls all TOE offload");
#endif
570
571 /* Functions used by VIs to obtain unique MAC addresses for each VI. */
572 static int vi_mac_funcs[] = {
573         FW_VI_FUNC_ETH,
574         FW_VI_FUNC_OFLD,
575         FW_VI_FUNC_IWARP,
576         FW_VI_FUNC_OPENISCSI,
577         FW_VI_FUNC_OPENFCOE,
578         FW_VI_FUNC_FOISCSI,
579         FW_VI_FUNC_FOFCOE,
580 };
581
/*
 * Interrupt type plus the interrupt and queue counts that go with it.  The
 * first group of queue counts is per port; the second group is what the
 * vcxgbe/vcxl interfaces use instead.
 */
struct intrs_and_queues {
	/* Interrupts */
	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
	uint16_t num_vis;	/* number of VIs for each port */
	uint16_t nirq;		/* Total # of vectors */

	/* Queues for each port */
	uint16_t ntxq;		/* # of NIC txq's for each port */
	uint16_t nrxq;		/* # of NIC rxq's for each port */
	uint16_t nofldtxq;	/* # of TOE/ETHOFLD txq's for each port */
	uint16_t nofldrxq;	/* # of TOE rxq's for each port */

	/* The vcxgbe/vcxl interfaces use these and not the ones above. */
	uint16_t ntxq_vi;	/* # of NIC txq's */
	uint16_t nrxq_vi;	/* # of NIC rxq's */
	uint16_t nofldtxq_vi;	/* # of TOE txq's */
	uint16_t nofldrxq_vi;	/* # of TOE rxq's */
	uint16_t nnmtxq_vi;	/* # of netmap txq's */
	uint16_t nnmrxq_vi;	/* # of netmap rxq's */
};
599
600 static void setup_memwin(struct adapter *);
601 static void position_memwin(struct adapter *, int, uint32_t);
602 static int validate_mem_range(struct adapter *, uint32_t, uint32_t);
603 static int fwmtype_to_hwmtype(int);
604 static int validate_mt_off_len(struct adapter *, int, uint32_t, uint32_t,
605     uint32_t *);
606 static int fixup_devlog_params(struct adapter *);
607 static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *);
608 static int contact_firmware(struct adapter *);
609 static int partition_resources(struct adapter *);
610 static int get_params__pre_init(struct adapter *);
611 static int get_params__post_init(struct adapter *);
612 static int set_params__post_init(struct adapter *);
613 static void t4_set_desc(struct adapter *);
614 static bool fixed_ifmedia(struct port_info *);
615 static void build_medialist(struct port_info *);
616 static void init_link_config(struct port_info *);
617 static int fixup_link_config(struct port_info *);
618 static int apply_link_config(struct port_info *);
619 static int cxgbe_init_synchronized(struct vi_info *);
620 static int cxgbe_uninit_synchronized(struct vi_info *);
621 static void quiesce_txq(struct adapter *, struct sge_txq *);
622 static void quiesce_wrq(struct adapter *, struct sge_wrq *);
623 static void quiesce_iq(struct adapter *, struct sge_iq *);
624 static void quiesce_fl(struct adapter *, struct sge_fl *);
625 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
626     driver_intr_t *, void *, char *);
627 static int t4_free_irq(struct adapter *, struct irq *);
628 static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
629 static void vi_refresh_stats(struct adapter *, struct vi_info *);
630 static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
631 static void cxgbe_tick(void *);
632 static void cxgbe_sysctls(struct port_info *);
633 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
634 static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
635 static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
636 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
637 static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
638 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
639 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
640 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
641 static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
642 static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
643 static int sysctl_fec(SYSCTL_HANDLER_ARGS);
644 static int sysctl_autoneg(SYSCTL_HANDLER_ARGS);
645 static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
646 static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
647 static int sysctl_loadavg(SYSCTL_HANDLER_ARGS);
648 static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
649 static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
650 static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
651 static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
652 static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
653 static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
654 static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
655 static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
656 static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
657 static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
658 static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
659 static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
660 static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
661 static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
662 static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
663 static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS);
664 static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
665 static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
666 static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
667 static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
668 static int sysctl_tids(SYSCTL_HANDLER_ARGS);
669 static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
670 static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS);
671 static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
672 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
673 static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
674 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
675 static int sysctl_cpus(SYSCTL_HANDLER_ARGS);
676 #ifdef TCP_OFFLOAD
677 static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS);
678 static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS);
679 static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS);
680 static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS);
681 static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS);
682 static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS);
683 static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS);
684 static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS);
685 #endif
686 static int get_sge_context(struct adapter *, struct t4_sge_context *);
687 static int load_fw(struct adapter *, struct t4_data *);
688 static int load_cfg(struct adapter *, struct t4_data *);
689 static int load_boot(struct adapter *, struct t4_bootrom *);
690 static int load_bootcfg(struct adapter *, struct t4_data *);
691 static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *);
692 static void free_offload_policy(struct t4_offload_policy *);
693 static int set_offload_policy(struct adapter *, struct t4_offload_policy *);
694 static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
695 static int read_i2c(struct adapter *, struct t4_i2c_data *);
696 #ifdef TCP_OFFLOAD
697 static int toe_capability(struct vi_info *, int);
698 #endif
699 static int mod_event(module_t, int, void *);
700 static int notify_siblings(device_t, int);
701
/*
 * PCI device id to description tables, one per chip generation.  t4_probe()
 * walks t4_pciids[] and uses the matching entry's description for the device.
 */
struct {
	uint16_t device;	/* PCI device id */
	char *desc;		/* description for a matching device */
} t4_pciids[] = {
	{ 0xa000, "Chelsio Terminator 4 FPGA" },
	{ 0x4400, "Chelsio T440-dbg" },
	{ 0x4401, "Chelsio T420-CR" },
	{ 0x4402, "Chelsio T422-CR" },
	{ 0x4403, "Chelsio T440-CR" },
	{ 0x4404, "Chelsio T420-BCH" },
	{ 0x4405, "Chelsio T440-BCH" },
	{ 0x4406, "Chelsio T440-CH" },
	{ 0x4407, "Chelsio T420-SO" },
	{ 0x4408, "Chelsio T420-CX" },
	{ 0x4409, "Chelsio T420-BT" },
	{ 0x440a, "Chelsio T404-BT" },
	{ 0x440e, "Chelsio T440-LP-CR" },
}, t5_pciids[] = {
	{ 0xb000, "Chelsio Terminator 5 FPGA" },
	{ 0x5400, "Chelsio T580-dbg" },
	{ 0x5401, "Chelsio T520-CR" },		/* 2 x 10G */
	{ 0x5402, "Chelsio T522-CR" },		/* 2 x 10G, 2 X 1G */
	{ 0x5403, "Chelsio T540-CR" },		/* 4 x 10G */
	{ 0x5407, "Chelsio T520-SO" },		/* 2 x 10G, nomem */
	{ 0x5409, "Chelsio T520-BT" },		/* 2 x 10GBaseT */
	{ 0x540a, "Chelsio T504-BT" },		/* 4 x 1G */
	{ 0x540d, "Chelsio T580-CR" },		/* 2 x 40G */
	{ 0x540e, "Chelsio T540-LP-CR" },	/* 4 x 10G */
	{ 0x5410, "Chelsio T580-LP-CR" },	/* 2 x 40G */
	{ 0x5411, "Chelsio T520-LL-CR" },	/* 2 x 10G */
	{ 0x5412, "Chelsio T560-CR" },		/* 1 x 40G, 2 x 10G */
	{ 0x5414, "Chelsio T580-LP-SO-CR" },	/* 2 x 40G, nomem */
	{ 0x5415, "Chelsio T502-BT" },		/* 2 x 1G */
	{ 0x5418, "Chelsio T540-BT" },		/* 4 x 10GBaseT */
	{ 0x5419, "Chelsio T540-LP-BT" },	/* 4 x 10GBaseT */
	{ 0x541a, "Chelsio T540-SO-BT" },	/* 4 x 10GBaseT, nomem */
	{ 0x541b, "Chelsio T540-SO-CR" },	/* 4 x 10G, nomem */

	/* Custom */
	{ 0x5483, "Custom T540-CR" },
	{ 0x5484, "Custom T540-BT" },
}, t6_pciids[] = {
	{ 0xc006, "Chelsio Terminator 6 FPGA" },	/* T6 PE10K6 FPGA (PF0) */
	{ 0x6400, "Chelsio T6-DBG-25" },	/* 2 x 10/25G, debug */
	{ 0x6401, "Chelsio T6225-CR" },		/* 2 x 10/25G */
	{ 0x6402, "Chelsio T6225-SO-CR" },	/* 2 x 10/25G, nomem */
	{ 0x6403, "Chelsio T6425-CR" },		/* 4 x 10/25G */
	{ 0x6404, "Chelsio T6425-SO-CR" },	/* 4 x 10/25G, nomem */
	{ 0x6405, "Chelsio T6225-OCP-SO" },	/* 2 x 10/25G, nomem */
	{ 0x6406, "Chelsio T62100-OCP-SO" },	/* 2 x 40/50/100G, nomem */
	{ 0x6407, "Chelsio T62100-LP-CR" },	/* 2 x 40/50/100G */
	{ 0x6408, "Chelsio T62100-SO-CR" },	/* 2 x 40/50/100G, nomem */
	{ 0x6409, "Chelsio T6210-BT" },		/* 2 x 10GBASE-T */
	{ 0x640d, "Chelsio T62100-CR" },	/* 2 x 40/50/100G */
	{ 0x6410, "Chelsio T6-DBG-100" },	/* 2 x 40/50/100G, debug */
	{ 0x6411, "Chelsio T6225-LL-CR" },	/* 2 x 10/25G */
	{ 0x6414, "Chelsio T61100-OCP-SO" },	/* 1 x 40/50/100G, nomem */
	{ 0x6415, "Chelsio T6201-BT" },		/* 2 x 1000BASE-T */

	/* Custom */
	{ 0x6480, "Custom T6225-CR" },
	{ 0x6481, "Custom T62100-CR" },
	{ 0x6482, "Custom T6225-CR" },
	{ 0x6483, "Custom T62100-CR" },
	{ 0x6484, "Custom T64100-CR" },
	{ 0x6485, "Custom T6240-SO" },
	{ 0x6486, "Custom T6225-SO-CR" },
	{ 0x6487, "Custom T6225-CR" },
};
771
772 #ifdef TCP_OFFLOAD
773 /*
774  * service_iq_fl() has an iq and needs the fl.  Offset of fl from the iq should
775  * be exactly the same for both rxq and ofld_rxq.
776  */
777 CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
778 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
779 #endif
780 CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);
781
782 static int
783 t4_probe(device_t dev)
784 {
785         int i;
786         uint16_t v = pci_get_vendor(dev);
787         uint16_t d = pci_get_device(dev);
788         uint8_t f = pci_get_function(dev);
789
790         if (v != PCI_VENDOR_ID_CHELSIO)
791                 return (ENXIO);
792
793         /* Attach only to PF0 of the FPGA */
794         if (d == 0xa000 && f != 0)
795                 return (ENXIO);
796
797         for (i = 0; i < nitems(t4_pciids); i++) {
798                 if (d == t4_pciids[i].device) {
799                         device_set_desc(dev, t4_pciids[i].desc);
800                         return (BUS_PROBE_DEFAULT);
801                 }
802         }
803
804         return (ENXIO);
805 }
806
807 static int
808 t5_probe(device_t dev)
809 {
810         int i;
811         uint16_t v = pci_get_vendor(dev);
812         uint16_t d = pci_get_device(dev);
813         uint8_t f = pci_get_function(dev);
814
815         if (v != PCI_VENDOR_ID_CHELSIO)
816                 return (ENXIO);
817
818         /* Attach only to PF0 of the FPGA */
819         if (d == 0xb000 && f != 0)
820                 return (ENXIO);
821
822         for (i = 0; i < nitems(t5_pciids); i++) {
823                 if (d == t5_pciids[i].device) {
824                         device_set_desc(dev, t5_pciids[i].desc);
825                         return (BUS_PROBE_DEFAULT);
826                 }
827         }
828
829         return (ENXIO);
830 }
831
832 static int
833 t6_probe(device_t dev)
834 {
835         int i;
836         uint16_t v = pci_get_vendor(dev);
837         uint16_t d = pci_get_device(dev);
838
839         if (v != PCI_VENDOR_ID_CHELSIO)
840                 return (ENXIO);
841
842         for (i = 0; i < nitems(t6_pciids); i++) {
843                 if (d == t6_pciids[i].device) {
844                         device_set_desc(dev, t6_pciids[i].desc);
845                         return (BUS_PROBE_DEFAULT);
846                 }
847         }
848
849         return (ENXIO);
850 }
851
852 static void
853 t5_attribute_workaround(device_t dev)
854 {
855         device_t root_port;
856         uint32_t v;
857
858         /*
859          * The T5 chips do not properly echo the No Snoop and Relaxed
860          * Ordering attributes when replying to a TLP from a Root
861          * Port.  As a workaround, find the parent Root Port and
862          * disable No Snoop and Relaxed Ordering.  Note that this
863          * affects all devices under this root port.
864          */
865         root_port = pci_find_pcie_root_port(dev);
866         if (root_port == NULL) {
867                 device_printf(dev, "Unable to find parent root port\n");
868                 return;
869         }
870
871         v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL,
872             PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
873         if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) !=
874             0)
875                 device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
876                     device_get_nameunit(root_port));
877 }
878
879 static const struct devnames devnames[] = {
880         {
881                 .nexus_name = "t4nex",
882                 .ifnet_name = "cxgbe",
883                 .vi_ifnet_name = "vcxgbe",
884                 .pf03_drv_name = "t4iov",
885                 .vf_nexus_name = "t4vf",
886                 .vf_ifnet_name = "cxgbev"
887         }, {
888                 .nexus_name = "t5nex",
889                 .ifnet_name = "cxl",
890                 .vi_ifnet_name = "vcxl",
891                 .pf03_drv_name = "t5iov",
892                 .vf_nexus_name = "t5vf",
893                 .vf_ifnet_name = "cxlv"
894         }, {
895                 .nexus_name = "t6nex",
896                 .ifnet_name = "cc",
897                 .vi_ifnet_name = "vcc",
898                 .pf03_drv_name = "t6iov",
899                 .vf_nexus_name = "t6vf",
900                 .vf_ifnet_name = "ccv"
901         }
902 };
903
904 void
905 t4_init_devnames(struct adapter *sc)
906 {
907         int id;
908
909         id = chip_id(sc);
910         if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames))
911                 sc->names = &devnames[id - CHELSIO_T4];
912         else {
913                 device_printf(sc->dev, "chip id %d is not supported.\n", id);
914                 sc->names = NULL;
915         }
916 }
917
918 static int
919 t4_ifnet_unit(struct adapter *sc, struct port_info *pi)
920 {
921         const char *parent, *name;
922         long value;
923         int line, unit;
924
925         line = 0;
926         parent = device_get_nameunit(sc->dev);
927         name = sc->names->ifnet_name;
928         while (resource_find_dev(&line, name, &unit, "at", parent) == 0) {
929                 if (resource_long_value(name, unit, "port", &value) == 0 &&
930                     value == pi->port_id)
931                         return (unit);
932         }
933         return (-1);
934 }
935
936 static int
937 t4_attach(device_t dev)
938 {
939         struct adapter *sc;
940         int rc = 0, i, j, rqidx, tqidx, nports;
941         struct make_dev_args mda;
942         struct intrs_and_queues iaq;
943         struct sge *s;
944         uint32_t *buf;
945 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
946         int ofld_tqidx;
947 #endif
948 #ifdef TCP_OFFLOAD
949         int ofld_rqidx;
950 #endif
951 #ifdef DEV_NETMAP
952         int nm_rqidx, nm_tqidx;
953 #endif
954         int num_vis;
955
956         sc = device_get_softc(dev);
957         sc->dev = dev;
958         TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);
959
960         if ((pci_get_device(dev) & 0xff00) == 0x5400)
961                 t5_attribute_workaround(dev);
962         pci_enable_busmaster(dev);
963         if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
964                 uint32_t v;
965
966                 pci_set_max_read_req(dev, 4096);
967                 v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
968                 sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
969                 if (pcie_relaxed_ordering == 0 &&
970                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) {
971                         v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE;
972                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
973                 } else if (pcie_relaxed_ordering == 1 &&
974                     (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) {
975                         v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
976                         pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
977                 }
978         }
979
980         sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS);
981         sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL);
982         sc->traceq = -1;
983         mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
984         snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
985             device_get_nameunit(dev));
986
987         snprintf(sc->lockname, sizeof(sc->lockname), "%s",
988             device_get_nameunit(dev));
989         mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
990         t4_add_adapter(sc);
991
992         mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
993         TAILQ_INIT(&sc->sfl);
994         callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
995
996         mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);
997
998         sc->policy = NULL;
999         rw_init(&sc->policy_lock, "connection offload policy");
1000
1001         rc = t4_map_bars_0_and_4(sc);
1002         if (rc != 0)
1003                 goto done; /* error message displayed already */
1004
1005         memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
1006
1007         /* Prepare the adapter for operation. */
1008         buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK);
1009         rc = -t4_prep_adapter(sc, buf);
1010         free(buf, M_CXGBE);
1011         if (rc != 0) {
1012                 device_printf(dev, "failed to prepare adapter: %d.\n", rc);
1013                 goto done;
1014         }
1015
1016         /*
1017          * This is the real PF# to which we're attaching.  Works from within PCI
1018          * passthrough environments too, where pci_get_function() could return a
1019          * different PF# depending on the passthrough configuration.  We need to
1020          * use the real PF# in all our communication with the firmware.
1021          */
1022         j = t4_read_reg(sc, A_PL_WHOAMI);
1023         sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j);
1024         sc->mbox = sc->pf;
1025
1026         t4_init_devnames(sc);
1027         if (sc->names == NULL) {
1028                 rc = ENOTSUP;
1029                 goto done; /* error message displayed already */
1030         }
1031
1032         /*
1033          * Do this really early, with the memory windows set up even before the
1034          * character device.  The userland tool's register i/o and mem read
1035          * will work even in "recovery mode".
1036          */
1037         setup_memwin(sc);
1038         if (t4_init_devlog_params(sc, 0) == 0)
1039                 fixup_devlog_params(sc);
1040         make_dev_args_init(&mda);
1041         mda.mda_devsw = &t4_cdevsw;
1042         mda.mda_uid = UID_ROOT;
1043         mda.mda_gid = GID_WHEEL;
1044         mda.mda_mode = 0600;
1045         mda.mda_si_drv1 = sc;
1046         rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
1047         if (rc != 0)
1048                 device_printf(dev, "failed to create nexus char device: %d.\n",
1049                     rc);
1050
1051         /* Go no further if recovery mode has been requested. */
1052         if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
1053                 device_printf(dev, "recovery mode.\n");
1054                 goto done;
1055         }
1056
1057 #if defined(__i386__)
1058         if ((cpu_feature & CPUID_CX8) == 0) {
1059                 device_printf(dev, "64 bit atomics not available.\n");
1060                 rc = ENOTSUP;
1061                 goto done;
1062         }
1063 #endif
1064
1065         /* Contact the firmware and try to become the master driver. */
1066         rc = contact_firmware(sc);
1067         if (rc != 0)
1068                 goto done; /* error message displayed already */
1069         MPASS(sc->flags & FW_OK);
1070
1071         rc = get_params__pre_init(sc);
1072         if (rc != 0)
1073                 goto done; /* error message displayed already */
1074
1075         if (sc->flags & MASTER_PF) {
1076                 rc = partition_resources(sc);
1077                 if (rc != 0)
1078                         goto done; /* error message displayed already */
1079                 t4_intr_clear(sc);
1080         }
1081
1082         rc = get_params__post_init(sc);
1083         if (rc != 0)
1084                 goto done; /* error message displayed already */
1085
1086         rc = set_params__post_init(sc);
1087         if (rc != 0)
1088                 goto done; /* error message displayed already */
1089
1090         rc = t4_map_bar_2(sc);
1091         if (rc != 0)
1092                 goto done; /* error message displayed already */
1093
1094         rc = t4_create_dma_tag(sc);
1095         if (rc != 0)
1096                 goto done; /* error message displayed already */
1097
1098         /*
1099          * First pass over all the ports - allocate VIs and initialize some
1100          * basic parameters like mac address, port type, etc.
1101          */
1102         for_each_port(sc, i) {
1103                 struct port_info *pi;
1104
1105                 pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
1106                 sc->port[i] = pi;
1107
1108                 /* These must be set before t4_port_init */
1109                 pi->adapter = sc;
1110                 pi->port_id = i;
1111                 /*
1112                  * XXX: vi[0] is special so we can't delay this allocation until
1113                  * pi->nvi's final value is known.
1114                  */
1115                 pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE,
1116                     M_ZERO | M_WAITOK);
1117
1118                 /*
1119                  * Allocate the "main" VI and initialize parameters
1120                  * like mac addr.
1121                  */
1122                 rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
1123                 if (rc != 0) {
1124                         device_printf(dev, "unable to initialize port %d: %d\n",
1125                             i, rc);
1126                         free(pi->vi, M_CXGBE);
1127                         free(pi, M_CXGBE);
1128                         sc->port[i] = NULL;
1129                         goto done;
1130                 }
1131
1132                 snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
1133                     device_get_nameunit(dev), i);
1134                 mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
1135                 sc->chan_map[pi->tx_chan] = i;
1136
1137                 /* All VIs on this port share this media. */
1138                 ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
1139                     cxgbe_media_status);
1140
1141                 PORT_LOCK(pi);
1142                 init_link_config(pi);
1143                 fixup_link_config(pi);
1144                 build_medialist(pi);
1145                 if (fixed_ifmedia(pi))
1146                         pi->flags |= FIXED_IFMEDIA;
1147                 PORT_UNLOCK(pi);
1148
1149                 pi->dev = device_add_child(dev, sc->names->ifnet_name,
1150                     t4_ifnet_unit(sc, pi));
1151                 if (pi->dev == NULL) {
1152                         device_printf(dev,
1153                             "failed to add device for port %d.\n", i);
1154                         rc = ENXIO;
1155                         goto done;
1156                 }
1157                 pi->vi[0].dev = pi->dev;
1158                 device_set_softc(pi->dev, pi);
1159         }
1160
1161         /*
1162          * Interrupt type, # of interrupts, # of rx/tx queues, etc.
1163          */
1164         nports = sc->params.nports;
1165         rc = cfg_itype_and_nqueues(sc, &iaq);
1166         if (rc != 0)
1167                 goto done; /* error message displayed already */
1168
1169         num_vis = iaq.num_vis;
1170         sc->intr_type = iaq.intr_type;
1171         sc->intr_count = iaq.nirq;
1172
1173         s = &sc->sge;
1174         s->nrxq = nports * iaq.nrxq;
1175         s->ntxq = nports * iaq.ntxq;
1176         if (num_vis > 1) {
1177                 s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi;
1178                 s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi;
1179         }
1180         s->neq = s->ntxq + s->nrxq;     /* the free list in an rxq is an eq */
1181         s->neq += nports;               /* ctrl queues: 1 per port */
1182         s->niq = s->nrxq + 1;           /* 1 extra for firmware event queue */
1183 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1184         if (is_offload(sc) || is_ethoffload(sc)) {
1185                 s->nofldtxq = nports * iaq.nofldtxq;
1186                 if (num_vis > 1)
1187                         s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi;
1188                 s->neq += s->nofldtxq;
1189
1190                 s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
1191                     M_CXGBE, M_ZERO | M_WAITOK);
1192         }
1193 #endif
1194 #ifdef TCP_OFFLOAD
1195         if (is_offload(sc)) {
1196                 s->nofldrxq = nports * iaq.nofldrxq;
1197                 if (num_vis > 1)
1198                         s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi;
1199                 s->neq += s->nofldrxq;  /* free list */
1200                 s->niq += s->nofldrxq;
1201
1202                 s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
1203                     M_CXGBE, M_ZERO | M_WAITOK);
1204         }
1205 #endif
1206 #ifdef DEV_NETMAP
1207         if (num_vis > 1) {
1208                 s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi;
1209                 s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi;
1210         }
1211         s->neq += s->nnmtxq + s->nnmrxq;
1212         s->niq += s->nnmrxq;
1213
1214         s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
1215             M_CXGBE, M_ZERO | M_WAITOK);
1216         s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
1217             M_CXGBE, M_ZERO | M_WAITOK);
1218 #endif
1219
1220         s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE,
1221             M_ZERO | M_WAITOK);
1222         s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
1223             M_ZERO | M_WAITOK);
1224         s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
1225             M_ZERO | M_WAITOK);
1226         s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
1227             M_ZERO | M_WAITOK);
1228         s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
1229             M_ZERO | M_WAITOK);
1230
1231         sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
1232             M_ZERO | M_WAITOK);
1233
1234         t4_init_l2t(sc, M_WAITOK);
1235         t4_init_smt(sc, M_WAITOK);
1236         t4_init_tx_sched(sc);
1237 #ifdef RATELIMIT
1238         t4_init_etid_table(sc);
1239 #endif
1240 #ifdef INET6
1241         t4_init_clip_table(sc);
1242 #endif
1243         if (sc->vres.key.size != 0)
1244                 sc->key_map = vmem_create("T4TLS key map", sc->vres.key.start,
1245                     sc->vres.key.size, 32, 0, M_FIRSTFIT | M_WAITOK);
1246
1247         /*
1248          * Second pass over the ports.  This time we know the number of rx and
1249          * tx queues that each port should get.
1250          */
1251         rqidx = tqidx = 0;
1252 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1253         ofld_tqidx = 0;
1254 #endif
1255 #ifdef TCP_OFFLOAD
1256         ofld_rqidx = 0;
1257 #endif
1258 #ifdef DEV_NETMAP
1259         nm_rqidx = nm_tqidx = 0;
1260 #endif
1261         for_each_port(sc, i) {
1262                 struct port_info *pi = sc->port[i];
1263                 struct vi_info *vi;
1264
1265                 if (pi == NULL)
1266                         continue;
1267
1268                 pi->nvi = num_vis;
1269                 for_each_vi(pi, j, vi) {
1270                         vi->pi = pi;
1271                         vi->qsize_rxq = t4_qsize_rxq;
1272                         vi->qsize_txq = t4_qsize_txq;
1273
1274                         vi->first_rxq = rqidx;
1275                         vi->first_txq = tqidx;
1276                         vi->tmr_idx = t4_tmr_idx;
1277                         vi->pktc_idx = t4_pktc_idx;
1278                         vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi;
1279                         vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi;
1280
1281                         rqidx += vi->nrxq;
1282                         tqidx += vi->ntxq;
1283
1284                         if (j == 0 && vi->ntxq > 1)
1285                                 vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0;
1286                         else
1287                                 vi->rsrv_noflowq = 0;
1288
1289 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1290                         vi->first_ofld_txq = ofld_tqidx;
1291                         vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi;
1292                         ofld_tqidx += vi->nofldtxq;
1293 #endif
1294 #ifdef TCP_OFFLOAD
1295                         vi->ofld_tmr_idx = t4_tmr_idx_ofld;
1296                         vi->ofld_pktc_idx = t4_pktc_idx_ofld;
1297                         vi->first_ofld_rxq = ofld_rqidx;
1298                         vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi;
1299
1300                         ofld_rqidx += vi->nofldrxq;
1301 #endif
1302 #ifdef DEV_NETMAP
1303                         if (j > 0) {
1304                                 vi->first_nm_rxq = nm_rqidx;
1305                                 vi->first_nm_txq = nm_tqidx;
1306                                 vi->nnmrxq = iaq.nnmrxq_vi;
1307                                 vi->nnmtxq = iaq.nnmtxq_vi;
1308                                 nm_rqidx += vi->nnmrxq;
1309                                 nm_tqidx += vi->nnmtxq;
1310                         }
1311 #endif
1312                 }
1313         }
1314
1315         rc = t4_setup_intr_handlers(sc);
1316         if (rc != 0) {
1317                 device_printf(dev,
1318                     "failed to setup interrupt handlers: %d\n", rc);
1319                 goto done;
1320         }
1321
1322         rc = bus_generic_probe(dev);
1323         if (rc != 0) {
1324                 device_printf(dev, "failed to probe child drivers: %d\n", rc);
1325                 goto done;
1326         }
1327
1328         /*
1329          * Ensure thread-safe mailbox access (in debug builds).
1330          *
1331          * So far this was the only thread accessing the mailbox but various
1332          * ifnets and sysctls are about to be created and their handlers/ioctls
1333          * will access the mailbox from different threads.
1334          */
1335         sc->flags |= CHK_MBOX_ACCESS;
1336
1337         rc = bus_generic_attach(dev);
1338         if (rc != 0) {
1339                 device_printf(dev,
1340                     "failed to attach all child ports: %d\n", rc);
1341                 goto done;
1342         }
1343
1344         device_printf(dev,
1345             "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
1346             sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
1347             sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
1348             (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
1349             sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
1350
1351         t4_set_desc(sc);
1352
1353         notify_siblings(dev, 0);
1354
1355 done:
1356         if (rc != 0 && sc->cdev) {
1357                 /* cdev was created and so cxgbetool works; recover that way. */
1358                 device_printf(dev,
1359                     "error during attach, adapter is now in recovery mode.\n");
1360                 rc = 0;
1361         }
1362
1363         if (rc != 0)
1364                 t4_detach_common(dev);
1365         else
1366                 t4_sysctls(sc);
1367
1368         return (rc);
1369 }
1370
1371 static int
1372 t4_child_location_str(device_t bus, device_t dev, char *buf, size_t buflen)
1373 {
1374         struct port_info *pi;
1375
1376         pi = device_get_softc(dev);
1377         snprintf(buf, buflen, "port=%d", pi->port_id);
1378         return (0);
1379 }
1380
1381 static int
1382 t4_ready(device_t dev)
1383 {
1384         struct adapter *sc;
1385
1386         sc = device_get_softc(dev);
1387         if (sc->flags & FW_OK)
1388                 return (0);
1389         return (ENXIO);
1390 }
1391
1392 static int
1393 t4_read_port_device(device_t dev, int port, device_t *child)
1394 {
1395         struct adapter *sc;
1396         struct port_info *pi;
1397
1398         sc = device_get_softc(dev);
1399         if (port < 0 || port >= MAX_NPORTS)
1400                 return (EINVAL);
1401         pi = sc->port[port];
1402         if (pi == NULL || pi->dev == NULL)
1403                 return (ENXIO);
1404         *child = pi->dev;
1405         return (0);
1406 }
1407
1408 static int
1409 notify_siblings(device_t dev, int detaching)
1410 {
1411         device_t sibling;
1412         int error, i;
1413
1414         error = 0;
1415         for (i = 0; i < PCI_FUNCMAX; i++) {
1416                 if (i == pci_get_function(dev))
1417                         continue;
1418                 sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
1419                     pci_get_slot(dev), i);
1420                 if (sibling == NULL || !device_is_attached(sibling))
1421                         continue;
1422                 if (detaching)
1423                         error = T4_DETACH_CHILD(sibling);
1424                 else
1425                         (void)T4_ATTACH_CHILD(sibling);
1426                 if (error)
1427                         break;
1428         }
1429         return (error);
1430 }
1431
1432 /*
1433  * Idempotent
1434  */
1435 static int
1436 t4_detach(device_t dev)
1437 {
1438         struct adapter *sc;
1439         int rc;
1440
1441         sc = device_get_softc(dev);
1442
1443         rc = notify_siblings(dev, 1);
1444         if (rc) {
1445                 device_printf(dev,
1446                     "failed to detach sibling devices: %d\n", rc);
1447                 return (rc);
1448         }
1449
1450         return (t4_detach_common(dev));
1451 }
1452
1453 int
1454 t4_detach_common(device_t dev)
1455 {
1456         struct adapter *sc;
1457         struct port_info *pi;
1458         int i, rc;
1459
1460         sc = device_get_softc(dev);
1461
1462         if (sc->cdev) {
1463                 destroy_dev(sc->cdev);
1464                 sc->cdev = NULL;
1465         }
1466
1467         sc->flags &= ~CHK_MBOX_ACCESS;
1468         if (sc->flags & FULL_INIT_DONE) {
1469                 if (!(sc->flags & IS_VF))
1470                         t4_intr_disable(sc);
1471         }
1472
1473         if (device_is_attached(dev)) {
1474                 rc = bus_generic_detach(dev);
1475                 if (rc) {
1476                         device_printf(dev,
1477                             "failed to detach child devices: %d\n", rc);
1478                         return (rc);
1479                 }
1480         }
1481
1482         for (i = 0; i < sc->intr_count; i++)
1483                 t4_free_irq(sc, &sc->irq[i]);
1484
1485         if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1486                 t4_free_tx_sched(sc);
1487
1488         for (i = 0; i < MAX_NPORTS; i++) {
1489                 pi = sc->port[i];
1490                 if (pi) {
1491                         t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
1492                         if (pi->dev)
1493                                 device_delete_child(dev, pi->dev);
1494
1495                         mtx_destroy(&pi->pi_lock);
1496                         free(pi->vi, M_CXGBE);
1497                         free(pi, M_CXGBE);
1498                 }
1499         }
1500
1501         device_delete_children(dev);
1502
1503         if (sc->flags & FULL_INIT_DONE)
1504                 adapter_full_uninit(sc);
1505
1506         if ((sc->flags & (IS_VF | FW_OK)) == FW_OK)
1507                 t4_fw_bye(sc, sc->mbox);
1508
1509         if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
1510                 pci_release_msi(dev);
1511
1512         if (sc->regs_res)
1513                 bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
1514                     sc->regs_res);
1515
1516         if (sc->udbs_res)
1517                 bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
1518                     sc->udbs_res);
1519
1520         if (sc->msix_res)
1521                 bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
1522                     sc->msix_res);
1523
1524         if (sc->l2t)
1525                 t4_free_l2t(sc->l2t);
1526         if (sc->smt)
1527                 t4_free_smt(sc->smt);
1528 #ifdef RATELIMIT
1529         t4_free_etid_table(sc);
1530 #endif
1531         if (sc->key_map)
1532                 vmem_destroy(sc->key_map);
1533 #ifdef INET6
1534         t4_destroy_clip_table(sc);
1535 #endif
1536
1537 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1538         free(sc->sge.ofld_txq, M_CXGBE);
1539 #endif
1540 #ifdef TCP_OFFLOAD
1541         free(sc->sge.ofld_rxq, M_CXGBE);
1542 #endif
1543 #ifdef DEV_NETMAP
1544         free(sc->sge.nm_rxq, M_CXGBE);
1545         free(sc->sge.nm_txq, M_CXGBE);
1546 #endif
1547         free(sc->irq, M_CXGBE);
1548         free(sc->sge.rxq, M_CXGBE);
1549         free(sc->sge.txq, M_CXGBE);
1550         free(sc->sge.ctrlq, M_CXGBE);
1551         free(sc->sge.iqmap, M_CXGBE);
1552         free(sc->sge.eqmap, M_CXGBE);
1553         free(sc->tids.ftid_tab, M_CXGBE);
1554         free(sc->tids.hpftid_tab, M_CXGBE);
1555         free_hftid_hash(&sc->tids);
1556         free(sc->tids.atid_tab, M_CXGBE);
1557         free(sc->tids.tid_tab, M_CXGBE);
1558         free(sc->tt.tls_rx_ports, M_CXGBE);
1559         t4_destroy_dma_tag(sc);
1560         if (mtx_initialized(&sc->sc_lock)) {
1561                 sx_xlock(&t4_list_lock);
1562                 SLIST_REMOVE(&t4_list, sc, adapter, link);
1563                 sx_xunlock(&t4_list_lock);
1564                 mtx_destroy(&sc->sc_lock);
1565         }
1566
1567         callout_drain(&sc->sfl_callout);
1568         if (mtx_initialized(&sc->tids.ftid_lock)) {
1569                 mtx_destroy(&sc->tids.ftid_lock);
1570                 cv_destroy(&sc->tids.ftid_cv);
1571         }
1572         if (mtx_initialized(&sc->tids.atid_lock))
1573                 mtx_destroy(&sc->tids.atid_lock);
1574         if (mtx_initialized(&sc->sfl_lock))
1575                 mtx_destroy(&sc->sfl_lock);
1576         if (mtx_initialized(&sc->ifp_lock))
1577                 mtx_destroy(&sc->ifp_lock);
1578         if (mtx_initialized(&sc->reg_lock))
1579                 mtx_destroy(&sc->reg_lock);
1580
1581         if (rw_initialized(&sc->policy_lock)) {
1582                 rw_destroy(&sc->policy_lock);
1583 #ifdef TCP_OFFLOAD
1584                 if (sc->policy != NULL)
1585                         free_offload_policy(sc->policy);
1586 #endif
1587         }
1588
1589         for (i = 0; i < NUM_MEMWIN; i++) {
1590                 struct memwin *mw = &sc->memwin[i];
1591
1592                 if (rw_initialized(&mw->mw_lock))
1593                         rw_destroy(&mw->mw_lock);
1594         }
1595
1596         bzero(sc, sizeof(*sc));
1597
1598         return (0);
1599 }
1600
1601 static int
1602 cxgbe_probe(device_t dev)
1603 {
1604         char buf[128];
1605         struct port_info *pi = device_get_softc(dev);
1606
1607         snprintf(buf, sizeof(buf), "port %d", pi->port_id);
1608         device_set_desc_copy(dev, buf);
1609
1610         return (BUS_PROBE_DEFAULT);
1611 }
1612
/*
 * Interface capabilities common to every VI.  T4_CAP_ENABLE is the subset
 * that is enabled by default, which is all of them.
 */
#define T4_CAP ( \
    IFCAP_VLAN_HWTAGGING | \
    IFCAP_VLAN_MTU | \
    IFCAP_VLAN_HWCSUM | \
    IFCAP_VLAN_HWTSO | \
    IFCAP_HWCSUM | \
    IFCAP_HWCSUM_IPV6 | \
    IFCAP_TSO | \
    IFCAP_LRO | \
    IFCAP_JUMBO_MTU | \
    IFCAP_LINKSTATE | \
    IFCAP_HWSTATS | \
    IFCAP_HWRXTSTMP)
#define T4_CAP_ENABLE (T4_CAP)
1618
1619 static int
1620 cxgbe_vi_attach(device_t dev, struct vi_info *vi)
1621 {
1622         struct ifnet *ifp;
1623         struct sbuf *sb;
1624
1625         vi->xact_addr_filt = -1;
1626         callout_init(&vi->tick, 1);
1627
1628         /* Allocate an ifnet and set it up */
1629         ifp = if_alloc(IFT_ETHER);
1630         if (ifp == NULL) {
1631                 device_printf(dev, "Cannot allocate ifnet\n");
1632                 return (ENOMEM);
1633         }
1634         vi->ifp = ifp;
1635         ifp->if_softc = vi;
1636
1637         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1638         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1639
1640         ifp->if_init = cxgbe_init;
1641         ifp->if_ioctl = cxgbe_ioctl;
1642         ifp->if_transmit = cxgbe_transmit;
1643         ifp->if_qflush = cxgbe_qflush;
1644         ifp->if_get_counter = cxgbe_get_counter;
1645 #ifdef RATELIMIT
1646         ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc;
1647         ifp->if_snd_tag_modify = cxgbe_snd_tag_modify;
1648         ifp->if_snd_tag_query = cxgbe_snd_tag_query;
1649         ifp->if_snd_tag_free = cxgbe_snd_tag_free;
1650 #endif
1651
1652         ifp->if_capabilities = T4_CAP;
1653         ifp->if_capenable = T4_CAP_ENABLE;
1654 #ifdef TCP_OFFLOAD
1655         if (vi->nofldrxq != 0)
1656                 ifp->if_capabilities |= IFCAP_TOE;
1657 #endif
1658 #ifdef RATELIMIT
1659         if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0) {
1660                 ifp->if_capabilities |= IFCAP_TXRTLMT;
1661                 ifp->if_capenable |= IFCAP_TXRTLMT;
1662         }
1663 #endif
1664         ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1665             CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1666
1667         ifp->if_hw_tsomax = IP_MAXPACKET;
1668         ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
1669 #ifdef RATELIMIT
1670         if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0)
1671                 ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
1672 #endif
1673         ifp->if_hw_tsomaxsegsize = 65536;
1674
1675         ether_ifattach(ifp, vi->hw_addr);
1676 #ifdef DEV_NETMAP
1677         if (vi->nnmrxq != 0)
1678                 cxgbe_nm_attach(vi);
1679 #endif
1680         sb = sbuf_new_auto();
1681         sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
1682 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
1683         switch (ifp->if_capabilities & (IFCAP_TOE | IFCAP_TXRTLMT)) {
1684         case IFCAP_TOE:
1685                 sbuf_printf(sb, "; %d txq (TOE)", vi->nofldtxq);
1686                 break;
1687         case IFCAP_TOE | IFCAP_TXRTLMT:
1688                 sbuf_printf(sb, "; %d txq (TOE/ETHOFLD)", vi->nofldtxq);
1689                 break;
1690         case IFCAP_TXRTLMT:
1691                 sbuf_printf(sb, "; %d txq (ETHOFLD)", vi->nofldtxq);
1692                 break;
1693         }
1694 #endif
1695 #ifdef TCP_OFFLOAD
1696         if (ifp->if_capabilities & IFCAP_TOE)
1697                 sbuf_printf(sb, ", %d rxq (TOE)", vi->nofldrxq);
1698 #endif
1699 #ifdef DEV_NETMAP
1700         if (ifp->if_capabilities & IFCAP_NETMAP)
1701                 sbuf_printf(sb, "; %d txq, %d rxq (netmap)",
1702                     vi->nnmtxq, vi->nnmrxq);
1703 #endif
1704         sbuf_finish(sb);
1705         device_printf(dev, "%s\n", sbuf_data(sb));
1706         sbuf_delete(sb);
1707
1708         vi_sysctls(vi);
1709
1710         return (0);
1711 }
1712
1713 static int
1714 cxgbe_attach(device_t dev)
1715 {
1716         struct port_info *pi = device_get_softc(dev);
1717         struct adapter *sc = pi->adapter;
1718         struct vi_info *vi;
1719         int i, rc;
1720
1721         callout_init_mtx(&pi->tick, &pi->pi_lock, 0);
1722
1723         rc = cxgbe_vi_attach(dev, &pi->vi[0]);
1724         if (rc)
1725                 return (rc);
1726
1727         for_each_vi(pi, i, vi) {
1728                 if (i == 0)
1729                         continue;
1730                 vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1);
1731                 if (vi->dev == NULL) {
1732                         device_printf(dev, "failed to add VI %d\n", i);
1733                         continue;
1734                 }
1735                 device_set_softc(vi->dev, vi);
1736         }
1737
1738         cxgbe_sysctls(pi);
1739
1740         bus_generic_attach(dev);
1741
1742         return (0);
1743 }
1744
1745 static void
1746 cxgbe_vi_detach(struct vi_info *vi)
1747 {
1748         struct ifnet *ifp = vi->ifp;
1749
1750         ether_ifdetach(ifp);
1751
1752         /* Let detach proceed even if these fail. */
1753 #ifdef DEV_NETMAP
1754         if (ifp->if_capabilities & IFCAP_NETMAP)
1755                 cxgbe_nm_detach(vi);
1756 #endif
1757         cxgbe_uninit_synchronized(vi);
1758         callout_drain(&vi->tick);
1759         vi_full_uninit(vi);
1760
1761         if_free(vi->ifp);
1762         vi->ifp = NULL;
1763 }
1764
1765 static int
1766 cxgbe_detach(device_t dev)
1767 {
1768         struct port_info *pi = device_get_softc(dev);
1769         struct adapter *sc = pi->adapter;
1770         int rc;
1771
1772         /* Detach the extra VIs first. */
1773         rc = bus_generic_detach(dev);
1774         if (rc)
1775                 return (rc);
1776         device_delete_children(dev);
1777
1778         doom_vi(sc, &pi->vi[0]);
1779
1780         if (pi->flags & HAS_TRACEQ) {
1781                 sc->traceq = -1;        /* cloner should not create ifnet */
1782                 t4_tracer_port_detach(sc);
1783         }
1784
1785         cxgbe_vi_detach(&pi->vi[0]);
1786         callout_drain(&pi->tick);
1787         ifmedia_removeall(&pi->media);
1788
1789         end_synchronized_op(sc, 0);
1790
1791         return (0);
1792 }
1793
1794 static void
1795 cxgbe_init(void *arg)
1796 {
1797         struct vi_info *vi = arg;
1798         struct adapter *sc = vi->pi->adapter;
1799
1800         if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0)
1801                 return;
1802         cxgbe_init_synchronized(vi);
1803         end_synchronized_op(sc, 0);
1804 }
1805
1806 static int
1807 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
1808 {
1809         int rc = 0, mtu, flags;
1810         struct vi_info *vi = ifp->if_softc;
1811         struct port_info *pi = vi->pi;
1812         struct adapter *sc = pi->adapter;
1813         struct ifreq *ifr = (struct ifreq *)data;
1814         uint32_t mask;
1815
1816         switch (cmd) {
1817         case SIOCSIFMTU:
1818                 mtu = ifr->ifr_mtu;
1819                 if (mtu < ETHERMIN || mtu > MAX_MTU)
1820                         return (EINVAL);
1821
1822                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
1823                 if (rc)
1824                         return (rc);
1825                 ifp->if_mtu = mtu;
1826                 if (vi->flags & VI_INIT_DONE) {
1827                         t4_update_fl_bufsize(ifp);
1828                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1829                                 rc = update_mac_settings(ifp, XGMAC_MTU);
1830                 }
1831                 end_synchronized_op(sc, 0);
1832                 break;
1833
1834         case SIOCSIFFLAGS:
1835                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4flg");
1836                 if (rc)
1837                         return (rc);
1838
1839                 if (ifp->if_flags & IFF_UP) {
1840                         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1841                                 flags = vi->if_flags;
1842                                 if ((ifp->if_flags ^ flags) &
1843                                     (IFF_PROMISC | IFF_ALLMULTI)) {
1844                                         rc = update_mac_settings(ifp,
1845                                             XGMAC_PROMISC | XGMAC_ALLMULTI);
1846                                 }
1847                         } else {
1848                                 rc = cxgbe_init_synchronized(vi);
1849                         }
1850                         vi->if_flags = ifp->if_flags;
1851                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1852                         rc = cxgbe_uninit_synchronized(vi);
1853                 }
1854                 end_synchronized_op(sc, 0);
1855                 break;
1856
1857         case SIOCADDMULTI:
1858         case SIOCDELMULTI:
1859                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4multi");
1860                 if (rc)
1861                         return (rc);
1862                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1863                         rc = update_mac_settings(ifp, XGMAC_MCADDRS);
1864                 end_synchronized_op(sc, 0);
1865                 break;
1866
1867         case SIOCSIFCAP:
1868                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
1869                 if (rc)
1870                         return (rc);
1871
1872                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1873                 if (mask & IFCAP_TXCSUM) {
1874                         ifp->if_capenable ^= IFCAP_TXCSUM;
1875                         ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1876
1877                         if (IFCAP_TSO4 & ifp->if_capenable &&
1878                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1879                                 ifp->if_capenable &= ~IFCAP_TSO4;
1880                                 if_printf(ifp,
1881                                     "tso4 disabled due to -txcsum.\n");
1882                         }
1883                 }
1884                 if (mask & IFCAP_TXCSUM_IPV6) {
1885                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1886                         ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1887
1888                         if (IFCAP_TSO6 & ifp->if_capenable &&
1889                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1890                                 ifp->if_capenable &= ~IFCAP_TSO6;
1891                                 if_printf(ifp,
1892                                     "tso6 disabled due to -txcsum6.\n");
1893                         }
1894                 }
1895                 if (mask & IFCAP_RXCSUM)
1896                         ifp->if_capenable ^= IFCAP_RXCSUM;
1897                 if (mask & IFCAP_RXCSUM_IPV6)
1898                         ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1899
1900                 /*
1901                  * Note that we leave CSUM_TSO alone (it is always set).  The
1902                  * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1903                  * sending a TSO request our way, so it's sufficient to toggle
1904                  * IFCAP_TSOx only.
1905                  */
1906                 if (mask & IFCAP_TSO4) {
1907                         if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1908                             !(IFCAP_TXCSUM & ifp->if_capenable)) {
1909                                 if_printf(ifp, "enable txcsum first.\n");
1910                                 rc = EAGAIN;
1911                                 goto fail;
1912                         }
1913                         ifp->if_capenable ^= IFCAP_TSO4;
1914                 }
1915                 if (mask & IFCAP_TSO6) {
1916                         if (!(IFCAP_TSO6 & ifp->if_capenable) &&
1917                             !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1918                                 if_printf(ifp, "enable txcsum6 first.\n");
1919                                 rc = EAGAIN;
1920                                 goto fail;
1921                         }
1922                         ifp->if_capenable ^= IFCAP_TSO6;
1923                 }
1924                 if (mask & IFCAP_LRO) {
1925 #if defined(INET) || defined(INET6)
1926                         int i;
1927                         struct sge_rxq *rxq;
1928
1929                         ifp->if_capenable ^= IFCAP_LRO;
1930                         for_each_rxq(vi, i, rxq) {
1931                                 if (ifp->if_capenable & IFCAP_LRO)
1932                                         rxq->iq.flags |= IQ_LRO_ENABLED;
1933                                 else
1934                                         rxq->iq.flags &= ~IQ_LRO_ENABLED;
1935                         }
1936 #endif
1937                 }
1938 #ifdef TCP_OFFLOAD
1939                 if (mask & IFCAP_TOE) {
1940                         int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
1941
1942                         rc = toe_capability(vi, enable);
1943                         if (rc != 0)
1944                                 goto fail;
1945
1946                         ifp->if_capenable ^= mask;
1947                 }
1948 #endif
1949                 if (mask & IFCAP_VLAN_HWTAGGING) {
1950                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1951                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1952                                 rc = update_mac_settings(ifp, XGMAC_VLANEX);
1953                 }
1954                 if (mask & IFCAP_VLAN_MTU) {
1955                         ifp->if_capenable ^= IFCAP_VLAN_MTU;
1956
1957                         /* Need to find out how to disable auto-mtu-inflation */
1958                 }
1959                 if (mask & IFCAP_VLAN_HWTSO)
1960                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1961                 if (mask & IFCAP_VLAN_HWCSUM)
1962                         ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
1963 #ifdef RATELIMIT
1964                 if (mask & IFCAP_TXRTLMT)
1965                         ifp->if_capenable ^= IFCAP_TXRTLMT;
1966 #endif
1967                 if (mask & IFCAP_HWRXTSTMP) {
1968                         int i;
1969                         struct sge_rxq *rxq;
1970
1971                         ifp->if_capenable ^= IFCAP_HWRXTSTMP;
1972                         for_each_rxq(vi, i, rxq) {
1973                                 if (ifp->if_capenable & IFCAP_HWRXTSTMP)
1974                                         rxq->iq.flags |= IQ_RX_TIMESTAMP;
1975                                 else
1976                                         rxq->iq.flags &= ~IQ_RX_TIMESTAMP;
1977                         }
1978                 }
1979
1980 #ifdef VLAN_CAPABILITIES
1981                 VLAN_CAPABILITIES(ifp);
1982 #endif
1983 fail:
1984                 end_synchronized_op(sc, 0);
1985                 break;
1986
1987         case SIOCSIFMEDIA:
1988         case SIOCGIFMEDIA:
1989         case SIOCGIFXMEDIA:
1990                 ifmedia_ioctl(ifp, ifr, &pi->media, cmd);
1991                 break;
1992
1993         case SIOCGI2C: {
1994                 struct ifi2creq i2c;
1995
1996                 rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
1997                 if (rc != 0)
1998                         break;
1999                 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
2000                         rc = EPERM;
2001                         break;
2002                 }
2003                 if (i2c.len > sizeof(i2c.data)) {
2004                         rc = EINVAL;
2005                         break;
2006                 }
2007                 rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
2008                 if (rc)
2009                         return (rc);
2010                 rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr,
2011                     i2c.offset, i2c.len, &i2c.data[0]);
2012                 end_synchronized_op(sc, 0);
2013                 if (rc == 0)
2014                         rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
2015                 break;
2016         }
2017
2018         default:
2019                 rc = ether_ioctl(ifp, cmd, data);
2020         }
2021
2022         return (rc);
2023 }
2024
2025 static int
2026 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
2027 {
2028         struct vi_info *vi = ifp->if_softc;
2029         struct port_info *pi = vi->pi;
2030         struct adapter *sc = pi->adapter;
2031         struct sge_txq *txq;
2032         void *items[1];
2033         int rc;
2034
2035         M_ASSERTPKTHDR(m);
2036         MPASS(m->m_nextpkt == NULL);    /* not quite ready for this yet */
2037
2038         if (__predict_false(pi->link_cfg.link_ok == false)) {
2039                 m_freem(m);
2040                 return (ENETDOWN);
2041         }
2042
2043         rc = parse_pkt(sc, &m);
2044         if (__predict_false(rc != 0)) {
2045                 MPASS(m == NULL);                       /* was freed already */
2046                 atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
2047                 return (rc);
2048         }
2049 #ifdef RATELIMIT
2050         if (m->m_pkthdr.snd_tag != NULL) {
2051                 /* EAGAIN tells the stack we are not the correct interface. */
2052                 if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) {
2053                         m_freem(m);
2054                         return (EAGAIN);
2055                 }
2056
2057                 return (ethofld_transmit(ifp, m));
2058         }
2059 #endif
2060
2061         /* Select a txq. */
2062         txq = &sc->sge.txq[vi->first_txq];
2063         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2064                 txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
2065                     vi->rsrv_noflowq);
2066
2067         items[0] = m;
2068         rc = mp_ring_enqueue(txq->r, items, 1, 4096);
2069         if (__predict_false(rc != 0))
2070                 m_freem(m);
2071
2072         return (rc);
2073 }
2074
2075 static void
2076 cxgbe_qflush(struct ifnet *ifp)
2077 {
2078         struct vi_info *vi = ifp->if_softc;
2079         struct sge_txq *txq;
2080         int i;
2081
2082         /* queues do not exist if !VI_INIT_DONE. */
2083         if (vi->flags & VI_INIT_DONE) {
2084                 for_each_txq(vi, i, txq) {
2085                         TXQ_LOCK(txq);
2086                         txq->eq.flags |= EQ_QFLUSH;
2087                         TXQ_UNLOCK(txq);
2088                         while (!mp_ring_is_idle(txq->r)) {
2089                                 mp_ring_check_drainage(txq->r, 0);
2090                                 pause("qflush", 1);
2091                         }
2092                         TXQ_LOCK(txq);
2093                         txq->eq.flags &= ~EQ_QFLUSH;
2094                         TXQ_UNLOCK(txq);
2095                 }
2096         }
2097         if_qflush(ifp);
2098 }
2099
2100 static uint64_t
2101 vi_get_counter(struct ifnet *ifp, ift_counter c)
2102 {
2103         struct vi_info *vi = ifp->if_softc;
2104         struct fw_vi_stats_vf *s = &vi->stats;
2105
2106         vi_refresh_stats(vi->pi->adapter, vi);
2107
2108         switch (c) {
2109         case IFCOUNTER_IPACKETS:
2110                 return (s->rx_bcast_frames + s->rx_mcast_frames +
2111                     s->rx_ucast_frames);
2112         case IFCOUNTER_IERRORS:
2113                 return (s->rx_err_frames);
2114         case IFCOUNTER_OPACKETS:
2115                 return (s->tx_bcast_frames + s->tx_mcast_frames +
2116                     s->tx_ucast_frames + s->tx_offload_frames);
2117         case IFCOUNTER_OERRORS:
2118                 return (s->tx_drop_frames);
2119         case IFCOUNTER_IBYTES:
2120                 return (s->rx_bcast_bytes + s->rx_mcast_bytes +
2121                     s->rx_ucast_bytes);
2122         case IFCOUNTER_OBYTES:
2123                 return (s->tx_bcast_bytes + s->tx_mcast_bytes +
2124                     s->tx_ucast_bytes + s->tx_offload_bytes);
2125         case IFCOUNTER_IMCASTS:
2126                 return (s->rx_mcast_frames);
2127         case IFCOUNTER_OMCASTS:
2128                 return (s->tx_mcast_frames);
2129         case IFCOUNTER_OQDROPS: {
2130                 uint64_t drops;
2131
2132                 drops = 0;
2133                 if (vi->flags & VI_INIT_DONE) {
2134                         int i;
2135                         struct sge_txq *txq;
2136
2137                         for_each_txq(vi, i, txq)
2138                                 drops += counter_u64_fetch(txq->r->drops);
2139                 }
2140
2141                 return (drops);
2142
2143         }
2144
2145         default:
2146                 return (if_get_counter_default(ifp, c));
2147         }
2148 }
2149
2150 uint64_t
2151 cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
2152 {
2153         struct vi_info *vi = ifp->if_softc;
2154         struct port_info *pi = vi->pi;
2155         struct adapter *sc = pi->adapter;
2156         struct port_stats *s = &pi->stats;
2157
2158         if (pi->nvi > 1 || sc->flags & IS_VF)
2159                 return (vi_get_counter(ifp, c));
2160
2161         cxgbe_refresh_stats(sc, pi);
2162
2163         switch (c) {
2164         case IFCOUNTER_IPACKETS:
2165                 return (s->rx_frames);
2166
2167         case IFCOUNTER_IERRORS:
2168                 return (s->rx_jabber + s->rx_runt + s->rx_too_long +
2169                     s->rx_fcs_err + s->rx_len_err);
2170
2171         case IFCOUNTER_OPACKETS:
2172                 return (s->tx_frames);
2173
2174         case IFCOUNTER_OERRORS:
2175                 return (s->tx_error_frames);
2176
2177         case IFCOUNTER_IBYTES:
2178                 return (s->rx_octets);
2179
2180         case IFCOUNTER_OBYTES:
2181                 return (s->tx_octets);
2182
2183         case IFCOUNTER_IMCASTS:
2184                 return (s->rx_mcast_frames);
2185
2186         case IFCOUNTER_OMCASTS:
2187                 return (s->tx_mcast_frames);
2188
2189         case IFCOUNTER_IQDROPS:
2190                 return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
2191                     s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
2192                     s->rx_trunc3 + pi->tnl_cong_drops);
2193
2194         case IFCOUNTER_OQDROPS: {
2195                 uint64_t drops;
2196
2197                 drops = s->tx_drop;
2198                 if (vi->flags & VI_INIT_DONE) {
2199                         int i;
2200                         struct sge_txq *txq;
2201
2202                         for_each_txq(vi, i, txq)
2203                                 drops += counter_u64_fetch(txq->r->drops);
2204                 }
2205
2206                 return (drops);
2207
2208         }
2209
2210         default:
2211                 return (if_get_counter_default(ifp, c));
2212         }
2213 }
2214
2215 /*
2216  * The kernel picks a media from the list we had provided but we still validate
2217  * the requeste.
2218  */
2219 int
2220 cxgbe_media_change(struct ifnet *ifp)
2221 {
2222         struct vi_info *vi = ifp->if_softc;
2223         struct port_info *pi = vi->pi;
2224         struct ifmedia *ifm = &pi->media;
2225         struct link_config *lc = &pi->link_cfg;
2226         struct adapter *sc = pi->adapter;
2227         int rc;
2228
2229         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec");
2230         if (rc != 0)
2231                 return (rc);
2232         PORT_LOCK(pi);
2233         if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) {
2234                 /* ifconfig .. media autoselect */
2235                 if (!(lc->supported & FW_PORT_CAP32_ANEG)) {
2236                         rc = ENOTSUP; /* AN not supported by transceiver */
2237                         goto done;
2238                 }
2239                 lc->requested_aneg = AUTONEG_ENABLE;
2240                 lc->requested_speed = 0;
2241                 lc->requested_fc |= PAUSE_AUTONEG;
2242         } else {
2243                 lc->requested_aneg = AUTONEG_DISABLE;
2244                 lc->requested_speed =
2245                     ifmedia_baudrate(ifm->ifm_media) / 1000000;
2246                 lc->requested_fc = 0;
2247                 if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
2248                         lc->requested_fc |= PAUSE_RX;
2249                 if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
2250                         lc->requested_fc |= PAUSE_TX;
2251         }
2252         if (pi->up_vis > 0) {
2253                 fixup_link_config(pi);
2254                 rc = apply_link_config(pi);
2255         }
2256 done:
2257         PORT_UNLOCK(pi);
2258         end_synchronized_op(sc, 0);
2259         return (rc);
2260 }
2261
2262 /*
2263  * Base media word (without ETHER, pause, link active, etc.) for the port at the
2264  * given speed.
2265  */
2266 static int
2267 port_mword(struct port_info *pi, uint32_t speed)
2268 {
2269
2270         MPASS(speed & M_FW_PORT_CAP32_SPEED);
2271         MPASS(powerof2(speed));
2272
2273         switch(pi->port_type) {
2274         case FW_PORT_TYPE_BT_SGMII:
2275         case FW_PORT_TYPE_BT_XFI:
2276         case FW_PORT_TYPE_BT_XAUI:
2277                 /* BaseT */
2278                 switch (speed) {
2279                 case FW_PORT_CAP32_SPEED_100M:
2280                         return (IFM_100_T);
2281                 case FW_PORT_CAP32_SPEED_1G:
2282                         return (IFM_1000_T);
2283                 case FW_PORT_CAP32_SPEED_10G:
2284                         return (IFM_10G_T);
2285                 }
2286                 break;
2287         case FW_PORT_TYPE_KX4:
2288                 if (speed == FW_PORT_CAP32_SPEED_10G)
2289                         return (IFM_10G_KX4);
2290                 break;
2291         case FW_PORT_TYPE_CX4:
2292                 if (speed == FW_PORT_CAP32_SPEED_10G)
2293                         return (IFM_10G_CX4);
2294                 break;
2295         case FW_PORT_TYPE_KX:
2296                 if (speed == FW_PORT_CAP32_SPEED_1G)
2297                         return (IFM_1000_KX);
2298                 break;
2299         case FW_PORT_TYPE_KR:
2300         case FW_PORT_TYPE_BP_AP:
2301         case FW_PORT_TYPE_BP4_AP:
2302         case FW_PORT_TYPE_BP40_BA:
2303         case FW_PORT_TYPE_KR4_100G:
2304         case FW_PORT_TYPE_KR_SFP28:
2305         case FW_PORT_TYPE_KR_XLAUI:
2306                 switch (speed) {
2307                 case FW_PORT_CAP32_SPEED_1G:
2308                         return (IFM_1000_KX);
2309                 case FW_PORT_CAP32_SPEED_10G:
2310                         return (IFM_10G_KR);
2311                 case FW_PORT_CAP32_SPEED_25G:
2312                         return (IFM_25G_KR);
2313                 case FW_PORT_CAP32_SPEED_40G:
2314                         return (IFM_40G_KR4);
2315                 case FW_PORT_CAP32_SPEED_50G:
2316                         return (IFM_50G_KR2);
2317                 case FW_PORT_CAP32_SPEED_100G:
2318                         return (IFM_100G_KR4);
2319                 }
2320                 break;
2321         case FW_PORT_TYPE_FIBER_XFI:
2322         case FW_PORT_TYPE_FIBER_XAUI:
2323         case FW_PORT_TYPE_SFP:
2324         case FW_PORT_TYPE_QSFP_10G:
2325         case FW_PORT_TYPE_QSA:
2326         case FW_PORT_TYPE_QSFP:
2327         case FW_PORT_TYPE_CR4_QSFP:
2328         case FW_PORT_TYPE_CR_QSFP:
2329         case FW_PORT_TYPE_CR2_QSFP:
2330         case FW_PORT_TYPE_SFP28:
2331                 /* Pluggable transceiver */
2332                 switch (pi->mod_type) {
2333                 case FW_PORT_MOD_TYPE_LR:
2334                         switch (speed) {
2335                         case FW_PORT_CAP32_SPEED_1G:
2336                                 return (IFM_1000_LX);
2337                         case FW_PORT_CAP32_SPEED_10G:
2338                                 return (IFM_10G_LR);
2339                         case FW_PORT_CAP32_SPEED_25G:
2340                                 return (IFM_25G_LR);
2341                         case FW_PORT_CAP32_SPEED_40G:
2342                                 return (IFM_40G_LR4);
2343                         case FW_PORT_CAP32_SPEED_50G:
2344                                 return (IFM_50G_LR2);
2345                         case FW_PORT_CAP32_SPEED_100G:
2346                                 return (IFM_100G_LR4);
2347                         }
2348                         break;
2349                 case FW_PORT_MOD_TYPE_SR:
2350                         switch (speed) {
2351                         case FW_PORT_CAP32_SPEED_1G:
2352                                 return (IFM_1000_SX);
2353                         case FW_PORT_CAP32_SPEED_10G:
2354                                 return (IFM_10G_SR);
2355                         case FW_PORT_CAP32_SPEED_25G:
2356                                 return (IFM_25G_SR);
2357                         case FW_PORT_CAP32_SPEED_40G:
2358                                 return (IFM_40G_SR4);
2359                         case FW_PORT_CAP32_SPEED_50G:
2360                                 return (IFM_50G_SR2);
2361                         case FW_PORT_CAP32_SPEED_100G:
2362                                 return (IFM_100G_SR4);
2363                         }
2364                         break;
2365                 case FW_PORT_MOD_TYPE_ER:
2366                         if (speed == FW_PORT_CAP32_SPEED_10G)
2367                                 return (IFM_10G_ER);
2368                         break;
2369                 case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
2370                 case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
2371                         switch (speed) {
2372                         case FW_PORT_CAP32_SPEED_1G:
2373                                 return (IFM_1000_CX);
2374                         case FW_PORT_CAP32_SPEED_10G:
2375                                 return (IFM_10G_TWINAX);
2376                         case FW_PORT_CAP32_SPEED_25G:
2377                                 return (IFM_25G_CR);
2378                         case FW_PORT_CAP32_SPEED_40G:
2379                                 return (IFM_40G_CR4);
2380                         case FW_PORT_CAP32_SPEED_50G:
2381                                 return (IFM_50G_CR2);
2382                         case FW_PORT_CAP32_SPEED_100G:
2383                                 return (IFM_100G_CR4);
2384                         }
2385                         break;
2386                 case FW_PORT_MOD_TYPE_LRM:
2387                         if (speed == FW_PORT_CAP32_SPEED_10G)
2388                                 return (IFM_10G_LRM);
2389                         break;
2390                 case FW_PORT_MOD_TYPE_NA:
2391                         MPASS(0);       /* Not pluggable? */
2392                         /* fall throough */
2393                 case FW_PORT_MOD_TYPE_ERROR:
2394                 case FW_PORT_MOD_TYPE_UNKNOWN:
2395                 case FW_PORT_MOD_TYPE_NOTSUPPORTED:
2396                         break;
2397                 case FW_PORT_MOD_TYPE_NONE:
2398                         return (IFM_NONE);
2399                 }
2400                 break;
2401         case FW_PORT_TYPE_NONE:
2402                 return (IFM_NONE);
2403         }
2404
2405         return (IFM_UNKNOWN);
2406 }
2407
2408 void
2409 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2410 {
2411         struct vi_info *vi = ifp->if_softc;
2412         struct port_info *pi = vi->pi;
2413         struct adapter *sc = pi->adapter;
2414         struct link_config *lc = &pi->link_cfg;
2415
2416         if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0)
2417                 return;
2418         PORT_LOCK(pi);
2419
2420         if (pi->up_vis == 0) {
2421                 /*
2422                  * If all the interfaces are administratively down the firmware
2423                  * does not report transceiver changes.  Refresh port info here
2424                  * so that ifconfig displays accurate ifmedia at all times.
2425                  * This is the only reason we have a synchronized op in this
2426                  * function.  Just PORT_LOCK would have been enough otherwise.
2427                  */
2428                 t4_update_port_info(pi);
2429                 build_medialist(pi);
2430         }
2431
2432         /* ifm_status */
2433         ifmr->ifm_status = IFM_AVALID;
2434         if (lc->link_ok == false)
2435                 goto done;
2436         ifmr->ifm_status |= IFM_ACTIVE;
2437
2438         /* ifm_active */
2439         ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2440         ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE);
2441         if (lc->fc & PAUSE_RX)
2442                 ifmr->ifm_active |= IFM_ETH_RXPAUSE;
2443         if (lc->fc & PAUSE_TX)
2444                 ifmr->ifm_active |= IFM_ETH_TXPAUSE;
2445         ifmr->ifm_active |= port_mword(pi, speed_to_fwcap(lc->speed));
2446 done:
2447         PORT_UNLOCK(pi);
2448         end_synchronized_op(sc, 0);
2449 }
2450
2451 static int
2452 vcxgbe_probe(device_t dev)
2453 {
2454         char buf[128];
2455         struct vi_info *vi = device_get_softc(dev);
2456
2457         snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id,
2458             vi - vi->pi->vi);
2459         device_set_desc_copy(dev, buf);
2460
2461         return (BUS_PROBE_DEFAULT);
2462 }
2463
2464 static int
2465 alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi)
2466 {
2467         int func, index, rc;
2468         uint32_t param, val;
2469
2470         ASSERT_SYNCHRONIZED_OP(sc);
2471
2472         index = vi - pi->vi;
2473         MPASS(index > 0);       /* This function deals with _extra_ VIs only */
2474         KASSERT(index < nitems(vi_mac_funcs),
2475             ("%s: VI %s doesn't have a MAC func", __func__,
2476             device_get_nameunit(vi->dev)));
2477         func = vi_mac_funcs[index];
2478         rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
2479             vi->hw_addr, &vi->rss_size, func, 0);
2480         if (rc < 0) {
2481                 device_printf(vi->dev, "failed to allocate virtual interface %d"
2482                     "for port %d: %d\n", index, pi->port_id, -rc);
2483                 return (-rc);
2484         }
2485         vi->viid = rc;
2486         if (chip_id(sc) <= CHELSIO_T5)
2487                 vi->smt_idx = (rc & 0x7f) << 1;
2488         else
2489                 vi->smt_idx = (rc & 0x7f);
2490
2491         if (vi->rss_size == 1) {
2492                 /*
2493                  * This VI didn't get a slice of the RSS table.  Reduce the
2494                  * number of VIs being created (hw.cxgbe.num_vis) or modify the
2495                  * configuration file (nvi, rssnvi for this PF) if this is a
2496                  * problem.
2497                  */
2498                 device_printf(vi->dev, "RSS table not available.\n");
2499                 vi->rss_base = 0xffff;
2500
2501                 return (0);
2502         }
2503
2504         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
2505             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
2506             V_FW_PARAMS_PARAM_YZ(vi->viid);
2507         rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
2508         if (rc)
2509                 vi->rss_base = 0xffff;
2510         else {
2511                 MPASS((val >> 16) == vi->rss_size);
2512                 vi->rss_base = val & 0xffff;
2513         }
2514
2515         return (0);
2516 }
2517
2518 static int
2519 vcxgbe_attach(device_t dev)
2520 {
2521         struct vi_info *vi;
2522         struct port_info *pi;
2523         struct adapter *sc;
2524         int rc;
2525
2526         vi = device_get_softc(dev);
2527         pi = vi->pi;
2528         sc = pi->adapter;
2529
2530         rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via");
2531         if (rc)
2532                 return (rc);
2533         rc = alloc_extra_vi(sc, pi, vi);
2534         end_synchronized_op(sc, 0);
2535         if (rc)
2536                 return (rc);
2537
2538         rc = cxgbe_vi_attach(dev, vi);
2539         if (rc) {
2540                 t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2541                 return (rc);
2542         }
2543         return (0);
2544 }
2545
2546 static int
2547 vcxgbe_detach(device_t dev)
2548 {
2549         struct vi_info *vi;
2550         struct adapter *sc;
2551
2552         vi = device_get_softc(dev);
2553         sc = vi->pi->adapter;
2554
2555         doom_vi(sc, vi);
2556
2557         cxgbe_vi_detach(vi);
2558         t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
2559
2560         end_synchronized_op(sc, 0);
2561
2562         return (0);
2563 }
2564
2565 static struct callout fatal_callout;
2566
2567 static void
2568 delayed_panic(void *arg)
2569 {
2570         struct adapter *sc = arg;
2571
2572         panic("%s: panic on fatal error", device_get_nameunit(sc->dev));
2573 }
2574
2575 void
2576 t4_fatal_err(struct adapter *sc, bool fw_error)
2577 {
2578
2579         t4_shutdown_adapter(sc);
2580         log(LOG_ALERT, "%s: encountered fatal error, adapter stopped.\n",
2581             device_get_nameunit(sc->dev));
2582         if (fw_error) {
2583                 ASSERT_SYNCHRONIZED_OP(sc);
2584                 sc->flags |= ADAP_ERR;
2585         } else {
2586                 ADAPTER_LOCK(sc);
2587                 sc->flags |= ADAP_ERR;
2588                 ADAPTER_UNLOCK(sc);
2589         }
2590
2591         if (t4_panic_on_fatal_err) {
2592                 log(LOG_ALERT, "%s: panic on fatal error after 30s",
2593                     device_get_nameunit(sc->dev));
2594                 callout_reset(&fatal_callout, hz * 30, delayed_panic, sc);
2595         }
2596 }
2597
2598 void
2599 t4_add_adapter(struct adapter *sc)
2600 {
2601         sx_xlock(&t4_list_lock);
2602         SLIST_INSERT_HEAD(&t4_list, sc, link);
2603         sx_xunlock(&t4_list_lock);
2604 }
2605
2606 int
2607 t4_map_bars_0_and_4(struct adapter *sc)
2608 {
2609         sc->regs_rid = PCIR_BAR(0);
2610         sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2611             &sc->regs_rid, RF_ACTIVE);
2612         if (sc->regs_res == NULL) {
2613                 device_printf(sc->dev, "cannot map registers.\n");
2614                 return (ENXIO);
2615         }
2616         sc->bt = rman_get_bustag(sc->regs_res);
2617         sc->bh = rman_get_bushandle(sc->regs_res);
2618         sc->mmio_len = rman_get_size(sc->regs_res);
2619         setbit(&sc->doorbells, DOORBELL_KDB);
2620
2621         sc->msix_rid = PCIR_BAR(4);
2622         sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2623             &sc->msix_rid, RF_ACTIVE);
2624         if (sc->msix_res == NULL) {
2625                 device_printf(sc->dev, "cannot map MSI-X BAR.\n");
2626                 return (ENXIO);
2627         }
2628
2629         return (0);
2630 }
2631
2632 int
2633 t4_map_bar_2(struct adapter *sc)
2634 {
2635
2636         /*
2637          * T4: only iWARP driver uses the userspace doorbells.  There is no need
2638          * to map it if RDMA is disabled.
2639          */
2640         if (is_t4(sc) && sc->rdmacaps == 0)
2641                 return (0);
2642
2643         sc->udbs_rid = PCIR_BAR(2);
2644         sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
2645             &sc->udbs_rid, RF_ACTIVE);
2646         if (sc->udbs_res == NULL) {
2647                 device_printf(sc->dev, "cannot map doorbell BAR.\n");
2648                 return (ENXIO);
2649         }
2650         sc->udbs_base = rman_get_virtual(sc->udbs_res);
2651
2652         if (chip_id(sc) >= CHELSIO_T5) {
2653                 setbit(&sc->doorbells, DOORBELL_UDB);
2654 #if defined(__i386__) || defined(__amd64__)
2655                 if (t5_write_combine) {
2656                         int rc, mode;
2657
2658                         /*
2659                          * Enable write combining on BAR2.  This is the
2660                          * userspace doorbell BAR and is split into 128B
2661                          * (UDBS_SEG_SIZE) doorbell regions, each associated
2662                          * with an egress queue.  The first 64B has the doorbell
2663                          * and the second 64B can be used to submit a tx work
2664                          * request with an implicit doorbell.
2665                          */
2666
2667                         rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
2668                             rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
2669                         if (rc == 0) {
2670                                 clrbit(&sc->doorbells, DOORBELL_UDB);
2671                                 setbit(&sc->doorbells, DOORBELL_WCWR);
2672                                 setbit(&sc->doorbells, DOORBELL_UDBWC);
2673                         } else {
2674                                 device_printf(sc->dev,
2675                                     "couldn't enable write combining: %d\n",
2676                                     rc);
2677                         }
2678
2679                         mode = is_t5(sc) ? V_STATMODE(0) : V_T6_STATMODE(0);
2680                         t4_write_reg(sc, A_SGE_STAT_CFG,
2681                             V_STATSOURCE_T5(7) | mode);
2682                 }
2683 #endif
2684         }
2685         sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0;
2686
2687         return (0);
2688 }
2689
2690 struct memwin_init {
2691         uint32_t base;
2692         uint32_t aperture;
2693 };
2694
2695 static const struct memwin_init t4_memwin[NUM_MEMWIN] = {
2696         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2697         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2698         { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 }
2699 };
2700
2701 static const struct memwin_init t5_memwin[NUM_MEMWIN] = {
2702         { MEMWIN0_BASE, MEMWIN0_APERTURE },
2703         { MEMWIN1_BASE, MEMWIN1_APERTURE },
2704         { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 },
2705 };
2706
2707 static void
2708 setup_memwin(struct adapter *sc)
2709 {
2710         const struct memwin_init *mw_init;
2711         struct memwin *mw;
2712         int i;
2713         uint32_t bar0;
2714
2715         if (is_t4(sc)) {
2716                 /*
2717                  * Read low 32b of bar0 indirectly via the hardware backdoor
2718                  * mechanism.  Works from within PCI passthrough environments
2719                  * too, where rman_get_start() can return a different value.  We
2720                  * need to program the T4 memory window decoders with the actual
2721                  * addresses that will be coming across the PCIe link.
2722                  */
2723                 bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
2724                 bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
2725
2726                 mw_init = &t4_memwin[0];
2727         } else {
2728                 /* T5+ use the relative offset inside the PCIe BAR */
2729                 bar0 = 0;
2730
2731                 mw_init = &t5_memwin[0];
2732         }
2733
2734         for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) {
2735                 rw_init(&mw->mw_lock, "memory window access");
2736                 mw->mw_base = mw_init->base;
2737                 mw->mw_aperture = mw_init->aperture;
2738                 mw->mw_curpos = 0;
2739                 t4_write_reg(sc,
2740                     PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
2741                     (mw->mw_base + bar0) | V_BIR(0) |
2742                     V_WINDOW(ilog2(mw->mw_aperture) - 10));
2743                 rw_wlock(&mw->mw_lock);
2744                 position_memwin(sc, i, 0);
2745                 rw_wunlock(&mw->mw_lock);
2746         }
2747
2748         /* flush */
2749         t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
2750 }
2751
2752 /*
2753  * Positions the memory window at the given address in the card's address space.
2754  * There are some alignment requirements and the actual position may be at an
2755  * address prior to the requested address.  mw->mw_curpos always has the actual
2756  * position of the window.
2757  */
2758 static void
2759 position_memwin(struct adapter *sc, int idx, uint32_t addr)
2760 {
2761         struct memwin *mw;
2762         uint32_t pf;
2763         uint32_t reg;
2764
2765         MPASS(idx >= 0 && idx < NUM_MEMWIN);
2766         mw = &sc->memwin[idx];
2767         rw_assert(&mw->mw_lock, RA_WLOCKED);
2768
2769         if (is_t4(sc)) {
2770                 pf = 0;
2771                 mw->mw_curpos = addr & ~0xf;    /* start must be 16B aligned */
2772         } else {
2773                 pf = V_PFNUM(sc->pf);
2774                 mw->mw_curpos = addr & ~0x7f;   /* start must be 128B aligned */
2775         }
2776         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx);
2777         t4_write_reg(sc, reg, mw->mw_curpos | pf);
2778         t4_read_reg(sc, reg);   /* flush */
2779 }
2780
2781 int
2782 rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val,
2783     int len, int rw)
2784 {
2785         struct memwin *mw;
2786         uint32_t mw_end, v;
2787
2788         MPASS(idx >= 0 && idx < NUM_MEMWIN);
2789
2790         /* Memory can only be accessed in naturally aligned 4 byte units */
2791         if (addr & 3 || len & 3 || len <= 0)
2792                 return (EINVAL);
2793
2794         mw = &sc->memwin[idx];
2795         while (len > 0) {
2796                 rw_rlock(&mw->mw_lock);
2797                 mw_end = mw->mw_curpos + mw->mw_aperture;
2798                 if (addr >= mw_end || addr < mw->mw_curpos) {
2799                         /* Will need to reposition the window */
2800                         if (!rw_try_upgrade(&mw->mw_lock)) {
2801                                 rw_runlock(&mw->mw_lock);
2802                                 rw_wlock(&mw->mw_lock);
2803                         }
2804                         rw_assert(&mw->mw_lock, RA_WLOCKED);
2805                         position_memwin(sc, idx, addr);
2806                         rw_downgrade(&mw->mw_lock);
2807                         mw_end = mw->mw_curpos + mw->mw_aperture;
2808                 }
2809                 rw_assert(&mw->mw_lock, RA_RLOCKED);
2810                 while (addr < mw_end && len > 0) {
2811                         if (rw == 0) {
2812                                 v = t4_read_reg(sc, mw->mw_base + addr -
2813                                     mw->mw_curpos);
2814                                 *val++ = le32toh(v);
2815                         } else {
2816                                 v = *val++;
2817                                 t4_write_reg(sc, mw->mw_base + addr -
2818                                     mw->mw_curpos, htole32(v));
2819                         }
2820                         addr += 4;
2821                         len -= 4;
2822                 }
2823                 rw_runlock(&mw->mw_lock);
2824         }
2825
2826         return (0);
2827 }
2828
2829 int
2830 alloc_atid_tab(struct tid_info *t, int flags)
2831 {
2832         int i;
2833
2834         MPASS(t->natids > 0);
2835         MPASS(t->atid_tab == NULL);
2836
2837         t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE,
2838             M_ZERO | flags);
2839         if (t->atid_tab == NULL)
2840                 return (ENOMEM);
2841         mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
2842         t->afree = t->atid_tab;
2843         t->atids_in_use = 0;
2844         for (i = 1; i < t->natids; i++)
2845                 t->atid_tab[i - 1].next = &t->atid_tab[i];
2846         t->atid_tab[t->natids - 1].next = NULL;
2847
2848         return (0);
2849 }
2850
2851 void
2852 free_atid_tab(struct tid_info *t)
2853 {
2854
2855         KASSERT(t->atids_in_use == 0,
2856             ("%s: %d atids still in use.", __func__, t->atids_in_use));
2857
2858         if (mtx_initialized(&t->atid_lock))
2859                 mtx_destroy(&t->atid_lock);
2860         free(t->atid_tab, M_CXGBE);
2861         t->atid_tab = NULL;
2862 }
2863
2864 int
2865 alloc_atid(struct adapter *sc, void *ctx)
2866 {
2867         struct tid_info *t = &sc->tids;
2868         int atid = -1;
2869
2870         mtx_lock(&t->atid_lock);
2871         if (t->afree) {
2872                 union aopen_entry *p = t->afree;
2873
2874                 atid = p - t->atid_tab;
2875                 MPASS(atid <= M_TID_TID);
2876                 t->afree = p->next;
2877                 p->data = ctx;
2878                 t->atids_in_use++;
2879         }
2880         mtx_unlock(&t->atid_lock);
2881         return (atid);
2882 }
2883
2884 void *
2885 lookup_atid(struct adapter *sc, int atid)
2886 {
2887         struct tid_info *t = &sc->tids;
2888
2889         return (t->atid_tab[atid].data);
2890 }
2891
2892 void
2893 free_atid(struct adapter *sc, int atid)
2894 {
2895         struct tid_info *t = &sc->tids;
2896         union aopen_entry *p = &t->atid_tab[atid];
2897
2898         mtx_lock(&t->atid_lock);
2899         p->next = t->afree;
2900         t->afree = p;
2901         t->atids_in_use--;
2902         mtx_unlock(&t->atid_lock);
2903 }
2904
/*
 * Placeholder for deferring a tid release; not implemented.  Both
 * arguments are currently unused.
 */
static void
queue_tid_release(struct adapter *sc, int tid)
{

        CXGBE_UNIMPLEMENTED("deferred tid release");
}
2911
2912 void
2913 release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
2914 {
2915         struct wrqe *wr;
2916         struct cpl_tid_release *req;
2917
2918         wr = alloc_wrqe(sizeof(*req), ctrlq);
2919         if (wr == NULL) {
2920                 queue_tid_release(sc, tid);     /* defer */
2921                 return;
2922         }
2923         req = wrtod(wr);
2924
2925         INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);
2926
2927         t4_wrq_tx(sc, wr);
2928 }
2929
2930 static int
2931 t4_range_cmp(const void *a, const void *b)
2932 {
2933         return ((const struct t4_range *)a)->start -
2934                ((const struct t4_range *)b)->start;
2935 }
2936
2937 /*
2938  * Verify that the memory range specified by the addr/len pair is valid within
2939  * the card's address space.
2940  */
2941 static int
2942 validate_mem_range(struct adapter *sc, uint32_t addr, uint32_t len)
2943 {
2944         struct t4_range mem_ranges[4], *r, *next;
2945         uint32_t em, addr_len;
2946         int i, n, remaining;
2947
2948         /* Memory can only be accessed in naturally aligned 4 byte units */
2949         if (addr & 3 || len & 3 || len == 0)
2950                 return (EINVAL);
2951
2952         /* Enabled memories */
2953         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
2954
2955         r = &mem_ranges[0];
2956         n = 0;
2957         bzero(r, sizeof(mem_ranges));
2958         if (em & F_EDRAM0_ENABLE) {
2959                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
2960                 r->size = G_EDRAM0_SIZE(addr_len) << 20;
2961                 if (r->size > 0) {
2962                         r->start = G_EDRAM0_BASE(addr_len) << 20;
2963                         if (addr >= r->start &&
2964                             addr + len <= r->start + r->size)
2965                                 return (0);
2966                         r++;
2967                         n++;
2968                 }
2969         }
2970         if (em & F_EDRAM1_ENABLE) {
2971                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
2972                 r->size = G_EDRAM1_SIZE(addr_len) << 20;
2973                 if (r->size > 0) {
2974                         r->start = G_EDRAM1_BASE(addr_len) << 20;
2975                         if (addr >= r->start &&
2976                             addr + len <= r->start + r->size)
2977                                 return (0);
2978                         r++;
2979                         n++;
2980                 }
2981         }
2982         if (em & F_EXT_MEM_ENABLE) {
2983                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
2984                 r->size = G_EXT_MEM_SIZE(addr_len) << 20;
2985                 if (r->size > 0) {
2986                         r->start = G_EXT_MEM_BASE(addr_len) << 20;
2987                         if (addr >= r->start &&
2988                             addr + len <= r->start + r->size)
2989                                 return (0);
2990                         r++;
2991                         n++;
2992                 }
2993         }
2994         if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) {
2995                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
2996                 r->size = G_EXT_MEM1_SIZE(addr_len) << 20;
2997                 if (r->size > 0) {
2998                         r->start = G_EXT_MEM1_BASE(addr_len) << 20;
2999                         if (addr >= r->start &&
3000                             addr + len <= r->start + r->size)
3001                                 return (0);
3002                         r++;
3003                         n++;
3004                 }
3005         }
3006         MPASS(n <= nitems(mem_ranges));
3007
3008         if (n > 1) {
3009                 /* Sort and merge the ranges. */
3010                 qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp);
3011
3012                 /* Start from index 0 and examine the next n - 1 entries. */
3013                 r = &mem_ranges[0];
3014                 for (remaining = n - 1; remaining > 0; remaining--, r++) {
3015
3016                         MPASS(r->size > 0);     /* r is a valid entry. */
3017                         next = r + 1;
3018                         MPASS(next->size > 0);  /* and so is the next one. */
3019
3020                         while (r->start + r->size >= next->start) {
3021                                 /* Merge the next one into the current entry. */
3022                                 r->size = max(r->start + r->size,
3023                                     next->start + next->size) - r->start;
3024                                 n--;    /* One fewer entry in total. */
3025                                 if (--remaining == 0)
3026                                         goto done;      /* short circuit */
3027                                 next++;
3028                         }
3029                         if (next != r + 1) {
3030                                 /*
3031                                  * Some entries were merged into r and next
3032                                  * points to the first valid entry that couldn't
3033                                  * be merged.
3034                                  */
3035                                 MPASS(next->size > 0);  /* must be valid */
3036                                 memcpy(r + 1, next, remaining * sizeof(*r));
3037 #ifdef INVARIANTS
3038                                 /*
3039                                  * This so that the foo->size assertion in the
3040                                  * next iteration of the loop do the right
3041                                  * thing for entries that were pulled up and are
3042                                  * no longer valid.
3043                                  */
3044                                 MPASS(n < nitems(mem_ranges));
3045                                 bzero(&mem_ranges[n], (nitems(mem_ranges) - n) *
3046                                     sizeof(struct t4_range));
3047 #endif
3048                         }
3049                 }
3050 done:
3051                 /* Done merging the ranges. */
3052                 MPASS(n > 0);
3053                 r = &mem_ranges[0];
3054                 for (i = 0; i < n; i++, r++) {
3055                         if (addr >= r->start &&
3056                             addr + len <= r->start + r->size)
3057                                 return (0);
3058                 }
3059         }
3060
3061         return (EFAULT);
3062 }
3063
3064 static int
3065 fwmtype_to_hwmtype(int mtype)
3066 {
3067
3068         switch (mtype) {
3069         case FW_MEMTYPE_EDC0:
3070                 return (MEM_EDC0);
3071         case FW_MEMTYPE_EDC1:
3072                 return (MEM_EDC1);
3073         case FW_MEMTYPE_EXTMEM:
3074                 return (MEM_MC0);
3075         case FW_MEMTYPE_EXTMEM1:
3076                 return (MEM_MC1);
3077         default:
3078                 panic("%s: cannot translate fw mtype %d.", __func__, mtype);
3079         }
3080 }
3081
3082 /*
3083  * Verify that the memory range specified by the memtype/offset/len pair is
3084  * valid and lies entirely within the memtype specified.  The global address of
3085  * the start of the range is returned in addr.
3086  */
3087 static int
3088 validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, uint32_t len,
3089     uint32_t *addr)
3090 {
3091         uint32_t em, addr_len, maddr;
3092
3093         /* Memory can only be accessed in naturally aligned 4 byte units */
3094         if (off & 3 || len & 3 || len == 0)
3095                 return (EINVAL);
3096
3097         em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
3098         switch (fwmtype_to_hwmtype(mtype)) {
3099         case MEM_EDC0:
3100                 if (!(em & F_EDRAM0_ENABLE))
3101                         return (EINVAL);
3102                 addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
3103                 maddr = G_EDRAM0_BASE(addr_len) << 20;
3104                 break;
3105         case MEM_EDC1:
3106                 if (!(em & F_EDRAM1_ENABLE))
3107                         return (EINVAL);
3108                 addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
3109                 maddr = G_EDRAM1_BASE(addr_len) << 20;
3110                 break;
3111         case MEM_MC:
3112                 if (!(em & F_EXT_MEM_ENABLE))
3113                         return (EINVAL);
3114                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
3115                 maddr = G_EXT_MEM_BASE(addr_len) << 20;
3116                 break;
3117         case MEM_MC1:
3118                 if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE))
3119                         return (EINVAL);
3120                 addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
3121                 maddr = G_EXT_MEM1_BASE(addr_len) << 20;
3122                 break;
3123         default:
3124                 return (EINVAL);
3125         }
3126
3127         *addr = maddr + off;    /* global address */
3128         return (validate_mem_range(sc, *addr, len));
3129 }
3130
3131 static int
3132 fixup_devlog_params(struct adapter *sc)
3133 {
3134         struct devlog_params *dparams = &sc->params.devlog;
3135         int rc;
3136
3137         rc = validate_mt_off_len(sc, dparams->memtype, dparams->start,
3138             dparams->size, &dparams->addr);
3139
3140         return (rc);
3141 }
3142
3143 static void
3144 update_nirq(struct intrs_and_queues *iaq, int nports)
3145 {
3146         int extra = T4_EXTRA_INTR;
3147
3148         iaq->nirq = extra;
3149         iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq);
3150         iaq->nirq += nports * (iaq->num_vis - 1) *
3151             max(iaq->nrxq_vi, iaq->nnmrxq_vi);
3152         iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi;
3153 }
3154
3155 /*
3156  * Adjust requirements to fit the number of interrupts available.
3157  */
3158 static void
3159 calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype,
3160     int navail)
3161 {
3162         int old_nirq;
3163         const int nports = sc->params.nports;
3164
3165         MPASS(nports > 0);
3166         MPASS(navail > 0);
3167
3168         bzero(iaq, sizeof(*iaq));
3169         iaq->intr_type = itype;
3170         iaq->num_vis = t4_num_vis;
3171         iaq->ntxq = t4_ntxq;
3172         iaq->ntxq_vi = t4_ntxq_vi;
3173         iaq->nrxq = t4_nrxq;
3174         iaq->nrxq_vi = t4_nrxq_vi;
3175 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
3176         if (is_offload(sc) || is_ethoffload(sc)) {
3177                 iaq->nofldtxq = t4_nofldtxq;
3178                 iaq->nofldtxq_vi = t4_nofldtxq_vi;
3179         }
3180 #endif
3181 #ifdef TCP_OFFLOAD
3182         if (is_offload(sc)) {
3183                 iaq->nofldrxq = t4_nofldrxq;
3184                 iaq->nofldrxq_vi = t4_nofldrxq_vi;
3185         }
3186 #endif
3187 #ifdef DEV_NETMAP
3188         iaq->nnmtxq_vi = t4_nnmtxq_vi;
3189         iaq->nnmrxq_vi = t4_nnmrxq_vi;
3190 #endif
3191
3192         update_nirq(iaq, nports);
3193         if (iaq->nirq <= navail &&
3194             (itype != INTR_MSI || powerof2(iaq->nirq))) {
3195                 /*
3196                  * This is the normal case -- there are enough interrupts for
3197                  * everything.
3198                  */
3199                 goto done;
3200         }
3201
3202         /*
3203          * If extra VIs have been configured try reducing their count and see if
3204          * that works.
3205          */
3206         while (iaq->num_vis > 1) {
3207                 iaq->num_vis--;
3208                 update_nirq(iaq, nports);
3209                 if (iaq->nirq <= navail &&
3210                     (itype != INTR_MSI || powerof2(iaq->nirq))) {
3211                         device_printf(sc->dev, "virtual interfaces per port "
3212                             "reduced to %d from %d.  nrxq=%u, nofldrxq=%u, "
3213                             "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u.  "
3214                             "itype %d, navail %u, nirq %d.\n",
3215                             iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq,
3216                             iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi,
3217                             itype, navail, iaq->nirq);
3218                         goto done;
3219                 }
3220         }
3221
3222         /*
3223          * Extra VIs will not be created.  Log a message if they were requested.
3224          */
3225         MPASS(iaq->num_vis == 1);
3226         iaq->ntxq_vi = iaq->nrxq_vi = 0;
3227         iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0;
3228         iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0;
3229         if (iaq->num_vis != t4_num_vis) {
3230                 device_printf(sc->dev, "extra virtual interfaces disabled.  "
3231                     "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, "
3232                     "nnmrxq_vi=%u.  itype %d, navail %u, nirq %d.\n",
3233                     iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi,
3234                     iaq->nnmrxq_vi, itype, navail, iaq->nirq);
3235         }
3236
3237         /*
3238          * Keep reducing the number of NIC rx queues to the next lower power of
3239          * 2 (for even RSS distribution) and halving the TOE rx queues and see
3240          * if that works.
3241          */
3242         do {
3243                 if (iaq->nrxq > 1) {
3244                         do {
3245                                 iaq->nrxq--;
3246                         } while (!powerof2(iaq->nrxq));
3247                 }
3248                 if (iaq->nofldrxq > 1)
3249                         iaq->nofldrxq >>= 1;
3250
3251                 old_nirq = iaq->nirq;
3252                 update_nirq(iaq, nports);
3253                 if (iaq->nirq <= navail &&
3254                     (itype != INTR_MSI || powerof2(iaq->nirq))) {
3255                         device_printf(sc->dev, "running with reduced number of "
3256                             "rx queues because of shortage of interrupts.  "
3257                             "nrxq=%u, nofldrxq=%u.  "
3258                             "itype %d, navail %u, nirq %d.\n", iaq->nrxq,
3259                             iaq->nofldrxq, itype, navail, iaq->nirq);
3260                         goto done;
3261                 }
3262         } while (old_nirq != iaq->nirq);
3263
3264         /* One interrupt for everything.  Ugh. */
3265         device_printf(sc->dev, "running with minimal number of queues.  "
3266             "itype %d, navail %u.\n", itype, navail);
3267         iaq->nirq = 1;
3268         MPASS(iaq->nrxq == 1);
3269         iaq->ntxq = 1;
3270         if (iaq->nofldrxq > 1)
3271                 iaq->nofldtxq = 1;
3272 done:
3273         MPASS(iaq->num_vis > 0);
3274         if (iaq->num_vis > 1) {
3275                 MPASS(iaq->nrxq_vi > 0);
3276                 MPASS(iaq->ntxq_vi > 0);
3277         }
3278         MPASS(iaq->nirq > 0);
3279         MPASS(iaq->nrxq > 0);
3280         MPASS(iaq->ntxq > 0);
3281         if (itype == INTR_MSI) {
3282                 MPASS(powerof2(iaq->nirq));
3283         }
3284 }
3285
3286 static int
3287 cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
3288 {
3289         int rc, itype, navail, nalloc;
3290
3291         for (itype = INTR_MSIX; itype; itype >>= 1) {
3292
3293                 if ((itype & t4_intr_types) == 0)
3294                         continue;       /* not allowed */
3295
3296                 if (itype == INTR_MSIX)
3297                         navail = pci_msix_count(sc->dev);
3298                 else if (itype == INTR_MSI)
3299                         navail = pci_msi_count(sc->dev);
3300                 else
3301                         navail = 1;
3302 restart:
3303                 if (navail == 0)
3304                         continue;
3305
3306                 calculate_iaq(sc, iaq, itype, navail);
3307                 nalloc = iaq->nirq;
3308                 rc = 0;
3309                 if (itype == INTR_MSIX)
3310                         rc = pci_alloc_msix(sc->dev, &nalloc);
3311                 else if (itype == INTR_MSI)
3312                         rc = pci_alloc_msi(sc->dev, &nalloc);
3313
3314                 if (rc == 0 && nalloc > 0) {
3315                         if (nalloc == iaq->nirq)
3316                                 return (0);
3317
3318                         /*
3319                          * Didn't get the number requested.  Use whatever number
3320                          * the kernel is willing to allocate.
3321                          */
3322                         device_printf(sc->dev, "fewer vectors than requested, "
3323                             "type=%d, req=%d, rcvd=%d; will downshift req.\n",
3324                             itype, iaq->nirq, nalloc);
3325                         pci_release_msi(sc->dev);
3326                         navail = nalloc;
3327                         goto restart;
3328                 }
3329
3330                 device_printf(sc->dev,
3331                     "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
3332                     itype, rc, iaq->nirq, nalloc);
3333         }
3334
3335         device_printf(sc->dev,
3336             "failed to find a usable interrupt type.  "
3337             "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
3338             pci_msix_count(sc->dev), pci_msi_count(sc->dev));
3339
3340         return (ENXIO);
3341 }
3342
/*
 * FW_VERSION(chip) packs <chip>FW_VERSION_{MAJOR,MINOR,MICRO,BUILD} into a
 * single value using the V_FW_HDR_FW_VER_* field macros.
 */
#define FW_VERSION(chip)						\
	(V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) |		\
	 V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) |		\
	 V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) |		\
	 V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))

/* FW_INTFVER(chip, intf) names the constant <chip>FW_HDR_INTFVER_<intf>. */
#define FW_INTFVER(chip, intf)	(chip##FW_HDR_INTFVER_##intf)
3349
3350 /* Just enough of fw_hdr to cover all version info. */
3351 struct fw_h {
3352         __u8    ver;
3353         __u8    chip;
3354         __be16  len512;
3355         __be32  fw_ver;
3356         __be32  tp_microcode_ver;
3357         __u8    intfver_nic;
3358         __u8    intfver_vnic;
3359         __u8    intfver_ofld;
3360         __u8    intfver_ri;
3361         __u8    intfver_iscsipdu;
3362         __u8    intfver_iscsi;
3363         __u8    intfver_fcoepdu;
3364         __u8    intfver_fcoe;
3365 };
3366 /* Spot check a couple of fields. */
3367 CTASSERT(offsetof(struct fw_h, fw_ver) == offsetof(struct fw_hdr, fw_ver));
3368 CTASSERT(offsetof(struct fw_h, intfver_nic) == offsetof(struct fw_hdr, intfver_nic));
3369 CTASSERT(offsetof(struct fw_h, intfver_fcoe) == offsetof(struct fw_hdr, intfver_fcoe));
3370
3371 struct fw_info {
3372         uint8_t chip;
3373         char *kld_name;
3374         char *fw_mod_name;
3375         struct fw_h fw_h;
3376 } fw_info[] = {
3377         {
3378                 .chip = CHELSIO_T4,
3379                 .kld_name = "t4fw_cfg",
3380                 .fw_mod_name = "t4fw",
3381                 .fw_h = {
3382                         .chip = FW_HDR_CHIP_T4,
3383                         .fw_ver = htobe32(FW_VERSION(T4)),
3384                         .intfver_nic = FW_INTFVER(T4, NIC),
3385                         .intfver_vnic = FW_INTFVER(T4, VNIC),
3386                         .intfver_ofld = FW_INTFVER(T4, OFLD),
3387                         .intfver_ri = FW_INTFVER(T4, RI),
3388                         .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
3389                         .intfver_iscsi = FW_INTFVER(T4, ISCSI),
3390                         .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
3391                         .intfver_fcoe = FW_INTFVER(T4, FCOE),
3392                 },
3393         }, {
3394                 .chip = CHELSIO_T5,
3395                 .kld_name = "t5fw_cfg",
3396                 .fw_mod_name = "t5fw",
3397                 .fw_h = {
3398                         .chip = FW_HDR_CHIP_T5,
3399                         .fw_ver = htobe32(FW_VERSION(T5)),
3400                         .intfver_nic = FW_INTFVER(T5, NIC),
3401                         .intfver_vnic = FW_INTFVER(T5, VNIC),
3402                         .intfver_ofld = FW_INTFVER(T5, OFLD),
3403                         .intfver_ri = FW_INTFVER(T5, RI),
3404                         .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
3405                         .intfver_iscsi = FW_INTFVER(T5, ISCSI),
3406                         .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
3407                         .intfver_fcoe = FW_INTFVER(T5, FCOE),
3408                 },
3409         }, {
3410                 .chip = CHELSIO_T6,
3411                 .kld_name = "t6fw_cfg",
3412                 .fw_mod_name = "t6fw",
3413                 .fw_h = {
3414                         .chip = FW_HDR_CHIP_T6,
3415                         .fw_ver = htobe32(FW_VERSION(T6)),
3416                         .intfver_nic = FW_INTFVER(T6, NIC),
3417                         .intfver_vnic = FW_INTFVER(T6, VNIC),
3418                         .intfver_ofld = FW_INTFVER(T6, OFLD),
3419                         .intfver_ri = FW_INTFVER(T6, RI),
3420                         .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU),
3421                         .intfver_iscsi = FW_INTFVER(T6, ISCSI),
3422                         .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU),
3423                         .intfver_fcoe = FW_INTFVER(T6, FCOE),
3424                 },
3425         }
3426 };
3427
3428 static struct fw_info *
3429 find_fw_info(int chip)
3430 {
3431         int i;
3432
3433         for (i = 0; i < nitems(fw_info); i++) {
3434                 if (fw_info[i].chip == chip)
3435                         return (&fw_info[i]);
3436         }
3437         return (NULL);
3438 }
3439
3440 /*
3441  * Is the given firmware API compatible with the one the driver was compiled
3442  * with?
3443  */
3444 static int
3445 fw_compatible(const struct fw_h *hdr1, const struct fw_h *hdr2)
3446 {
3447
3448         /* short circuit if it's the exact same firmware version */
3449         if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver)
3450                 return (1);
3451
3452         /*
3453          * XXX: Is this too conservative?  Perhaps I should limit this to the
3454          * features that are supported in the driver.
3455          */
3456 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
3457         if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) &&
3458             SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) &&
3459             SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe))
3460                 return (1);
3461 #undef SAME_INTF
3462
3463         return (0);
3464 }
3465
3466 static int
3467 load_fw_module(struct adapter *sc, const struct firmware **dcfg,
3468     const struct firmware **fw)
3469 {
3470         struct fw_info *fw_info;
3471
3472         *dcfg = NULL;
3473         if (fw != NULL)
3474                 *fw = NULL;
3475
3476         fw_info = find_fw_info(chip_id(sc));
3477         if (fw_info == NULL) {
3478                 device_printf(sc->dev,
3479                     "unable to look up firmware information for chip %d.\n",
3480                     chip_id(sc));
3481                 return (EINVAL);
3482         }
3483
3484         *dcfg = firmware_get(fw_info->kld_name);
3485         if (*dcfg != NULL) {
3486                 if (fw != NULL)
3487                         *fw = firmware_get(fw_info->fw_mod_name);
3488                 return (0);
3489         }
3490
3491         return (ENOENT);
3492 }
3493
3494 static void
3495 unload_fw_module(struct adapter *sc, const struct firmware *dcfg,
3496     const struct firmware *fw)
3497 {
3498
3499         if (fw != NULL)
3500                 firmware_put(fw, FIRMWARE_UNLOAD);
3501         if (dcfg != NULL)
3502                 firmware_put(dcfg, FIRMWARE_UNLOAD);
3503 }
3504
3505 /*
3506  * Return values:
3507  * 0 means no firmware install attempted.
3508  * ERESTART means a firmware install was attempted and was successful.
3509  * +ve errno means a firmware install was attempted but failed.
3510  */
3511 static int
3512 install_kld_firmware(struct adapter *sc, struct fw_h *card_fw,
3513     const struct fw_h *drv_fw, const char *reason, int *already)
3514 {
3515         const struct firmware *cfg, *fw;
3516         const uint32_t c = be32toh(card_fw->fw_ver);
3517         uint32_t d, k;
3518         int rc, fw_install;
3519         struct fw_h bundled_fw;
3520         bool load_attempted;
3521
3522         cfg = fw = NULL;
3523         load_attempted = false;
3524         fw_install = t4_fw_install < 0 ? -t4_fw_install : t4_fw_install;
3525
3526         if (reason != NULL)
3527                 goto install;
3528
3529         if ((sc->flags & FW_OK) == 0) {
3530
3531                 if (c == 0xffffffff) {
3532                         reason = "missing";
3533                         goto install;
3534                 }
3535
3536                 return (0);
3537         }
3538
3539         memcpy(&bundled_fw, drv_fw, sizeof(bundled_fw));
3540         if (t4_fw_install < 0) {
3541                 rc = load_fw_module(sc, &cfg, &fw);
3542                 if (rc != 0 || fw == NULL) {
3543                         device_printf(sc->dev,
3544                             "failed to load firmware module: %d. cfg %p, fw %p;"
3545                             " will use compiled-in firmware version for"
3546                             "hw.cxgbe.fw_install checks.\n",
3547                             rc, cfg, fw);
3548                 } else {
3549                         memcpy(&bundled_fw, fw->data, sizeof(bundled_fw));
3550                 }
3551                 load_attempted = true;
3552         }
3553         d = be32toh(bundled_fw.fw_ver);
3554
3555         if (!fw_compatible(card_fw, &bundled_fw)) {
3556                 reason = "incompatible or unusable";
3557                 goto install;
3558         }
3559
3560         if (d > c) {
3561                 reason = "older than the version bundled with this driver";
3562                 goto install;
3563         }
3564
3565         if (fw_install == 2 && d != c) {
3566                 reason = "different than the version bundled with this driver";
3567                 goto install;
3568         }
3569
3570         /* No reason to do anything to the firmware already on the card. */
3571         rc = 0;
3572         goto done;
3573
3574 install:
3575         rc = 0;
3576         if ((*already)++)
3577                 goto done;
3578
3579         if (fw_install == 0) {
3580                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3581                     "but the driver is prohibited from installing a firmware "
3582                     "on the card.\n",
3583                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3584                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3585
3586                 goto done;
3587         }
3588
3589         /*
3590          * We'll attempt to install a firmware.  Load the module first (if it
3591          * hasn't been loaded already).
3592          */
3593         if (!load_attempted) {
3594                 rc = load_fw_module(sc, &cfg, &fw);
3595                 if (rc != 0 || fw == NULL) {
3596                         device_printf(sc->dev,
3597                             "failed to load firmware module: %d. cfg %p, fw %p\n",
3598                             rc, cfg, fw);
3599                         /* carry on */
3600                 }
3601         }
3602         if (fw == NULL) {
3603                 device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3604                     "but the driver cannot take corrective action because it "
3605                     "is unable to load the firmware module.\n",
3606                     G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3607                     G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
3608                 rc = sc->flags & FW_OK ? 0 : ENOENT;
3609                 goto done;
3610         }
3611         k = be32toh(((const struct fw_hdr *)fw->data)->fw_ver);
3612         if (k != d) {
3613                 MPASS(t4_fw_install > 0);
3614                 device_printf(sc->dev,
3615                     "firmware in KLD (%u.%u.%u.%u) is not what the driver was "
3616                     "expecting (%u.%u.%u.%u) and will not be used.\n",
3617                     G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
3618                     G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k),
3619                     G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3620                     G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3621                 rc = sc->flags & FW_OK ? 0 : EINVAL;
3622                 goto done;
3623         }
3624
3625         device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
3626             "installing firmware %u.%u.%u.%u on card.\n",
3627             G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
3628             G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
3629             G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
3630             G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d));
3631
3632         rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
3633         if (rc != 0) {
3634                 device_printf(sc->dev, "failed to install firmware: %d\n", rc);
3635         } else {
3636                 /* Installed successfully, update the cached header too. */
3637                 rc = ERESTART;
3638                 memcpy(card_fw, fw->data, sizeof(*card_fw));
3639         }
3640 done:
3641         unload_fw_module(sc, cfg, fw);
3642
3643         return (rc);
3644 }
3645
3646 /*
3647  * Establish contact with the firmware and attempt to become the master driver.
3648  *
3649  * A firmware will be installed to the card if needed (if the driver is allowed
3650  * to do so).
3651  */
3652 static int
3653 contact_firmware(struct adapter *sc)
3654 {
3655         int rc, already = 0;
3656         enum dev_state state;
3657         struct fw_info *fw_info;
3658         struct fw_hdr *card_fw;         /* fw on the card */
3659         const struct fw_h *drv_fw;
3660
3661         fw_info = find_fw_info(chip_id(sc));
3662         if (fw_info == NULL) {
3663                 device_printf(sc->dev,
3664                     "unable to look up firmware information for chip %d.\n",
3665                     chip_id(sc));
3666                 return (EINVAL);
3667         }
3668         drv_fw = &fw_info->fw_h;
3669
3670         /* Read the header of the firmware on the card */
3671         card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
3672 restart:
3673         rc = -t4_get_fw_hdr(sc, card_fw);
3674         if (rc != 0) {
3675                 device_printf(sc->dev,
3676                     "unable to read firmware header from card's flash: %d\n",
3677                     rc);
3678                 goto done;
3679         }
3680
3681         rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw, NULL,
3682             &already);
3683         if (rc == ERESTART)
3684                 goto restart;
3685         if (rc != 0)
3686                 goto done;
3687
3688         rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
3689         if (rc < 0 || state == DEV_STATE_ERR) {
3690                 rc = -rc;
3691                 device_printf(sc->dev,
3692                     "failed to connect to the firmware: %d, %d.  "
3693                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3694 #if 0
3695                 if (install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
3696                     "not responding properly to HELLO", &already) == ERESTART)
3697                         goto restart;
3698 #endif
3699                 goto done;
3700         }
3701         MPASS(be32toh(card_fw->flags) & FW_HDR_FLAGS_RESET_HALT);
3702         sc->flags |= FW_OK;     /* The firmware responded to the FW_HELLO. */
3703
3704         if (rc == sc->pf) {
3705                 sc->flags |= MASTER_PF;
3706                 rc = install_kld_firmware(sc, (struct fw_h *)card_fw, drv_fw,
3707                     NULL, &already);
3708                 if (rc == ERESTART)
3709                         rc = 0;
3710                 else if (rc != 0)
3711                         goto done;
3712         } else if (state == DEV_STATE_UNINIT) {
3713                 /*
3714                  * We didn't get to be the master so we definitely won't be
3715                  * configuring the chip.  It's a bug if someone else hasn't
3716                  * configured it already.
3717                  */
3718                 device_printf(sc->dev, "couldn't be master(%d), "
3719                     "device not already initialized either(%d).  "
3720                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3721                 rc = EPROTO;
3722                 goto done;
3723         } else {
3724                 /*
3725                  * Some other PF is the master and has configured the chip.
3726                  * This is allowed but untested.
3727                  */
3728                 device_printf(sc->dev, "PF%d is master, device state %d.  "
3729                     "PCIE_FW 0x%08x\n", rc, state, t4_read_reg(sc, A_PCIE_FW));
3730                 snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", rc);
3731                 sc->cfcsum = 0;
3732                 rc = 0;
3733         }
3734 done:
3735         if (rc != 0 && sc->flags & FW_OK) {
3736                 t4_fw_bye(sc, sc->mbox);
3737                 sc->flags &= ~FW_OK;
3738         }
3739         free(card_fw, M_CXGBE);
3740         return (rc);
3741 }
3742
3743 static int
3744 copy_cfg_file_to_card(struct adapter *sc, char *cfg_file,
3745     uint32_t mtype, uint32_t moff)
3746 {
3747         struct fw_info *fw_info;
3748         const struct firmware *dcfg, *rcfg = NULL;
3749         const uint32_t *cfdata;
3750         uint32_t cflen, addr;
3751         int rc;
3752
3753         load_fw_module(sc, &dcfg, NULL);
3754
3755         /* Card specific interpretation of "default". */
3756         if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3757                 if (pci_get_device(sc->dev) == 0x440a)
3758                         snprintf(cfg_file, sizeof(t4_cfg_file), UWIRE_CF);
3759                 if (is_fpga(sc))
3760                         snprintf(cfg_file, sizeof(t4_cfg_file), FPGA_CF);
3761         }
3762
3763         if (strncmp(cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
3764                 if (dcfg == NULL) {
3765                         device_printf(sc->dev,
3766                             "KLD with default config is not available.\n");
3767                         rc = ENOENT;
3768                         goto done;
3769                 }
3770                 cfdata = dcfg->data;
3771                 cflen = dcfg->datasize & ~3;
3772         } else {
3773                 char s[32];
3774
3775                 fw_info = find_fw_info(chip_id(sc));
3776                 if (fw_info == NULL) {
3777                         device_printf(sc->dev,
3778                             "unable to look up firmware information for chip %d.\n",
3779                             chip_id(sc));
3780                         rc = EINVAL;
3781                         goto done;
3782                 }
3783                 snprintf(s, sizeof(s), "%s_%s", fw_info->kld_name, cfg_file);
3784
3785                 rcfg = firmware_get(s);
3786                 if (rcfg == NULL) {
3787                         device_printf(sc->dev,
3788                             "unable to load module \"%s\" for configuration "
3789                             "profile \"%s\".\n", s, cfg_file);
3790                         rc = ENOENT;
3791                         goto done;
3792                 }
3793                 cfdata = rcfg->data;
3794                 cflen = rcfg->datasize & ~3;
3795         }
3796
3797         if (cflen > FLASH_CFG_MAX_SIZE) {
3798                 device_printf(sc->dev,
3799                     "config file too long (%d, max allowed is %d).\n",
3800                     cflen, FLASH_CFG_MAX_SIZE);
3801                 rc = EINVAL;
3802                 goto done;
3803         }
3804
3805         rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
3806         if (rc != 0) {
3807                 device_printf(sc->dev,
3808                     "%s: addr (%d/0x%x) or len %d is not valid: %d.\n",
3809                     __func__, mtype, moff, cflen, rc);
3810                 rc = EINVAL;
3811                 goto done;
3812         }
3813         write_via_memwin(sc, 2, addr, cfdata, cflen);
3814 done:
3815         if (rcfg != NULL)
3816                 firmware_put(rcfg, FIRMWARE_UNLOAD);
3817         unload_fw_module(sc, dcfg, NULL);
3818         return (rc);
3819 }
3820
/*
 * Capabilities that the driver allows, one mask per capability class.  Each
 * mask is converted with htobe16() and ANDed into the field of the same name
 * in the firmware's caps config command.
 */
struct caps_allowed {
	uint16_t	nbmcaps;
	uint16_t	linkcaps;
	uint16_t	switchcaps;
	uint16_t	niccaps;
	uint16_t	toecaps;
	uint16_t	rdmacaps;
	uint16_t	cryptocaps;
	uint16_t	iscsicaps;
	uint16_t	fcoecaps;
};
3832
/* Builds the identifier of device parameter FW_PARAMS_PARAM_DEV_<param>. */
#define FW_PARAM_DEV(param)						\
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |				\
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))

/* Builds the identifier of PF/VF parameter FW_PARAMS_PARAM_PFVF_<param>. */
#define FW_PARAM_PFVF(param)						\
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) |			\
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
3839
3840 /*
3841  * Provide a configuration profile to the firmware and have it initialize the
3842  * chip accordingly.  This may involve uploading a configuration file to the
3843  * card.
3844  */
3845 static int
3846 apply_cfg_and_initialize(struct adapter *sc, char *cfg_file,
3847     const struct caps_allowed *caps_allowed)
3848 {
3849         int rc;
3850         struct fw_caps_config_cmd caps;
3851         uint32_t mtype, moff, finicsum, cfcsum, param, val;
3852
3853         rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST);
3854         if (rc != 0) {
3855                 device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
3856                 return (rc);
3857         }
3858
3859         bzero(&caps, sizeof(caps));
3860         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3861             F_FW_CMD_REQUEST | F_FW_CMD_READ);
3862         if (strncmp(cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0) {
3863                 mtype = 0;
3864                 moff = 0;
3865                 caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3866         } else if (strncmp(cfg_file, FLASH_CF, sizeof(t4_cfg_file)) == 0) {
3867                 mtype = FW_MEMTYPE_FLASH;
3868                 moff = t4_flash_cfg_addr(sc);
3869                 caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3870                     V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3871                     V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
3872                     FW_LEN16(caps));
3873         } else {
3874                 /*
3875                  * Ask the firmware where it wants us to upload the config file.
3876                  */
3877                 param = FW_PARAM_DEV(CF);
3878                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
3879                 if (rc != 0) {
3880                         /* No support for config file?  Shouldn't happen. */
3881                         device_printf(sc->dev,
3882                             "failed to query config file location: %d.\n", rc);
3883                         goto done;
3884                 }
3885                 mtype = G_FW_PARAMS_PARAM_Y(val);
3886                 moff = G_FW_PARAMS_PARAM_Z(val) << 16;
3887                 caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
3888                     V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
3889                     V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) |
3890                     FW_LEN16(caps));
3891
3892                 rc = copy_cfg_file_to_card(sc, cfg_file, mtype, moff);
3893                 if (rc != 0) {
3894                         device_printf(sc->dev,
3895                             "failed to upload config file to card: %d.\n", rc);
3896                         goto done;
3897                 }
3898         }
3899         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
3900         if (rc != 0) {
3901                 device_printf(sc->dev, "failed to pre-process config file: %d "
3902                     "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
3903                 goto done;
3904         }
3905
3906         finicsum = be32toh(caps.finicsum);
3907         cfcsum = be32toh(caps.cfcsum);  /* actual */
3908         if (finicsum != cfcsum) {
3909                 device_printf(sc->dev,
3910                     "WARNING: config file checksum mismatch: %08x %08x\n",
3911                     finicsum, cfcsum);
3912         }
3913         sc->cfcsum = cfcsum;
3914         snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", cfg_file);
3915
3916         /*
3917          * Let the firmware know what features will (not) be used so it can tune
3918          * things accordingly.
3919          */
3920 #define LIMIT_CAPS(x) do { \
3921         caps.x##caps &= htobe16(caps_allowed->x##caps); \
3922 } while (0)
3923         LIMIT_CAPS(nbm);
3924         LIMIT_CAPS(link);
3925         LIMIT_CAPS(switch);
3926         LIMIT_CAPS(nic);
3927         LIMIT_CAPS(toe);
3928         LIMIT_CAPS(rdma);
3929         LIMIT_CAPS(crypto);
3930         LIMIT_CAPS(iscsi);
3931         LIMIT_CAPS(fcoe);
3932 #undef LIMIT_CAPS
3933         if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
3934                 /*
3935                  * TOE and hashfilters are mutually exclusive.  It is a config
3936                  * file or firmware bug if both are reported as available.  Try
3937                  * to cope with the situation in non-debug builds by disabling
3938                  * TOE.
3939                  */
3940                 MPASS(caps.toecaps == 0);
3941
3942                 caps.toecaps = 0;
3943                 caps.rdmacaps = 0;
3944                 caps.iscsicaps = 0;
3945         }
3946
3947         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
3948             F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
3949         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
3950         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
3951         if (rc != 0) {
3952                 device_printf(sc->dev,
3953                     "failed to process config file: %d.\n", rc);
3954                 goto done;
3955         }
3956
3957         t4_tweak_chip_settings(sc);
3958
3959         /* get basic stuff going */
3960         rc = -t4_fw_initialize(sc, sc->mbox);
3961         if (rc != 0) {
3962                 device_printf(sc->dev, "fw_initialize failed: %d.\n", rc);
3963                 goto done;
3964         }
3965 done:
3966         return (rc);
3967 }
3968
3969 /*
3970  * Partition chip resources for use between various PFs, VFs, etc.
3971  */
3972 static int
3973 partition_resources(struct adapter *sc)
3974 {
3975         char cfg_file[sizeof(t4_cfg_file)];
3976         struct caps_allowed caps_allowed;
3977         int rc;
3978         bool fallback;
3979
3980         /* Only the master driver gets to configure the chip resources. */
3981         MPASS(sc->flags & MASTER_PF);
3982
3983 #define COPY_CAPS(x) do { \
3984         caps_allowed.x##caps = t4_##x##caps_allowed; \
3985 } while (0)
3986         bzero(&caps_allowed, sizeof(caps_allowed));
3987         COPY_CAPS(nbm);
3988         COPY_CAPS(link);
3989         COPY_CAPS(switch);
3990         COPY_CAPS(nic);
3991         COPY_CAPS(toe);
3992         COPY_CAPS(rdma);
3993         COPY_CAPS(crypto);
3994         COPY_CAPS(iscsi);
3995         COPY_CAPS(fcoe);
3996         fallback = sc->debug_flags & DF_DISABLE_CFG_RETRY ? false : true;
3997         snprintf(cfg_file, sizeof(cfg_file), "%s", t4_cfg_file);
3998 retry:
3999         rc = apply_cfg_and_initialize(sc, cfg_file, &caps_allowed);
4000         if (rc != 0 && fallback) {
4001                 device_printf(sc->dev,
4002                     "failed (%d) to configure card with \"%s\" profile, "
4003                     "will fall back to a basic configuration and retry.\n",
4004                     rc, cfg_file);
4005                 snprintf(cfg_file, sizeof(cfg_file), "%s", BUILTIN_CF);
4006                 bzero(&caps_allowed, sizeof(caps_allowed));
4007                 COPY_CAPS(nbm);
4008                 COPY_CAPS(link);
4009                 COPY_CAPS(switch);
4010                 COPY_CAPS(nic);
4011                 fallback = false;
4012                 goto retry;
4013         }
4014 #undef COPY_CAPS
4015         return (rc);
4016 }
4017
4018 /*
4019  * Retrieve parameters that are needed (or nice to have) very early.
4020  */
4021 static int
4022 get_params__pre_init(struct adapter *sc)
4023 {
4024         int rc;
4025         uint32_t param[2], val[2];
4026
4027         t4_get_version_info(sc);
4028
4029         snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
4030             G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
4031             G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
4032             G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
4033             G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
4034
4035         snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u",
4036             G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers),
4037             G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers),
4038             G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers),
4039             G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers));
4040
4041         snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
4042             G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
4043             G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
4044             G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
4045             G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
4046
4047         snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u",
4048             G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers),
4049             G_FW_HDR_FW_VER_MINOR(sc->params.er_vers),
4050             G_FW_HDR_FW_VER_MICRO(sc->params.er_vers),
4051             G_FW_HDR_FW_VER_BUILD(sc->params.er_vers));
4052
4053         param[0] = FW_PARAM_DEV(PORTVEC);
4054         param[1] = FW_PARAM_DEV(CCLK);
4055         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4056         if (rc != 0) {
4057                 device_printf(sc->dev,
4058                     "failed to query parameters (pre_init): %d.\n", rc);
4059                 return (rc);
4060         }
4061
4062         sc->params.portvec = val[0];
4063         sc->params.nports = bitcount32(val[0]);
4064         sc->params.vpd.cclk = val[1];
4065
4066         /* Read device log parameters. */
4067         rc = -t4_init_devlog_params(sc, 1);
4068         if (rc == 0)
4069                 fixup_devlog_params(sc);
4070         else {
4071                 device_printf(sc->dev,
4072                     "failed to get devlog parameters: %d.\n", rc);
4073                 rc = 0; /* devlog isn't critical for device operation */
4074         }
4075
4076         return (rc);
4077 }
4078
4079 /*
4080  * Retrieve various parameters that are of interest to the driver.  The device
4081  * has been initialized by the firmware at this point.
4082  */
4083 static int
4084 get_params__post_init(struct adapter *sc)
4085 {
4086         int rc;
4087         uint32_t param[7], val[7];
4088         struct fw_caps_config_cmd caps;
4089
4090         param[0] = FW_PARAM_PFVF(IQFLINT_START);
4091         param[1] = FW_PARAM_PFVF(EQ_START);
4092         param[2] = FW_PARAM_PFVF(FILTER_START);
4093         param[3] = FW_PARAM_PFVF(FILTER_END);
4094         param[4] = FW_PARAM_PFVF(L2T_START);
4095         param[5] = FW_PARAM_PFVF(L2T_END);
4096         param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
4097             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
4098             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
4099         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val);
4100         if (rc != 0) {
4101                 device_printf(sc->dev,
4102                     "failed to query parameters (post_init): %d.\n", rc);
4103                 return (rc);
4104         }
4105
4106         sc->sge.iq_start = val[0];
4107         sc->sge.eq_start = val[1];
4108         if ((int)val[3] > (int)val[2]) {
4109                 sc->tids.ftid_base = val[2];
4110                 sc->tids.ftid_end = val[3];
4111                 sc->tids.nftids = val[3] - val[2] + 1;
4112         }
4113         sc->vres.l2t.start = val[4];
4114         sc->vres.l2t.size = val[5] - val[4] + 1;
4115         KASSERT(sc->vres.l2t.size <= L2T_SIZE,
4116             ("%s: L2 table size (%u) larger than expected (%u)",
4117             __func__, sc->vres.l2t.size, L2T_SIZE));
4118         sc->params.core_vdd = val[6];
4119
4120         if (chip_id(sc) >= CHELSIO_T6) {
4121
4122 #ifdef INVARIANTS
4123                 if (sc->params.fw_vers >=
4124                     (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4125                     V_FW_HDR_FW_VER_MICRO(1) | V_FW_HDR_FW_VER_BUILD(0))) {
4126                         /*
4127                          * Note that the code to enable the region should run
4128                          * before t4_fw_initialize and not here.  This is just a
4129                          * reminder to add said code.
4130                          */
4131                         device_printf(sc->dev,
4132                             "hpfilter region not enabled.\n");
4133                 }
4134 #endif
4135
4136                 sc->tids.tid_base = t4_read_reg(sc,
4137                     A_LE_DB_ACTIVE_TABLE_START_INDEX);
4138
4139                 param[0] = FW_PARAM_PFVF(HPFILTER_START);
4140                 param[1] = FW_PARAM_PFVF(HPFILTER_END);
4141                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4142                 if (rc != 0) {
4143                         device_printf(sc->dev,
4144                            "failed to query hpfilter parameters: %d.\n", rc);
4145                         return (rc);
4146                 }
4147                 if ((int)val[1] > (int)val[0]) {
4148                         sc->tids.hpftid_base = val[0];
4149                         sc->tids.hpftid_end = val[1];
4150                         sc->tids.nhpftids = val[1] - val[0] + 1;
4151
4152                         /*
4153                          * These should go off if the layout changes and the
4154                          * driver needs to catch up.
4155                          */
4156                         MPASS(sc->tids.hpftid_base == 0);
4157                         MPASS(sc->tids.tid_base == sc->tids.nhpftids);
4158                 }
4159         }
4160
4161         /*
4162          * MPSBGMAP is queried separately because only recent firmwares support
4163          * it as a parameter and we don't want the compound query above to fail
4164          * on older firmwares.
4165          */
4166         param[0] = FW_PARAM_DEV(MPSBGMAP);
4167         val[0] = 0;
4168         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4169         if (rc == 0)
4170                 sc->params.mps_bg_map = val[0];
4171         else
4172                 sc->params.mps_bg_map = 0;
4173
4174         /*
4175          * Determine whether the firmware supports the filter2 work request.
4176          * This is queried separately for the same reason as MPSBGMAP above.
4177          */
4178         param[0] = FW_PARAM_DEV(FILTER2_WR);
4179         val[0] = 0;
4180         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4181         if (rc == 0)
4182                 sc->params.filter2_wr_support = val[0] != 0;
4183         else
4184                 sc->params.filter2_wr_support = 0;
4185
4186         /*
4187          * Find out whether we're allowed to use the ULPTX MEMWRITE DSGL.
4188          * This is queried separately for the same reason as other params above.
4189          */
4190         param[0] = FW_PARAM_DEV(ULPTX_MEMWRITE_DSGL);
4191         val[0] = 0;
4192         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4193         if (rc == 0)
4194                 sc->params.ulptx_memwrite_dsgl = val[0] != 0;
4195         else
4196                 sc->params.ulptx_memwrite_dsgl = false;
4197
4198         /* get capabilites */
4199         bzero(&caps, sizeof(caps));
4200         caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
4201             F_FW_CMD_REQUEST | F_FW_CMD_READ);
4202         caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
4203         rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
4204         if (rc != 0) {
4205                 device_printf(sc->dev,
4206                     "failed to get card capabilities: %d.\n", rc);
4207                 return (rc);
4208         }
4209
4210 #define READ_CAPS(x) do { \
4211         sc->x = htobe16(caps.x); \
4212 } while (0)
4213         READ_CAPS(nbmcaps);
4214         READ_CAPS(linkcaps);
4215         READ_CAPS(switchcaps);
4216         READ_CAPS(niccaps);
4217         READ_CAPS(toecaps);
4218         READ_CAPS(rdmacaps);
4219         READ_CAPS(cryptocaps);
4220         READ_CAPS(iscsicaps);
4221         READ_CAPS(fcoecaps);
4222
4223         if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) {
4224                 MPASS(chip_id(sc) > CHELSIO_T4);
4225                 MPASS(sc->toecaps == 0);
4226                 sc->toecaps = 0;
4227
4228                 param[0] = FW_PARAM_DEV(NTID);
4229                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val);
4230                 if (rc != 0) {
4231                         device_printf(sc->dev,
4232                             "failed to query HASHFILTER parameters: %d.\n", rc);
4233                         return (rc);
4234                 }
4235                 sc->tids.ntids = val[0];
4236                 if (sc->params.fw_vers <
4237                     (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4238                     V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
4239                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4240                         sc->tids.ntids -= sc->tids.nhpftids;
4241                 }
4242                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4243                 sc->params.hash_filter = 1;
4244         }
4245         if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
4246                 param[0] = FW_PARAM_PFVF(ETHOFLD_START);
4247                 param[1] = FW_PARAM_PFVF(ETHOFLD_END);
4248                 param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4249                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
4250                 if (rc != 0) {
4251                         device_printf(sc->dev,
4252                             "failed to query NIC parameters: %d.\n", rc);
4253                         return (rc);
4254                 }
4255                 if ((int)val[1] > (int)val[0]) {
4256                         sc->tids.etid_base = val[0];
4257                         sc->tids.etid_end = val[1];
4258                         sc->tids.netids = val[1] - val[0] + 1;
4259                         sc->params.eo_wr_cred = val[2];
4260                         sc->params.ethoffload = 1;
4261                 }
4262         }
4263         if (sc->toecaps) {
4264                 /* query offload-related parameters */
4265                 param[0] = FW_PARAM_DEV(NTID);
4266                 param[1] = FW_PARAM_PFVF(SERVER_START);
4267                 param[2] = FW_PARAM_PFVF(SERVER_END);
4268                 param[3] = FW_PARAM_PFVF(TDDP_START);
4269                 param[4] = FW_PARAM_PFVF(TDDP_END);
4270                 param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
4271                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4272                 if (rc != 0) {
4273                         device_printf(sc->dev,
4274                             "failed to query TOE parameters: %d.\n", rc);
4275                         return (rc);
4276                 }
4277                 sc->tids.ntids = val[0];
4278                 if (sc->params.fw_vers <
4279                     (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) |
4280                     V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) {
4281                         MPASS(sc->tids.ntids >= sc->tids.nhpftids);
4282                         sc->tids.ntids -= sc->tids.nhpftids;
4283                 }
4284                 sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
4285                 if ((int)val[2] > (int)val[1]) {
4286                         sc->tids.stid_base = val[1];
4287                         sc->tids.nstids = val[2] - val[1] + 1;
4288                 }
4289                 sc->vres.ddp.start = val[3];
4290                 sc->vres.ddp.size = val[4] - val[3] + 1;
4291                 sc->params.ofldq_wr_cred = val[5];
4292                 sc->params.offload = 1;
4293         } else {
4294                 /*
4295                  * The firmware attempts memfree TOE configuration for -SO cards
4296                  * and will report toecaps=0 if it runs out of resources (this
4297                  * depends on the config file).  It may not report 0 for other
4298                  * capabilities dependent on the TOE in this case.  Set them to
4299                  * 0 here so that the driver doesn't bother tracking resources
4300                  * that will never be used.
4301                  */
4302                 sc->iscsicaps = 0;
4303                 sc->rdmacaps = 0;
4304         }
4305         if (sc->rdmacaps) {
4306                 param[0] = FW_PARAM_PFVF(STAG_START);
4307                 param[1] = FW_PARAM_PFVF(STAG_END);
4308                 param[2] = FW_PARAM_PFVF(RQ_START);
4309                 param[3] = FW_PARAM_PFVF(RQ_END);
4310                 param[4] = FW_PARAM_PFVF(PBL_START);
4311                 param[5] = FW_PARAM_PFVF(PBL_END);
4312                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4313                 if (rc != 0) {
4314                         device_printf(sc->dev,
4315                             "failed to query RDMA parameters(1): %d.\n", rc);
4316                         return (rc);
4317                 }
4318                 sc->vres.stag.start = val[0];
4319                 sc->vres.stag.size = val[1] - val[0] + 1;
4320                 sc->vres.rq.start = val[2];
4321                 sc->vres.rq.size = val[3] - val[2] + 1;
4322                 sc->vres.pbl.start = val[4];
4323                 sc->vres.pbl.size = val[5] - val[4] + 1;
4324
4325                 param[0] = FW_PARAM_PFVF(SQRQ_START);
4326                 param[1] = FW_PARAM_PFVF(SQRQ_END);
4327                 param[2] = FW_PARAM_PFVF(CQ_START);
4328                 param[3] = FW_PARAM_PFVF(CQ_END);
4329                 param[4] = FW_PARAM_PFVF(OCQ_START);
4330                 param[5] = FW_PARAM_PFVF(OCQ_END);
4331                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
4332                 if (rc != 0) {
4333                         device_printf(sc->dev,
4334                             "failed to query RDMA parameters(2): %d.\n", rc);
4335                         return (rc);
4336                 }
4337                 sc->vres.qp.start = val[0];
4338                 sc->vres.qp.size = val[1] - val[0] + 1;
4339                 sc->vres.cq.start = val[2];
4340                 sc->vres.cq.size = val[3] - val[2] + 1;
4341                 sc->vres.ocq.start = val[4];
4342                 sc->vres.ocq.size = val[5] - val[4] + 1;
4343
4344                 param[0] = FW_PARAM_PFVF(SRQ_START);
4345                 param[1] = FW_PARAM_PFVF(SRQ_END);
4346                 param[2] = FW_PARAM_DEV(MAXORDIRD_QP);
4347                 param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER);
4348                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
4349                 if (rc != 0) {
4350                         device_printf(sc->dev,
4351                             "failed to query RDMA parameters(3): %d.\n", rc);
4352                         return (rc);
4353                 }
4354                 sc->vres.srq.start = val[0];
4355                 sc->vres.srq.size = val[1] - val[0] + 1;
4356                 sc->params.max_ordird_qp = val[2];
4357                 sc->params.max_ird_adapter = val[3];
4358         }
4359         if (sc->iscsicaps) {
4360                 param[0] = FW_PARAM_PFVF(ISCSI_START);
4361                 param[1] = FW_PARAM_PFVF(ISCSI_END);
4362                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4363                 if (rc != 0) {
4364                         device_printf(sc->dev,
4365                             "failed to query iSCSI parameters: %d.\n", rc);
4366                         return (rc);
4367                 }
4368                 sc->vres.iscsi.start = val[0];
4369                 sc->vres.iscsi.size = val[1] - val[0] + 1;
4370         }
4371         if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) {
4372                 param[0] = FW_PARAM_PFVF(TLS_START);
4373                 param[1] = FW_PARAM_PFVF(TLS_END);
4374                 rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
4375                 if (rc != 0) {
4376                         device_printf(sc->dev,
4377                             "failed to query TLS parameters: %d.\n", rc);
4378                         return (rc);
4379                 }
4380                 sc->vres.key.start = val[0];
4381                 sc->vres.key.size = val[1] - val[0] + 1;
4382         }
4383
4384         t4_init_sge_params(sc);
4385
4386         /*
4387          * We've got the params we wanted to query via the firmware.  Now grab
4388          * some others directly from the chip.
4389          */
4390         rc = t4_read_chip_settings(sc);
4391
4392         return (rc);
4393 }
4394
4395 static int
4396 set_params__post_init(struct adapter *sc)
4397 {
4398         uint32_t param, val;
4399 #ifdef TCP_OFFLOAD
4400         int i, v, shift;
4401 #endif
4402
4403         /* ask for encapsulated CPLs */
4404         param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
4405         val = 1;
4406         (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
4407
4408         /* Enable 32b port caps if the firmware supports it. */
4409         param = FW_PARAM_PFVF(PORT_CAPS32);
4410         val = 1;
4411         if (t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) == 0)
4412                 sc->params.port_caps32 = 1;
4413
4414         /* Let filter + maskhash steer to a part of the VI's RSS region. */
4415         val = 1 << (G_MASKSIZE(t4_read_reg(sc, A_TP_RSS_CONFIG_TNL)) - 1);
4416         t4_set_reg_field(sc, A_TP_RSS_CONFIG_TNL, V_MASKFILTER(M_MASKFILTER),
4417             V_MASKFILTER(val - 1));
4418
4419 #ifdef TCP_OFFLOAD
4420         /*
4421          * Override the TOE timers with user provided tunables.  This is not the
4422          * recommended way to change the timers (the firmware config file is) so
4423          * these tunables are not documented.
4424          *
4425          * All the timer tunables are in microseconds.
4426          */
4427         if (t4_toe_keepalive_idle != 0) {
4428                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle);
4429                 v &= M_KEEPALIVEIDLE;
4430                 t4_set_reg_field(sc, A_TP_KEEP_IDLE,
4431                     V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v));
4432         }
4433         if (t4_toe_keepalive_interval != 0) {
4434                 v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval);
4435                 v &= M_KEEPALIVEINTVL;
4436                 t4_set_reg_field(sc, A_TP_KEEP_INTVL,
4437                     V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v));
4438         }
4439         if (t4_toe_keepalive_count != 0) {
4440                 v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2;
4441                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4442                     V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) |
4443                     V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2),
4444                     V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v));
4445         }
4446         if (t4_toe_rexmt_min != 0) {
4447                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_min);
4448                 v &= M_RXTMIN;
4449                 t4_set_reg_field(sc, A_TP_RXT_MIN,
4450                     V_RXTMIN(M_RXTMIN), V_RXTMIN(v));
4451         }
4452         if (t4_toe_rexmt_max != 0) {
4453                 v = us_to_tcp_ticks(sc, t4_toe_rexmt_max);
4454                 v &= M_RXTMAX;
4455                 t4_set_reg_field(sc, A_TP_RXT_MAX,
4456                     V_RXTMAX(M_RXTMAX), V_RXTMAX(v));
4457         }
4458         if (t4_toe_rexmt_count != 0) {
4459                 v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2;
4460                 t4_set_reg_field(sc, A_TP_SHIFT_CNT,
4461                     V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) |
4462                     V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2),
4463                     V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v));
4464         }
4465         for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) {
4466                 if (t4_toe_rexmt_backoff[i] != -1) {
4467                         v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0;
4468                         shift = (i & 3) << 3;
4469                         t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3),
4470                             M_TIMERBACKOFFINDEX0 << shift, v << shift);
4471                 }
4472         }
4473 #endif
4474         return (0);
4475 }
4476
4477 #undef FW_PARAM_PFVF
4478 #undef FW_PARAM_DEV
4479
4480 static void
4481 t4_set_desc(struct adapter *sc)
4482 {
4483         char buf[128];
4484         struct adapter_params *p = &sc->params;
4485
4486         snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id);
4487
4488         device_set_desc_copy(sc->dev, buf);
4489 }
4490
4491 static inline void
4492 ifmedia_add4(struct ifmedia *ifm, int m)
4493 {
4494
4495         ifmedia_add(ifm, m, 0, NULL);
4496         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL);
4497         ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL);
4498         ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL);
4499 }
4500
4501 /*
4502  * This is the selected media, which is not quite the same as the active media.
4503  * The media line in ifconfig is "media: Ethernet selected (active)" if selected
4504  * and active are not the same, and "media: Ethernet selected" otherwise.
4505  */
4506 static void
4507 set_current_media(struct port_info *pi)
4508 {
4509         struct link_config *lc;
4510         struct ifmedia *ifm;
4511         int mword;
4512         u_int speed;
4513
4514         PORT_LOCK_ASSERT_OWNED(pi);
4515
4516         /* Leave current media alone if it's already set to IFM_NONE. */
4517         ifm = &pi->media;
4518         if (ifm->ifm_cur != NULL &&
4519             IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE)
4520                 return;
4521
4522         lc = &pi->link_cfg;
4523         if (lc->requested_aneg != AUTONEG_DISABLE &&
4524             lc->supported & FW_PORT_CAP32_ANEG) {
4525                 ifmedia_set(ifm, IFM_ETHER | IFM_AUTO);
4526                 return;
4527         }
4528         mword = IFM_ETHER | IFM_FDX;
4529         if (lc->requested_fc & PAUSE_TX)
4530                 mword |= IFM_ETH_TXPAUSE;
4531         if (lc->requested_fc & PAUSE_RX)
4532                 mword |= IFM_ETH_RXPAUSE;
4533         if (lc->requested_speed == 0)
4534                 speed = port_top_speed(pi) * 1000;      /* Gbps -> Mbps */
4535         else
4536                 speed = lc->requested_speed;
4537         mword |= port_mword(pi, speed_to_fwcap(speed));
4538         ifmedia_set(ifm, mword);
4539 }
4540
4541 /*
4542  * Returns true if the ifmedia list for the port cannot change.
4543  */
4544 static bool
4545 fixed_ifmedia(struct port_info *pi)
4546 {
4547
4548         return (pi->port_type == FW_PORT_TYPE_BT_SGMII ||
4549             pi->port_type == FW_PORT_TYPE_BT_XFI ||
4550             pi->port_type == FW_PORT_TYPE_BT_XAUI ||
4551             pi->port_type == FW_PORT_TYPE_KX4 ||
4552             pi->port_type == FW_PORT_TYPE_KX ||
4553             pi->port_type == FW_PORT_TYPE_KR ||
4554             pi->port_type == FW_PORT_TYPE_BP_AP ||
4555             pi->port_type == FW_PORT_TYPE_BP4_AP ||
4556             pi->port_type == FW_PORT_TYPE_BP40_BA ||
4557             pi->port_type == FW_PORT_TYPE_KR4_100G ||
4558             pi->port_type == FW_PORT_TYPE_KR_SFP28 ||
4559             pi->port_type == FW_PORT_TYPE_KR_XLAUI);
4560 }
4561
4562 static void
4563 build_medialist(struct port_info *pi)
4564 {
4565         uint32_t ss, speed;
4566         int unknown, mword, bit;
4567         struct link_config *lc;
4568         struct ifmedia *ifm;
4569
4570         PORT_LOCK_ASSERT_OWNED(pi);
4571
4572         if (pi->flags & FIXED_IFMEDIA)
4573                 return;
4574
4575         /*
4576          * Rebuild the ifmedia list.
4577          */
4578         ifm = &pi->media;
4579         ifmedia_removeall(ifm);
4580         lc = &pi->link_cfg;
4581         ss = G_FW_PORT_CAP32_SPEED(lc->supported); /* Supported Speeds */
4582         if (__predict_false(ss == 0)) { /* not supposed to happen. */
4583                 MPASS(ss != 0);
4584 no_media:
4585                 MPASS(LIST_EMPTY(&ifm->ifm_list));
4586                 ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL);
4587                 ifmedia_set(ifm, IFM_ETHER | IFM_NONE);
4588                 return;
4589         }
4590
4591         unknown = 0;
4592         for (bit = S_FW_PORT_CAP32_SPEED; bit < fls(ss); bit++) {
4593                 speed = 1 << bit;
4594                 MPASS(speed & M_FW_PORT_CAP32_SPEED);
4595                 if (ss & speed) {
4596                         mword = port_mword(pi, speed);
4597                         if (mword == IFM_NONE) {
4598                                 goto no_media;
4599                         } else if (mword == IFM_UNKNOWN)
4600                                 unknown++;
4601                         else
4602                                 ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword);
4603                 }
4604         }
4605         if (unknown > 0) /* Add one unknown for all unknown media types. */
4606                 ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN);
4607         if (lc->supported & FW_PORT_CAP32_ANEG)
4608                 ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL);
4609
4610         set_current_media(pi);
4611 }
4612
4613 /*
4614  * Initialize the requested fields in the link config based on driver tunables.
4615  */
4616 static void
4617 init_link_config(struct port_info *pi)
4618 {
4619         struct link_config *lc = &pi->link_cfg;
4620
4621         PORT_LOCK_ASSERT_OWNED(pi);
4622
4623         lc->requested_speed = 0;
4624
4625         if (t4_autoneg == 0)
4626                 lc->requested_aneg = AUTONEG_DISABLE;
4627         else if (t4_autoneg == 1)
4628                 lc->requested_aneg = AUTONEG_ENABLE;
4629         else
4630                 lc->requested_aneg = AUTONEG_AUTO;
4631
4632         lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX |
4633             PAUSE_AUTONEG);
4634
4635         if (t4_fec == -1 || t4_fec & FEC_AUTO)
4636                 lc->requested_fec = FEC_AUTO;
4637         else {
4638                 lc->requested_fec = FEC_NONE;
4639                 if (t4_fec & FEC_RS)
4640                         lc->requested_fec |= FEC_RS;
4641                 if (t4_fec & FEC_BASER_RS)
4642                         lc->requested_fec |= FEC_BASER_RS;
4643         }
4644 }
4645
4646 /*
4647  * Makes sure that all requested settings comply with what's supported by the
4648  * port.  Returns the number of settings that were invalid and had to be fixed.
4649  */
4650 static int
4651 fixup_link_config(struct port_info *pi)
4652 {
4653         int n = 0;
4654         struct link_config *lc = &pi->link_cfg;
4655         uint32_t fwspeed;
4656
4657         PORT_LOCK_ASSERT_OWNED(pi);
4658
4659         /* Speed (when not autonegotiating) */
4660         if (lc->requested_speed != 0) {
4661                 fwspeed = speed_to_fwcap(lc->requested_speed);
4662                 if ((fwspeed & lc->supported) == 0) {
4663                         n++;
4664                         lc->requested_speed = 0;
4665                 }
4666         }
4667
4668         /* Link autonegotiation */
4669         MPASS(lc->requested_aneg == AUTONEG_ENABLE ||
4670             lc->requested_aneg == AUTONEG_DISABLE ||
4671             lc->requested_aneg == AUTONEG_AUTO);
4672         if (lc->requested_aneg == AUTONEG_ENABLE &&
4673             !(lc->supported & FW_PORT_CAP32_ANEG)) {
4674                 n++;
4675                 lc->requested_aneg = AUTONEG_AUTO;
4676         }
4677
4678         /* Flow control */
4679         MPASS((lc->requested_fc & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG)) == 0);
4680         if (lc->requested_fc & PAUSE_TX &&
4681             !(lc->supported & FW_PORT_CAP32_FC_TX)) {
4682                 n++;
4683                 lc->requested_fc &= ~PAUSE_TX;
4684         }
4685         if (lc->requested_fc & PAUSE_RX &&
4686             !(lc->supported & FW_PORT_CAP32_FC_RX)) {
4687                 n++;
4688                 lc->requested_fc &= ~PAUSE_RX;
4689         }
4690         if (!(lc->requested_fc & PAUSE_AUTONEG) &&
4691             !(lc->supported & FW_PORT_CAP32_FORCE_PAUSE)) {
4692                 n++;
4693                 lc->requested_fc |= PAUSE_AUTONEG;
4694         }
4695
4696         /* FEC */
4697         if ((lc->requested_fec & FEC_RS &&
4698             !(lc->supported & FW_PORT_CAP32_FEC_RS)) ||
4699             (lc->requested_fec & FEC_BASER_RS &&
4700             !(lc->supported & FW_PORT_CAP32_FEC_BASER_RS))) {
4701                 n++;
4702                 lc->requested_fec = FEC_AUTO;
4703         }
4704
4705         return (n);
4706 }
4707
4708 /*
4709  * Apply the requested L1 settings, which are expected to be valid, to the
4710  * hardware.
4711  */
4712 static int
4713 apply_link_config(struct port_info *pi)
4714 {
4715         struct adapter *sc = pi->adapter;
4716         struct link_config *lc = &pi->link_cfg;
4717         int rc;
4718
4719 #ifdef INVARIANTS
4720         ASSERT_SYNCHRONIZED_OP(sc);
4721         PORT_LOCK_ASSERT_OWNED(pi);
4722
4723         if (lc->requested_aneg == AUTONEG_ENABLE)
4724                 MPASS(lc->supported & FW_PORT_CAP32_ANEG);
4725         if (!(lc->requested_fc & PAUSE_AUTONEG))
4726                 MPASS(lc->supported & FW_PORT_CAP32_FORCE_PAUSE);
4727         if (lc->requested_fc & PAUSE_TX)
4728                 MPASS(lc->supported & FW_PORT_CAP32_FC_TX);
4729         if (lc->requested_fc & PAUSE_RX)
4730                 MPASS(lc->supported & FW_PORT_CAP32_FC_RX);
4731         if (lc->requested_fec & FEC_RS)
4732                 MPASS(lc->supported & FW_PORT_CAP32_FEC_RS);
4733         if (lc->requested_fec & FEC_BASER_RS)
4734                 MPASS(lc->supported & FW_PORT_CAP32_FEC_BASER_RS);
4735 #endif
4736         rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
4737         if (rc != 0) {
4738                 /* Don't complain if the VF driver gets back an EPERM. */
4739                 if (!(sc->flags & IS_VF) || rc != FW_EPERM)
4740                         device_printf(pi->dev, "l1cfg failed: %d\n", rc);
4741         } else {
4742                 /*
4743                  * An L1_CFG will almost always result in a link-change event if
4744                  * the link is up, and the driver will refresh the actual
4745                  * fec/fc/etc. when the notification is processed.  If the link
4746                  * is down then the actual settings are meaningless.
4747                  *
4748                  * This takes care of the case where a change in the L1 settings
4749                  * may not result in a notification.
4750                  */
4751                 if (lc->link_ok && !(lc->requested_fc & PAUSE_AUTONEG))
4752                         lc->fc = lc->requested_fc & (PAUSE_TX | PAUSE_RX);
4753         }
4754         return (rc);
4755 }
4756
4757 #define FW_MAC_EXACT_CHUNK      7
4758
4759 /*
4760  * Program the port's XGMAC based on parameters in ifnet.  The caller also
4761  * indicates which parameters should be programmed (the rest are left alone).
4762  */
4763 int
4764 update_mac_settings(struct ifnet *ifp, int flags)
4765 {
4766         int rc = 0;
4767         struct vi_info *vi = ifp->if_softc;
4768         struct port_info *pi = vi->pi;
4769         struct adapter *sc = pi->adapter;
4770         int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
4771
4772         ASSERT_SYNCHRONIZED_OP(sc);
4773         KASSERT(flags, ("%s: not told what to update.", __func__));
4774
4775         if (flags & XGMAC_MTU)
4776                 mtu = ifp->if_mtu;
4777
4778         if (flags & XGMAC_PROMISC)
4779                 promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
4780
4781         if (flags & XGMAC_ALLMULTI)
4782                 allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
4783
4784         if (flags & XGMAC_VLANEX)
4785                 vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
4786
4787         if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
4788                 rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
4789                     allmulti, 1, vlanex, false);
4790                 if (rc) {
4791                         if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
4792                             rc);
4793                         return (rc);
4794                 }
4795         }
4796
4797         if (flags & XGMAC_UCADDR) {
4798                 uint8_t ucaddr[ETHER_ADDR_LEN];
4799
4800                 bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
4801                 rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
4802                     ucaddr, true, true);
4803                 if (rc < 0) {
4804                         rc = -rc;
4805                         if_printf(ifp, "change_mac failed: %d\n", rc);
4806                         return (rc);
4807                 } else {
4808                         vi->xact_addr_filt = rc;
4809                         rc = 0;
4810                 }
4811         }
4812
4813         if (flags & XGMAC_MCADDRS) {
4814                 const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
4815                 int del = 1;
4816                 uint64_t hash = 0;
4817                 struct ifmultiaddr *ifma;
4818                 int i = 0, j;
4819
4820                 if_maddr_rlock(ifp);
4821                 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
4822                         if (ifma->ifma_addr->sa_family != AF_LINK)
4823                                 continue;
4824                         mcaddr[i] =
4825                             LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
4826                         MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
4827                         i++;
4828
4829                         if (i == FW_MAC_EXACT_CHUNK) {
4830                                 rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
4831                                     del, i, mcaddr, NULL, &hash, 0);
4832                                 if (rc < 0) {
4833                                         rc = -rc;
4834                                         for (j = 0; j < i; j++) {
4835                                                 if_printf(ifp,
4836                                                     "failed to add mc address"
4837                                                     " %02x:%02x:%02x:"
4838                                                     "%02x:%02x:%02x rc=%d\n",
4839                                                     mcaddr[j][0], mcaddr[j][1],
4840                                                     mcaddr[j][2], mcaddr[j][3],
4841                                                     mcaddr[j][4], mcaddr[j][5],
4842                                                     rc);
4843                                         }
4844                                         goto mcfail;
4845                                 }
4846                                 del = 0;
4847                                 i = 0;
4848                         }
4849                 }
4850                 if (i > 0) {
4851                         rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
4852                             mcaddr, NULL, &hash, 0);
4853                         if (rc < 0) {
4854                                 rc = -rc;
4855                                 for (j = 0; j < i; j++) {
4856                                         if_printf(ifp,
4857                                             "failed to add mc address"
4858                                             " %02x:%02x:%02x:"
4859                                             "%02x:%02x:%02x rc=%d\n",
4860                                             mcaddr[j][0], mcaddr[j][1],
4861                                             mcaddr[j][2], mcaddr[j][3],
4862                                             mcaddr[j][4], mcaddr[j][5],
4863                                             rc);
4864                                 }
4865                                 goto mcfail;
4866                         }
4867                 }
4868
4869                 rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
4870                 if (rc != 0)
4871                         if_printf(ifp, "failed to set mc address hash: %d", rc);
4872 mcfail:
4873                 if_maddr_runlock(ifp);
4874         }
4875
4876         return (rc);
4877 }
4878
4879 /*
4880  * {begin|end}_synchronized_op must be called from the same thread.
4881  */
4882 int
4883 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
4884     char *wmesg)
4885 {
4886         int rc, pri;
4887
4888 #ifdef WITNESS
4889         /* the caller thinks it's ok to sleep, but is it really? */
4890         if (flags & SLEEP_OK)
4891                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
4892                     "begin_synchronized_op");
4893 #endif
4894
4895         if (INTR_OK)
4896                 pri = PCATCH;
4897         else
4898                 pri = 0;
4899
4900         ADAPTER_LOCK(sc);
4901         for (;;) {
4902
4903                 if (vi && IS_DOOMED(vi)) {
4904                         rc = ENXIO;
4905                         goto done;
4906                 }
4907
4908                 if (!IS_BUSY(sc)) {
4909                         rc = 0;
4910                         break;
4911                 }
4912
4913                 if (!(flags & SLEEP_OK)) {
4914                         rc = EBUSY;
4915                         goto done;
4916                 }
4917
4918                 if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
4919                         rc = EINTR;
4920                         goto done;
4921                 }
4922         }
4923
4924         KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
4925         SET_BUSY(sc);
4926 #ifdef INVARIANTS
4927         sc->last_op = wmesg;
4928         sc->last_op_thr = curthread;
4929         sc->last_op_flags = flags;
4930 #endif
4931
4932 done:
4933         if (!(flags & HOLD_LOCK) || rc)
4934                 ADAPTER_UNLOCK(sc);
4935
4936         return (rc);
4937 }
4938
4939 /*
4940  * Tell if_ioctl and if_init that the VI is going away.  This is
4941  * special variant of begin_synchronized_op and must be paired with a
4942  * call to end_synchronized_op.
4943  */
4944 void
4945 doom_vi(struct adapter *sc, struct vi_info *vi)
4946 {
4947
4948         ADAPTER_LOCK(sc);
4949         SET_DOOMED(vi);
4950         wakeup(&sc->flags);
4951         while (IS_BUSY(sc))
4952                 mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
4953         SET_BUSY(sc);
4954 #ifdef INVARIANTS
4955         sc->last_op = "t4detach";
4956         sc->last_op_thr = curthread;
4957         sc->last_op_flags = 0;
4958 #endif
4959         ADAPTER_UNLOCK(sc);
4960 }
4961
4962 /*
4963  * {begin|end}_synchronized_op must be called from the same thread.
4964  */
4965 void
4966 end_synchronized_op(struct adapter *sc, int flags)
4967 {
4968
4969         if (flags & LOCK_HELD)
4970                 ADAPTER_LOCK_ASSERT_OWNED(sc);
4971         else
4972                 ADAPTER_LOCK(sc);
4973
4974         KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
4975         CLR_BUSY(sc);
4976         wakeup(&sc->flags);
4977         ADAPTER_UNLOCK(sc);
4978 }
4979
4980 static int
4981 cxgbe_init_synchronized(struct vi_info *vi)
4982 {
4983         struct port_info *pi = vi->pi;
4984         struct adapter *sc = pi->adapter;
4985         struct ifnet *ifp = vi->ifp;
4986         int rc = 0, i;
4987         struct sge_txq *txq;
4988
4989         ASSERT_SYNCHRONIZED_OP(sc);
4990
4991         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
4992                 return (0);     /* already running */
4993
4994         if (!(sc->flags & FULL_INIT_DONE) &&
4995             ((rc = adapter_full_init(sc)) != 0))
4996                 return (rc);    /* error message displayed already */
4997
4998         if (!(vi->flags & VI_INIT_DONE) &&
4999             ((rc = vi_full_init(vi)) != 0))
5000                 return (rc); /* error message displayed already */
5001
5002         rc = update_mac_settings(ifp, XGMAC_ALL);
5003         if (rc)
5004                 goto done;      /* error message displayed already */
5005
5006         PORT_LOCK(pi);
5007         if (pi->up_vis == 0) {
5008                 t4_update_port_info(pi);
5009                 fixup_link_config(pi);
5010                 build_medialist(pi);
5011                 apply_link_config(pi);
5012         }
5013
5014         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
5015         if (rc != 0) {
5016                 if_printf(ifp, "enable_vi failed: %d\n", rc);
5017                 PORT_UNLOCK(pi);
5018                 goto done;
5019         }
5020
5021         /*
5022          * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
5023          * if this changes.
5024          */
5025
5026         for_each_txq(vi, i, txq) {
5027                 TXQ_LOCK(txq);
5028                 txq->eq.flags |= EQ_ENABLED;
5029                 TXQ_UNLOCK(txq);
5030         }
5031
5032         /*
5033          * The first iq of the first port to come up is used for tracing.
5034          */
5035         if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
5036                 sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
5037                 t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
5038                     A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
5039                     V_QUEUENUMBER(sc->traceq));
5040                 pi->flags |= HAS_TRACEQ;
5041         }
5042
5043         /* all ok */
5044         pi->up_vis++;
5045         ifp->if_drv_flags |= IFF_DRV_RUNNING;
5046
5047         if (pi->nvi > 1 || sc->flags & IS_VF)
5048                 callout_reset(&vi->tick, hz, vi_tick, vi);
5049         else
5050                 callout_reset(&pi->tick, hz, cxgbe_tick, pi);
5051         PORT_UNLOCK(pi);
5052 done:
5053         if (rc != 0)
5054                 cxgbe_uninit_synchronized(vi);
5055
5056         return (rc);
5057 }
5058
5059 /*
5060  * Idempotent.
5061  */
5062 static int
5063 cxgbe_uninit_synchronized(struct vi_info *vi)
5064 {
5065         struct port_info *pi = vi->pi;
5066         struct adapter *sc = pi->adapter;
5067         struct ifnet *ifp = vi->ifp;
5068         int rc, i;
5069         struct sge_txq *txq;
5070
5071         ASSERT_SYNCHRONIZED_OP(sc);
5072
5073         if (!(vi->flags & VI_INIT_DONE)) {
5074                 if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5075                         KASSERT(0, ("uninited VI is running"));
5076                         if_printf(ifp, "uninited VI with running ifnet.  "
5077                             "vi->flags 0x%016lx, if_flags 0x%08x, "
5078                             "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags,
5079                             ifp->if_drv_flags);
5080                 }
5081                 return (0);
5082         }
5083
5084         /*
5085          * Disable the VI so that all its data in either direction is discarded
5086          * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
5087          * tick) intact as the TP can deliver negative advice or data that it's
5088          * holding in its RAM (for an offloaded connection) even after the VI is
5089          * disabled.
5090          */
5091         rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
5092         if (rc) {
5093                 if_printf(ifp, "disable_vi failed: %d\n", rc);
5094                 return (rc);
5095         }
5096
5097         for_each_txq(vi, i, txq) {
5098                 TXQ_LOCK(txq);
5099                 txq->eq.flags &= ~EQ_ENABLED;
5100                 TXQ_UNLOCK(txq);
5101         }
5102
5103         PORT_LOCK(pi);
5104         if (pi->nvi > 1 || sc->flags & IS_VF)
5105                 callout_stop(&vi->tick);
5106         else
5107                 callout_stop(&pi->tick);
5108         if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
5109                 PORT_UNLOCK(pi);
5110                 return (0);
5111         }
5112         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
5113         pi->up_vis--;
5114         if (pi->up_vis > 0) {
5115                 PORT_UNLOCK(pi);
5116                 return (0);
5117         }
5118
5119         pi->link_cfg.link_ok = false;
5120         pi->link_cfg.speed = 0;
5121         pi->link_cfg.link_down_rc = 255;
5122         t4_os_link_changed(pi);
5123         PORT_UNLOCK(pi);
5124
5125         return (0);
5126 }
5127
5128 /*
      * Allocate the adapter's interrupts and attach a handler to each, filling
      * in sc->irq[] in order.
      *
      * If forwarding_intr_to_fwq(sc) is true a single interrupt (t4_intr_all) is
      * set up and nothing else.  Otherwise the layout is: the error interrupt
      * (PFs only), the firmware event queue, and then one interrupt per rx queue
      * of every VI of every port (followed by the VI's offload rx queues when
      * TCP_OFFLOAD is defined).  Each VI's first_intr and nintr are updated along
      * the way.
      *
      * Returns 0 on success or the error returned by t4_alloc_irq.
      *
5129  * It is ok for this function to fail midway and return right away.  t4_detach
5130  * will walk the entire sc->irq list and clean up whatever is valid.
5131  */
5132 int
5133 t4_setup_intr_handlers(struct adapter *sc)
5134 {
5135         int rc, rid, p, q, v;
5136         char s[8];
5137         struct irq *irq;
5138         struct port_info *pi;
5139         struct vi_info *vi;
5140         struct sge *sge = &sc->sge;
5141         struct sge_rxq *rxq;
5142 #ifdef TCP_OFFLOAD
5143         struct sge_ofld_rxq *ofld_rxq;
5144 #endif
5145 #ifdef DEV_NETMAP
5146         struct sge_nm_rxq *nm_rxq;
5147 #endif
5148 #ifdef RSS
5149         int nbuckets = rss_getnumbuckets();
5150 #endif
5151
5152         /*
5153          * Setup interrupts.
5154          */
5155         irq = &sc->irq[0];
             /* INTx uses resource id 0; otherwise resource ids start at 1. */
5156         rid = sc->intr_type == INTR_INTX ? 0 : 1;
5157         if (forwarding_intr_to_fwq(sc))
5158                 return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
5159
5160         /* Multiple interrupts. */
5161         if (sc->flags & IS_VF)
5162                 KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports,
5163                     ("%s: too few intr.", __func__));
5164         else
5165                 KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
5166                     ("%s: too few intr.", __func__));
5167
5168         /* The first one is always error intr on PFs */
5169         if (!(sc->flags & IS_VF)) {
5170                 rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
5171                 if (rc != 0)
5172                         return (rc);
5173                 irq++;
5174                 rid++;
5175         }
5176
5177         /* The second one is always the firmware event queue (first on VFs) */
5178         rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt");
5179         if (rc != 0)
5180                 return (rc);
5181         irq++;
5182         rid++;
5183
             /*
              * The rest are the rx queue interrupts.  Each is described as
              * <port><vi letter><queue>, e.g. "0a3"; NIC/netmap queues use a
              * lowercase VI letter and offload queues an uppercase one.
              */
5184         for_each_port(sc, p) {
5185                 pi = sc->port[p];
5186                 for_each_vi(pi, v, vi) {
                             /*
                              * NOTE(review): rid - 1 is presumably the index of
                              * this VI's first entry in sc->irq[] (rids start
                              * at 1 on this path) -- confirm with the users of
                              * first_intr.
                              */
5187                         vi->first_intr = rid - 1;
5188
5189                         if (vi->nnmrxq > 0) {
                                     /*
                                      * NIC and netmap rx queues with the same
                                      * index share an interrupt, so the VI needs
                                      * as many as the larger of the two counts.
                                      */
5190                                 int n = max(vi->nrxq, vi->nnmrxq);
5191
5192                                 rxq = &sge->rxq[vi->first_rxq];
5193 #ifdef DEV_NETMAP
5194                                 nm_rxq = &sge->nm_rxq[vi->first_nm_rxq];
5195 #endif
                                     /*
                                      * The handler for each interrupt depends on
                                      * which of irq->rxq and irq->nm_rxq got set
                                      * for this index.  NOTE(review): this relies
                                      * on both being NULL in an unused sc->irq[]
                                      * entry -- confirm that the array is zeroed
                                      * when allocated.
                                      */
5196                                 for (q = 0; q < n; q++) {
5197                                         snprintf(s, sizeof(s), "%x%c%x", p,
5198                                             'a' + v, q);
5199                                         if (q < vi->nrxq)
5200                                                 irq->rxq = rxq++;
5201 #ifdef DEV_NETMAP
5202                                         if (q < vi->nnmrxq)
5203                                                 irq->nm_rxq = nm_rxq++;
5204
5205                                         if (irq->nm_rxq != NULL &&
5206                                             irq->rxq == NULL) {
5207                                                 /* Netmap rx only */
5208                                                 rc = t4_alloc_irq(sc, irq, rid,
5209                                                     t4_nm_intr, irq->nm_rxq, s);
5210                                         }
5211                                         if (irq->nm_rxq != NULL &&
5212                                             irq->rxq != NULL) {
5213                                                 /* NIC and Netmap rx */
5214                                                 rc = t4_alloc_irq(sc, irq, rid,
5215                                                     t4_vi_intr, irq, s);
5216                                         }
5217 #endif
5218                                         if (irq->rxq != NULL &&
5219                                             irq->nm_rxq == NULL) {
5220                                                 /* NIC rx only */
5221                                                 rc = t4_alloc_irq(sc, irq, rid,
5222                                                     t4_intr, irq->rxq, s);
5223                                         }
5224                                         if (rc != 0)
5225                                                 return (rc);
5226 #ifdef RSS
                                             /*
                                              * Best effort: the result of
                                              * bus_bind_intr is not checked.
                                              */
5227                                         if (q < vi->nrxq) {
5228                                                 bus_bind_intr(sc->dev, irq->res,
5229                                                     rss_getcpu(q % nbuckets));
5230                                         }
5231 #endif
5232                                         irq++;
5233                                         rid++;
5234                                         vi->nintr++;
5235                                 }
5236                         } else {
                                     /* No netmap queues: one interrupt per rxq. */
5237                                 for_each_rxq(vi, q, rxq) {
5238                                         snprintf(s, sizeof(s), "%x%c%x", p,
5239                                             'a' + v, q);
5240                                         rc = t4_alloc_irq(sc, irq, rid,
5241                                             t4_intr, rxq, s);
5242                                         if (rc != 0)
5243                                                 return (rc);
5244 #ifdef RSS
5245                                         bus_bind_intr(sc->dev, irq->res,
5246                                             rss_getcpu(q % nbuckets));
5247 #endif
5248                                         irq++;
5249                                         rid++;
5250                                         vi->nintr++;
5251                                 }
5252                         }
5253 #ifdef TCP_OFFLOAD
5254                         for_each_ofld_rxq(vi, q, ofld_rxq) {
5255                                 snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q);
5256                                 rc = t4_alloc_irq(sc, irq, rid, t4_intr,
5257                                     ofld_rxq, s);
5258                                 if (rc != 0)
5259                                         return (rc);
5260                                 irq++;
5261                                 rid++;
5262                                 vi->nintr++;
5263                         }
5264 #endif
5265                 }
5266         }
             /* Every one of the sc->intr_count entries must have been used. */
5267         MPASS(irq == &sc->irq[sc->intr_count]);
5268
5269         return (0);
5270 }
5271
5272 int
5273 adapter_full_init(struct adapter *sc)
5274 {
5275         int rc, i;
5276 #ifdef RSS
5277         uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5278         uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
5279 #endif
5280
5281         ASSERT_SYNCHRONIZED_OP(sc);
5282         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5283         KASSERT((sc->flags & FULL_INIT_DONE) == 0,
5284             ("%s: FULL_INIT_DONE already", __func__));
5285
5286         /*
5287          * queues that belong to the adapter (not any particular port).
5288          */
5289         rc = t4_setup_adapter_queues(sc);
5290         if (rc != 0)
5291                 goto done;
5292
5293         for (i = 0; i < nitems(sc->tq); i++) {
5294                 sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
5295                     taskqueue_thread_enqueue, &sc->tq[i]);
5296                 if (sc->tq[i] == NULL) {
5297                         device_printf(sc->dev,
5298                             "failed to allocate task queue %d\n", i);
5299                         rc = ENOMEM;
5300                         goto done;
5301                 }
5302                 taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
5303                     device_get_nameunit(sc->dev), i);
5304         }
5305 #ifdef RSS
5306         MPASS(RSS_KEYSIZE == 40);
5307         rss_getkey((void *)&raw_rss_key[0]);
5308         for (i = 0; i < nitems(rss_key); i++) {
5309                 rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
5310         }
5311         t4_write_rss_key(sc, &rss_key[0], -1, 1);
5312 #endif
5313
5314         if (!(sc->flags & IS_VF))
5315                 t4_intr_enable(sc);
5316         sc->flags |= FULL_INIT_DONE;
5317 done:
5318         if (rc != 0)
5319                 adapter_full_uninit(sc);
5320
5321         return (rc);
5322 }
5323
5324 int
5325 adapter_full_uninit(struct adapter *sc)
5326 {
5327         int i;
5328
5329         ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
5330
5331         t4_teardown_adapter_queues(sc);
5332
5333         for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) {
5334                 taskqueue_free(sc->tq[i]);
5335                 sc->tq[i] = NULL;
5336         }
5337
5338         sc->flags &= ~FULL_INIT_DONE;
5339
5340         return (0);
5341 }
5342
5343 #ifdef RSS
5344 #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
5345     RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
5346     RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
5347     RSS_HASHTYPE_RSS_UDP_IPV6)
5348
5349 /* Translates kernel hash types to hardware. */
5350 static int
5351 hashconfig_to_hashen(int hashconfig)
5352 {
5353         int hashen = 0;
5354
5355         if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
5356                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
5357         if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
5358                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
5359         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) {
5360                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5361                     F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5362         }
5363         if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) {
5364                 hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN |
5365                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5366         }
5367         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4)
5368                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
5369         if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6)
5370                 hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
5371
5372         return (hashen);
5373 }
5374
5375 /* Translates hardware hash types to kernel. */
5376 static int
5377 hashen_to_hashconfig(int hashen)
5378 {
5379         int hashconfig = 0;
5380
5381         if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) {
5382                 /*
5383                  * If UDP hashing was enabled it must have been enabled for
5384                  * either IPv4 or IPv6 (inclusive or).  Enabling UDP without
5385                  * enabling any 4-tuple hash is nonsense configuration.
5386                  */
5387                 MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5388                     F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
5389
5390                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5391                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4;
5392                 if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5393                         hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6;
5394         }
5395         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
5396                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4;
5397         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
5398                 hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6;
5399         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
5400                 hashconfig |= RSS_HASHTYPE_RSS_IPV4;
5401         if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
5402                 hashconfig |= RSS_HASHTYPE_RSS_IPV6;
5403
5404         return (hashconfig);
5405 }
5406 #endif
5407
5408 int
5409 vi_full_init(struct vi_info *vi)
5410 {
5411         struct adapter *sc = vi->pi->adapter;
5412         struct ifnet *ifp = vi->ifp;
5413         uint16_t *rss;
5414         struct sge_rxq *rxq;
5415         int rc, i, j;
5416 #ifdef RSS
5417         int nbuckets = rss_getnumbuckets();
5418         int hashconfig = rss_gethashconfig();
5419         int extra;
5420 #endif
5421
5422         ASSERT_SYNCHRONIZED_OP(sc);
5423         KASSERT((vi->flags & VI_INIT_DONE) == 0,
5424             ("%s: VI_INIT_DONE already", __func__));
5425
5426         sysctl_ctx_init(&vi->ctx);
5427         vi->flags |= VI_SYSCTL_CTX;
5428
5429         /*
5430          * Allocate tx/rx/fl queues for this VI.
5431          */
5432         rc = t4_setup_vi_queues(vi);
5433         if (rc != 0)
5434                 goto done;      /* error message displayed already */
5435
5436         /*
5437          * Setup RSS for this VI.  Save a copy of the RSS table for later use.
5438          */
5439         if (vi->nrxq > vi->rss_size) {
5440                 if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
5441                     "some queues will never receive traffic.\n", vi->nrxq,
5442                     vi->rss_size);
5443         } else if (vi->rss_size % vi->nrxq) {
5444                 if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
5445                     "expect uneven traffic distribution.\n", vi->nrxq,
5446                     vi->rss_size);
5447         }
5448 #ifdef RSS
5449         if (vi->nrxq != nbuckets) {
5450                 if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
5451                     "performance will be impacted.\n", vi->nrxq, nbuckets);
5452         }
5453 #endif
5454         rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
5455         for (i = 0; i < vi->rss_size;) {
5456 #ifdef RSS
5457                 j = rss_get_indirection_to_bucket(i);
5458                 j %= vi->nrxq;
5459                 rxq = &sc->sge.rxq[vi->first_rxq + j];
5460                 rss[i++] = rxq->iq.abs_id;
5461 #else
5462                 for_each_rxq(vi, j, rxq) {
5463                         rss[i++] = rxq->iq.abs_id;
5464                         if (i == vi->rss_size)
5465                                 break;
5466                 }
5467 #endif
5468         }
5469
5470         rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
5471             vi->rss_size);
5472         if (rc != 0) {
5473                 free(rss, M_CXGBE);
5474                 if_printf(ifp, "rss_config failed: %d\n", rc);
5475                 goto done;
5476         }
5477
5478 #ifdef RSS
5479         vi->hashen = hashconfig_to_hashen(hashconfig);
5480
5481         /*
5482          * We may have had to enable some hashes even though the global config
5483          * wants them disabled.  This is a potential problem that must be
5484          * reported to the user.
5485          */
5486         extra = hashen_to_hashconfig(vi->hashen) ^ hashconfig;
5487
5488         /*
5489          * If we consider only the supported hash types, then the enabled hashes
5490          * are a superset of the requested hashes.  In other words, there cannot
5491          * be any supported hash that was requested but not enabled, but there
5492          * can be hashes that were not requested but had to be enabled.
5493          */
5494         extra &= SUPPORTED_RSS_HASHTYPES;
5495         MPASS((extra & hashconfig) == 0);
5496
5497         if (extra) {
5498                 if_printf(ifp,
5499                     "global RSS config (0x%x) cannot be accommodated.\n",
5500                     hashconfig);
5501         }
5502         if (extra & RSS_HASHTYPE_RSS_IPV4)
5503                 if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
5504         if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
5505                 if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
5506         if (extra & RSS_HASHTYPE_RSS_IPV6)
5507                 if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
5508         if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
5509                 if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
5510         if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
5511                 if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
5512         if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
5513                 if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
5514 #else
5515         vi->hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
5516             F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
5517             F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
5518             F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
5519 #endif
5520         rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, rss[0], 0, 0);
5521         if (rc != 0) {
5522                 free(rss, M_CXGBE);
5523                 if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
5524                 goto done;
5525         }
5526
5527         vi->rss = rss;
5528         vi->flags |= VI_INIT_DONE;
5529 done:
5530         if (rc != 0)
5531                 vi_full_uninit(vi);
5532
5533         return (rc);
5534 }
5535
5536 /*
5537  * Idempotent.
5538  */
5539 int
5540 vi_full_uninit(struct vi_info *vi)
5541 {
5542         struct port_info *pi = vi->pi;
5543         struct adapter *sc = pi->adapter;
5544         int i;
5545         struct sge_rxq *rxq;
5546         struct sge_txq *txq;
5547 #ifdef TCP_OFFLOAD
5548         struct sge_ofld_rxq *ofld_rxq;
5549 #endif
5550 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
5551         struct sge_wrq *ofld_txq;
5552 #endif
5553
5554         if (vi->flags & VI_INIT_DONE) {
5555
5556                 /* Need to quiesce queues.  */
5557
5558                 /* XXX: Only for the first VI? */
5559                 if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF))
5560                         quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
5561
5562                 for_each_txq(vi, i, txq) {
5563                         quiesce_txq(sc, txq);
5564                 }
5565
5566 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
5567                 for_each_ofld_txq(vi, i, ofld_txq) {
5568                         quiesce_wrq(sc, ofld_txq);
5569                 }
5570 #endif
5571
5572                 for_each_rxq(vi, i, rxq) {
5573                         quiesce_iq(sc, &rxq->iq);
5574                         quiesce_fl(sc, &rxq->fl);
5575                 }
5576
5577 #ifdef TCP_OFFLOAD
5578                 for_each_ofld_rxq(vi, i, ofld_rxq) {
5579                         quiesce_iq(sc, &ofld_rxq->iq);
5580                         quiesce_fl(sc, &ofld_rxq->fl);
5581                 }
5582 #endif
5583                 free(vi->rss, M_CXGBE);
5584                 free(vi->nm_rss, M_CXGBE);
5585         }
5586
5587         t4_teardown_vi_queues(vi);
5588         vi->flags &= ~VI_INIT_DONE;
5589
5590         return (0);
5591 }
5592
5593 static void
5594 quiesce_txq(struct adapter *sc, struct sge_txq *txq)
5595 {
5596         struct sge_eq *eq = &txq->eq;
5597         struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
5598
5599         (void) sc;      /* unused */
5600
5601 #ifdef INVARIANTS
5602         TXQ_LOCK(txq);
5603         MPASS((eq->flags & EQ_ENABLED) == 0);
5604         TXQ_UNLOCK(txq);
5605 #endif
5606
5607         /* Wait for the mp_ring to empty. */
5608         while (!mp_ring_is_idle(txq->r)) {
5609                 mp_ring_check_drainage(txq->r, 0);
5610                 pause("rquiesce", 1);
5611         }
5612
5613         /* Then wait for the hardware to finish. */
5614         while (spg->cidx != htobe16(eq->pidx))
5615                 pause("equiesce", 1);
5616
5617         /* Finally, wait for the driver to reclaim all descriptors. */
5618         while (eq->cidx != eq->pidx)
5619                 pause("dquiesce", 1);
5620 }
5621
/*
 * Placeholder for quiescing a work request queue.  Nothing is done at this
 * time.
 */
static void
quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
{
	(void) sc;	/* unused */
	(void) wrq;	/* unused */

	/* XXXTX */
}
5628
5629 static void
5630 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
5631 {
5632         (void) sc;      /* unused */
5633
5634         /* Synchronize with the interrupt handler */
5635         while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
5636                 pause("iqfree", 1);
5637 }
5638
5639 static void
5640 quiesce_fl(struct adapter *sc, struct sge_fl *fl)
5641 {
5642         mtx_lock(&sc->sfl_lock);
5643         FL_LOCK(fl);
5644         fl->flags |= FL_DOOMED;
5645         FL_UNLOCK(fl);
5646         callout_stop(&sc->sfl_callout);
5647         mtx_unlock(&sc->sfl_lock);
5648
5649         KASSERT((fl->flags & FL_STARVING) == 0,
5650             ("%s: still starving", __func__));
5651 }
5652
5653 static int
5654 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
5655     driver_intr_t *handler, void *arg, char *name)
5656 {
5657         int rc;
5658
5659         irq->rid = rid;
5660         irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
5661             RF_SHAREABLE | RF_ACTIVE);
5662         if (irq->res == NULL) {
5663                 device_printf(sc->dev,
5664                     "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
5665                 return (ENOMEM);
5666         }
5667
5668         rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
5669             NULL, handler, arg, &irq->tag);
5670         if (rc != 0) {
5671                 device_printf(sc->dev,
5672                     "failed to setup interrupt for rid %d, name %s: %d\n",
5673                     rid, name, rc);
5674         } else if (name)
5675                 bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name);
5676
5677         return (rc);
5678 }
5679
5680 static int
5681 t4_free_irq(struct adapter *sc, struct irq *irq)
5682 {
5683         if (irq->tag)
5684                 bus_teardown_intr(sc->dev, irq->res, irq->tag);
5685         if (irq->res)
5686                 bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
5687
5688         bzero(irq, sizeof(*irq));
5689
5690         return (0);
5691 }
5692
5693 static void
5694 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
5695 {
5696
5697         regs->version = chip_id(sc) | chip_rev(sc) << 10;
5698         t4_get_regs(sc, buf, regs->len);
5699 }
5700
/*
 * PL indirect register access.
 *
 * A_PL_INDIR_CMD is the command register and A_PL_INDIR_DATA is the data
 * register.  The command word is laid out as:
 *   bit  31     auto-increment (PL_AUTOINC)
 *   bits 27:20  VF id (PL_VFID)
 *   bits 19:0   address (PL_ADDR)
 *
 * S_ = shift, M_ = mask (unshifted), V_ = place a value in the field,
 * G_ = extract the field from a register value.
 *
 * The V_ macros perform the shift in uint32_t.  This matters for
 * V_PL_AUTOINC: callers pass a plain int 1, and shifting a signed 1 left by
 * 31 is undefined behaviour in C.
 */
#define A_PL_INDIR_CMD	0x1f8

#define S_PL_AUTOINC	31
#define M_PL_AUTOINC	0x1U
#define V_PL_AUTOINC(x)	((uint32_t)(x) << S_PL_AUTOINC)
#define G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)

#define S_PL_VFID	20
#define M_PL_VFID	0xffU
#define V_PL_VFID(x)	((uint32_t)(x) << S_PL_VFID)
#define G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)

#define S_PL_ADDR	0
#define M_PL_ADDR	0xfffffU
#define V_PL_ADDR(x)	((uint32_t)(x) << S_PL_ADDR)
#define G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)

#define A_PL_INDIR_DATA	0x1fc
5719
5720 static uint64_t
5721 read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
5722 {
5723         u32 stats[2];
5724
5725         mtx_assert(&sc->reg_lock, MA_OWNED);
5726         if (sc->flags & IS_VF) {
5727                 stats[0] = t4_read_reg(sc, VF_MPS_REG(reg));
5728                 stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4));
5729         } else {
5730                 t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
5731                     V_PL_VFID(G_FW_VIID_VIN(viid)) |
5732                     V_PL_ADDR(VF_MPS_REG(reg)));
5733                 stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
5734                 stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
5735         }
5736         return (((uint64_t)stats[1]) << 32 | stats[0]);
5737 }
5738
5739 static void
5740 t4_get_vi_stats(struct adapter *sc, unsigned int viid,
5741     struct fw_vi_stats_vf *stats)
5742 {
5743
5744 #define GET_STAT(name) \
5745         read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L)
5746
5747         stats->tx_bcast_bytes    = GET_STAT(TX_VF_BCAST_BYTES);
5748         stats->tx_bcast_frames   = GET_STAT(TX_VF_BCAST_FRAMES);
5749         stats->tx_mcast_bytes    = GET_STAT(TX_VF_MCAST_BYTES);
5750         stats->tx_mcast_frames   = GET_STAT(TX_VF_MCAST_FRAMES);
5751         stats->tx_ucast_bytes    = GET_STAT(TX_VF_UCAST_BYTES);
5752         stats->tx_ucast_frames   = GET_STAT(TX_VF_UCAST_FRAMES);
5753         stats->tx_drop_frames    = GET_STAT(TX_VF_DROP_FRAMES);
5754         stats->tx_offload_bytes  = GET_STAT(TX_VF_OFFLOAD_BYTES);
5755         stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES);
5756         stats->rx_bcast_bytes    = GET_STAT(RX_VF_BCAST_BYTES);
5757         stats->rx_bcast_frames   = GET_STAT(RX_VF_BCAST_FRAMES);
5758         stats->rx_mcast_bytes    = GET_STAT(RX_VF_MCAST_BYTES);
5759         stats->rx_mcast_frames   = GET_STAT(RX_VF_MCAST_FRAMES);
5760         stats->rx_ucast_bytes    = GET_STAT(RX_VF_UCAST_BYTES);
5761         stats->rx_ucast_frames   = GET_STAT(RX_VF_UCAST_FRAMES);
5762         stats->rx_err_frames     = GET_STAT(RX_VF_ERR_FRAMES);
5763
5764 #undef GET_STAT
5765 }
5766
5767 static void
5768 t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
5769 {
5770         int reg;
5771
5772         t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
5773             V_PL_VFID(G_FW_VIID_VIN(viid)) |
5774             V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L)));
5775         for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
5776              reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
5777                 t4_write_reg(sc, A_PL_INDIR_DATA, 0);
5778 }
5779
5780 static void
5781 vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
5782 {
5783         struct timeval tv;
5784         const struct timeval interval = {0, 250000};    /* 250ms */
5785
5786         if (!(vi->flags & VI_INIT_DONE))
5787                 return;
5788
5789         getmicrotime(&tv);
5790         timevalsub(&tv, &interval);
5791         if (timevalcmp(&tv, &vi->last_refreshed, <))
5792                 return;
5793
5794         mtx_lock(&sc->reg_lock);
5795         t4_get_vi_stats(sc, vi->viid, &vi->stats);
5796         getmicrotime(&vi->last_refreshed);
5797         mtx_unlock(&sc->reg_lock);
5798 }
5799
5800 static void
5801 cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
5802 {
5803         u_int i, v, tnl_cong_drops, bg_map;
5804         struct timeval tv;
5805         const struct timeval interval = {0, 250000};    /* 250ms */
5806
5807         getmicrotime(&tv);
5808         timevalsub(&tv, &interval);
5809         if (timevalcmp(&tv, &pi->last_refreshed, <))
5810                 return;
5811
5812         tnl_cong_drops = 0;
5813         t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
5814         bg_map = pi->mps_bg_map;
5815         while (bg_map) {
5816                 i = ffs(bg_map) - 1;
5817                 mtx_lock(&sc->reg_lock);
5818                 t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1,
5819                     A_TP_MIB_TNL_CNG_DROP_0 + i);
5820                 mtx_unlock(&sc->reg_lock);
5821                 tnl_cong_drops += v;
5822                 bg_map &= ~(1 << i);
5823         }
5824         pi->tnl_cong_drops = tnl_cong_drops;
5825         getmicrotime(&pi->last_refreshed);
5826 }
5827
5828 static void
5829 cxgbe_tick(void *arg)
5830 {
5831         struct port_info *pi = arg;
5832         struct adapter *sc = pi->adapter;
5833
5834         PORT_LOCK_ASSERT_OWNED(pi);
5835         cxgbe_refresh_stats(sc, pi);
5836
5837         callout_schedule(&pi->tick, hz);
5838 }
5839
5840 void
5841 vi_tick(void *arg)
5842 {
5843         struct vi_info *vi = arg;
5844         struct adapter *sc = vi->pi->adapter;
5845
5846         vi_refresh_stats(sc, vi);
5847
5848         callout_schedule(&vi->tick, hz);
5849 }
5850
/*
 * Bit-name strings for the capability words, one entry per capability
 * class.  Each string starts with \20 followed by (bit number, name) pairs.
 * The bit assignments should match the fw_caps_config_<foo> enums in
 * t4fw_interface.h.
 */
static char *caps_decoder[] = {
	/* NBM */
	[0] = "\20\001IPMI\002NCSI",
	/* link */
	[1] = "\20\001PPP\002QFC\003DCBX",
	/* switch */
	[2] = "\20\001INGRESS\002EGRESS",
	/* NIC */
	[3] = "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL"
	    "\006HASHFILTER\007ETHOFLD",
	/* TOE */
	[4] = "\20\001TOE",
	/* RDMA */
	[5] = "\20\001RDDP\002RDMAC",
	/* iSCSI */
	[6] = "\20\001INITIATOR_PDU\002TARGET_PDU"
	    "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD"
	    "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD"
	    "\007T10DIF"
	    "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD",
	/* Crypto */
	[7] = "\20\001LOOKASIDE\002TLSKEYS",
	/* FCoE */
	[8] = "\20\001INITIATOR\002TARGET\003CTRL_OFLD"
	    "\004PO_INITIATOR\005PO_TARGET",
};
5871
5872 void
5873 t4_sysctls(struct adapter *sc)
5874 {
5875         struct sysctl_ctx_list *ctx;
5876         struct sysctl_oid *oid;
5877         struct sysctl_oid_list *children, *c0;
5878         static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
5879
5880         ctx = device_get_sysctl_ctx(sc->dev);
5881
5882         /*
5883          * dev.t4nex.X.
5884          */
5885         oid = device_get_sysctl_tree(sc->dev);
5886         c0 = children = SYSCTL_CHILDREN(oid);
5887
5888         sc->sc_do_rxcopy = 1;
5889         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
5890             &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
5891
5892         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
5893             sc->params.nports, "# of ports");
5894
5895         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
5896             CTLTYPE_STRING | CTLFLAG_RD, doorbells, (uintptr_t)&sc->doorbells,
5897             sysctl_bitfield_8b, "A", "available doorbells");
5898
5899         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
5900             sc->params.vpd.cclk, "core clock frequency (in KHz)");
5901
5902         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
5903             CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val,
5904             sizeof(sc->params.sge.timer_val), sysctl_int_array, "A",
5905             "interrupt holdoff timer values (us)");
5906
5907         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
5908             CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val,
5909             sizeof(sc->params.sge.counter_val), sysctl_int_array, "A",
5910             "interrupt holdoff packet counter values");
5911
5912         t4_sge_sysctls(sc, ctx, children);
5913
5914         sc->lro_timeout = 100;
5915         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
5916             &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
5917
5918         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW,
5919             &sc->debug_flags, 0, "flags to enable runtime debugging");
5920
5921         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version",
5922             CTLFLAG_RD, sc->tp_version, 0, "TP microcode version");
5923
5924         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
5925             CTLFLAG_RD, sc->fw_version, 0, "firmware version");
5926
5927         if (sc->flags & IS_VF)
5928                 return;
5929
5930         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
5931             NULL, chip_rev(sc), "chip hardware revision");
5932
5933         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn",
5934             CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number");
5935
5936         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn",
5937             CTLFLAG_RD, sc->params.vpd.pn, 0, "part number");
5938
5939         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec",
5940             CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change");
5941
5942         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version",
5943             CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version");
5944
5945         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na",
5946             CTLFLAG_RD, sc->params.vpd.na, 0, "network address");
5947
5948         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD,
5949             sc->er_version, 0, "expansion ROM version");
5950
5951         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD,
5952             sc->bs_version, 0, "bootstrap firmware version");
5953
5954         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD,
5955             NULL, sc->params.scfg_vers, "serial config version");
5956
5957         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD,
5958             NULL, sc->params.vpd_vers, "VPD version");
5959
5960         SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
5961             CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
5962
5963         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
5964             sc->cfcsum, "config file checksum");
5965
5966 #define SYSCTL_CAP(name, n, text) \
5967         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \
5968             CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], (uintptr_t)&sc->name, \
5969             sysctl_bitfield_16b, "A", "available " text " capabilities")
5970
5971         SYSCTL_CAP(nbmcaps, 0, "NBM");
5972         SYSCTL_CAP(linkcaps, 1, "link");
5973         SYSCTL_CAP(switchcaps, 2, "switch");
5974         SYSCTL_CAP(niccaps, 3, "NIC");
5975         SYSCTL_CAP(toecaps, 4, "TCP offload");
5976         SYSCTL_CAP(rdmacaps, 5, "RDMA");
5977         SYSCTL_CAP(iscsicaps, 6, "iSCSI");
5978         SYSCTL_CAP(cryptocaps, 7, "crypto");
5979         SYSCTL_CAP(fcoecaps, 8, "FCoE");
5980 #undef SYSCTL_CAP
5981
5982         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
5983             NULL, sc->tids.nftids, "number of filters");
5984
5985         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
5986             CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
5987             "chip temperature (in Celsius)");
5988
5989         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg", CTLTYPE_STRING |
5990             CTLFLAG_RD, sc, 0, sysctl_loadavg, "A",
5991             "microprocessor load averages (debug firmwares only)");
5992
5993         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD,
5994             &sc->params.core_vdd, 0, "core Vdd (in mV)");
5995
5996         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus",
5997             CTLTYPE_STRING | CTLFLAG_RD, sc, LOCAL_CPUS,
5998             sysctl_cpus, "A", "local CPUs");
5999
6000         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus",
6001             CTLTYPE_STRING | CTLFLAG_RD, sc, INTR_CPUS,
6002             sysctl_cpus, "A", "preferred CPUs for interrupts");
6003
6004         /*
6005          * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
6006          */
6007         oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
6008             CTLFLAG_RD | CTLFLAG_SKIP, NULL,
6009             "logs and miscellaneous information");
6010         children = SYSCTL_CHILDREN(oid);
6011
6012         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
6013             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6014             sysctl_cctrl, "A", "congestion control");
6015
6016         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
6017             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6018             sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
6019
6020         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
6021             CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
6022             sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
6023
6024         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
6025             CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
6026             sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
6027
6028         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
6029             CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
6030             sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
6031
6032         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
6033             CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
6034             sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
6035
6036         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
6037             CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
6038             sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
6039
6040         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
6041             CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_la,
6042             "A", "CIM logic analyzer");
6043
6044         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
6045             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6046             sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
6047
6048         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
6049             CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
6050             sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
6051
6052         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
6053             CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
6054             sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
6055
6056         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
6057             CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
6058             sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
6059
6060         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
6061             CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
6062             sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
6063
6064         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
6065             CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
6066             sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
6067
6068         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
6069             CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
6070             sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
6071
6072         if (chip_id(sc) > CHELSIO_T4) {
6073                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
6074                     CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
6075                     sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
6076
6077                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
6078                     CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
6079                     sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
6080         }
6081
6082         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
6083             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6084             sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
6085
6086         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
6087             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6088             sysctl_cim_qcfg, "A", "CIM queue configuration");
6089
6090         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
6091             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6092             sysctl_cpl_stats, "A", "CPL statistics");
6093
6094         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
6095             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6096             sysctl_ddp_stats, "A", "non-TCP DDP statistics");
6097
6098         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
6099             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6100             sysctl_devlog, "A", "firmware's device log");
6101
6102         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
6103             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6104             sysctl_fcoe_stats, "A", "FCoE statistics");
6105
6106         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
6107             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6108             sysctl_hw_sched, "A", "hardware scheduler ");
6109
6110         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
6111             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6112             sysctl_l2t, "A", "hardware L2 table");
6113
6114         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt",
6115             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6116             sysctl_smt, "A", "hardware source MAC table");
6117
6118 #ifdef INET6
6119         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "clip",
6120             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6121             sysctl_clip, "A", "active CLIP table entries");
6122 #endif
6123
6124         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
6125             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6126             sysctl_lb_stats, "A", "loopback statistics");
6127
6128         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
6129             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6130             sysctl_meminfo, "A", "memory regions");
6131
6132         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
6133             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6134             chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6,
6135             "A", "MPS TCAM entries");
6136
6137         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
6138             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6139             sysctl_path_mtus, "A", "path MTUs");
6140
6141         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
6142             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6143             sysctl_pm_stats, "A", "PM statistics");
6144
6145         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
6146             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6147             sysctl_rdma_stats, "A", "RDMA statistics");
6148
6149         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
6150             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6151             sysctl_tcp_stats, "A", "TCP statistics");
6152
6153         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
6154             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6155             sysctl_tids, "A", "TID information");
6156
6157         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
6158             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6159             sysctl_tp_err_stats, "A", "TP error statistics");
6160
6161         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask",
6162             CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I",
6163             "TP logic analyzer event capture mask");
6164
6165         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
6166             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6167             sysctl_tp_la, "A", "TP logic analyzer");
6168
6169         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
6170             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6171             sysctl_tx_rate, "A", "Tx rate");
6172
6173         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
6174             CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6175             sysctl_ulprx_la, "A", "ULPRX logic analyzer");
6176
6177         if (chip_id(sc) >= CHELSIO_T5) {
6178                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
6179                     CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
6180                     sysctl_wcwr_stats, "A", "write combined work requests");
6181         }
6182
6183 #ifdef TCP_OFFLOAD
6184         if (is_offload(sc)) {
6185                 int i;
6186                 char s[4];
6187
6188                 /*
6189                  * dev.t4nex.X.toe.
6190                  */
6191                 oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
6192                     NULL, "TOE parameters");
6193                 children = SYSCTL_CHILDREN(oid);
6194
6195                 sc->tt.cong_algorithm = -1;
6196                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm",
6197                     CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control "
6198                     "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, "
6199                     "3 = highspeed)");
6200
6201                 sc->tt.sndbuf = 256 * 1024;
6202                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
6203                     &sc->tt.sndbuf, 0, "max hardware send buffer size");
6204
6205                 sc->tt.ddp = 0;
6206                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
6207                     &sc->tt.ddp, 0, "DDP allowed");
6208
6209                 sc->tt.rx_coalesce = 1;
6210                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
6211                     CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
6212
6213                 sc->tt.tls = 0;
6214                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW,
6215                     &sc->tt.tls, 0, "Inline TLS allowed");
6216
6217                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports",
6218                     CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports,
6219                     "I", "TCP ports that use inline TLS+TOE RX");
6220
6221                 sc->tt.tx_align = 1;
6222                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
6223                     CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
6224
6225                 sc->tt.tx_zcopy = 0;
6226                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy",
6227                     CTLFLAG_RW, &sc->tt.tx_zcopy, 0,
6228                     "Enable zero-copy aio_write(2)");
6229
6230                 sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading;
6231                 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
6232                     "cop_managed_offloading", CTLFLAG_RW,
6233                     &sc->tt.cop_managed_offloading, 0,
6234                     "COP (Connection Offload Policy) controls all TOE offload");
6235
6236                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick",
6237                     CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A",
6238                     "TP timer tick (us)");
6239
6240                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick",
6241                     CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A",
6242                     "TCP timestamp tick (us)");
6243
6244                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick",
6245                     CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A",
6246                     "DACK tick (us)");
6247
6248                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer",
6249                     CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer,
6250                     "IU", "DACK timer (us)");
6251
6252                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min",
6253                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN,
6254                     sysctl_tp_timer, "LU", "Minimum retransmit interval (us)");
6255
6256                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max",
6257                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX,
6258                     sysctl_tp_timer, "LU", "Maximum retransmit interval (us)");
6259
6260                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min",
6261                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN,
6262                     sysctl_tp_timer, "LU", "Persist timer min (us)");
6263
6264                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max",
6265                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX,
6266                     sysctl_tp_timer, "LU", "Persist timer max (us)");
6267
6268                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle",
6269                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE,
6270                     sysctl_tp_timer, "LU", "Keepalive idle timer (us)");
6271
6272                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval",
6273                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL,
6274                     sysctl_tp_timer, "LU", "Keepalive interval timer (us)");
6275
6276                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt",
6277                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT,
6278                     sysctl_tp_timer, "LU", "Initial SRTT (us)");
6279
6280                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer",
6281                     CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER,
6282                     sysctl_tp_timer, "LU", "FINWAIT2 timer (us)");
6283
6284                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count",
6285                     CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX,
6286                     sysctl_tp_shift_cnt, "IU",
6287                     "Number of SYN retransmissions before abort");
6288
6289                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count",
6290                     CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2,
6291                     sysctl_tp_shift_cnt, "IU",
6292                     "Number of retransmissions before abort");
6293
6294                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count",
6295                     CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2,
6296                     sysctl_tp_shift_cnt, "IU",
6297                     "Number of keepalive probes before abort");
6298
6299                 oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff",
6300                     CTLFLAG_RD, NULL, "TOE retransmit backoffs");
6301                 children = SYSCTL_CHILDREN(oid);
6302                 for (i = 0; i < 16; i++) {
6303                         snprintf(s, sizeof(s), "%u", i);
6304                         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s,
6305                             CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff,
6306                             "IU", "TOE retransmit backoff");
6307                 }
6308         }
6309 #endif
6310 }
6311
6312 void
6313 vi_sysctls(struct vi_info *vi)
6314 {
6315         struct sysctl_ctx_list *ctx;
6316         struct sysctl_oid *oid;
6317         struct sysctl_oid_list *children;
6318
6319         ctx = device_get_sysctl_ctx(vi->dev);
6320
6321         /*
6322          * dev.v?(cxgbe|cxl).X.
6323          */
6324         oid = device_get_sysctl_tree(vi->dev);
6325         children = SYSCTL_CHILDREN(oid);
6326
6327         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
6328             vi->viid, "VI identifer");
6329         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
6330             &vi->nrxq, 0, "# of rx queues");
6331         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
6332             &vi->ntxq, 0, "# of tx queues");
6333         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
6334             &vi->first_rxq, 0, "index of first rx queue");
6335         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
6336             &vi->first_txq, 0, "index of first tx queue");
6337         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_base", CTLFLAG_RD, NULL,
6338             vi->rss_base, "start of RSS indirection table");
6339         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
6340             vi->rss_size, "size of RSS indirection table");
6341
6342         if (IS_MAIN_VI(vi)) {
6343                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
6344                     CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
6345                     "Reserve queue 0 for non-flowid packets");
6346         }
6347
6348 #ifdef TCP_OFFLOAD
6349         if (vi->nofldrxq != 0) {
6350                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
6351                     &vi->nofldrxq, 0,
6352                     "# of rx queues for offloaded TCP connections");
6353                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
6354                     CTLFLAG_RD, &vi->first_ofld_rxq, 0,
6355                     "index of first TOE rx queue");
6356                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld",
6357                     CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6358                     sysctl_holdoff_tmr_idx_ofld, "I",
6359                     "holdoff timer index for TOE queues");
6360                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld",
6361                     CTLTYPE_INT | CTLFLAG_RW, vi, 0,
6362                     sysctl_holdoff_pktc_idx_ofld, "I",
6363                     "holdoff packet counter index for TOE queues");
6364         }
6365 #endif
6366 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
6367         if (vi->nofldtxq != 0) {
6368                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
6369                     &vi->nofldtxq, 0,
6370                     "# of tx queues for TOE/ETHOFLD");
6371                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
6372                     CTLFLAG_RD, &vi->first_ofld_txq, 0,
6373                     "index of first TOE/ETHOFLD tx queue");
6374         }
6375 #endif
6376 #ifdef DEV_NETMAP
6377         if (vi->nnmrxq != 0) {
6378                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
6379                     &vi->nnmrxq, 0, "# of netmap rx queues");
6380                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
6381                     &vi->nnmtxq, 0, "# of netmap tx queues");
6382                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
6383                     CTLFLAG_RD, &vi->first_nm_rxq, 0,
6384                     "index of first netmap rx queue");
6385                 SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
6386                     CTLFLAG_RD, &vi->first_nm_txq, 0,
6387                     "index of first netmap tx queue");
6388         }
6389 #endif
6390
6391         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
6392             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
6393             "holdoff timer index");
6394         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
6395             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
6396             "holdoff packet counter index");
6397
6398         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
6399             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
6400             "rx queue size");
6401         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
6402             CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
6403             "tx queue size");
6404 }
6405
6406 static void
6407 cxgbe_sysctls(struct port_info *pi)
6408 {
6409         struct sysctl_ctx_list *ctx;
6410         struct sysctl_oid *oid;
6411         struct sysctl_oid_list *children, *children2;
6412         struct adapter *sc = pi->adapter;
6413         int i;
6414         char name[16];
6415         static char *tc_flags = {"\20\1USER\2SYNC\3ASYNC\4ERR"};
6416
6417         ctx = device_get_sysctl_ctx(pi->dev);
6418
6419         /*
6420          * dev.cxgbe.X.
6421          */
6422         oid = device_get_sysctl_tree(pi->dev);
6423         children = SYSCTL_CHILDREN(oid);
6424
6425         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
6426            CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
6427         if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
6428                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
6429                     CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
6430                     "PHY temperature (in Celsius)");
6431                 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
6432                     CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
6433                     "PHY firmware version");
6434         }
6435
6436         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
6437             CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A",
6438     "PAUSE settings (bit 0 = rx_pause, 1 = tx_pause, 2 = pause_autoneg)");
6439         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec",
6440             CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A",
6441             "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)");
6442         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg",
6443             CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I",
6444             "autonegotiation (-1 = not supported)");
6445
6446         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL,
6447             port_top_speed(pi), "max speed (in Gbps)");
6448         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL,
6449             pi->mps_bg_map, "MPS buffer group map");
6450         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD,
6451             NULL, pi->rx_e_chan_map, "TP rx e-channel map");
6452
6453         if (sc->flags & IS_VF)
6454                 return;
6455
6456         /*
6457          * dev.(cxgbe|cxl).X.tc.
6458          */
6459         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL,
6460             "Tx scheduler traffic classes (cl_rl)");
6461         children2 = SYSCTL_CHILDREN(oid);
6462         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize",
6463             CTLFLAG_RW, &pi->sched_params->pktsize, 0,
6464             "pktsize for per-flow cl-rl (0 means up to the driver )");
6465         SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize",
6466             CTLFLAG_RW, &pi->sched_params->burstsize, 0,
6467             "burstsize for per-flow cl-rl (0 means up to the driver)");
6468         for (i = 0; i < sc->chip_params->nsched_cls; i++) {
6469                 struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i];
6470
6471                 snprintf(name, sizeof(name), "%d", i);
6472                 children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx,
6473                     SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL,
6474                     "traffic class"));
6475                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags",
6476                     CTLTYPE_STRING | CTLFLAG_RD, tc_flags, (uintptr_t)&tc->flags,
6477                     sysctl_bitfield_8b, "A", "flags");
6478                 SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount",
6479                     CTLFLAG_RD, &tc->refcount, 0, "references to this class");
6480                 SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params",
6481                     CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i,
6482                     sysctl_tc_params, "A", "traffic class parameters");
6483         }
6484
6485         /*
6486          * dev.cxgbe.X.stats.
6487          */
6488         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
6489             NULL, "port statistics");
6490         children = SYSCTL_CHILDREN(oid);
6491         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
6492             &pi->tx_parse_error, 0,
6493             "# of tx packets with invalid length or # of segments");
6494
6495 #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
6496         SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
6497             CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
6498             sysctl_handle_t4_reg64, "QU", desc)
6499
6500         SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
6501             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
6502         SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
6503             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
6504         SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
6505             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
6506         SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
6507             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
6508         SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
6509             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
6510         SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
6511             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
6512         SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
6513             "# of tx frames in this range",
6514             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
6515         SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
6516             "# of tx frames in this range",
6517             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
6518         SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
6519             "# of tx frames in this range",
6520             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
6521         SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
6522             "# of tx frames in this range",
6523             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
6524         SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
6525             "# of tx frames in this range",
6526             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
6527         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
6528             "# of tx frames in this range",
6529             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
6530         SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
6531             "# of tx frames in this range",
6532             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
6533         SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
6534             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
6535         SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
6536             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
6537         SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
6538             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
6539         SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
6540             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
6541         SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
6542             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
6543         SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
6544             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
6545         SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
6546             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
6547         SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
6548             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
6549         SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
6550             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
6551         SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
6552             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
6553
6554         SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
6555             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
6556         SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
6557             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
6558         SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
6559             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
6560         SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
6561             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
6562         SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
6563             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
6564         SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
6565             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
6566         SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
6567             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
6568         SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
6569             "# of frames received with bad FCS",
6570             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
6571         SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
6572             "# of frames received with length error",
6573             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
6574         SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
6575             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
6576         SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
6577             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
6578         SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
6579             "# of rx frames in this range",
6580             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
6581         SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
6582             "# of rx frames in this range",
6583             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
6584         SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
6585             "# of rx frames in this range",
6586             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
6587         SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
6588             "# of rx frames in this range",
6589             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
6590         SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
6591             "# of rx frames in this range",
6592             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
6593         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
6594             "# of rx frames in this range",
6595             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
6596         SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
6597             "# of rx frames in this range",
6598             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
6599         SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
6600             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
6601         SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
6602             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
6603         SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
6604             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
6605         SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
6606             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
6607         SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
6608             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
6609         SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
6610             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
6611         SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
6612             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
6613         SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
6614             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
6615         SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
6616             PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
6617
6618 #undef SYSCTL_ADD_T4_REG64
6619
6620 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
6621         SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
6622             &pi->stats.name, desc)
6623
6624         /* We get these from port_stats and they may be stale by up to 1s */
6625         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
6626             "# drops due to buffer-group 0 overflows");
6627         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
6628             "# drops due to buffer-group 1 overflows");
6629         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
6630             "# drops due to buffer-group 2 overflows");
6631         SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
6632             "# drops due to buffer-group 3 overflows");
6633         SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
6634             "# of buffer-group 0 truncated packets");
6635         SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
6636             "# of buffer-group 1 truncated packets");
6637         SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
6638             "# of buffer-group 2 truncated packets");
6639         SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
6640             "# of buffer-group 3 truncated packets");
6641
6642 #undef SYSCTL_ADD_T4_PORTSTAT
6643
6644         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records",
6645             CTLFLAG_RD, &pi->tx_tls_records,
6646             "# of TLS records transmitted");
6647         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets",
6648             CTLFLAG_RD, &pi->tx_tls_octets,
6649             "# of payload octets in transmitted TLS records");
6650         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records",
6651             CTLFLAG_RD, &pi->rx_tls_records,
6652             "# of TLS records received");
6653         SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets",
6654             CTLFLAG_RD, &pi->rx_tls_octets,
6655             "# of payload octets in received TLS records");
6656 }
6657
6658 static int
6659 sysctl_int_array(SYSCTL_HANDLER_ARGS)
6660 {
6661         int rc, *i, space = 0;
6662         struct sbuf sb;
6663
6664         sbuf_new_for_sysctl(&sb, NULL, 64, req);
6665         for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
6666                 if (space)
6667                         sbuf_printf(&sb, " ");
6668                 sbuf_printf(&sb, "%d", *i);
6669                 space = 1;
6670         }
6671         rc = sbuf_finish(&sb);
6672         sbuf_delete(&sb);
6673         return (rc);
6674 }
6675
6676 static int
6677 sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
6678 {
6679         int rc;
6680         struct sbuf *sb;
6681
6682         rc = sysctl_wire_old_buffer(req, 0);
6683         if (rc != 0)
6684                 return(rc);
6685
6686         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6687         if (sb == NULL)
6688                 return (ENOMEM);
6689
6690         sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
6691         rc = sbuf_finish(sb);
6692         sbuf_delete(sb);
6693
6694         return (rc);
6695 }
6696
6697 static int
6698 sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
6699 {
6700         int rc;
6701         struct sbuf *sb;
6702
6703         rc = sysctl_wire_old_buffer(req, 0);
6704         if (rc != 0)
6705                 return(rc);
6706
6707         sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6708         if (sb == NULL)
6709                 return (ENOMEM);
6710
6711         sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
6712         rc = sbuf_finish(sb);
6713         sbuf_delete(sb);
6714
6715         return (rc);
6716 }
6717
6718 static int
6719 sysctl_btphy(SYSCTL_HANDLER_ARGS)
6720 {
6721         struct port_info *pi = arg1;
6722         int op = arg2;
6723         struct adapter *sc = pi->adapter;
6724         u_int v;
6725         int rc;
6726
6727         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
6728         if (rc)
6729                 return (rc);
6730         /* XXX: magic numbers */
6731         rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
6732             &v);
6733         end_synchronized_op(sc, 0);
6734         if (rc)
6735                 return (rc);
6736         if (op == 0)
6737                 v /= 256;
6738
6739         rc = sysctl_handle_int(oidp, &v, 0, req);
6740         return (rc);
6741 }
6742
6743 static int
6744 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
6745 {
6746         struct vi_info *vi = arg1;
6747         int rc, val;
6748
6749         val = vi->rsrv_noflowq;
6750         rc = sysctl_handle_int(oidp, &val, 0, req);
6751         if (rc != 0 || req->newptr == NULL)
6752                 return (rc);
6753
6754         if ((val >= 1) && (vi->ntxq > 1))
6755                 vi->rsrv_noflowq = 1;
6756         else
6757                 vi->rsrv_noflowq = 0;
6758
6759         return (rc);
6760 }
6761
6762 static int
6763 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
6764 {
6765         struct vi_info *vi = arg1;
6766         struct adapter *sc = vi->pi->adapter;
6767         int idx, rc, i;
6768         struct sge_rxq *rxq;
6769         uint8_t v;
6770
6771         idx = vi->tmr_idx;
6772
6773         rc = sysctl_handle_int(oidp, &idx, 0, req);
6774         if (rc != 0 || req->newptr == NULL)
6775                 return (rc);
6776
6777         if (idx < 0 || idx >= SGE_NTIMERS)
6778                 return (EINVAL);
6779
6780         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6781             "t4tmr");
6782         if (rc)
6783                 return (rc);
6784
6785         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
6786         for_each_rxq(vi, i, rxq) {
6787 #ifdef atomic_store_rel_8
6788                 atomic_store_rel_8(&rxq->iq.intr_params, v);
6789 #else
6790                 rxq->iq.intr_params = v;
6791 #endif
6792         }
6793         vi->tmr_idx = idx;
6794
6795         end_synchronized_op(sc, LOCK_HELD);
6796         return (0);
6797 }
6798
6799 static int
6800 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
6801 {
6802         struct vi_info *vi = arg1;
6803         struct adapter *sc = vi->pi->adapter;
6804         int idx, rc;
6805
6806         idx = vi->pktc_idx;
6807
6808         rc = sysctl_handle_int(oidp, &idx, 0, req);
6809         if (rc != 0 || req->newptr == NULL)
6810                 return (rc);
6811
6812         if (idx < -1 || idx >= SGE_NCOUNTERS)
6813                 return (EINVAL);
6814
6815         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6816             "t4pktc");
6817         if (rc)
6818                 return (rc);
6819
6820         if (vi->flags & VI_INIT_DONE)
6821                 rc = EBUSY; /* cannot be changed once the queues are created */
6822         else
6823                 vi->pktc_idx = idx;
6824
6825         end_synchronized_op(sc, LOCK_HELD);
6826         return (rc);
6827 }
6828
6829 static int
6830 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
6831 {
6832         struct vi_info *vi = arg1;
6833         struct adapter *sc = vi->pi->adapter;
6834         int qsize, rc;
6835
6836         qsize = vi->qsize_rxq;
6837
6838         rc = sysctl_handle_int(oidp, &qsize, 0, req);
6839         if (rc != 0 || req->newptr == NULL)
6840                 return (rc);
6841
6842         if (qsize < 128 || (qsize & 7))
6843                 return (EINVAL);
6844
6845         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6846             "t4rxqs");
6847         if (rc)
6848                 return (rc);
6849
6850         if (vi->flags & VI_INIT_DONE)
6851                 rc = EBUSY; /* cannot be changed once the queues are created */
6852         else
6853                 vi->qsize_rxq = qsize;
6854
6855         end_synchronized_op(sc, LOCK_HELD);
6856         return (rc);
6857 }
6858
6859 static int
6860 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
6861 {
6862         struct vi_info *vi = arg1;
6863         struct adapter *sc = vi->pi->adapter;
6864         int qsize, rc;
6865
6866         qsize = vi->qsize_txq;
6867
6868         rc = sysctl_handle_int(oidp, &qsize, 0, req);
6869         if (rc != 0 || req->newptr == NULL)
6870                 return (rc);
6871
6872         if (qsize < 128 || qsize > 65536)
6873                 return (EINVAL);
6874
6875         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
6876             "t4txqs");
6877         if (rc)
6878                 return (rc);
6879
6880         if (vi->flags & VI_INIT_DONE)
6881                 rc = EBUSY; /* cannot be changed once the queues are created */
6882         else
6883                 vi->qsize_txq = qsize;
6884
6885         end_synchronized_op(sc, LOCK_HELD);
6886         return (rc);
6887 }
6888
6889 static int
6890 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
6891 {
6892         struct port_info *pi = arg1;
6893         struct adapter *sc = pi->adapter;
6894         struct link_config *lc = &pi->link_cfg;
6895         int rc;
6896
6897         if (req->newptr == NULL) {
6898                 struct sbuf *sb;
6899                 static char *bits = "\20\1RX\2TX\3AUTO";
6900
6901                 rc = sysctl_wire_old_buffer(req, 0);
6902                 if (rc != 0)
6903                         return(rc);
6904
6905                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6906                 if (sb == NULL)
6907                         return (ENOMEM);
6908
6909                 if (lc->link_ok) {
6910                         sbuf_printf(sb, "%b", (lc->fc & (PAUSE_TX | PAUSE_RX)) |
6911                             (lc->requested_fc & PAUSE_AUTONEG), bits);
6912                 } else {
6913                         sbuf_printf(sb, "%b", lc->requested_fc & (PAUSE_TX |
6914                             PAUSE_RX | PAUSE_AUTONEG), bits);
6915                 }
6916                 rc = sbuf_finish(sb);
6917                 sbuf_delete(sb);
6918         } else {
6919                 char s[2];
6920                 int n;
6921
6922                 s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX |
6923                     PAUSE_AUTONEG));
6924                 s[1] = 0;
6925
6926                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6927                 if (rc != 0)
6928                         return(rc);
6929
6930                 if (s[1] != 0)
6931                         return (EINVAL);
6932                 if (s[0] < '0' || s[0] > '9')
6933                         return (EINVAL);        /* not a number */
6934                 n = s[0] - '0';
6935                 if (n & ~(PAUSE_TX | PAUSE_RX | PAUSE_AUTONEG))
6936                         return (EINVAL);        /* some other bit is set too */
6937
6938                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
6939                     "t4PAUSE");
6940                 if (rc)
6941                         return (rc);
6942                 PORT_LOCK(pi);
6943                 lc->requested_fc = n;
6944                 fixup_link_config(pi);
6945                 if (pi->up_vis > 0)
6946                         rc = apply_link_config(pi);
6947                 set_current_media(pi);
6948                 PORT_UNLOCK(pi);
6949                 end_synchronized_op(sc, 0);
6950         }
6951
6952         return (rc);
6953 }
6954
6955 static int
6956 sysctl_fec(SYSCTL_HANDLER_ARGS)
6957 {
6958         struct port_info *pi = arg1;
6959         struct adapter *sc = pi->adapter;
6960         struct link_config *lc = &pi->link_cfg;
6961         int rc;
6962         int8_t old;
6963
6964         if (req->newptr == NULL) {
6965                 struct sbuf *sb;
6966                 static char *bits = "\20\1RS\2BASE-R\3RSVD1\4RSVD2\5RSVD3\6AUTO";
6967
6968                 rc = sysctl_wire_old_buffer(req, 0);
6969                 if (rc != 0)
6970                         return(rc);
6971
6972                 sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
6973                 if (sb == NULL)
6974                         return (ENOMEM);
6975
6976                 /*
6977                  * Display the requested_fec when the link is down -- the actual
6978                  * FEC makes sense only when the link is up.
6979                  */
6980                 if (lc->link_ok) {
6981                         sbuf_printf(sb, "%b", (lc->fec & M_FW_PORT_CAP32_FEC) |
6982                             (lc->requested_fec & FEC_AUTO), bits);
6983                 } else {
6984                         sbuf_printf(sb, "%b", lc->requested_fec, bits);
6985                 }
6986                 rc = sbuf_finish(sb);
6987                 sbuf_delete(sb);
6988         } else {
6989                 char s[3];
6990                 int n;
6991
6992                 snprintf(s, sizeof(s), "%d",
6993                     lc->requested_fec == FEC_AUTO ? -1 :
6994                     lc->requested_fec & M_FW_PORT_CAP32_FEC);
6995
6996                 rc = sysctl_handle_string(oidp, s, sizeof(s), req);
6997                 if (rc != 0)
6998                         return(rc);
6999
7000                 n = strtol(&s[0], NULL, 0);
7001                 if (n < 0 || n & FEC_AUTO)
7002                         n = FEC_AUTO;
7003                 else {
7004                         if (n & ~M_FW_PORT_CAP32_FEC)
7005                                 return (EINVAL);/* some other bit is set too */
7006                         if (!powerof2(n))
7007                                 return (EINVAL);/* one bit can be set at most */
7008                 }
7009
7010                 rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7011                     "t4fec");
7012                 if (rc)
7013                         return (rc);
7014                 PORT_LOCK(pi);
7015                 old = lc->requested_fec;
7016                 if (n == FEC_AUTO)
7017                         lc->requested_fec = FEC_AUTO;
7018                 else if (n == 0)
7019                         lc->requested_fec = FEC_NONE;
7020                 else {
7021                         if ((lc->supported | V_FW_PORT_CAP32_FEC(n)) !=
7022                             lc->supported) {
7023                                 rc = ENOTSUP;
7024                                 goto done;
7025                         }
7026                         lc->requested_fec = n;
7027                 }
7028                 fixup_link_config(pi);
7029                 if (pi->up_vis > 0) {
7030                         rc = apply_link_config(pi);
7031                         if (rc != 0) {
7032                                 lc->requested_fec = old;
7033                                 if (rc == FW_EPROTO)
7034                                         rc = ENOTSUP;
7035                         }
7036                 }
7037 done:
7038                 PORT_UNLOCK(pi);
7039                 end_synchronized_op(sc, 0);
7040         }
7041
7042         return (rc);
7043 }
7044
7045 static int
7046 sysctl_autoneg(SYSCTL_HANDLER_ARGS)
7047 {
7048         struct port_info *pi = arg1;
7049         struct adapter *sc = pi->adapter;
7050         struct link_config *lc = &pi->link_cfg;
7051         int rc, val;
7052
7053         if (lc->supported & FW_PORT_CAP32_ANEG)
7054                 val = lc->requested_aneg == AUTONEG_DISABLE ? 0 : 1;
7055         else
7056                 val = -1;
7057         rc = sysctl_handle_int(oidp, &val, 0, req);
7058         if (rc != 0 || req->newptr == NULL)
7059                 return (rc);
7060         if (val == 0)
7061                 val = AUTONEG_DISABLE;
7062         else if (val == 1)
7063                 val = AUTONEG_ENABLE;
7064         else
7065                 val = AUTONEG_AUTO;
7066
7067         rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
7068             "t4aneg");
7069         if (rc)
7070                 return (rc);
7071         PORT_LOCK(pi);
7072         if (val == AUTONEG_ENABLE && !(lc->supported & FW_PORT_CAP32_ANEG)) {
7073                 rc = ENOTSUP;
7074                 goto done;
7075         }
7076         lc->requested_aneg = val;
7077         fixup_link_config(pi);
7078         if (pi->up_vis > 0)
7079                 rc = apply_link_config(pi);
7080         set_current_media(pi);
7081 done:
7082         PORT_UNLOCK(pi);
7083         end_synchronized_op(sc, 0);
7084         return (rc);
7085 }
7086
7087 static int
7088 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
7089 {
7090         struct adapter *sc = arg1;
7091         int reg = arg2;
7092         uint64_t val;
7093
7094         val = t4_read_reg64(sc, reg);
7095
7096         return (sysctl_handle_64(oidp, &val, 0, req));
7097 }
7098
7099 static int
7100 sysctl_temperature(SYSCTL_HANDLER_ARGS)
7101 {
7102         struct adapter *sc = arg1;
7103         int rc, t;
7104         uint32_t param, val;
7105
7106         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
7107         if (rc)
7108                 return (rc);
7109         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7110             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
7111             V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
7112         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7113         end_synchronized_op(sc, 0);
7114         if (rc)
7115                 return (rc);
7116
7117         /* unknown is returned as 0 but we display -1 in that case */
7118         t = val == 0 ? -1 : val;
7119
7120         rc = sysctl_handle_int(oidp, &t, 0, req);
7121         return (rc);
7122 }
7123
7124 static int
7125 sysctl_loadavg(SYSCTL_HANDLER_ARGS)
7126 {
7127         struct adapter *sc = arg1;
7128         struct sbuf *sb;
7129         int rc;
7130         uint32_t param, val;
7131
7132         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
7133         if (rc)
7134                 return (rc);
7135         param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
7136             V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
7137         rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
7138         end_synchronized_op(sc, 0);
7139         if (rc)
7140                 return (rc);
7141
7142         rc = sysctl_wire_old_buffer(req, 0);
7143         if (rc != 0)
7144                 return (rc);
7145
7146         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7147         if (sb == NULL)
7148                 return (ENOMEM);
7149
7150         if (val == 0xffffffff) {
7151                 /* Only debug and custom firmwares report load averages. */
7152                 sbuf_printf(sb, "not available");
7153         } else {
7154                 sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
7155                     (val >> 16) & 0xff);
7156         }
7157         rc = sbuf_finish(sb);
7158         sbuf_delete(sb);
7159
7160         return (rc);
7161 }
7162
7163 static int
7164 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
7165 {
7166         struct adapter *sc = arg1;
7167         struct sbuf *sb;
7168         int rc, i;
7169         uint16_t incr[NMTUS][NCCTRL_WIN];
7170         static const char *dec_fac[] = {
7171                 "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
7172                 "0.9375"
7173         };
7174
7175         rc = sysctl_wire_old_buffer(req, 0);
7176         if (rc != 0)
7177                 return (rc);
7178
7179         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7180         if (sb == NULL)
7181                 return (ENOMEM);
7182
7183         t4_read_cong_tbl(sc, incr);
7184
7185         for (i = 0; i < NCCTRL_WIN; ++i) {
7186                 sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
7187                     incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
7188                     incr[5][i], incr[6][i], incr[7][i]);
7189                 sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
7190                     incr[8][i], incr[9][i], incr[10][i], incr[11][i],
7191                     incr[12][i], incr[13][i], incr[14][i], incr[15][i],
7192                     sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
7193         }
7194
7195         rc = sbuf_finish(sb);
7196         sbuf_delete(sb);
7197
7198         return (rc);
7199 }
7200
7201 static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
7202         "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",   /* ibq's */
7203         "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */
7204         "SGE0-RX", "SGE1-RX"    /* additional obq's (T5 onwards) */
7205 };
7206
7207 static int
7208 sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
7209 {
7210         struct adapter *sc = arg1;
7211         struct sbuf *sb;
7212         int rc, i, n, qid = arg2;
7213         uint32_t *buf, *p;
7214         char *qtype;
7215         u_int cim_num_obq = sc->chip_params->cim_num_obq;
7216
7217         KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
7218             ("%s: bad qid %d\n", __func__, qid));
7219
7220         if (qid < CIM_NUM_IBQ) {
7221                 /* inbound queue */
7222                 qtype = "IBQ";
7223                 n = 4 * CIM_IBQ_SIZE;
7224                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7225                 rc = t4_read_cim_ibq(sc, qid, buf, n);
7226         } else {
7227                 /* outbound queue */
7228                 qtype = "OBQ";
7229                 qid -= CIM_NUM_IBQ;
7230                 n = 4 * cim_num_obq * CIM_OBQ_SIZE;
7231                 buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
7232                 rc = t4_read_cim_obq(sc, qid, buf, n);
7233         }
7234
7235         if (rc < 0) {
7236                 rc = -rc;
7237                 goto done;
7238         }
7239         n = rc * sizeof(uint32_t);      /* rc has # of words actually read */
7240
7241         rc = sysctl_wire_old_buffer(req, 0);
7242         if (rc != 0)
7243                 goto done;
7244
7245         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7246         if (sb == NULL) {
7247                 rc = ENOMEM;
7248                 goto done;
7249         }
7250
7251         sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]);
7252         for (i = 0, p = buf; i < n; i += 16, p += 4)
7253                 sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1],
7254                     p[2], p[3]);
7255
7256         rc = sbuf_finish(sb);
7257         sbuf_delete(sb);
7258 done:
7259         free(buf, M_CXGBE);
7260         return (rc);
7261 }
7262
7263 static void
7264 sbuf_cim_la4(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
7265 {
7266         uint32_t *p;
7267
7268         sbuf_printf(sb, "Status   Data      PC%s",
7269             cfg & F_UPDBGLACAPTPCONLY ? "" :
7270             "     LS0Stat  LS0Addr             LS0Data");
7271
7272         for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) {
7273                 if (cfg & F_UPDBGLACAPTPCONLY) {
7274                         sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
7275                             p[6], p[7]);
7276                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
7277                             (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
7278                             p[4] & 0xff, p[5] >> 8);
7279                         sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
7280                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7281                             p[1] & 0xf, p[2] >> 4);
7282                 } else {
7283                         sbuf_printf(sb,
7284                             "\n  %02x   %x%07x %x%07x %08x %08x "
7285                             "%08x%08x%08x%08x",
7286                             (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
7287                             p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
7288                             p[6], p[7]);
7289                 }
7290         }
7291 }
7292
7293 static void
7294 sbuf_cim_la6(struct adapter *sc, struct sbuf *sb, uint32_t *buf, uint32_t cfg)
7295 {
7296         uint32_t *p;
7297
7298         sbuf_printf(sb, "Status   Inst    Data      PC%s",
7299             cfg & F_UPDBGLACAPTPCONLY ? "" :
7300             "     LS0Stat  LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data");
7301
7302         for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) {
7303                 if (cfg & F_UPDBGLACAPTPCONLY) {
7304                         sbuf_printf(sb, "\n  %02x   %08x %08x %08x",
7305                             p[3] & 0xff, p[2], p[1], p[0]);
7306                         sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x %02x%06x",
7307                             (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
7308                             p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
7309                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x",
7310                             (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
7311                             p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
7312                             p[6] >> 16);
7313                 } else {
7314                         sbuf_printf(sb, "\n  %02x   %04x%04x %04x%04x %04x%04x "
7315                             "%08x %08x %08x %08x %08x %08x",
7316                             (p[9] >> 16) & 0xff,
7317                             p[9] & 0xffff, p[8] >> 16,
7318                             p[8] & 0xffff, p[7] >> 16,
7319                             p[7] & 0xffff, p[6] >> 16,
7320                             p[2], p[1], p[0], p[5], p[4], p[3]);
7321                 }
7322         }
7323 }
7324
7325 static int
7326 sbuf_cim_la(struct adapter *sc, struct sbuf *sb, int flags)
7327 {
7328         uint32_t cfg, *buf;
7329         int rc;
7330
7331         rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
7332         if (rc != 0)
7333                 return (rc);
7334
7335         MPASS(flags == M_WAITOK || flags == M_NOWAIT);
7336         buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
7337             M_ZERO | flags);
7338         if (buf == NULL)
7339                 return (ENOMEM);
7340
7341         rc = -t4_cim_read_la(sc, buf, NULL);
7342         if (rc != 0)
7343                 goto done;
7344         if (chip_id(sc) < CHELSIO_T6)
7345                 sbuf_cim_la4(sc, sb, buf, cfg);
7346         else
7347                 sbuf_cim_la6(sc, sb, buf, cfg);
7348
7349 done:
7350         free(buf, M_CXGBE);
7351         return (rc);
7352 }
7353
7354 static int
7355 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
7356 {
7357         struct adapter *sc = arg1;
7358         struct sbuf *sb;
7359         int rc;
7360
7361         rc = sysctl_wire_old_buffer(req, 0);
7362         if (rc != 0)
7363                 return (rc);
7364         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7365         if (sb == NULL)
7366                 return (ENOMEM);
7367
7368         rc = sbuf_cim_la(sc, sb, M_WAITOK);
7369         if (rc == 0)
7370                 rc = sbuf_finish(sb);
7371         sbuf_delete(sb);
7372         return (rc);
7373 }
7374
7375 bool
7376 t4_os_dump_cimla(struct adapter *sc, int arg, bool verbose)
7377 {
7378         struct sbuf sb;
7379         int rc;
7380
7381         if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
7382                 return (false);
7383         rc = sbuf_cim_la(sc, &sb, M_NOWAIT);
7384         if (rc == 0) {
7385                 rc = sbuf_finish(&sb);
7386                 if (rc == 0) {
7387                         log(LOG_DEBUG, "%s: CIM LA dump follows.\n%s",
7388                                 device_get_nameunit(sc->dev), sbuf_data(&sb));
7389                 }
7390         }
7391         sbuf_delete(&sb);
7392         return (false);
7393 }
7394
7395 static int
7396 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
7397 {
7398         struct adapter *sc = arg1;
7399         u_int i;
7400         struct sbuf *sb;
7401         uint32_t *buf, *p;
7402         int rc;
7403
7404         rc = sysctl_wire_old_buffer(req, 0);
7405         if (rc != 0)
7406                 return (rc);
7407
7408         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7409         if (sb == NULL)
7410                 return (ENOMEM);
7411
7412         buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
7413             M_ZERO | M_WAITOK);
7414
7415         t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
7416         p = buf;
7417
7418         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7419                 sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
7420                     p[1], p[0]);
7421         }
7422
7423         sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
7424         for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
7425                 sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
7426                     (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
7427                     (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
7428                     (p[1] >> 2) | ((p[2] & 3) << 30),
7429                     (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
7430                     p[0] & 1);
7431         }
7432
7433         rc = sbuf_finish(sb);
7434         sbuf_delete(sb);
7435         free(buf, M_CXGBE);
7436         return (rc);
7437 }
7438
7439 static int
7440 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
7441 {
7442         struct adapter *sc = arg1;
7443         u_int i;
7444         struct sbuf *sb;
7445         uint32_t *buf, *p;
7446         int rc;
7447
7448         rc = sysctl_wire_old_buffer(req, 0);
7449         if (rc != 0)
7450                 return (rc);
7451
7452         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7453         if (sb == NULL)
7454                 return (ENOMEM);
7455
7456         buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
7457             M_ZERO | M_WAITOK);
7458
7459         t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
7460         p = buf;
7461
7462         sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
7463         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7464                 sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
7465                     (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
7466                     p[4], p[3], p[2], p[1], p[0]);
7467         }
7468
7469         sbuf_printf(sb, "\n\nCntl ID               Data");
7470         for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
7471                 sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
7472                     (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
7473         }
7474
7475         rc = sbuf_finish(sb);
7476         sbuf_delete(sb);
7477         free(buf, M_CXGBE);
7478         return (rc);
7479 }
7480
7481 static int
7482 sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
7483 {
7484         struct adapter *sc = arg1;
7485         struct sbuf *sb;
7486         int rc, i;
7487         uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
7488         uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
7489         uint16_t thres[CIM_NUM_IBQ];
7490         uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr;
7491         uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat;
7492         u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq;
7493
7494         cim_num_obq = sc->chip_params->cim_num_obq;
7495         if (is_t4(sc)) {
7496                 ibq_rdaddr = A_UP_IBQ_0_RDADDR;
7497                 obq_rdaddr = A_UP_OBQ_0_REALADDR;
7498         } else {
7499                 ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR;
7500                 obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR;
7501         }
7502         nq = CIM_NUM_IBQ + cim_num_obq;
7503
7504         rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat);
7505         if (rc == 0)
7506                 rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr);
7507         if (rc != 0)
7508                 return (rc);
7509
7510         t4_read_cimq_cfg(sc, base, size, thres);
7511
7512         rc = sysctl_wire_old_buffer(req, 0);
7513         if (rc != 0)
7514                 return (rc);
7515
7516         sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
7517         if (sb == NULL)
7518                 return (ENOMEM);
7519
7520         sbuf_printf(sb,
7521             "  Queue  Base  Size Thres  RdPtr WrPtr  SOP  EOP Avail");
7522
7523         for (i = 0; i < CIM_NUM_IBQ; i++, p += 4)
7524                 sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
7525                     qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]),
7526                     G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
7527                     G_QUEREMFLITS(p[2]) * 16);
7528         for ( ; i < nq; i++, p += 4, wr += 2)
7529                 sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
7530                     base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff,
7531                     wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]),
7532                     G_QUEREMFLITS(p[2]) * 16);
7533
7534         rc = sbuf_finish(sb);
7535         sbuf_delete(sb);
7536
7537         return (rc);
7538 }
7539
7540 static int
7541 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
7542 {
7543         struct adapter *sc = arg1;
7544         struct sbuf *sb;
7545         int rc;
7546         struct tp_cpl_stats stats;
7547
7548         rc = sysctl_wire_old_buffer(req, 0);
7549         if (rc != 0)
7550                 return (rc);
7551
7552         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7553         if (sb == NULL)
7554                 return (ENOMEM);
7555
7556         mtx_lock(&sc->reg_lock);
7557         t4_tp_get_cpl_stats(sc, &stats, 0);
7558         mtx_unlock(&sc->reg_lock);
7559
7560         if (sc->chip_params->nchan > 2) {
7561                 sbuf_printf(sb, "                 channel 0  channel 1"
7562                     "  channel 2  channel 3");
7563                 sbuf_printf(sb, "\nCPL requests:   %10u %10u %10u %10u",
7564                     stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
7565                 sbuf_printf(sb, "\nCPL responses:   %10u %10u %10u %10u",
7566                     stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
7567         } else {
7568                 sbuf_printf(sb, "                 channel 0  channel 1");
7569                 sbuf_printf(sb, "\nCPL requests:   %10u %10u",
7570                     stats.req[0], stats.req[1]);
7571                 sbuf_printf(sb, "\nCPL responses:   %10u %10u",
7572                     stats.rsp[0], stats.rsp[1]);
7573         }
7574
7575         rc = sbuf_finish(sb);
7576         sbuf_delete(sb);
7577
7578         return (rc);
7579 }
7580
7581 static int
7582 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
7583 {
7584         struct adapter *sc = arg1;
7585         struct sbuf *sb;
7586         int rc;
7587         struct tp_usm_stats stats;
7588
7589         rc = sysctl_wire_old_buffer(req, 0);
7590         if (rc != 0)
7591                 return(rc);
7592
7593         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7594         if (sb == NULL)
7595                 return (ENOMEM);
7596
7597         t4_get_usm_stats(sc, &stats, 1);
7598
7599         sbuf_printf(sb, "Frames: %u\n", stats.frames);
7600         sbuf_printf(sb, "Octets: %ju\n", stats.octets);
7601         sbuf_printf(sb, "Drops:  %u", stats.drops);
7602
7603         rc = sbuf_finish(sb);
7604         sbuf_delete(sb);
7605
7606         return (rc);
7607 }
7608
7609 static const char * const devlog_level_strings[] = {
7610         [FW_DEVLOG_LEVEL_EMERG]         = "EMERG",
7611         [FW_DEVLOG_LEVEL_CRIT]          = "CRIT",
7612         [FW_DEVLOG_LEVEL_ERR]           = "ERR",
7613         [FW_DEVLOG_LEVEL_NOTICE]        = "NOTICE",
7614         [FW_DEVLOG_LEVEL_INFO]          = "INFO",
7615         [FW_DEVLOG_LEVEL_DEBUG]         = "DEBUG"
7616 };
7617
7618 static const char * const devlog_facility_strings[] = {
7619         [FW_DEVLOG_FACILITY_CORE]       = "CORE",
7620         [FW_DEVLOG_FACILITY_CF]         = "CF",
7621         [FW_DEVLOG_FACILITY_SCHED]      = "SCHED",
7622         [FW_DEVLOG_FACILITY_TIMER]      = "TIMER",
7623         [FW_DEVLOG_FACILITY_RES]        = "RES",
7624         [FW_DEVLOG_FACILITY_HW]         = "HW",
7625         [FW_DEVLOG_FACILITY_FLR]        = "FLR",
7626         [FW_DEVLOG_FACILITY_DMAQ]       = "DMAQ",
7627         [FW_DEVLOG_FACILITY_PHY]        = "PHY",
7628         [FW_DEVLOG_FACILITY_MAC]        = "MAC",
7629         [FW_DEVLOG_FACILITY_PORT]       = "PORT",
7630         [FW_DEVLOG_FACILITY_VI]         = "VI",
7631         [FW_DEVLOG_FACILITY_FILTER]     = "FILTER",
7632         [FW_DEVLOG_FACILITY_ACL]        = "ACL",
7633         [FW_DEVLOG_FACILITY_TM]         = "TM",
7634         [FW_DEVLOG_FACILITY_QFC]        = "QFC",
7635         [FW_DEVLOG_FACILITY_DCB]        = "DCB",
7636         [FW_DEVLOG_FACILITY_ETH]        = "ETH",
7637         [FW_DEVLOG_FACILITY_OFLD]       = "OFLD",
7638         [FW_DEVLOG_FACILITY_RI]         = "RI",
7639         [FW_DEVLOG_FACILITY_ISCSI]      = "ISCSI",
7640         [FW_DEVLOG_FACILITY_FCOE]       = "FCOE",
7641         [FW_DEVLOG_FACILITY_FOISCSI]    = "FOISCSI",
7642         [FW_DEVLOG_FACILITY_FOFCOE]     = "FOFCOE",
7643         [FW_DEVLOG_FACILITY_CHNET]      = "CHNET",
7644 };
7645
7646 static int
7647 sbuf_devlog(struct adapter *sc, struct sbuf *sb, int flags)
7648 {
7649         int i, j, rc, nentries, first = 0;
7650         struct devlog_params *dparams = &sc->params.devlog;
7651         struct fw_devlog_e *buf, *e;
7652         uint64_t ftstamp = UINT64_MAX;
7653
7654         if (dparams->addr == 0)
7655                 return (ENXIO);
7656
7657         MPASS(flags == M_WAITOK || flags == M_NOWAIT);
7658         buf = malloc(dparams->size, M_CXGBE, M_ZERO | flags);
7659         if (buf == NULL)
7660                 return (ENOMEM);
7661
7662         rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size);
7663         if (rc != 0)
7664                 goto done;
7665
7666         nentries = dparams->size / sizeof(struct fw_devlog_e);
7667         for (i = 0; i < nentries; i++) {
7668                 e = &buf[i];
7669
7670                 if (e->timestamp == 0)
7671                         break;  /* end */
7672
7673                 e->timestamp = be64toh(e->timestamp);
7674                 e->seqno = be32toh(e->seqno);
7675                 for (j = 0; j < 8; j++)
7676                         e->params[j] = be32toh(e->params[j]);
7677
7678                 if (e->timestamp < ftstamp) {
7679                         ftstamp = e->timestamp;
7680                         first = i;
7681                 }
7682         }
7683
7684         if (buf[first].timestamp == 0)
7685                 goto done;      /* nothing in the log */
7686
7687         sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
7688             "Seq#", "Tstamp", "Level", "Facility", "Message");
7689
7690         i = first;
7691         do {
7692                 e = &buf[i];
7693                 if (e->timestamp == 0)
7694                         break;  /* end */
7695
7696                 sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
7697                     e->seqno, e->timestamp,
7698                     (e->level < nitems(devlog_level_strings) ?
7699                         devlog_level_strings[e->level] : "UNKNOWN"),
7700                     (e->facility < nitems(devlog_facility_strings) ?
7701                         devlog_facility_strings[e->facility] : "UNKNOWN"));
7702                 sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
7703                     e->params[2], e->params[3], e->params[4],
7704                     e->params[5], e->params[6], e->params[7]);
7705
7706                 if (++i == nentries)
7707                         i = 0;
7708         } while (i != first);
7709 done:
7710         free(buf, M_CXGBE);
7711         return (rc);
7712 }
7713
7714 static int
7715 sysctl_devlog(SYSCTL_HANDLER_ARGS)
7716 {
7717         struct adapter *sc = arg1;
7718         int rc;
7719         struct sbuf *sb;
7720
7721         rc = sysctl_wire_old_buffer(req, 0);
7722         if (rc != 0)
7723                 return (rc);
7724         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7725         if (sb == NULL)
7726                 return (ENOMEM);
7727
7728         rc = sbuf_devlog(sc, sb, M_WAITOK);
7729         if (rc == 0)
7730                 rc = sbuf_finish(sb);
7731         sbuf_delete(sb);
7732         return (rc);
7733 }
7734
7735 void
7736 t4_os_dump_devlog(struct adapter *sc)
7737 {
7738         int rc;
7739         struct sbuf sb;
7740
7741         if (sbuf_new(&sb, NULL, 4096, SBUF_AUTOEXTEND) != &sb)
7742                 return;
7743         rc = sbuf_devlog(sc, &sb, M_NOWAIT);
7744         if (rc == 0) {
7745                 rc = sbuf_finish(&sb);
7746                 if (rc == 0) {
7747                         log(LOG_DEBUG, "%s: device log follows.\n%s",
7748                                 device_get_nameunit(sc->dev), sbuf_data(&sb));
7749                 }
7750         }
7751         sbuf_delete(&sb);
7752 }
7753
7754 static int
7755 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
7756 {
7757         struct adapter *sc = arg1;
7758         struct sbuf *sb;
7759         int rc;
7760         struct tp_fcoe_stats stats[MAX_NCHAN];
7761         int i, nchan = sc->chip_params->nchan;
7762
7763         rc = sysctl_wire_old_buffer(req, 0);
7764         if (rc != 0)
7765                 return (rc);
7766
7767         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7768         if (sb == NULL)
7769                 return (ENOMEM);
7770
7771         for (i = 0; i < nchan; i++)
7772                 t4_get_fcoe_stats(sc, i, &stats[i], 1);
7773
7774         if (nchan > 2) {
7775                 sbuf_printf(sb, "                   channel 0        channel 1"
7776                     "        channel 2        channel 3");
7777                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju %16ju %16ju",
7778                     stats[0].octets_ddp, stats[1].octets_ddp,
7779                     stats[2].octets_ddp, stats[3].octets_ddp);
7780                 sbuf_printf(sb, "\nframesDDP:  %16u %16u %16u %16u",
7781                     stats[0].frames_ddp, stats[1].frames_ddp,
7782                     stats[2].frames_ddp, stats[3].frames_ddp);
7783                 sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u",
7784                     stats[0].frames_drop, stats[1].frames_drop,
7785                     stats[2].frames_drop, stats[3].frames_drop);
7786         } else {
7787                 sbuf_printf(sb, "                   channel 0        channel 1");
7788                 sbuf_printf(sb, "\noctetsDDP:  %16ju %16ju",
7789                     stats[0].octets_ddp, stats[1].octets_ddp);
7790                 sbuf_printf(sb, "\nframesDDP:  %16u %16u",
7791                     stats[0].frames_ddp, stats[1].frames_ddp);
7792                 sbuf_printf(sb, "\nframesDrop: %16u %16u",
7793                     stats[0].frames_drop, stats[1].frames_drop);
7794         }
7795
7796         rc = sbuf_finish(sb);
7797         sbuf_delete(sb);
7798
7799         return (rc);
7800 }
7801
7802 static int
7803 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
7804 {
7805         struct adapter *sc = arg1;
7806         struct sbuf *sb;
7807         int rc, i;
7808         unsigned int map, kbps, ipg, mode;
7809         unsigned int pace_tab[NTX_SCHED];
7810
7811         rc = sysctl_wire_old_buffer(req, 0);
7812         if (rc != 0)
7813                 return (rc);
7814
7815         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
7816         if (sb == NULL)
7817                 return (ENOMEM);
7818
7819         map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
7820         mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
7821         t4_read_pace_tbl(sc, pace_tab);
7822
7823         sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
7824             "Class IPG (0.1 ns)   Flow IPG (us)");
7825
7826         for (i = 0; i < NTX_SCHED; ++i, map >>= 2) {
7827                 t4_get_tx_sched(sc, i, &kbps, &ipg, 1);
7828                 sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
7829                     (mode & (1 << i)) ? "flow" : "class", map & 3);
7830                 if (kbps)
7831                         sbuf_printf(sb, "%9u     ", kbps);
7832                 else
7833                         sbuf_printf(sb, " disabled     ");
7834
7835                 if (ipg)
7836                         sbuf_printf(sb, "%13u        ", ipg);
7837                 else
7838                         sbuf_printf(sb, "     disabled        ");
7839
7840                 if (pace_tab[i])
7841                         sbuf_printf(sb, "%10u", pace_tab[i]);
7842                 else
7843                         sbuf_printf(sb, "  disabled");
7844         }
7845
7846         rc = sbuf_finish(sb);
7847         sbuf_delete(sb);
7848
7849         return (rc);
7850 }
7851
7852 static int
7853 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
7854 {
7855         struct adapter *sc = arg1;
7856         struct sbuf *sb;
7857         int rc, i, j;
7858         uint64_t *p0, *p1;
7859         struct lb_port_stats s[2];
7860         static const char *stat_name[] = {
7861                 "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
7862                 "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
7863                 "Frames128To255:", "Frames256To511:", "Frames512To1023:",
7864                 "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
7865                 "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
7866                 "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
7867                 "BG2FramesTrunc:", "BG3FramesTrunc:"
7868         };
7869
7870         rc = sysctl_wire_old_buffer(req, 0);
7871         if (rc != 0)
7872                 return (rc);
7873
7874         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7875         if (sb == NULL)
7876                 return (ENOMEM);
7877
7878         memset(s, 0, sizeof(s));
7879
7880         for (i = 0; i < sc->chip_params->nchan; i += 2) {
7881                 t4_get_lb_stats(sc, i, &s[0]);
7882                 t4_get_lb_stats(sc, i + 1, &s[1]);
7883
7884                 p0 = &s[0].octets;
7885                 p1 = &s[1].octets;
7886                 sbuf_printf(sb, "%s                       Loopback %u"
7887                     "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
7888
7889                 for (j = 0; j < nitems(stat_name); j++)
7890                         sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
7891                                    *p0++, *p1++);
7892         }
7893
7894         rc = sbuf_finish(sb);
7895         sbuf_delete(sb);
7896
7897         return (rc);
7898 }
7899
7900 static int
7901 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
7902 {
7903         int rc = 0;
7904         struct port_info *pi = arg1;
7905         struct link_config *lc = &pi->link_cfg;
7906         struct sbuf *sb;
7907
7908         rc = sysctl_wire_old_buffer(req, 0);
7909         if (rc != 0)
7910                 return(rc);
7911         sb = sbuf_new_for_sysctl(NULL, NULL, 64, req);
7912         if (sb == NULL)
7913                 return (ENOMEM);
7914
7915         if (lc->link_ok || lc->link_down_rc == 255)
7916                 sbuf_printf(sb, "n/a");
7917         else
7918                 sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc));
7919
7920         rc = sbuf_finish(sb);
7921         sbuf_delete(sb);
7922
7923         return (rc);
7924 }
7925
/*
 * Describes one range of adapter memory.  Ranges are ordered by 'base'
 * (see mem_desc_cmp).
 */
struct mem_desc {
	unsigned int base;
	unsigned int limit;
	unsigned int idx;
};

/*
 * qsort-style comparator that orders struct mem_desc by ascending base.
 *
 * The bases are compared explicitly rather than subtracted: the difference
 * of two unsigned values does not fit in an int, and converting it would
 * report the wrong order whenever the bases are 2^31 or more apart.
 *
 * Returns -1, 0 or 1 when a's base is less than, equal to or greater than
 * b's base.
 */
static int
mem_desc_cmp(const void *a, const void *b)
{
	const unsigned int base_a = ((const struct mem_desc *)a)->base;
	const unsigned int base_b = ((const struct mem_desc *)b)->base;

	if (base_a < base_b)
		return (-1);
	if (base_a > base_b)
		return (1);
	return (0);
}
7938
/*
 * Append one line describing the memory region [from, to] to sb.  Nothing
 * is printed when from equals to, or when the computed size (to - from + 1)
 * wraps around to zero.
 */
static void
mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
    unsigned int to)
{
	const unsigned int nbytes = to - from + 1;

	if (from != to && nbytes != 0) {
		/*
		 * XXX: need humanize_number(3) in libkern for a more
		 * readable 'size'
		 */
		sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, nbytes);
	}
}
7955
7956 static int
7957 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
7958 {
7959         struct adapter *sc = arg1;
7960         struct sbuf *sb;
7961         int rc, i, n;
7962         uint32_t lo, hi, used, alloc;
7963         static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
7964         static const char *region[] = {
7965                 "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
7966                 "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
7967                 "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
7968                 "TDDP region:", "TPT region:", "STAG region:", "RQ region:",
7969                 "RQUDP region:", "PBL region:", "TXPBL region:",
7970                 "DBVFIFO region:", "ULPRX state:", "ULPTX state:",
7971                 "On-chip queues:", "TLS keys:",
7972         };
7973         struct mem_desc avail[4];
7974         struct mem_desc mem[nitems(region) + 3];        /* up to 3 holes */
7975         struct mem_desc *md = mem;
7976
7977         rc = sysctl_wire_old_buffer(req, 0);
7978         if (rc != 0)
7979                 return (rc);
7980
7981         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
7982         if (sb == NULL)
7983                 return (ENOMEM);
7984
7985         for (i = 0; i < nitems(mem); i++) {
7986                 mem[i].limit = 0;
7987                 mem[i].idx = i;
7988         }
7989
7990         /* Find and sort the populated memory ranges */
7991         i = 0;
7992         lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
7993         if (lo & F_EDRAM0_ENABLE) {
7994                 hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
7995                 avail[i].base = G_EDRAM0_BASE(hi) << 20;
7996                 avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
7997                 avail[i].idx = 0;
7998                 i++;
7999         }
8000         if (lo & F_EDRAM1_ENABLE) {
8001                 hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
8002                 avail[i].base = G_EDRAM1_BASE(hi) << 20;
8003                 avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
8004                 avail[i].idx = 1;
8005                 i++;
8006         }
8007         if (lo & F_EXT_MEM_ENABLE) {
8008                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
8009                 avail[i].base = G_EXT_MEM_BASE(hi) << 20;
8010                 avail[i].limit = avail[i].base +
8011                     (G_EXT_MEM_SIZE(hi) << 20);
8012                 avail[i].idx = is_t5(sc) ? 3 : 2;       /* Call it MC0 for T5 */
8013                 i++;
8014         }
8015         if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) {
8016                 hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
8017                 avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
8018                 avail[i].limit = avail[i].base +
8019                     (G_EXT_MEM1_SIZE(hi) << 20);
8020                 avail[i].idx = 4;
8021                 i++;
8022         }
8023         if (!i)                                    /* no memory available */
8024                 return 0;
8025         qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
8026
8027         (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
8028         (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
8029         (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
8030         (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
8031         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
8032         (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
8033         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
8034         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
8035         (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
8036
8037         /* the next few have explicit upper bounds */
8038         md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
8039         md->limit = md->base - 1 +
8040                     t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
8041                     G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
8042         md++;
8043
8044         md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
8045         md->limit = md->base - 1 +
8046                     t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
8047                     G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
8048         md++;
8049
8050         if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8051                 if (chip_id(sc) <= CHELSIO_T5)
8052                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
8053                 else
8054                         md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR);
8055                 md->limit = 0;
8056         } else {
8057                 md->base = 0;
8058                 md->idx = nitems(region);  /* hide it */
8059         }
8060         md++;
8061
8062 #define ulp_region(reg) \
8063         md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
8064         (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
8065
8066         ulp_region(RX_ISCSI);
8067         ulp_region(RX_TDDP);
8068         ulp_region(TX_TPT);
8069         ulp_region(RX_STAG);
8070         ulp_region(RX_RQ);
8071         ulp_region(RX_RQUDP);
8072         ulp_region(RX_PBL);
8073         ulp_region(TX_PBL);
8074 #undef ulp_region
8075
8076         md->base = 0;
8077         md->idx = nitems(region);
8078         if (!is_t4(sc)) {
8079                 uint32_t size = 0;
8080                 uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2);
8081                 uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE);
8082
8083                 if (is_t5(sc)) {
8084                         if (sge_ctrl & F_VFIFO_ENABLE)
8085                                 size = G_DBVFIFO_SIZE(fifo_size);
8086                 } else
8087                         size = G_T6_DBVFIFO_SIZE(fifo_size);
8088
8089                 if (size) {
8090                         md->base = G_BASEADDR(t4_read_reg(sc,
8091                             A_SGE_DBVFIFO_BADDR));
8092                         md->limit = md->base + (size << 2) - 1;
8093                 }
8094         }
8095         md++;
8096
8097         md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
8098         md->limit = 0;
8099         md++;
8100         md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
8101         md->limit = 0;
8102         md++;
8103
8104         md->base = sc->vres.ocq.start;
8105         if (sc->vres.ocq.size)
8106                 md->limit = md->base + sc->vres.ocq.size - 1;
8107         else
8108                 md->idx = nitems(region);  /* hide it */
8109         md++;
8110
8111         md->base = sc->vres.key.start;
8112         if (sc->vres.key.size)
8113                 md->limit = md->base + sc->vres.key.size - 1;
8114         else
8115                 md->idx = nitems(region);  /* hide it */
8116         md++;
8117
8118         /* add any address-space holes, there can be up to 3 */
8119         for (n = 0; n < i - 1; n++)
8120                 if (avail[n].limit < avail[n + 1].base)
8121                         (md++)->base = avail[n].limit;
8122         if (avail[n].limit)
8123                 (md++)->base = avail[n].limit;
8124
8125         n = md - mem;
8126         qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
8127
8128         for (lo = 0; lo < i; lo++)
8129                 mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
8130                                 avail[lo].limit - 1);
8131
8132         sbuf_printf(sb, "\n");
8133         for (i = 0; i < n; i++) {
8134                 if (mem[i].idx >= nitems(region))
8135                         continue;                        /* skip holes */
8136                 if (!mem[i].limit)
8137                         mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
8138                 mem_region_show(sb, region[mem[i].idx], mem[i].base,
8139                                 mem[i].limit);
8140         }
8141
8142         sbuf_printf(sb, "\n");
8143         lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
8144         hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
8145         mem_region_show(sb, "uP RAM:", lo, hi);
8146
8147         lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
8148         hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
8149         mem_region_show(sb, "uP Extmem2:", lo, hi);
8150
8151         lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
8152         sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
8153                    G_PMRXMAXPAGE(lo),
8154                    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
8155                    (lo & F_PMRXNUMCHN) ? 2 : 1);
8156
8157         lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
8158         hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
8159         sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
8160                    G_PMTXMAXPAGE(lo),
8161                    hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
8162                    hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
8163         sbuf_printf(sb, "%u p-structs\n",
8164                    t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
8165
8166         for (i = 0; i < 4; i++) {
8167                 if (chip_id(sc) > CHELSIO_T5)
8168                         lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4);
8169                 else
8170                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
8171                 if (is_t5(sc)) {
8172                         used = G_T5_USED(lo);
8173                         alloc = G_T5_ALLOC(lo);
8174                 } else {
8175                         used = G_USED(lo);
8176                         alloc = G_ALLOC(lo);
8177                 }
8178                 /* For T6 these are MAC buffer groups */
8179                 sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
8180                     i, used, alloc);
8181         }
8182         for (i = 0; i < sc->chip_params->nchan; i++) {
8183                 if (chip_id(sc) > CHELSIO_T5)
8184                         lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4);
8185                 else
8186                         lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
8187                 if (is_t5(sc)) {
8188                         used = G_T5_USED(lo);
8189                         alloc = G_T5_ALLOC(lo);
8190                 } else {
8191                         used = G_USED(lo);
8192                         alloc = G_ALLOC(lo);
8193                 }
8194                 /* For T6 these are MAC buffer groups */
8195                 sbuf_printf(sb,
8196                     "\nLoopback %d using %u pages out of %u allocated",
8197                     i, used, alloc);
8198         }
8199
8200         rc = sbuf_finish(sb);
8201         sbuf_delete(sb);
8202
8203         return (rc);
8204 }
8205
8206 static inline void
8207 tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
8208 {
8209         *mask = x | y;
8210         y = htobe64(y);
8211         memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN);
8212 }
8213
8214 static int
8215 sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
8216 {
8217         struct adapter *sc = arg1;
8218         struct sbuf *sb;
8219         int rc, i;
8220
8221         MPASS(chip_id(sc) <= CHELSIO_T5);
8222
8223         rc = sysctl_wire_old_buffer(req, 0);
8224         if (rc != 0)
8225                 return (rc);
8226
8227         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8228         if (sb == NULL)
8229                 return (ENOMEM);
8230
8231         sbuf_printf(sb,
8232             "Idx  Ethernet address     Mask     Vld Ports PF"
8233             "  VF              Replication             P0 P1 P2 P3  ML");
8234         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8235                 uint64_t tcamx, tcamy, mask;
8236                 uint32_t cls_lo, cls_hi;
8237                 uint8_t addr[ETHER_ADDR_LEN];
8238
8239                 tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
8240                 tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
8241                 if (tcamx & tcamy)
8242                         continue;
8243                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
8244                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8245                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
8246                 sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
8247                            "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
8248                            addr[3], addr[4], addr[5], (uintmax_t)mask,
8249                            (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
8250                            G_PORTMAP(cls_hi), G_PF(cls_lo),
8251                            (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
8252
8253                 if (cls_lo & F_REPLICATE) {
8254                         struct fw_ldst_cmd ldst_cmd;
8255
8256                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
8257                         ldst_cmd.op_to_addrspace =
8258                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
8259                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
8260                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
8261                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
8262                         ldst_cmd.u.mps.rplc.fid_idx =
8263                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
8264                                 V_FW_LDST_CMD_IDX(i));
8265
8266                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
8267                             "t4mps");
8268                         if (rc)
8269                                 break;
8270                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
8271                             sizeof(ldst_cmd), &ldst_cmd);
8272                         end_synchronized_op(sc, 0);
8273
8274                         if (rc != 0) {
8275                                 sbuf_printf(sb, "%36d", rc);
8276                                 rc = 0;
8277                         } else {
8278                                 sbuf_printf(sb, " %08x %08x %08x %08x",
8279                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
8280                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
8281                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
8282                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
8283                         }
8284                 } else
8285                         sbuf_printf(sb, "%36s", "");
8286
8287                 sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
8288                     G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
8289                     G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
8290         }
8291
8292         if (rc)
8293                 (void) sbuf_finish(sb);
8294         else
8295                 rc = sbuf_finish(sb);
8296         sbuf_delete(sb);
8297
8298         return (rc);
8299 }
8300
8301 static int
8302 sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS)
8303 {
8304         struct adapter *sc = arg1;
8305         struct sbuf *sb;
8306         int rc, i;
8307
8308         MPASS(chip_id(sc) > CHELSIO_T5);
8309
8310         rc = sysctl_wire_old_buffer(req, 0);
8311         if (rc != 0)
8312                 return (rc);
8313
8314         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8315         if (sb == NULL)
8316                 return (ENOMEM);
8317
8318         sbuf_printf(sb, "Idx  Ethernet address     Mask       VNI   Mask"
8319             "   IVLAN Vld DIP_Hit   Lookup  Port Vld Ports PF  VF"
8320             "                           Replication"
8321             "                                    P0 P1 P2 P3  ML\n");
8322
8323         for (i = 0; i < sc->chip_params->mps_tcam_size; i++) {
8324                 uint8_t dip_hit, vlan_vld, lookup_type, port_num;
8325                 uint16_t ivlan;
8326                 uint64_t tcamx, tcamy, val, mask;
8327                 uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy;
8328                 uint8_t addr[ETHER_ADDR_LEN];
8329
8330                 ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0);
8331                 if (i < 256)
8332                         ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0);
8333                 else
8334                         ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1);
8335                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
8336                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
8337                 tcamy = G_DMACH(val) << 32;
8338                 tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
8339                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
8340                 lookup_type = G_DATALKPTYPE(data2);
8341                 port_num = G_DATAPORTNUM(data2);
8342                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
8343                         /* Inner header VNI */
8344                         vniy = ((data2 & F_DATAVIDH2) << 23) |
8345                                        (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
8346                         dip_hit = data2 & F_DATADIPHIT;
8347                         vlan_vld = 0;
8348                 } else {
8349                         vniy = 0;
8350                         dip_hit = 0;
8351                         vlan_vld = data2 & F_DATAVIDH2;
8352                         ivlan = G_VIDL(val);
8353                 }
8354
8355                 ctl |= V_CTLXYBITSEL(1);
8356                 t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl);
8357                 val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1);
8358                 tcamx = G_DMACH(val) << 32;
8359                 tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1);
8360                 data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1);
8361                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
8362                         /* Inner header VNI mask */
8363                         vnix = ((data2 & F_DATAVIDH2) << 23) |
8364                                (G_DATAVIDH1(data2) << 16) | G_VIDL(val);
8365                 } else
8366                         vnix = 0;
8367
8368                 if (tcamx & tcamy)
8369                         continue;
8370                 tcamxy2valmask(tcamx, tcamy, addr, &mask);
8371
8372                 cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
8373                 cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
8374
8375                 if (lookup_type && lookup_type != M_DATALKPTYPE) {
8376                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
8377                             "%012jx %06x %06x    -    -   %3c"
8378                             "      'I'  %4x   %3c   %#x%4u%4d", i, addr[0],
8379                             addr[1], addr[2], addr[3], addr[4], addr[5],
8380                             (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N',
8381                             port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
8382                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
8383                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
8384                 } else {
8385                         sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x "
8386                             "%012jx    -       -   ", i, addr[0], addr[1],
8387                             addr[2], addr[3], addr[4], addr[5],
8388                             (uintmax_t)mask);
8389
8390                         if (vlan_vld)
8391                                 sbuf_printf(sb, "%4u   Y     ", ivlan);
8392                         else
8393                                 sbuf_printf(sb, "  -    N     ");
8394
8395                         sbuf_printf(sb, "-      %3c  %4x   %3c   %#x%4u%4d",
8396                             lookup_type ? 'I' : 'O', port_num,
8397                             cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N',
8398                             G_PORTMAP(cls_hi), G_T6_PF(cls_lo),
8399                             cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1);
8400                 }
8401
8402
8403                 if (cls_lo & F_T6_REPLICATE) {
8404                         struct fw_ldst_cmd ldst_cmd;
8405
8406                         memset(&ldst_cmd, 0, sizeof(ldst_cmd));
8407                         ldst_cmd.op_to_addrspace =
8408                             htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
8409                                 F_FW_CMD_REQUEST | F_FW_CMD_READ |
8410                                 V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
8411                         ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
8412                         ldst_cmd.u.mps.rplc.fid_idx =
8413                             htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
8414                                 V_FW_LDST_CMD_IDX(i));
8415
8416                         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
8417                             "t6mps");
8418                         if (rc)
8419                                 break;
8420                         rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
8421                             sizeof(ldst_cmd), &ldst_cmd);
8422                         end_synchronized_op(sc, 0);
8423
8424                         if (rc != 0) {
8425                                 sbuf_printf(sb, "%72d", rc);
8426                                 rc = 0;
8427                         } else {
8428                                 sbuf_printf(sb, " %08x %08x %08x %08x"
8429                                     " %08x %08x %08x %08x",
8430                                     be32toh(ldst_cmd.u.mps.rplc.rplc255_224),
8431                                     be32toh(ldst_cmd.u.mps.rplc.rplc223_192),
8432                                     be32toh(ldst_cmd.u.mps.rplc.rplc191_160),
8433                                     be32toh(ldst_cmd.u.mps.rplc.rplc159_128),
8434                                     be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
8435                                     be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
8436                                     be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
8437                                     be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
8438                         }
8439                 } else
8440                         sbuf_printf(sb, "%72s", "");
8441
8442                 sbuf_printf(sb, "%4u%3u%3u%3u %#x",
8443                     G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo),
8444                     G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo),
8445                     (cls_lo >> S_T6_MULTILISTEN0) & 0xf);
8446         }
8447
8448         if (rc)
8449                 (void) sbuf_finish(sb);
8450         else
8451                 rc = sbuf_finish(sb);
8452         sbuf_delete(sb);
8453
8454         return (rc);
8455 }
8456
8457 static int
8458 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
8459 {
8460         struct adapter *sc = arg1;
8461         struct sbuf *sb;
8462         int rc;
8463         uint16_t mtus[NMTUS];
8464
8465         rc = sysctl_wire_old_buffer(req, 0);
8466         if (rc != 0)
8467                 return (rc);
8468
8469         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8470         if (sb == NULL)
8471                 return (ENOMEM);
8472
8473         t4_read_mtu_tbl(sc, mtus, NULL);
8474
8475         sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u",
8476             mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6],
8477             mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13],
8478             mtus[14], mtus[15]);
8479
8480         rc = sbuf_finish(sb);
8481         sbuf_delete(sb);
8482
8483         return (rc);
8484 }
8485
8486 static int
8487 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
8488 {
8489         struct adapter *sc = arg1;
8490         struct sbuf *sb;
8491         int rc, i;
8492         uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS];
8493         uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS];
8494         static const char *tx_stats[MAX_PM_NSTATS] = {
8495                 "Read:", "Write bypass:", "Write mem:", "Bypass + mem:",
8496                 "Tx FIFO wait", NULL, "Tx latency"
8497         };
8498         static const char *rx_stats[MAX_PM_NSTATS] = {
8499                 "Read:", "Write bypass:", "Write mem:", "Flush:",
8500                 "Rx FIFO wait", NULL, "Rx latency"
8501         };
8502
8503         rc = sysctl_wire_old_buffer(req, 0);
8504         if (rc != 0)
8505                 return (rc);
8506
8507         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8508         if (sb == NULL)
8509                 return (ENOMEM);
8510
8511         t4_pmtx_get_stats(sc, tx_cnt, tx_cyc);
8512         t4_pmrx_get_stats(sc, rx_cnt, rx_cyc);
8513
8514         sbuf_printf(sb, "                Tx pcmds             Tx bytes");
8515         for (i = 0; i < 4; i++) {
8516                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8517                     tx_cyc[i]);
8518         }
8519
8520         sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
8521         for (i = 0; i < 4; i++) {
8522                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8523                     rx_cyc[i]);
8524         }
8525
8526         if (chip_id(sc) > CHELSIO_T5) {
8527                 sbuf_printf(sb,
8528                     "\n              Total wait      Total occupancy");
8529                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8530                     tx_cyc[i]);
8531                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8532                     rx_cyc[i]);
8533
8534                 i += 2;
8535                 MPASS(i < nitems(tx_stats));
8536
8537                 sbuf_printf(sb,
8538                     "\n                   Reads           Total wait");
8539                 sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i],
8540                     tx_cyc[i]);
8541                 sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i],
8542                     rx_cyc[i]);
8543         }
8544
8545         rc = sbuf_finish(sb);
8546         sbuf_delete(sb);
8547
8548         return (rc);
8549 }
8550
8551 static int
8552 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
8553 {
8554         struct adapter *sc = arg1;
8555         struct sbuf *sb;
8556         int rc;
8557         struct tp_rdma_stats stats;
8558
8559         rc = sysctl_wire_old_buffer(req, 0);
8560         if (rc != 0)
8561                 return (rc);
8562
8563         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8564         if (sb == NULL)
8565                 return (ENOMEM);
8566
8567         mtx_lock(&sc->reg_lock);
8568         t4_tp_get_rdma_stats(sc, &stats, 0);
8569         mtx_unlock(&sc->reg_lock);
8570
8571         sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
8572         sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
8573
8574         rc = sbuf_finish(sb);
8575         sbuf_delete(sb);
8576
8577         return (rc);
8578 }
8579
8580 static int
8581 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
8582 {
8583         struct adapter *sc = arg1;
8584         struct sbuf *sb;
8585         int rc;
8586         struct tp_tcp_stats v4, v6;
8587
8588         rc = sysctl_wire_old_buffer(req, 0);
8589         if (rc != 0)
8590                 return (rc);
8591
8592         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8593         if (sb == NULL)
8594                 return (ENOMEM);
8595
8596         mtx_lock(&sc->reg_lock);
8597         t4_tp_get_tcp_stats(sc, &v4, &v6, 0);
8598         mtx_unlock(&sc->reg_lock);
8599
8600         sbuf_printf(sb,
8601             "                                IP                 IPv6\n");
8602         sbuf_printf(sb, "OutRsts:      %20u %20u\n",
8603             v4.tcp_out_rsts, v6.tcp_out_rsts);
8604         sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
8605             v4.tcp_in_segs, v6.tcp_in_segs);
8606         sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
8607             v4.tcp_out_segs, v6.tcp_out_segs);
8608         sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
8609             v4.tcp_retrans_segs, v6.tcp_retrans_segs);
8610
8611         rc = sbuf_finish(sb);
8612         sbuf_delete(sb);
8613
8614         return (rc);
8615 }
8616
8617 static int
8618 sysctl_tids(SYSCTL_HANDLER_ARGS)
8619 {
8620         struct adapter *sc = arg1;
8621         struct sbuf *sb;
8622         int rc;
8623         struct tid_info *t = &sc->tids;
8624
8625         rc = sysctl_wire_old_buffer(req, 0);
8626         if (rc != 0)
8627                 return (rc);
8628
8629         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8630         if (sb == NULL)
8631                 return (ENOMEM);
8632
8633         if (t->natids) {
8634                 sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
8635                     t->atids_in_use);
8636         }
8637
8638         if (t->nhpftids) {
8639                 sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n",
8640                     t->hpftid_base, t->hpftid_end, t->hpftids_in_use);
8641         }
8642
8643         if (t->ntids) {
8644                 sbuf_printf(sb, "TID range: ");
8645                 if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
8646                         uint32_t b, hb;
8647
8648                         if (chip_id(sc) <= CHELSIO_T5) {
8649                                 b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
8650                                 hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
8651                         } else {
8652                                 b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX);
8653                                 hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE);
8654                         }
8655
8656                         if (b)
8657                                 sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1);
8658                         sbuf_printf(sb, "%u-%u", hb, t->ntids - 1);
8659                 } else
8660                         sbuf_printf(sb, "%u-%u", t->tid_base, t->ntids - 1);
8661                 sbuf_printf(sb, ", in use: %u\n",
8662                     atomic_load_acq_int(&t->tids_in_use));
8663         }
8664
8665         if (t->nstids) {
8666                 sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
8667                     t->stid_base + t->nstids - 1, t->stids_in_use);
8668         }
8669
8670         if (t->nftids) {
8671                 sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base,
8672                     t->ftid_end, t->ftids_in_use);
8673         }
8674
8675         if (t->netids) {
8676                 sbuf_printf(sb, "ETID range: %u-%u, in use: %u\n", t->etid_base,
8677                     t->etid_base + t->netids - 1, t->etids_in_use);
8678         }
8679
8680         sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
8681             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
8682             t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
8683
8684         rc = sbuf_finish(sb);
8685         sbuf_delete(sb);
8686
8687         return (rc);
8688 }
8689
8690 static int
8691 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
8692 {
8693         struct adapter *sc = arg1;
8694         struct sbuf *sb;
8695         int rc;
8696         struct tp_err_stats stats;
8697
8698         rc = sysctl_wire_old_buffer(req, 0);
8699         if (rc != 0)
8700                 return (rc);
8701
8702         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
8703         if (sb == NULL)
8704                 return (ENOMEM);
8705
8706         mtx_lock(&sc->reg_lock);
8707         t4_tp_get_err_stats(sc, &stats, 0);
8708         mtx_unlock(&sc->reg_lock);
8709
8710         if (sc->chip_params->nchan > 2) {
8711                 sbuf_printf(sb, "                 channel 0  channel 1"
8712                     "  channel 2  channel 3\n");
8713                 sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
8714                     stats.mac_in_errs[0], stats.mac_in_errs[1],
8715                     stats.mac_in_errs[2], stats.mac_in_errs[3]);
8716                 sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
8717                     stats.hdr_in_errs[0], stats.hdr_in_errs[1],
8718                     stats.hdr_in_errs[2], stats.hdr_in_errs[3]);
8719                 sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
8720                     stats.tcp_in_errs[0], stats.tcp_in_errs[1],
8721                     stats.tcp_in_errs[2], stats.tcp_in_errs[3]);
8722                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
8723                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1],
8724                     stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]);
8725                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
8726                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1],
8727                     stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]);
8728                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
8729                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1],
8730                     stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]);
8731                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
8732                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1],
8733                     stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]);
8734                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
8735                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1],
8736                     stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]);
8737         } else {
8738                 sbuf_printf(sb, "                 channel 0  channel 1\n");
8739                 sbuf_printf(sb, "macInErrs:      %10u %10u\n",
8740                     stats.mac_in_errs[0], stats.mac_in_errs[1]);
8741                 sbuf_printf(sb, "hdrInErrs:      %10u %10u\n",
8742                     stats.hdr_in_errs[0], stats.hdr_in_errs[1]);
8743                 sbuf_printf(sb, "tcpInErrs:      %10u %10u\n",
8744                     stats.tcp_in_errs[0], stats.tcp_in_errs[1]);
8745                 sbuf_printf(sb, "tcp6InErrs:     %10u %10u\n",
8746                     stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]);
8747                 sbuf_printf(sb, "tnlCongDrops:   %10u %10u\n",
8748                     stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]);
8749                 sbuf_printf(sb, "tnlTxDrops:     %10u %10u\n",
8750                     stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]);
8751                 sbuf_printf(sb, "ofldVlanDrops:  %10u %10u\n",
8752                     stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]);
8753                 sbuf_printf(sb, "ofldChanDrops:  %10u %10u\n\n",
8754                     stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]);
8755         }
8756
8757         sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
8758             stats.ofld_no_neigh, stats.ofld_cong_defer);
8759
8760         rc = sbuf_finish(sb);
8761         sbuf_delete(sb);
8762
8763         return (rc);
8764 }
8765
8766 static int
8767 sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS)
8768 {
8769         struct adapter *sc = arg1;
8770         struct tp_params *tpp = &sc->params.tp;
8771         u_int mask;
8772         int rc;
8773
8774         mask = tpp->la_mask >> 16;
8775         rc = sysctl_handle_int(oidp, &mask, 0, req);
8776         if (rc != 0 || req->newptr == NULL)
8777                 return (rc);
8778         if (mask > 0xffff)
8779                 return (EINVAL);
8780         tpp->la_mask = mask << 16;
8781         t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask);
8782
8783         return (0);
8784 }
8785
8786 struct field_desc {
8787         const char *name;
8788         u_int start;
8789         u_int width;
8790 };
8791
8792 static void
8793 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
8794 {
8795         char buf[32];
8796         int line_size = 0;
8797
8798         while (f->name) {
8799                 uint64_t mask = (1ULL << f->width) - 1;
8800                 int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
8801                     ((uintmax_t)v >> f->start) & mask);
8802
8803                 if (line_size + len >= 79) {
8804                         line_size = 8;
8805                         sbuf_printf(sb, "\n        ");
8806                 }
8807                 sbuf_printf(sb, "%s ", buf);
8808                 line_size += len + 1;
8809                 f++;
8810         }
8811         sbuf_printf(sb, "\n");
8812 }
8813
8814 static const struct field_desc tp_la0[] = {
8815         { "RcfOpCodeOut", 60, 4 },
8816         { "State", 56, 4 },
8817         { "WcfState", 52, 4 },
8818         { "RcfOpcSrcOut", 50, 2 },
8819         { "CRxError", 49, 1 },
8820         { "ERxError", 48, 1 },
8821         { "SanityFailed", 47, 1 },
8822         { "SpuriousMsg", 46, 1 },
8823         { "FlushInputMsg", 45, 1 },
8824         { "FlushInputCpl", 44, 1 },
8825         { "RssUpBit", 43, 1 },
8826         { "RssFilterHit", 42, 1 },
8827         { "Tid", 32, 10 },
8828         { "InitTcb", 31, 1 },
8829         { "LineNumber", 24, 7 },
8830         { "Emsg", 23, 1 },
8831         { "EdataOut", 22, 1 },
8832         { "Cmsg", 21, 1 },
8833         { "CdataOut", 20, 1 },
8834         { "EreadPdu", 19, 1 },
8835         { "CreadPdu", 18, 1 },
8836         { "TunnelPkt", 17, 1 },
8837         { "RcfPeerFin", 16, 1 },
8838         { "RcfReasonOut", 12, 4 },
8839         { "TxCchannel", 10, 2 },
8840         { "RcfTxChannel", 8, 2 },
8841         { "RxEchannel", 6, 2 },
8842         { "RcfRxChannel", 5, 1 },
8843         { "RcfDataOutSrdy", 4, 1 },
8844         { "RxDvld", 3, 1 },
8845         { "RxOoDvld", 2, 1 },
8846         { "RxCongestion", 1, 1 },
8847         { "TxCongestion", 0, 1 },
8848         { NULL }
8849 };
8850
8851 static const struct field_desc tp_la1[] = {
8852         { "CplCmdIn", 56, 8 },
8853         { "CplCmdOut", 48, 8 },
8854         { "ESynOut", 47, 1 },
8855         { "EAckOut", 46, 1 },
8856         { "EFinOut", 45, 1 },
8857         { "ERstOut", 44, 1 },
8858         { "SynIn", 43, 1 },
8859         { "AckIn", 42, 1 },
8860         { "FinIn", 41, 1 },
8861         { "RstIn", 40, 1 },
8862         { "DataIn", 39, 1 },
8863         { "DataInVld", 38, 1 },
8864         { "PadIn", 37, 1 },
8865         { "RxBufEmpty", 36, 1 },
8866         { "RxDdp", 35, 1 },
8867         { "RxFbCongestion", 34, 1 },
8868         { "TxFbCongestion", 33, 1 },
8869         { "TxPktSumSrdy", 32, 1 },
8870         { "RcfUlpType", 28, 4 },
8871         { "Eread", 27, 1 },
8872         { "Ebypass", 26, 1 },
8873         { "Esave", 25, 1 },
8874         { "Static0", 24, 1 },
8875         { "Cread", 23, 1 },
8876         { "Cbypass", 22, 1 },
8877         { "Csave", 21, 1 },
8878         { "CPktOut", 20, 1 },
8879         { "RxPagePoolFull", 18, 2 },
8880         { "RxLpbkPkt", 17, 1 },
8881         { "TxLpbkPkt", 16, 1 },
8882         { "RxVfValid", 15, 1 },
8883         { "SynLearned", 14, 1 },
8884         { "SetDelEntry", 13, 1 },
8885         { "SetInvEntry", 12, 1 },
8886         { "CpcmdDvld", 11, 1 },
8887         { "CpcmdSave", 10, 1 },
8888         { "RxPstructsFull", 8, 2 },
8889         { "EpcmdDvld", 7, 1 },
8890         { "EpcmdFlush", 6, 1 },
8891         { "EpcmdTrimPrefix", 5, 1 },
8892         { "EpcmdTrimPostfix", 4, 1 },
8893         { "ERssIp4Pkt", 3, 1 },
8894         { "ERssIp6Pkt", 2, 1 },
8895         { "ERssTcpUdpPkt", 1, 1 },
8896         { "ERssFceFipPkt", 0, 1 },
8897         { NULL }
8898 };
8899
8900 static const struct field_desc tp_la2[] = {
8901         { "CplCmdIn", 56, 8 },
8902         { "MpsVfVld", 55, 1 },
8903         { "MpsPf", 52, 3 },
8904         { "MpsVf", 44, 8 },
8905         { "SynIn", 43, 1 },
8906         { "AckIn", 42, 1 },
8907         { "FinIn", 41, 1 },
8908         { "RstIn", 40, 1 },
8909         { "DataIn", 39, 1 },
8910         { "DataInVld", 38, 1 },
8911         { "PadIn", 37, 1 },
8912         { "RxBufEmpty", 36, 1 },
8913         { "RxDdp", 35, 1 },
8914         { "RxFbCongestion", 34, 1 },
8915         { "TxFbCongestion", 33, 1 },
8916         { "TxPktSumSrdy", 32, 1 },
8917         { "RcfUlpType", 28, 4 },
8918         { "Eread", 27, 1 },
8919         { "Ebypass", 26, 1 },
8920         { "Esave", 25, 1 },
8921         { "Static0", 24, 1 },
8922         { "Cread", 23, 1 },
8923         { "Cbypass", 22, 1 },
8924         { "Csave", 21, 1 },
8925         { "CPktOut", 20, 1 },
8926         { "RxPagePoolFull", 18, 2 },
8927         { "RxLpbkPkt", 17, 1 },
8928         { "TxLpbkPkt", 16, 1 },
8929         { "RxVfValid", 15, 1 },
8930         { "SynLearned", 14, 1 },
8931         { "SetDelEntry", 13, 1 },
8932         { "SetInvEntry", 12, 1 },
8933         { "CpcmdDvld", 11, 1 },
8934         { "CpcmdSave", 10, 1 },
8935         { "RxPstructsFull", 8, 2 },
8936         { "EpcmdDvld", 7, 1 },
8937         { "EpcmdFlush", 6, 1 },
8938         { "EpcmdTrimPrefix", 5, 1 },
8939         { "EpcmdTrimPostfix", 4, 1 },
8940         { "ERssIp4Pkt", 3, 1 },
8941         { "ERssIp6Pkt", 2, 1 },
8942         { "ERssTcpUdpPkt", 1, 1 },
8943         { "ERssFceFipPkt", 0, 1 },
8944         { NULL }
8945 };
8946
8947 static void
8948 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
8949 {
8950
8951         field_desc_show(sb, *p, tp_la0);
8952 }
8953
8954 static void
8955 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
8956 {
8957
8958         if (idx)
8959                 sbuf_printf(sb, "\n");
8960         field_desc_show(sb, p[0], tp_la0);
8961         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8962                 field_desc_show(sb, p[1], tp_la0);
8963 }
8964
8965 static void
8966 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
8967 {
8968
8969         if (idx)
8970                 sbuf_printf(sb, "\n");
8971         field_desc_show(sb, p[0], tp_la0);
8972         if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL)
8973                 field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1);
8974 }
8975
8976 static int
8977 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
8978 {
8979         struct adapter *sc = arg1;
8980         struct sbuf *sb;
8981         uint64_t *buf, *p;
8982         int rc;
8983         u_int i, inc;
8984         void (*show_func)(struct sbuf *, uint64_t *, int);
8985
8986         rc = sysctl_wire_old_buffer(req, 0);
8987         if (rc != 0)
8988                 return (rc);
8989
8990         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
8991         if (sb == NULL)
8992                 return (ENOMEM);
8993
8994         buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
8995
8996         t4_tp_read_la(sc, buf, NULL);
8997         p = buf;
8998
8999         switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) {
9000         case 2:
9001                 inc = 2;
9002                 show_func = tp_la_show2;
9003                 break;
9004         case 3:
9005                 inc = 2;
9006                 show_func = tp_la_show3;
9007                 break;
9008         default:
9009                 inc = 1;
9010                 show_func = tp_la_show;
9011         }
9012
9013         for (i = 0; i < TPLA_SIZE / inc; i++, p += inc)
9014                 (*show_func)(sb, p, i);
9015
9016         rc = sbuf_finish(sb);
9017         sbuf_delete(sb);
9018         free(buf, M_CXGBE);
9019         return (rc);
9020 }
9021
9022 static int
9023 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
9024 {
9025         struct adapter *sc = arg1;
9026         struct sbuf *sb;
9027         int rc;
9028         u64 nrate[MAX_NCHAN], orate[MAX_NCHAN];
9029
9030         rc = sysctl_wire_old_buffer(req, 0);
9031         if (rc != 0)
9032                 return (rc);
9033
9034         sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
9035         if (sb == NULL)
9036                 return (ENOMEM);
9037
9038         t4_get_chan_txrate(sc, nrate, orate);
9039
9040         if (sc->chip_params->nchan > 2) {
9041                 sbuf_printf(sb, "              channel 0   channel 1"
9042                     "   channel 2   channel 3\n");
9043                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
9044                     nrate[0], nrate[1], nrate[2], nrate[3]);
9045                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
9046                     orate[0], orate[1], orate[2], orate[3]);
9047         } else {
9048                 sbuf_printf(sb, "              channel 0   channel 1\n");
9049                 sbuf_printf(sb, "NIC B/s:     %10ju  %10ju\n",
9050                     nrate[0], nrate[1]);
9051                 sbuf_printf(sb, "Offload B/s: %10ju  %10ju",
9052                     orate[0], orate[1]);
9053         }
9054
9055         rc = sbuf_finish(sb);
9056         sbuf_delete(sb);
9057
9058         return (rc);
9059 }
9060
9061 static int
9062 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
9063 {
9064         struct adapter *sc = arg1;
9065         struct sbuf *sb;
9066         uint32_t *buf, *p;
9067         int rc, i;
9068
9069         rc = sysctl_wire_old_buffer(req, 0);
9070         if (rc != 0)
9071                 return (rc);
9072
9073         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9074         if (sb == NULL)
9075                 return (ENOMEM);
9076
9077         buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
9078             M_ZERO | M_WAITOK);
9079
9080         t4_ulprx_read_la(sc, buf);
9081         p = buf;
9082
9083         sbuf_printf(sb, "      Pcmd        Type   Message"
9084             "                Data");
9085         for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) {
9086                 sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
9087                     p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]);
9088         }
9089
9090         rc = sbuf_finish(sb);
9091         sbuf_delete(sb);
9092         free(buf, M_CXGBE);
9093         return (rc);
9094 }
9095
9096 static int
9097 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
9098 {
9099         struct adapter *sc = arg1;
9100         struct sbuf *sb;
9101         int rc, v;
9102
9103         MPASS(chip_id(sc) >= CHELSIO_T5);
9104
9105         rc = sysctl_wire_old_buffer(req, 0);
9106         if (rc != 0)
9107                 return (rc);
9108
9109         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9110         if (sb == NULL)
9111                 return (ENOMEM);
9112
9113         v = t4_read_reg(sc, A_SGE_STAT_CFG);
9114         if (G_STATSOURCE_T5(v) == 7) {
9115                 int mode;
9116
9117                 mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v);
9118                 if (mode == 0) {
9119                         sbuf_printf(sb, "total %d, incomplete %d",
9120                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
9121                             t4_read_reg(sc, A_SGE_STAT_MATCH));
9122                 } else if (mode == 1) {
9123                         sbuf_printf(sb, "total %d, data overflow %d",
9124                             t4_read_reg(sc, A_SGE_STAT_TOTAL),
9125                             t4_read_reg(sc, A_SGE_STAT_MATCH));
9126                 } else {
9127                         sbuf_printf(sb, "unknown mode %d", mode);
9128                 }
9129         }
9130         rc = sbuf_finish(sb);
9131         sbuf_delete(sb);
9132
9133         return (rc);
9134 }
9135
9136 static int
9137 sysctl_cpus(SYSCTL_HANDLER_ARGS)
9138 {
9139         struct adapter *sc = arg1;
9140         enum cpu_sets op = arg2;
9141         cpuset_t cpuset;
9142         struct sbuf *sb;
9143         int i, rc;
9144
9145         MPASS(op == LOCAL_CPUS || op == INTR_CPUS);
9146
9147         CPU_ZERO(&cpuset);
9148         rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset);
9149         if (rc != 0)
9150                 return (rc);
9151
9152         rc = sysctl_wire_old_buffer(req, 0);
9153         if (rc != 0)
9154                 return (rc);
9155
9156         sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
9157         if (sb == NULL)
9158                 return (ENOMEM);
9159
9160         CPU_FOREACH(i)
9161                 sbuf_printf(sb, "%d ", i);
9162         rc = sbuf_finish(sb);
9163         sbuf_delete(sb);
9164
9165         return (rc);
9166 }
9167
9168 #ifdef TCP_OFFLOAD
9169 static int
9170 sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS)
9171 {
9172         struct adapter *sc = arg1;
9173         int *old_ports, *new_ports;
9174         int i, new_count, rc;
9175
9176         if (req->newptr == NULL && req->oldptr == NULL)
9177                 return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) *
9178                     sizeof(sc->tt.tls_rx_ports[0])));
9179
9180         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx");
9181         if (rc)
9182                 return (rc);
9183
9184         if (sc->tt.num_tls_rx_ports == 0) {
9185                 i = -1;
9186                 rc = SYSCTL_OUT(req, &i, sizeof(i));
9187         } else
9188                 rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports,
9189                     sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0]));
9190         if (rc == 0 && req->newptr != NULL) {
9191                 new_count = req->newlen / sizeof(new_ports[0]);
9192                 new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE,
9193                     M_WAITOK);
9194                 rc = SYSCTL_IN(req, new_ports, new_count *
9195                     sizeof(new_ports[0]));
9196                 if (rc)
9197                         goto err;
9198
9199                 /* Allow setting to a single '-1' to clear the list. */
9200                 if (new_count == 1 && new_ports[0] == -1) {
9201                         ADAPTER_LOCK(sc);
9202                         old_ports = sc->tt.tls_rx_ports;
9203                         sc->tt.tls_rx_ports = NULL;
9204                         sc->tt.num_tls_rx_ports = 0;
9205                         ADAPTER_UNLOCK(sc);
9206                         free(old_ports, M_CXGBE);
9207                 } else {
9208                         for (i = 0; i < new_count; i++) {
9209                                 if (new_ports[i] < 1 ||
9210                                     new_ports[i] > IPPORT_MAX) {
9211                                         rc = EINVAL;
9212                                         goto err;
9213                                 }
9214                         }
9215
9216                         ADAPTER_LOCK(sc);
9217                         old_ports = sc->tt.tls_rx_ports;
9218                         sc->tt.tls_rx_ports = new_ports;
9219                         sc->tt.num_tls_rx_ports = new_count;
9220                         ADAPTER_UNLOCK(sc);
9221                         free(old_ports, M_CXGBE);
9222                         new_ports = NULL;
9223                 }
9224         err:
9225                 free(new_ports, M_CXGBE);
9226         }
9227         end_synchronized_op(sc, 0);
9228         return (rc);
9229 }
9230
/*
 * Format val / factor as a decimal string in buf (at most len bytes,
 * including the terminating NUL).
 *
 * The result is printed as an integer when val is an exact multiple of
 * factor.  Otherwise it is printed as "<integer>.<fraction>" with trailing
 * zeroes removed from the fraction.  The fraction is zero-padded on the left
 * to the number of decimal digits implied by factor, so that, for example,
 * 1050000 / 1000000 is shown as "1.05" and not "1.5".
 *
 * factor is expected to be a non-zero power of 10.
 */
static void
unit_conv(char *buf, size_t len, u_int val, u_int factor)
{
	u_int rem = val % factor;
	u_int f;
	int width;

	if (rem == 0) {
		snprintf(buf, len, "%u", val / factor);
		return;
	}

	/* Number of digits after the decimal point, e.g. 6 for 1000000. */
	width = 0;
	for (f = factor; f >= 10; f /= 10)
		width++;

	/* Each trailing zero dropped makes the fraction one digit shorter. */
	while (rem % 10 == 0) {
		rem /= 10;
		width--;
	}

	snprintf(buf, len, "%u.%0*u", val / factor, width, rem);
}
9244
9245 static int
9246 sysctl_tp_tick(SYSCTL_HANDLER_ARGS)
9247 {
9248         struct adapter *sc = arg1;
9249         char buf[16];
9250         u_int res, re;
9251         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9252
9253         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
9254         switch (arg2) {
9255         case 0:
9256                 /* timer_tick */
9257                 re = G_TIMERRESOLUTION(res);
9258                 break;
9259         case 1:
9260                 /* TCP timestamp tick */
9261                 re = G_TIMESTAMPRESOLUTION(res);
9262                 break;
9263         case 2:
9264                 /* DACK tick */
9265                 re = G_DELAYEDACKRESOLUTION(res);
9266                 break;
9267         default:
9268                 return (EDOOFUS);
9269         }
9270
9271         unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000);
9272
9273         return (sysctl_handle_string(oidp, buf, sizeof(buf), req));
9274 }
9275
9276 static int
9277 sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS)
9278 {
9279         struct adapter *sc = arg1;
9280         u_int res, dack_re, v;
9281         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9282
9283         res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION);
9284         dack_re = G_DELAYEDACKRESOLUTION(res);
9285         v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER);
9286
9287         return (sysctl_handle_int(oidp, &v, 0, req));
9288 }
9289
9290 static int
9291 sysctl_tp_timer(SYSCTL_HANDLER_ARGS)
9292 {
9293         struct adapter *sc = arg1;
9294         int reg = arg2;
9295         u_int tre;
9296         u_long tp_tick_us, v;
9297         u_int cclk_ps = 1000000000 / sc->params.vpd.cclk;
9298
9299         MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX ||
9300             reg == A_TP_PERS_MIN  || reg == A_TP_PERS_MAX ||
9301             reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL ||
9302             reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER);
9303
9304         tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION));
9305         tp_tick_us = (cclk_ps << tre) / 1000000;
9306
9307         if (reg == A_TP_INIT_SRTT)
9308                 v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg));
9309         else
9310                 v = tp_tick_us * t4_read_reg(sc, reg);
9311
9312         return (sysctl_handle_long(oidp, &v, 0, req));
9313 }
9314
9315 /*
9316  * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is
9317  * passed to this function.
9318  */
9319 static int
9320 sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS)
9321 {
9322         struct adapter *sc = arg1;
9323         int idx = arg2;
9324         u_int v;
9325
9326         MPASS(idx >= 0 && idx <= 24);
9327
9328         v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf;
9329
9330         return (sysctl_handle_int(oidp, &v, 0, req));
9331 }
9332
9333 static int
9334 sysctl_tp_backoff(SYSCTL_HANDLER_ARGS)
9335 {
9336         struct adapter *sc = arg1;
9337         int idx = arg2;
9338         u_int shift, v, r;
9339
9340         MPASS(idx >= 0 && idx < 16);
9341
9342         r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3);
9343         shift = (idx & 3) << 3;
9344         v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0;
9345
9346         return (sysctl_handle_int(oidp, &v, 0, req));
9347 }
9348
9349 static int
9350 sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS)
9351 {
9352         struct vi_info *vi = arg1;
9353         struct adapter *sc = vi->pi->adapter;
9354         int idx, rc, i;
9355         struct sge_ofld_rxq *ofld_rxq;
9356         uint8_t v;
9357
9358         idx = vi->ofld_tmr_idx;
9359
9360         rc = sysctl_handle_int(oidp, &idx, 0, req);
9361         if (rc != 0 || req->newptr == NULL)
9362                 return (rc);
9363
9364         if (idx < 0 || idx >= SGE_NTIMERS)
9365                 return (EINVAL);
9366
9367         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
9368             "t4otmr");
9369         if (rc)
9370                 return (rc);
9371
9372         v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1);
9373         for_each_ofld_rxq(vi, i, ofld_rxq) {
9374 #ifdef atomic_store_rel_8
9375                 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
9376 #else
9377                 ofld_rxq->iq.intr_params = v;
9378 #endif
9379         }
9380         vi->ofld_tmr_idx = idx;
9381
9382         end_synchronized_op(sc, LOCK_HELD);
9383         return (0);
9384 }
9385
9386 static int
9387 sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS)
9388 {
9389         struct vi_info *vi = arg1;
9390         struct adapter *sc = vi->pi->adapter;
9391         int idx, rc;
9392
9393         idx = vi->ofld_pktc_idx;
9394
9395         rc = sysctl_handle_int(oidp, &idx, 0, req);
9396         if (rc != 0 || req->newptr == NULL)
9397                 return (rc);
9398
9399         if (idx < -1 || idx >= SGE_NCOUNTERS)
9400                 return (EINVAL);
9401
9402         rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
9403             "t4opktc");
9404         if (rc)
9405                 return (rc);
9406
9407         if (vi->flags & VI_INIT_DONE)
9408                 rc = EBUSY; /* cannot be changed once the queues are created */
9409         else
9410                 vi->ofld_pktc_idx = idx;
9411
9412         end_synchronized_op(sc, LOCK_HELD);
9413         return (rc);
9414 }
9415 #endif
9416
9417 static int
9418 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
9419 {
9420         int rc;
9421
9422         if (cntxt->cid > M_CTXTQID)
9423                 return (EINVAL);
9424
9425         if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
9426             cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
9427                 return (EINVAL);
9428
9429         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
9430         if (rc)
9431                 return (rc);
9432
9433         if (sc->flags & FW_OK) {
9434                 rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
9435                     &cntxt->data[0]);
9436                 if (rc == 0)
9437                         goto done;
9438         }
9439
9440         /*
9441          * Read via firmware failed or wasn't even attempted.  Read directly via
9442          * the backdoor.
9443          */
9444         rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]);
9445 done:
9446         end_synchronized_op(sc, 0);
9447         return (rc);
9448 }
9449
9450 static int
9451 load_fw(struct adapter *sc, struct t4_data *fw)
9452 {
9453         int rc;
9454         uint8_t *fw_data;
9455
9456         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
9457         if (rc)
9458                 return (rc);
9459
9460         /*
9461          * The firmware, with the sole exception of the memory parity error
9462          * handler, runs from memory and not flash.  It is almost always safe to
9463          * install a new firmware on a running system.  Just set bit 1 in
9464          * hw.cxgbe.dflags or dev.<nexus>.<n>.dflags first.
9465          */
9466         if (sc->flags & FULL_INIT_DONE &&
9467             (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) {
9468                 rc = EBUSY;
9469                 goto done;
9470         }
9471
9472         fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
9473         if (fw_data == NULL) {
9474                 rc = ENOMEM;
9475                 goto done;
9476         }
9477
9478         rc = copyin(fw->data, fw_data, fw->len);
9479         if (rc == 0)
9480                 rc = -t4_load_fw(sc, fw_data, fw->len);
9481
9482         free(fw_data, M_CXGBE);
9483 done:
9484         end_synchronized_op(sc, 0);
9485         return (rc);
9486 }
9487
9488 static int
9489 load_cfg(struct adapter *sc, struct t4_data *cfg)
9490 {
9491         int rc;
9492         uint8_t *cfg_data = NULL;
9493
9494         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
9495         if (rc)
9496                 return (rc);
9497
9498         if (cfg->len == 0) {
9499                 /* clear */
9500                 rc = -t4_load_cfg(sc, NULL, 0);
9501                 goto done;
9502         }
9503
9504         cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK);
9505         if (cfg_data == NULL) {
9506                 rc = ENOMEM;
9507                 goto done;
9508         }
9509
9510         rc = copyin(cfg->data, cfg_data, cfg->len);
9511         if (rc == 0)
9512                 rc = -t4_load_cfg(sc, cfg_data, cfg->len);
9513
9514         free(cfg_data, M_CXGBE);
9515 done:
9516         end_synchronized_op(sc, 0);
9517         return (rc);
9518 }
9519
9520 static int
9521 load_boot(struct adapter *sc, struct t4_bootrom *br)
9522 {
9523         int rc;
9524         uint8_t *br_data = NULL;
9525         u_int offset;
9526
9527         if (br->len > 1024 * 1024)
9528                 return (EFBIG);
9529
9530         if (br->pf_offset == 0) {
9531                 /* pfidx */
9532                 if (br->pfidx_addr > 7)
9533                         return (EINVAL);
9534                 offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr,
9535                     A_PCIE_PF_EXPROM_OFST)));
9536         } else if (br->pf_offset == 1) {
9537                 /* offset */
9538                 offset = G_OFFSET(br->pfidx_addr);
9539         } else {
9540                 return (EINVAL);
9541         }
9542
9543         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr");
9544         if (rc)
9545                 return (rc);
9546
9547         if (br->len == 0) {
9548                 /* clear */
9549                 rc = -t4_load_boot(sc, NULL, offset, 0);
9550                 goto done;
9551         }
9552
9553         br_data = malloc(br->len, M_CXGBE, M_WAITOK);
9554         if (br_data == NULL) {
9555                 rc = ENOMEM;
9556                 goto done;
9557         }
9558
9559         rc = copyin(br->data, br_data, br->len);
9560         if (rc == 0)
9561                 rc = -t4_load_boot(sc, br_data, offset, br->len);
9562
9563         free(br_data, M_CXGBE);
9564 done:
9565         end_synchronized_op(sc, 0);
9566         return (rc);
9567 }
9568
9569 static int
9570 load_bootcfg(struct adapter *sc, struct t4_data *bc)
9571 {
9572         int rc;
9573         uint8_t *bc_data = NULL;
9574
9575         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf");
9576         if (rc)
9577                 return (rc);
9578
9579         if (bc->len == 0) {
9580                 /* clear */
9581                 rc = -t4_load_bootcfg(sc, NULL, 0);
9582                 goto done;
9583         }
9584
9585         bc_data = malloc(bc->len, M_CXGBE, M_WAITOK);
9586         if (bc_data == NULL) {
9587                 rc = ENOMEM;
9588                 goto done;
9589         }
9590
9591         rc = copyin(bc->data, bc_data, bc->len);
9592         if (rc == 0)
9593                 rc = -t4_load_bootcfg(sc, bc_data, bc->len);
9594
9595         free(bc_data, M_CXGBE);
9596 done:
9597         end_synchronized_op(sc, 0);
9598         return (rc);
9599 }
9600
9601 static int
9602 cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump)
9603 {
9604         int rc;
9605         struct cudbg_init *cudbg;
9606         void *handle, *buf;
9607
9608         /* buf is large, don't block if no memory is available */
9609         buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO);
9610         if (buf == NULL)
9611                 return (ENOMEM);
9612
9613         handle = cudbg_alloc_handle();
9614         if (handle == NULL) {
9615                 rc = ENOMEM;
9616                 goto done;
9617         }
9618
9619         cudbg = cudbg_get_init(handle);
9620         cudbg->adap = sc;
9621         cudbg->print = (cudbg_print_cb)printf;
9622
9623 #ifndef notyet
9624         device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n",
9625             __func__, dump->wr_flash, dump->len, dump->data);
9626 #endif
9627
9628         if (dump->wr_flash)
9629                 cudbg->use_flash = 1;
9630         MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap));
9631         memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap));
9632
9633         rc = cudbg_collect(handle, buf, &dump->len);
9634         if (rc != 0)
9635                 goto done;
9636
9637         rc = copyout(buf, dump->data, dump->len);
9638 done:
9639         cudbg_free_handle(handle);
9640         free(buf, M_CXGBE);
9641         return (rc);
9642 }
9643
9644 static void
9645 free_offload_policy(struct t4_offload_policy *op)
9646 {
9647         struct offload_rule *r;
9648         int i;
9649
9650         if (op == NULL)
9651                 return;
9652
9653         r = &op->rule[0];
9654         for (i = 0; i < op->nrules; i++, r++) {
9655                 free(r->bpf_prog.bf_insns, M_CXGBE);
9656         }
9657         free(op->rule, M_CXGBE);
9658         free(op, M_CXGBE);
9659 }
9660
9661 static int
9662 set_offload_policy(struct adapter *sc, struct t4_offload_policy *uop)
9663 {
9664         int i, rc, len;
9665         struct t4_offload_policy *op, *old;
9666         struct bpf_program *bf;
9667         const struct offload_settings *s;
9668         struct offload_rule *r;
9669         void *u;
9670
9671         if (!is_offload(sc))
9672                 return (ENODEV);
9673
9674         if (uop->nrules == 0) {
9675                 /* Delete installed policies. */
9676                 op = NULL;
9677                 goto set_policy;
9678         } if (uop->nrules > 256) { /* arbitrary */
9679                 return (E2BIG);
9680         }
9681
9682         /* Copy userspace offload policy to kernel */
9683         op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK);
9684         op->nrules = uop->nrules;
9685         len = op->nrules * sizeof(struct offload_rule);
9686         op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
9687         rc = copyin(uop->rule, op->rule, len);
9688         if (rc) {
9689                 free(op->rule, M_CXGBE);
9690                 free(op, M_CXGBE);
9691                 return (rc);
9692         }
9693
9694         r = &op->rule[0];
9695         for (i = 0; i < op->nrules; i++, r++) {
9696
9697                 /* Validate open_type */
9698                 if (r->open_type != OPEN_TYPE_LISTEN &&
9699                     r->open_type != OPEN_TYPE_ACTIVE &&
9700                     r->open_type != OPEN_TYPE_PASSIVE &&
9701                     r->open_type != OPEN_TYPE_DONTCARE) {
9702 error:
9703                         /*
9704                          * Rules 0 to i have malloc'd filters that need to be
9705                          * freed.  Rules i+1 to nrules have userspace pointers
9706                          * and should be left alone.
9707                          */
9708                         op->nrules = i;
9709                         free_offload_policy(op);
9710                         return (rc);
9711                 }
9712
9713                 /* Validate settings */
9714                 s = &r->settings;
9715                 if ((s->offload != 0 && s->offload != 1) ||
9716                     s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED ||
9717                     s->sched_class < -1 ||
9718                     s->sched_class >= sc->chip_params->nsched_cls) {
9719                         rc = EINVAL;
9720                         goto error;
9721                 }
9722
9723                 bf = &r->bpf_prog;
9724                 u = bf->bf_insns;       /* userspace ptr */
9725                 bf->bf_insns = NULL;
9726                 if (bf->bf_len == 0) {
9727                         /* legal, matches everything */
9728                         continue;
9729                 }
9730                 len = bf->bf_len * sizeof(*bf->bf_insns);
9731                 bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK);
9732                 rc = copyin(u, bf->bf_insns, len);
9733                 if (rc != 0)
9734                         goto error;
9735
9736                 if (!bpf_validate(bf->bf_insns, bf->bf_len)) {
9737                         rc = EINVAL;
9738                         goto error;
9739                 }
9740         }
9741 set_policy:
9742         rw_wlock(&sc->policy_lock);
9743         old = sc->policy;
9744         sc->policy = op;
9745         rw_wunlock(&sc->policy_lock);
9746         free_offload_policy(old);
9747
9748         return (0);
9749 }
9750
9751 #define MAX_READ_BUF_SIZE (128 * 1024)
9752 static int
9753 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
9754 {
9755         uint32_t addr, remaining, n;
9756         uint32_t *buf;
9757         int rc;
9758         uint8_t *dst;
9759
9760         rc = validate_mem_range(sc, mr->addr, mr->len);
9761         if (rc != 0)
9762                 return (rc);
9763
9764         buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK);
9765         addr = mr->addr;
9766         remaining = mr->len;
9767         dst = (void *)mr->data;
9768
9769         while (remaining) {
9770                 n = min(remaining, MAX_READ_BUF_SIZE);
9771                 read_via_memwin(sc, 2, addr, buf, n);
9772
9773                 rc = copyout(buf, dst, n);
9774                 if (rc != 0)
9775                         break;
9776
9777                 dst += n;
9778                 remaining -= n;
9779                 addr += n;
9780         }
9781
9782         free(buf, M_CXGBE);
9783         return (rc);
9784 }
9785 #undef MAX_READ_BUF_SIZE
9786
9787 static int
9788 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
9789 {
9790         int rc;
9791
9792         if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
9793                 return (EINVAL);
9794
9795         if (i2cd->len > sizeof(i2cd->data))
9796                 return (EFBIG);
9797
9798         rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
9799         if (rc)
9800                 return (rc);
9801         rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
9802             i2cd->offset, i2cd->len, &i2cd->data[0]);
9803         end_synchronized_op(sc, 0);
9804
9805         return (rc);
9806 }
9807
9808 int
9809 t4_os_find_pci_capability(struct adapter *sc, int cap)
9810 {
9811         int i;
9812
9813         return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0);
9814 }
9815
9816 int
9817 t4_os_pci_save_state(struct adapter *sc)
9818 {
9819         device_t dev;
9820         struct pci_devinfo *dinfo;
9821
9822         dev = sc->dev;
9823         dinfo = device_get_ivars(dev);
9824
9825         pci_cfg_save(dev, dinfo, 0);
9826         return (0);
9827 }
9828
9829 int
9830 t4_os_pci_restore_state(struct adapter *sc)
9831 {
9832         device_t dev;
9833         struct pci_devinfo *dinfo;
9834
9835         dev = sc->dev;
9836         dinfo = device_get_ivars(dev);
9837
9838         pci_cfg_restore(dev, dinfo);
9839         return (0);
9840 }
9841
9842 void
9843 t4_os_portmod_changed(struct port_info *pi)
9844 {
9845         struct adapter *sc = pi->adapter;
9846         struct vi_info *vi;
9847         struct ifnet *ifp;
9848         static const char *mod_str[] = {
9849                 NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
9850         };
9851
9852         KASSERT((pi->flags & FIXED_IFMEDIA) == 0,
9853             ("%s: port_type %u", __func__, pi->port_type));
9854
9855         vi = &pi->vi[0];
9856         if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) {
9857                 PORT_LOCK(pi);
9858                 build_medialist(pi);
9859                 if (pi->mod_type != FW_PORT_MOD_TYPE_NONE) {
9860                         fixup_link_config(pi);
9861                         apply_link_config(pi);
9862                 }
9863                 PORT_UNLOCK(pi);
9864                 end_synchronized_op(sc, LOCK_HELD);
9865         }
9866
9867         ifp = vi->ifp;
9868         if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
9869                 if_printf(ifp, "transceiver unplugged.\n");
9870         else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
9871                 if_printf(ifp, "unknown transceiver inserted.\n");
9872         else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
9873                 if_printf(ifp, "unsupported transceiver inserted.\n");
9874         else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
9875                 if_printf(ifp, "%dGbps %s transceiver inserted.\n",
9876                     port_top_speed(pi), mod_str[pi->mod_type]);
9877         } else {
9878                 if_printf(ifp, "transceiver (type %d) inserted.\n",
9879                     pi->mod_type);
9880         }
9881 }
9882
9883 void
9884 t4_os_link_changed(struct port_info *pi)
9885 {
9886         struct vi_info *vi;
9887         struct ifnet *ifp;
9888         struct link_config *lc;
9889         int v;
9890
9891         PORT_LOCK_ASSERT_OWNED(pi);
9892
9893         for_each_vi(pi, v, vi) {
9894                 ifp = vi->ifp;
9895                 if (ifp == NULL)
9896                         continue;
9897
9898                 lc = &pi->link_cfg;
9899                 if (lc->link_ok) {
9900                         ifp->if_baudrate = IF_Mbps(lc->speed);
9901                         if_link_state_change(ifp, LINK_STATE_UP);
9902                 } else {
9903                         if_link_state_change(ifp, LINK_STATE_DOWN);
9904                 }
9905         }
9906 }
9907
9908 void
9909 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
9910 {
9911         struct adapter *sc;
9912
9913         sx_slock(&t4_list_lock);
9914         SLIST_FOREACH(sc, &t4_list, link) {
9915                 /*
9916                  * func should not make any assumptions about what state sc is
9917                  * in - the only guarantee is that sc->sc_lock is a valid lock.
9918                  */
9919                 func(sc, arg);
9920         }
9921         sx_sunlock(&t4_list_lock);
9922 }
9923
9924 static int
9925 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
9926     struct thread *td)
9927 {
9928         int rc;
9929         struct adapter *sc = dev->si_drv1;
9930
9931         rc = priv_check(td, PRIV_DRIVER);
9932         if (rc != 0)
9933                 return (rc);
9934
9935         switch (cmd) {
9936         case CHELSIO_T4_GETREG: {
9937                 struct t4_reg *edata = (struct t4_reg *)data;
9938
9939                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9940                         return (EFAULT);
9941
9942                 if (edata->size == 4)
9943                         edata->val = t4_read_reg(sc, edata->addr);
9944                 else if (edata->size == 8)
9945                         edata->val = t4_read_reg64(sc, edata->addr);
9946                 else
9947                         return (EINVAL);
9948
9949                 break;
9950         }
9951         case CHELSIO_T4_SETREG: {
9952                 struct t4_reg *edata = (struct t4_reg *)data;
9953
9954                 if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
9955                         return (EFAULT);
9956
9957                 if (edata->size == 4) {
9958                         if (edata->val & 0xffffffff00000000)
9959                                 return (EINVAL);
9960                         t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
9961                 } else if (edata->size == 8)
9962                         t4_write_reg64(sc, edata->addr, edata->val);
9963                 else
9964                         return (EINVAL);
9965                 break;
9966         }
9967         case CHELSIO_T4_REGDUMP: {
9968                 struct t4_regdump *regs = (struct t4_regdump *)data;
9969                 int reglen = t4_get_regs_len(sc);
9970                 uint8_t *buf;
9971
9972                 if (regs->len < reglen) {
9973                         regs->len = reglen; /* hint to the caller */
9974                         return (ENOBUFS);
9975                 }
9976
9977                 regs->len = reglen;
9978                 buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
9979                 get_regs(sc, regs, buf);
9980                 rc = copyout(buf, regs->data, reglen);
9981                 free(buf, M_CXGBE);
9982                 break;
9983         }
9984         case CHELSIO_T4_GET_FILTER_MODE:
9985                 rc = get_filter_mode(sc, (uint32_t *)data);
9986                 break;
9987         case CHELSIO_T4_SET_FILTER_MODE:
9988                 rc = set_filter_mode(sc, *(uint32_t *)data);
9989                 break;
9990         case CHELSIO_T4_GET_FILTER:
9991                 rc = get_filter(sc, (struct t4_filter *)data);
9992                 break;
9993         case CHELSIO_T4_SET_FILTER:
9994                 rc = set_filter(sc, (struct t4_filter *)data);
9995                 break;
9996         case CHELSIO_T4_DEL_FILTER:
9997                 rc = del_filter(sc, (struct t4_filter *)data);
9998                 break;
9999         case CHELSIO_T4_GET_SGE_CONTEXT:
10000                 rc = get_sge_context(sc, (struct t4_sge_context *)data);
10001                 break;
10002         case CHELSIO_T4_LOAD_FW:
10003                 rc = load_fw(sc, (struct t4_data *)data);
10004                 break;
10005         case CHELSIO_T4_GET_MEM:
10006                 rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
10007                 break;
10008         case CHELSIO_T4_GET_I2C:
10009                 rc = read_i2c(sc, (struct t4_i2c_data *)data);
10010                 break;
10011         case CHELSIO_T4_CLEAR_STATS: {
10012                 int i, v, bg_map;
10013                 u_int port_id = *(uint32_t *)data;
10014                 struct port_info *pi;
10015                 struct vi_info *vi;
10016
10017                 if (port_id >= sc->params.nports)
10018                         return (EINVAL);
10019                 pi = sc->port[port_id];
10020                 if (pi == NULL)
10021                         return (EIO);
10022
10023                 /* MAC stats */
10024                 t4_clr_port_stats(sc, pi->tx_chan);
10025                 pi->tx_parse_error = 0;
10026                 pi->tnl_cong_drops = 0;
10027                 mtx_lock(&sc->reg_lock);
10028                 for_each_vi(pi, v, vi) {
10029                         if (vi->flags & VI_INIT_DONE)
10030                                 t4_clr_vi_stats(sc, vi->viid);
10031                 }
10032                 bg_map = pi->mps_bg_map;
10033                 v = 0;  /* reuse */
10034                 while (bg_map) {
10035                         i = ffs(bg_map) - 1;
10036                         t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
10037                             1, A_TP_MIB_TNL_CNG_DROP_0 + i);
10038                         bg_map &= ~(1 << i);
10039                 }
10040                 mtx_unlock(&sc->reg_lock);
10041
10042                 /*
10043                  * Since this command accepts a port, clear stats for
10044                  * all VIs on this port.
10045                  */
10046                 for_each_vi(pi, v, vi) {
10047                         if (vi->flags & VI_INIT_DONE) {
10048                                 struct sge_rxq *rxq;
10049                                 struct sge_txq *txq;
10050                                 struct sge_wrq *wrq;
10051
10052                                 for_each_rxq(vi, i, rxq) {
10053 #if defined(INET) || defined(INET6)
10054                                         rxq->lro.lro_queued = 0;
10055                                         rxq->lro.lro_flushed = 0;
10056 #endif
10057                                         rxq->rxcsum = 0;
10058                                         rxq->vlan_extraction = 0;
10059                                 }
10060
10061                                 for_each_txq(vi, i, txq) {
10062                                         txq->txcsum = 0;
10063                                         txq->tso_wrs = 0;
10064                                         txq->vlan_insertion = 0;
10065                                         txq->imm_wrs = 0;
10066                                         txq->sgl_wrs = 0;
10067                                         txq->txpkt_wrs = 0;
10068                                         txq->txpkts0_wrs = 0;
10069                                         txq->txpkts1_wrs = 0;
10070                                         txq->txpkts0_pkts = 0;
10071                                         txq->txpkts1_pkts = 0;
10072                                         txq->raw_wrs = 0;
10073                                         mp_ring_reset_stats(txq->r);
10074                                 }
10075
10076 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10077                                 /* nothing to clear for each ofld_rxq */
10078
10079                                 for_each_ofld_txq(vi, i, wrq) {
10080                                         wrq->tx_wrs_direct = 0;
10081                                         wrq->tx_wrs_copied = 0;
10082                                 }
10083 #endif
10084
10085                                 if (IS_MAIN_VI(vi)) {
10086                                         wrq = &sc->sge.ctrlq[pi->port_id];
10087                                         wrq->tx_wrs_direct = 0;
10088                                         wrq->tx_wrs_copied = 0;
10089                                 }
10090                         }
10091                 }
10092                 break;
10093         }
10094         case CHELSIO_T4_SCHED_CLASS:
10095                 rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
10096                 break;
10097         case CHELSIO_T4_SCHED_QUEUE:
10098                 rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
10099                 break;
10100         case CHELSIO_T4_GET_TRACER:
10101                 rc = t4_get_tracer(sc, (struct t4_tracer *)data);
10102                 break;
10103         case CHELSIO_T4_SET_TRACER:
10104                 rc = t4_set_tracer(sc, (struct t4_tracer *)data);
10105                 break;
10106         case CHELSIO_T4_LOAD_CFG:
10107                 rc = load_cfg(sc, (struct t4_data *)data);
10108                 break;
10109         case CHELSIO_T4_LOAD_BOOT:
10110                 rc = load_boot(sc, (struct t4_bootrom *)data);
10111                 break;
10112         case CHELSIO_T4_LOAD_BOOTCFG:
10113                 rc = load_bootcfg(sc, (struct t4_data *)data);
10114                 break;
10115         case CHELSIO_T4_CUDBG_DUMP:
10116                 rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data);
10117                 break;
10118         case CHELSIO_T4_SET_OFLD_POLICY:
10119                 rc = set_offload_policy(sc, (struct t4_offload_policy *)data);
10120                 break;
10121         default:
10122                 rc = ENOTTY;
10123         }
10124
10125         return (rc);
10126 }
10127
10128 #ifdef TCP_OFFLOAD
10129 static int
10130 toe_capability(struct vi_info *vi, int enable)
10131 {
10132         int rc;
10133         struct port_info *pi = vi->pi;
10134         struct adapter *sc = pi->adapter;
10135
10136         ASSERT_SYNCHRONIZED_OP(sc);
10137
10138         if (!is_offload(sc))
10139                 return (ENODEV);
10140
10141         if (enable) {
10142                 if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
10143                         /* TOE is already enabled. */
10144                         return (0);
10145                 }
10146
10147                 /*
10148                  * We need the port's queues around so that we're able to send
10149                  * and receive CPLs to/from the TOE even if the ifnet for this
10150                  * port has never been UP'd administratively.
10151                  */
10152                 if (!(vi->flags & VI_INIT_DONE)) {
10153                         rc = vi_full_init(vi);
10154                         if (rc)
10155                                 return (rc);
10156                 }
10157                 if (!(pi->vi[0].flags & VI_INIT_DONE)) {
10158                         rc = vi_full_init(&pi->vi[0]);
10159                         if (rc)
10160                                 return (rc);
10161                 }
10162
10163                 if (isset(&sc->offload_map, pi->port_id)) {
10164                         /* TOE is enabled on another VI of this port. */
10165                         pi->uld_vis++;
10166                         return (0);
10167                 }
10168
10169                 if (!uld_active(sc, ULD_TOM)) {
10170                         rc = t4_activate_uld(sc, ULD_TOM);
10171                         if (rc == EAGAIN) {
10172                                 log(LOG_WARNING,
10173                                     "You must kldload t4_tom.ko before trying "
10174                                     "to enable TOE on a cxgbe interface.\n");
10175                         }
10176                         if (rc != 0)
10177                                 return (rc);
10178                         KASSERT(sc->tom_softc != NULL,
10179                             ("%s: TOM activated but softc NULL", __func__));
10180                         KASSERT(uld_active(sc, ULD_TOM),
10181                             ("%s: TOM activated but flag not set", __func__));
10182                 }
10183
10184                 /* Activate iWARP and iSCSI too, if the modules are loaded. */
10185                 if (!uld_active(sc, ULD_IWARP))
10186                         (void) t4_activate_uld(sc, ULD_IWARP);
10187                 if (!uld_active(sc, ULD_ISCSI))
10188                         (void) t4_activate_uld(sc, ULD_ISCSI);
10189
10190                 pi->uld_vis++;
10191                 setbit(&sc->offload_map, pi->port_id);
10192         } else {
10193                 pi->uld_vis--;
10194
10195                 if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
10196                         return (0);
10197
10198                 KASSERT(uld_active(sc, ULD_TOM),
10199                     ("%s: TOM never initialized?", __func__));
10200                 clrbit(&sc->offload_map, pi->port_id);
10201         }
10202
10203         return (0);
10204 }
10205
10206 /*
10207  * Add an upper layer driver to the global list.
10208  */
10209 int
10210 t4_register_uld(struct uld_info *ui)
10211 {
10212         int rc = 0;
10213         struct uld_info *u;
10214
10215         sx_xlock(&t4_uld_list_lock);
10216         SLIST_FOREACH(u, &t4_uld_list, link) {
10217             if (u->uld_id == ui->uld_id) {
10218                     rc = EEXIST;
10219                     goto done;
10220             }
10221         }
10222
10223         SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
10224         ui->refcount = 0;
10225 done:
10226         sx_xunlock(&t4_uld_list_lock);
10227         return (rc);
10228 }
10229
10230 int
10231 t4_unregister_uld(struct uld_info *ui)
10232 {
10233         int rc = EINVAL;
10234         struct uld_info *u;
10235
10236         sx_xlock(&t4_uld_list_lock);
10237
10238         SLIST_FOREACH(u, &t4_uld_list, link) {
10239             if (u == ui) {
10240                     if (ui->refcount > 0) {
10241                             rc = EBUSY;
10242                             goto done;
10243                     }
10244
10245                     SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
10246                     rc = 0;
10247                     goto done;
10248             }
10249         }
10250 done:
10251         sx_xunlock(&t4_uld_list_lock);
10252         return (rc);
10253 }
10254
10255 int
10256 t4_activate_uld(struct adapter *sc, int id)
10257 {
10258         int rc;
10259         struct uld_info *ui;
10260
10261         ASSERT_SYNCHRONIZED_OP(sc);
10262
10263         if (id < 0 || id > ULD_MAX)
10264                 return (EINVAL);
10265         rc = EAGAIN;    /* kldoad the module with this ULD and try again. */
10266
10267         sx_slock(&t4_uld_list_lock);
10268
10269         SLIST_FOREACH(ui, &t4_uld_list, link) {
10270                 if (ui->uld_id == id) {
10271                         if (!(sc->flags & FULL_INIT_DONE)) {
10272                                 rc = adapter_full_init(sc);
10273                                 if (rc != 0)
10274                                         break;
10275                         }
10276
10277                         rc = ui->activate(sc);
10278                         if (rc == 0) {
10279                                 setbit(&sc->active_ulds, id);
10280                                 ui->refcount++;
10281                         }
10282                         break;
10283                 }
10284         }
10285
10286         sx_sunlock(&t4_uld_list_lock);
10287
10288         return (rc);
10289 }
10290
10291 int
10292 t4_deactivate_uld(struct adapter *sc, int id)
10293 {
10294         int rc;
10295         struct uld_info *ui;
10296
10297         ASSERT_SYNCHRONIZED_OP(sc);
10298
10299         if (id < 0 || id > ULD_MAX)
10300                 return (EINVAL);
10301         rc = ENXIO;
10302
10303         sx_slock(&t4_uld_list_lock);
10304
10305         SLIST_FOREACH(ui, &t4_uld_list, link) {
10306                 if (ui->uld_id == id) {
10307                         rc = ui->deactivate(sc);
10308                         if (rc == 0) {
10309                                 clrbit(&sc->active_ulds, id);
10310                                 ui->refcount--;
10311                         }
10312                         break;
10313                 }
10314         }
10315
10316         sx_sunlock(&t4_uld_list_lock);
10317
10318         return (rc);
10319 }
10320
10321 int
10322 uld_active(struct adapter *sc, int uld_id)
10323 {
10324
10325         MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
10326
10327         return (isset(&sc->active_ulds, uld_id));
10328 }
10329 #endif
10330
/*
 * Settle the value of a queue-count tunable.
 *
 * t  = ptr to tunable.
 * nc = number of CPUs.
 * c  = compiled in default for that tunable.
 *
 * A positive tunable is left untouched.  A negative one requests its
 * magnitude and zero requests the compiled in default; in both cases the
 * result is capped at the number of CPUs.
 */
static void
calculate_nqueues(int *t, int nc, const int c)
{
        int wanted;

        if (*t > 0)
                return;

        if (*t < 0)
                wanted = -*t;
        else
                wanted = c;

        *t = min(nc, wanted);
}
10346
10347 /*
10348  * Come up with reasonable defaults for some of the tunables, provided they're
10349  * not set by the user (in which case we'll use the values as is).
10350  */
10351 static void
10352 tweak_tunables(void)
10353 {
10354         int nc = mp_ncpus;      /* our snapshot of the number of CPUs */
10355
10356         if (t4_ntxq < 1) {
10357 #ifdef RSS
10358                 t4_ntxq = rss_getnumbuckets();
10359 #else
10360                 calculate_nqueues(&t4_ntxq, nc, NTXQ);
10361 #endif
10362         }
10363
10364         calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI);
10365
10366         if (t4_nrxq < 1) {
10367 #ifdef RSS
10368                 t4_nrxq = rss_getnumbuckets();
10369 #else
10370                 calculate_nqueues(&t4_nrxq, nc, NRXQ);
10371 #endif
10372         }
10373
10374         calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI);
10375
10376 #if defined(TCP_OFFLOAD) || defined(RATELIMIT)
10377         calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ);
10378         calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI);
10379 #endif
10380 #ifdef TCP_OFFLOAD
10381         calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ);
10382         calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI);
10383
10384         if (t4_toecaps_allowed == -1)
10385                 t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
10386
10387         if (t4_rdmacaps_allowed == -1) {
10388                 t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP |
10389                     FW_CAPS_CONFIG_RDMA_RDMAC;
10390         }
10391
10392         if (t4_iscsicaps_allowed == -1) {
10393                 t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU |
10394                     FW_CAPS_CONFIG_ISCSI_TARGET_PDU |
10395                     FW_CAPS_CONFIG_ISCSI_T10DIF;
10396         }
10397
10398         if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS)
10399                 t4_tmr_idx_ofld = TMR_IDX_OFLD;
10400
10401         if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS)
10402                 t4_pktc_idx_ofld = PKTC_IDX_OFLD;
10403 #else
10404         if (t4_toecaps_allowed == -1)
10405                 t4_toecaps_allowed = 0;
10406
10407         if (t4_rdmacaps_allowed == -1)
10408                 t4_rdmacaps_allowed = 0;
10409
10410         if (t4_iscsicaps_allowed == -1)
10411                 t4_iscsicaps_allowed = 0;
10412 #endif
10413
10414 #ifdef DEV_NETMAP
10415         calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI);
10416         calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI);
10417 #endif
10418
10419         if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS)
10420                 t4_tmr_idx = TMR_IDX;
10421
10422         if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS)
10423                 t4_pktc_idx = PKTC_IDX;
10424
10425         if (t4_qsize_txq < 128)
10426                 t4_qsize_txq = 128;
10427
10428         if (t4_qsize_rxq < 128)
10429                 t4_qsize_rxq = 128;
10430         while (t4_qsize_rxq & 7)
10431                 t4_qsize_rxq++;
10432
10433         t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
10434
10435         /*
10436          * Number of VIs to create per-port.  The first VI is the "main" regular
10437          * VI for the port.  The rest are additional virtual interfaces on the
10438          * same physical port.  Note that the main VI does not have native
10439          * netmap support but the extra VIs do.
10440          *
10441          * Limit the number of VIs per port to the number of available
10442          * MAC addresses per port.
10443          */
10444         if (t4_num_vis < 1)
10445                 t4_num_vis = 1;
10446         if (t4_num_vis > nitems(vi_mac_funcs)) {
10447                 t4_num_vis = nitems(vi_mac_funcs);
10448                 printf("cxgbe: number of VIs limited to %d\n", t4_num_vis);
10449         }
10450
10451         if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) {
10452                 pcie_relaxed_ordering = 1;
10453 #if defined(__i386__) || defined(__amd64__)
10454                 if (cpu_vendor_id == CPU_VENDOR_INTEL)
10455                         pcie_relaxed_ordering = 0;
10456 #endif
10457         }
10458 }
10459
10460 #ifdef DDB
10461 static void
10462 t4_dump_tcb(struct adapter *sc, int tid)
10463 {
10464         uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos;
10465
10466         reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2);
10467         save = t4_read_reg(sc, reg);
10468         base = sc->memwin[2].mw_base;
10469
10470         /* Dump TCB for the tid */
10471         tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
10472         tcb_addr += tid * TCB_SIZE;
10473
10474         if (is_t4(sc)) {
10475                 pf = 0;
10476                 win_pos = tcb_addr & ~0xf;      /* start must be 16B aligned */
10477         } else {
10478                 pf = V_PFNUM(sc->pf);
10479                 win_pos = tcb_addr & ~0x7f;     /* start must be 128B aligned */
10480         }
10481         t4_write_reg(sc, reg, win_pos | pf);
10482         t4_read_reg(sc, reg);
10483
10484         off = tcb_addr - win_pos;
10485         for (i = 0; i < 4; i++) {
10486                 uint32_t buf[8];
10487                 for (j = 0; j < 8; j++, off += 4)
10488                         buf[j] = htonl(t4_read_reg(sc, base + off));
10489
10490                 db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
10491                     buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
10492                     buf[7]);
10493         }
10494
10495         t4_write_reg(sc, reg, save);
10496         t4_read_reg(sc, reg);
10497 }
10498
10499 static void
10500 t4_dump_devlog(struct adapter *sc)
10501 {
10502         struct devlog_params *dparams = &sc->params.devlog;
10503         struct fw_devlog_e e;
10504         int i, first, j, m, nentries, rc;
10505         uint64_t ftstamp = UINT64_MAX;
10506
10507         if (dparams->start == 0) {
10508                 db_printf("devlog params not valid\n");
10509                 return;
10510         }
10511
10512         nentries = dparams->size / sizeof(struct fw_devlog_e);
10513         m = fwmtype_to_hwmtype(dparams->memtype);
10514
10515         /* Find the first entry. */
10516         first = -1;
10517         for (i = 0; i < nentries && !db_pager_quit; i++) {
10518                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
10519                     sizeof(e), (void *)&e);
10520                 if (rc != 0)
10521                         break;
10522
10523                 if (e.timestamp == 0)
10524                         break;
10525
10526                 e.timestamp = be64toh(e.timestamp);
10527                 if (e.timestamp < ftstamp) {
10528                         ftstamp = e.timestamp;
10529                         first = i;
10530                 }
10531         }
10532
10533         if (first == -1)
10534                 return;
10535
10536         i = first;
10537         do {
10538                 rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e),
10539                     sizeof(e), (void *)&e);
10540                 if (rc != 0)
10541                         return;
10542
10543                 if (e.timestamp == 0)
10544                         return;
10545
10546                 e.timestamp = be64toh(e.timestamp);
10547                 e.seqno = be32toh(e.seqno);
10548                 for (j = 0; j < 8; j++)
10549                         e.params[j] = be32toh(e.params[j]);
10550
10551                 db_printf("%10d  %15ju  %8s  %8s  ",
10552                     e.seqno, e.timestamp,
10553                     (e.level < nitems(devlog_level_strings) ?
10554                         devlog_level_strings[e.level] : "UNKNOWN"),
10555                     (e.facility < nitems(devlog_facility_strings) ?
10556                         devlog_facility_strings[e.facility] : "UNKNOWN"));
10557                 db_printf(e.fmt, e.params[0], e.params[1], e.params[2],
10558                     e.params[3], e.params[4], e.params[5], e.params[6],
10559                     e.params[7]);
10560
10561                 if (++i == nentries)
10562                         i = 0;
10563         } while (i != first && !db_pager_quit);
10564 }
10565
/*
 * Command table for the "show t4" family of DDB commands.  The DB_FUNC
 * definitions that follow ("devlog" and "tcb") place their commands in this
 * table.
 */
static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table);
_DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table);
10568
10569 DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL)
10570 {
10571         device_t dev;
10572         int t;
10573         bool valid;
10574
10575         valid = false;
10576         t = db_read_token();
10577         if (t == tIDENT) {
10578                 dev = device_lookup_by_name(db_tok_string);
10579                 valid = true;
10580         }
10581         db_skip_to_eol();
10582         if (!valid) {
10583                 db_printf("usage: show t4 devlog <nexus>\n");
10584                 return;
10585         }
10586
10587         if (dev == NULL) {
10588                 db_printf("device not found\n");
10589                 return;
10590         }
10591
10592         t4_dump_devlog(device_get_softc(dev));
10593 }
10594
10595 DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
10596 {
10597         device_t dev;
10598         int radix, tid, t;
10599         bool valid;
10600
10601         valid = false;
10602         radix = db_radix;
10603         db_radix = 10;
10604         t = db_read_token();
10605         if (t == tIDENT) {
10606                 dev = device_lookup_by_name(db_tok_string);
10607                 t = db_read_token();
10608                 if (t == tNUMBER) {
10609                         tid = db_tok_number;
10610                         valid = true;
10611                 }
10612         }       
10613         db_radix = radix;
10614         db_skip_to_eol();
10615         if (!valid) {
10616                 db_printf("usage: show t4 tcb <nexus> <tid>\n");
10617                 return;
10618         }
10619
10620         if (dev == NULL) {
10621                 db_printf("device not found\n");
10622                 return;
10623         }
10624         if (tid < 0) {
10625                 db_printf("invalid tid\n");
10626                 return;
10627         }
10628
10629         t4_dump_tcb(device_get_softc(dev), tid);
10630 }
10631 #endif
10632
10633 /*
10634  * Borrowed from cesa_prep_aes_key().
10635  *
10636  * NB: The crypto engine wants the words in the decryption key in reverse
10637  * order.
10638  */
10639 void
10640 t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits)
10641 {
10642         uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)];
10643         uint32_t *dkey;
10644         int i;
10645
10646         rijndaelKeySetupEnc(ek, enc_key, kbits);
10647         dkey = dec_key;
10648         dkey += (kbits / 8) / 4;
10649
10650         switch (kbits) {
10651         case 128:
10652                 for (i = 0; i < 4; i++)
10653                         *--dkey = htobe32(ek[4 * 10 + i]);
10654                 break;
10655         case 192:
10656                 for (i = 0; i < 2; i++)
10657                         *--dkey = htobe32(ek[4 * 11 + 2 + i]);
10658                 for (i = 0; i < 4; i++)
10659                         *--dkey = htobe32(ek[4 * 12 + i]);
10660                 break;
10661         case 256:
10662                 for (i = 0; i < 4; i++)
10663                         *--dkey = htobe32(ek[4 * 13 + i]);
10664                 for (i = 0; i < 4; i++)
10665                         *--dkey = htobe32(ek[4 * 14 + i]);
10666                 break;
10667         }
10668         MPASS(dkey == dec_key);
10669 }
10670
/*
 * Held exclusively by mod_event() around its MOD_LOAD and MOD_UNLOAD
 * handling, which covers the 'loaded' counter kept there.
 */
static struct sx mlu;   /* mod load unload */
SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
10673
10674 static int
10675 mod_event(module_t mod, int cmd, void *arg)
10676 {
10677         int rc = 0;
10678         static int loaded = 0;
10679
10680         switch (cmd) {
10681         case MOD_LOAD:
10682                 sx_xlock(&mlu);
10683                 if (loaded++ == 0) {
10684                         t4_sge_modload();
10685                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10686                             t4_filter_rpl, CPL_COOKIE_FILTER);
10687                         t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL,
10688                             do_l2t_write_rpl, CPL_COOKIE_FILTER);
10689                         t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL,
10690                             t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER);
10691                         t4_register_shared_cpl_handler(CPL_SET_TCB_RPL,
10692                             t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER);
10693                         t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS,
10694                             t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER);
10695                         t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt);
10696                         t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt);
10697                         t4_register_cpl_handler(CPL_SMT_WRITE_RPL,
10698                             do_smt_write_rpl);
10699                         sx_init(&t4_list_lock, "T4/T5 adapters");
10700                         SLIST_INIT(&t4_list);
10701                         callout_init(&fatal_callout, 1);
10702 #ifdef TCP_OFFLOAD
10703                         sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
10704                         SLIST_INIT(&t4_uld_list);
10705 #endif
10706 #ifdef INET6
10707                         t4_clip_modload();
10708 #endif
10709                         t4_tracer_modload();
10710                         tweak_tunables();
10711                 }
10712                 sx_xunlock(&mlu);
10713                 break;
10714
10715         case MOD_UNLOAD:
10716                 sx_xlock(&mlu);
10717                 if (--loaded == 0) {
10718                         int tries;
10719
10720                         sx_slock(&t4_list_lock);
10721                         if (!SLIST_EMPTY(&t4_list)) {
10722                                 rc = EBUSY;
10723                                 sx_sunlock(&t4_list_lock);
10724                                 goto done_unload;
10725                         }
10726 #ifdef TCP_OFFLOAD
10727                         sx_slock(&t4_uld_list_lock);
10728                         if (!SLIST_EMPTY(&t4_uld_list)) {
10729                                 rc = EBUSY;
10730                                 sx_sunlock(&t4_uld_list_lock);
10731                                 sx_sunlock(&t4_list_lock);
10732                                 goto done_unload;
10733                         }
10734 #endif
10735                         tries = 0;
10736                         while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
10737                                 uprintf("%ju clusters with custom free routine "
10738                                     "still is use.\n", t4_sge_extfree_refs());
10739                                 pause("t4unload", 2 * hz);
10740                         }
10741 #ifdef TCP_OFFLOAD
10742                         sx_sunlock(&t4_uld_list_lock);
10743 #endif
10744                         sx_sunlock(&t4_list_lock);
10745
10746                         if (t4_sge_extfree_refs() == 0) {
10747                                 t4_tracer_modunload();
10748 #ifdef INET6
10749                                 t4_clip_modunload();
10750 #endif
10751 #ifdef TCP_OFFLOAD
10752                                 sx_destroy(&t4_uld_list_lock);
10753 #endif
10754                                 sx_destroy(&t4_list_lock);
10755                                 t4_sge_modunload();
10756                                 loaded = 0;
10757                         } else {
10758                                 rc = EBUSY;
10759                                 loaded++;       /* undo earlier decrement */
10760                         }
10761                 }
10762 done_unload:
10763                 sx_xunlock(&mlu);
10764                 break;
10765         }
10766
10767         return (rc);
10768 }
10769
/* One devclass for each driver registered below. */
static devclass_t t4_devclass, t5_devclass, t6_devclass;
static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass;
static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass;

/*
 * The nexus drivers attach to the pci bus.  All three register mod_event as
 * their module event handler, and all depend on the firmware module (and on
 * netmap when DEV_NETMAP is defined).
 */
DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
MODULE_VERSION(t4nex, 1);
MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
MODULE_VERSION(t5nex, 1);
MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0);
MODULE_VERSION(t6nex, 1);
MODULE_DEPEND(t6nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t6nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

/* cxgbe, cxl, and cc attach under t4nex, t5nex, and t6nex respectively. */
DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
MODULE_VERSION(cxgbe, 1);

DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
MODULE_VERSION(cxl, 1);

DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0);
MODULE_VERSION(cc, 1);

/* vcxgbe, vcxl, and vcc attach under cxgbe, cxl, and cc respectively. */
DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
MODULE_VERSION(vcxgbe, 1);

DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
MODULE_VERSION(vcxl, 1);

DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0);
MODULE_VERSION(vcc, 1);