]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/cxgbe/t4_netmap.c
cxgbe ddp: Trim stale function prototype
[FreeBSD/FreeBSD.git] / sys / dev / cxgbe / t4_netmap.c
1 /*-
2  * Copyright (c) 2014 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: Navdeep Parhar <np@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31
32 #ifdef DEV_NETMAP
33 #include <sys/param.h>
34 #include <sys/bus.h>
35 #include <sys/eventhandler.h>
36 #include <sys/lock.h>
37 #include <sys/mbuf.h>
38 #include <sys/module.h>
39 #include <sys/selinfo.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <machine/bus.h>
43 #include <net/ethernet.h>
44 #include <net/if.h>
45 #include <net/if_media.h>
46 #include <net/if_var.h>
47 #include <net/if_clone.h>
48 #include <net/if_types.h>
49 #include <net/netmap.h>
50 #include <dev/netmap/netmap_kern.h>
51
52 #include "common/common.h"
53 #include "common/t4_regs.h"
54 #include "common/t4_regs_values.h"
55
/*
 * Defined outside this file.  Consulted when the freelist half of the ingress
 * queue command is built (selects F_FW_IQ_CMD_FL0PADEN).
 */
extern int fl_pad;      /* XXXNM */

/*
 * 0 = normal netmap rx
 * 1 = black hole
 * 2 = supermassive black hole (buffer packing enabled)
 *
 * A value of 2 makes the rx hardware queue setup request
 * F_FW_IQ_CMD_FL0PACKEN for the freelist.
 */
int black_hole = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RWTUN, &black_hole, 0,
    "Sink incoming packets.");

/* Exposed as hw.cxgbe.nm_rx_ndesc; see the sysctl description for semantics. */
int rx_ndesc = 256;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN,
    &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated.");

/* Exposed as hw.cxgbe.nm_rx_nframes; see the sysctl description for semantics. */
int rx_nframes = 64;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_nframes, CTLFLAG_RWTUN,
    &rx_nframes, 0, "max # of frames received before waking up netmap rx.");

/*
 * Interrupt holdoff timer index.  Written to the GTS register (via
 * V_QINTR_TIMER_IDX) when a netmap rx hardware queue is created.
 */
int holdoff_tmr_idx = 2;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN,
    &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues.");
78
79 /*
80  * Congestion drops.
81  * -1: no congestion feedback (not recommended).
82  *  0: backpressure the channel instead of dropping packets right away.
83  *  1: no backpressure, drop packets for the congested queue immediately.
84  */
85 static int nm_cong_drop = 1;
86 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_cong_drop, CTLFLAG_RWTUN,
87     &nm_cong_drop, 0,
88     "Congestion control for netmap rx queues (0 = backpressure, 1 = drop");
89
/* Exposed as hw.cxgbe.starve_fl; see the sysctl description for semantics. */
int starve_fl = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, starve_fl, CTLFLAG_RWTUN,
    &starve_fl, 0, "Don't ring fl db for netmap rx queues.");

/*
 * Try to process tx credits in bulk.  This may cause a delay in the return of
 * tx credits and is suitable for bursty or non-stop tx only.
 */
int lazy_tx_credit_flush = 1;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, lazy_tx_credit_flush, CTLFLAG_RWTUN,
    &lazy_tx_credit_flush, 0, "lazy credit flush for netmap tx queues.");

/*
 * Split the netmap rx queues into two groups that populate separate halves of
 * the RSS indirection table.  This allows filters with hashmask to steer to a
 * particular group of queues.
 *
 * Only honoured when the VI has more than one netmap rx queue; otherwise the
 * simple RSS layout is used.
 */
static int nm_split_rss = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_split_rss, CTLFLAG_RWTUN,
    &nm_split_rss, 0, "Split the netmap rx queues into two groups.");

/*
 * netmap(4) says "netmap does not use features such as checksum offloading, TCP
 * segmentation offloading, encryption, VLAN encapsulation/decapsulation, etc."
 * but this knob can be used to get the hardware to checksum all tx traffic
 * anyway.
 */
static int nm_txcsum = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_txcsum, CTLFLAG_RWTUN,
    &nm_txcsum, 0, "Enable transmit checksum offloading.");

/*
 * Forward declarations: the software-queue free routines call these before
 * their definitions appear.
 */
static int free_nm_rxq_hwq(struct vi_info *, struct sge_nm_rxq *);
static int free_nm_txq_hwq(struct vi_info *, struct sge_nm_txq *);
123
124 int
125 alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx,
126     int idx)
127 {
128         int rc;
129         struct sysctl_oid *oid;
130         struct sysctl_oid_list *children;
131         struct sysctl_ctx_list *ctx;
132         char name[16];
133         size_t len;
134         struct adapter *sc = vi->adapter;
135         struct netmap_adapter *na = NA(vi->ifp);
136
137         MPASS(na != NULL);
138
139         len = vi->qsize_rxq * IQ_ESIZE;
140         rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map,
141             &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc);
142         if (rc != 0)
143                 return (rc);
144
145         len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len;
146         rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map,
147             &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc);
148         if (rc != 0)
149                 return (rc);
150
151         nm_rxq->vi = vi;
152         nm_rxq->nid = idx;
153         nm_rxq->iq_cidx = 0;
154         nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE;
155         nm_rxq->iq_gen = F_RSPD_GEN;
156         nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
157         nm_rxq->fl_sidx = na->num_rx_desc;
158         nm_rxq->fl_sidx2 = nm_rxq->fl_sidx;     /* copy for rxsync cacheline */
159         nm_rxq->intr_idx = intr_idx;
160         nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID;
161
162         ctx = &vi->ctx;
163         children = SYSCTL_CHILDREN(vi->nm_rxq_oid);
164
165         snprintf(name, sizeof(name), "%d", idx);
166         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name,
167             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "rx queue");
168         children = SYSCTL_CHILDREN(oid);
169
170         SYSCTL_ADD_U16(ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD,
171             &nm_rxq->iq_abs_id, 0, "absolute id of the queue");
172         SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
173             &nm_rxq->iq_cntxt_id, 0, "SGE context id of the queue");
174         SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
175             &nm_rxq->iq_cidx, 0, "consumer index");
176
177         children = SYSCTL_CHILDREN(oid);
178         oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl",
179             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist");
180         children = SYSCTL_CHILDREN(oid);
181
182         SYSCTL_ADD_U16(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
183             &nm_rxq->fl_cntxt_id, 0, "SGE context id of the freelist");
184         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
185             &nm_rxq->fl_cidx, 0, "consumer index");
186         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
187             &nm_rxq->fl_pidx, 0, "producer index");
188
189         return (rc);
190 }
191
192 int
193 free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
194 {
195         struct adapter *sc = vi->adapter;
196
197         if (!(vi->flags & VI_INIT_DONE))
198                 return (0);
199
200         if (nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID)
201                 free_nm_rxq_hwq(vi, nm_rxq);
202         MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID);
203
204         free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba,
205             nm_rxq->iq_desc);
206         free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba,
207             nm_rxq->fl_desc);
208
209         return (0);
210 }
211
212 int
213 alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx)
214 {
215         int rc;
216         size_t len;
217         struct port_info *pi = vi->pi;
218         struct adapter *sc = pi->adapter;
219         struct netmap_adapter *na = NA(vi->ifp);
220         char name[16];
221         struct sysctl_oid *oid;
222         struct sysctl_oid_list *children = SYSCTL_CHILDREN(vi->nm_txq_oid);
223
224         len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len;
225         rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map,
226             &nm_txq->ba, (void **)&nm_txq->desc);
227         if (rc)
228                 return (rc);
229
230         nm_txq->pidx = nm_txq->cidx = 0;
231         nm_txq->sidx = na->num_tx_desc;
232         nm_txq->nid = idx;
233         nm_txq->iqidx = iqidx;
234         nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
235             V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
236             V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
237         if (sc->params.fw_vers >= FW_VERSION32(1, 24, 11, 0))
238                 nm_txq->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR));
239         else
240                 nm_txq->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
241         nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID;
242
243         snprintf(name, sizeof(name), "%d", idx);
244         oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name,
245             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "netmap tx queue");
246         children = SYSCTL_CHILDREN(oid);
247
248         SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
249             &nm_txq->cntxt_id, 0, "SGE context id of the queue");
250         SYSCTL_ADD_U16(&vi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
251             &nm_txq->cidx, 0, "consumer index");
252         SYSCTL_ADD_U16(&vi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
253             &nm_txq->pidx, 0, "producer index");
254
255         return (rc);
256 }
257
258 int
259 free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
260 {
261         struct adapter *sc = vi->adapter;
262
263         if (!(vi->flags & VI_INIT_DONE))
264                 return (0);
265
266         if (nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID)
267                 free_nm_txq_hwq(vi, nm_txq);
268         MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID);
269
270         free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba,
271             nm_txq->desc);
272
273         return (0);
274 }
275
/*
 * Create (or restart) the hardware ingress queue and freelist that back a
 * netmap rx queue.
 *
 * Both descriptor rings are zeroed and an FW_IQ_CMD is sent through the
 * mailbox.  The ALLOC flag is set only when the queue has no context id yet;
 * otherwise the previously assigned iq/fl/physical ids are passed back in.
 * On success the ids returned in the reply are recorded, the queue is entered
 * in the adapter's iqmap/eqmap lookup tables, the freelist doorbell value is
 * computed, and the ingress queue's GTS register is written with the holdoff
 * timer index.
 *
 * Returns 0 on success or the negated return value of t4_wr_mbox().  Panics
 * if a returned context id falls outside the corresponding lookup table.
 */
static int
alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
{
        int rc, cntxt_id;
        __be32 v;
        struct adapter *sc = vi->adapter;
        struct port_info *pi = vi->pi;
        struct sge_params *sp = &sc->params.sge;
        struct netmap_adapter *na = NA(vi->ifp);
        struct fw_iq_cmd c;
        /* Sample the tunable once so the whole function sees one value. */
        const int cong_drop = nm_cong_drop;
        const int cong_map = pi->rx_e_chan_map;

        MPASS(na != NULL);
        MPASS(nm_rxq->iq_desc != NULL);
        MPASS(nm_rxq->fl_desc != NULL);

        /* Start from clean rings; sizes match those used at allocation. */
        bzero(nm_rxq->iq_desc, vi->qsize_rxq * IQ_ESIZE);
        bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + sp->spg_len);

        bzero(&c, sizeof(c));
        c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
            F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
            V_FW_IQ_CMD_VFN(0));
        c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_IQSTART | FW_LEN16(c));
        if (nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID)
                c.alloc_to_len16 |= htobe32(F_FW_IQ_CMD_ALLOC);
        else {
                /* Queue already has ids: name them instead of allocating. */
                c.iqid = htobe16(nm_rxq->iq_cntxt_id);
                c.fl0id = htobe16(nm_rxq->fl_cntxt_id);
                c.fl1id = htobe16(0xffff);
                c.physiqid = htobe16(nm_rxq->iq_abs_id);
        }
        MPASS(!forwarding_intr_to_fwq(sc));
        KASSERT(nm_rxq->intr_idx < sc->intr_count,
            ("%s: invalid direct intr_idx %d", __func__, nm_rxq->intr_idx));
        v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx);
        c.type_to_iqandstindex = htobe32(v |
            V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
            V_FW_IQ_CMD_VIID(vi->viid) |
            V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
        c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
            F_FW_IQ_CMD_IQGTSMODE |
            V_FW_IQ_CMD_IQINTCNTTHRESH(0) |
            V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
        c.iqsize = htobe16(vi->qsize_rxq);
        c.iqaddr = htobe64(nm_rxq->iq_ba);
        /* Congestion feedback bits are requested unless disabled with -1. */
        if (cong_drop != -1) {
                c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN |
                    V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) | F_FW_IQ_CMD_FL0CONGCIF |
                    F_FW_IQ_CMD_FL0CONGEN);
        }
        /* Padding follows fl_pad; packing only in black_hole mode 2. */
        c.iqns_to_fl0congen |=
            htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
                V_FW_IQ_CMD_IQTYPE(FW_IQ_IQTYPE_NIC) |
                F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
                (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
                (black_hole == 2 ? F_FW_IQ_CMD_FL0PACKEN : 0));
        /* Fetch burst limits differ between chips up to T5 and later ones. */
        c.fl0dcaen_to_fl0cidxfthresh =
            htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ?
                X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B_T6) |
                V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ?
                X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
        /* Freelist size is given in units of 8 buffers plus the status page. */
        c.fl0size = htobe16(na->num_rx_desc / 8 + sp->spg_len / EQ_ESIZE);
        c.fl0addr = htobe64(nm_rxq->fl_ba);

        /* The reply is written back into 'c'. */
        rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
        if (rc != 0) {
                device_printf(sc->dev,
                    "failed to create netmap ingress queue: %d\n", rc);
                return (rc);
        }

        nm_rxq->iq_cidx = 0;
        MPASS(nm_rxq->iq_sidx == vi->qsize_rxq - sp->spg_len / IQ_ESIZE);
        nm_rxq->iq_gen = F_RSPD_GEN;
        nm_rxq->iq_cntxt_id = be16toh(c.iqid);
        nm_rxq->iq_abs_id = be16toh(c.physiqid);
        /* Register in the ingress queue lookup table, indexed from iq_start. */
        cntxt_id = nm_rxq->iq_cntxt_id - sc->sge.iq_start;
        if (cntxt_id >= sc->sge.iqmap_sz) {
                panic ("%s: nm_rxq->iq_cntxt_id (%d) more than the max (%d)",
                    __func__, cntxt_id, sc->sge.iqmap_sz - 1);
        }
        sc->sge.iqmap[cntxt_id] = (void *)nm_rxq;

        nm_rxq->fl_cntxt_id = be16toh(c.fl0id);
        nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
        nm_rxq->fl_db_saved = 0;
        /* matches the X_FETCHBURSTMAX_512B or X_FETCHBURSTMAX_256B above. */
        nm_rxq->fl_db_threshold = chip_id(sc) <= CHELSIO_T5 ? 8 : 4;
        MPASS(nm_rxq->fl_sidx == na->num_rx_desc);
        /* Register the freelist in the egress queue lookup table. */
        cntxt_id = nm_rxq->fl_cntxt_id - sc->sge.eq_start;
        if (cntxt_id >= sc->sge.eqmap_sz) {
                panic("%s: nm_rxq->fl_cntxt_id (%d) more than the max (%d)",
                    __func__, cntxt_id, sc->sge.eqmap_sz - 1);
        }
        sc->sge.eqmap[cntxt_id] = (void *)nm_rxq;

        /* Constant part of every freelist doorbell write for this queue. */
        nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) |
            sc->chip_params->sge_fl_db;

        if (chip_id(sc) >= CHELSIO_T5 && cong_drop != -1) {
                t4_sge_set_conm_context(sc, nm_rxq->iq_cntxt_id, cong_drop,
                    cong_map);
        }

        /* Program the interrupt holdoff timer for the new ingress queue. */
        t4_write_reg(sc, sc->sge_gts_reg,
            V_INGRESSQID(nm_rxq->iq_cntxt_id) |
            V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx)));

        return (rc);
}
388
389 static int
390 free_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
391 {
392         struct adapter *sc = vi->adapter;
393         int rc;
394
395         rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
396             nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff);
397         if (rc != 0)
398                 device_printf(sc->dev, "%s: failed for iq %d, fl %d: %d\n",
399                     __func__, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, rc);
400         nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID;
401         return (rc);
402 }
403
/*
 * Create (or restart) the hardware egress queue that backs a netmap tx queue.
 *
 * The descriptor ring is zeroed and an FW_EQ_ETH_CMD is sent through the
 * mailbox.  The ALLOC flag is set only when the queue has no context id yet;
 * otherwise the existing id is passed back in.  On success the queue is
 * entered in the adapter's eqmap lookup table, the software indices are
 * reset, and the doorbell parameters are derived from the new context id.
 * On firmware older than 1.25.1.0 the queue is additionally bound to
 * scheduling class 0xff; a failure of that step is logged and ignored.
 *
 * Returns 0 on success or the negated return value of t4_wr_mbox().  Panics
 * if the returned context id falls outside the eqmap table.
 */
static int
alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
{
        int rc, cntxt_id;
        size_t len;
        struct adapter *sc = vi->adapter;
        struct netmap_adapter *na = NA(vi->ifp);
        struct fw_eq_eth_cmd c;

        MPASS(na != NULL);
        MPASS(nm_txq->desc != NULL);

        /* Same length that was used when the ring was allocated. */
        len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len;
        bzero(nm_txq->desc, len);

        bzero(&c, sizeof(c));
        c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
            F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
            V_FW_EQ_ETH_CMD_VFN(0));
        c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
        if (nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID)
                c.alloc_to_len16 |= htobe32(F_FW_EQ_ETH_CMD_ALLOC);
        else
                c.eqid_pkd = htobe32(V_FW_EQ_ETH_CMD_EQID(nm_txq->cntxt_id));
        c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
            F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid));
        /* The egress queue is tied to the netmap rx queue selected by iqidx. */
        c.fetchszm_to_iqid =
            htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
                V_FW_EQ_ETH_CMD_PCIECHN(vi->pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
                V_FW_EQ_ETH_CMD_IQID(sc->sge.nm_rxq[nm_txq->iqidx].iq_cntxt_id));
        c.dcaen_to_eqsize =
            htobe32(V_FW_EQ_ETH_CMD_FBMIN(chip_id(sc) <= CHELSIO_T5 ?
                X_FETCHBURSTMIN_64B : X_FETCHBURSTMIN_64B_T6) |
                V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
                V_FW_EQ_ETH_CMD_EQSIZE(len / EQ_ESIZE));
        c.eqaddr = htobe64(nm_txq->ba);

        /* The reply is written back into 'c'. */
        rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
        if (rc != 0) {
                device_printf(vi->dev,
                    "failed to create netmap egress queue: %d\n", rc);
                return (rc);
        }

        nm_txq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
        /* Register in the egress queue lookup table, indexed from eq_start. */
        cntxt_id = nm_txq->cntxt_id - sc->sge.eq_start;
        if (cntxt_id >= sc->sge.eqmap_sz)
            panic("%s: nm_txq->cntxt_id (%d) more than the max (%d)", __func__,
                cntxt_id, sc->sge.eqmap_sz - 1);
        sc->sge.eqmap[cntxt_id] = (void *)nm_txq;

        nm_txq->pidx = nm_txq->cidx = 0;
        MPASS(nm_txq->sidx == na->num_tx_desc);
        nm_txq->equiqidx = nm_txq->equeqidx = nm_txq->dbidx = 0;

        /*
         * Start with the adapter's doorbell set.  If any user doorbell
         * variant is available, locate this queue's doorbell within the user
         * doorbell region.
         */
        nm_txq->doorbells = sc->doorbells;
        if (isset(&nm_txq->doorbells, DOORBELL_UDB) ||
            isset(&nm_txq->doorbells, DOORBELL_UDBWC) ||
            isset(&nm_txq->doorbells, DOORBELL_WCWR)) {
                uint32_t s_qpp = sc->params.sge.eq_s_qpp;
                uint32_t mask = (1 << s_qpp) - 1;
                volatile uint8_t *udb;

                udb = sc->udbs_base + UDBS_DB_OFFSET;
                udb += (nm_txq->cntxt_id >> s_qpp) << PAGE_SHIFT;
                nm_txq->udb_qid = nm_txq->cntxt_id & mask;
                /*
                 * Queues beyond the per-page segment count share the page's
                 * first doorbell and keep their qid; WCWR is turned off for
                 * them.  Others get their own segment and use qid 0.
                 */
                if (nm_txq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE)
                        clrbit(&nm_txq->doorbells, DOORBELL_WCWR);
                else {
                        udb += nm_txq->udb_qid << UDBS_SEG_SHIFT;
                        nm_txq->udb_qid = 0;
                }
                nm_txq->udb = (volatile void *)udb;
        }

        if (sc->params.fw_vers < FW_VERSION32(1, 25, 1, 0)) {
                uint32_t param, val;

                param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
                    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
                    V_FW_PARAMS_PARAM_YZ(nm_txq->cntxt_id);
                val = 0xff;
                rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
                if (rc != 0) {
                        device_printf(vi->dev,
                            "failed to bind netmap txq %d to class 0xff: %d\n",
                            nm_txq->cntxt_id, rc);
                        /* Deliberately non-fatal: the queue itself is usable. */
                        rc = 0;
                }
        }

        return (rc);
}
497
498 static int
499 free_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
500 {
501         struct adapter *sc = vi->adapter;
502         int rc;
503
504         rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id);
505         if (rc != 0)
506                 device_printf(sc->dev, "%s: failed for eq %d: %d\n", __func__,
507                     nm_txq->cntxt_id, rc);
508         nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID;
509         return (rc);
510 }
511
512 static int
513 cxgbe_netmap_simple_rss(struct adapter *sc, struct vi_info *vi,
514     if_t ifp, struct netmap_adapter *na)
515 {
516         struct netmap_kring *kring;
517         struct sge_nm_rxq *nm_rxq;
518         int rc, i, j, nm_state, defq;
519         uint16_t *rss;
520
521         /*
522          * Check if there's at least one active (or about to go active) netmap
523          * rx queue.
524          */
525         defq = -1;
526         for_each_nm_rxq(vi, j, nm_rxq) {
527                 nm_state = atomic_load_int(&nm_rxq->nm_state);
528                 kring = na->rx_rings[nm_rxq->nid];
529                 if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) ||
530                     (nm_state == NM_OFF && nm_kring_pending_on(kring))) {
531                         MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID);
532                         if (defq == -1) {
533                                 defq = nm_rxq->iq_abs_id;
534                                 break;
535                         }
536                 }
537         }
538
539         if (defq == -1) {
540                 /* No active netmap queues.  Switch back to NIC queues. */
541                 rss = vi->rss;
542                 defq = vi->rss[0];
543         } else {
544                 for (i = 0; i < vi->rss_size;) {
545                         for_each_nm_rxq(vi, j, nm_rxq) {
546                                 nm_state = atomic_load_int(&nm_rxq->nm_state);
547                                 kring = na->rx_rings[nm_rxq->nid];
548                                 if ((nm_state != NM_OFF &&
549                                     !nm_kring_pending_off(kring)) ||
550                                     (nm_state == NM_OFF &&
551                                     nm_kring_pending_on(kring))) {
552                                         MPASS(nm_rxq->iq_cntxt_id !=
553                                             INVALID_NM_RXQ_CNTXT_ID);
554                                         vi->nm_rss[i++] = nm_rxq->iq_abs_id;
555                                         if (i == vi->rss_size)
556                                                 break;
557                                 }
558                         }
559                 }
560                 rss = vi->nm_rss;
561         }
562
563         rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
564             vi->rss_size);
565         if (rc != 0)
566                 if_printf(ifp, "netmap rss_config failed: %d\n", rc);
567
568         rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, defq, 0, 0);
569         if (rc != 0) {
570                 if_printf(ifp, "netmap defaultq config failed: %d\n", rc);
571         }
572
573         return (rc);
574 }
575
576 /*
577  * Odd number of rx queues work best for split RSS mode as the first queue can
578  * be dedicated for non-RSS traffic and the rest divided into two equal halves.
579  */
580 static int
581 cxgbe_netmap_split_rss(struct adapter *sc, struct vi_info *vi,
582     if_t ifp, struct netmap_adapter *na)
583 {
584         struct netmap_kring *kring;
585         struct sge_nm_rxq *nm_rxq;
586         int rc, i, j, nm_state, defq;
587         int nactive[2] = {0, 0};
588         int dq[2] = {-1, -1};
589         bool dq_norss;          /* default queue should not be in RSS table. */
590
591         MPASS(nm_split_rss != 0);
592         MPASS(vi->nnmrxq > 1);
593
594         for_each_nm_rxq(vi, i, nm_rxq) {
595                 j = i / ((vi->nnmrxq + 1) / 2);
596                 nm_state = atomic_load_int(&nm_rxq->nm_state);
597                 kring = na->rx_rings[nm_rxq->nid];
598                 if ((nm_state != NM_OFF && !nm_kring_pending_off(kring)) ||
599                     (nm_state == NM_OFF && nm_kring_pending_on(kring))) {
600                         MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID);
601                         nactive[j]++;
602                         if (dq[j] == -1) {
603                                 dq[j] = nm_rxq->iq_abs_id;
604                                 break;
605                         }
606                 }
607         }
608
609         if (nactive[0] == 0 || nactive[1] == 0)
610                 return (cxgbe_netmap_simple_rss(sc, vi, ifp, na));
611
612         MPASS(dq[0] != -1 && dq[1] != -1);
613         if (nactive[0] > nactive[1]) {
614                 defq = dq[0];
615                 dq_norss = true;
616         } else if (nactive[0] < nactive[1]) {
617                 defq = dq[1];
618                 dq_norss = true;
619         } else {
620                 defq = dq[0];
621                 dq_norss = false;
622         }
623
624         i = 0;
625         nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq];
626         while (i < vi->rss_size / 2) {
627                 for (j = 0; j < (vi->nnmrxq + 1) / 2; j++) {
628                         nm_state = atomic_load_int(&nm_rxq[j].nm_state);
629                         kring = na->rx_rings[nm_rxq[j].nid];
630                         if ((nm_state == NM_OFF &&
631                             !nm_kring_pending_on(kring)) ||
632                             (nm_state == NM_ON &&
633                             nm_kring_pending_off(kring))) {
634                                 continue;
635                         }
636                         MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID);
637                         if (dq_norss && defq == nm_rxq[j].iq_abs_id)
638                                 continue;
639                         vi->nm_rss[i++] = nm_rxq[j].iq_abs_id;
640                         if (i == vi->rss_size / 2)
641                                 break;
642                 }
643         }
644         while (i < vi->rss_size) {
645                 for (j = (vi->nnmrxq + 1) / 2; j < vi->nnmrxq; j++) {
646                         nm_state = atomic_load_int(&nm_rxq[j].nm_state);
647                         kring = na->rx_rings[nm_rxq[j].nid];
648                         if ((nm_state == NM_OFF &&
649                             !nm_kring_pending_on(kring)) ||
650                             (nm_state == NM_ON &&
651                             nm_kring_pending_off(kring))) {
652                                 continue;
653                         }
654                         MPASS(nm_rxq[j].iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID);
655                         if (dq_norss && defq == nm_rxq[j].iq_abs_id)
656                                 continue;
657                         vi->nm_rss[i++] = nm_rxq[j].iq_abs_id;
658                         if (i == vi->rss_size)
659                                 break;
660                 }
661         }
662
663         rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size,
664             vi->nm_rss, vi->rss_size);
665         if (rc != 0)
666                 if_printf(ifp, "netmap split_rss_config failed: %d\n", rc);
667
668         rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, vi->hashen, defq, 0, 0);
669         if (rc != 0)
670                 if_printf(ifp, "netmap defaultq config failed: %d\n", rc);
671
672         return (rc);
673 }
674
675 static inline int
676 cxgbe_netmap_rss(struct adapter *sc, struct vi_info *vi, if_t ifp,
677     struct netmap_adapter *na)
678 {
679
680         if (nm_split_rss == 0 || vi->nnmrxq == 1)
681                 return (cxgbe_netmap_simple_rss(sc, vi, ifp, na));
682         else
683                 return (cxgbe_netmap_split_rss(sc, vi, ifp, na));
684 }
685
/*
 * Switch the rings that netmap has marked "pending on" into netmap mode.
 *
 * For every such rx ring the hardware queue is created, the freelist is
 * populated with the addresses of the ring's netmap buffers, the freelist
 * doorbell is rung and the queue state moves from NM_OFF to NM_ON.  For every
 * such tx ring the hardware queue is created and the ring is reset.  Finally
 * the netmap RSS table is allocated if needed and RSS is reprogrammed.
 *
 * Must be called within a synchronized operation on the adapter.
 *
 * Returns EAGAIN if the VI is not initialized or the interface is not
 * running, ENXIO if no hardware buffer size matches the netmap buffer size,
 * and otherwise the result of cxgbe_netmap_rss().
 */
static int
cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, if_t ifp,
    struct netmap_adapter *na)
{
        struct netmap_slot *slot;
        struct netmap_kring *kring;
        struct sge_nm_rxq *nm_rxq;
        struct sge_nm_txq *nm_txq;
        int i, j, hwidx;
        struct rx_buf_info *rxb;

        ASSERT_SYNCHRONIZED_OP(sc);
        MPASS(vi->nnmrxq > 0);
        MPASS(vi->nnmtxq > 0);

        if ((vi->flags & VI_INIT_DONE) == 0 ||
            (if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
                if_printf(ifp, "cannot enable netmap operation because "
                    "interface is not UP.\n");
                return (EAGAIN);
        }

        /*
         * Find the hardware buffer size index whose size equals the netmap
         * buffer size.  hwidx is only used after the i >= SW_ZONE_SIZES check
         * below, i.e. only when one of the branches here assigned it.
         */
        rxb = &sc->sge.rx_buf_info[0];
        for (i = 0; i < SW_ZONE_SIZES; i++, rxb++) {
                if (rxb->size1 == NETMAP_BUF_SIZE(na)) {
                        hwidx = rxb->hwidx1;
                        break;
                }
                if (rxb->size2 == NETMAP_BUF_SIZE(na)) {
                        hwidx = rxb->hwidx2;
                        break;
                }
        }
        if (i >= SW_ZONE_SIZES) {
                if_printf(ifp, "no hwidx for netmap buffer size %d.\n",
                    NETMAP_BUF_SIZE(na));
                return (ENXIO);
        }

        /* Must set caps before calling netmap_reset */
        nm_set_native_flags(na);

        for_each_nm_rxq(vi, i, nm_rxq) {
                kring = na->rx_rings[nm_rxq->nid];
                if (!nm_kring_pending_on(kring))
                        continue;

                /* NOTE(review): the return value is not checked here. */
                alloc_nm_rxq_hwq(vi, nm_rxq);
                nm_rxq->fl_hwidx = hwidx;
                slot = netmap_reset(na, NR_RX, i, 0);
                MPASS(slot != NULL);    /* XXXNM: error check, not assert */

                /* We deal with 8 bufs at a time */
                MPASS((na->num_rx_desc & 7) == 0);
                MPASS(na->num_rx_desc == nm_rxq->fl_sidx);
                /*
                 * Each freelist entry is the buffer's bus address with the
                 * buffer size index OR'd in.
                 */
                for (j = 0; j < nm_rxq->fl_sidx; j++) {
                        uint64_t ba;

                        PNMB(na, &slot[j], &ba);
                        MPASS(ba != 0);
                        nm_rxq->fl_desc[j] = htobe64(ba | hwidx);
                }
                /* The producer index starts 8 entries short of the ring size. */
                j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8;
                MPASS((j & 7) == 0);
                j /= 8; /* driver pidx to hardware pidx */
                /* Write barrier between filling the ring and the doorbell. */
                wmb();
                t4_write_reg(sc, sc->sge_kdoorbell_reg,
                    nm_rxq->fl_db_val | V_PIDX(j));

                /* Only an NM_OFF queue is moved to NM_ON; result is ignored. */
                (void) atomic_cmpset_int(&nm_rxq->nm_state, NM_OFF, NM_ON);
        }

        for_each_nm_txq(vi, i, nm_txq) {
                kring = na->tx_rings[nm_txq->nid];
                if (!nm_kring_pending_on(kring))
                        continue;

                /* NOTE(review): the return value is not checked here. */
                alloc_nm_txq_hwq(vi, nm_txq);
                slot = netmap_reset(na, NR_TX, i, 0);
                MPASS(slot != NULL);    /* XXXNM: error check, not assert */
        }

        /* Allocated on first use and kept; this function never frees it. */
        if (vi->nm_rss == NULL) {
                vi->nm_rss = malloc(vi->rss_size * sizeof(uint16_t), M_CXGBE,
                    M_ZERO | M_WAITOK);
        }

        return (cxgbe_netmap_rss(sc, vi, ifp, na));
}
775
776 static int
777 cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, if_t ifp,
778     struct netmap_adapter *na)
779 {
780         struct netmap_kring *kring;
781         int rc, i, nm_state, nactive;
782         struct sge_nm_txq *nm_txq;
783         struct sge_nm_rxq *nm_rxq;
784
785         ASSERT_SYNCHRONIZED_OP(sc);
786         MPASS(vi->nnmrxq > 0);
787         MPASS(vi->nnmtxq > 0);
788
789         if (!nm_netmap_on(na))
790                 return (0);
791
792         if ((vi->flags & VI_INIT_DONE) == 0)
793                 return (0);
794
795         /* First remove the queues that are stopping from the RSS table. */
796         rc = cxgbe_netmap_rss(sc, vi, ifp, na);
797         if (rc != 0)
798                 return (rc);    /* error message logged already. */
799
800         for_each_nm_txq(vi, i, nm_txq) {
801                 kring = na->tx_rings[nm_txq->nid];
802                 if (!nm_kring_pending_off(kring))
803                         continue;
804                 MPASS(nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID);
805
806                 rc = -t4_eth_eq_stop(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id);
807                 if (rc != 0) {
808                         device_printf(vi->dev,
809                             "failed to stop nm_txq[%d]: %d.\n", i, rc);
810                         return (rc);
811                 }
812
813                 /* XXX: netmap, not the driver, should do this. */
814                 kring->rhead = kring->rcur = kring->nr_hwcur = 0;
815                 kring->rtail = kring->nr_hwtail = kring->nkr_num_slots - 1;
816         }
817         nactive = 0;
818         for_each_nm_rxq(vi, i, nm_rxq) {
819                 nm_state = atomic_load_int(&nm_rxq->nm_state);
820                 kring = na->rx_rings[nm_rxq->nid];
821                 if (nm_state != NM_OFF && !nm_kring_pending_off(kring))
822                         nactive++;
823                 if (!nm_kring_pending_off(kring))
824                         continue;
825                 MPASS(nm_state != NM_OFF);
826                 MPASS(nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID);
827
828                 rc = -t4_iq_stop(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
829                     nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff);
830                 if (rc != 0) {
831                         device_printf(vi->dev,
832                             "failed to stop nm_rxq[%d]: %d.\n", i, rc);
833                         return (rc);
834                 }
835
836                 while (!atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_OFF))
837                         pause("nmst", 1);
838
839                 /* XXX: netmap, not the driver, should do this. */
840                 kring->rhead = kring->rcur = kring->nr_hwcur = 0;
841                 kring->rtail = kring->nr_hwtail = 0;
842         }
843         netmap_krings_mode_commit(na, 0);
844         if (nactive == 0)
845                 nm_clear_native_flags(na);
846
847         return (rc);
848 }
849
850 static int
851 cxgbe_netmap_reg(struct netmap_adapter *na, int on)
852 {
853         if_t ifp = na->ifp;
854         struct vi_info *vi = if_getsoftc(ifp);
855         struct adapter *sc = vi->adapter;
856         int rc;
857
858         rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmreg");
859         if (rc != 0)
860                 return (rc);
861         if (on)
862                 rc = cxgbe_netmap_on(sc, vi, ifp, na);
863         else
864                 rc = cxgbe_netmap_off(sc, vi, ifp, na);
865         end_synchronized_op(sc, 0);
866
867         return (rc);
868 }
869
870 /* How many packets can a single type1 WR carry in n descriptors */
871 static inline int
872 ndesc_to_npkt(const int n)
873 {
874
875         MPASS(n > 0 && n <= SGE_MAX_WR_NDESC);
876
877         return (n * 2 - 1);
878 }
879 #define MAX_NPKT_IN_TYPE1_WR    (ndesc_to_npkt(SGE_MAX_WR_NDESC))
880
881 /*
882  * Space (in descriptors) needed for a type1 WR (TX_PKTS or TX_PKTS2) that
883  * carries n packets
884  */
885 static inline int
886 npkt_to_ndesc(const int n)
887 {
888
889         MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR);
890
891         return ((n + 2) / 2);
892 }
893
894 /*
895  * Space (in 16B units) needed for a type1 WR (TX_PKTS or TX_PKTS2) that
896  * carries n packets
897  */
898 static inline int
899 npkt_to_len16(const int n)
900 {
901
902         MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR);
903
904         return (n * 2 + 1);
905 }
906
907 #define NMIDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->sidx)
908
909 static void
910 ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq)
911 {
912         int n;
913         u_int db = nm_txq->doorbells;
914
915         MPASS(nm_txq->pidx != nm_txq->dbidx);
916
917         n = NMIDXDIFF(nm_txq, dbidx);
918         if (n > 1)
919                 clrbit(&db, DOORBELL_WCWR);
920         wmb();
921
922         switch (ffs(db) - 1) {
923         case DOORBELL_UDB:
924                 *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n));
925                 break;
926
927         case DOORBELL_WCWR: {
928                 volatile uint64_t *dst, *src;
929
930                 /*
931                  * Queues whose 128B doorbell segment fits in the page do not
932                  * use relative qid (udb_qid is always 0).  Only queues with
933                  * doorbell segments can do WCWR.
934                  */
935                 KASSERT(nm_txq->udb_qid == 0 && n == 1,
936                     ("%s: inappropriate doorbell (0x%x, %d, %d) for nm_txq %p",
937                     __func__, nm_txq->doorbells, n, nm_txq->pidx, nm_txq));
938
939                 dst = (volatile void *)((uintptr_t)nm_txq->udb +
940                     UDBS_WR_OFFSET - UDBS_DB_OFFSET);
941                 src = (void *)&nm_txq->desc[nm_txq->dbidx];
942                 while (src != (void *)&nm_txq->desc[nm_txq->dbidx + 1])
943                         *dst++ = *src++;
944                 wmb();
945                 break;
946         }
947
948         case DOORBELL_UDBWC:
949                 *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n));
950                 wmb();
951                 break;
952
953         case DOORBELL_KDB:
954                 t4_write_reg(sc, sc->sge_kdoorbell_reg,
955                     V_QID(nm_txq->cntxt_id) | V_PIDX(n));
956                 break;
957         }
958         nm_txq->dbidx = nm_txq->pidx;
959 }
960
961 /*
962  * Write work requests to send 'npkt' frames and ring the doorbell to send them
963  * on their way.  No need to check for wraparound.
964  */
965 static void
966 cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
967     struct netmap_kring *kring, int npkt, int npkt_remaining)
968 {
969         struct netmap_ring *ring = kring->ring;
970         struct netmap_slot *slot;
971         const u_int lim = kring->nkr_num_slots - 1;
972         struct fw_eth_tx_pkts_wr *wr = (void *)&nm_txq->desc[nm_txq->pidx];
973         uint16_t len;
974         uint64_t ba;
975         struct cpl_tx_pkt_core *cpl;
976         struct ulptx_sgl *usgl;
977         int i, n;
978
979         while (npkt) {
980                 n = min(npkt, MAX_NPKT_IN_TYPE1_WR);
981                 len = 0;
982
983                 wr = (void *)&nm_txq->desc[nm_txq->pidx];
984                 wr->op_pkd = nm_txq->op_pkd;
985                 wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(npkt_to_len16(n)));
986                 wr->npkt = n;
987                 wr->r3 = 0;
988                 wr->type = 1;
989                 cpl = (void *)(wr + 1);
990
991                 for (i = 0; i < n; i++) {
992                         slot = &ring->slot[kring->nr_hwcur];
993                         PNMB(kring->na, slot, &ba);
994                         MPASS(ba != 0);
995
996                         cpl->ctrl0 = nm_txq->cpl_ctrl0;
997                         cpl->pack = 0;
998                         cpl->len = htobe16(slot->len);
999                         cpl->ctrl1 = nm_txcsum ? 0 :
1000                             htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);
1001
1002                         usgl = (void *)(cpl + 1);
1003                         usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
1004                             V_ULPTX_NSGE(1));
1005                         usgl->len0 = htobe32(slot->len);
1006                         usgl->addr0 = htobe64(ba);
1007
1008                         slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
1009                         cpl = (void *)(usgl + 1);
1010                         MPASS(slot->len + len <= UINT16_MAX);
1011                         len += slot->len;
1012                         kring->nr_hwcur = nm_next(kring->nr_hwcur, lim);
1013                 }
1014                 wr->plen = htobe16(len);
1015
1016                 npkt -= n;
1017                 nm_txq->pidx += npkt_to_ndesc(n);
1018                 MPASS(nm_txq->pidx <= nm_txq->sidx);
1019                 if (__predict_false(nm_txq->pidx == nm_txq->sidx)) {
1020                         /*
1021                          * This routine doesn't know how to write WRs that wrap
1022                          * around.  Make sure it wasn't asked to.
1023                          */
1024                         MPASS(npkt == 0);
1025                         nm_txq->pidx = 0;
1026                 }
1027
1028                 if (npkt == 0 && npkt_remaining == 0) {
1029                         /* All done. */
1030                         if (lazy_tx_credit_flush == 0) {
1031                                 wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
1032                                     F_FW_WR_EQUIQ);
1033                                 nm_txq->equeqidx = nm_txq->pidx;
1034                                 nm_txq->equiqidx = nm_txq->pidx;
1035                         }
1036                         ring_nm_txq_db(sc, nm_txq);
1037                         return;
1038                 }
1039
1040                 if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) {
1041                         wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
1042                             F_FW_WR_EQUIQ);
1043                         nm_txq->equeqidx = nm_txq->pidx;
1044                         nm_txq->equiqidx = nm_txq->pidx;
1045                 } else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) {
1046                         wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
1047                         nm_txq->equeqidx = nm_txq->pidx;
1048                 }
1049                 if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC)
1050                         ring_nm_txq_db(sc, nm_txq);
1051         }
1052
1053         /* Will get called again. */
1054         MPASS(npkt_remaining);
1055 }
1056
1057 /* How many contiguous free descriptors starting at pidx */
1058 static inline int
1059 contiguous_ndesc_available(struct sge_nm_txq *nm_txq)
1060 {
1061
1062         if (nm_txq->cidx > nm_txq->pidx)
1063                 return (nm_txq->cidx - nm_txq->pidx - 1);
1064         else if (nm_txq->cidx > 0)
1065                 return (nm_txq->sidx - nm_txq->pidx);
1066         else
1067                 return (nm_txq->sidx - nm_txq->pidx - 1);
1068 }
1069
1070 static int
1071 reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq)
1072 {
1073         struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx];
1074         uint16_t hw_cidx = spg->cidx;   /* snapshot */
1075         struct fw_eth_tx_pkts_wr *wr;
1076         int n = 0;
1077
1078         hw_cidx = be16toh(hw_cidx);
1079
1080         while (nm_txq->cidx != hw_cidx) {
1081                 wr = (void *)&nm_txq->desc[nm_txq->cidx];
1082
1083                 MPASS(wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)) ||
1084                     wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR)));
1085                 MPASS(wr->type == 1);
1086                 MPASS(wr->npkt > 0 && wr->npkt <= MAX_NPKT_IN_TYPE1_WR);
1087
1088                 n += wr->npkt;
1089                 nm_txq->cidx += npkt_to_ndesc(wr->npkt);
1090
1091                 /*
1092                  * We never sent a WR that wrapped around so the credits coming
1093                  * back, WR by WR, should never cause the cidx to wrap around
1094                  * either.
1095                  */
1096                 MPASS(nm_txq->cidx <= nm_txq->sidx);
1097                 if (__predict_false(nm_txq->cidx == nm_txq->sidx))
1098                         nm_txq->cidx = 0;
1099         }
1100
1101         return (n);
1102 }
1103
1104 static int
1105 cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
1106 {
1107         struct netmap_adapter *na = kring->na;
1108         if_t ifp = na->ifp;
1109         struct vi_info *vi = if_getsoftc(ifp);
1110         struct adapter *sc = vi->adapter;
1111         struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[vi->first_nm_txq + kring->ring_id];
1112         const u_int head = kring->rhead;
1113         u_int reclaimed = 0;
1114         int n, d, npkt_remaining, ndesc_remaining;
1115
1116         /*
1117          * Tx was at kring->nr_hwcur last time around and now we need to advance
1118          * to kring->rhead.  Note that the driver's pidx moves independent of
1119          * netmap's kring->nr_hwcur (pidx counts descriptors and the relation
1120          * between descriptors and frames isn't 1:1).
1121          */
1122
1123         npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur :
1124             kring->nkr_num_slots - kring->nr_hwcur + head;
1125         while (npkt_remaining) {
1126                 reclaimed += reclaim_nm_tx_desc(nm_txq);
1127                 ndesc_remaining = contiguous_ndesc_available(nm_txq);
1128                 /* Can't run out of descriptors with packets still remaining */
1129                 MPASS(ndesc_remaining > 0);
1130
1131                 /* # of desc needed to tx all remaining packets */
1132                 d = (npkt_remaining / MAX_NPKT_IN_TYPE1_WR) * SGE_MAX_WR_NDESC;
1133                 if (npkt_remaining % MAX_NPKT_IN_TYPE1_WR)
1134                         d += npkt_to_ndesc(npkt_remaining % MAX_NPKT_IN_TYPE1_WR);
1135
1136                 if (d <= ndesc_remaining)
1137                         n = npkt_remaining;
1138                 else {
1139                         /* Can't send all, calculate how many can be sent */
1140                         n = (ndesc_remaining / SGE_MAX_WR_NDESC) *
1141                             MAX_NPKT_IN_TYPE1_WR;
1142                         if (ndesc_remaining % SGE_MAX_WR_NDESC)
1143                                 n += ndesc_to_npkt(ndesc_remaining % SGE_MAX_WR_NDESC);
1144                 }
1145
1146                 /* Send n packets and update nm_txq->pidx and kring->nr_hwcur */
1147                 npkt_remaining -= n;
1148                 cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining);
1149         }
1150         MPASS(npkt_remaining == 0);
1151         MPASS(kring->nr_hwcur == head);
1152         MPASS(nm_txq->dbidx == nm_txq->pidx);
1153
1154         /*
1155          * Second part: reclaim buffers for completed transmissions.
1156          */
1157         if (reclaimed || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
1158                 reclaimed += reclaim_nm_tx_desc(nm_txq);
1159                 kring->nr_hwtail += reclaimed;
1160                 if (kring->nr_hwtail >= kring->nkr_num_slots)
1161                         kring->nr_hwtail -= kring->nkr_num_slots;
1162         }
1163
1164         return (0);
1165 }
1166
1167 static int
1168 cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
1169 {
1170         struct netmap_adapter *na = kring->na;
1171         struct netmap_ring *ring = kring->ring;
1172         if_t ifp = na->ifp;
1173         struct vi_info *vi = if_getsoftc(ifp);
1174         struct adapter *sc = vi->adapter;
1175         struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq + kring->ring_id];
1176         u_int const head = kring->rhead;
1177         u_int n;
1178         int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
1179
1180         if (black_hole)
1181                 return (0);     /* No updates ever. */
1182
1183         if (netmap_no_pendintr || force_update) {
1184                 kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx);
1185                 kring->nr_kflags &= ~NKR_PENDINTR;
1186         }
1187
1188         if (nm_rxq->fl_db_saved > 0 && starve_fl == 0) {
1189                 wmb();
1190                 t4_write_reg(sc, sc->sge_kdoorbell_reg,
1191                     nm_rxq->fl_db_val | V_PIDX(nm_rxq->fl_db_saved));
1192                 nm_rxq->fl_db_saved = 0;
1193         }
1194
1195         /* Userspace done with buffers from kring->nr_hwcur to head */
1196         n = head >= kring->nr_hwcur ? head - kring->nr_hwcur :
1197             kring->nkr_num_slots - kring->nr_hwcur + head;
1198         n &= ~7U;
1199         if (n > 0) {
1200                 u_int fl_pidx = nm_rxq->fl_pidx;
1201                 struct netmap_slot *slot = &ring->slot[fl_pidx];
1202                 uint64_t ba;
1203                 int i, dbinc = 0, hwidx = nm_rxq->fl_hwidx;
1204
1205                 /*
1206                  * We always deal with 8 buffers at a time.  We must have
1207                  * stopped at an 8B boundary (fl_pidx) last time around and we
1208                  * must have a multiple of 8B buffers to give to the freelist.
1209                  */
1210                 MPASS((fl_pidx & 7) == 0);
1211                 MPASS((n & 7) == 0);
1212
1213                 IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots);
1214                 IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx2);
1215
1216                 while (n > 0) {
1217                         for (i = 0; i < 8; i++, fl_pidx++, slot++) {
1218                                 PNMB(na, slot, &ba);
1219                                 MPASS(ba != 0);
1220                                 nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx);
1221                                 slot->flags &= ~NS_BUF_CHANGED;
1222                                 MPASS(fl_pidx <= nm_rxq->fl_sidx2);
1223                         }
1224                         n -= 8;
1225                         if (fl_pidx == nm_rxq->fl_sidx2) {
1226                                 fl_pidx = 0;
1227                                 slot = &ring->slot[0];
1228                         }
1229                         if (++dbinc == nm_rxq->fl_db_threshold) {
1230                                 wmb();
1231                                 if (starve_fl)
1232                                         nm_rxq->fl_db_saved += dbinc;
1233                                 else {
1234                                         t4_write_reg(sc, sc->sge_kdoorbell_reg,
1235                                             nm_rxq->fl_db_val | V_PIDX(dbinc));
1236                                 }
1237                                 dbinc = 0;
1238                         }
1239                 }
1240                 MPASS(nm_rxq->fl_pidx == fl_pidx);
1241
1242                 if (dbinc > 0) {
1243                         wmb();
1244                         if (starve_fl)
1245                                 nm_rxq->fl_db_saved += dbinc;
1246                         else {
1247                                 t4_write_reg(sc, sc->sge_kdoorbell_reg,
1248                                     nm_rxq->fl_db_val | V_PIDX(dbinc));
1249                         }
1250                 }
1251         }
1252
1253         return (0);
1254 }
1255
1256 void
1257 cxgbe_nm_attach(struct vi_info *vi)
1258 {
1259         struct port_info *pi;
1260         struct adapter *sc;
1261         struct netmap_adapter na;
1262
1263         MPASS(vi->nnmrxq > 0);
1264         MPASS(vi->ifp != NULL);
1265
1266         pi = vi->pi;
1267         sc = pi->adapter;
1268
1269         bzero(&na, sizeof(na));
1270
1271         na.ifp = vi->ifp;
1272         na.na_flags = NAF_BDG_MAYSLEEP;
1273
1274         /* Netmap doesn't know about the space reserved for the status page. */
1275         na.num_tx_desc = vi->qsize_txq - sc->params.sge.spg_len / EQ_ESIZE;
1276
1277         /*
1278          * The freelist's cidx/pidx drives netmap's rx cidx/pidx.  So
1279          * num_rx_desc is based on the number of buffers that can be held in the
1280          * freelist, and not the number of entries in the iq.  (These two are
1281          * not exactly the same due to the space taken up by the status page).
1282          */
1283         na.num_rx_desc = rounddown(vi->qsize_rxq, 8);
1284         na.nm_txsync = cxgbe_netmap_txsync;
1285         na.nm_rxsync = cxgbe_netmap_rxsync;
1286         na.nm_register = cxgbe_netmap_reg;
1287         na.num_tx_rings = vi->nnmtxq;
1288         na.num_rx_rings = vi->nnmrxq;
1289         na.rx_buf_maxsize = MAX_MTU;
1290         netmap_attach(&na);     /* This adds IFCAP_NETMAP to if_capabilities */
1291 }
1292
1293 void
1294 cxgbe_nm_detach(struct vi_info *vi)
1295 {
1296
1297         MPASS(vi->nnmrxq > 0);
1298         MPASS(vi->ifp != NULL);
1299
1300         netmap_detach(vi->ifp);
1301 }
1302
1303 static inline const void *
1304 unwrap_nm_fw6_msg(const struct cpl_fw6_msg *cpl)
1305 {
1306
1307         MPASS(cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL);
1308
1309         /* data[0] is RSS header */
1310         return (&cpl->data[1]);
1311 }
1312
1313 static void
1314 handle_nm_sge_egr_update(struct adapter *sc, if_t ifp,
1315     const struct cpl_sge_egr_update *egr)
1316 {
1317         uint32_t oq;
1318         struct sge_nm_txq *nm_txq;
1319
1320         oq = be32toh(egr->opcode_qid);
1321         MPASS(G_CPL_OPCODE(oq) == CPL_SGE_EGR_UPDATE);
1322         nm_txq = (void *)sc->sge.eqmap[G_EGR_QID(oq) - sc->sge.eq_start];
1323
1324         netmap_tx_irq(ifp, nm_txq->nid);
1325 }
1326
1327 void
1328 service_nm_rxq(struct sge_nm_rxq *nm_rxq)
1329 {
1330         struct vi_info *vi = nm_rxq->vi;
1331         struct adapter *sc = vi->adapter;
1332         if_t ifp = vi->ifp;
1333         struct netmap_adapter *na = NA(ifp);
1334         struct netmap_kring *kring = na->rx_rings[nm_rxq->nid];
1335         struct netmap_ring *ring = kring->ring;
1336         struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx];
1337         const void *cpl;
1338         uint32_t lq;
1339         u_int work = 0;
1340         uint8_t opcode;
1341         uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx);
1342         u_int fl_credits = fl_cidx & 7;
1343         u_int ndesc = 0;        /* desc processed since last cidx update */
1344         u_int nframes = 0;      /* frames processed since last netmap wakeup */
1345
1346         while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) {
1347
1348                 rmb();
1349
1350                 lq = be32toh(d->rsp.pldbuflen_qid);
1351                 opcode = d->rss.opcode;
1352                 cpl = &d->cpl[0];
1353
1354                 switch (G_RSPD_TYPE(d->rsp.u.type_gen)) {
1355                 case X_RSPD_TYPE_FLBUF:
1356
1357                         /* fall through */
1358
1359                 case X_RSPD_TYPE_CPL:
1360                         MPASS(opcode < NUM_CPL_CMDS);
1361
1362                         switch (opcode) {
1363                         case CPL_FW4_MSG:
1364                         case CPL_FW6_MSG:
1365                                 cpl = unwrap_nm_fw6_msg(cpl);
1366                                 /* fall through */
1367                         case CPL_SGE_EGR_UPDATE:
1368                                 handle_nm_sge_egr_update(sc, ifp, cpl);
1369                                 break;
1370                         case CPL_RX_PKT:
1371                                 ring->slot[fl_cidx].len = G_RSPD_LEN(lq) -
1372                                     sc->params.sge.fl_pktshift;
1373                                 ring->slot[fl_cidx].flags = 0;
1374                                 nframes++;
1375                                 if (!(lq & F_RSPD_NEWBUF)) {
1376                                         MPASS(black_hole == 2);
1377                                         break;
1378                                 }
1379                                 fl_credits++;
1380                                 if (__predict_false(++fl_cidx == nm_rxq->fl_sidx))
1381                                         fl_cidx = 0;
1382                                 break;
1383                         default:
1384                                 panic("%s: unexpected opcode 0x%x on nm_rxq %p",
1385                                     __func__, opcode, nm_rxq);
1386                         }
1387                         break;
1388
1389                 case X_RSPD_TYPE_INTR:
1390                         /* Not equipped to handle forwarded interrupts. */
1391                         panic("%s: netmap queue received interrupt for iq %u\n",
1392                             __func__, lq);
1393
1394                 default:
1395                         panic("%s: illegal response type %d on nm_rxq %p",
1396                             __func__, G_RSPD_TYPE(d->rsp.u.type_gen), nm_rxq);
1397                 }
1398
1399                 d++;
1400                 if (__predict_false(++nm_rxq->iq_cidx == nm_rxq->iq_sidx)) {
1401                         nm_rxq->iq_cidx = 0;
1402                         d = &nm_rxq->iq_desc[0];
1403                         nm_rxq->iq_gen ^= F_RSPD_GEN;
1404                 }
1405
1406                 if (__predict_false(++nframes == rx_nframes) && !black_hole) {
1407                         atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx);
1408                         netmap_rx_irq(ifp, nm_rxq->nid, &work);
1409                         nframes = 0;
1410                 }
1411
1412                 if (__predict_false(++ndesc == rx_ndesc)) {
1413                         if (black_hole && fl_credits >= 8) {
1414                                 fl_credits /= 8;
1415                                 IDXINCR(nm_rxq->fl_pidx, fl_credits * 8,
1416                                     nm_rxq->fl_sidx);
1417                                 t4_write_reg(sc, sc->sge_kdoorbell_reg,
1418                                     nm_rxq->fl_db_val | V_PIDX(fl_credits));
1419                                 fl_credits = fl_cidx & 7;
1420                         }
1421                         t4_write_reg(sc, sc->sge_gts_reg,
1422                             V_CIDXINC(ndesc) |
1423                             V_INGRESSQID(nm_rxq->iq_cntxt_id) |
1424                             V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
1425                         ndesc = 0;
1426                 }
1427         }
1428
1429         atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx);
1430         if (black_hole) {
1431                 fl_credits /= 8;
1432                 IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx);
1433                 t4_write_reg(sc, sc->sge_kdoorbell_reg,
1434                     nm_rxq->fl_db_val | V_PIDX(fl_credits));
1435         } else if (nframes > 0)
1436                 netmap_rx_irq(ifp, nm_rxq->nid, &work);
1437
1438         t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndesc) |
1439             V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) |
1440             V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx)));
1441 }
1442 #endif