/*-
 * Copyright (c) 2021-2022 NVIDIA corporation & affiliates.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The internal queue, IQ, code is more or less a stripped down copy
 * of the existing SQ managing code with exception of:
 *
 * - an optional single segment memory buffer which can be read or
 *   written as a whole by the hardware, may be provided.
 *
 * - an optional completion callback for all transmit operations, may
 *   be provided.
 *
 * - does not support mbufs.
 */
#include <dev/mlx5/mlx5_en/en.h>
42 mlx5e_iq_poll(struct mlx5e_iq *iq, int budget)
44 const struct mlx5_cqe64 *cqe;
49 * iq->cc must be updated only after mlx5_cqwq_update_db_record(),
50 * otherwise a cq overrun may occur
54 while (budget-- > 0) {
56 cqe = mlx5e_get_cqe(&iq->cq);
60 mlx5_cqwq_pop(&iq->cq.wq);
62 ci = iqcc & iq->wq.sz_m1;
64 if (likely(iq->data[ci].dma_sync != 0)) {
65 /* make sure data written by hardware is visible to CPU */
66 bus_dmamap_sync(iq->dma_tag, iq->data[ci].dma_map, iq->data[ci].dma_sync);
67 bus_dmamap_unload(iq->dma_tag, iq->data[ci].dma_map);
69 iq->data[ci].dma_sync = 0;
72 if (likely(iq->data[ci].callback != NULL)) {
73 iq->data[ci].callback(iq->data[ci].arg);
74 iq->data[ci].callback = NULL;
77 if (unlikely(iq->data[ci].p_refcount != NULL)) {
78 atomic_add_int(iq->data[ci].p_refcount, -1);
79 iq->data[ci].p_refcount = NULL;
81 iqcc += iq->data[ci].num_wqebbs;
84 mlx5_cqwq_update_db_record(&iq->cq.wq);
86 /* Ensure cq space is freed before enabling more cqes */
87 atomic_thread_fence_rel();
93 mlx5e_iq_completion(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
95 struct mlx5e_iq *iq = container_of(mcq, struct mlx5e_iq, cq.mcq);
97 mtx_lock(&iq->comp_lock);
98 mlx5e_iq_poll(iq, MLX5E_BUDGET_MAX);
99 mlx5e_cq_arm(&iq->cq, MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
100 mtx_unlock(&iq->comp_lock);
104 mlx5e_iq_send_nop(struct mlx5e_iq *iq, u32 ds_cnt)
106 u16 pi = iq->pc & iq->wq.sz_m1;
107 struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&iq->wq, pi);
109 mtx_assert(&iq->lock, MA_OWNED);
111 memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
113 wqe->ctrl.opmod_idx_opcode = cpu_to_be32((iq->pc << 8) | MLX5_OPCODE_NOP);
114 wqe->ctrl.qpn_ds = cpu_to_be32((iq->sqn << 8) | ds_cnt);
115 wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
117 /* Copy data for doorbell */
118 memcpy(iq->doorbell.d32, &wqe->ctrl, sizeof(iq->doorbell.d32));
120 iq->data[pi].callback = NULL;
121 iq->data[pi].arg = NULL;
122 iq->data[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
123 iq->data[pi].dma_sync = 0;
124 iq->pc += iq->data[pi].num_wqebbs;
128 mlx5e_iq_free_db(struct mlx5e_iq *iq)
130 int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
133 for (x = 0; x != wq_sz; x++) {
134 if (likely(iq->data[x].dma_sync != 0)) {
135 bus_dmamap_unload(iq->dma_tag, iq->data[x].dma_map);
136 iq->data[x].dma_sync = 0;
138 if (likely(iq->data[x].callback != NULL)) {
139 iq->data[x].callback(iq->data[x].arg);
140 iq->data[x].callback = NULL;
142 if (unlikely(iq->data[x].p_refcount != NULL)) {
143 atomic_add_int(iq->data[x].p_refcount, -1);
144 iq->data[x].p_refcount = NULL;
146 bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
148 free(iq->data, M_MLX5EN);
152 mlx5e_iq_alloc_db(struct mlx5e_iq *iq)
154 int wq_sz = mlx5_wq_cyc_get_size(&iq->wq);
158 iq->data = malloc_domainset(wq_sz * sizeof(iq->data[0]), M_MLX5EN,
159 mlx5_dev_domainset(iq->priv->mdev), M_WAITOK | M_ZERO);
161 /* Create DMA descriptor maps */
162 for (x = 0; x != wq_sz; x++) {
163 err = -bus_dmamap_create(iq->dma_tag, 0, &iq->data[x].dma_map);
166 bus_dmamap_destroy(iq->dma_tag, iq->data[x].dma_map);
167 free(iq->data, M_MLX5EN);
175 mlx5e_iq_create(struct mlx5e_channel *c,
176 struct mlx5e_sq_param *param,
179 struct mlx5e_priv *priv = c->priv;
180 struct mlx5_core_dev *mdev = priv->mdev;
181 void *sqc = param->sqc;
182 void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
185 /* Create DMA descriptor TAG */
186 if ((err = -bus_dma_tag_create(
187 bus_get_dma_tag(mdev->pdev->dev.bsddev),
188 1, /* any alignment */
190 BUS_SPACE_MAXADDR, /* lowaddr */
191 BUS_SPACE_MAXADDR, /* highaddr */
192 NULL, NULL, /* filter, filterarg */
193 PAGE_SIZE, /* maxsize */
195 PAGE_SIZE, /* maxsegsize */
197 NULL, NULL, /* lockfunc, lockfuncarg */
201 iq->mkey_be = cpu_to_be32(priv->mr.key);
204 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq,
205 &iq->wq, &iq->wq_ctrl);
207 goto err_free_dma_tag;
209 iq->wq.db = &iq->wq.db[MLX5_SND_DBR];
211 err = mlx5e_iq_alloc_db(iq);
213 goto err_iq_wq_destroy;
218 mlx5_wq_destroy(&iq->wq_ctrl);
221 bus_dma_tag_destroy(iq->dma_tag);
227 mlx5e_iq_destroy(struct mlx5e_iq *iq)
229 mlx5e_iq_free_db(iq);
230 mlx5_wq_destroy(&iq->wq_ctrl);
231 bus_dma_tag_destroy(iq->dma_tag);
235 mlx5e_iq_enable(struct mlx5e_iq *iq, struct mlx5e_sq_param *param,
236 const struct mlx5_sq_bfreg *bfreg, int tis_num)
245 inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
246 sizeof(u64) * iq->wq_ctrl.buf.npages;
247 in = mlx5_vzalloc(inlen);
251 iq->uar_map = bfreg->map;
253 ts_format = mlx5_get_sq_default_ts(iq->priv->mdev);
254 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
255 wq = MLX5_ADDR_OF(sqc, sqc, wq);
257 memcpy(sqc, param->sqc, sizeof(param->sqc));
259 MLX5_SET(sqc, sqc, tis_num_0, tis_num);
260 MLX5_SET(sqc, sqc, cqn, iq->cq.mcq.cqn);
261 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
262 MLX5_SET(sqc, sqc, ts_format, ts_format);
263 MLX5_SET(sqc, sqc, tis_lst_sz, 1);
264 MLX5_SET(sqc, sqc, flush_in_error_en, 1);
265 MLX5_SET(sqc, sqc, allow_swp, 1);
267 /* SQ remap support requires reg_umr privileges level */
268 if (MLX5_CAP_QOS(iq->priv->mdev, qos_remap_pp)) {
269 MLX5_SET(sqc, sqc, qos_remap_en, 1);
270 if (MLX5_CAP_ETH(iq->priv->mdev, reg_umr_sq))
271 MLX5_SET(sqc, sqc, reg_umr, 1);
273 mlx5_en_err(iq->priv->ifp,
274 "No reg umr SQ capability, SQ remap disabled\n");
277 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
278 MLX5_SET(wq, wq, uar_page, bfreg->index);
279 MLX5_SET(wq, wq, log_wq_pg_sz, iq->wq_ctrl.buf.page_shift -
280 MLX5_ADAPTER_PAGE_SHIFT);
281 MLX5_SET64(wq, wq, dbr_addr, iq->wq_ctrl.db.dma);
283 mlx5_fill_page_array(&iq->wq_ctrl.buf,
284 (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
286 err = mlx5_core_create_sq(iq->priv->mdev, in, inlen, &iq->sqn);
294 mlx5e_iq_modify(struct mlx5e_iq *iq, int curr_state, int next_state)
301 inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
302 in = mlx5_vzalloc(inlen);
306 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
308 MLX5_SET(modify_sq_in, in, sqn, iq->sqn);
309 MLX5_SET(modify_sq_in, in, sq_state, curr_state);
310 MLX5_SET(sqc, sqc, state, next_state);
312 err = mlx5_core_modify_sq(iq->priv->mdev, in, inlen);
320 mlx5e_iq_disable(struct mlx5e_iq *iq)
322 mlx5_core_destroy_sq(iq->priv->mdev, iq->sqn);
326 mlx5e_iq_open(struct mlx5e_channel *c,
327 struct mlx5e_sq_param *sq_param,
328 struct mlx5e_cq_param *cq_param,
333 err = mlx5e_open_cq(c->priv, cq_param, &iq->cq,
334 &mlx5e_iq_completion, c->ix);
338 err = mlx5e_iq_create(c, sq_param, iq);
342 err = mlx5e_iq_enable(iq, sq_param, &c->bfreg, c->priv->tisn[0]);
346 err = mlx5e_iq_modify(iq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
350 WRITE_ONCE(iq->running, 1);
355 mlx5e_iq_disable(iq);
357 mlx5e_iq_destroy(iq);
359 mlx5e_close_cq(&iq->cq);
365 mlx5e_iq_drain(struct mlx5e_iq *iq)
367 struct mlx5_core_dev *mdev = iq->priv->mdev;
370 * Check if already stopped.
372 * NOTE: Serialization of this function is managed by the
373 * caller ensuring the priv's state lock is locked or in case
374 * of rate limit support, a single thread manages drain and
375 * resume of SQs. The "running" variable can therefore safely
376 * be read without any locks.
378 if (READ_ONCE(iq->running) == 0)
381 /* don't put more packets into the SQ */
382 WRITE_ONCE(iq->running, 0);
384 /* wait till SQ is empty or link is down */
386 while (iq->cc != iq->pc &&
387 (iq->priv->media_status_last & IFM_ACTIVE) != 0 &&
388 mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
389 pci_channel_offline(mdev->pdev) == 0) {
390 mtx_unlock(&iq->lock);
392 iq->cq.mcq.comp(&iq->cq.mcq, NULL);
395 mtx_unlock(&iq->lock);
397 /* error out remaining requests */
398 (void) mlx5e_iq_modify(iq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
400 /* wait till SQ is empty */
402 while (iq->cc != iq->pc &&
403 mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR &&
404 pci_channel_offline(mdev->pdev) == 0) {
405 mtx_unlock(&iq->lock);
407 iq->cq.mcq.comp(&iq->cq.mcq, NULL);
410 mtx_unlock(&iq->lock);
414 mlx5e_iq_close(struct mlx5e_iq *iq)
417 mlx5e_iq_disable(iq);
418 mlx5e_iq_destroy(iq);
419 mlx5e_close_cq(&iq->cq);
423 mlx5e_iq_static_init(struct mlx5e_iq *iq)
425 mtx_init(&iq->lock, "mlx5iq",
426 MTX_NETWORK_LOCK " IQ", MTX_DEF);
427 mtx_init(&iq->comp_lock, "mlx5iq_comp",
428 MTX_NETWORK_LOCK " IQ COMP", MTX_DEF);
432 mlx5e_iq_static_destroy(struct mlx5e_iq *iq)
434 mtx_destroy(&iq->lock);
435 mtx_destroy(&iq->comp_lock);
439 mlx5e_iq_notify_hw(struct mlx5e_iq *iq)
441 mtx_assert(&iq->lock, MA_OWNED);
443 /* Check if we need to write the doorbell */
444 if (unlikely(iq->db_inhibit != 0 || iq->doorbell.d64 == 0))
447 /* Ensure wqe is visible to device before updating doorbell record */
450 *iq->wq.db = cpu_to_be32(iq->pc);
453 * Ensure the doorbell record is visible to device before ringing
458 mlx5_write64(iq->doorbell.d32, iq->uar_map,
459 MLX5_GET_DOORBELL_LOCK(&iq->priv->doorbell_lock));
461 iq->doorbell.d64 = 0;
465 mlx5e_iq_has_room_for(struct mlx5e_iq *iq, u16 n)
470 return ((iq->wq.sz_m1 & (cc - pc)) >= n || cc == pc);
474 mlx5e_iq_get_producer_index(struct mlx5e_iq *iq)
478 mtx_assert(&iq->lock, MA_OWNED);
480 if (unlikely(iq->running == 0))
482 if (unlikely(!mlx5e_iq_has_room_for(iq, 2 * MLX5_SEND_WQE_MAX_WQEBBS)))
485 /* Align IQ edge with NOPs to avoid WQE wrap around */
486 pi = ((~iq->pc) & iq->wq.sz_m1);
487 if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1))) {
488 /* Send one multi NOP message instead of many */
489 mlx5e_iq_send_nop(iq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
490 pi = ((~iq->pc) & iq->wq.sz_m1);
491 if (unlikely(pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)))
494 return (iq->pc & iq->wq.sz_m1);
498 mlx5e_iq_load_memory_cb(void *arg, bus_dma_segment_t *segs,
501 u64 *pdma_address = arg;
503 if (unlikely(error || nseg != 1))
504 panic("mlx5e_iq_load_memory_cb: error=%d nseg=%d", error, nseg);
506 *pdma_address = segs[0].ds_addr;
509 CTASSERT(BUS_DMASYNC_POSTREAD != 0);
510 CTASSERT(BUS_DMASYNC_POSTWRITE != 0);
513 mlx5e_iq_load_memory_single(struct mlx5e_iq *iq, u16 pi, void *buffer, size_t size,
514 u64 *pdma_address, u32 dma_sync)
518 error = bus_dmamap_load(iq->dma_tag, iq->data[pi].dma_map, buffer, size,
519 &mlx5e_iq_load_memory_cb, pdma_address, BUS_DMA_NOWAIT);
521 panic("mlx5e_iq_load_memory: error=%d buffer=%p size=%zd", error, buffer, size);
524 case BUS_DMASYNC_PREREAD:
525 iq->data[pi].dma_sync = BUS_DMASYNC_POSTREAD;
527 case BUS_DMASYNC_PREWRITE:
528 iq->data[pi].dma_sync = BUS_DMASYNC_POSTWRITE;
531 panic("mlx5e_iq_load_memory_single: Invalid DMA sync operation(%d)", dma_sync);
534 /* make sure data in buffer is visible to hardware */
535 bus_dmamap_sync(iq->dma_tag, iq->data[pi].dma_map, dma_sync);