2 * Copyright (c) 2017 Chelsio Communications, Inc.
4 * Written by: Navdeep Parhar <np@FreeBSD.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
30 #include "opt_inet6.h"
31 #include "opt_ratelimit.h"
33 #include <sys/types.h>
34 #include <sys/malloc.h>
35 #include <sys/queue.h>
37 #include <sys/taskqueue.h>
38 #include <sys/sysctl.h>
40 #include "common/common.h"
41 #include "common/t4_regs.h"
42 #include "common/t4_regs_values.h"
43 #include "common/t4_msg.h"
/*
 * Returns non-zero if val lies within [lo, hi].  A negative val is also
 * accepted: callers of the scheduler ioctls use a negative value to mean
 * "parameter not specified", so it must pass validation.
 */
static int
in_range(int val, int lo, int hi)
{

	return (val < 0 || (val <= hi && val >= lo));
}
53 set_sched_class_config(struct adapter *sc, int minmax)
60 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
63 if (hw_off_limits(sc))
66 rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
67 end_synchronized_op(sc, 0);
/*
 * Validate a t4_sched_class_params request from userland and program the
 * class into the firmware with t4_sched_params().  CL_RL classes are also
 * tracked in the per-port cl_rl[] software state under sc->tc_lock.
 *
 * NOTE(review): this extracted fragment is missing lines and each line
 * carries its original file line number; treat as reference text only.
 */
73 set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
76 int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
78 struct tx_cl_rl_params *tc, old;
79 bool check_pktsize = false;
/* Translate the ioctl-level scheduling level to the firmware encoding. */
81 if (p->level == SCHED_CLASS_LEVEL_CL_RL)
82 fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
83 else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
84 fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
85 else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
86 fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
/* Mode is only meaningful for class rate limiting. */
90 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
91 if (p->mode == SCHED_CLASS_MODE_CLASS)
92 fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
93 else if (p->mode == SCHED_CLASS_MODE_FLOW) {
95 fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
101 /* Valid channel must always be provided. */
104 if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
107 pi = sc->port[sc->chan_map[p->channel]];
110 MPASS(pi->tx_chan == p->channel);
111 top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */
113 if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
114 p->level == SCHED_CLASS_LEVEL_CH_RL) {
116 * Valid rate (mode, unit and values) must be provided.
124 if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
125 fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
126 /* ratemode could be relative (%) or absolute. */
127 if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
128 fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
129 /* maxrate is % of port bandwidth. */
130 if (!in_range(p->minrate, 0, 100) ||
131 !in_range(p->maxrate, 0, 100)) {
134 } else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
135 fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
136 /* maxrate is absolute value in kbps. */
137 if (!in_range(p->minrate, 0, top_speed) ||
138 !in_range(p->maxrate, 0, top_speed)) {
143 } else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
144 /* maxrate is the absolute value in pps. */
145 check_pktsize = true;
146 fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
150 MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);
153 * Valid weight must be provided.
157 if (!in_range(p->weight, 1, 99))
164 if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
165 p->level == SCHED_CLASS_LEVEL_CL_WRR) {
167 * Valid scheduling class must be provided.
171 if (!in_range(p->cl, 0, sc->params.nsched_cls - 1))
/* pktsize bounded by 64 and the main VI's MTU. */
178 if (!in_range(p->pktsize, 64, if_getmtu(pi->vi[0].ifp)))
/*
 * For CL_RL: reserve the class in software first, marking it busy
 * (CS_HW_UPDATE_IN_PROGRESS) while the firmware call is made without
 * the lock held.
 */
182 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
183 tc = &pi->sched_params->cl_rl[p->cl];
184 mtx_lock(&sc->tc_lock);
185 if (tc->refcount > 0 || tc->state == CS_HW_UPDATE_IN_PROGRESS)
190 tc->flags |= CF_USER;
191 tc->state = CS_HW_UPDATE_IN_PROGRESS;
192 tc->ratemode = fw_ratemode;
193 tc->rateunit = fw_rateunit;
195 tc->maxrate = p->maxrate;
196 tc->pktsize = p->pktsize;
199 mtx_unlock(&sc->tc_lock);
204 rc = begin_synchronized_op(sc, NULL,
205 sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
/* Roll back the software reservation if the synch-op failed. */
207 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
208 mtx_lock(&sc->tc_lock);
209 MPASS(tc->refcount == 0);
210 MPASS(tc->flags & CF_USER);
211 MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);
213 mtx_unlock(&sc->tc_lock);
217 if (!hw_off_limits(sc)) {
218 rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level,
219 fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl,
220 p->minrate, p->maxrate, p->weight, p->pktsize, 0, sleep_ok);
222 end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);
/* Record the final class state based on the firmware's answer. */
224 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
225 mtx_lock(&sc->tc_lock);
226 MPASS(tc->refcount == 0);
227 MPASS(tc->flags & CF_USER);
228 MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);
231 tc->state = CS_HW_CONFIGURED;
233 /* parameters failed so we don't park at params_set */
234 tc->state = CS_UNINITIALIZED;
235 tc->flags &= ~CF_USER;
236 CH_ERR(pi, "failed to configure traffic class %d: %d. "
237 "params: mode %d, rateunit %d, ratemode %d, "
238 "channel %d, minrate %d, maxrate %d, pktsize %d, "
239 "burstsize %d\n", p->cl, rc, fw_mode, fw_rateunit,
240 fw_ratemode, p->channel, p->minrate, p->maxrate,
243 mtx_unlock(&sc->tc_lock);
/*
 * Task handler (see TASK_INIT in t4_init_tx_sched) that pushes every traffic
 * class in state CS_HW_UPDATE_REQUESTED to the hardware.  Walks all classes
 * of all ports under sc->tc_lock, dropping the lock around the firmware call.
 *
 * NOTE(review): fragment is missing lines (e.g. the continue/skip statement
 * after the state test and the error-handling braces); do not expect it to
 * compile as-is.
 */
250 update_tx_sched(void *context, int pending)
253 struct port_info *pi;
254 struct tx_cl_rl_params *tc;
255 struct adapter *sc = context;
256 const int n = sc->params.nsched_cls;
258 mtx_lock(&sc->tc_lock);
259 for_each_port(sc, i) {
261 tc = &pi->sched_params->cl_rl[0];
262 for (j = 0; j < n; j++, tc++) {
263 MPASS(mtx_owned(&sc->tc_lock));
264 if (tc->state != CS_HW_UPDATE_REQUESTED)
/* Drop the lock before sleeping in the synchronized op. */
266 mtx_unlock(&sc->tc_lock);
268 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
270 mtx_lock(&sc->tc_lock);
273 rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
274 FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
275 tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
276 tc->pktsize, tc->burstsize, 1);
277 end_synchronized_op(sc, 0);
279 mtx_lock(&sc->tc_lock);
280 MPASS(tc->state == CS_HW_UPDATE_REQUESTED);
281 /* Success: class is now live in hardware. */
282 tc->state = CS_HW_CONFIGURED;
285 /* parameters failed so we try to avoid params_set */
286 if (tc->refcount > 0)
287 tc->state = CS_PARAMS_SET;
289 tc->state = CS_UNINITIALIZED;
290 CH_ERR(pi, "failed to configure traffic class %d: %d. "
291 "params: mode %d, rateunit %d, ratemode %d, "
292 "channel %d, minrate %d, maxrate %d, pktsize %d, "
293 "burstsize %d\n", j, rc, tc->mode, tc->rateunit,
294 tc->ratemode, pi->tx_chan, 0, tc->maxrate,
295 tc->pktsize, tc->burstsize);
298 mtx_unlock(&sc->tc_lock);
302 t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
305 if (p->type != SCHED_CLASS_TYPE_PACKET)
308 if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
309 return (set_sched_class_config(sc, p->u.config.minmax));
311 if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
312 return (set_sched_class_params(sc, &p->u.params, 1));
/*
 * Bind (or unbind, idx < 0) a TX queue to a traffic class.  Software
 * refcounts on the old and new class are managed under sc->tc_lock; the
 * queue's tc_idx is set to -2 while the firmware call is in flight to keep
 * concurrent bind/unbind attempts out.
 *
 * NOTE(review): fragment is missing lines (error unwinding, refcount
 * increments/decrements); reference text only.
 */
318 bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
320 struct tx_cl_rl_params *tc0, *tc;
322 uint32_t fw_mnem, fw_class;
/* The eq must exist in hardware before it can be bound. */
324 if (!(txq->eq.flags & EQ_HW_ALLOCATED))
327 mtx_lock(&sc->tc_lock);
328 if (txq->tc_idx == -2) {
329 rc = EBUSY; /* Another bind/unbind in progress already. */
332 if (idx == txq->tc_idx) {
333 rc = 0; /* No change, nothing to do. */
337 tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
340 * Bind to a different class at index idx.
/* Only classes already configured in hardware can be bound to. */
343 if (tc->state != CS_HW_CONFIGURED) {
348 * Ok to proceed. Place a reference on the new class
349 * while still holding on to the reference on the
350 * previous class, if any.
355 /* Mark as busy before letting go of the lock. */
356 old_idx = txq->tc_idx;
358 mtx_unlock(&sc->tc_lock);
360 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
/* DMAQ_EQ_SCHEDCLASS_ETH binds the eq context to a class id. */
362 fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
363 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
364 V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
365 fw_class = idx < 0 ? 0xffffffff : idx;
366 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem,
368 end_synchronized_op(sc, 0);
371 mtx_lock(&sc->tc_lock);
372 MPASS(txq->tc_idx == -2);
375 * Unbind, bind, or bind to a different class succeeded. Remove
376 * the reference on the old traffic class, if any.
380 MPASS(tc->refcount > 0);
386 * Unbind, bind, or bind to a different class failed. Remove
387 * the anticipatory reference on the new traffic class, if any.
391 MPASS(tc->refcount > 0);
/* On failure the queue reverts to its previous binding. */
394 txq->tc_idx = old_idx;
397 MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->params.nsched_cls);
398 mtx_unlock(&sc->tc_lock);
/*
 * Ioctl entry point: bind one TX queue (p->queue >= 0) or all TX queues of
 * the port's main VI to traffic class p->cl.
 *
 * NOTE(review): fragment is missing lines (vi assignment, return
 * statements); reference text only.
 */
403 t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
405 struct port_info *pi = NULL;
410 if (p->port >= sc->params.nports)
414 * XXX: cxgbetool allows the user to specify the physical port only. So
415 * we always operate on the main VI.
417 pi = sc->port[p->port];
420 /* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
421 if (!(vi->flags & VI_INIT_DONE))
/* p->queue and p->cl may be negative wildcards; in_range allows that. */
425 if (!in_range(p->queue, 0, vi->ntxq - 1) ||
426 !in_range(p->cl, 0, sc->params.nsched_cls - 1))
431 * Change the scheduling on all the TX queues for the
434 for_each_txq(vi, i, txq) {
435 rc = bind_txq_to_traffic_class(sc, txq, p->cl);
441 * If op.queue is non-negative, then we're only changing the
442 * scheduling on a single specified TX queue.
444 txq = &sc->sge.txq[vi->first_txq + p->queue];
445 rc = bind_txq_to_traffic_class(sc, txq, p->cl);
452 t4_init_tx_sched(struct adapter *sc)
455 const int n = sc->params.nsched_cls;
456 struct port_info *pi;
458 mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
459 TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
460 for_each_port(sc, i) {
462 pi->sched_params = malloc(sizeof(*pi->sched_params) +
463 n * sizeof(struct tx_cl_rl_params), M_CXGBE, M_ZERO | M_WAITOK);
470 t4_free_tx_sched(struct adapter *sc)
474 taskqueue_drain(taskqueue_thread, &sc->tc_task);
476 for_each_port(sc, i) {
477 if (sc->port[i] != NULL)
478 free(sc->port[i]->sched_params, M_CXGBE);
481 if (mtx_initialized(&sc->tc_lock))
482 mtx_destroy(&sc->tc_lock);
488 t4_update_tx_sched(struct adapter *sc)
491 taskqueue_enqueue(taskqueue_thread, &sc->tc_task);
/*
 * Reserve a flow-mode, absolute-bitrate class at maxrate kbps on the given
 * port, reusing an existing matching class if possible; otherwise claim the
 * first never-used class, or failing that the first unreferenced one, and
 * request a hardware update for it via t4_update_tx_sched().
 *
 * NOTE(review): fragment is missing lines (refcount take, out-label /
 * return path, fa/fa2 initialization); reference text only.
 */
495 t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
498 int rc = 0, fa, fa2, i, pktsize, burstsize;
500 struct tx_cl_rl_params *tc;
501 struct port_info *pi;
503 MPASS(port_id >= 0 && port_id < sc->params.nports);
505 pi = sc->port[port_id];
/* Port-level overrides take precedence; otherwise derive from the MTU. */
506 if (pi->sched_params->pktsize > 0)
507 pktsize = pi->sched_params->pktsize;
509 pktsize = if_getmtu(pi->vi[0].ifp);
510 if (pi->sched_params->burstsize > 0)
511 burstsize = pi->sched_params->burstsize;
513 burstsize = pktsize * 4;
514 tc = &pi->sched_params->cl_rl[0];
518 mtx_lock(&sc->tc_lock);
/* First pass: look for an existing class with identical parameters. */
519 for (i = 0; i < sc->params.nsched_cls; i++, tc++) {
520 if (tc->state >= CS_PARAMS_SET &&
521 tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
522 tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
523 tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
524 tc->maxrate == maxrate && tc->pktsize == pktsize &&
525 tc->burstsize == burstsize) {
528 if (tc->state == CS_PARAMS_SET) {
529 tc->state = CS_HW_UPDATE_REQUESTED;
/* While scanning, remember fallback candidates. */
535 if (fa < 0 && tc->state == CS_UNINITIALIZED) {
536 MPASS(tc->refcount == 0);
537 fa = i; /* first available, never used. */
539 if (fa2 < 0 && tc->refcount == 0 && !(tc->flags & CF_USER)) {
540 fa2 = i; /* first available, used previously. */
544 MPASS(i == sc->params.nsched_cls);
/* No match: claim a free class and program the requested parameters. */
551 MPASS(fa >= 0 && fa < sc->params.nsched_cls);
552 tc = &pi->sched_params->cl_rl[fa];
553 MPASS(!(tc->flags & CF_USER));
554 MPASS(tc->refcount == 0);
557 tc->state = CS_HW_UPDATE_REQUESTED;
558 tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
559 tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
560 tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
561 tc->maxrate = maxrate;
562 tc->pktsize = pktsize;
563 tc->burstsize = burstsize;
568 mtx_unlock(&sc->tc_lock);
/* Hardware programming happens asynchronously in update_tx_sched. */
570 t4_update_tx_sched(sc);
575 t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
577 struct tx_cl_rl_params *tc;
579 MPASS(port_id >= 0 && port_id < sc->params.nports);
580 MPASS(tc_idx >= 0 && tc_idx < sc->params.nsched_cls);
582 mtx_lock(&sc->tc_lock);
583 tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
584 MPASS(tc->refcount > 0);
586 mtx_unlock(&sc->tc_lock);
590 sysctl_tc(SYSCTL_HANDLER_ARGS)
592 struct vi_info *vi = arg1;
593 struct adapter *sc = vi->adapter;
595 int qidx = arg2, rc, tc_idx;
597 MPASS(qidx >= vi->first_txq && qidx < vi->first_txq + vi->ntxq);
599 txq = &sc->sge.txq[qidx];
600 tc_idx = txq->tc_idx;
601 rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
602 if (rc != 0 || req->newptr == NULL)
605 if (sc->flags & IS_VF)
607 if (!in_range(tc_idx, 0, sc->params.nsched_cls - 1))
610 return (bind_txq_to_traffic_class(sc, txq, tc_idx));
/*
 * Sysctl handler that renders one traffic class's parameters as text.
 * arg2 encodes the port id in the top 16 bits and the class index below;
 * a snapshot of the class is taken under sc->tc_lock and formatted into
 * an sbuf.
 *
 * NOTE(review): fragment is missing lines (break statements, default
 * cases, the i = arg2 & 0xffff extraction, sbuf_delete); reference only.
 */
614 sysctl_tc_params(SYSCTL_HANDLER_ARGS)
616 struct adapter *sc = arg1;
617 struct tx_cl_rl_params tc;
619 int i, rc, port_id, mbps, gbps;
621 rc = sysctl_wire_old_buffer(req, 0);
625 sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
629 port_id = arg2 >> 16;
630 MPASS(port_id < sc->params.nports);
631 MPASS(sc->port[port_id] != NULL);
633 MPASS(i < sc->params.nsched_cls);
/* Copy the class out under the lock, then format without it. */
635 mtx_lock(&sc->tc_lock);
636 tc = sc->port[port_id]->sched_params->cl_rl[i];
637 mtx_unlock(&sc->tc_lock);
639 if (tc.state < CS_PARAMS_SET) {
640 sbuf_printf(sb, "uninitialized");
644 switch (tc.rateunit) {
645 case SCHED_CLASS_RATEUNIT_BITS:
646 switch (tc.ratemode) {
647 case SCHED_CLASS_RATEMODE_REL:
648 /* XXX: top speed or actual link speed? */
649 gbps = port_top_speed(sc->port[port_id]);
650 sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
652 case SCHED_CLASS_RATEMODE_ABS:
/* Print in the largest unit that divides maxrate evenly. */
653 mbps = tc.maxrate / 1000;
654 gbps = tc.maxrate / 1000000;
655 if (tc.maxrate == gbps * 1000000)
656 sbuf_printf(sb, "%uGbps", gbps);
657 else if (tc.maxrate == mbps * 1000)
658 sbuf_printf(sb, "%uMbps", mbps);
660 sbuf_printf(sb, "%uKbps", tc.maxrate);
667 case SCHED_CLASS_RATEUNIT_PKTS:
668 sbuf_printf(sb, "%upps", tc.maxrate);
676 case SCHED_CLASS_MODE_CLASS:
677 /* Note that pktsize and burstsize are not used in this mode. */
678 sbuf_printf(sb, " aggregate");
680 case SCHED_CLASS_MODE_FLOW:
681 sbuf_printf(sb, " per-flow");
683 sbuf_printf(sb, " pkt-size %u", tc.pktsize);
684 if (tc.burstsize > 0)
685 sbuf_printf(sb, " burst-size %u", tc.burstsize);
694 rc = sbuf_finish(sb);
/*
 * Initialize the Ethernet-offload tid (etid) table: lock, zeroed entry
 * array, and a singly-linked free list threaded through the entries.
 * No-op on adapters without Ethernet offload capability.
 *
 * NOTE(review): fragment is missing lines (the t = &sc->tids assignment
 * and the malloc flags continuation line); reference text only.
 */
702 t4_init_etid_table(struct adapter *sc)
707 if (!is_ethoffload(sc))
711 MPASS(t->netids > 0);
713 mtx_init(&t->etid_lock, "etid lock", NULL, MTX_DEF);
714 t->etid_tab = malloc(sizeof(*t->etid_tab) * t->netids, M_CXGBE,
/* Free list starts at entry 0; each entry points at its successor. */
716 t->efree = t->etid_tab;
718 for (i = 1; i < t->netids; i++)
719 t->etid_tab[i - 1].next = &t->etid_tab[i];
720 t->etid_tab[t->netids - 1].next = NULL;
724 t4_free_etid_table(struct adapter *sc)
728 if (!is_ethoffload(sc))
732 MPASS(t->netids > 0);
734 free(t->etid_tab, M_CXGBE);
737 if (mtx_initialized(&t->etid_lock))
738 mtx_destroy(&t->etid_lock);
/* Forward declarations for the etid allocator used by the rate-tag code. */
742 static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *);
743 static void free_etid(struct adapter *, int);
/*
 * Pop an etid off the free list, associate it with cst, and return its
 * absolute id (table index + etid_base).
 *
 * NOTE(review): fragment is missing lines (the empty-free-list branch,
 * the p->cst assignment, and the return); presumably returns a negative
 * value when the list is exhausted — verify against caller.
 */
746 alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst)
748 struct tid_info *t = &sc->tids;
751 mtx_lock(&t->etid_lock);
753 union etid_entry *p = t->efree;
755 etid = p - t->etid_tab + t->etid_base;
760 mtx_unlock(&t->etid_lock);
764 struct cxgbe_rate_tag *
765 lookup_etid(struct adapter *sc, int etid)
767 struct tid_info *t = &sc->tids;
769 return (t->etid_tab[etid - t->etid_base].cst);
/*
 * Return an etid to the free list.
 *
 * NOTE(review): fragment is missing the lines between lock and unlock —
 * presumably the entry is pushed onto t->efree there; verify against the
 * alloc_etid free-list discipline.
 */
773 free_etid(struct adapter *sc, int etid)
775 struct tid_info *t = &sc->tids;
776 union etid_entry *p = &t->etid_tab[etid - t->etid_base];
778 mtx_lock(&t->etid_lock);
782 mtx_unlock(&t->etid_lock);
/* Send-tag callbacks implementing hardware pacing (IF_SND_TAG_TYPE_RATE_LIMIT). */
785 static int cxgbe_rate_tag_modify(struct m_snd_tag *,
786 union if_snd_tag_modify_params *);
787 static int cxgbe_rate_tag_query(struct m_snd_tag *,
788 union if_snd_tag_query_params *);
789 static void cxgbe_rate_tag_free(struct m_snd_tag *);
/* Method table handed to m_snd_tag_init for every cxgbe rate tag. */
791 static const struct if_snd_tag_sw cxgbe_rate_tag_sw = {
792 .snd_tag_modify = cxgbe_rate_tag_modify,
793 .snd_tag_query = cxgbe_rate_tag_query,
794 .snd_tag_free = cxgbe_rate_tag_free,
795 .type = IF_SND_TAG_TYPE_RATE_LIMIT
/*
 * Allocate a rate-limit send tag: reserve a traffic class for the requested
 * rate (converted bytes/s -> kbits/s), allocate the tag and an etid, and
 * initialize the tag's mbuf queues, lock and TX credit accounting.
 *
 * NOTE(review): fragment is missing lines (rc/schedcl declarations, error
 * unwinding for failed malloc/etid, the *pt assignment and return);
 * reference text only.
 */
799 cxgbe_rate_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params,
800 struct m_snd_tag **pt)
803 struct vi_info *vi = if_getsoftc(ifp);
804 struct port_info *pi = vi->pi;
805 struct adapter *sc = pi->adapter;
806 struct cxgbe_rate_tag *cst;
808 MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT);
/* max_rate is in bytes/s; the class rate is kept in kbits/s. */
810 rc = t4_reserve_cl_rl_kbps(sc, pi->port_id,
811 (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
814 MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);
816 cst = malloc(sizeof(*cst), M_CXGBE, M_ZERO | M_NOWAIT);
/* Allocation failed: give the reserved class back. */
819 t4_release_cl_rl(sc, pi->port_id, schedcl);
823 cst->etid = alloc_etid(sc, cst);
829 mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
830 mbufq_init(&cst->pending_tx, INT_MAX);
831 mbufq_init(&cst->pending_fwack, INT_MAX);
832 m_snd_tag_init(&cst->com, ifp, &cxgbe_rate_tag_sw);
833 cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
835 cst->port_id = pi->port_id;
836 cst->schedcl = schedcl;
837 cst->max_rate = params->rate_limit.max_rate;
838 cst->tx_credits = sc->params.eo_wr_cred;
839 cst->tx_total = cst->tx_credits;
/* Precomputed CPL_TX_PKT_XT control word used for every packet on this tag. */
841 cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
842 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
843 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
846 * Queues will be selected later when the connection flowid is available.
854 * Change in parameters, no change in ifp.
/*
 * snd_tag_modify callback: re-rate an existing tag.  Reserves a class for
 * the new rate first and only then releases the old one, so the tag is
 * never left without a class.
 *
 * NOTE(review): fragment is missing lines (rc/schedcl declarations, the
 * error-return path after the reservation, the final return); reference
 * text only.
 */
857 cxgbe_rate_tag_modify(struct m_snd_tag *mst,
858 union if_snd_tag_modify_params *params)
861 struct cxgbe_rate_tag *cst = mst_to_crt(mst);
862 struct adapter *sc = cst->adapter;
864 /* XXX: is schedcl -1 ok here? */
865 MPASS(cst->schedcl >= 0 && cst->schedcl < sc->params.nsched_cls);
867 mtx_lock(&cst->lock);
868 MPASS(cst->flags & EO_SND_TAG_REF);
/* bytes/s -> kbits/s, same conversion as in cxgbe_rate_tag_alloc. */
869 rc = t4_reserve_cl_rl_kbps(sc, cst->port_id,
870 (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
873 MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);
874 t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
875 cst->schedcl = schedcl;
876 cst->max_rate = params->rate_limit.max_rate;
877 mtx_unlock(&cst->lock);
883 cxgbe_rate_tag_query(struct m_snd_tag *mst,
884 union if_snd_tag_query_params *params)
886 struct cxgbe_rate_tag *cst = mst_to_crt(mst);
888 params->rate_limit.max_rate = cst->max_rate;
890 #define CST_TO_MST_QLEVEL_SCALE (IF_SND_QUEUE_LEVEL_MAX / cst->tx_total)
891 params->rate_limit.queue_level =
892 (cst->tx_total - cst->tx_credits) * CST_TO_MST_QLEVEL_SCALE;
898 * Unlocks cst and frees it.
/*
 * Final teardown of a rate tag.  Caller holds cst->lock; all preconditions
 * (no kernel reference, all credits returned, no queued mbufs) are asserted
 * before the etid and traffic class are released and the tag destroyed.
 *
 * NOTE(review): fragment is missing lines (e.g. the guard before
 * free_etid and the final free of cst); reference text only.
 */
901 cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst)
903 struct adapter *sc = cst->adapter;
905 mtx_assert(&cst->lock, MA_OWNED);
906 MPASS((cst->flags & EO_SND_TAG_REF) == 0);
907 MPASS(cst->tx_credits == cst->tx_total);
908 MPASS(cst->plen == 0);
909 MPASS(mbufq_first(&cst->pending_tx) == NULL);
910 MPASS(mbufq_first(&cst->pending_fwack) == NULL);
913 free_etid(sc, cst->etid);
/* schedcl == -1 means no traffic class is held by this tag. */
914 if (cst->schedcl != -1)
915 t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
916 mtx_unlock(&cst->lock);
917 mtx_destroy(&cst->lock);
/*
 * snd_tag_free callback: drop the kernel's reference on the tag.  The tag
 * is destroyed immediately only if no firmware completions are in flight
 * and all TX credits are back; otherwise a flush work request is sent and
 * teardown completes later when the credits return.
 */
922 cxgbe_rate_tag_free(struct m_snd_tag *mst)
924 struct cxgbe_rate_tag *cst = mst_to_crt(mst);
926 mtx_lock(&cst->lock);
928 /* The kernel is done with the snd_tag. Remove its reference. */
929 MPASS(cst->flags & EO_SND_TAG_REF);
930 cst->flags &= ~EO_SND_TAG_REF;
932 if (cst->ncompl == 0) {
934 * No fw4_ack in flight. Free the tag right away if there are
935 * no outstanding credits. Request the firmware to return all
936 * credits for the etid otherwise.
938 if (cst->tx_credits == cst->tx_total) {
939 cxgbe_rate_tag_free_locked(cst);
940 return; /* cst is gone. */
942 send_etid_flush_wr(cst);
944 mtx_unlock(&cst->lock);
/*
 * if_ratelimit_query handler: describe this adapter's hardware pacing
 * capabilities (selectable arbitrary rates, flow and rate-class counts) to
 * the stack's rate-limiting code.
 *
 * NOTE(review): fragment is missing lines (the else keyword between the
 * T5 and T6 branches, closing braces); reference text only.
 */
948 cxgbe_ratelimit_query(if_t ifp, struct if_ratelimit_query_results *q)
950 struct vi_info *vi = if_getsoftc(ifp);
951 struct adapter *sc = vi->adapter;
/* No fixed rate table: any rate can be requested (RT_IS_SELECTABLE). */
953 q->rate_table = NULL;
954 q->flags = RT_IS_SELECTABLE;
956 * Absolute max limits from the firmware configuration. Practical
957 * limits depend on the burstsize, pktsize (if_getmtu(ifp) ultimately) and
960 q->max_flows = sc->tids.netids;
961 q->number_of_rates = sc->params.nsched_cls;
962 q->min_segment_burst = 4; /* matches PKTSCHED_BURST in the firmware. */
965 if (chip_id(sc) < CHELSIO_T6) {
966 /* Based on testing by rrs@ with a T580 at burstsize = 4. */
967 MPASS(q->min_segment_burst == 4);
968 q->max_flows = min(4000, q->max_flows);
970 /* XXX: TBD, carried forward from T5 for now. */
971 q->max_flows = min(4000, q->max_flows);
975 * XXX: tcp_ratelimit.c grabs all available rates on link-up before it
976 * even knows whether hw pacing will be used or not. This prevents
977 * other consumers like SO_MAX_PACING_RATE or those using cxgbetool or
978 * the private ioctls from using any of traffic classes.
980 * Underreport the number of rates to tcp_ratelimit so that it doesn't
981 * hog all of them. This can be removed if/when tcp_ratelimit switches
982 * to making its allocations on first-use rather than link-up. There is
983 * nothing wrong with one particular consumer reserving all the classes
984 * but it should do so only if it'll actually use hw rate limiting.
986 q->number_of_rates /= 4;