2 * Copyright (c) 2017 Chelsio Communications, Inc.
4 * Written by: Navdeep Parhar <np@FreeBSD.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
32 #include "opt_inet6.h"
34 #include <sys/types.h>
35 #include <sys/malloc.h>
36 #include <sys/queue.h>
38 #include <sys/taskqueue.h>
39 #include <sys/sysctl.h>
41 #include "common/common.h"
42 #include "common/t4_regs.h"
43 #include "common/t4_regs_values.h"
44 #include "common/t4_msg.h"
/*
 * Range-check helper.  A negative val is accepted unconditionally (callers
 * use negative values as a "not specified" wildcard — see the p->queue
 * handling in t4_set_sched_queue); otherwise val must lie in [lo, hi].
 */
48 in_range(int val, int lo, int hi)
51 return (val < 0 || (val <= hi && val >= lo));
/*
 * Program the firmware packet-scheduler configuration (minmax setting) for
 * the whole adapter.  The firmware call is made under a synchronized op;
 * t4_sched_config()'s positive error code is negated into the driver's
 * negative-errno-free convention.
 * NOTE(review): error-check/return lines are elided from this listing.
 */
55 set_sched_class_config(struct adapter *sc, int minmax)
62 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
65 rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
66 end_synchronized_op(sc, 0);
/*
 * Validate user-supplied scheduling-class parameters (level, mode, rate
 * unit/mode, channel, class index, weight, pktsize) and push them to the
 * firmware via t4_sched_params().  For the CL_RL (per-class rate limit)
 * level the software shadow copy in pi->sched_params->cl_rl[] is staged
 * first under tc_lock, with CLRL_SYNC|CLRL_USER marking the in-flight,
 * user-initiated update; CLRL_ERR is set/cleared according to the outcome.
 * NOTE(review): several original lines (error returns, closing braces,
 * rollback of `old`) are elided from this listing; comments below cover
 * only the visible code.
 */
72 set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
75 int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
77 struct tx_cl_rl_params *tc, old;
78 bool check_pktsize = false;
/* Translate the ioctl's level to the firmware encoding. */
80 if (p->level == SCHED_CLASS_LEVEL_CL_RL)
81 fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
82 else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
83 fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
84 else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
85 fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
/* Mode (aggregate class vs. per-flow) is only meaningful for CL_RL. */
89 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
90 if (p->mode == SCHED_CLASS_MODE_CLASS)
91 fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
92 else if (p->mode == SCHED_CLASS_MODE_FLOW) {
94 fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
100 /* Valid channel must always be provided. */
103 if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
106 pi = sc->port[sc->chan_map[p->channel]];
109 MPASS(pi->tx_chan == p->channel);
110 top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */
/* Rate limits (CL_RL/CH_RL) need a fully-specified, in-range rate. */
112 if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
113 p->level == SCHED_CLASS_LEVEL_CH_RL) {
115 * Valid rate (mode, unit and values) must be provided.
123 if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
124 fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
125 /* ratemode could be relative (%) or absolute. */
126 if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
127 fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
128 /* maxrate is % of port bandwidth. */
129 if (!in_range(p->minrate, 0, 100) ||
130 !in_range(p->maxrate, 0, 100)) {
133 } else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
134 fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
135 /* maxrate is absolute value in kbps. */
136 if (!in_range(p->minrate, 0, top_speed) ||
137 !in_range(p->maxrate, 0, top_speed)) {
142 } else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
143 /* maxrate is the absolute value in pps. */
144 check_pktsize = true;
145 fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
/* Not a rate limit, so this must be weighted round-robin. */
149 MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);
152 * Valid weight must be provided.
156 if (!in_range(p->weight, 1, 99))
/* Class-level operations need a valid scheduling-class index. */
163 if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
164 p->level == SCHED_CLASS_LEVEL_CL_WRR) {
166 * Valid scheduling class must be provided.
170 if (!in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
/* pps rate limits additionally require a plausible packet size. */
177 if (!in_range(p->pktsize, 64, pi->vi[0].ifp->if_mtu))
/*
 * Stage the new CL_RL parameters in the software copy.  A class that is
 * referenced or already mid-update (SYNC/ASYNC) cannot be changed.
 */
181 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
182 tc = &pi->sched_params->cl_rl[p->cl];
183 mtx_lock(&sc->tc_lock);
184 if (tc->refcount > 0 || tc->flags & (CLRL_SYNC | CLRL_ASYNC))
187 tc->flags |= CLRL_SYNC | CLRL_USER;
188 tc->ratemode = fw_ratemode;
189 tc->rateunit = fw_rateunit;
191 tc->maxrate = p->maxrate;
192 tc->pktsize = p->pktsize;
196 mtx_unlock(&sc->tc_lock);
/* Push the parameters to the firmware under a synchronized op. */
201 rc = begin_synchronized_op(sc, NULL,
202 sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
204 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
205 mtx_lock(&sc->tc_lock);
207 mtx_unlock(&sc->tc_lock);
211 rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode,
212 fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate,
213 p->weight, p->pktsize, 0, sleep_ok);
214 end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);
/*
 * Firmware call done: clear the in-flight SYNC marker and record the
 * outcome in CLRL_ERR.  NOTE(review): the rc-dependent branch between
 * clearing and setting CLRL_ERR is elided here.
 */
216 if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
217 mtx_lock(&sc->tc_lock);
218 MPASS(tc->flags & CLRL_SYNC);
219 MPASS(tc->flags & CLRL_USER);
220 MPASS(tc->refcount == 0);
222 tc->flags &= ~CLRL_SYNC;
224 tc->flags &= ~CLRL_ERR;
226 tc->flags |= CLRL_ERR;
227 mtx_unlock(&sc->tc_lock);
/*
 * Taskqueue handler (queued by t4_update_tx_sched).  Walks every
 * scheduling class of every port and pushes any class marked CLRL_ASYNC
 * to the firmware.  tc_lock is dropped around the (sleeping) synchronized
 * firmware call and re-taken afterwards; CLRL_ERR records the outcome.
 * NOTE(review): the rc-dependent branch and some loop-control lines are
 * elided from this listing.
 */
234 update_tx_sched(void *context, int pending)
237 struct port_info *pi;
238 struct tx_cl_rl_params *tc;
239 struct adapter *sc = context;
240 const int n = sc->chip_params->nsched_cls;
242 mtx_lock(&sc->tc_lock);
243 for_each_port(sc, i) {
245 tc = &pi->sched_params->cl_rl[0];
246 for (j = 0; j < n; j++, tc++) {
247 MPASS(mtx_owned(&sc->tc_lock));
248 if ((tc->flags & CLRL_ASYNC) == 0)
/* Drop the lock before the sleeping firmware call. */
250 mtx_unlock(&sc->tc_lock);
252 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
254 mtx_lock(&sc->tc_lock);
257 rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
258 FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
259 tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
260 tc->pktsize, tc->burstsize, 1);
261 end_synchronized_op(sc, 0);
/* Re-take the lock and record the result of the async update. */
263 mtx_lock(&sc->tc_lock);
264 MPASS(tc->flags & CLRL_ASYNC);
265 tc->flags &= ~CLRL_ASYNC;
267 tc->flags &= ~CLRL_ERR;
269 tc->flags |= CLRL_ERR;
272 mtx_unlock(&sc->tc_lock);
/*
 * Ioctl entry point for scheduler-class operations.  Only packet-type
 * scheduling is supported; dispatches the CONFIG and PARAMS subcommands
 * to the corresponding helpers (PARAMS with sleep_ok = 1).
 * NOTE(review): the error returns for unsupported type/subcmd are elided
 * from this listing.
 */
276 t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
279 if (p->type != SCHED_CLASS_TYPE_PACKET)
282 if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
283 return (set_sched_class_config(sc, p->u.config.minmax));
285 if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
286 return (set_sched_class_params(sc, &p->u.params, 1));
/*
 * Bind (idx >= 0) or unbind (idx < 0) a TX queue's hardware EQ to a
 * traffic class.  txq->tc_idx == -2 is the transient "bind/unbind in
 * progress" sentinel; -1 means unbound.  The new class gets an
 * anticipatory reference while tc_lock is dropped for the firmware call;
 * on failure that reference is dropped and old_idx is restored, on
 * success the old class's reference is dropped instead.
 * NOTE(review): some lines (reference count ++/--, error returns, closing
 * braces) are elided from this listing.
 */
292 bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
294 struct tx_cl_rl_params *tc0, *tc;
296 uint32_t fw_mnem, fw_class;
/* The EQ must exist in hardware before it can be (re)bound. */
298 if (!(txq->eq.flags & EQ_ALLOCATED))
301 mtx_lock(&sc->tc_lock);
302 if (txq->tc_idx == -2) {
303 rc = EBUSY; /* Another bind/unbind in progress already. */
306 if (idx == txq->tc_idx) {
307 rc = 0; /* No change, nothing to do. */
311 tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
314 * Bind to a different class at index idx.
/* Refuse to bind to a class the firmware rejected earlier. */
317 if (tc->flags & CLRL_ERR) {
322 * Ok to proceed. Place a reference on the new class
323 * while still holding on to the reference on the
324 * previous class, if any.
329 /* Mark as busy before letting go of the lock. */
330 old_idx = txq->tc_idx;
332 mtx_unlock(&sc->tc_lock);
334 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
/* DMAQ_EQ_SCHEDCLASS_ETH param keyed by the EQ context id; 0xffffffff
 * requests an unbind. */
337 fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
338 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
339 V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
340 fw_class = idx < 0 ? 0xffffffff : idx;
341 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem, &fw_class);
342 end_synchronized_op(sc, 0);
344 mtx_lock(&sc->tc_lock);
345 MPASS(txq->tc_idx == -2);
348 * Unbind, bind, or bind to a different class succeeded. Remove
349 * the reference on the old traffic class, if any.
353 MPASS(tc->refcount > 0);
359 * Unbind, bind, or bind to a different class failed. Remove
360 * the anticipatory reference on the new traffic class, if any.
364 MPASS(tc->refcount > 0);
367 txq->tc_idx = old_idx;
370 MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->chip_params->nsched_cls);
371 mtx_unlock(&sc->tc_lock);
/*
 * Ioctl entry point: bind one TX queue (p->queue >= 0) or all TX queues
 * (p->queue < 0, wildcard accepted by in_range) of the port's main VI to
 * traffic class p->cl.  Delegates the actual work to
 * bind_txq_to_traffic_class().
 * NOTE(review): error returns and the if/else joining the two cases are
 * elided from this listing.
 */
376 t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
378 struct port_info *pi = NULL;
383 if (p->port >= sc->params.nports)
387 * XXX: cxgbetool allows the user to specify the physical port only. So
388 * we always operate on the main VI.
390 pi = sc->port[p->port];
393 /* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
394 if (!(vi->flags & VI_INIT_DONE))
398 if (!in_range(p->queue, 0, vi->ntxq - 1) ||
399 !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
404 * Change the scheduling on all the TX queues for the
407 for_each_txq(vi, i, txq) {
408 rc = bind_txq_to_traffic_class(sc, txq, p->cl);
414 * If op.queue is non-negative, then we're only changing the
415 * scheduling on a single specified TX queue.
417 txq = &sc->sge.txq[vi->first_txq + p->queue];
418 rc = bind_txq_to_traffic_class(sc, txq, p->cl);
/*
 * One-time TX scheduler setup during attach: initialize tc_lock and the
 * update task, then allocate and seed each port's per-class rate-limit
 * table (cl_rl[] is a flexible trailing array, hence the combined
 * malloc).  Every class starts as an absolute 1 Gbps aggregate bitrate
 * limit; classes the firmware rejects are flagged CLRL_ERR.
 */
425 t4_init_tx_sched(struct adapter *sc)
428 const int n = sc->chip_params->nsched_cls;
429 struct port_info *pi;
430 struct tx_cl_rl_params *tc;
432 mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
433 TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
434 for_each_port(sc, i) {
436 pi->sched_params = malloc(sizeof(*pi->sched_params) +
437 n * sizeof(*tc), M_CXGBE, M_ZERO | M_WAITOK);
438 tc = &pi->sched_params->cl_rl[0];
439 for (j = 0; j < n; j++, tc++) {
441 tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
442 tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
443 tc->mode = FW_SCHED_PARAMS_MODE_CLASS;
444 tc->maxrate = 1000 * 1000; /* 1 Gbps. Arbitrary */
446 if (t4_sched_params_cl_rl_kbps(sc, pi->tx_chan, j,
447 tc->mode, tc->maxrate, tc->pktsize, 1) != 0)
448 tc->flags = CLRL_ERR;
/*
 * Detach-time teardown: drain any pending update task, free each port's
 * scheduler parameter table, and destroy tc_lock.  All steps are guarded
 * so this is safe to call on a partially-initialized adapter.
 */
456 t4_free_tx_sched(struct adapter *sc)
460 taskqueue_drain(taskqueue_thread, &sc->tc_task);
462 for_each_port(sc, i) {
463 if (sc->port[i] != NULL)
464 free(sc->port[i]->sched_params, M_CXGBE);
467 if (mtx_initialized(&sc->tc_lock))
468 mtx_destroy(&sc->tc_lock);
/*
 * Schedule update_tx_sched() on the system taskqueue to push any
 * CLRL_ASYNC-flagged class parameters to the firmware.
 */
474 t4_update_tx_sched(struct adapter *sc)
477 taskqueue_enqueue(taskqueue_thread, &sc->tc_task);
/*
 * Reserve a per-flow absolute-bitrate (kbps) rate-limit class on a port.
 * First scans for an existing class with matching rate/pktsize/burstsize
 * (reusing it and, per the ERR/ASYNC/SYNC flag check, presumably only if
 * it is in a clean state — the matching branch body is elided here);
 * otherwise claims the first class with refcount 0 and no CLRL_USER flag,
 * fills in the requested parameters, and kicks the async updater to
 * program the firmware.  pktsize/burstsize fall back to the port MTU and
 * 4x pktsize when no port-level overrides are configured.
 * NOTE(review): the out-of-classes error path and the refcount/index
 * bookkeeping lines are elided from this listing.
 */
481 t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
484 int rc = 0, fa = -1, i, pktsize, burstsize;
486 struct tx_cl_rl_params *tc;
487 struct port_info *pi;
489 MPASS(port_id >= 0 && port_id < sc->params.nports);
491 pi = sc->port[port_id];
492 if (pi->sched_params->pktsize > 0)
493 pktsize = pi->sched_params->pktsize;
495 pktsize = pi->vi[0].ifp->if_mtu;
496 if (pi->sched_params->burstsize > 0)
497 burstsize = pi->sched_params->burstsize;
499 burstsize = pktsize * 4;
500 tc = &pi->sched_params->cl_rl[0];
503 mtx_lock(&sc->tc_lock);
504 for (i = 0; i < sc->chip_params->nsched_cls; i++, tc++) {
505 if (fa < 0 && tc->refcount == 0 && !(tc->flags & CLRL_USER))
506 fa = i; /* first available */
/* Exact-match scan: reuse a class already set up for this rate. */
508 if (tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
509 tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
510 tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
511 tc->maxrate == maxrate && tc->pktsize == pktsize &&
512 tc->burstsize == burstsize) {
515 if ((tc->flags & (CLRL_ERR | CLRL_ASYNC | CLRL_SYNC)) ==
/* No match found; claim the first available class, if any. */
523 MPASS(i == sc->chip_params->nsched_cls);
525 tc = &pi->sched_params->cl_rl[fa];
527 tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
528 tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
529 tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
530 tc->maxrate = maxrate;
531 tc->pktsize = pktsize;
532 tc->burstsize = burstsize;
540 mtx_unlock(&sc->tc_lock);
542 tc->flags |= CLRL_ASYNC;
543 t4_update_tx_sched(sc);
/*
 * Drop a reference on a previously reserved rate-limit class (counterpart
 * of t4_reserve_cl_rl_kbps).  NOTE(review): the actual refcount decrement
 * line is elided from this listing; the MPASS shows it must be > 0 on
 * entry.
 */
549 t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
551 struct tx_cl_rl_params *tc;
553 MPASS(port_id >= 0 && port_id < sc->params.nports);
554 MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls);
556 mtx_lock(&sc->tc_lock);
557 tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
558 MPASS(tc->refcount > 0);
560 mtx_unlock(&sc->tc_lock);
/*
 * Sysctl handler for a TX queue's traffic-class binding.  arg1 is the
 * vi_info, arg2 the queue index within the VI.  Reads report the current
 * txq->tc_idx; writes (rejected on VFs) validate the new index and rebind
 * via bind_txq_to_traffic_class().
 * NOTE(review): the error returns for rc/IS_VF/range failures are elided
 * from this listing.
 */
564 sysctl_tc(SYSCTL_HANDLER_ARGS)
566 struct vi_info *vi = arg1;
567 struct port_info *pi;
570 int qidx = arg2, rc, tc_idx;
572 MPASS(qidx >= 0 && qidx < vi->ntxq);
575 txq = &sc->sge.txq[vi->first_txq + qidx];
577 tc_idx = txq->tc_idx;
578 rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
/* newptr == NULL means this was a read; nothing more to do. */
579 if (rc != 0 || req->newptr == NULL)
582 if (sc->flags & IS_VF)
584 if (!in_range(tc_idx, 0, sc->chip_params->nsched_cls - 1))
587 return (bind_txq_to_traffic_class(sc, txq, tc_idx));
/*
 * Read-only sysctl handler that pretty-prints one traffic class's
 * parameters into an sbuf.  arg2 packs the port id in the high 16 bits
 * (the class index, presumably in the low bits, is decoded on an elided
 * line).  A snapshot of the class is taken under tc_lock so formatting
 * happens unlocked; rates are rendered in the largest unit that divides
 * evenly (Gbps, then Mbps, then Kbps), or %/pps for relative/packet-rate
 * classes.
 * NOTE(review): break statements, default cases, the mode switch header,
 * and the sbuf cleanup tail are elided from this listing.
 */
591 sysctl_tc_params(SYSCTL_HANDLER_ARGS)
593 struct adapter *sc = arg1;
594 struct tx_cl_rl_params tc;
596 int i, rc, port_id, mbps, gbps;
598 rc = sysctl_wire_old_buffer(req, 0);
602 sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
606 port_id = arg2 >> 16;
607 MPASS(port_id < sc->params.nports);
608 MPASS(sc->port[port_id] != NULL);
610 MPASS(i < sc->chip_params->nsched_cls);
/* Snapshot under the lock; format from the local copy afterwards. */
612 mtx_lock(&sc->tc_lock);
613 tc = sc->port[port_id]->sched_params->cl_rl[i];
614 mtx_unlock(&sc->tc_lock);
616 switch (tc.rateunit) {
617 case SCHED_CLASS_RATEUNIT_BITS:
618 switch (tc.ratemode) {
619 case SCHED_CLASS_RATEMODE_REL:
620 /* XXX: top speed or actual link speed? */
621 gbps = port_top_speed(sc->port[port_id]);
622 sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
624 case SCHED_CLASS_RATEMODE_ABS:
625 mbps = tc.maxrate / 1000;
626 gbps = tc.maxrate / 1000000;
627 if (tc.maxrate == gbps * 1000000)
628 sbuf_printf(sb, "%uGbps", gbps);
629 else if (tc.maxrate == mbps * 1000)
630 sbuf_printf(sb, "%uMbps", mbps);
632 sbuf_printf(sb, "%uKbps", tc.maxrate);
639 case SCHED_CLASS_RATEUNIT_PKTS:
640 sbuf_printf(sb, "%upps", tc.maxrate);
648 case SCHED_CLASS_MODE_CLASS:
649 sbuf_printf(sb, " aggregate");
651 case SCHED_CLASS_MODE_FLOW:
652 sbuf_printf(sb, " per-flow");
654 sbuf_printf(sb, " pkt-size %u", tc.pktsize);
655 if (tc.burstsize > 0)
656 sbuf_printf(sb, " burst-size %u", tc.burstsize);
665 rc = sbuf_finish(sb);