1 /* $OpenBSD: pfctl_altq.c,v 1.93 2007/10/15 02:16:35 deraadt Exp $ */
5 * Sony Computer Science Laboratories Inc.
6 * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #include <sys/cdefs.h>
22 __FBSDID("$FreeBSD$");
24 #define PFIOC_USE_LATEST
26 #include <sys/types.h>
27 #include <sys/bitset.h>
28 #include <sys/ioctl.h>
29 #include <sys/socket.h>
32 #include <netinet/in.h>
33 #include <net/pfvar.h>
46 #include <net/altq/altq.h>
47 #include <net/altq/altq_cbq.h>
48 #include <net/altq/altq_codel.h>
49 #include <net/altq/altq_priq.h>
50 #include <net/altq/altq_hfsc.h>
51 #include <net/altq/altq_fairq.h>
53 #include "pfctl_parser.h"
56 #define is_sc_null(sc) (((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
58 static STAILQ_HEAD(interfaces, pfctl_altq) interfaces = STAILQ_HEAD_INITIALIZER(interfaces);
59 static struct hsearch_data queue_map;
60 static struct hsearch_data if_map;
61 static struct hsearch_data qid_map;
63 static struct pfctl_altq *pfaltq_lookup(char *ifname);
64 static struct pfctl_altq *qname_to_pfaltq(const char *, const char *);
65 static u_int32_t qname_to_qid(char *);
67 static int eval_pfqueue_cbq(struct pfctl *, struct pf_altq *,
69 static int cbq_compute_idletime(struct pfctl *, struct pf_altq *);
70 static int check_commit_cbq(int, int, struct pfctl_altq *);
71 static int print_cbq_opts(const struct pf_altq *);
73 static int print_codel_opts(const struct pf_altq *,
74 const struct node_queue_opt *);
76 static int eval_pfqueue_priq(struct pfctl *, struct pf_altq *,
78 static int check_commit_priq(int, int, struct pfctl_altq *);
79 static int print_priq_opts(const struct pf_altq *);
81 static int eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *,
82 struct pfctl_altq *, struct pfctl_altq *);
83 static int check_commit_hfsc(int, int, struct pfctl_altq *);
84 static int print_hfsc_opts(const struct pf_altq *,
85 const struct node_queue_opt *);
87 static int eval_pfqueue_fairq(struct pfctl *, struct pf_altq *,
88 struct pfctl_altq *, struct pfctl_altq *);
89 static int print_fairq_opts(const struct pf_altq *,
90 const struct node_queue_opt *);
91 static int check_commit_fairq(int, int, struct pfctl_altq *);
93 static void gsc_add_sc(struct gen_sc *, struct service_curve *);
94 static int is_gsc_under_sc(struct gen_sc *,
95 struct service_curve *);
96 static struct segment *gsc_getentry(struct gen_sc *, double);
97 static int gsc_add_seg(struct gen_sc *, double, double, double,
99 static double sc_x2y(struct service_curve *, double);
102 u_int64_t getifspeed(int, char *);
104 u_int32_t getifspeed(char *);
106 u_long getifmtu(char *);
107 int eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
109 u_int64_t eval_bwspec(struct node_queue_bw *, u_int64_t);
110 void print_hfsc_sc(const char *, u_int, u_int, u_int,
111 const struct node_hfsc_sc *);
112 void print_fairq_sc(const char *, u_int, u_int, u_int,
113 const struct node_fairq_sc *);
115 static __attribute__((constructor)) void
116 pfctl_altq_init(void)
119 * As hdestroy() will never be called on these tables, it will be
120 * safe to use references into the stored data as keys.
122 if (hcreate_r(0, &queue_map) == 0)
123 err(1, "Failed to create altq queue map");
124 if (hcreate_r(0, &if_map) == 0)
125 err(1, "Failed to create altq interface map");
126 if (hcreate_r(0, &qid_map) == 0)
127 err(1, "Failed to create altq queue id map");
131 pfaltq_store(struct pf_altq *a)
133 struct pfctl_altq *altq;
138 if ((altq = malloc(sizeof(*altq))) == NULL)
139 err(1, "queue malloc");
140 memcpy(&altq->pa, a, sizeof(struct pf_altq));
141 memset(&altq->meta, 0, sizeof(altq->meta));
143 if (a->qname[0] == 0) {
144 item.key = altq->pa.ifname;
146 if (hsearch_r(item, ENTER, &ret_item, &if_map) == 0)
147 err(1, "interface map insert");
148 STAILQ_INSERT_TAIL(&interfaces, altq, meta.link);
150 key_size = sizeof(a->ifname) + sizeof(a->qname);
151 if ((item.key = malloc(key_size)) == NULL)
152 err(1, "queue map key malloc");
153 snprintf(item.key, key_size, "%s:%s", a->ifname, a->qname);
155 if (hsearch_r(item, ENTER, &ret_item, &queue_map) == 0)
156 err(1, "queue map insert");
158 item.key = altq->pa.qname;
159 item.data = &altq->pa.qid;
160 if (hsearch_r(item, ENTER, &ret_item, &qid_map) == 0)
161 err(1, "qid map insert");
165 static struct pfctl_altq *
166 pfaltq_lookup(char *ifname)
172 if (hsearch_r(item, FIND, &ret_item, &if_map) == 0)
175 return (ret_item->data);
178 static struct pfctl_altq *
179 qname_to_pfaltq(const char *qname, const char *ifname)
183 char key[IFNAMSIZ + PF_QNAME_SIZE];
186 snprintf(item.key, sizeof(key), "%s:%s", ifname, qname);
187 if (hsearch_r(item, FIND, &ret_item, &queue_map) == 0)
190 return (ret_item->data);
194 qname_to_qid(char *qname)
201 * We guarantee that same named queues on different interfaces
205 if (hsearch_r(item, FIND, &ret_item, &qid_map) == 0)
208 qid = *(uint32_t *)ret_item->data;
213 print_altq(const struct pf_altq *a, unsigned int level,
214 struct node_queue_bw *bw, struct node_queue_opt *qopts)
216 if (a->qname[0] != 0) {
217 print_queue(a, level, bw, 1, qopts);
222 if (a->local_flags & PFALTQ_FLAG_IF_REMOVED)
226 printf("altq on %s ", a->ifname);
228 switch (a->scheduler) {
230 if (!print_cbq_opts(a))
234 if (!print_priq_opts(a))
238 if (!print_hfsc_opts(a, qopts))
242 if (!print_fairq_opts(a, qopts))
246 if (!print_codel_opts(a, qopts))
251 if (bw != NULL && bw->bw_percent > 0) {
252 if (bw->bw_percent < 100)
253 printf("bandwidth %u%% ", bw->bw_percent);
255 printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
257 if (a->qlimit != DEFAULT_QLIMIT)
258 printf("qlimit %u ", a->qlimit);
259 printf("tbrsize %u ", a->tbrsize);
263 print_queue(const struct pf_altq *a, unsigned int level,
264 struct node_queue_bw *bw, int print_interface,
265 struct node_queue_opt *qopts)
270 if (a->local_flags & PFALTQ_FLAG_IF_REMOVED)
274 for (i = 0; i < level; ++i)
276 printf("%s ", a->qname);
278 printf("on %s ", a->ifname);
279 if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC ||
280 a->scheduler == ALTQT_FAIRQ) {
281 if (bw != NULL && bw->bw_percent > 0) {
282 if (bw->bw_percent < 100)
283 printf("bandwidth %u%% ", bw->bw_percent);
285 printf("bandwidth %s ", rate2str((double)a->bandwidth));
287 if (a->priority != DEFAULT_PRIORITY)
288 printf("priority %u ", a->priority);
289 if (a->qlimit != DEFAULT_QLIMIT)
290 printf("qlimit %u ", a->qlimit);
291 switch (a->scheduler) {
299 print_hfsc_opts(a, qopts);
302 print_fairq_opts(a, qopts);
308 * eval_pfaltq computes the discipline parameters.
311 eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
312 struct node_queue_opt *opts)
315 u_int size, errors = 0;
317 if (bw->bw_absolute > 0)
318 pa->ifbandwidth = bw->bw_absolute;
321 if ((rate = getifspeed(pf->dev, pa->ifname)) == 0) {
323 if ((rate = getifspeed(pa->ifname)) == 0) {
325 fprintf(stderr, "interface %s does not know its bandwidth, "
326 "please specify an absolute bandwidth\n",
329 } else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
330 pa->ifbandwidth = rate;
333 * Limit bandwidth to UINT_MAX for schedulers that aren't 64-bit ready.
335 if ((pa->scheduler != ALTQT_HFSC) && (pa->ifbandwidth > UINT_MAX)) {
336 pa->ifbandwidth = UINT_MAX;
337 warnx("interface %s bandwidth limited to %" PRIu64 " bps "
338 "because selected scheduler is 32-bit limited\n", pa->ifname,
341 errors += eval_queue_opts(pa, opts, pa->ifbandwidth);
343 /* if tbrsize is not specified, use heuristics */
344 if (pa->tbrsize == 0) {
345 rate = pa->ifbandwidth;
346 if (rate <= 1 * 1000 * 1000)
348 else if (rate <= 10 * 1000 * 1000)
350 else if (rate <= 200 * 1000 * 1000)
352 else if (rate <= 2500 * 1000 * 1000ULL)
356 size = size * getifmtu(pa->ifname);
363 * check_commit_altq does consistency check for each interface
366 check_commit_altq(int dev, int opts)
368 struct pfctl_altq *if_ppa;
371 /* call the discipline check for each interface. */
372 STAILQ_FOREACH(if_ppa, &interfaces, meta.link) {
373 switch (if_ppa->pa.scheduler) {
375 error = check_commit_cbq(dev, opts, if_ppa);
378 error = check_commit_priq(dev, opts, if_ppa);
381 error = check_commit_hfsc(dev, opts, if_ppa);
384 error = check_commit_fairq(dev, opts, if_ppa);
394 * eval_pfqueue computes the queue parameters.
397 eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
398 struct node_queue_opt *opts)
400 /* should be merged with expand_queue */
401 struct pfctl_altq *if_ppa, *parent;
404 /* find the corresponding interface and copy fields used by queues */
405 if ((if_ppa = pfaltq_lookup(pa->ifname)) == NULL) {
406 fprintf(stderr, "altq not defined on %s\n", pa->ifname);
409 pa->scheduler = if_ppa->pa.scheduler;
410 pa->ifbandwidth = if_ppa->pa.ifbandwidth;
412 if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
413 fprintf(stderr, "queue %s already exists on interface %s\n",
414 pa->qname, pa->ifname);
417 pa->qid = qname_to_qid(pa->qname);
420 if (pa->parent[0] != 0) {
421 parent = qname_to_pfaltq(pa->parent, pa->ifname);
422 if (parent == NULL) {
423 fprintf(stderr, "parent %s not found for %s\n",
424 pa->parent, pa->qname);
427 pa->parent_qid = parent->pa.qid;
430 pa->qlimit = DEFAULT_QLIMIT;
432 if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC ||
433 pa->scheduler == ALTQT_FAIRQ) {
434 pa->bandwidth = eval_bwspec(bw,
435 parent == NULL ? pa->ifbandwidth : parent->pa.bandwidth);
437 if (pa->bandwidth > pa->ifbandwidth) {
438 fprintf(stderr, "bandwidth for %s higher than "
439 "interface\n", pa->qname);
443 * If not HFSC, then check that the sum of the child
444 * bandwidths is less than the parent's bandwidth. For
445 * HFSC, the equivalent concept is to check that the sum of
446 * the child linkshare service curves are under the parent's
447 * linkshare service curve, and that check is performed by
448 * eval_pfqueue_hfsc().
450 if ((parent != NULL) && (pa->scheduler != ALTQT_HFSC)) {
451 if (pa->bandwidth > parent->pa.bandwidth) {
452 warnx("bandwidth for %s higher than parent",
456 parent->meta.bwsum += pa->bandwidth;
457 if (parent->meta.bwsum > parent->pa.bandwidth) {
458 warnx("the sum of the child bandwidth (%" PRIu64
459 ") higher than parent \"%s\" (%" PRIu64 ")",
460 parent->meta.bwsum, parent->pa.qname,
461 parent->pa.bandwidth);
466 if (eval_queue_opts(pa, opts,
467 parent == NULL ? pa->ifbandwidth : parent->pa.bandwidth))
471 parent->meta.children++;
473 switch (pa->scheduler) {
475 error = eval_pfqueue_cbq(pf, pa, if_ppa);
478 error = eval_pfqueue_priq(pf, pa, if_ppa);
481 error = eval_pfqueue_hfsc(pf, pa, if_ppa, parent);
484 error = eval_pfqueue_fairq(pf, pa, if_ppa, parent);
493 * CBQ support functions
495 #define RM_FILTER_GAIN 5 /* log2 of gain, e.g., 5 => 31/32 */
496 #define RM_NS_PER_SEC (1000000000)
499 eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa, struct pfctl_altq *if_ppa)
501 struct cbq_opts *opts;
504 if (pa->priority >= CBQ_MAXPRI) {
505 warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
509 ifmtu = getifmtu(pa->ifname);
510 opts = &pa->pq_u.cbq_opts;
512 if (opts->pktsize == 0) { /* use default */
513 opts->pktsize = ifmtu;
514 if (opts->pktsize > MCLBYTES) /* do what TCP does */
515 opts->pktsize &= ~MCLBYTES;
516 } else if (opts->pktsize > ifmtu)
517 opts->pktsize = ifmtu;
518 if (opts->maxpktsize == 0) /* use default */
519 opts->maxpktsize = ifmtu;
520 else if (opts->maxpktsize > ifmtu)
521 opts->pktsize = ifmtu;
523 if (opts->pktsize > opts->maxpktsize)
524 opts->pktsize = opts->maxpktsize;
526 if (pa->parent[0] == 0)
527 opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
529 if (pa->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
530 if_ppa->meta.root_classes++;
531 if (pa->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
532 if_ppa->meta.default_classes++;
534 cbq_compute_idletime(pf, pa);
539 * compute ns_per_byte, maxidle, minidle, and offtime
542 cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
544 struct cbq_opts *opts;
545 double maxidle_s, maxidle, minidle;
546 double offtime, nsPerByte, ifnsPerByte, ptime, cptime;
547 double z, g, f, gton, gtom;
548 u_int minburst, maxburst;
550 opts = &pa->pq_u.cbq_opts;
551 ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
552 minburst = opts->minburst;
553 maxburst = opts->maxburst;
555 if (pa->bandwidth == 0)
556 f = 0.0001; /* small enough? */
558 f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
560 nsPerByte = ifnsPerByte / f;
561 ptime = (double)opts->pktsize * ifnsPerByte;
562 cptime = ptime * (1.0 - f) / f;
564 if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
566 * this causes integer overflow in kernel!
567 * (bandwidth < 6Kbps when max_pkt_size=1500)
569 if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0) {
570 warnx("queue bandwidth must be larger than %s",
571 rate2str(ifnsPerByte * (double)opts->maxpktsize /
572 (double)INT_MAX * (double)pa->ifbandwidth));
573 fprintf(stderr, "cbq: queue %s is too slow!\n",
576 nsPerByte = (double)(INT_MAX / opts->maxpktsize);
579 if (maxburst == 0) { /* use default */
580 if (cptime > 10.0 * 1000000)
585 if (minburst == 0) /* use default */
587 if (minburst > maxburst)
590 z = (double)(1 << RM_FILTER_GAIN);
592 gton = pow(g, (double)maxburst);
593 gtom = pow(g, (double)(minburst-1));
594 maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
595 maxidle_s = (1.0 - g);
596 if (maxidle > maxidle_s)
597 maxidle = ptime * maxidle;
599 maxidle = ptime * maxidle_s;
600 offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
601 minidle = -((double)opts->maxpktsize * (double)nsPerByte);
603 /* scale parameters */
604 maxidle = ((maxidle * 8.0) / nsPerByte) *
605 pow(2.0, (double)RM_FILTER_GAIN);
606 offtime = (offtime * 8.0) / nsPerByte *
607 pow(2.0, (double)RM_FILTER_GAIN);
608 minidle = ((minidle * 8.0) / nsPerByte) *
609 pow(2.0, (double)RM_FILTER_GAIN);
611 maxidle = maxidle / 1000.0;
612 offtime = offtime / 1000.0;
613 minidle = minidle / 1000.0;
615 opts->minburst = minburst;
616 opts->maxburst = maxburst;
617 opts->ns_per_byte = (u_int)nsPerByte;
618 opts->maxidle = (u_int)fabs(maxidle);
619 opts->minidle = (int)minidle;
620 opts->offtime = (u_int)fabs(offtime);
626 check_commit_cbq(int dev, int opts, struct pfctl_altq *if_ppa)
631 * check if cbq has one root queue and one default queue
634 if (if_ppa->meta.root_classes != 1) {
635 warnx("should have one root queue on %s", if_ppa->pa.ifname);
638 if (if_ppa->meta.default_classes != 1) {
639 warnx("should have one default queue on %s", if_ppa->pa.ifname);
646 print_cbq_opts(const struct pf_altq *a)
648 const struct cbq_opts *opts;
650 opts = &a->pq_u.cbq_opts;
653 if (opts->flags & CBQCLF_RED)
655 if (opts->flags & CBQCLF_ECN)
657 if (opts->flags & CBQCLF_RIO)
659 if (opts->flags & CBQCLF_CODEL)
661 if (opts->flags & CBQCLF_CLEARDSCP)
662 printf(" cleardscp");
663 if (opts->flags & CBQCLF_FLOWVALVE)
664 printf(" flowvalve");
665 if (opts->flags & CBQCLF_BORROW)
667 if (opts->flags & CBQCLF_WRR)
669 if (opts->flags & CBQCLF_EFFICIENT)
670 printf(" efficient");
671 if (opts->flags & CBQCLF_ROOTCLASS)
673 if (opts->flags & CBQCLF_DEFCLASS)
683 * PRIQ support functions
686 eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa, struct pfctl_altq *if_ppa)
689 if (pa->priority >= PRIQ_MAXPRI) {
690 warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
693 if (BIT_ISSET(QPRI_BITSET_SIZE, pa->priority, &if_ppa->meta.qpris)) {
694 warnx("%s does not have a unique priority on interface %s",
695 pa->qname, pa->ifname);
698 BIT_SET(QPRI_BITSET_SIZE, pa->priority, &if_ppa->meta.qpris);
700 if (pa->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
701 if_ppa->meta.default_classes++;
706 check_commit_priq(int dev, int opts, struct pfctl_altq *if_ppa)
710 * check if priq has one default class for this interface
712 if (if_ppa->meta.default_classes != 1) {
713 warnx("should have one default queue on %s", if_ppa->pa.ifname);
720 print_priq_opts(const struct pf_altq *a)
722 const struct priq_opts *opts;
724 opts = &a->pq_u.priq_opts;
728 if (opts->flags & PRCF_RED)
730 if (opts->flags & PRCF_ECN)
732 if (opts->flags & PRCF_RIO)
734 if (opts->flags & PRCF_CODEL)
736 if (opts->flags & PRCF_CLEARDSCP)
737 printf(" cleardscp");
738 if (opts->flags & PRCF_DEFAULTCLASS)
748 * HFSC support functions
751 eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa, struct pfctl_altq *if_ppa,
752 struct pfctl_altq *parent)
754 struct hfsc_opts_v1 *opts;
755 struct service_curve sc;
757 opts = &pa->pq_u.hfsc_opts;
759 if (parent == NULL) {
761 opts->lssc_m1 = pa->ifbandwidth;
762 opts->lssc_m2 = pa->ifbandwidth;
767 /* First child initializes the parent's service curve accumulators. */
768 if (parent->meta.children == 1) {
769 LIST_INIT(&parent->meta.rtsc);
770 LIST_INIT(&parent->meta.lssc);
773 if (parent->pa.pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
774 warnx("adding %s would make default queue %s not a leaf",
775 pa->qname, pa->parent);
779 if (pa->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS)
780 if_ppa->meta.default_classes++;
782 /* if link_share is not specified, use bandwidth */
783 if (opts->lssc_m2 == 0)
784 opts->lssc_m2 = pa->bandwidth;
786 if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
787 (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
788 (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
789 warnx("m2 is zero for %s", pa->qname);
793 if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
794 (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
795 (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
796 warnx("m1 must be zero for convex curve: %s", pa->qname);
802 * for the real-time service curve, the sum of the service curves
803 * should not exceed 80% of the interface bandwidth. 20% is reserved
804 * not to over-commit the actual interface bandwidth.
805 * for the linkshare service curve, the sum of the child service
806 * curve should not exceed the parent service curve.
807 * for the upper-limit service curve, the assigned bandwidth should
808 * be smaller than the interface bandwidth, and the upper-limit should
809 * be larger than the real-time service curve when both are defined.
812 /* check the real-time service curve. reserve 20% of interface bw */
813 if (opts->rtsc_m2 != 0) {
814 /* add this queue to the sum */
815 sc.m1 = opts->rtsc_m1;
817 sc.m2 = opts->rtsc_m2;
818 gsc_add_sc(&parent->meta.rtsc, &sc);
819 /* compare the sum with 80% of the interface */
822 sc.m2 = pa->ifbandwidth / 100 * 80;
823 if (!is_gsc_under_sc(&parent->meta.rtsc, &sc)) {
824 warnx("real-time sc exceeds 80%% of the interface "
825 "bandwidth (%s)", rate2str((double)sc.m2));
830 /* check the linkshare service curve. */
831 if (opts->lssc_m2 != 0) {
832 /* add this queue to the child sum */
833 sc.m1 = opts->lssc_m1;
835 sc.m2 = opts->lssc_m2;
836 gsc_add_sc(&parent->meta.lssc, &sc);
837 /* compare the sum of the children with parent's sc */
838 sc.m1 = parent->pa.pq_u.hfsc_opts.lssc_m1;
839 sc.d = parent->pa.pq_u.hfsc_opts.lssc_d;
840 sc.m2 = parent->pa.pq_u.hfsc_opts.lssc_m2;
841 if (!is_gsc_under_sc(&parent->meta.lssc, &sc)) {
842 warnx("linkshare sc exceeds parent's sc");
847 /* check the upper-limit service curve. */
848 if (opts->ulsc_m2 != 0) {
849 if (opts->ulsc_m1 > pa->ifbandwidth ||
850 opts->ulsc_m2 > pa->ifbandwidth) {
851 warnx("upper-limit larger than interface bandwidth");
854 if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
855 warnx("upper-limit sc smaller than real-time sc");
864 * FAIRQ support functions
867 eval_pfqueue_fairq(struct pfctl *pf __unused, struct pf_altq *pa,
868 struct pfctl_altq *if_ppa, struct pfctl_altq *parent)
870 struct fairq_opts *opts;
871 struct service_curve sc;
873 opts = &pa->pq_u.fairq_opts;
875 if (pa->parent == NULL) {
877 opts->lssc_m1 = pa->ifbandwidth;
878 opts->lssc_m2 = pa->ifbandwidth;
883 /* First child initializes the parent's service curve accumulator. */
884 if (parent->meta.children == 1)
885 LIST_INIT(&parent->meta.lssc);
887 if (parent->pa.pq_u.fairq_opts.flags & FARF_DEFAULTCLASS) {
888 warnx("adding %s would make default queue %s not a leaf",
889 pa->qname, pa->parent);
893 if (pa->pq_u.fairq_opts.flags & FARF_DEFAULTCLASS)
894 if_ppa->meta.default_classes++;
896 /* if link_share is not specified, use bandwidth */
897 if (opts->lssc_m2 == 0)
898 opts->lssc_m2 = pa->bandwidth;
902 * for the real-time service curve, the sum of the service curves
903 * should not exceed 80% of the interface bandwidth. 20% is reserved
904 * not to over-commit the actual interface bandwidth.
905 * for the link-sharing service curve, the sum of the child service
906 * curve should not exceed the parent service curve.
907 * for the upper-limit service curve, the assigned bandwidth should
908 * be smaller than the interface bandwidth, and the upper-limit should
909 * be larger than the real-time service curve when both are defined.
912 /* check the linkshare service curve. */
913 if (opts->lssc_m2 != 0) {
914 /* add this queue to the child sum */
915 sc.m1 = opts->lssc_m1;
917 sc.m2 = opts->lssc_m2;
918 gsc_add_sc(&parent->meta.lssc, &sc);
919 /* compare the sum of the children with parent's sc */
920 sc.m1 = parent->pa.pq_u.fairq_opts.lssc_m1;
921 sc.d = parent->pa.pq_u.fairq_opts.lssc_d;
922 sc.m2 = parent->pa.pq_u.fairq_opts.lssc_m2;
923 if (!is_gsc_under_sc(&parent->meta.lssc, &sc)) {
924 warnx("link-sharing sc exceeds parent's sc");
933 check_commit_hfsc(int dev, int opts, struct pfctl_altq *if_ppa)
936 /* check if hfsc has one default queue for this interface */
937 if (if_ppa->meta.default_classes != 1) {
938 warnx("should have one default queue on %s", if_ppa->pa.ifname);
945 check_commit_fairq(int dev __unused, int opts __unused, struct pfctl_altq *if_ppa)
948 /* check if fairq has one default queue for this interface */
949 if (if_ppa->meta.default_classes != 1) {
950 warnx("should have one default queue on %s", if_ppa->pa.ifname);
957 print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
959 const struct hfsc_opts_v1 *opts;
960 const struct node_hfsc_sc *rtsc, *lssc, *ulsc;
962 opts = &a->pq_u.hfsc_opts;
964 rtsc = lssc = ulsc = NULL;
966 rtsc = &qopts->data.hfsc_opts.realtime;
967 lssc = &qopts->data.hfsc_opts.linkshare;
968 ulsc = &qopts->data.hfsc_opts.upperlimit;
971 if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
972 (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
973 opts->lssc_d != 0))) {
975 if (opts->flags & HFCF_RED)
977 if (opts->flags & HFCF_ECN)
979 if (opts->flags & HFCF_RIO)
981 if (opts->flags & HFCF_CODEL)
983 if (opts->flags & HFCF_CLEARDSCP)
984 printf(" cleardscp");
985 if (opts->flags & HFCF_DEFAULTCLASS)
987 if (opts->rtsc_m2 != 0)
988 print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
989 opts->rtsc_m2, rtsc);
990 if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
992 print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
993 opts->lssc_m2, lssc);
994 if (opts->ulsc_m2 != 0)
995 print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
996 opts->ulsc_m2, ulsc);
1005 print_codel_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
1007 const struct codel_opts *opts;
1009 opts = &a->pq_u.codel_opts;
1010 if (opts->target || opts->interval || opts->ecn) {
1013 printf(" target %d", opts->target);
1015 printf(" interval %d", opts->interval);
1027 print_fairq_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
1029 const struct fairq_opts *opts;
1030 const struct node_fairq_sc *loc_lssc;
1032 opts = &a->pq_u.fairq_opts;
1036 loc_lssc = &qopts->data.fairq_opts.linkshare;
1039 (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
1040 opts->lssc_d != 0))) {
1042 if (opts->flags & FARF_RED)
1044 if (opts->flags & FARF_ECN)
1046 if (opts->flags & FARF_RIO)
1048 if (opts->flags & FARF_CODEL)
1050 if (opts->flags & FARF_CLEARDSCP)
1051 printf(" cleardscp");
1052 if (opts->flags & FARF_DEFAULTCLASS)
1054 if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
1056 print_fairq_sc("linkshare", opts->lssc_m1, opts->lssc_d,
1057 opts->lssc_m2, loc_lssc);
1066 * admission control using generalized service curve
1069 /* add a new service curve to a generalized service curve */
1071 gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
1076 gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
1077 gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2);
1081 * check whether all points of a generalized service curve have
1082 * their y-coordinates no larger than a given two-piece linear
1086 is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
1088 struct segment *s, *last, *end;
1091 if (is_sc_null(sc)) {
1092 if (LIST_EMPTY(gsc))
1094 LIST_FOREACH(s, gsc, _next) {
1101 * gsc has a dummy entry at the end with x = INFINITY.
1102 * loop through up to this dummy entry.
1104 end = gsc_getentry(gsc, INFINITY);
1108 for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
1109 if (s->y > sc_x2y(sc, s->x))
1113 /* last now holds the real last segment */
1116 if (last->m > sc->m2)
1118 if (last->x < sc->d && last->m > sc->m1) {
1119 y = last->y + (sc->d - last->x) * last->m;
1120 if (y > sc_x2y(sc, sc->d))
1127 * return a segment entry starting at x.
1128 * if gsc has no entry starting at x, a new entry is created at x.
1130 static struct segment *
1131 gsc_getentry(struct gen_sc *gsc, double x)
1133 struct segment *new, *prev, *s;
1136 LIST_FOREACH(s, gsc, _next) {
1138 return (s); /* matching entry found */
1145 /* we have to create a new entry */
1146 if ((new = calloc(1, sizeof(struct segment))) == NULL)
1150 if (x == INFINITY || s == NULL)
1152 else if (s->x == INFINITY)
1157 /* insert the new entry at the head of the list */
1160 LIST_INSERT_HEAD(gsc, new, _next);
1163 * the start point intersects with the segment pointed by
1164 * prev. divide prev into 2 segments
1166 if (x == INFINITY) {
1173 prev->d = x - prev->x;
1174 new->y = prev->d * prev->m + prev->y;
1177 LIST_INSERT_AFTER(prev, new, _next);
1182 /* add a segment to a generalized service curve */
1184 gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
1186 struct segment *start, *end, *s;
1193 start = gsc_getentry(gsc, x);
1194 end = gsc_getentry(gsc, x2);
1195 if (start == NULL || end == NULL)
1198 for (s = start; s != end; s = LIST_NEXT(s, _next)) {
1200 s->y += y + (s->x - x) * m;
1203 end = gsc_getentry(gsc, INFINITY);
1204 for (; s != end; s = LIST_NEXT(s, _next)) {
1211 /* get y-projection of a service curve */
1213 sc_x2y(struct service_curve *sc, double x)
1217 if (x <= (double)sc->d)
1218 /* y belongs to the 1st segment */
1219 y = x * (double)sc->m1;
1221 /* y belongs to the 2nd segment */
1222 y = (double)sc->d * (double)sc->m1
1223 + (x - (double)sc->d) * (double)sc->m2;
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a bit rate as a short human-readable string such as "999 b",
 * "1.50Kb" or "10Gb".
 *
 * The result lives in one of R2S_BUFS static buffers handed out in
 * rotation, so up to R2S_BUFS results may be in use at once (e.g. as
 * several arguments of a single printf call) before one is overwritten.
 * The caller must not free the returned pointer.
 */
char *
rate2str(double rate)
{
	char		*buf;
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];  /* ring buffer */
	static int	 idx = 0;
	int		 i;
	static const char unit[] = " KMG";
	/* index of the largest unit; sizeof also counts the trailing NUL */
	const int	 last_unit = (int)sizeof(unit) - 2;

	buf = r2sbuf[idx++];
	if (idx == R2S_BUFS)
		idx = 0;

	/*
	 * Stop scaling at the largest unit.  Going one step further would
	 * select the string's NUL terminator as the unit character and
	 * produce a truncated result for rates of 1000 Gb and above.
	 */
	for (i = 0; rate >= 1000 && i < last_unit; i++)
		rate /= 1000;

	/*
	 * Print two decimals only when the fractional part is non-zero.
	 * long long keeps the conversions in range for any rate that fits
	 * in 64 bits, where int could overflow.
	 */
	if ((long long)(rate * 100) % 100)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%lld%cb", (long long)rate, unit[i]);

	return (buf);
}
1260 * FreeBSD does not have SIOCGIFDATA.
1261 * To emulate this, DIOCGIFSPEED ioctl added to pf.
1264 getifspeed(int pfdev, char *ifname)
1266 struct pf_ifspeed io;
1268 bzero(&io, sizeof io);
1269 if (strlcpy(io.ifname, ifname, IFNAMSIZ) >=
1271 errx(1, "getifspeed: strlcpy");
1272 if (ioctl(pfdev, DIOCGIFSPEED, &io) == -1)
1273 err(1, "DIOCGIFSPEED");
1274 return (io.baudrate);
1278 getifspeed(char *ifname)
1282 struct if_data ifrdat;
1284 s = get_query_socket();
1285 bzero(&ifr, sizeof(ifr));
1286 if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1287 sizeof(ifr.ifr_name))
1288 errx(1, "getifspeed: strlcpy");
1289 ifr.ifr_data = (caddr_t)&ifrdat;
1290 if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
1291 err(1, "SIOCGIFDATA");
1292 return ((u_int32_t)ifrdat.ifi_baudrate);
1297 getifmtu(char *ifname)
1302 s = get_query_socket();
1303 bzero(&ifr, sizeof(ifr));
1304 if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1305 sizeof(ifr.ifr_name))
1306 errx(1, "getifmtu: strlcpy");
1307 if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1311 err(1, "SIOCGIFMTU");
1313 if (ifr.ifr_mtu > 0)
1314 return (ifr.ifr_mtu);
1316 warnx("could not get mtu for %s, assuming 1500", ifname);
1322 eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
1327 switch (pa->scheduler) {
1329 pa->pq_u.cbq_opts = opts->data.cbq_opts;
1332 pa->pq_u.priq_opts = opts->data.priq_opts;
1335 pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
1336 if (opts->data.hfsc_opts.linkshare.used) {
1337 pa->pq_u.hfsc_opts.lssc_m1 =
1338 eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
1340 pa->pq_u.hfsc_opts.lssc_m2 =
1341 eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
1343 pa->pq_u.hfsc_opts.lssc_d =
1344 opts->data.hfsc_opts.linkshare.d;
1346 if (opts->data.hfsc_opts.realtime.used) {
1347 pa->pq_u.hfsc_opts.rtsc_m1 =
1348 eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
1350 pa->pq_u.hfsc_opts.rtsc_m2 =
1351 eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
1353 pa->pq_u.hfsc_opts.rtsc_d =
1354 opts->data.hfsc_opts.realtime.d;
1356 if (opts->data.hfsc_opts.upperlimit.used) {
1357 pa->pq_u.hfsc_opts.ulsc_m1 =
1358 eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
1360 pa->pq_u.hfsc_opts.ulsc_m2 =
1361 eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
1363 pa->pq_u.hfsc_opts.ulsc_d =
1364 opts->data.hfsc_opts.upperlimit.d;
1368 pa->pq_u.fairq_opts.flags = opts->data.fairq_opts.flags;
1369 pa->pq_u.fairq_opts.nbuckets = opts->data.fairq_opts.nbuckets;
1370 pa->pq_u.fairq_opts.hogs_m1 =
1371 eval_bwspec(&opts->data.fairq_opts.hogs_bw, ref_bw);
1373 if (opts->data.fairq_opts.linkshare.used) {
1374 pa->pq_u.fairq_opts.lssc_m1 =
1375 eval_bwspec(&opts->data.fairq_opts.linkshare.m1,
1377 pa->pq_u.fairq_opts.lssc_m2 =
1378 eval_bwspec(&opts->data.fairq_opts.linkshare.m2,
1380 pa->pq_u.fairq_opts.lssc_d =
1381 opts->data.fairq_opts.linkshare.d;
1385 pa->pq_u.codel_opts.target = opts->data.codel_opts.target;
1386 pa->pq_u.codel_opts.interval = opts->data.codel_opts.interval;
1387 pa->pq_u.codel_opts.ecn = opts->data.codel_opts.ecn;
1390 warnx("eval_queue_opts: unknown scheduler type %u",
1400 * If absolute bandwidth is set, return the lesser of that value and the
1401 * reference bandwidth. Limiting to the reference bandwidth allows simple
1402 * limiting of configured bandwidth parameters for schedulers that are
1403 * 32-bit limited, as the root/interface bandwidth (top-level reference
1404 * bandwidth) will be properly limited in that case.
1406 * Otherwise, if the absolute bandwidth is not set, return given percentage
1407 * of reference bandwidth.
1410 eval_bwspec(struct node_queue_bw *bw, u_int64_t ref_bw)
1412 if (bw->bw_absolute > 0)
1413 return (MIN(bw->bw_absolute, ref_bw));
1415 if (bw->bw_percent > 0)
1416 return (ref_bw / 100 * bw->bw_percent);
1422 print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1423 const struct node_hfsc_sc *sc)
1425 printf(" %s", scname);
1429 if (sc != NULL && sc->m1.bw_percent > 0)
1430 printf("%u%%", sc->m1.bw_percent);
1432 printf("%s", rate2str((double)m1));
1436 if (sc != NULL && sc->m2.bw_percent > 0)
1437 printf(" %u%%", sc->m2.bw_percent);
1439 printf(" %s", rate2str((double)m2));
1446 print_fairq_sc(const char *scname, u_int m1, u_int d, u_int m2,
1447 const struct node_fairq_sc *sc)
1449 printf(" %s", scname);
1453 if (sc != NULL && sc->m1.bw_percent > 0)
1454 printf("%u%%", sc->m1.bw_percent);
1456 printf("%s", rate2str((double)m1));
1460 if (sc != NULL && sc->m2.bw_percent > 0)
1461 printf(" %u%%", sc->m2.bw_percent);
1463 printf(" %s", rate2str((double)m2));