2 * FQ_Codel - The FlowQueue-Codel scheduler/AQM
6 * Copyright (C) 2016 Centre for Advanced Internet Architectures,
7 * Swinburne University of Technology, Melbourne, Australia.
8 * Portions of this code were made possible in part by a gift from
9 * The Comcast Innovation Fund.
10 * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/malloc.h>
36 #include <sys/socket.h>
37 //#include <sys/socketvar.h>
38 #include <sys/kernel.h>
40 #include <sys/module.h>
41 #include <net/if.h> /* IFNAMSIZ */
42 #include <netinet/in.h>
43 #include <netinet/ip_var.h> /* ipfw_rule_ref */
44 #include <netinet/ip_fw.h> /* flow_id */
45 #include <netinet/ip_dummynet.h>
48 #include <sys/rwlock.h>
50 #include <netpfil/ipfw/ip_fw_private.h>
51 #include <sys/sysctl.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip6.h>
54 #include <netinet/ip_icmp.h>
55 #include <netinet/tcp.h>
56 #include <netinet/udp.h>
57 #include <sys/queue.h>
60 #include <netpfil/ipfw/dn_heap.h>
61 #include <netpfil/ipfw/ip_dn_private.h>
63 #include <netpfil/ipfw/dn_aqm.h>
64 #include <netpfil/ipfw/dn_aqm_codel.h>
65 #include <netpfil/ipfw/dn_sched.h>
66 #include <netpfil/ipfw/dn_sched_fq_codel.h>
67 #include <netpfil/ipfw/dn_sched_fq_codel_helper.h>
73 /* NOTE: In fq_codel module, we reimplements CoDel AQM functions
74 * because fq_codel use different flows (sub-queues) structure and
75 * dn_queue includes many variables not needed by a flow (sub-queue
76 * )i.e. avoid extra overhead (88 bytes vs 208 bytes).
77 * Also, CoDel functions manages stats of sub-queues as well as the main queue.
80 #define DN_SCHED_FQ_CODEL 6
82 static struct dn_alg fq_codel_desc;
84 /* fq_codel default parameters including codel */
85 struct dn_sch_fq_codel_parms
86 fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US,
87 CODEL_ECN_ENABLED}, 1024, 10240, 1514};
90 fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
95 value = fq_codel_sysctl.ccfg.interval;
96 value /= AQM_TIME_1US;
97 error = sysctl_handle_long(oidp, &value, 0, req);
98 if (error != 0 || req->newptr == NULL)
100 if (value < 1 || value > 100 * AQM_TIME_1S)
102 fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US ;
108 fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
113 value = fq_codel_sysctl.ccfg.target;
114 value /= AQM_TIME_1US;
115 error = sysctl_handle_long(oidp, &value, 0, req);
116 if (error != 0 || req->newptr == NULL)
118 if (value < 1 || value > 5 * AQM_TIME_1S)
120 fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US ;
128 SYSCTL_DECL(_net_inet);
129 SYSCTL_DECL(_net_inet_ip);
130 SYSCTL_DECL(_net_inet_ip_dummynet);
131 static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel,
132 CTLFLAG_RW, 0, "FQ_CODEL");
136 SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target,
137 CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_target_handler, "L",
138 "FQ_CoDel target in microsecond");
139 SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval,
140 CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_interval_handler, "L",
141 "FQ_CoDel interval in microsecond");
143 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum,
144 CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum");
145 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows,
146 CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024,
147 "Number of queues for FQ_CoDel");
148 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit,
149 CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit");
152 /* Drop a packet form the head of codel queue */
154 codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
156 struct mbuf *m = q->mq.head;
160 q->mq.head = m->m_nextpkt;
162 fq_update_stats(q, si, -m->m_pkthdr.len, 1);
164 if (si->main_q.ni.length == 0) /* queue is now idle */
165 si->main_q.q_time = dn_cfg.curr_time;
170 /* Enqueue a packet 'm' to a queue 'q' and add timestamp to that packet.
171 * Return 1 when unable to add timestamp, otherwise return 0
174 codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si)
178 len = m->m_pkthdr.len;
179 /* finding maximum packet size */
180 if (len > q->cst.maxpkt_size)
181 q->cst.maxpkt_size = len;
183 /* Add timestamp to mbuf as MTAG */
185 mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
187 mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t),
193 *(aqm_time_t *)(mtag + 1) = AQM_UNOW;
194 m_tag_prepend(m, mtag);
196 mq_append(&q->mq, m);
197 fq_update_stats(q, si, len, 0);
201 fq_update_stats(q, si, len, 1);
207 * Classify a packet to queue number using Jenkins hash function.
208 * Return: queue number
209 * the input of the hash are protocol no, perturbation, src IP, dst IP,
210 * src port, dst port,
213 fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si)
224 isip6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
227 ip6 = mtod(m, struct ip6_hdr *);
228 *((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
229 *((uint32_t *) &tuple[1]) = si->perturbation;
230 memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
231 memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
233 switch (ip6->ip6_nxt) {
235 th = (struct tcphdr *)(ip6 + 1);
236 *((uint16_t *) &tuple[37]) = th->th_dport;
237 *((uint16_t *) &tuple[39]) = th->th_sport;
241 uh = (struct udphdr *)(ip6 + 1);
242 *((uint16_t *) &tuple[37]) = uh->uh_dport;
243 *((uint16_t *) &tuple[39]) = uh->uh_sport;
246 memset(&tuple[37], 0, 4);
250 hash = jenkins_hash(tuple, 41, HASHINIT) % fcount;
256 ip = mtod(m, struct ip *);
257 *((uint8_t *) &tuple[0]) = ip->ip_p;
258 *((uint32_t *) &tuple[1]) = si->perturbation;
259 *((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
260 *((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
264 th = (struct tcphdr *)(ip + 1);
265 *((uint16_t *) &tuple[13]) = th->th_dport;
266 *((uint16_t *) &tuple[15]) = th->th_sport;
270 uh = (struct udphdr *)(ip + 1);
271 *((uint16_t *) &tuple[13]) = uh->uh_dport;
272 *((uint16_t *) &tuple[15]) = uh->uh_sport;
275 memset(&tuple[13], 0, 4);
278 hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;
284 * Enqueue a packet into an appropriate queue according to
285 * FQ_CODEL algorithm.
288 fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
291 struct fq_codel_si *si;
292 struct fq_codel_schk *schk;
293 struct dn_sch_fq_codel_parms *param;
294 struct dn_queue *mainq;
295 int idx, drop, i, maxidx;
297 mainq = (struct dn_queue *)(_si + 1);
298 si = (struct fq_codel_si *)_si;
299 schk = (struct fq_codel_schk *)(si->_si.sched+1);
302 /* classify a packet to queue number*/
303 idx = fq_codel_classify_flow(m, param->flows_cnt, si);
304 /* enqueue packet into appropriate queue using CoDel AQM.
305 * Note: 'codel_enqueue' function returns 1 only when it unable to
306 * add timestamp to packet (no limit check)*/
307 drop = codel_enqueue(&si->flows[idx], m, si);
309 /* codel unable to timestamp a packet */
313 /* If the flow (sub-queue) is not active ,then add it to the tail of
314 * new flows list, initialize and activate it.
316 if (!si->flows[idx].active ) {
317 STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
318 si->flows[idx].deficit = param->quantum;
319 si->flows[idx].cst.dropping = false;
320 si->flows[idx].cst.first_above_time = 0;
321 si->flows[idx].active = 1;
322 //D("activate %d",idx);
325 /* check the limit for all queues and remove a packet from the
328 if (mainq->ni.length > schk->cfg.limit) { D("over limit");
329 /* find first active flow */
330 for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
331 if (si->flows[maxidx].active)
333 if (maxidx < schk->cfg.flows_cnt) {
334 /* find the largest sub- queue */
335 for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
336 if (si->flows[i].active && si->flows[i].stats.length >
337 si->flows[maxidx].stats.length)
339 codel_drop_head(&si->flows[maxidx], si);
340 D("maxidx = %d",maxidx);
349 * Dequeue a packet from an appropriate queue according to
350 * FQ_CODEL algorithm.
353 fq_codel_dequeue(struct dn_sch_inst *_si)
355 struct fq_codel_si *si;
356 struct fq_codel_schk *schk;
357 struct dn_sch_fq_codel_parms *param;
358 struct fq_codel_flow *f;
360 struct fq_codel_list *fq_codel_flowlist;
362 si = (struct fq_codel_si *)_si;
363 schk = (struct fq_codel_schk *)(si->_si.sched+1);
367 /* select a list to start with */
368 if (STAILQ_EMPTY(&si->newflows))
369 fq_codel_flowlist = &si->oldflows;
371 fq_codel_flowlist = &si->newflows;
373 /* Both new and old queue lists are empty, return NULL */
374 if (STAILQ_EMPTY(fq_codel_flowlist))
377 f = STAILQ_FIRST(fq_codel_flowlist);
379 /* if there is no flow(sub-queue) deficit, increase deficit
380 * by quantum, move the flow to the tail of old flows list
381 * and try another flow.
382 * Otherwise, the flow will be used for dequeue.
384 if (f->deficit < 0) {
385 f->deficit += param->quantum;
386 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
387 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
391 f = STAILQ_FIRST(fq_codel_flowlist);
394 /* the new flows list is empty, try old flows list */
395 if (STAILQ_EMPTY(fq_codel_flowlist))
398 /* Dequeue a packet from the selected flow */
399 mbuf = fqc_codel_dequeue(f, si);
401 /* Codel did not return a packet */
403 /* If the selected flow belongs to new flows list, then move
404 * it to the tail of old flows list. Otherwise, deactivate it and
405 * remove it from the old list and
407 if (fq_codel_flowlist == &si->newflows) {
408 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
409 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
412 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
418 /* we have a packet to return,
419 * update flow deficit and return the packet*/
420 f->deficit -= mbuf->m_pkthdr.len;
425 /* unreachable point */
430 * Initialize fq_codel scheduler instance.
431 * also, allocate memory for flows array.
434 fq_codel_new_sched(struct dn_sch_inst *_si)
436 struct fq_codel_si *si;
438 struct fq_codel_schk *schk;
441 si = (struct fq_codel_si *)_si;
442 schk = (struct fq_codel_schk *)(_si->sched+1);
445 D("si already configured!");
449 /* init the main queue */
451 set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
453 q->fs = _si->sched->fs;
455 /* allocate memory for flows array */
456 si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_codel_flow),
457 M_DUMMYNET, M_NOWAIT | M_ZERO);
458 if (si->flows == NULL) {
459 D("cannot allocate memory for fq_codel configuration parameters");
463 /* init perturbation for this si */
464 si->perturbation = random();
466 /* init the old and new flows lists */
467 STAILQ_INIT(&si->newflows);
468 STAILQ_INIT(&si->oldflows);
470 /* init the flows (sub-queues) */
471 for (i = 0; i < schk->cfg.flows_cnt; i++) {
473 si->flows[i].cst.maxpkt_size = 500;
476 fq_codel_desc.ref_count++;
481 * Free fq_codel scheduler instance.
484 fq_codel_free_sched(struct dn_sch_inst *_si)
486 struct fq_codel_si *si = (struct fq_codel_si *)_si ;
488 /* free the flows array */
489 free(si->flows , M_DUMMYNET);
491 fq_codel_desc.ref_count--;
497 * Configure fq_codel scheduler.
498 * the configurations for the scheduler is passed from userland.
501 fq_codel_config(struct dn_schk *_schk)
503 struct fq_codel_schk *schk;
504 struct dn_extra_parms *ep;
505 struct dn_sch_fq_codel_parms *fqc_cfg;
507 schk = (struct fq_codel_schk *)(_schk+1);
508 ep = (struct dn_extra_parms *) _schk->cfg;
510 /* par array contains fq_codel configuration as follow
511 * Codel: 0- target,1- interval, 2- flags
512 * FQ_CODEL: 3- quantum, 4- limit, 5- flows
514 if (ep && ep->oid.len ==sizeof(*ep) &&
515 ep->oid.subtype == DN_SCH_PARAMS) {
517 fqc_cfg = &schk->cfg;
519 fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target;
521 fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US;
524 fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval;
526 fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US;
529 fqc_cfg->ccfg.flags = 0;
531 fqc_cfg->ccfg.flags = ep->par[2];
533 /* FQ configurations */
535 fqc_cfg->quantum = fq_codel_sysctl.quantum;
537 fqc_cfg->quantum = ep->par[3];
540 fqc_cfg->limit = fq_codel_sysctl.limit;
542 fqc_cfg->limit = ep->par[4];
545 fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt;
547 fqc_cfg->flows_cnt = ep->par[5];
549 /* Bound the configurations */
550 fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1 ,
552 fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1,
555 fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum,1, 9000);
556 fqc_cfg->limit= BOUND_VAR(fqc_cfg->limit,1,20480);
557 fqc_cfg->flows_cnt= BOUND_VAR(fqc_cfg->flows_cnt,1,65536);
566 * Return fq_codel scheduler configurations
567 * the configurations for the scheduler is passed to userland.
570 fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
572 struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1);
573 struct dn_sch_fq_codel_parms *fqc_cfg;
575 fqc_cfg = &schk->cfg;
577 strcpy(ep->name, fq_codel_desc.name);
578 ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US;
579 ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US;
580 ep->par[2] = fqc_cfg->ccfg.flags;
582 ep->par[3] = fqc_cfg->quantum;
583 ep->par[4] = fqc_cfg->limit;
584 ep->par[5] = fqc_cfg->flows_cnt;
590 * fq_codel scheduler descriptor
591 * contains the type of the scheduler, the name, the size of extra
592 * data structures, and function pointers.
594 static struct dn_alg fq_codel_desc = {
595 _SI( .type = ) DN_SCHED_FQ_CODEL,
596 _SI( .name = ) "FQ_CODEL",
599 _SI( .schk_datalen = ) sizeof(struct fq_codel_schk),
600 _SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst),
601 _SI( .q_datalen = ) 0,
603 _SI( .enqueue = ) fq_codel_enqueue,
604 _SI( .dequeue = ) fq_codel_dequeue,
605 _SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/
606 _SI( .destroy = ) NULL, /*sched x delete */
607 _SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */
608 _SI( .free_sched = ) fq_codel_free_sched, /* delete schd instance */
609 _SI( .new_fsk = ) NULL,
610 _SI( .free_fsk = ) NULL,
611 _SI( .new_queue = ) NULL,
612 _SI( .free_queue = ) NULL,
613 _SI( .getconfig = ) fq_codel_getconfig,
614 _SI( .ref_count = ) 0
617 DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc);