2 * FQ_Codel - The FlowQueue-Codel scheduler/AQM
6 * Copyright (C) 2016 Centre for Advanced Internet Architectures,
7 * Swinburne University of Technology, Melbourne, Australia.
8 * Portions of this code were made possible in part by a gift from
9 * The Comcast Innovation Fund.
10 * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au>
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/malloc.h>
36 #include <sys/socket.h>
37 //#include <sys/socketvar.h>
38 #include <sys/kernel.h>
40 #include <sys/module.h>
41 #include <net/if.h> /* IFNAMSIZ */
42 #include <netinet/in.h>
43 #include <netinet/ip_var.h> /* ipfw_rule_ref */
44 #include <netinet/ip_fw.h> /* flow_id */
45 #include <netinet/ip_dummynet.h>
49 #include <sys/rwlock.h>
51 #include <netpfil/ipfw/ip_fw_private.h>
52 #include <sys/sysctl.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet/ip_icmp.h>
56 #include <netinet/tcp.h>
57 #include <netinet/udp.h>
58 #include <sys/queue.h>
61 #include <netpfil/ipfw/dn_heap.h>
62 #include <netpfil/ipfw/ip_dn_private.h>
64 #include <netpfil/ipfw/dn_aqm.h>
65 #include <netpfil/ipfw/dn_aqm_codel.h>
66 #include <netpfil/ipfw/dn_sched.h>
67 #include <netpfil/ipfw/dn_sched_fq_codel.h>
68 #include <netpfil/ipfw/dn_sched_fq_codel_helper.h>
74 /* NOTE: The fq_codel module reimplements the CoDel AQM functions
75 * because fq_codel uses a different flow (sub-queue) structure, and
76 * dn_queue includes many variables that a flow (sub-queue) does not
77 * need, i.e. this avoids extra overhead (88 bytes vs 208 bytes).
78 * Also, the CoDel functions manage stats of sub-queues as well as the main queue.
// Scheduler type identifier; it is the value given to the type member of
// fq_codel_desc.
81 #define DN_SCHED_FQ_CODEL 6
// Forward declaration: functions earlier in the file use fq_codel_desc.name
// and fq_codel_desc.ref_count before the descriptor itself is defined.
83 static struct dn_alg fq_codel_desc;
85 /* fq_codel default parameters including codel */
// NOTE(review): the initializer is positional. The inner braces presumably
// fill ccfg (target, interval, flags); the remaining values match the
// defaults declared for the flows (1024), limit (10240) and quantum (1514)
// sysctl entries. Confirm the member order against the struct definition.
86 struct dn_sch_fq_codel_parms
87 fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US,
88 CODEL_ECN_ENABLED}, 1024, 10240, 1514};
// Sysctl handler registered for the interval OID of the fqcodel node.
// The stored default is divided by AQM_TIME_1US before it is handed to
// sysctl_handle_long, and the value is multiplied by AQM_TIME_1US again
// when it is written back to fq_codel_sysctl.ccfg.interval.
91 fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
96 value = fq_codel_sysctl.ccfg.interval;
97 value /= AQM_TIME_1US;
98 error = sysctl_handle_long(oidp, &value, 0, req);
// Tested before the new value is examined: a handler error, or
// req->newptr being NULL (presumably a request that carries no new
// value -- confirm).
99 if (error != 0 || req->newptr == NULL)
// Range test on the converted value: below 1 or above 100 * AQM_TIME_1S.
101 if (value < 1 || value > 100 * AQM_TIME_1S)
103 fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US ;
// Sysctl handler registered for the target OID of the fqcodel node.
// Same shape as the interval handler: the stored default is divided by
// AQM_TIME_1US for sysctl_handle_long and multiplied by AQM_TIME_1US when
// written back to fq_codel_sysctl.ccfg.target.
109 fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
114 value = fq_codel_sysctl.ccfg.target;
115 value /= AQM_TIME_1US;
116 error = sysctl_handle_long(oidp, &value, 0, req);
// Tested before the new value is examined: a handler error, or
// req->newptr being NULL (presumably a request that carries no new
// value -- confirm).
117 if (error != 0 || req->newptr == NULL)
// Range test on the converted value: below 1 or above 5 * AQM_TIME_1S.
// The upper bound is tighter than the one used for the interval (100).
119 if (value < 1 || value > 5 * AQM_TIME_1S)
121 fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US ;
// Sysctl tree for the FQ-CoDel defaults held in fq_codel_sysctl.
// The parent nodes are only declared here; the fqcodel node is created
// under _net_inet_ip_dummynet and every OID below is attached to it.
129 SYSCTL_DECL(_net_inet);
130 SYSCTL_DECL(_net_inet_ip);
131 SYSCTL_DECL(_net_inet_ip_dummynet);
132 static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel,
133 CTLFLAG_RW, 0, "FQ_CODEL");
// target and interval are exported through handler functions, which
// convert between the stored value and the exported one using AQM_TIME_1US
// and range-test new values.
137 SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target,
138 CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_target_handler, "L",
139 "FQ_CoDel target in microsecond");
140 SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval,
141 CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_interval_handler, "L",
142 "FQ_CoDel interval in microsecond");
// quantum, flows and limit are exported straight from the members of
// fq_codel_sysctl, without a handler. The numeric arguments repeat the
// values used in the fq_codel_sysctl initializer.
144 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum,
145 CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum");
146 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows,
147 CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024,
148 "Number of queues for FQ_CoDel");
149 SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit,
150 CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit");
153 /* Drop a packet from the head of the CoDel queue */
// q is the flow (sub-queue) to drop from; si is the scheduler instance
// whose main queue counters are updated alongside the flow counters.
155 codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
157 struct mbuf *m = q->mq.head;
// Unlink the first packet: the head moves to the next packet in the chain.
161 q->mq.head = m->m_nextpkt;
// The packet length is passed negated because the bytes leave the queue.
// The final argument of 1 presumably marks the packet as dropped -- confirm
// against fq_update_stats.
163 fq_update_stats(q, si, -m->m_pkthdr.len, 1);
// When the main queue length reaches zero, remember the current dummynet
// time in q_time.
165 if (si->main_q.ni.length == 0) /* queue is now idle */
166 si->main_q.q_time = dn_cfg.curr_time;
171 /* Enqueue a packet 'm' to a queue 'q' and add timestamp to that packet.
172 * Return 1 when unable to add timestamp, otherwise return 0
// si is the scheduler instance; it is only forwarded to fq_update_stats.
175 codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si)
179 len = m->m_pkthdr.len;
180 /* finding maximum packet size */
181 if (len > q->cst.maxpkt_size)
182 q->cst.maxpkt_size = len;
184 /* Add timestamp to mbuf as MTAG */
// Look for a timestamp tag that is already attached to the packet before
// allocating a new one sized for a single aqm_time_t.
186 mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
188 mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t),
// The timestamp lives in the memory that follows the tag header (mtag + 1).
194 *(aqm_time_t *)(mtag + 1) = AQM_UNOW;
195 m_tag_prepend(m, mtag);
// Queue the packet on the flow and account for its length.
197 mq_append(&q->mq, m);
198 fq_update_stats(q, si, len, 0);
// Same accounting call with a final argument of 1 instead of 0; presumably
// the drop accounting for the case where no timestamp could be attached --
// confirm.
202 fq_update_stats(q, si, len, 1);
208 * Classify a packet to a queue number using the Jenkins hash function.
209 * Return: queue number
210 * The inputs of the hash are protocol number, perturbation, src IP, dst IP,
211 * dst port, src port.
// m is the packet to classify, fcount is the modulus applied to the hash,
// and si supplies the perturbation value mixed into the hash input.
// NOTE(review): fcount is a uint16_t and is used as a divisor. The caller
// passes a flows_cnt value, and fq_codel_config allows flows_cnt up to
// 65536, which does not fit in 16 bits -- confirm that a zero divisor
// cannot be reached.
214 fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si)
// The IP header is located through the header offset stored in the
// dummynet tag of the packet.
222 ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
// The version field selects between the IPv6 and the IPv4 tuple layout.
226 isip6 = (ip->ip_v == 6);
// IPv6 tuple layout (41 bytes hashed): byte 0 next header, bytes 1-4
// perturbation, bytes 5-20 source address, bytes 21-36 destination
// address, bytes 37-38 destination port, bytes 39-40 source port.
229 ip6 = (struct ip6_hdr *)ip;
230 *((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
231 *((uint32_t *) &tuple[1]) = si->perturbation;
232 memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
233 memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
// NOTE(review): the transport header is taken to start directly after the
// fixed IPv6 header (ip6 + 1); extension headers are not skipped here.
235 switch (ip6->ip6_nxt) {
237 th = (struct tcphdr *)(ip6 + 1);
238 *((uint16_t *) &tuple[37]) = th->th_dport;
239 *((uint16_t *) &tuple[39]) = th->th_sport;
243 uh = (struct udphdr *)(ip6 + 1);
244 *((uint16_t *) &tuple[37]) = uh->uh_dport;
245 *((uint16_t *) &tuple[39]) = uh->uh_sport;
// Zero the four port bytes (presumably for protocols other than TCP and
// UDP -- confirm).
248 memset(&tuple[37], 0, 4);
252 hash = jenkins_hash(tuple, 41, HASHINIT) % fcount;
// IPv4 tuple layout (17 bytes hashed): byte 0 protocol, bytes 1-4
// perturbation, bytes 5-8 source address, bytes 9-12 destination address,
// bytes 13-14 destination port, bytes 15-16 source port.
258 *((uint8_t *) &tuple[0]) = ip->ip_p;
259 *((uint32_t *) &tuple[1]) = si->perturbation;
260 *((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
261 *((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
// NOTE(review): the transport header is taken to start at ip + 1, i.e.
// right after a struct ip; the header length field is not consulted, so
// IPv4 options are not skipped here.
265 th = (struct tcphdr *)(ip + 1);
266 *((uint16_t *) &tuple[13]) = th->th_dport;
267 *((uint16_t *) &tuple[15]) = th->th_sport;
271 uh = (struct udphdr *)(ip + 1);
272 *((uint16_t *) &tuple[13]) = uh->uh_dport;
273 *((uint16_t *) &tuple[15]) = uh->uh_sport;
// Zero the four port bytes (presumably for protocols other than TCP and
// UDP -- confirm).
276 memset(&tuple[13], 0, 4);
279 hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;
285 * Enqueue a packet into an appropriate queue according to
286 * FQ_CODEL algorithm.
// _si is the scheduler instance and is also viewed as the enclosing
// struct fq_codel_si. The packet is hashed to a flow index, queued on that
// flow through codel_enqueue, and the flow is activated if it was idle.
// When the main queue length exceeds the configured limit, one packet is
// dropped from the head of the active flow selected by the scan at the end.
289 fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
292 struct fq_codel_si *si;
293 struct fq_codel_schk *schk;
294 struct dn_sch_fq_codel_parms *param;
295 struct dn_queue *mainq;
296 int idx, drop, i, maxidx;
// The main queue is addressed as the memory that follows the dn_sch_inst
// header, and the scheduler-wide data as the memory that follows the
// dn_schk structure.
298 mainq = (struct dn_queue *)(_si + 1);
299 si = (struct fq_codel_si *)_si;
300 schk = (struct fq_codel_schk *)(si->_si.sched+1);
303 /* classify a packet to queue number*/
304 idx = fq_codel_classify_flow(m, param->flows_cnt, si);
305 /* enqueue packet into appropriate queue using CoDel AQM.
306 * Note: 'codel_enqueue' function returns 1 only when it unable to
307 * add timestamp to packet (no limit check)*/
308 drop = codel_enqueue(&si->flows[idx], m, si);
310 /* codel unable to timestamp a packet */
314 /* If the flow (sub-queue) is not active ,then add it to the tail of
315 * new flows list, initialize and activate it.
317 if (!si->flows[idx].active ) {
318 STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
// A newly activated flow starts with one quantum of deficit and with its
// CoDel dropping state and first_above_time cleared.
319 si->flows[idx].deficit = param->quantum;
320 si->flows[idx].cst.dropping = false;
321 si->flows[idx].cst.first_above_time = 0;
322 si->flows[idx].active = 1;
323 //D("activate %d",idx);
326 /* check the limit for all queues and remove a packet from the
329 if (mainq->ni.length > schk->cfg.limit) { D("over limit");
330 /* find first active flow */
331 for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
332 if (si->flows[maxidx].active)
// maxidx stays below flows_cnt only when an active flow was found.
334 if (maxidx < schk->cfg.flows_cnt) {
335 /* find the largest sub- queue */
336 for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
337 if (si->flows[i].active && si->flows[i].stats.length >
338 si->flows[maxidx].stats.length)
// Drop one packet from the head of the selected flow.
340 codel_drop_head(&si->flows[maxidx], si);
341 D("maxidx = %d",maxidx);
350 * Dequeue a packet from an appropriate queue according to
351 * FQ_CODEL algorithm.
// _si is the scheduler instance, viewed as the enclosing struct
// fq_codel_si. The new flows list is served first and the old flows list is
// the fallback. The length of every packet handed out is charged against
// the deficit of its flow.
354 fq_codel_dequeue(struct dn_sch_inst *_si)
356 struct fq_codel_si *si;
357 struct fq_codel_schk *schk;
358 struct dn_sch_fq_codel_parms *param;
359 struct fq_codel_flow *f;
361 struct fq_codel_list *fq_codel_flowlist;
363 si = (struct fq_codel_si *)_si;
364 schk = (struct fq_codel_schk *)(si->_si.sched+1);
368 /* select a list to start with */
369 if (STAILQ_EMPTY(&si->newflows))
370 fq_codel_flowlist = &si->oldflows;
372 fq_codel_flowlist = &si->newflows;
374 /* Both new and old queue lists are empty, return NULL */
375 if (STAILQ_EMPTY(fq_codel_flowlist))
378 f = STAILQ_FIRST(fq_codel_flowlist);
380 /* if there is no flow(sub-queue) deficit, increase deficit
381 * by quantum, move the flow to the tail of old flows list
382 * and try another flow.
383 * Otherwise, the flow will be used for dequeue.
385 if (f->deficit < 0) {
386 f->deficit += param->quantum;
387 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
388 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
392 f = STAILQ_FIRST(fq_codel_flowlist);
395 /* the new flows list is empty, try old flows list */
396 if (STAILQ_EMPTY(fq_codel_flowlist))
399 /* Dequeue a packet from the selected flow */
// fqc_codel_dequeue is the per-flow dequeue helper; the case where it
// yields no packet is handled right below.
400 mbuf = fqc_codel_dequeue(f, si);
402 /* Codel did not return a packet */
404 /* If the selected flow belongs to new flows list, then move
405 * it to the tail of old flows list. Otherwise, deactivate it and
406 * remove it from the old list and
408 if (fq_codel_flowlist == &si->newflows) {
409 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
410 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
413 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
419 /* we have a packet to return,
420 * update flow deficit and return the packet*/
// The deficit may become negative here; the deficit test above then
// refills it by one quantum and moves the flow to the old flows list.
421 f->deficit -= mbuf->m_pkthdr.len;
426 /* unreachable point */
431 * Initialize fq_codel scheduler instance.
432 * also, allocate memory for flows array.
// _si is the new scheduler instance, viewed as the enclosing struct
// fq_codel_si. The number of flows comes from the scheduler configuration
// that follows the dn_schk structure (sched + 1).
435 fq_codel_new_sched(struct dn_sch_inst *_si)
437 struct fq_codel_si *si;
439 struct fq_codel_schk *schk;
442 si = (struct fq_codel_si *)_si;
443 schk = (struct fq_codel_schk *)(_si->sched+1);
446 D("si already configured!");
450 /* init the main queue */
452 set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
454 q->fs = _si->sched->fs;
456 /* allocate memory for flows array */
// One fq_codel_flow per configured flow, zero-filled (M_ZERO). The
// allocation is made with M_NOWAIT, so the result is tested for NULL.
457 si->flows = mallocarray(schk->cfg.flows_cnt,
458 sizeof(struct fq_codel_flow), M_DUMMYNET, M_NOWAIT | M_ZERO);
459 if (si->flows == NULL) {
// NOTE(review): the message mentions configuration parameters, but the
// allocation that failed is the flows array.
460 D("cannot allocate memory for fq_codel configuration parameters");
464 /* init perturbation for this si */
// The perturbation is part of the hash input in fq_codel_classify_flow.
465 si->perturbation = random();
467 /* init the old and new flows lists */
468 STAILQ_INIT(&si->newflows);
469 STAILQ_INIT(&si->oldflows);
471 /* init the flows (sub-queues) */
472 for (i = 0; i < schk->cfg.flows_cnt; i++) {
// Starting value only; codel_enqueue raises maxpkt_size when it sees a
// larger packet.
474 si->flows[i].cst.maxpkt_size = 500;
// Count this instance on the descriptor; fq_codel_free_sched undoes it.
477 fq_codel_desc.ref_count++;
482 * Free fq_codel scheduler instance.
// _si is the instance being torn down, viewed as the enclosing struct
// fq_codel_si. Releases the flows array allocated by fq_codel_new_sched
// and drops the reference that fq_codel_new_sched took on the descriptor.
485 fq_codel_free_sched(struct dn_sch_inst *_si)
487 struct fq_codel_si *si = (struct fq_codel_si *)_si ;
489 /* free the flows array */
490 free(si->flows , M_DUMMYNET);
492 fq_codel_desc.ref_count--;
498 * Configure the fq_codel scheduler.
499 * The configuration for the scheduler is passed from userland.
// _schk is the scheduler being configured. Its private data follows the
// dn_schk structure (_schk + 1) and the extra parameters are read from
// _schk->cfg. Every member of schk->cfg is assigned either from the
// fq_codel_sysctl defaults or from the matching ep->par[] entry, and is
// then passed through BOUND_VAR (presumably a clamp to the given range --
// confirm).
502 fq_codel_config(struct dn_schk *_schk)
504 struct fq_codel_schk *schk;
505 struct dn_extra_parms *ep;
506 struct dn_sch_fq_codel_parms *fqc_cfg;
508 schk = (struct fq_codel_schk *)(_schk+1);
509 ep = (struct dn_extra_parms *) _schk->cfg;
511 /* par array contains fq_codel configuration as follow
512 * Codel: 0- target,1- interval, 2- flags
513 * FQ_CODEL: 3- quantum, 4- limit, 5- flows
// Only a parameter block with the expected length and subtype is used.
515 if (ep && ep->oid.len ==sizeof(*ep) &&
516 ep->oid.subtype == DN_SCH_PARAMS) {
518 fqc_cfg = &schk->cfg;
// par[0] and par[1] are multiplied by AQM_TIME_1US before being stored.
520 fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target;
522 fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US;
525 fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval;
527 fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US;
// Unlike the other members, the fallback for flags is the literal 0 and
// not the value held in fq_codel_sysctl.
530 fqc_cfg->ccfg.flags = 0;
532 fqc_cfg->ccfg.flags = ep->par[2];
534 /* FQ configurations */
536 fqc_cfg->quantum = fq_codel_sysctl.quantum;
538 fqc_cfg->quantum = ep->par[3];
541 fqc_cfg->limit = fq_codel_sysctl.limit;
543 fqc_cfg->limit = ep->par[4];
546 fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt;
548 fqc_cfg->flows_cnt = ep->par[5];
550 /* Bound the configurations */
551 fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1 ,
553 fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1,
556 fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum,1, 9000);
557 fqc_cfg->limit= BOUND_VAR(fqc_cfg->limit,1,20480);
// NOTE(review): fq_codel_classify_flow takes the flow count as a uint16_t
// and uses it as a divisor. An upper bound of 65536 does not fit in 16
// bits -- confirm whether the bound should be 65535.
558 fqc_cfg->flows_cnt= BOUND_VAR(fqc_cfg->flows_cnt,1,65536);
567 * Return the fq_codel scheduler configuration.
568 * The configuration for the scheduler is passed to userland.
// _schk is the scheduler whose configuration is reported; ep is the
// parameter block that is filled in. The par[] indices mirror the layout
// read by fq_codel_config, and the two time values are divided by
// AQM_TIME_1US on the way out.
571 fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
573 struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1);
574 struct dn_sch_fq_codel_parms *fqc_cfg;
576 fqc_cfg = &schk->cfg;
// NOTE(review): strcpy does not bound the copy -- confirm that ep->name is
// large enough for fq_codel_desc.name, or use a bounded copy.
578 strcpy(ep->name, fq_codel_desc.name);
579 ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US;
580 ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US;
581 ep->par[2] = fqc_cfg->ccfg.flags;
583 ep->par[3] = fqc_cfg->quantum;
584 ep->par[4] = fqc_cfg->limit;
585 ep->par[5] = fqc_cfg->flows_cnt;
591 * fq_codel scheduler descriptor
592 * contains the type of the scheduler, the name, the size of extra
593 * data structures, and function pointers.
595 static struct dn_alg fq_codel_desc = {
596 _SI( .type = ) DN_SCHED_FQ_CODEL,
// The name is also what fq_codel_getconfig copies into ep->name.
597 _SI( .name = ) "FQ_CODEL",
600 _SI( .schk_datalen = ) sizeof(struct fq_codel_schk),
// fq_codel_si begins with a dn_sch_inst (it is reached by casting the
// dn_sch_inst pointer), so only the remainder is requested as extra space.
601 _SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst),
// No extra per-queue data is requested.
602 _SI( .q_datalen = ) 0,
604 _SI( .enqueue = ) fq_codel_enqueue,
605 _SI( .dequeue = ) fq_codel_dequeue,
606 _SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/
607 _SI( .destroy = ) NULL, /*sched x delete */
608 _SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */
609 _SI( .free_sched = ) fq_codel_free_sched, /* delete schd instance */
// The flowset and queue callbacks are left unset.
610 _SI( .new_fsk = ) NULL,
611 _SI( .free_fsk = ) NULL,
612 _SI( .new_queue = ) NULL,
613 _SI( .free_queue = ) NULL,
614 _SI( .getconfig = ) fq_codel_getconfig,
// Starts at zero; fq_codel_new_sched increments it and fq_codel_free_sched
// decrements it.
615 _SI( .ref_count = ) 0
// Hands the descriptor to the dummynet scheduler module machinery under
// the module name dn_fq_codel (presumably registering it at load time --
// confirm against DECLARE_DNSCHED_MODULE).
618 DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc);