]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/net/altq/altq_cdnr.c
Merge bmake-20180512
[FreeBSD/FreeBSD.git] / sys / net / altq / altq_cdnr.c
1 /*-
2  * Copyright (C) 1999-2002
3  *      Sony Computer Science Laboratories Inc.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $
27  * $FreeBSD$
28  */
29
30 #include "opt_altq.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/socket.h>
38 #include <sys/sockio.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/errno.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44
45 #include <net/if.h>
46 #include <net/if_types.h>
47 #include <netinet/in.h>
48 #include <netinet/in_systm.h>
49 #include <netinet/ip.h>
50 #ifdef INET6
51 #include <netinet/ip6.h>
52 #endif
53
54 #include <net/altq/if_altq.h>
55 #include <net/altq/altq.h>
56 #ifdef ALTQ3_COMPAT
57 #include <net/altq/altq_conf.h>
58 #endif
59 #include <net/altq/altq_cdnr.h>
60
61 #ifdef ALTQ3_COMPAT
62 /*
63  * diffserv traffic conditioning module
64  */
65
66 int altq_cdnr_enabled = 0;
67
68 /* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
69 #ifdef ALTQ_CDNR
70
71 /* tcb_list keeps all the top-level cdnr's allocated. */
72 static LIST_HEAD(, top_cdnr) tcb_list;
73
74 static int altq_cdnr_input(struct mbuf *, int);
75 static struct top_cdnr *tcb_lookup(char *ifname);
76 static struct cdnr_block *cdnr_handle2cb(u_long);
77 static u_long cdnr_cb2handle(struct cdnr_block *);
78 static void *cdnr_cballoc(struct top_cdnr *, int,
79        struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
80 static void cdnr_cbdestroy(void *);
81 static int tca_verify_action(struct tc_action *);
82 static void tca_import_action(struct tc_action *, struct tc_action *);
83 static void tca_invalidate_action(struct tc_action *);
84
85 static int generic_element_destroy(struct cdnr_block *);
86 static struct top_cdnr *top_create(struct ifaltq *);
87 static int top_destroy(struct top_cdnr *);
88 static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
89 static int element_destroy(struct cdnr_block *);
90 static void tb_import_profile(struct tbe *, struct tb_profile *);
91 static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
92                                   struct tc_action *, struct tc_action *);
93 static int tbm_destroy(struct tbmeter *);
94 static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
95 static struct trtcm *trtcm_create(struct top_cdnr *,
96                   struct tb_profile *, struct tb_profile *,
97                   struct tc_action *, struct tc_action *, struct tc_action *,
98                   int);
99 static int trtcm_destroy(struct trtcm *);
100 static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
101 static struct tswtcm *tswtcm_create(struct top_cdnr *,
102                   u_int32_t, u_int32_t, u_int32_t,
103                   struct tc_action *, struct tc_action *, struct tc_action *);
104 static int tswtcm_destroy(struct tswtcm *);
105 static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
106
107 static int cdnrcmd_if_attach(char *);
108 static int cdnrcmd_if_detach(char *);
109 static int cdnrcmd_add_element(struct cdnr_add_element *);
110 static int cdnrcmd_delete_element(struct cdnr_delete_element *);
111 static int cdnrcmd_add_filter(struct cdnr_add_filter *);
112 static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
113 static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
114 static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
115 static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
116 static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
117 static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
118 static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
119 static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
120 static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
121 static int cdnrcmd_get_stats(struct cdnr_get_stats *);
122
123 altqdev_decl(cdnr);
124
125 /*
126  * top level input function called from ip_input.
127  * should be called before converting header fields to host-byte-order.
128  */
129 int
130 altq_cdnr_input(m, af)
131         struct mbuf     *m;
132         int             af;     /* address family */
133 {
134         struct ifnet            *ifp;
135         struct ip               *ip;
136         struct top_cdnr         *top;
137         struct tc_action        *tca;
138         struct cdnr_block       *cb;
139         struct cdnr_pktinfo     pktinfo;
140
141         ifp = m->m_pkthdr.rcvif;
142         if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
143                 /* traffic conditioner is not enabled on this interface */
144                 return (1);
145
146         top = ifp->if_snd.altq_cdnr;
147
148         ip = mtod(m, struct ip *);
149 #ifdef INET6
150         if (af == AF_INET6) {
151                 u_int32_t flowlabel;
152
153                 flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
154                 pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
155         } else
156 #endif
157                 pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
158         pktinfo.pkt_len = m_pktlen(m);
159
160         tca = NULL;
161
162         cb = acc_classify(&top->tc_classifier, m, af);
163         if (cb != NULL)
164                 tca = &cb->cb_action;
165
166         if (tca == NULL)
167                 tca = &top->tc_block.cb_action;
168
169         while (1) {
170                 PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
171
172                 switch (tca->tca_code) {
173                 case TCACODE_PASS:
174                         return (1);
175                 case TCACODE_DROP:
176                         m_freem(m);
177                         return (0);
178                 case TCACODE_RETURN:
179                         return (0);
180                 case TCACODE_MARK:
181 #ifdef INET6
182                         if (af == AF_INET6) {
183                                 struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
184                                 u_int32_t flowlabel;
185
186                                 flowlabel = ntohl(ip6->ip6_flow);
187                                 flowlabel = (tca->tca_dscp << 20) |
188                                         (flowlabel & ~(DSCP_MASK << 20));
189                                 ip6->ip6_flow = htonl(flowlabel);
190                         } else
191 #endif
192                                 ip->ip_tos = tca->tca_dscp |
193                                         (ip->ip_tos & DSCP_CUMASK);
194                         return (1);
195                 case TCACODE_NEXT:
196                         cb = tca->tca_next;
197                         tca = (*cb->cb_input)(cb, &pktinfo);
198                         break;
199                 case TCACODE_NONE:
200                 default:
201                         return (1);
202                 }
203         }
204 }
205
206 static struct top_cdnr *
207 tcb_lookup(ifname)
208         char *ifname;
209 {
210         struct top_cdnr *top;
211         struct ifnet *ifp;
212
213         if ((ifp = ifunit(ifname)) != NULL)
214                 LIST_FOREACH(top, &tcb_list, tc_next)
215                         if (top->tc_ifq->altq_ifp == ifp)
216                                 return (top);
217         return (NULL);
218 }
219
220 static struct cdnr_block *
221 cdnr_handle2cb(handle)
222         u_long handle;
223 {
224         struct cdnr_block *cb;
225
226         cb = (struct cdnr_block *)handle;
227         if (handle != ALIGN(cb))
228                 return (NULL);
229
230         if (cb == NULL || cb->cb_handle != handle)
231                 return (NULL);
232         return (cb);
233 }
234
235 static u_long
236 cdnr_cb2handle(cb)
237         struct cdnr_block *cb;
238 {
239         return (cb->cb_handle);
240 }
241
242 static void *
243 cdnr_cballoc(top, type, input_func)
244         struct top_cdnr *top;
245         int type;
246         struct tc_action *(*input_func)(struct cdnr_block *,
247                                         struct cdnr_pktinfo *);
248 {
249         struct cdnr_block *cb;
250         int size;
251
252         switch (type) {
253         case TCETYPE_TOP:
254                 size = sizeof(struct top_cdnr);
255                 break;
256         case TCETYPE_ELEMENT:
257                 size = sizeof(struct cdnr_block);
258                 break;
259         case TCETYPE_TBMETER:
260                 size = sizeof(struct tbmeter);
261                 break;
262         case TCETYPE_TRTCM:
263                 size = sizeof(struct trtcm);
264                 break;
265         case TCETYPE_TSWTCM:
266                 size = sizeof(struct tswtcm);
267                 break;
268         default:
269                 return (NULL);
270         }
271
272         cb = malloc(size, M_DEVBUF, M_WAITOK);
273         if (cb == NULL)
274                 return (NULL);
275         bzero(cb, size);
276
277         cb->cb_len = size;
278         cb->cb_type = type;
279         cb->cb_ref = 0;
280         cb->cb_handle = (u_long)cb;
281         if (top == NULL)
282                 cb->cb_top = (struct top_cdnr *)cb;
283         else
284                 cb->cb_top = top;
285
286         if (input_func != NULL) {
287                 /*
288                  * if this cdnr has an action function,
289                  * make tc_action to call itself.
290                  */
291                 cb->cb_action.tca_code = TCACODE_NEXT;
292                 cb->cb_action.tca_next = cb;
293                 cb->cb_input = input_func;
294         } else
295                 cb->cb_action.tca_code = TCACODE_NONE;
296
297         /* if this isn't top, register the element to the top level cdnr */
298         if (top != NULL)
299                 LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
300
301         return ((void *)cb);
302 }
303
304 static void
305 cdnr_cbdestroy(cblock)
306         void *cblock;
307 {
308         struct cdnr_block *cb = cblock;
309
310         /* delete filters belonging to this cdnr */
311         acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
312
313         /* remove from the top level cdnr */
314         if (cb->cb_top != cblock)
315                 LIST_REMOVE(cb, cb_next);
316
317         free(cb, M_DEVBUF);
318 }
319
320 /*
321  * conditioner common destroy routine
322  */
323 static int
324 generic_element_destroy(cb)
325         struct cdnr_block *cb;
326 {
327         int error = 0;
328
329         switch (cb->cb_type) {
330         case TCETYPE_TOP:
331                 error = top_destroy((struct top_cdnr *)cb);
332                 break;
333         case TCETYPE_ELEMENT:
334                 error = element_destroy(cb);
335                 break;
336         case TCETYPE_TBMETER:
337                 error = tbm_destroy((struct tbmeter *)cb);
338                 break;
339         case TCETYPE_TRTCM:
340                 error = trtcm_destroy((struct trtcm *)cb);
341                 break;
342         case TCETYPE_TSWTCM:
343                 error = tswtcm_destroy((struct tswtcm *)cb);
344                 break;
345         default:
346                 error = EINVAL;
347         }
348         return (error);
349 }
350
351 static int
352 tca_verify_action(utca)
353         struct tc_action *utca;
354 {
355         switch (utca->tca_code) {
356         case TCACODE_PASS:
357         case TCACODE_DROP:
358         case TCACODE_MARK:
359                 /* these are ok */
360                 break;
361
362         case TCACODE_HANDLE:
363                 /* verify handle value */
364                 if (cdnr_handle2cb(utca->tca_handle) == NULL)
365                         return (-1);
366                 break;
367
368         case TCACODE_NONE:
369         case TCACODE_RETURN:
370         case TCACODE_NEXT:
371         default:
372                 /* should not be passed from a user */
373                 return (-1);
374         }
375         return (0);
376 }
377
378 static void
379 tca_import_action(ktca, utca)
380         struct tc_action *ktca, *utca;
381 {
382         struct cdnr_block *cb;
383
384         *ktca = *utca;
385         if (ktca->tca_code == TCACODE_HANDLE) {
386                 cb = cdnr_handle2cb(ktca->tca_handle);
387                 if (cb == NULL) {
388                         ktca->tca_code = TCACODE_NONE;
389                         return;
390                 }
391                 ktca->tca_code = TCACODE_NEXT;
392                 ktca->tca_next = cb;
393                 cb->cb_ref++;
394         } else if (ktca->tca_code == TCACODE_MARK) {
395                 ktca->tca_dscp &= DSCP_MASK;
396         }
397         return;
398 }
399
400 static void
401 tca_invalidate_action(tca)
402         struct tc_action *tca;
403 {
404         struct cdnr_block *cb;
405
406         if (tca->tca_code == TCACODE_NEXT) {
407                 cb = tca->tca_next;
408                 if (cb == NULL)
409                         return;
410                 cb->cb_ref--;
411         }
412         tca->tca_code = TCACODE_NONE;
413 }
414
415 /*
416  * top level traffic conditioner
417  */
418 static struct top_cdnr *
419 top_create(ifq)
420         struct ifaltq *ifq;
421 {
422         struct top_cdnr *top;
423
424         if ((top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL)) == NULL)
425                 return (NULL);
426
427         top->tc_ifq = ifq;
428         /* set default action for the top level conditioner */
429         top->tc_block.cb_action.tca_code = TCACODE_PASS;
430
431         LIST_INSERT_HEAD(&tcb_list, top, tc_next);
432
433         ifq->altq_cdnr = top;
434
435         return (top);
436 }
437
438 static int
439 top_destroy(top)
440         struct top_cdnr *top;
441 {
442         struct cdnr_block *cb;
443
444         if (ALTQ_IS_CNDTNING(top->tc_ifq))
445                 ALTQ_CLEAR_CNDTNING(top->tc_ifq);
446         top->tc_ifq->altq_cdnr = NULL;
447
448         /*
449          * destroy all the conditioner elements belonging to this interface
450          */
451         while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
452                 while (cb != NULL && cb->cb_ref > 0)
453                         cb = LIST_NEXT(cb, cb_next);
454                 if (cb != NULL)
455                         generic_element_destroy(cb);
456         }
457
458         LIST_REMOVE(top, tc_next);
459
460         cdnr_cbdestroy(top);
461
462         /* if there is no active conditioner, remove the input hook */
463         if (altq_input != NULL) {
464                 LIST_FOREACH(top, &tcb_list, tc_next)
465                         if (ALTQ_IS_CNDTNING(top->tc_ifq))
466                                 break;
467                 if (top == NULL)
468                         altq_input = NULL;
469         }
470
471         return (0);
472 }
473
474 /*
475  * simple tc elements without input function (e.g., dropper and makers).
476  */
477 static struct cdnr_block *
478 element_create(top, action)
479         struct top_cdnr *top;
480         struct tc_action *action;
481 {
482         struct cdnr_block *cb;
483
484         if (tca_verify_action(action) < 0)
485                 return (NULL);
486
487         if ((cb = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL)) == NULL)
488                 return (NULL);
489
490         tca_import_action(&cb->cb_action, action);
491
492         return (cb);
493 }
494
495 static int
496 element_destroy(cb)
497         struct cdnr_block *cb;
498 {
499         if (cb->cb_ref > 0)
500                 return (EBUSY);
501
502         tca_invalidate_action(&cb->cb_action);
503
504         cdnr_cbdestroy(cb);
505         return (0);
506 }
507
508 /*
509  * internal representation of token bucket parameters
510  *      rate:   byte_per_unittime << 32
511  *              (((bits_per_sec) / 8) << 32) / machclk_freq
512  *      depth:  byte << 32
513  *
514  */
515 #define TB_SHIFT        32
516 #define TB_SCALE(x)     ((u_int64_t)(x) << TB_SHIFT)
517 #define TB_UNSCALE(x)   ((x) >> TB_SHIFT)
518
519 static void
520 tb_import_profile(tb, profile)
521         struct tbe *tb;
522         struct tb_profile *profile;
523 {
524         tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
525         tb->depth = TB_SCALE(profile->depth);
526         if (tb->rate > 0)
527                 tb->filluptime = tb->depth / tb->rate;
528         else
529                 tb->filluptime = 0xffffffffffffffffLL;
530         tb->token = tb->depth;
531         tb->last = read_machclk();
532 }
533
534 /*
535  * simple token bucket meter
536  */
537 static struct tbmeter *
538 tbm_create(top, profile, in_action, out_action)
539         struct top_cdnr *top;
540         struct tb_profile *profile;
541         struct tc_action *in_action, *out_action;
542 {
543         struct tbmeter *tbm = NULL;
544
545         if (tca_verify_action(in_action) < 0
546             || tca_verify_action(out_action) < 0)
547                 return (NULL);
548
549         if ((tbm = cdnr_cballoc(top, TCETYPE_TBMETER,
550                                 tbm_input)) == NULL)
551                 return (NULL);
552
553         tb_import_profile(&tbm->tb, profile);
554
555         tca_import_action(&tbm->in_action, in_action);
556         tca_import_action(&tbm->out_action, out_action);
557
558         return (tbm);
559 }
560
561 static int
562 tbm_destroy(tbm)
563         struct tbmeter *tbm;
564 {
565         if (tbm->cdnrblk.cb_ref > 0)
566                 return (EBUSY);
567
568         tca_invalidate_action(&tbm->in_action);
569         tca_invalidate_action(&tbm->out_action);
570
571         cdnr_cbdestroy(tbm);
572         return (0);
573 }
574
575 static struct tc_action *
576 tbm_input(cb, pktinfo)
577         struct cdnr_block *cb;
578         struct cdnr_pktinfo *pktinfo;
579 {
580         struct tbmeter *tbm = (struct tbmeter *)cb;
581         u_int64_t       len;
582         u_int64_t       interval, now;
583
584         len = TB_SCALE(pktinfo->pkt_len);
585
586         if (tbm->tb.token < len) {
587                 now = read_machclk();
588                 interval = now - tbm->tb.last;
589                 if (interval >= tbm->tb.filluptime)
590                         tbm->tb.token = tbm->tb.depth;
591                 else {
592                         tbm->tb.token += interval * tbm->tb.rate;
593                         if (tbm->tb.token > tbm->tb.depth)
594                                 tbm->tb.token = tbm->tb.depth;
595                 }
596                 tbm->tb.last = now;
597         }
598
599         if (tbm->tb.token < len) {
600                 PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
601                 return (&tbm->out_action);
602         }
603
604         tbm->tb.token -= len;
605         PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
606         return (&tbm->in_action);
607 }
608
609 /*
610  * two rate three color marker
611  * as described in draft-heinanen-diffserv-trtcm-01.txt
612  */
613 static struct trtcm *
614 trtcm_create(top, cmtd_profile, peak_profile,
615              green_action, yellow_action, red_action, coloraware)
616         struct top_cdnr *top;
617         struct tb_profile *cmtd_profile, *peak_profile;
618         struct tc_action *green_action, *yellow_action, *red_action;
619         int     coloraware;
620 {
621         struct trtcm *tcm = NULL;
622
623         if (tca_verify_action(green_action) < 0
624             || tca_verify_action(yellow_action) < 0
625             || tca_verify_action(red_action) < 0)
626                 return (NULL);
627
628         if ((tcm = cdnr_cballoc(top, TCETYPE_TRTCM,
629                                 trtcm_input)) == NULL)
630                 return (NULL);
631
632         tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
633         tb_import_profile(&tcm->peak_tb, peak_profile);
634
635         tca_import_action(&tcm->green_action, green_action);
636         tca_import_action(&tcm->yellow_action, yellow_action);
637         tca_import_action(&tcm->red_action, red_action);
638
639         /* set dscps to use */
640         if (tcm->green_action.tca_code == TCACODE_MARK)
641                 tcm->green_dscp = tcm->green_action.tca_dscp & DSCP_MASK;
642         else
643                 tcm->green_dscp = DSCP_AF11;
644         if (tcm->yellow_action.tca_code == TCACODE_MARK)
645                 tcm->yellow_dscp = tcm->yellow_action.tca_dscp & DSCP_MASK;
646         else
647                 tcm->yellow_dscp = DSCP_AF12;
648         if (tcm->red_action.tca_code == TCACODE_MARK)
649                 tcm->red_dscp = tcm->red_action.tca_dscp & DSCP_MASK;
650         else
651                 tcm->red_dscp = DSCP_AF13;
652
653         tcm->coloraware = coloraware;
654
655         return (tcm);
656 }
657
658 static int
659 trtcm_destroy(tcm)
660         struct trtcm *tcm;
661 {
662         if (tcm->cdnrblk.cb_ref > 0)
663                 return (EBUSY);
664
665         tca_invalidate_action(&tcm->green_action);
666         tca_invalidate_action(&tcm->yellow_action);
667         tca_invalidate_action(&tcm->red_action);
668
669         cdnr_cbdestroy(tcm);
670         return (0);
671 }
672
673 static struct tc_action *
674 trtcm_input(cb, pktinfo)
675         struct cdnr_block *cb;
676         struct cdnr_pktinfo *pktinfo;
677 {
678         struct trtcm *tcm = (struct trtcm *)cb;
679         u_int64_t       len;
680         u_int64_t       interval, now;
681         u_int8_t        color;
682
683         len = TB_SCALE(pktinfo->pkt_len);
684         if (tcm->coloraware) {
685                 color = pktinfo->pkt_dscp;
686                 if (color != tcm->yellow_dscp && color != tcm->red_dscp)
687                         color = tcm->green_dscp;
688         } else {
689                 /* if color-blind, precolor it as green */
690                 color = tcm->green_dscp;
691         }
692
693         now = read_machclk();
694         if (tcm->cmtd_tb.token < len) {
695                 interval = now - tcm->cmtd_tb.last;
696                 if (interval >= tcm->cmtd_tb.filluptime)
697                         tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
698                 else {
699                         tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
700                         if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
701                                 tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
702                 }
703                 tcm->cmtd_tb.last = now;
704         }
705         if (tcm->peak_tb.token < len) {
706                 interval = now - tcm->peak_tb.last;
707                 if (interval >= tcm->peak_tb.filluptime)
708                         tcm->peak_tb.token = tcm->peak_tb.depth;
709                 else {
710                         tcm->peak_tb.token += interval * tcm->peak_tb.rate;
711                         if (tcm->peak_tb.token > tcm->peak_tb.depth)
712                                 tcm->peak_tb.token = tcm->peak_tb.depth;
713                 }
714                 tcm->peak_tb.last = now;
715         }
716
717         if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
718                 pktinfo->pkt_dscp = tcm->red_dscp;
719                 PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
720                 return (&tcm->red_action);
721         }
722
723         if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
724                 pktinfo->pkt_dscp = tcm->yellow_dscp;
725                 tcm->peak_tb.token -= len;
726                 PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
727                 return (&tcm->yellow_action);
728         }
729
730         pktinfo->pkt_dscp = tcm->green_dscp;
731         tcm->cmtd_tb.token -= len;
732         tcm->peak_tb.token -= len;
733         PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
734         return (&tcm->green_action);
735 }
736
737 /*
738  * time sliding window three color marker
739  * as described in draft-fang-diffserv-tc-tswtcm-00.txt
740  */
741 static struct tswtcm *
742 tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
743               green_action, yellow_action, red_action)
744         struct top_cdnr *top;
745         u_int32_t       cmtd_rate, peak_rate, avg_interval;
746         struct tc_action *green_action, *yellow_action, *red_action;
747 {
748         struct tswtcm *tsw;
749
750         if (tca_verify_action(green_action) < 0
751             || tca_verify_action(yellow_action) < 0
752             || tca_verify_action(red_action) < 0)
753                 return (NULL);
754
755         if ((tsw = cdnr_cballoc(top, TCETYPE_TSWTCM,
756                                 tswtcm_input)) == NULL)
757                 return (NULL);
758
759         tca_import_action(&tsw->green_action, green_action);
760         tca_import_action(&tsw->yellow_action, yellow_action);
761         tca_import_action(&tsw->red_action, red_action);
762
763         /* set dscps to use */
764         if (tsw->green_action.tca_code == TCACODE_MARK)
765                 tsw->green_dscp = tsw->green_action.tca_dscp & DSCP_MASK;
766         else
767                 tsw->green_dscp = DSCP_AF11;
768         if (tsw->yellow_action.tca_code == TCACODE_MARK)
769                 tsw->yellow_dscp = tsw->yellow_action.tca_dscp & DSCP_MASK;
770         else
771                 tsw->yellow_dscp = DSCP_AF12;
772         if (tsw->red_action.tca_code == TCACODE_MARK)
773                 tsw->red_dscp = tsw->red_action.tca_dscp & DSCP_MASK;
774         else
775                 tsw->red_dscp = DSCP_AF13;
776
777         /* convert rates from bits/sec to bytes/sec */
778         tsw->cmtd_rate = cmtd_rate / 8;
779         tsw->peak_rate = peak_rate / 8;
780         tsw->avg_rate = 0;
781
782         /* timewin is converted from msec to machine clock unit */
783         tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
784
785         return (tsw);
786 }
787
788 static int
789 tswtcm_destroy(tsw)
790         struct tswtcm *tsw;
791 {
792         if (tsw->cdnrblk.cb_ref > 0)
793                 return (EBUSY);
794
795         tca_invalidate_action(&tsw->green_action);
796         tca_invalidate_action(&tsw->yellow_action);
797         tca_invalidate_action(&tsw->red_action);
798
799         cdnr_cbdestroy(tsw);
800         return (0);
801 }
802
803 static struct tc_action *
804 tswtcm_input(cb, pktinfo)
805         struct cdnr_block *cb;
806         struct cdnr_pktinfo *pktinfo;
807 {
808         struct tswtcm   *tsw = (struct tswtcm *)cb;
809         int             len;
810         u_int32_t       avg_rate;
811         u_int64_t       interval, now, tmp;
812
813         /*
814          * rate estimator
815          */
816         len = pktinfo->pkt_len;
817         now = read_machclk();
818
819         interval = now - tsw->t_front;
820         /*
821          * calculate average rate:
822          *      avg = (avg * timewin + pkt_len)/(timewin + interval)
823          * pkt_len needs to be multiplied by machclk_freq in order to
824          * get (bytes/sec).
825          * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
826          * less than 32 bits, the following 64-bit operation has enough
827          * precision.
828          */
829         tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
830                + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
831         tsw->avg_rate = avg_rate = (u_int32_t)tmp;
832         tsw->t_front = now;
833
834         /*
835          * marker
836          */
837         if (avg_rate > tsw->cmtd_rate) {
838                 u_int32_t randval = arc4random() % avg_rate;
839
840                 if (avg_rate > tsw->peak_rate) {
841                         if (randval < avg_rate - tsw->peak_rate) {
842                                 /* mark red */
843                                 pktinfo->pkt_dscp = tsw->red_dscp;
844                                 PKTCNTR_ADD(&tsw->red_cnt, len);
845                                 return (&tsw->red_action);
846                         } else if (randval < avg_rate - tsw->cmtd_rate)
847                                 goto mark_yellow;
848                 } else {
849                         /* peak_rate >= avg_rate > cmtd_rate */
850                         if (randval < avg_rate - tsw->cmtd_rate) {
851                         mark_yellow:
852                                 pktinfo->pkt_dscp = tsw->yellow_dscp;
853                                 PKTCNTR_ADD(&tsw->yellow_cnt, len);
854                                 return (&tsw->yellow_action);
855                         }
856                 }
857         }
858
859         /* mark green */
860         pktinfo->pkt_dscp = tsw->green_dscp;
861         PKTCNTR_ADD(&tsw->green_cnt, len);
862         return (&tsw->green_action);
863 }
864
865 /*
866  * ioctl requests
867  */
868 static int
869 cdnrcmd_if_attach(ifname)
870         char *ifname;
871 {
872         struct ifnet *ifp;
873         struct top_cdnr *top;
874
875         if ((ifp = ifunit(ifname)) == NULL)
876                 return (EBADF);
877
878         if (ifp->if_snd.altq_cdnr != NULL)
879                 return (EBUSY);
880
881         if ((top = top_create(&ifp->if_snd)) == NULL)
882                 return (ENOMEM);
883         return (0);
884 }
885
886 static int
887 cdnrcmd_if_detach(ifname)
888         char *ifname;
889 {
890         struct top_cdnr *top;
891
892         if ((top = tcb_lookup(ifname)) == NULL)
893                 return (EBADF);
894
895         return top_destroy(top);
896 }
897
898 static int
899 cdnrcmd_add_element(ap)
900         struct cdnr_add_element *ap;
901 {
902         struct top_cdnr *top;
903         struct cdnr_block *cb;
904
905         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
906                 return (EBADF);
907
908         cb = element_create(top, &ap->action);
909         if (cb == NULL)
910                 return (EINVAL);
911         /* return a class handle to the user */
912         ap->cdnr_handle = cdnr_cb2handle(cb);
913         return (0);
914 }
915
916 static int
917 cdnrcmd_delete_element(ap)
918         struct cdnr_delete_element *ap;
919 {
920         struct top_cdnr *top;
921         struct cdnr_block *cb;
922
923         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
924                 return (EBADF);
925
926         if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
927                 return (EINVAL);
928
929         if (cb->cb_type != TCETYPE_ELEMENT)
930                 return generic_element_destroy(cb);
931
932         return element_destroy(cb);
933 }
934
935 static int
936 cdnrcmd_add_filter(ap)
937         struct cdnr_add_filter *ap;
938 {
939         struct top_cdnr *top;
940         struct cdnr_block *cb;
941
942         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
943                 return (EBADF);
944
945         if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
946                 return (EINVAL);
947
948         return acc_add_filter(&top->tc_classifier, &ap->filter,
949                               cb, &ap->filter_handle);
950 }
951
952 static int
953 cdnrcmd_delete_filter(ap)
954         struct cdnr_delete_filter *ap;
955 {
956         struct top_cdnr *top;
957
958         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
959                 return (EBADF);
960
961         return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
962 }
963
964 static int
965 cdnrcmd_add_tbm(ap)
966         struct cdnr_add_tbmeter *ap;
967 {
968         struct top_cdnr *top;
969         struct tbmeter *tbm;
970
971         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
972                 return (EBADF);
973
974         tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
975         if (tbm == NULL)
976                 return (EINVAL);
977         /* return a class handle to the user */
978         ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
979         return (0);
980 }
981
982 static int
983 cdnrcmd_modify_tbm(ap)
984         struct cdnr_modify_tbmeter *ap;
985 {
986         struct tbmeter *tbm;
987
988         if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
989                 return (EINVAL);
990
991         tb_import_profile(&tbm->tb, &ap->profile);
992
993         return (0);
994 }
995
996 static int
997 cdnrcmd_tbm_stats(ap)
998         struct cdnr_tbmeter_stats *ap;
999 {
1000         struct tbmeter *tbm;
1001
1002         if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1003                 return (EINVAL);
1004
1005         ap->in_cnt = tbm->in_cnt;
1006         ap->out_cnt = tbm->out_cnt;
1007
1008         return (0);
1009 }
1010
1011 static int
1012 cdnrcmd_add_trtcm(ap)
1013         struct cdnr_add_trtcm *ap;
1014 {
1015         struct top_cdnr *top;
1016         struct trtcm *tcm;
1017
1018         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1019                 return (EBADF);
1020
1021         tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
1022                            &ap->green_action, &ap->yellow_action,
1023                            &ap->red_action, ap->coloraware);
1024         if (tcm == NULL)
1025                 return (EINVAL);
1026
1027         /* return a class handle to the user */
1028         ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
1029         return (0);
1030 }
1031
1032 static int
1033 cdnrcmd_modify_trtcm(ap)
1034         struct cdnr_modify_trtcm *ap;
1035 {
1036         struct trtcm *tcm;
1037
1038         if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1039                 return (EINVAL);
1040
1041         tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
1042         tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
1043
1044         return (0);
1045 }
1046
1047 static int
1048 cdnrcmd_tcm_stats(ap)
1049         struct cdnr_tcm_stats *ap;
1050 {
1051         struct cdnr_block *cb;
1052
1053         if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1054                 return (EINVAL);
1055
1056         if (cb->cb_type == TCETYPE_TRTCM) {
1057             struct trtcm *tcm = (struct trtcm *)cb;
1058
1059             ap->green_cnt = tcm->green_cnt;
1060             ap->yellow_cnt = tcm->yellow_cnt;
1061             ap->red_cnt = tcm->red_cnt;
1062         } else if (cb->cb_type == TCETYPE_TSWTCM) {
1063             struct tswtcm *tsw = (struct tswtcm *)cb;
1064
1065             ap->green_cnt = tsw->green_cnt;
1066             ap->yellow_cnt = tsw->yellow_cnt;
1067             ap->red_cnt = tsw->red_cnt;
1068         } else
1069             return (EINVAL);
1070
1071         return (0);
1072 }
1073
1074 static int
1075 cdnrcmd_add_tswtcm(ap)
1076         struct cdnr_add_tswtcm *ap;
1077 {
1078         struct top_cdnr *top;
1079         struct tswtcm *tsw;
1080
1081         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1082                 return (EBADF);
1083
1084         if (ap->cmtd_rate > ap->peak_rate)
1085                 return (EINVAL);
1086
1087         tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
1088                             ap->avg_interval, &ap->green_action,
1089                             &ap->yellow_action, &ap->red_action);
1090         if (tsw == NULL)
1091             return (EINVAL);
1092
1093         /* return a class handle to the user */
1094         ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
1095         return (0);
1096 }
1097
1098 static int
1099 cdnrcmd_modify_tswtcm(ap)
1100         struct cdnr_modify_tswtcm *ap;
1101 {
1102         struct tswtcm *tsw;
1103
1104         if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
1105                 return (EINVAL);
1106
1107         if (ap->cmtd_rate > ap->peak_rate)
1108                 return (EINVAL);
1109
1110         /* convert rates from bits/sec to bytes/sec */
1111         tsw->cmtd_rate = ap->cmtd_rate / 8;
1112         tsw->peak_rate = ap->peak_rate / 8;
1113         tsw->avg_rate = 0;
1114
1115         /* timewin is converted from msec to machine clock unit */
1116         tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
1117
1118         return (0);
1119 }
1120
1121 static int
1122 cdnrcmd_get_stats(ap)
1123         struct cdnr_get_stats *ap;
1124 {
1125         struct top_cdnr *top;
1126         struct cdnr_block *cb;
1127         struct tbmeter *tbm;
1128         struct trtcm *tcm;
1129         struct tswtcm *tsw;
1130         struct tce_stats tce, *usp;
1131         int error, n, nskip, nelements;
1132
1133         if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
1134                 return (EBADF);
1135
1136         /* copy action stats */
1137         bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
1138
1139         /* stats for each element */
1140         nelements = ap->nelements;
1141         usp = ap->tce_stats;
1142         if (nelements <= 0 || usp == NULL)
1143                 return (0);
1144
1145         nskip = ap->nskip;
1146         n = 0;
1147         LIST_FOREACH(cb, &top->tc_elements, cb_next) {
1148                 if (nskip > 0) {
1149                         nskip--;
1150                         continue;
1151                 }
1152
1153                 bzero(&tce, sizeof(tce));
1154                 tce.tce_handle = cb->cb_handle;
1155                 tce.tce_type = cb->cb_type;
1156                 switch (cb->cb_type) {
1157                 case TCETYPE_TBMETER:
1158                         tbm = (struct tbmeter *)cb;
1159                         tce.tce_cnts[0] = tbm->in_cnt;
1160                         tce.tce_cnts[1] = tbm->out_cnt;
1161                         break;
1162                 case TCETYPE_TRTCM:
1163                         tcm = (struct trtcm *)cb;
1164                         tce.tce_cnts[0] = tcm->green_cnt;
1165                         tce.tce_cnts[1] = tcm->yellow_cnt;
1166                         tce.tce_cnts[2] = tcm->red_cnt;
1167                         break;
1168                 case TCETYPE_TSWTCM:
1169                         tsw = (struct tswtcm *)cb;
1170                         tce.tce_cnts[0] = tsw->green_cnt;
1171                         tce.tce_cnts[1] = tsw->yellow_cnt;
1172                         tce.tce_cnts[2] = tsw->red_cnt;
1173                         break;
1174                 default:
1175                         continue;
1176                 }
1177
1178                 if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
1179                                      sizeof(tce))) != 0)
1180                         return (error);
1181
1182                 if (++n == nelements)
1183                         break;
1184         }
1185         ap->nelements = n;
1186
1187         return (0);
1188 }
1189
1190 /*
1191  * conditioner device interface
1192  */
1193 int
1194 cdnropen(dev, flag, fmt, p)
1195         dev_t dev;
1196         int flag, fmt;
1197 #if (__FreeBSD_version > 500000)
1198         struct thread *p;
1199 #else
1200         struct proc *p;
1201 #endif
1202 {
1203         if (machclk_freq == 0)
1204                 init_machclk();
1205
1206         if (machclk_freq == 0) {
1207                 printf("cdnr: no cpu clock available!\n");
1208                 return (ENXIO);
1209         }
1210
1211         /* everything will be done when the queueing scheme is attached. */
1212         return 0;
1213 }
1214
1215 int
1216 cdnrclose(dev, flag, fmt, p)
1217         dev_t dev;
1218         int flag, fmt;
1219 #if (__FreeBSD_version > 500000)
1220         struct thread *p;
1221 #else
1222         struct proc *p;
1223 #endif
1224 {
1225         struct top_cdnr *top;
1226         int err, error = 0;
1227
1228         while ((top = LIST_FIRST(&tcb_list)) != NULL) {
1229                 /* destroy all */
1230                 err = top_destroy(top);
1231                 if (err != 0 && error == 0)
1232                         error = err;
1233         }
1234         altq_input = NULL;
1235
1236         return (error);
1237 }
1238
1239 int
1240 cdnrioctl(dev, cmd, addr, flag, p)
1241         dev_t dev;
1242         ioctlcmd_t cmd;
1243         caddr_t addr;
1244         int flag;
1245 #if (__FreeBSD_version > 500000)
1246         struct thread *p;
1247 #else
1248         struct proc *p;
1249 #endif
1250 {
1251         struct top_cdnr *top;
1252         struct cdnr_interface *ifacep;
1253         int     s, error = 0;
1254
1255         /* check super-user privilege */
1256         switch (cmd) {
1257         case CDNR_GETSTATS:
1258                 break;
1259         default:
1260 #if (__FreeBSD_version > 700000)
1261                 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
1262 #elsif (__FreeBSD_version > 400000)
1263                 if ((error = suser(p)) != 0)
1264 #else
1265                 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
1266 #endif
1267                         return (error);
1268                 break;
1269         }
1270
1271         s = splnet();
1272         switch (cmd) {
1273
1274         case CDNR_IF_ATTACH:
1275                 ifacep = (struct cdnr_interface *)addr;
1276                 error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
1277                 break;
1278
1279         case CDNR_IF_DETACH:
1280                 ifacep = (struct cdnr_interface *)addr;
1281                 error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
1282                 break;
1283
1284         case CDNR_ENABLE:
1285         case CDNR_DISABLE:
1286                 ifacep = (struct cdnr_interface *)addr;
1287                 if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
1288                         error = EBADF;
1289                         break;
1290                 }
1291
1292                 switch (cmd) {
1293
1294                 case CDNR_ENABLE:
1295                         ALTQ_SET_CNDTNING(top->tc_ifq);
1296                         if (altq_input == NULL)
1297                                 altq_input = altq_cdnr_input;
1298                         break;
1299
1300                 case CDNR_DISABLE:
1301                         ALTQ_CLEAR_CNDTNING(top->tc_ifq);
1302                         LIST_FOREACH(top, &tcb_list, tc_next)
1303                                 if (ALTQ_IS_CNDTNING(top->tc_ifq))
1304                                         break;
1305                         if (top == NULL)
1306                                 altq_input = NULL;
1307                         break;
1308                 }
1309                 break;
1310
1311         case CDNR_ADD_ELEM:
1312                 error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
1313                 break;
1314
1315         case CDNR_DEL_ELEM:
1316                 error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
1317                 break;
1318
1319         case CDNR_ADD_TBM:
1320                 error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
1321                 break;
1322
1323         case CDNR_MOD_TBM:
1324                 error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
1325                 break;
1326
1327         case CDNR_TBM_STATS:
1328                 error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
1329                 break;
1330
1331         case CDNR_ADD_TCM:
1332                 error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
1333                 break;
1334
1335         case CDNR_MOD_TCM:
1336                 error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
1337                 break;
1338
1339         case CDNR_TCM_STATS:
1340                 error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
1341                 break;
1342
1343         case CDNR_ADD_FILTER:
1344                 error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
1345                 break;
1346
1347         case CDNR_DEL_FILTER:
1348                 error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
1349                 break;
1350
1351         case CDNR_GETSTATS:
1352                 error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
1353                 break;
1354
1355         case CDNR_ADD_TSW:
1356                 error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
1357                 break;
1358
1359         case CDNR_MOD_TSW:
1360                 error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
1361                 break;
1362
1363         default:
1364                 error = EINVAL;
1365                 break;
1366         }
1367         splx(s);
1368
1369         return error;
1370 }
1371
#ifdef KLD_MODULE

/*
 * Switch entry used when the conditioner is built as a loadable
 * module: the name "cdnr" followed by this file's open, close and
 * ioctl handlers, registered through ALTQ_MODULE under ALTQT_CDNR.
 */
static struct altqsw cdnr_sw = {
        "cdnr",
        cdnropen,
        cdnrclose,
        cdnrioctl
};

ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);

#endif /* KLD_MODULE */
1380
1381 #endif /* ALTQ3_COMPAT */
1382 #endif /* ALTQ_CDNR */