]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - sys/contrib/altq/altq/altq_subr.c
MFC r287009, r287120 and r298131:
[FreeBSD/stable/10.git] / sys / contrib / altq / altq / altq_subr.c
1 /*      $FreeBSD$       */
2 /*      $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ */
3
4 /*
5  * Copyright (C) 1997-2003
6  *      Sony Computer Science Laboratories Inc.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #if defined(__FreeBSD__) || defined(__NetBSD__)
31 #include "opt_altq.h"
32 #include "opt_inet.h"
33 #ifdef __FreeBSD__
34 #include "opt_inet6.h"
35 #endif
36 #endif /* __FreeBSD__ || __NetBSD__ */
37
38 #include <sys/param.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/kernel.h>
46 #include <sys/errno.h>
47 #include <sys/syslog.h>
48 #include <sys/sysctl.h>
49 #include <sys/queue.h>
50
51 #include <net/if.h>
52 #include <net/if_var.h>
53 #include <net/if_dl.h>
54 #include <net/if_types.h>
55 #ifdef __FreeBSD__
56 #include <net/vnet.h>
57 #endif
58
59 #include <netinet/in.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/ip.h>
62 #ifdef INET6
63 #include <netinet/ip6.h>
64 #endif
65 #include <netinet/tcp.h>
66 #include <netinet/udp.h>
67
68 #include <netpfil/pf/pf.h>
69 #include <netpfil/pf/pf_altq.h>
70 #include <altq/altq.h>
71 #ifdef ALTQ3_COMPAT
72 #include <altq/altq_conf.h>
73 #endif
74
75 /* machine dependent clock related includes */
76 #ifdef __FreeBSD__
77 #include <sys/bus.h>
78 #include <sys/cpu.h>
79 #include <sys/eventhandler.h>
80 #include <machine/clock.h>
81 #endif
82 #if defined(__amd64__) || defined(__i386__)
83 #include <machine/cpufunc.h>            /* for pentium tsc */
84 #include <machine/specialreg.h>         /* for CPUID_TSC */
85 #ifdef __FreeBSD__
86 #include <machine/md_var.h>             /* for cpu_feature */
87 #elif defined(__NetBSD__) || defined(__OpenBSD__)
88 #include <machine/cpu.h>                /* for cpu_feature */
89 #endif
90 #endif /* __amd64 || __i386__ */
91
92 /*
93  * internal function prototypes
94  */
95 static void     tbr_timeout(void *);
96 int (*altq_input)(struct mbuf *, int) = NULL;
97 static struct mbuf *tbr_dequeue(struct ifaltq *, int);
98 static int tbr_timer = 0;       /* token bucket regulator timer */
99 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
100 static struct callout tbr_callout = CALLOUT_INITIALIZER;
101 #else
102 static struct callout tbr_callout;
103 #endif
104
105 #ifdef ALTQ3_CLFIER_COMPAT
106 static int      extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
107 #ifdef INET6
108 static int      extract_ports6(struct mbuf *, struct ip6_hdr *,
109                                struct flowinfo_in6 *);
110 #endif
111 static int      apply_filter4(u_int32_t, struct flow_filter *,
112                               struct flowinfo_in *);
113 static int      apply_ppfilter4(u_int32_t, struct flow_filter *,
114                                 struct flowinfo_in *);
115 #ifdef INET6
116 static int      apply_filter6(u_int32_t, struct flow_filter6 *,
117                               struct flowinfo_in6 *);
118 #endif
119 static int      apply_tosfilter4(u_int32_t, struct flow_filter *,
120                                  struct flowinfo_in *);
121 static u_long   get_filt_handle(struct acc_classifier *, int);
122 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
123 static u_int32_t filt2fibmask(struct flow_filter *);
124
125 static void     ip4f_cache(struct ip *, struct flowinfo_in *);
126 static int      ip4f_lookup(struct ip *, struct flowinfo_in *);
127 static int      ip4f_init(void);
128 static struct ip4_frag  *ip4f_alloc(void);
129 static void     ip4f_free(struct ip4_frag *);
130 #endif /* ALTQ3_CLFIER_COMPAT */
131
132 /*
133  * alternate queueing support routines
134  */
135
136 /* look up the queue state by the interface name and the queueing type. */
137 void *
138 altq_lookup(name, type)
139         char *name;
140         int type;
141 {
142         struct ifnet *ifp;
143
144         if ((ifp = ifunit(name)) != NULL) {
145                 /* read if_snd unlocked */
146                 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
147                         return (ifp->if_snd.altq_disc);
148         }
149
150         return NULL;
151 }
152
/*
 * Attach a queueing discipline to the interface send queue.
 *
 * ifq:        the interface send queue (ifp->if_snd)
 * type:       ALTQT_* discipline identifier
 * discipline: discipline-private state, stored in altq_disc
 * enqueue/dequeue/request: discipline entry points installed on ifq
 * clfier/classify: classifier state and hook (NULL for pf-driven altq)
 *
 * Returns 0 on success, ENXIO if ALTQ was never initialized on this
 * queue, EBUSY/EEXIST for altq3 conflicts (ALTQ3_COMPAT only).
 */
int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;
	void *(*classify)(void *, struct mbuf *, int);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EBUSY;
		}
		if (ALTQ_IS_ATTACHED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EEXIST;
		}
	}
#endif
	/* install the discipline's hooks and state */
	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	/* keep only the sticky flags; clears e.g. ALTQF_CLASSIFY */
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	/* pin the discipline's kernel module while attached */
	altq_module_incref(type);
#endif
#endif
	IFQ_UNLOCK(ifq);
	return 0;
}
202
203 int
204 altq_detach(ifq)
205         struct ifaltq *ifq;
206 {
207         IFQ_LOCK(ifq);
208
209         if (!ALTQ_IS_READY(ifq)) {
210                 IFQ_UNLOCK(ifq);
211                 return ENXIO;
212         }
213         if (ALTQ_IS_ENABLED(ifq)) {
214                 IFQ_UNLOCK(ifq);
215                 return EBUSY;
216         }
217         if (!ALTQ_IS_ATTACHED(ifq)) {
218                 IFQ_UNLOCK(ifq);
219                 return (0);
220         }
221 #ifdef ALTQ3_COMPAT
222 #ifdef ALTQ_KLD
223         altq_module_declref(ifq->altq_type);
224 #endif
225 #endif
226
227         ifq->altq_type     = ALTQT_NONE;
228         ifq->altq_disc     = NULL;
229         ifq->altq_enqueue  = NULL;
230         ifq->altq_dequeue  = NULL;
231         ifq->altq_request  = NULL;
232         ifq->altq_clfier   = NULL;
233         ifq->altq_classify = NULL;
234         ifq->altq_flags &= ALTQF_CANTCHANGE;
235
236         IFQ_UNLOCK(ifq);
237         return 0;
238 }
239
/*
 * Enable the attached discipline on the send queue.  The queue is
 * purged first so the discipline starts from an empty state.
 * Returns 0 on success (idempotent when already enabled), ENXIO if
 * ALTQ is not initialized on this queue.
 */
int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		/* already enabled; nothing to do */
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* drop any packets queued before the discipline takes over */
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}
273
/*
 * Disable the discipline on the send queue, flushing any packets it
 * holds.  Idempotent: returns 0 when the discipline is not enabled.
 */
int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* flush packets held by the discipline before turning it off */
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}
299
300 #ifdef ALTQ_DEBUG
/*
 * Report an ALTQ assertion failure (see the ASSERT macro) and panic.
 * Never returns.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
311 #endif
312
313 /*
314  * internal representation of token bucket parameters
315  *      rate:   byte_per_unittime << 32
316  *              (((bits_per_sec) / 8) << 32) / machclk_freq
317  *      depth:  byte << 32
318  *
319  */
320 #define TBR_SHIFT       32
321 #define TBR_SCALE(x)    ((int64_t)(x) << TBR_SHIFT)
322 #define TBR_UNSCALE(x)  ((x) >> TBR_SHIFT)
323
/*
 * Token bucket regulator dequeue hook, installed while a TBR is set
 * on the queue.  Allows a dequeue only when the token count is
 * positive; tokens accumulate at tbr_rate and are capped at
 * tbr_depth (both in the scaled fixed-point representation above).
 * op is ALTDQ_POLL (peek) or ALTDQ_REMOVE (dequeue).
 * Returns the mbuf, or NULL when the bucket does not permit a
 * dequeue.  The ifq lock must be held.
 */
static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				/* long idle period: bucket is full */
				tbr->tbr_token = tbr->tbr_depth;
			else {
				/* refill for the elapsed machclk ticks,
				 * capped at the bucket depth */
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	/*
	 * Pull the packet from the discipline when ALTQ is enabled;
	 * otherwise fall back to the plain interface queue.
	 */
	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	/* charge the bucket for the bytes actually removed */
	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}
371
372 /*
373  * set a token bucket regulator.
374  * if the specified rate is zero, the token bucket regulator is deleted.
375  */
/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 *
 * profile->rate is in bits per second and profile->depth in bytes;
 * both are converted to the scaled fixed-point representation.
 * Returns 0 on success; ENXIO when no CPU clock is available, ENOENT
 * when asked to delete a nonexistent regulator, ENOMEM on allocation
 * failure.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	/* lazily publish the dequeue hook used by the IFQ macros */
	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	/* bytes-per-machclk-tick and depth, both scaled by TBR_SHIFT */
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		/* zero rate: treat the bucket as never refilling */
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;	/* start with a full bucket */
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		/* replacing an existing tbr: the timer is already running */
		free(otbr, M_DEVBUF);
	else {
		/* first tbr in the system: start the kick timer */
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
436
437 /*
438  * tbr_timeout goes through the interface list, and kicks the drivers
439  * if necessary.
440  *
441  * MPSAFE
442  */
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * Reschedules itself every tick while at least one interface has a
 * TBR enabled; otherwise clears tbr_timer and stops.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
#ifdef __FreeBSD__
	VNET_ITERATOR_DECL(vnet_iter);
#endif
	struct ifnet *ifp;
	int active, s;

	active = 0;
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
#ifdef __FreeBSD__
	/* hold the ifnet and vnet lists stable while walking them */
	IFNET_RLOCK_NOSLEEP();
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
#endif
		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
		    ifp = TAILQ_NEXT(ifp, if_list)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			/* restart drivers stalled by an empty bucket */
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
#ifdef __FreeBSD__
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	IFNET_RUNLOCK_NOSLEEP();
#endif
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}
487
488 /*
489  * get token bucket regulator profile
490  */
491 int
492 tbr_get(ifq, profile)
493         struct ifaltq *ifq;
494         struct tb_profile *profile;
495 {
496         struct tb_regulator *tbr;
497
498         IFQ_LOCK(ifq);
499         if ((tbr = ifq->altq_tbr) == NULL) {
500                 profile->rate = 0;
501                 profile->depth = 0;
502         } else {
503                 profile->rate =
504                     (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
505                 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
506         }
507         IFQ_UNLOCK(ifq);
508         return (0);
509 }
510
511 /*
512  * attach a discipline to the interface.  if one already exists, it is
513  * overridden.
514  * Locking is done in the discipline specific attach functions. Basically
515  * they call back to altq_attach which takes care of the attach and locking.
516  */
517 int
518 altq_pfattach(struct pf_altq *a)
519 {
520         int error = 0;
521
522         switch (a->scheduler) {
523         case ALTQT_NONE:
524                 break;
525 #ifdef ALTQ_CBQ
526         case ALTQT_CBQ:
527                 error = cbq_pfattach(a);
528                 break;
529 #endif
530 #ifdef ALTQ_PRIQ
531         case ALTQT_PRIQ:
532                 error = priq_pfattach(a);
533                 break;
534 #endif
535 #ifdef ALTQ_HFSC
536         case ALTQT_HFSC:
537                 error = hfsc_pfattach(a);
538                 break;
539 #endif
540 #ifdef ALTQ_FAIRQ
541         case ALTQT_FAIRQ:
542                 error = fairq_pfattach(a);
543                 break;
544 #endif
545 #ifdef ALTQ_CODEL
546         case ALTQT_CODEL:
547                 error = codel_pfattach(a);
548                 break;
549 #endif
550         default:
551                 error = ENXIO;
552         }
553
554         return (error);
555 }
556
557 /*
558  * detach a discipline from the interface.
559  * it is possible that the discipline was already overridden by another
560  * discipline.
561  */
/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 * Returns 0 on success (or when the discipline is no longer current),
 * EINVAL for an unknown interface, or the error from
 * altq_disable/altq_detach.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}
590
591 /*
592  * add a discipline or a queue
593  * Locking is done in the discipline specific functions with regards to
594  * malloc with WAITOK, also it is not yet clear which lock to use.
595  */
596 int
597 altq_add(struct pf_altq *a)
598 {
599         int error = 0;
600
601         if (a->qname[0] != 0)
602                 return (altq_add_queue(a));
603
604         if (machclk_freq == 0)
605                 init_machclk();
606         if (machclk_freq == 0)
607                 panic("altq_add: no cpu clock");
608
609         switch (a->scheduler) {
610 #ifdef ALTQ_CBQ
611         case ALTQT_CBQ:
612                 error = cbq_add_altq(a);
613                 break;
614 #endif
615 #ifdef ALTQ_PRIQ
616         case ALTQT_PRIQ:
617                 error = priq_add_altq(a);
618                 break;
619 #endif
620 #ifdef ALTQ_HFSC
621         case ALTQT_HFSC:
622                 error = hfsc_add_altq(a);
623                 break;
624 #endif
625 #ifdef ALTQ_FAIRQ
626         case ALTQT_FAIRQ:
627                 error = fairq_add_altq(a);
628                 break;
629 #endif
630 #ifdef ALTQ_CODEL
631         case ALTQT_CODEL:
632                 error = codel_add_altq(a);
633                 break;
634 #endif
635         default:
636                 error = ENXIO;
637         }
638
639         return (error);
640 }
641
642 /*
643  * remove a discipline or a queue
644  * It is yet unclear what lock to use to protect this operation, the
645  * discipline specific functions will determine and grab it
646  */
647 int
648 altq_remove(struct pf_altq *a)
649 {
650         int error = 0;
651
652         if (a->qname[0] != 0)
653                 return (altq_remove_queue(a));
654
655         switch (a->scheduler) {
656 #ifdef ALTQ_CBQ
657         case ALTQT_CBQ:
658                 error = cbq_remove_altq(a);
659                 break;
660 #endif
661 #ifdef ALTQ_PRIQ
662         case ALTQT_PRIQ:
663                 error = priq_remove_altq(a);
664                 break;
665 #endif
666 #ifdef ALTQ_HFSC
667         case ALTQT_HFSC:
668                 error = hfsc_remove_altq(a);
669                 break;
670 #endif
671 #ifdef ALTQ_FAIRQ
672         case ALTQT_FAIRQ:
673                 error = fairq_remove_altq(a);
674                 break;
675 #endif
676 #ifdef ALTQ_CODEL
677         case ALTQT_CODEL:
678                 error = codel_remove_altq(a);
679                 break;
680 #endif
681         default:
682                 error = ENXIO;
683         }
684
685         return (error);
686 }
687
688 /*
689  * add a queue to the discipline
690  * It is yet unclear what lock to use to protect this operation, the
691  * discipline specific functions will determine and grab it
692  */
693 int
694 altq_add_queue(struct pf_altq *a)
695 {
696         int error = 0;
697
698         switch (a->scheduler) {
699 #ifdef ALTQ_CBQ
700         case ALTQT_CBQ:
701                 error = cbq_add_queue(a);
702                 break;
703 #endif
704 #ifdef ALTQ_PRIQ
705         case ALTQT_PRIQ:
706                 error = priq_add_queue(a);
707                 break;
708 #endif
709 #ifdef ALTQ_HFSC
710         case ALTQT_HFSC:
711                 error = hfsc_add_queue(a);
712                 break;
713 #endif
714 #ifdef ALTQ_FAIRQ
715         case ALTQT_FAIRQ:
716                 error = fairq_add_queue(a);
717                 break;
718 #endif
719         default:
720                 error = ENXIO;
721         }
722
723         return (error);
724 }
725
726 /*
727  * remove a queue from the discipline
728  * It is yet unclear what lock to use to protect this operation, the
729  * discipline specific functions will determine and grab it
730  */
731 int
732 altq_remove_queue(struct pf_altq *a)
733 {
734         int error = 0;
735
736         switch (a->scheduler) {
737 #ifdef ALTQ_CBQ
738         case ALTQT_CBQ:
739                 error = cbq_remove_queue(a);
740                 break;
741 #endif
742 #ifdef ALTQ_PRIQ
743         case ALTQT_PRIQ:
744                 error = priq_remove_queue(a);
745                 break;
746 #endif
747 #ifdef ALTQ_HFSC
748         case ALTQT_HFSC:
749                 error = hfsc_remove_queue(a);
750                 break;
751 #endif
752 #ifdef ALTQ_FAIRQ
753         case ALTQT_FAIRQ:
754                 error = fairq_remove_queue(a);
755                 break;
756 #endif
757         default:
758                 error = ENXIO;
759         }
760
761         return (error);
762 }
763
764 /*
765  * get queue statistics
766  * Locking is done in the discipline specific functions with regards to
767  * copyout operations, also it is not yet clear which lock to use.
768  */
769 int
770 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
771 {
772         int error = 0;
773
774         switch (a->scheduler) {
775 #ifdef ALTQ_CBQ
776         case ALTQT_CBQ:
777                 error = cbq_getqstats(a, ubuf, nbytes);
778                 break;
779 #endif
780 #ifdef ALTQ_PRIQ
781         case ALTQT_PRIQ:
782                 error = priq_getqstats(a, ubuf, nbytes);
783                 break;
784 #endif
785 #ifdef ALTQ_HFSC
786         case ALTQT_HFSC:
787                 error = hfsc_getqstats(a, ubuf, nbytes);
788                 break;
789 #endif
790 #ifdef ALTQ_FAIRQ
791         case ALTQT_FAIRQ:
792                 error = fairq_getqstats(a, ubuf, nbytes);
793                 break;
794 #endif
795 #ifdef ALTQ_CODEL
796         case ALTQT_CODEL:
797                 error = codel_getqstats(a, ubuf, nbytes);
798                 break;
799 #endif
800         default:
801                 error = ENXIO;
802         }
803
804         return (error);
805 }
806
807 /*
808  * read and write diffserv field in IPv4 or IPv6 header
809  */
/*
 * read the diffserv field (IPv4 TOS byte, or IPv6 traffic class) from
 * the header cached in pktattr.  Returns 0 when the attribute is
 * missing, stale, of an unhandled address family, or the IP version
 * does not match.
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		/* traffic class is bits 20-27 of the host-order flow word */
		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
856
/*
 * write the diffserv field into the IPv4 TOS byte (updating the header
 * checksum incrementally) or the IPv6 traffic class, via the header
 * pointer cached in pktattr.  Silently returns when the attribute is
 * missing, stale, of an unhandled family, or the IP version mismatches.
 * For IPv4 the two low-order CU bits of the existing TOS are preserved.
 */
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		/* splice the new traffic class into bits 20-27 */
		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
917
918
919 /*
920  * high resolution clock support taking advantage of a machine dependent
921  * high resolution time counter (e.g., timestamp counter of intel pentium).
922  * we assume
923  *  - 64-bit-long monotonically-increasing counter
924  *  - frequency range is 100M-4GHz (CPU speed)
925  */
926 /* if pcc is not available or disabled, emulate 256MHz using microtime() */
927 #define MACHCLK_SHIFT   8
928
929 int machclk_usepcc;
930 u_int32_t machclk_freq;
931 u_int32_t machclk_per_tick;
932
933 #if defined(__i386__) && defined(__NetBSD__)
934 extern u_int64_t cpu_tsc_freq;
935 #endif
936
#if (__FreeBSD_version >= 700035)
/*
 * Update TSC freq with the value indicated by the caller.
 * Registered below as a cpufreq_post_change event handler so the
 * machclk frequency tracks CPU frequency transitions.
 */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */
958
/*
 * One-time setup for the machine clock: initialize the TBR callout and
 * decide whether the CPU's cycle counter (TSC) can be used, falling
 * back to microtime() emulation on SMP systems or when TSC is absent.
 */
static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

	/* no cycle counter on this arch, or explicitly disabled */
#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
	/* per-CPU TSCs are not synchronized; unusable under SMP */
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
#ifdef __FreeBSD__
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
#else
	if ((cpu_feature & CPUID_TSC) == 0)
#endif
		machclk_usepcc = 0;
#endif
}
988
/*
 * Initialize (or re-initialize after a CPU frequency change) the
 * machine clock: sets machclk_freq and machclk_per_tick.  Uses the
 * emulated 256MHz microtime() clock when the cycle counter is not
 * usable; otherwise reads the TSC frequency from the platform, or as
 * a last resort measures it by sleeping for about one second.
 * May sleep (tsleep) in the measurement path.
 */
void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
#ifdef __FreeBSD__
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)cpu_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
	machclk_freq = pentium_mhz * 1000000;
#endif
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		/* count machclk ticks across a ~1 second sleep and scale
		 * by the microtime()-measured elapsed microseconds */
		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}
1051
1052 #if defined(__OpenBSD__) && defined(__i386__)
/*
 * Read the CPU timestamp counter.  The raw bytes 0x0f 0x31 encode the
 * RDTSC instruction; the "=A" constraint returns the 64-bit EDX:EAX
 * result on i386.
 */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
1060 #endif /* __OpenBSD__ && __i386__ */
1061
/*
 * Return the current machine clock value: the raw TSC when
 * machclk_usepcc is set, otherwise microseconds since boot scaled up
 * by MACHCLK_SHIFT to match the emulated frequency chosen in
 * init_machclk().
 */
u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
1082
1083 #ifdef ALTQ3_CLFIER_COMPAT
1084
1085 #ifndef IPPROTO_ESP
1086 #define IPPROTO_ESP     50              /* encapsulating security payload */
1087 #endif
1088 #ifndef IPPROTO_AH
1089 #define IPPROTO_AH      51              /* authentication header */
1090 #endif
1091
1092 /*
1093  * extract flow information from a given packet.
1094  * filt_mask shows flowinfo fields required.
1095  * we assume the ip header is in one mbuf, and addresses and ports are
1096  * in network byte order.
1097  */
1098 int
1099 altq_extractflow(m, af, flow, filt_bmask)
1100         struct mbuf *m;
1101         int af;
1102         struct flowinfo *flow;
1103         u_int32_t       filt_bmask;
1104 {
1105
1106         switch (af) {
1107         case PF_INET: {
1108                 struct flowinfo_in *fin;
1109                 struct ip *ip;
1110
1111                 ip = mtod(m, struct ip *);
1112
1113                 if (ip->ip_v != 4)
1114                         break;
1115
1116                 fin = (struct flowinfo_in *)flow;
1117                 fin->fi_len = sizeof(struct flowinfo_in);
1118                 fin->fi_family = AF_INET;
1119
1120                 fin->fi_proto = ip->ip_p;
1121                 fin->fi_tos = ip->ip_tos;
1122
1123                 fin->fi_src.s_addr = ip->ip_src.s_addr;
1124                 fin->fi_dst.s_addr = ip->ip_dst.s_addr;
1125
1126                 if (filt_bmask & FIMB4_PORTS)
1127                         /* if port info is required, extract port numbers */
1128                         extract_ports4(m, ip, fin);
1129                 else {
1130                         fin->fi_sport = 0;
1131                         fin->fi_dport = 0;
1132                         fin->fi_gpi = 0;
1133                 }
1134                 return (1);
1135         }
1136
1137 #ifdef INET6
1138         case PF_INET6: {
1139                 struct flowinfo_in6 *fin6;
1140                 struct ip6_hdr *ip6;
1141
1142                 ip6 = mtod(m, struct ip6_hdr *);
1143                 /* should we check the ip version? */
1144
1145                 fin6 = (struct flowinfo_in6 *)flow;
1146                 fin6->fi6_len = sizeof(struct flowinfo_in6);
1147                 fin6->fi6_family = AF_INET6;
1148
1149                 fin6->fi6_proto = ip6->ip6_nxt;
1150                 fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1151
1152                 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
1153                 fin6->fi6_src = ip6->ip6_src;
1154                 fin6->fi6_dst = ip6->ip6_dst;
1155
1156                 if ((filt_bmask & FIMB6_PORTS) ||
1157                     ((filt_bmask & FIMB6_PROTO)
1158                      && ip6->ip6_nxt > IPPROTO_IPV6))
1159                         /*
1160                          * if port info is required, or proto is required
1161                          * but there are option headers, extract port
1162                          * and protocol numbers.
1163                          */
1164                         extract_ports6(m, ip6, fin6);
1165                 else {
1166                         fin6->fi6_sport = 0;
1167                         fin6->fi6_dport = 0;
1168                         fin6->fi6_gpi = 0;
1169                 }
1170                 return (1);
1171         }
1172 #endif /* INET6 */
1173
1174         default:
1175                 break;
1176         }
1177
1178         /* failed */
1179         flow->fi_len = sizeof(struct flowinfo);
1180         flow->fi_family = AF_UNSPEC;
1181         return (0);
1182 }
1183
1184 /*
1185  * helper routine to extract port numbers
1186  */
1187 /* structure for ipsec and ipv6 option header template */
1188 struct _opt6 {
1189         u_int8_t        opt6_nxt;       /* next header */
1190         u_int8_t        opt6_hlen;      /* header extension length */
1191         u_int16_t       _pad;
1192         u_int32_t       ah_spi;         /* security parameter index
1193                                            for authentication header */
1194 };
1195
1196 /*
1197  * extract port numbers from a ipv4 packet.
1198  */
1199 static int
1200 extract_ports4(m, ip, fin)
1201         struct mbuf *m;
1202         struct ip *ip;
1203         struct flowinfo_in *fin;
1204 {
1205         struct mbuf *m0;
1206         u_short ip_off;
1207         u_int8_t proto;
1208         int     off;
1209
1210         fin->fi_sport = 0;
1211         fin->fi_dport = 0;
1212         fin->fi_gpi = 0;
1213
1214         ip_off = ntohs(ip->ip_off);
1215         /* if it is a fragment, try cached fragment info */
1216         if (ip_off & IP_OFFMASK) {
1217                 ip4f_lookup(ip, fin);
1218                 return (1);
1219         }
1220
1221         /* locate the mbuf containing the protocol header */
1222         for (m0 = m; m0 != NULL; m0 = m0->m_next)
1223                 if (((caddr_t)ip >= m0->m_data) &&
1224                     ((caddr_t)ip < m0->m_data + m0->m_len))
1225                         break;
1226         if (m0 == NULL) {
1227 #ifdef ALTQ_DEBUG
1228                 printf("extract_ports4: can't locate header! ip=%p\n", ip);
1229 #endif
1230                 return (0);
1231         }
1232         off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
1233         proto = ip->ip_p;
1234
1235 #ifdef ALTQ_IPSEC
1236  again:
1237 #endif
1238         while (off >= m0->m_len) {
1239                 off -= m0->m_len;
1240                 m0 = m0->m_next;
1241                 if (m0 == NULL)
1242                         return (0);  /* bogus ip_hl! */
1243         }
1244         if (m0->m_len < off + 4)
1245                 return (0);
1246
1247         switch (proto) {
1248         case IPPROTO_TCP:
1249         case IPPROTO_UDP: {
1250                 struct udphdr *udp;
1251
1252                 udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1253                 fin->fi_sport = udp->uh_sport;
1254                 fin->fi_dport = udp->uh_dport;
1255                 fin->fi_proto = proto;
1256                 }
1257                 break;
1258
1259 #ifdef ALTQ_IPSEC
1260         case IPPROTO_ESP:
1261                 if (fin->fi_gpi == 0){
1262                         u_int32_t *gpi;
1263
1264                         gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1265                         fin->fi_gpi   = *gpi;
1266                 }
1267                 fin->fi_proto = proto;
1268                 break;
1269
1270         case IPPROTO_AH: {
1271                         /* get next header and header length */
1272                         struct _opt6 *opt6;
1273
1274                         opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1275                         proto = opt6->opt6_nxt;
1276                         off += 8 + (opt6->opt6_hlen * 4);
1277                         if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
1278                                 fin->fi_gpi = opt6->ah_spi;
1279                 }
1280                 /* goto the next header */
1281                 goto again;
1282 #endif  /* ALTQ_IPSEC */
1283
1284         default:
1285                 fin->fi_proto = proto;
1286                 return (0);
1287         }
1288
1289         /* if this is a first fragment, cache it. */
1290         if (ip_off & IP_MF)
1291                 ip4f_cache(ip, fin);
1292
1293         return (1);
1294 }
1295
1296 #ifdef INET6
/*
 * extract port numbers (and IPsec SPI) from an ipv6 packet, walking
 * the extension header chain.  Returns 1 on success, 0 when parsing
 * fails or an unsupported header (e.g. a fragment) is encountered.
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance to the mbuf that holds offset 'off' */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/* need at least 4 contiguous bytes at this offset */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* TCP and UDP headers both begin with sport/dport */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				/* record the leading 32-bit word (the SPI) */
				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* AH length counts 4-byte units past the first 8 */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* option header length counts 8-byte units */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
1392 #endif /* INET6 */
1393
1394 /*
1395  * altq common classifier
1396  */
/*
 * Install a new filter in the classifier.  The filter is copied,
 * normalized (wildcard/full address masks, extra address bits
 * cleared), hashed on destination address (v4) or flow label (v6),
 * and inserted in descending rule-number order.  The assigned handle
 * is returned through *phandle.  Returns 0 or an errno value.
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	       M_DEVBUF, M_WAITOK);
	/*
	 * NOTE(review): with M_WAITOK this malloc does not return NULL on
	 * FreeBSD; check kept, presumably for the other platforms.
	 */
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses  */
		   filter4->ff_flow.fi_dst.s_addr &=
		       filter4->ff_mask.mask_dst.s_addr;
		   filter4->ff_flow.fi_src.s_addr &=
		       filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		/* same wildcard/full-mask normalization as the v4 case */
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses  */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
1523
/*
 * Remove the filter identified by handle from the classifier and free
 * it.  Returns 0 on success, EINVAL if the handle is unknown.
 */
int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}
1549
1550 /*
1551  * delete filters referencing to the specified class.
1552  * if the all flag is not 0, delete all the filters.
1553  */
1554 int
1555 acc_discard_filters(classifier, class, all)
1556         struct acc_classifier *classifier;
1557         void    *class;
1558         int     all;
1559 {
1560         struct acc_filter *afp;
1561         int     i, s;
1562
1563 #ifdef __NetBSD__
1564         s = splnet();
1565 #else
1566         s = splimp();
1567 #endif
1568         for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1569                 do {
1570                         LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1571                                 if (all || afp->f_class == class) {
1572                                         LIST_REMOVE(afp, f_chain);
1573                                         free(afp, M_DEVBUF);
1574                                         /* start again from the head */
1575                                         break;
1576                                 }
1577                 } while (afp != NULL);
1578         }
1579         splx(s);
1580
1581         if (all)
1582                 classifier->acc_fbmask = 0;
1583
1584         return (0);
1585 }
1586
/*
 * Classify a packet: return the class pointer of the first matching
 * filter, or NULL when no filter matched.  Fast paths skip the hash
 * lookup when the installed filters reference only the TOS field, or
 * only protocol/port fields.
 */
void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
1686
1687 static int
1688 apply_filter4(fbmask, filt, pkt)
1689         u_int32_t       fbmask;
1690         struct flow_filter *filt;
1691         struct flowinfo_in *pkt;
1692 {
1693         if (filt->ff_flow.fi_family != AF_INET)
1694                 return (0);
1695         if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1696                 return (0);
1697         if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1698                 return (0);
1699         if ((fbmask & FIMB4_DADDR) &&
1700             filt->ff_flow.fi_dst.s_addr !=
1701             (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
1702                 return (0);
1703         if ((fbmask & FIMB4_SADDR) &&
1704             filt->ff_flow.fi_src.s_addr !=
1705             (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
1706                 return (0);
1707         if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1708                 return (0);
1709         if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1710             (pkt->fi_tos & filt->ff_mask.mask_tos))
1711                 return (0);
1712         if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
1713                 return (0);
1714         /* match */
1715         return (1);
1716 }
1717
1718 /*
1719  * filter matching function optimized for a common case that checks
1720  * only protocol and port numbers
1721  */
1722 static int
1723 apply_ppfilter4(fbmask, filt, pkt)
1724         u_int32_t       fbmask;
1725         struct flow_filter *filt;
1726         struct flowinfo_in *pkt;
1727 {
1728         if (filt->ff_flow.fi_family != AF_INET)
1729                 return (0);
1730         if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1731                 return (0);
1732         if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1733                 return (0);
1734         if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1735                 return (0);
1736         /* match */
1737         return (1);
1738 }
1739
1740 /*
1741  * filter matching function only for tos field.
1742  */
1743 static int
1744 apply_tosfilter4(fbmask, filt, pkt)
1745         u_int32_t       fbmask;
1746         struct flow_filter *filt;
1747         struct flowinfo_in *pkt;
1748 {
1749         if (filt->ff_flow.fi_family != AF_INET)
1750                 return (0);
1751         if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1752             (pkt->fi_tos & filt->ff_mask.mask_tos))
1753                 return (0);
1754         /* match */
1755         return (1);
1756 }
1757
1758 #ifdef INET6
1759 static int
1760 apply_filter6(fbmask, filt, pkt)
1761         u_int32_t       fbmask;
1762         struct flow_filter6 *filt;
1763         struct flowinfo_in6 *pkt;
1764 {
1765         int i;
1766
1767         if (filt->ff_flow6.fi6_family != AF_INET6)
1768                 return (0);
1769         if ((fbmask & FIMB6_FLABEL) &&
1770             filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
1771                 return (0);
1772         if ((fbmask & FIMB6_PROTO) &&
1773             filt->ff_flow6.fi6_proto != pkt->fi6_proto)
1774                 return (0);
1775         if ((fbmask & FIMB6_SPORT) &&
1776             filt->ff_flow6.fi6_sport != pkt->fi6_sport)
1777                 return (0);
1778         if ((fbmask & FIMB6_DPORT) &&
1779             filt->ff_flow6.fi6_dport != pkt->fi6_dport)
1780                 return (0);
1781         if (fbmask & FIMB6_SADDR) {
1782                 for (i = 0; i < 4; i++)
1783                         if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
1784                             (pkt->fi6_src.s6_addr32[i] &
1785                              filt->ff_mask6.mask6_src.s6_addr32[i]))
1786                                 return (0);
1787         }
1788         if (fbmask & FIMB6_DADDR) {
1789                 for (i = 0; i < 4; i++)
1790                         if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
1791                             (pkt->fi6_dst.s6_addr32[i] &
1792                              filt->ff_mask6.mask6_dst.s6_addr32[i]))
1793                                 return (0);
1794         }
1795         if ((fbmask & FIMB6_TCLASS) &&
1796             filt->ff_flow6.fi6_tclass !=
1797             (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
1798                 return (0);
1799         if ((fbmask & FIMB6_GPI) &&
1800             filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
1801                 return (0);
1802         /* match */
1803         return (1);
1804 }
1805 #endif /* INET6 */
1806
1807 /*
1808  *  filter handle:
1809  *      bit 20-28: index to the filter hash table
1810  *      bit  0-19: unique id in the hash bucket.
1811  */
1812 static u_long
1813 get_filt_handle(classifier, i)
1814         struct acc_classifier *classifier;
1815         int     i;
1816 {
1817         static u_long handle_number = 1;
1818         u_long  handle;
1819         struct acc_filter *afp;
1820
1821         while (1) {
1822                 handle = handle_number++ & 0x000fffff;
1823
1824                 if (LIST_EMPTY(&classifier->acc_filters[i]))
1825                         break;
1826
1827                 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1828                         if ((afp->f_handle & 0x000fffff) == handle)
1829                                 break;
1830                 if (afp == NULL)
1831                         break;
1832                 /* this handle is already used, try again */
1833         }
1834
1835         return ((i << 20) | handle);
1836 }
1837
1838 /* convert filter handle to filter pointer */
1839 static struct acc_filter *
1840 filth_to_filtp(classifier, handle)
1841         struct acc_classifier *classifier;
1842         u_long handle;
1843 {
1844         struct acc_filter *afp;
1845         int     i;
1846
1847         i = ACC_GET_HINDEX(handle);
1848
1849         LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1850                 if (afp->f_handle == handle)
1851                         return (afp);
1852
1853         return (NULL);
1854 }
1855
1856 /* create flowinfo bitmask */
1857 static u_int32_t
1858 filt2fibmask(filt)
1859         struct flow_filter *filt;
1860 {
1861         u_int32_t mask = 0;
1862 #ifdef INET6
1863         struct flow_filter6 *filt6;
1864 #endif
1865
1866         switch (filt->ff_flow.fi_family) {
1867         case AF_INET:
1868                 if (filt->ff_flow.fi_proto != 0)
1869                         mask |= FIMB4_PROTO;
1870                 if (filt->ff_flow.fi_tos != 0)
1871                         mask |= FIMB4_TOS;
1872                 if (filt->ff_flow.fi_dst.s_addr != 0)
1873                         mask |= FIMB4_DADDR;
1874                 if (filt->ff_flow.fi_src.s_addr != 0)
1875                         mask |= FIMB4_SADDR;
1876                 if (filt->ff_flow.fi_sport != 0)
1877                         mask |= FIMB4_SPORT;
1878                 if (filt->ff_flow.fi_dport != 0)
1879                         mask |= FIMB4_DPORT;
1880                 if (filt->ff_flow.fi_gpi != 0)
1881                         mask |= FIMB4_GPI;
1882                 break;
1883 #ifdef INET6
1884         case AF_INET6:
1885                 filt6 = (struct flow_filter6 *)filt;
1886
1887                 if (filt6->ff_flow6.fi6_proto != 0)
1888                         mask |= FIMB6_PROTO;
1889                 if (filt6->ff_flow6.fi6_tclass != 0)
1890                         mask |= FIMB6_TCLASS;
1891                 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1892                         mask |= FIMB6_DADDR;
1893                 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1894                         mask |= FIMB6_SADDR;
1895                 if (filt6->ff_flow6.fi6_sport != 0)
1896                         mask |= FIMB6_SPORT;
1897                 if (filt6->ff_flow6.fi6_dport != 0)
1898                         mask |= FIMB6_DPORT;
1899                 if (filt6->ff_flow6.fi6_gpi != 0)
1900                         mask |= FIMB6_GPI;
1901                 if (filt6->ff_flow6.fi6_flowlabel != 0)
1902                         mask |= FIMB6_FLABEL;
1903                 break;
1904 #endif /* INET6 */
1905         }
1906         return (mask);
1907 }
1908
1909
1910 /*
1911  * helper functions to handle IPv4 fragments.
1912  * currently only in-sequence fragments are handled.
1913  *      - fragment info is cached in a LRU list.
1914  *      - when a first fragment is found, cache its flow info.
1915  *      - when a non-first fragment is found, lookup the cache.
1916  */
1917
/* one cached fragment flow; entries live on the LRU list below */
struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* LRU list linkage */
    char    ip4f_valid;			/* non-zero while the entry holds a live flow */
    u_short ip4f_id;			/* IPv4 header id shared by the fragments */
    struct flowinfo_in ip4f_info;	/* cached proto/addrs/ports/gpi of the flow */
};

/* LRU-ordered cache: valid entries at the head, free ones at the tail */
static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define IP4F_TABSIZE            16      /* IPv4 fragment cache size */
1928
1929
1930 static void
1931 ip4f_cache(ip, fin)
1932         struct ip *ip;
1933         struct flowinfo_in *fin;
1934 {
1935         struct ip4_frag *fp;
1936
1937         if (TAILQ_EMPTY(&ip4f_list)) {
1938                 /* first time call, allocate fragment cache entries. */
1939                 if (ip4f_init() < 0)
1940                         /* allocation failed! */
1941                         return;
1942         }
1943
1944         fp = ip4f_alloc();
1945         fp->ip4f_id = ip->ip_id;
1946         fp->ip4f_info.fi_proto = ip->ip_p;
1947         fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1948         fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1949
1950         /* save port numbers */
1951         fp->ip4f_info.fi_sport = fin->fi_sport;
1952         fp->ip4f_info.fi_dport = fin->fi_dport;
1953         fp->ip4f_info.fi_gpi   = fin->fi_gpi;
1954 }
1955
1956 static int
1957 ip4f_lookup(ip, fin)
1958         struct ip *ip;
1959         struct flowinfo_in *fin;
1960 {
1961         struct ip4_frag *fp;
1962
1963         for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
1964              fp = TAILQ_NEXT(fp, ip4f_chain))
1965                 if (ip->ip_id == fp->ip4f_id &&
1966                     ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
1967                     ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
1968                     ip->ip_p == fp->ip4f_info.fi_proto) {
1969
1970                         /* found the matching entry */
1971                         fin->fi_sport = fp->ip4f_info.fi_sport;
1972                         fin->fi_dport = fp->ip4f_info.fi_dport;
1973                         fin->fi_gpi   = fp->ip4f_info.fi_gpi;
1974
1975                         if ((ntohs(ip->ip_off) & IP_MF) == 0)
1976                                 /* this is the last fragment,
1977                                    release the entry. */
1978                                 ip4f_free(fp);
1979
1980                         return (1);
1981                 }
1982
1983         /* no matching entry found */
1984         return (0);
1985 }
1986
1987 static int
1988 ip4f_init(void)
1989 {
1990         struct ip4_frag *fp;
1991         int i;
1992
1993         TAILQ_INIT(&ip4f_list);
1994         for (i=0; i<IP4F_TABSIZE; i++) {
1995                 fp = malloc(sizeof(struct ip4_frag),
1996                        M_DEVBUF, M_NOWAIT);
1997                 if (fp == NULL) {
1998                         printf("ip4f_init: can't alloc %dth entry!\n", i);
1999                         if (i == 0)
2000                                 return (-1);
2001                         return (0);
2002                 }
2003                 fp->ip4f_valid = 0;
2004                 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
2005         }
2006         return (0);
2007 }
2008
2009 static struct ip4_frag *
2010 ip4f_alloc(void)
2011 {
2012         struct ip4_frag *fp;
2013
2014         /* reclaim an entry at the tail, put it at the head */
2015         fp = TAILQ_LAST(&ip4f_list, ip4f_list);
2016         TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
2017         fp->ip4f_valid = 1;
2018         TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
2019         return (fp);
2020 }
2021
2022 static void
2023 ip4f_free(fp)
2024         struct ip4_frag *fp;
2025 {
2026         TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
2027         fp->ip4f_valid = 0;
2028         TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
2029 }
2030
2031 #endif /* ALTQ3_CLFIER_COMPAT */