2 * Copyright (c) 2004-2008 University of Zagreb
3 * Copyright (c) 2007-2008 FreeBSD Foundation
5 * This software was developed by the University of Zagreb and the
6 * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * This node permits simple traffic shaping by emulating bandwidth
35 * and delay, as well as random packet losses.
36 * The node has two hooks, upper and lower. Traffic flowing from upper to
37 * lower hook is referenced as downstream, and vice versa. Parameters for
38 * both directions can be set separately, except for delay.
42 #include <sys/param.h>
43 #include <sys/errno.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
58 #include <netgraph/ng_message.h>
59 #include <netgraph/netgraph.h>
60 #include <netgraph/ng_parse.h>
61 #include <netgraph/ng_pipe.h>
63 static MALLOC_DEFINE(M_NG_PIPE, "ng_pipe", "ng_pipe");
/*
 * Single global mutex serializing all ng_pipe work across every node
 * instance; taken in ngp_rcvmsg(), ngp_rcvdata(), pipe_poll() and
 * ngp_disconnect() below.
 */
65 struct mtx ng_pipe_giant;
67 /* Packet header struct */
/*
 * NOTE(review): the struct declarator lines ("struct ngp_hdr {", its
 * closing brace, and likewise for ngp_fifo, hookinfo and node_priv
 * below) are missing from this view of the file; only the member
 * lines survived extraction.
 */
69 TAILQ_ENTRY(ngp_hdr) ngp_link; /* next pkt in queue */
70 struct timeval when; /* this packet's due time */
71 struct mbuf *m; /* ptr to the packet data */
73 TAILQ_HEAD(p_head, ngp_hdr);
75 /* FIFO queue struct */
/* One ngp_fifo per flow; flows are distinguished by the ip_hash() value. */
77 TAILQ_ENTRY(ngp_fifo) fifo_le; /* list of active queues only */
78 struct p_head packet_head; /* FIFO queue head */
79 u_int32_t hash; /* flow signature */
80 struct timeval vtime; /* virtual time, for WFQ */
81 u_int32_t rr_deficit; /* for DRR */
82 u_int32_t packets; /* # of packets in this queue */
/* Per-hook (one direction of traffic) state: queues, config, stats. */
88 int noqueue; /* bypass any processing */
89 TAILQ_HEAD(, ngp_fifo) fifo_head; /* FIFO queues */
90 TAILQ_HEAD(, ngp_hdr) qout_head; /* delay queue head */
91 LIST_ENTRY(hookinfo) active_le; /* active hooks */
92 struct timeval qin_utime; /* inbound queue service deadline */
93 struct ng_pipe_hookcfg cfg;
94 struct ng_pipe_hookrun run;
95 struct ng_pipe_hookstat stats;
96 uint64_t *ber_p; /* loss_p(BER,psize) map */
/* Per-node private data: node-wide knobs plus the two hook states. */
103 u_int32_t header_offset;
104 struct hookinfo lower;
105 struct hookinfo upper;
107 typedef struct node_priv *priv_p;
109 /* Macro for calculating the virtual time for packet dequeueing in WFQ */
/*
 * Computes the flow's virtual finish time from the packet length plus
 * per-frame overhead, scaled by the number of active FIFO queues and
 * the configured bandwidth (bits/s; the 8000000 factor converts bytes
 * to bits and seconds to microseconds), then inserts ngp_f into
 * hinfo->fifo_head sorted by ascending vtime.  Relies on hinfo, priv,
 * now, ngp_f and ngp_f1 being in the caller's scope (see ngp_rcvdata()
 * and pipe_dequeue()).  NOTE(review): original lines 122/125/127 (the
 * loop "break" and the "else" keywords) are missing from this view,
 * so the macro text below is not the complete original.
 */
110 #define FIFO_VTIME_SORT(plen) \
111 if (hinfo->cfg.wfq && hinfo->cfg.bandwidth) { \
112 ngp_f->vtime.tv_usec = now->tv_usec + ((uint64_t) (plen) \
113 + priv->overhead ) * hinfo->run.fifo_queues * \
114 8000000 / hinfo->cfg.bandwidth; \
115 ngp_f->vtime.tv_sec = now->tv_sec + \
116 ngp_f->vtime.tv_usec / 1000000; \
117 ngp_f->vtime.tv_usec = ngp_f->vtime.tv_usec % 1000000; \
118 TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le) \
119 if (ngp_f1->vtime.tv_sec > ngp_f->vtime.tv_sec || \
120 (ngp_f1->vtime.tv_sec == ngp_f->vtime.tv_sec && \
121 ngp_f1->vtime.tv_usec > ngp_f->vtime.tv_usec)) \
123 if (ngp_f1 == NULL) \
124 TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \
126 TAILQ_INSERT_BEFORE(ngp_f1, ngp_f, fifo_le); \
128 TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \
/* Forward declarations for the static helpers defined below. */
131 static void parse_cfg(struct ng_pipe_hookcfg *, struct ng_pipe_hookcfg *,
132 struct hookinfo *, priv_p);
133 static void pipe_dequeue(struct hookinfo *, struct timeval *);
134 static void pipe_scheduler(void *);
135 static void pipe_poll(void);
136 static int ngp_modevent(module_t, int, void *);
138 /* linked list of active "pipe" hooks */
139 static LIST_HEAD(, hookinfo) active_head;
/*
 * Generation counter for active_head; pipe_poll() compares it before
 * and after dropping the lock and restarts traversal on a change.
 * NOTE(review): the sites that increment it are not visible in this
 * view -- presumably wherever active_head membership changes.
 */
140 static int active_gen_id = 0;
142 /* timeout handle for pipe_scheduler */
143 static struct callout polling_timer;
145 /* zone for storing ngp_hdr-s */
/* NOTE(review): the zone is sized for, and also used to allocate,
 * struct ngp_fifo (see ngp_rcvdata() and ngp_modevent()). */
146 static uma_zone_t ngp_zone;
148 /* Netgraph methods */
149 static ng_constructor_t ngp_constructor;
150 static ng_rcvmsg_t ngp_rcvmsg;
151 static ng_shutdown_t ngp_shutdown;
152 static ng_newhook_t ngp_newhook;
153 static ng_rcvdata_t ngp_rcvdata;
154 static ng_disconnect_t ngp_disconnect;
/*
 * ng_parse descriptors making the node's binary control structures
 * accessible as ASCII via ngctl(8).  NOTE(review): the closing "};"
 * lines of these initializers, and the per-entry "{" / "}," braces of
 * the ngp_cmds[] array, are missing from this view of the file.
 */
156 /* Parse type for struct ng_pipe_hookstat */
157 static const struct ng_parse_struct_field
158 ng_pipe_hookstat_type_fields[] = NG_PIPE_HOOKSTAT_INFO;
159 static const struct ng_parse_type ng_pipe_hookstat_type = {
160 &ng_parse_struct_type,
161 &ng_pipe_hookstat_type_fields
164 /* Parse type for struct ng_pipe_stats */
165 static const struct ng_parse_struct_field ng_pipe_stats_type_fields[] =
166 NG_PIPE_STATS_INFO(&ng_pipe_hookstat_type);
167 static const struct ng_parse_type ng_pipe_stats_type = {
168 &ng_parse_struct_type,
169 &ng_pipe_stats_type_fields
172 /* Parse type for struct ng_pipe_hookrun */
173 static const struct ng_parse_struct_field
174 ng_pipe_hookrun_type_fields[] = NG_PIPE_HOOKRUN_INFO;
175 static const struct ng_parse_type ng_pipe_hookrun_type = {
176 &ng_parse_struct_type,
177 &ng_pipe_hookrun_type_fields
180 /* Parse type for struct ng_pipe_run */
181 static const struct ng_parse_struct_field
182 ng_pipe_run_type_fields[] = NG_PIPE_RUN_INFO(&ng_pipe_hookrun_type);
183 static const struct ng_parse_type ng_pipe_run_type = {
184 &ng_parse_struct_type,
185 &ng_pipe_run_type_fields
188 /* Parse type for struct ng_pipe_hookcfg */
189 static const struct ng_parse_struct_field
190 ng_pipe_hookcfg_type_fields[] = NG_PIPE_HOOKCFG_INFO;
191 static const struct ng_parse_type ng_pipe_hookcfg_type = {
192 &ng_parse_struct_type,
193 &ng_pipe_hookcfg_type_fields
196 /* Parse type for struct ng_pipe_cfg */
197 static const struct ng_parse_struct_field
198 ng_pipe_cfg_type_fields[] = NG_PIPE_CFG_INFO(&ng_pipe_hookcfg_type);
199 static const struct ng_parse_type ng_pipe_cfg_type = {
200 &ng_parse_struct_type,
201 &ng_pipe_cfg_type_fields
204 /* List of commands and how to convert arguments to/from ASCII */
205 static const struct ng_cmdlist ngp_cmds[] = {
207 .cookie = NGM_PIPE_COOKIE,
208 .cmd = NGM_PIPE_GET_STATS,
210 .respType = &ng_pipe_stats_type
213 .cookie = NGM_PIPE_COOKIE,
214 .cmd = NGM_PIPE_CLR_STATS,
218 .cookie = NGM_PIPE_COOKIE,
219 .cmd = NGM_PIPE_GETCLR_STATS,
220 .name = "getclrstats",
221 .respType = &ng_pipe_stats_type
224 .cookie = NGM_PIPE_COOKIE,
225 .cmd = NGM_PIPE_GET_RUN,
227 .respType = &ng_pipe_run_type
230 .cookie = NGM_PIPE_COOKIE,
231 .cmd = NGM_PIPE_GET_CFG,
233 .respType = &ng_pipe_cfg_type
236 .cookie = NGM_PIPE_COOKIE,
237 .cmd = NGM_PIPE_SET_CFG,
239 .mesgType = &ng_pipe_cfg_type,
244 /* Netgraph type descriptor */
245 static struct ng_type ng_pipe_typestruct = {
246 .version = NG_ABI_VERSION,
247 .name = NG_PIPE_NODE_TYPE,
248 .mod_event = ngp_modevent,
249 .constructor = ngp_constructor,
250 .shutdown = ngp_shutdown,
251 .rcvmsg = ngp_rcvmsg,
252 .newhook = ngp_newhook,
253 .rcvdata = ngp_rcvdata,
254 .disconnect = ngp_disconnect,
257 NETGRAPH_INIT(pipe, &ng_pipe_typestruct);
259 /* Node constructor */
/*
 * Allocate the zero-initialized per-node private struct and attach it
 * to the netgraph node.  NOTE(review): this view is missing original
 * lines 260/262-264/266-267 (return type, locals, the NULL check for
 * the M_NOWAIT allocation, and the return), so the ENOMEM error path
 * cannot be confirmed here.
 */
261 ngp_constructor(node_p node)
265 priv = malloc(sizeof(*priv), M_NG_PIPE, M_ZERO | M_NOWAIT);
268 NG_NODE_SET_PRIVATE(node, priv);
/*
 * Hook constructor: bind the "upper" or "lower" hook to its hookinfo
 * slot in the node's private struct and seed the non-zero defaults
 * (inbound queue limit of 50 frames, droptail policy, empty FIFO and
 * delay queues).  NOTE(review): the branch rejecting unknown hook
 * names (original lines 288-290) and the return are not visible in
 * this view.
 */
275 ngp_newhook(node_p node, hook_p hook, const char *name)
277 const priv_p priv = NG_NODE_PRIVATE(node);
278 struct hookinfo *hinfo;
280 if (strcmp(name, NG_PIPE_HOOK_UPPER) == 0) {
281 bzero(&priv->upper, sizeof(priv->upper));
282 priv->upper.hook = hook;
283 NG_HOOK_SET_PRIVATE(hook, &priv->upper);
284 } else if (strcmp(name, NG_PIPE_HOOK_LOWER) == 0) {
285 bzero(&priv->lower, sizeof(priv->lower));
286 priv->lower.hook = hook;
287 NG_HOOK_SET_PRIVATE(hook, &priv->lower);
291 /* Load non-zero initial cfg values */
292 hinfo = NG_HOOK_PRIVATE(hook);
293 hinfo->cfg.qin_size_limit = 50;
295 hinfo->cfg.droptail = 1;
296 TAILQ_INIT(&hinfo->fifo_head);
297 TAILQ_INIT(&hinfo->qout_head);
301 /* Receive a control message */
/*
 * Handle the node's control messages (GET/CLR/GETCLR_STATS, GET_RUN,
 * GET_CFG, SET_CFG) under ng_pipe_giant.  By convention here, the
 * "upper" hookinfo holds downstream state and "lower" holds upstream
 * state (see the bcopy pairs below).  GET_CFG folds identical per-hook
 * bandwidths into the single cfg->bandwidth field; SET_CFG validates
 * ranges (-1 means "reset to default") and derives the per-frame
 * overhead from the bandwidth class (Ethernet vs. HDLC framing).
 * NOTE(review): many original lines are missing from this view --
 * locals such as msg/error, the NG_MKRESPONSE ENOMEM checks, break
 * statements, default cases, NG_FREE_MSG and the return -- so the
 * body below is not the complete original text.
 */
303 ngp_rcvmsg(node_p node, item_p item, hook_p lasthook)
305 const priv_p priv = NG_NODE_PRIVATE(node);
306 struct ng_mesg *resp = NULL;
308 struct ng_pipe_stats *stats;
309 struct ng_pipe_run *run;
310 struct ng_pipe_cfg *cfg;
313 mtx_lock(&ng_pipe_giant);
315 NGI_GET_MSG(item, msg);
316 switch (msg->header.typecookie) {
317 case NGM_PIPE_COOKIE:
318 switch (msg->header.cmd) {
319 case NGM_PIPE_GET_STATS:
320 case NGM_PIPE_CLR_STATS:
321 case NGM_PIPE_GETCLR_STATS:
322 if (msg->header.cmd != NGM_PIPE_CLR_STATS) {
323 NG_MKRESPONSE(resp, msg,
324 sizeof(*stats), M_NOWAIT);
329 stats = (struct ng_pipe_stats *)resp->data;
330 bcopy(&priv->upper.stats, &stats->downstream,
331 sizeof(stats->downstream));
332 bcopy(&priv->lower.stats, &stats->upstream,
333 sizeof(stats->upstream));
335 if (msg->header.cmd != NGM_PIPE_GET_STATS) {
336 bzero(&priv->upper.stats,
337 sizeof(priv->upper.stats));
338 bzero(&priv->lower.stats,
339 sizeof(priv->lower.stats));
342 case NGM_PIPE_GET_RUN:
343 NG_MKRESPONSE(resp, msg, sizeof(*run), M_NOWAIT);
348 run = (struct ng_pipe_run *)resp->data;
349 bcopy(&priv->upper.run, &run->downstream,
350 sizeof(run->downstream));
351 bcopy(&priv->lower.run, &run->upstream,
352 sizeof(run->upstream));
354 case NGM_PIPE_GET_CFG:
355 NG_MKRESPONSE(resp, msg, sizeof(*cfg), M_NOWAIT);
360 cfg = (struct ng_pipe_cfg *)resp->data;
361 bcopy(&priv->upper.cfg, &cfg->downstream,
362 sizeof(cfg->downstream));
363 bcopy(&priv->lower.cfg, &cfg->upstream,
364 sizeof(cfg->upstream));
365 cfg->delay = priv->delay;
366 cfg->overhead = priv->overhead;
367 cfg->header_offset = priv->header_offset;
368 if (cfg->upstream.bandwidth ==
369 cfg->downstream.bandwidth) {
370 cfg->bandwidth = cfg->upstream.bandwidth;
371 cfg->upstream.bandwidth = 0;
372 cfg->downstream.bandwidth = 0;
376 case NGM_PIPE_SET_CFG:
377 cfg = (struct ng_pipe_cfg *)msg->data;
378 if (msg->header.arglen != sizeof(*cfg)) {
/* delay is in microseconds; valid range (0, 10 s) */
383 if (cfg->delay == -1)
385 else if (cfg->delay > 0 && cfg->delay < 10000000)
386 priv->delay = cfg->delay;
/* A single bandwidth value configures both directions at once. */
388 if (cfg->bandwidth == -1) {
389 priv->upper.cfg.bandwidth = 0;
390 priv->lower.cfg.bandwidth = 0;
392 } else if (cfg->bandwidth >= 100 &&
393 cfg->bandwidth <= 1000000000) {
394 priv->upper.cfg.bandwidth = cfg->bandwidth;
395 priv->lower.cfg.bandwidth = cfg->bandwidth;
396 if (cfg->bandwidth >= 10000000)
397 priv->overhead = 8+4+12; /* Ethernet */
399 priv->overhead = 10; /* HDLC */
402 if (cfg->overhead == -1)
404 else if (cfg->overhead > 0 && cfg->overhead < 256)
405 priv->overhead = cfg->overhead;
407 if (cfg->header_offset == -1)
408 priv->header_offset = 0;
409 else if (cfg->header_offset > 0 &&
410 cfg->header_offset < 64)
411 priv->header_offset = cfg->header_offset;
/* Apply the per-direction hook configs. */
413 parse_cfg(&priv->upper.cfg, &cfg->downstream,
415 parse_cfg(&priv->lower.cfg, &cfg->upstream,
427 NG_RESPOND_MSG(error, node, item, resp);
430 mtx_unlock(&ng_pipe_giant);
/*
 * Validate and apply one direction's hook configuration ("new") onto
 * the live config ("current"), rebuilding the BER loss-probability
 * lookup table when the bit error rate changes.  A field value of -1
 * resets that knob to its default.  NOTE(review): numerous original
 * lines are missing from this view (the ber==1 fast path, the wfq/drr
 * selection logic around lines 491-512, the droptail/drophead
 * conditions around 513-523, and the final noqueue assignment after
 * line 533), so the branches below are incomplete.
 */
436 parse_cfg(struct ng_pipe_hookcfg *current, struct ng_pipe_hookcfg *new,
437 struct hookinfo *hinfo, priv_p priv)
440 if (new->ber == -1) {
443 free(hinfo->ber_p, M_NG_PIPE);
446 } else if (new->ber >= 1 && new->ber <= 1000000000000) {
447 static const uint64_t one = 0x1000000000000; /* = 2^48 */
/* NOTE(review): M_NOWAIT allocation; no NULL check is visible before
 * the table-fill loop below dereferences ber_p -- the missing lines
 * may contain one, but verify against the full source. */
451 if (hinfo->ber_p == NULL)
452 hinfo->ber_p = malloc(\
453 (MAX_FSIZE + MAX_OHSIZE)*sizeof(uint64_t), \
454 M_NG_PIPE, M_NOWAIT);
455 current->ber = new->ber;
/*
458 * For given BER and each frame size N (in bytes) calculate
459 * the probability P_OK that the frame is clean:
461 * P_OK(BER,N) = (1 - 1/BER)^(N*8)
463 * We use a 64-bit fixed-point format with decimal point
464 * positioned between bits 47 and 48.
 */
466 p0 = one - one / new->ber;
468 for (fsize = 0; fsize < MAX_FSIZE + MAX_OHSIZE; fsize++) {
469 hinfo->ber_p[fsize] = p;
/* 48.16 fixed-point multiply of p by p0, done in 16-bit limbs */
471 p = (p*(p0&0xffff)>>48) + \
472 (p*((p0>>16)&0xffff)>>32) + \
477 if (new->qin_size_limit == -1)
478 current->qin_size_limit = 0;
479 else if (new->qin_size_limit >= 5)
480 current->qin_size_limit = new->qin_size_limit;
482 if (new->qout_size_limit == -1)
483 current->qout_size_limit = 0;
484 else if (new->qout_size_limit >= 5)
485 current->qout_size_limit = new->qout_size_limit;
487 if (new->duplicate == -1)
488 current->duplicate = 0;
489 else if (new->duplicate > 0 && new->duplicate <= 50)
490 current->duplicate = new->duplicate;
509 current->drr = new->drr;
511 current->drr = 2048; /* default quantum */
515 current->droptail = 1;
516 current->drophead = 0;
520 current->droptail = 0;
521 current->drophead = 1;
524 if (new->bandwidth == -1) {
525 current->bandwidth = 0;
529 } else if (new->bandwidth >= 100 && new->bandwidth <= 1000000000)
530 current->bandwidth = new->bandwidth;
/* Presumably decides hinfo->noqueue (fast-path bypass) -- the branch
 * bodies after line 533 are not visible here; confirm in full source. */
532 if (current->bandwidth | priv->delay |
533 current->duplicate | current->ber)
/*
540 * Compute a hash signature for a packet. This function suffers from the
541 * NIH sindrome, so probably it would be wise to look around what other
542 * folks have found out to be a good and efficient IP hash function...
 */
/*
 * Folds the IPv4 source and destination addresses into a 32-bit flow
 * signature.  NOTE(review): the non-IPv4 / short-mbuf early return
 * (original line 552, presumably "return (0);"), the return type and
 * the declaration of "i" are missing from this view.
 */
545 ip_hash(struct mbuf *m, int offset)
548 struct ip *ip = (struct ip *)(mtod(m, u_char *) + offset);
550 if (m->m_len < sizeof(struct ip) + offset ||
551 ip->ip_v != 4 || ip->ip_hl << 2 != sizeof(struct ip))
554 i = ((u_int64_t) ip->ip_src.s_addr ^
555 ((u_int64_t) ip->ip_src.s_addr << 13) ^
556 ((u_int64_t) ip->ip_dst.s_addr << 7) ^
557 ((u_int64_t) ip->ip_dst.s_addr << 19));
558 return (i ^ (i >> 32));
/*
562 * Receive data on a hook - both in upstream and downstream direction.
563 * We put the frame on the inbound queue, and try to initiate dequeuing
564 * sequence immediately. If inbound queue is full, discard one frame
565 * depending on dropping policy (from the head or from the tail of the
 */
/*
 * NOTE(review): this view is missing many original lines (the noqueue
 * fast-path's "dest" assignments and return, getmicrouptime() call,
 * the WFQ-vs-single-queue hash selection around lines 614-620, loop
 * "break"s, and the tail that records stats).  Visible concerns:
 *  - M_NOWAIT uma_zalloc() results are only KASSERT-checked; with
 *    INVARIANTS disabled a failed allocation would be dereferenced.
 *  - Line 628's KASSERT re-checks ngp_h instead of the ngp_f that was
 *    just allocated on line 627 -- looks like a copy/paste slip.
 *  - The "else if" block at line 673 repeats the exact condition and
 *    body of the "if" at line 644; as written the second branch is
 *    unreachable (else of an identical condition) -- candidate for
 *    removal/consolidation, but intermediate missing lines should be
 *    checked first.
 */
569 ngp_rcvdata(hook_p hook, item_p item)
571 struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
572 const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
573 struct timeval uuptime;
574 struct timeval *now = &uuptime;
575 struct ngp_fifo *ngp_f = NULL, *ngp_f1;
576 struct ngp_hdr *ngp_h = NULL;
/* Fast path: nothing to emulate on this hook, just forward the item. */
581 if (hinfo->noqueue) {
582 struct hookinfo *dest;
583 if (hinfo == &priv->lower)
587 NG_FWD_ITEM_HOOK(error, item, dest->hook);
591 mtx_lock(&ng_pipe_giant);
/*
595 * Attach us to the list of active ng_pipes if this was an empty
596 * one before, and also update the queue service deadline time.
 */
598 if (hinfo->run.qin_frames == 0) {
599 struct timeval *when = &hinfo->qin_utime;
600 if (when->tv_sec < now->tv_sec || (when->tv_sec == now->tv_sec
601 && when->tv_usec < now->tv_usec)) {
602 when->tv_sec = now->tv_sec;
603 when->tv_usec = now->tv_usec;
605 if (hinfo->run.qout_frames == 0)
606 LIST_INSERT_HEAD(&active_head, hinfo, active_le);
609 /* Populate the packet header */
610 ngp_h = uma_zalloc(ngp_zone, M_NOWAIT);
611 KASSERT((ngp_h != NULL), ("ngp_h zalloc failed (1)"));
613 KASSERT(m != NULL, ("NGI_GET_M failed"));
618 hash = 0; /* all packets go into a single FIFO queue */
620 hash = ip_hash(m, priv->header_offset);
622 /* Find the appropriate FIFO queue for the packet and enqueue it*/
623 TAILQ_FOREACH(ngp_f, &hinfo->fifo_head, fifo_le)
624 if (hash == ngp_f->hash)
627 ngp_f = uma_zalloc(ngp_zone, M_NOWAIT);
628 KASSERT(ngp_h != NULL, ("ngp_h zalloc failed (2)"));
629 TAILQ_INIT(&ngp_f->packet_head);
632 ngp_f->rr_deficit = hinfo->cfg.drr; /* DRR quantum */
633 hinfo->run.fifo_queues++;
634 TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
635 FIFO_VTIME_SORT(m->m_pkthdr.len);
637 TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
640 hinfo->run.qin_frames++;
641 hinfo->run.qin_octets += m->m_pkthdr.len;
643 /* Discard a frame if inbound queue limit has been reached */
644 if (hinfo->run.qin_frames > hinfo->cfg.qin_size_limit) {
648 /* Find the longest queue */
649 TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le)
650 if (ngp_f1->packets > longest) {
651 longest = ngp_f1->packets;
655 /* Drop a frame from the queue head/tail, depending on cfg */
656 if (hinfo->cfg.drophead)
657 ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
659 ngp_h = TAILQ_LAST(&ngp_f->packet_head, p_head);
660 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
662 uma_zfree(ngp_zone, ngp_h);
663 hinfo->run.qin_octets -= m1->m_pkthdr.len;
664 hinfo->stats.in_disc_octets += m1->m_pkthdr.len;
666 if (--(ngp_f->packets) == 0) {
667 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
668 uma_zfree(ngp_zone, ngp_f);
669 hinfo->run.fifo_queues--;
671 hinfo->run.qin_frames--;
672 hinfo->stats.in_disc_frames++;
673 } else if (hinfo->run.qin_frames > hinfo->cfg.qin_size_limit) {
677 /* Find the longest queue */
678 TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le)
679 if (ngp_f1->packets > longest) {
680 longest = ngp_f1->packets;
684 /* Drop a frame from the queue head/tail, depending on cfg */
685 if (hinfo->cfg.drophead)
686 ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
688 ngp_h = TAILQ_LAST(&ngp_f->packet_head, p_head);
689 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
691 uma_zfree(ngp_zone, ngp_h);
692 hinfo->run.qin_octets -= m1->m_pkthdr.len;
693 hinfo->stats.in_disc_octets += m1->m_pkthdr.len;
695 if (--(ngp_f->packets) == 0) {
696 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
697 uma_zfree(ngp_zone, ngp_f);
698 hinfo->run.fifo_queues--;
700 hinfo->run.qin_frames--;
701 hinfo->stats.in_disc_frames++;
/*
705 * Try to start the dequeuing process immediately. We must
706 * hold the ng_pipe_giant lock here and pipe_dequeue() will
 * release it (per pipe_dequeue()'s contract below).
 */
709 pipe_dequeue(hinfo, now);
/*
716 * Dequeueing sequence - we basically do the following:
717 * 1) Try to extract the frame from the inbound (bandwidth) queue;
718 * 2) In accordance to BER specified, discard the frame randomly;
719 * 3) If the frame survives BER, prepend it with delay info and move it
720 * to outbound (delay) queue;
721 * 4) Loop to 2) until bandwidth quota for this timeslice is reached, or
722 * inbound queue is flushed completely;
723 * 5) Extract the first frame from the outbound queue, if it's time has
724 * come. Queue the frame for transmission on the outbound hook;
725 * 6) Loop to 5) until outbound queue is flushed completely, or the next
726 * frame in the queue is not scheduled to be dequeued yet;
727 * 7) Transimit all frames queued in 5)
729 * Note: the caller must hold the ng_pipe_giant lock; this function
730 * returns with the lock released.
 */
/*
 * NOTE(review): many original lines are missing from this view ("dest"
 * assignments, m = ngp_h->m, loop break/continue statements, the
 * else-branches of several ifs, the duplicate-vs-dequeue else branch,
 * NG_SEND_DATA bookkeeping, and the locals m/error/meta).  Also note
 * that "rand"/"oldrand" are function-static, i.e. the BER random state
 * is shared across all hooks and nodes, and "rand" shadows the libc
 * function of the same name.
 */
733 pipe_dequeue(struct hookinfo *hinfo, struct timeval *now) {
734 static uint64_t rand, oldrand;
735 const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hinfo->hook));
736 struct hookinfo *dest;
737 struct ngp_fifo *ngp_f, *ngp_f1;
738 struct ngp_hdr *ngp_h;
739 struct timeval *when;
740 struct mbuf *q_head = NULL;
741 struct mbuf *q_tail = NULL;
745 /* Which one is the destination hook? */
746 if (hinfo == &priv->lower)
751 /* Bandwidth queue processing */
752 while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
753 when = &hinfo->qin_utime;
/* Stop when this hook's bandwidth budget extends past "now". */
754 if (when->tv_sec > now->tv_sec || (when->tv_sec == now->tv_sec
755 && when->tv_usec > now->tv_usec))
758 ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
761 /* Deficit Round Robin (DRR) processing */
762 if (hinfo->cfg.drr) {
763 if (ngp_f->rr_deficit >= m->m_pkthdr.len) {
764 ngp_f->rr_deficit -= m->m_pkthdr.len;
/* Quantum exhausted: replenish and rotate this flow to the tail. */
766 ngp_f->rr_deficit += hinfo->cfg.drr;
767 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
768 TAILQ_INSERT_TAIL(&hinfo->fifo_head,
/*
775 * Either create a duplicate and pass it on, or dequeue
776 * the original packet...
 */
778 if (hinfo->cfg.duplicate &&
779 random() % 100 <= hinfo->cfg.duplicate) {
780 ngp_h = uma_zalloc(ngp_zone, M_NOWAIT);
781 KASSERT(ngp_h != NULL, ("ngp_h zalloc failed (3)"));
782 ngp_h->m = m_dup(m, M_NOWAIT);
783 KASSERT(ngp_h->m != NULL, ("m_dup failed"));
785 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
786 hinfo->run.qin_frames--;
787 hinfo->run.qin_octets -= m->m_pkthdr.len;
791 /* Calculate the serialization delay */
792 if (hinfo->cfg.bandwidth) {
793 hinfo->qin_utime.tv_usec += ((uint64_t) m->m_pkthdr.len
795 8000000 / hinfo->cfg.bandwidth;
796 hinfo->qin_utime.tv_sec +=
797 hinfo->qin_utime.tv_usec / 1000000;
798 hinfo->qin_utime.tv_usec =
799 hinfo->qin_utime.tv_usec % 1000000;
802 when->tv_sec = hinfo->qin_utime.tv_sec;
803 when->tv_usec = hinfo->qin_utime.tv_usec;
805 /* Sort / rearrange inbound queues */
806 if (ngp_f->packets) {
807 if (hinfo->cfg.wfq) {
808 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
809 FIFO_VTIME_SORT(TAILQ_FIRST(
810 &ngp_f->packet_head)->m->m_pkthdr.len)
/* Flow drained: release the now-empty FIFO queue. */
813 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
814 uma_zfree(ngp_zone, ngp_f);
815 hinfo->run.fifo_queues--;
818 /* Randomly discard the frame, according to BER setting */
819 if (hinfo->cfg.ber &&
820 ((oldrand = rand) ^ (rand = random())<<17) >=
821 hinfo->ber_p[priv->overhead + m->m_pkthdr.len] ) {
822 hinfo->stats.out_disc_frames++;
823 hinfo->stats.out_disc_octets += m->m_pkthdr.len;
824 uma_zfree(ngp_zone, ngp_h);
829 /* Discard frame if outbound queue size limit exceeded */
830 if (hinfo->cfg.qout_size_limit &&
831 hinfo->run.qout_frames>=hinfo->cfg.qout_size_limit) {
832 hinfo->stats.out_disc_frames++;
833 hinfo->stats.out_disc_octets += m->m_pkthdr.len;
834 uma_zfree(ngp_zone, ngp_h);
839 /* Calculate the propagation delay */
840 when->tv_usec += priv->delay;
841 when->tv_sec += when->tv_usec / 1000000;
842 when->tv_usec = when->tv_usec % 1000000;
844 /* Put the frame into the delay queue */
845 TAILQ_INSERT_TAIL(&hinfo->qout_head, ngp_h, ngp_link);
846 hinfo->run.qout_frames++;
847 hinfo->run.qout_octets += m->m_pkthdr.len;
850 /* Delay queue processing */
851 while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
852 struct mbuf *m = ngp_h->m;
855 if (when->tv_sec > now->tv_sec ||
856 (when->tv_sec == now->tv_sec &&
857 when->tv_usec > now->tv_usec))
860 /* Update outbound queue stats */
861 hinfo->stats.fwd_frames++;
862 hinfo->stats.fwd_octets += m->m_pkthdr.len;
863 hinfo->run.qout_frames--;
864 hinfo->run.qout_octets -= m->m_pkthdr.len;
866 /* Dequeue the packet from qout */
867 TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
868 uma_zfree(ngp_zone, ngp_h);
870 /* Enqueue locally for sending downstream */
874 q_tail->m_nextpkt = m;
879 /* If both queues are empty detach us from the list of active queues */
880 if (hinfo->run.qin_frames + hinfo->run.qout_frames == 0) {
881 LIST_REMOVE(hinfo, active_le);
/* Drop the giant lock before pushing frames out of the node. */
885 mtx_unlock(&ng_pipe_giant);
887 while ((m = q_head) != NULL) {
888 q_head = m->m_nextpkt;
890 NG_SEND_DATA(error, dest->hook, m, meta);
/*
896 * This routine is called on every clock tick. We poll all nodes/hooks
897 * for queued frames by calling pipe_dequeue().
 */
/*
 * Callout handler: presumably calls pipe_poll() (the lines between the
 * function header and the rearm below are missing from this view),
 * then rearms itself for the next tick.
 */
900 pipe_scheduler(void *arg)
905 callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
/*
910 * Traverse the list of all active hooks and attempt to dequeue
911 * some packets. Hooks with empty queues are not traversed since
912 * they are not linked into this list.
 */
/*
 * NOTE(review): the "pipe_poll(void)" definition line, the timeval
 * "now" setup and several loop-body lines are missing from this view.
 * pipe_dequeue() releases ng_pipe_giant (see its contract above),
 * hence the re-lock inside the loop; if active_head changed while the
 * lock was dropped (detected via active_gen_id) the traversal is
 * restarted from the list head.
 */
917 struct hookinfo *hinfo;
919 int old_gen_id = active_gen_id;
921 mtx_lock(&ng_pipe_giant);
923 LIST_FOREACH(hinfo, &active_head, active_le) {
924 CURVNET_SET(NG_HOOK_NODE(hinfo->hook)->nd_vnet);
925 pipe_dequeue(hinfo, &now);
927 mtx_lock(&ng_pipe_giant);
928 if (old_gen_id != active_gen_id) {
929 /* the list was updated; restart traversing */
930 hinfo = LIST_FIRST(&active_head);
933 old_gen_id = active_gen_id;
937 mtx_unlock(&ng_pipe_giant);
/*
942 * Shutdown processing
944 * This is tricky. If we have both a lower and upper hook, then we
945 * probably want to extricate ourselves and leave the two peers
946 * still linked to each other. Otherwise we should just shut down as
947 * a normal node would.
 */
/*
 * NOTE(review): the return type, NG_NODE_* teardown calls and return
 * value are missing from this view of the function.
 */
950 ngp_shutdown(node_p node)
952 const priv_p priv = NG_NODE_PRIVATE(node);
/* Splice our two peers directly together before removing our hooks. */
954 if (priv->lower.hook && priv->upper.hook)
955 ng_bypass(priv->lower.hook, priv->upper.hook);
957 if (priv->upper.hook != NULL)
958 ng_rmhook_self(priv->upper.hook);
959 if (priv->lower.hook != NULL)
960 ng_rmhook_self(priv->lower.hook);
963 free(priv, M_NG_PIPE);
/*
 * Hook disconnect: flush both the per-flow FIFO queues and the delay
 * queue, unlink this hook from the active list, sanity-check the
 * frame accounting and release the BER table.  NOTE(review): the
 * m_freem() calls for the queued mbufs (original lines 988/999), the
 * "removed" counter updates, and the return are missing from this
 * view.  Also: the mismatch printf below lacks a trailing '\n' --
 * worth fixing when editing the full source.
 */
972 ngp_disconnect(hook_p hook)
974 struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
975 struct ngp_fifo *ngp_f;
976 struct ngp_hdr *ngp_h;
979 mtx_lock(&ng_pipe_giant);
981 KASSERT(hinfo != NULL, ("%s: null info", __FUNCTION__));
984 /* Flush all fifo queues associated with the hook */
985 while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
986 while ((ngp_h = TAILQ_FIRST(&ngp_f->packet_head))) {
987 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
989 uma_zfree(ngp_zone, ngp_h);
992 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
993 uma_zfree(ngp_zone, ngp_f);
996 /* Flush the delay queue */
997 while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
998 TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
1000 uma_zfree(ngp_zone, ngp_h);
/*
1005 * Both queues should be empty by now, so detach us from
1006 * the list of active queues
 */
1009 LIST_REMOVE(hinfo, active_le);
1012 if (hinfo->run.qin_frames + hinfo->run.qout_frames != removed)
1013 printf("Mismatch: queued=%d but removed=%d !?!",
1014 hinfo->run.qin_frames + hinfo->run.qout_frames, removed);
1016 /* Release the packet loss probability table (BER) */
1018 free(hinfo->ber_p, M_NG_PIPE);
1020 mtx_unlock(&ng_pipe_giant);
/*
 * Module event handler: on load, create the shared UMA zone (sized to
 * fit either an ngp_hdr or an ngp_fifo), initialize the giant mutex
 * and the active-hooks list, and start the per-tick pipe_scheduler
 * callout; on unload, tear all of that down.  NOTE(review): the
 * switch(type) / MOD_LOAD / MOD_UNLOAD case labels, the zone-flags
 * argument and the return statement are missing from this view.
 */
1026 ngp_modevent(module_t mod, int type, void *unused)
1032 ngp_zone = uma_zcreate("ng_pipe", max(sizeof(struct ngp_hdr),
1033 sizeof (struct ngp_fifo)), NULL, NULL, NULL, NULL,
1035 if (ngp_zone == NULL)
1036 panic("ng_pipe: couldn't allocate descriptor zone");
1038 mtx_init(&ng_pipe_giant, "ng_pipe_giant", NULL, MTX_DEF);
1039 LIST_INIT(&active_head);
1040 callout_init(&polling_timer, CALLOUT_MPSAFE);
1041 callout_reset(&polling_timer, 1, &pipe_scheduler, NULL);
1044 callout_drain(&polling_timer);
1045 uma_zdestroy(ngp_zone);
1046 mtx_destroy(&ng_pipe_giant);