From d45d8a3cdeee6fab56bf00071ae799ad87498527 Mon Sep 17 00:00:00 2001 From: luigi Date: Sat, 12 Sep 1998 22:03:21 +0000 Subject: [PATCH] Bring in new files for dummynet support --- sys/netinet/ip_dummynet.c | 609 ++++++++++++++++++++++++++++++++++++++ sys/netinet/ip_dummynet.h | 100 +++++++ 2 files changed, 709 insertions(+) create mode 100644 sys/netinet/ip_dummynet.c create mode 100644 sys/netinet/ip_dummynet.h diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c new file mode 100644 index 00000000000..31b42499547 --- /dev/null +++ b/sys/netinet/ip_dummynet.c @@ -0,0 +1,609 @@ +/* + * Copyright (c) 1998 Luigi Rizzo + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + * + * $Id: ip_dummynet.c 1.2 1998/08/21 15:01:13 luigi Exp $ + */ + +/* + * This module implements IP dummynet, a bandwidth limiter/delay emulator + * used in conjunction with the ipfw package. + * + * Changes: + * + * 980821: changed conventions in the queueing logic + * packets passed from dummynet to ip_in/out are prepended with + * a vestigial mbuf type MT_DUMMYNET which contains a pointer + * to the matching rule. + * ip_input/output will extract the parameters, free the vestigial mbuf, + * and do the processing. + * + * 980519: fixed behaviour when deleting rules. 
+ * 980518: added splimp()/splx() to protect against races + * 980513: initial release + */ + +/* include files marked with XXX are probably not needed */ + +#include +#include +#include +#include +#include /* XXX */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef BRIDGE +#include /* for struct arpcom */ +#include +#endif + +static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */ + +static int dn_debug = 0 ; /* verbose */ +static int dn_calls = 0 ; /* number of calls */ +static int dn_idle = 1; +#ifdef SYSCTL_NODE +SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dn_debug, 0, ""); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, calls, CTLFLAG_RD, &dn_calls, 0, ""); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, idle, CTLFLAG_RD, &dn_idle, 0, ""); +#endif + +static int ip_dn_ctl(int optname, struct mbuf **mm); + +static void dummynet(void); +static void dn_restart(void); +static void dn_move(struct dn_pipe *pipe, int immediate); + +/* + * the following is needed when deleting a pipe, because rules can + * hold references to the pipe. + */ +extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain; + +/* + * invoked to reschedule the periodic task if necessary. + * Should only be called when dn_idle = 1 ; + */ +static void +dn_restart() +{ + struct dn_pipe *pipe; + + if (!dn_idle) + return; + + for (pipe = all_pipes ; pipe ; pipe = pipe->next ) { + /* if there any pipe that needs work, restart */ + if (pipe->r.head || pipe->p.head || pipe->numbytes < 0 ) { + dn_idle = 0; + timeout(dummynet, (caddr_t)NULL, 1); + return ; + } + } +} + +/* + * move packets from R-queue to P-queue + */ +static void +dn_move(struct dn_pipe *pipe, int immediate) +{ + struct dn_pkt *tmp, *pkt; + + /* + * consistency check, should catch new pipes which are + * not initialized properly. 
+ */ + if ( pipe->p.head == NULL && + pipe->ticks_from_last_insert != pipe->delay) { + printf("Warning, empty pipe and delay %d (should be %a)d\n", + pipe->ticks_from_last_insert, pipe->delay); + pipe->ticks_from_last_insert = pipe->delay; + } + /* this ought to go in dn_dequeue() */ + if (!immediate && pipe->ticks_from_last_insert < pipe->delay) + pipe->ticks_from_last_insert++; + if ( pkt = pipe->r.head ) { + /* + * Move at most numbytes bytes from src and move to dst. + * delay is set to ticks_from_last_insert, which + * is reset after the first insertion; + */ + while ( pkt ) { + struct ip *ip=mtod(pkt->dn_m, struct ip *); + + /* + * queue limitation: pass packets down if the len is + * such that the pkt would go out before the next tick. + */ + if (pipe->bandwidth) { + if (pipe->numbytes < ip->ip_len) + break; + pipe->numbytes -= ip->ip_len; + } + pipe->r_len--; /* elements in queue */ + pipe->r_len_bytes -= ip->ip_len ; + + /* + * to add delay jitter, must act here. A lower value + * (bounded to 0) means lower delay. + */ + pkt->delay = pipe->ticks_from_last_insert; + pipe->ticks_from_last_insert = 0; + /* compensate the decrement done next in dn_dequeue */ + if (!immediate && pkt->delay >0 && pipe->p.head==NULL) + pkt->delay++; + if (pipe->p.head == NULL) + pipe->p.head = pkt; + else + (struct dn_pkt *)pipe->p.tail->dn_next = pkt; + pipe->p.tail = pkt; + pkt = (struct dn_pkt *)pkt->dn_next; + pipe->p.tail->dn_next = NULL; + } + pipe->r.head = pkt; + + /*** XXX just a sanity check */ + if ( ( pkt == NULL && pipe->r_len != 0) || + ( pkt != NULL && pipe->r_len == 0) ) + printf("-- Warning, pipe head %x len %d\n", + pkt, pipe->r_len); + } + + /* + * deliver packets downstream after the delay in the P-queue. 
+ */ + + if (pipe->p.head == NULL) + return; + if (!immediate) + pipe->p.head->delay--; + while ( (pkt = pipe->p.head) && pkt->delay < 1) { + /* + * first unlink, then call procedures since ip_input() + * can result in a call to ip_output cnd viceversa, + * thus causing nested calls + */ + pipe->p.head = (struct dn_pkt *) pkt->dn_next ; + + /* + * the trick to avoid flow-id settings here is to prepend a + * vestigial mbuf to the packet, with the following values: + * m_type = MT_DUMMYNET + * m_next = the actual mbuf to be processed by ip_input/output + * m_data = the matching rule + * The vestigial element is the same memory area used by + * the dn_pkt, and IS FREED IN ip_input/ip_output. IT IS + * NOT A REAL MBUF, just a block of memory acquired with malloc(). + */ + switch (pkt->dn_dir) { + case DN_TO_IP_OUT: { + struct rtentry *tmp_rt = pkt->ro.ro_rt ; + + (void)ip_output((struct mbuf *)pkt, (struct mbuf *)pkt->ifp, + &(pkt->ro), pkt->dn_hlen, NULL); + if (tmp_rt) + tmp_rt->rt_refcnt--; /* XXX return a reference count */ + } + break ; + case DN_TO_IP_IN : + ip_input((struct mbuf *)pkt) ; + break ; +#ifdef BRIDGE + case DN_TO_BDG_FWD : + bdg_forward((struct mbuf **)&pkt, pkt->ifp); + break ; +#endif + default: + printf("dummynet: bad switch %d!\n", pkt->dn_dir); + m_freem(pkt->dn_m); + FREE(pkt, M_IPFW); + break ; + } + } +} +/* + * this is the periodic task that moves packets between the R- + * and the P- queue + */ +void +dummynet() +{ + struct dn_pipe *p ; + int s ; + + dn_calls++ ; + for (p = all_pipes ; p ; p = p->next ) { + /* + * Increment the amount of data that can be sent. However, + * don't do that if the channel is idle + * (r.head == NULL && numbytes >= bandwidth). + * This bug fix is from tim shepard (shep@bbn.com) + */ + s = splimp(); + if (p->r.head != NULL || p->numbytes < p->bandwidth ) + p->numbytes += p->bandwidth ; + dn_move(p, 0); /* is it really 0 (also below) ? 
*/ + splx(s); + } + + /* + * finally, if some queue has data, restart the timer. + */ + dn_idle = 1; + dn_restart(); +} + +/* + * dummynet hook for packets. + * input and output use the same code, so i use bit 16 in the pipe + * number to chose the direction: 1 for output packets, 0 for input. + * for input, only m is significant. For output, also the others. + */ +int +dummynet_io(int pipe_nr, int dir, + struct mbuf *m, struct ifnet *ifp, struct route *ro, int hlen, + struct ip_fw_chain *rule) +{ + struct dn_pkt *pkt; + struct dn_pipe *pipe; + struct ip *ip=mtod(m, struct ip *); + + int s=splimp(); + + pipe_nr &= 0xffff ; + /* + * locate pipe. First time is expensive, next have direct access. + */ + + if ( (pipe = rule->rule->pipe_ptr) == NULL ) { + for (pipe=all_pipes; pipe && pipe->pipe_nr !=pipe_nr; pipe=pipe->next) + ; + if (pipe == NULL) { + splx(s); + if (dn_debug) + printf("warning, pkt for no pipe %d\n", pipe_nr); + m_freem(m); + return 0 ; + } else + rule->rule->pipe_ptr = pipe ; + } + + /* + * should i drop ? + * This section implements random packet drop. + */ + if ( (pipe->plr && random() < pipe->plr) || + (pipe->queue_size && pipe->r_len >= pipe->queue_size) || + (pipe->queue_size_bytes && + ip->ip_len + pipe->r_len_bytes > pipe->queue_size_bytes) || + (pkt = (struct dn_pkt *)malloc(sizeof (*pkt), + M_IPFW, M_NOWAIT) ) == NULL ) { + splx(s); + if (dn_debug) + printf("-- dummynet: drop from pipe %d, have %d pks, %d bytes\n", + pipe_nr, pipe->r_len, pipe->r_len_bytes); + pipe->r_drops++ ; + m_freem(m); + return 0 ; /* XXX error */ + } + /* build and enqueue packet */ + pkt->hdr.mh_type = MT_DUMMYNET ; + (struct ip_fw_chain *)pkt->hdr.mh_data = rule ; + pkt->dn_next = NULL; + pkt->dn_m = m; + pkt->dn_dir = dir ; + pkt->delay = 0; + + pkt->ifp = ifp; + if (dir == DN_TO_IP_OUT) { + pkt->ro = *ro; /* XXX copied! 
*/ + if (ro->ro_rt) + ro->ro_rt->rt_refcnt++ ; /* XXX */ + } + pkt->dn_hlen = hlen; + if (pipe->r.head == NULL) + pipe->r.head = pkt; + else + (struct dn_pkt *)pipe->r.tail->dn_next = pkt; + pipe->r.tail = pkt; + pipe->r_len++; + pipe->r_len_bytes += ip->ip_len ; + + /* + * here we could implement RED if we like to + */ + + if (pipe->r.head == pkt) { /* process immediately */ + dn_move(pipe, 1); + } + splx(s); + if (dn_idle) + dn_restart(); + return 0; +} + +/* + * dispose all packets queued on a pipe + */ +static void +purge_pipe(struct dn_pipe *pipe) +{ + struct dn_pkt *pkt, *n ; + struct rtentry *tmp_rt ; + + for (pkt = pipe->r.head ; pkt ; ) { + if (tmp_rt = pkt->ro.ro_rt ) + tmp_rt->rt_refcnt--; /* XXX return a reference count */ + m_freem(pkt->dn_m); + n = pkt ; + pkt = (struct dn_pkt *)pkt->dn_next ; + free(n, M_IPFW) ; + } + for (pkt = pipe->p.head ; pkt ; ) { + if (tmp_rt = pkt->ro.ro_rt ) + tmp_rt->rt_refcnt--; /* XXX return a reference count */ + m_freem(pkt->dn_m); + n = pkt ; + pkt = (struct dn_pkt *)pkt->dn_next ; + free(n, M_IPFW) ; + } +} + +/* + * delete all pipes returning memory + */ +static void +dummynet_flush() +{ + struct dn_pipe *q, *p = all_pipes ; + int s = splnet() ; + + all_pipes = NULL ; + splx(s) ; + /* + * purge all queued pkts and delete all pipes + */ + for ( ; p ; ) { + purge_pipe(p); + q = p ; + p = p->next ; + free(q, M_IPFW); + } +} + +extern struct ip_fw_chain *ip_fw_default_rule ; +/* + * when a firewall rule is deleted, scan all pipes and remove the flow-id + * from packets matching this rule. 
+ */ +void +dn_rule_delete(void *r) +{ + + struct dn_pipe *q, *p = all_pipes ; + + for ( p= all_pipes ; p ; p = p->next ) { + struct dn_pkt *x ; + for (x = p->r.head ; x ; x = (struct dn_pkt *)x->dn_next ) + if (x->hdr.mh_data == r) + x->hdr.mh_data = (void *)ip_fw_default_rule ; + for (x = p->p.head ; x ; x = (struct dn_pkt *)x->dn_next ) + if (x->hdr.mh_data == r) + x->hdr.mh_data = (void *)ip_fw_default_rule ; + } +} + +/* + * handler for the various dummynet socket options + * (get, flush, config, del) + */ +static int +ip_dn_ctl(int optname, struct mbuf **mm) +{ + struct mbuf *m ; + if (optname == IP_DUMMYNET_GET) { + struct dn_pipe *p = all_pipes ; + *mm = m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = 0 ; + m->m_next = NULL ; + for (; p ; p = p->next ) { + struct dn_pipe *q = mtod(m,struct dn_pipe *) ; + memcpy( m->m_data, p, sizeof(*p) ); + /* + * return bw and delay in bits/s and ms, respectively + */ + q->bandwidth *= (8*hz) ; + q->delay = (q->delay * 1000) / hz ; + + m->m_len = sizeof(*p) ; + m->m_next = m_get(M_WAIT, MT_SOOPTS); + m = m->m_next ; + m->m_len = 0 ; + } + return 0 ; + } + if (securelevel > 2) { /* like in the firewall code... */ + if (m) (void)m_free(m); + return (EPERM) ; + } + m = *mm ; + if (optname == IP_DUMMYNET_FLUSH) { + dummynet_flush() ; + if (m) (void)m_free(m); + return 0 ; + } + if (!m) /* need an argument for the following */ + return (EINVAL); + if (optname == IP_DUMMYNET_CONFIGURE) { + struct dn_pipe *p = mtod(m,struct dn_pipe *) ; + struct dn_pipe *x, *a, *b ; + if (m->m_len != sizeof (*p) ) { + printf("dn_pipe Invalid length, %d instead of %d\n", + m->m_len, sizeof(*p) ); + (void)m_free(m); + return (EINVAL); + } + /* + * The config program passes parameters as follows: + * bandwidth = bits/second (0 = no limits); + * must be translated in bytes/tick. + * delay = ms + * must be translated in ticks. 
+ * queue_size = slots (0 = no limit) + * queue_size_bytes = bytes (0 = no limit) + * only one can be set, must be bound-checked + */ + if ( p->bandwidth > 0 ) { + p->bandwidth = p->bandwidth / 8 / hz ; + if (p->bandwidth == 0) /* too little does not make sense! */ + p->bandwidth = 10 ; + } + p->delay = ( p->delay * hz ) / 1000 ; + if (p->queue_size == 0 && p->queue_size_bytes == 0) + p->queue_size = 100 ; + if (p->queue_size != 0 ) /* buffers are prevailing */ + p->queue_size_bytes = 0 ; + if (p->queue_size > 100) + p->queue_size = 100 ; + if (p->queue_size_bytes > 1024*1024) + p->queue_size_bytes = 1024*1024 ; +#if 0 + printf("ip_dn: config pipe %d %d bit/s %d ms %d bufs\n", + p->pipe_nr, + p->bandwidth * 8 * hz , + p->delay * 1000 / hz , p->queue_size); +#endif + for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; + a = b , b = b->next) ; + if (b && b->pipe_nr == p->pipe_nr) { + /* XXX should spl and flush old pipe... */ + b->bandwidth = p->bandwidth ; + b->delay = p->delay ; + b->ticks_from_last_insert = p->delay ; + b->queue_size = p->queue_size ; + b->queue_size_bytes = p->queue_size_bytes ; + b->plr = p->plr ; + } else { + int s ; + x = malloc(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ; + if (x == NULL) { + printf("ip_dummynet.c: sorry no memory\n"); + return (ENOSPC) ; + } + bzero(x, sizeof(*x) ); + x->bandwidth = p->bandwidth ; + x->delay = p->delay ; + x->ticks_from_last_insert = p->delay ; + x->pipe_nr = p->pipe_nr ; + x->queue_size = p->queue_size ; + x->queue_size_bytes = p->queue_size_bytes ; + x->plr = p->plr ; + + s = splnet() ; + x->next = b ; + if (a == NULL) + all_pipes = x ; + else + a->next = x ; + splx(s); + } + (void)m_free(m); + return 0 ; + } + if (optname == IP_DUMMYNET_DEL) { + struct dn_pipe *p = mtod(m,struct dn_pipe *) ; + struct dn_pipe *x, *a, *b ; + + for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; + a = b , b = b->next) ; + if (b && b->pipe_nr == p->pipe_nr) { /* found pipe */ + int s = splnet() ; + 
struct ip_fw_chain *chain = ip_fw_chain.lh_first; + + if (a == NULL) + all_pipes = b->next ; + else + a->next = b->next ; + /* + * remove references to this pipe from the ip_fw rules. + */ + for (; chain; chain = chain->chain.le_next) { + register struct ip_fw *const f = chain->rule; + if (f->pipe_ptr == b) + f->pipe_ptr = NULL ; + } + splx(s); + purge_pipe(b); /* remove pkts from here */ + free(b, M_IPFW); + } + } + return 0 ; +} + +void +ip_dn_init(void) +{ + printf("DUMMYNET initialized (980901) -- size dn_pkt %d\n", + sizeof(struct dn_pkt)); + all_pipes = NULL ; + ip_dn_ctl_ptr = ip_dn_ctl; +} + +#ifdef DUMMYNET_MODULE + +#include +#include +#include + +MOD_MISC(dummynet); + +static ip_dn_ctl_t *old_dn_ctl_ptr ; + +static int +dummynet_load(struct lkm_table *lkmtp, int cmd) +{ + int s=splnet(); + old_dn_ctl_ptr = ip_dn_ctl_ptr; + ip_dn_init(); + splx(s); + return 0; +} + +static int +dummynet_unload(struct lkm_table *lkmtp, int cmd) +{ + int s=splnet(); + ip_dn_ctl_ptr = old_dn_ctl_ptr; + splx(s); + dummynet_flush(); + printf("DUMMYNET unloaded\n"); + return 0; +} + +int +dummynet_mod(struct lkm_table *lkmtp, int cmd, int ver) +{ + DISPATCH(lkmtp, cmd, ver, dummynet_load, dummynet_unload, lkm_nullcmd); +} +#endif diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h new file mode 100644 index 00000000000..33833fcaffb --- /dev/null +++ b/sys/netinet/ip_dummynet.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 1998 Luigi Rizzo + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ *
+ * $Id: ip_dummynet.h,v 1.1 1998/05/10 01:30:23 luigi Exp $
+ */
+
+#ifndef _IP_DUMMYNET_H
+#define _IP_DUMMYNET_H
+
+/*
+ * Definition of dummynet data structures.
+ * Dummynet handles a list of pipes, each one identified by a unique
+ * number (hopefully the list is short so we use a linked list).
+ *
+ * Each pipe descriptor contains a set of parameters identifying the
+ * pipe, and the packets queued on the pipe itself.
+ *
+ * I could have used queue macros, but the management i have
+ * is pretty simple and this makes the code more portable.
+ */
+
+/*
+ * struct dn_pkt identifies a packet in the dummynet queue. The
+ * first part is really an m_hdr for implementation purposes, and some
+ * fields are saved there. When passing the packet back to the ip_input/
+ * ip_output(), the struct is prepended to the mbuf chain with type
+ * MT_DUMMYNET, and contains the pointer to the matching rule
+ * (stored in hdr.mh_data).
+ */
+struct dn_pkt {
+ struct m_hdr hdr ;
+#define dn_next hdr.mh_nextpkt /* next element in queue */
+#define dn_m hdr.mh_next /* packet to be forwarded */
+#define dn_hlen hdr.mh_len /* hlen, for ip_output */
+#define dn_dir hdr.mh_flags /* one of the DN_TO_* values below */
+ int delay; /* stays queued until delay=0 */
+ struct ifnet *ifp; /* interface, for ip_output */
+ struct route ro; /* route, for ip_output. MUST COPY */
+
+#ifdef DUMMYNET_DEBUG
+ struct timeval beg, mid; /* testing only */
+ int act_delay; /* testing only */
+ int in_delay; /* testing only */
+#endif
+};
+
+/*
+ * a queue of packets, linked through dn_next. tail is meaningful
+ * only when head is not NULL.
+ */
+struct dn_queue {
+ struct dn_pkt *head, *tail;
+} ;
+
+/*
+ * descriptor of a pipe. The flags field will be used to speed up the
+ * forwarding code paths, in case some of the parameters are not
+ * used.
+ *
+ * The same structure is exchanged with the config program through the
+ * dummynet socket options; there bandwidth is in bits/s and delay in ms.
+ */
+struct dn_pipe { /* a pipe */
+ struct dn_pipe *next ;
+
+ u_short pipe_nr ; /* number */
+ u_short flags ; /* to speed up things */
+#define DN_HAVE_BW 1
+#define DN_HAVE_QUEUE 2
+#define DN_HAVE_DELAY 4
+ int bandwidth; /* really, bytes/tick. */
+ int queue_size ; /* max slots in the r queue, 0 = no limit */
+ int queue_size_bytes ; /* max bytes in the r queue, 0 = no limit */
+ int delay ; /* really, ticks */
+ int plr ; /* pkt loss rate (2^31-1 means 100%) */
+
+ struct dn_queue r; /* R-queue: packets waiting for bandwidth */
+ int r_len; /* elements in r_queue */
+ int r_len_bytes; /* bytes in r_queue */
+ int r_drops; /* drops from r_queue */
+ struct dn_queue p ; /* P-queue: packets waiting for the delay */
+ int ticks_from_last_insert; /* ticks since last move to the p queue */
+ long numbytes; /* which can send or receive */
+};
+
+/*
+ * what to do of a packet when it comes out of a pipe
+ */
+#define DN_TO_IP_OUT 1 /* pass it to ip_output() */
+#define DN_TO_IP_IN 2 /* pass it to ip_input() */
+#define DN_TO_BDG_FWD 3 /* pass it to bdg_forward() */
+#ifdef KERNEL
+void ip_dn_init(void); /* called in ip_input.c */
+void dn_rule_delete(void *r); /* used in ip_fw.c */
+/* queue packet m on pipe number `pipe'; dir is one of DN_TO_* */
+int dummynet_io(int pipe, int dir,
+ struct mbuf *m, struct ifnet *ifp, struct route *ro, int hlen,
+ struct ip_fw_chain *rule);
+#endif
+#endif /* _IP_DUMMYNET_H */
-- 
2.45.2