]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/contrib/ipfilter/netinet/ip_sync.c
MFC 57785538c6e0d7e8ca0f161ab95bae10fd304047 and
[FreeBSD/FreeBSD.git] / sys / contrib / ipfilter / netinet / ip_sync.c
1 /*      $FreeBSD$       */
2
3 /*
4  * Copyright (C) 2012 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  */
8 #if defined(KERNEL) || defined(_KERNEL)
9 # undef KERNEL
10 # undef _KERNEL
11 # define        KERNEL  1
12 # define        _KERNEL 1
13 #endif
14 #include <sys/errno.h>
15 #include <sys/types.h>
16 #include <sys/param.h>
17 #include <sys/file.h>
18 #if !defined(_KERNEL) && !defined(__KERNEL__)
19 # include <stdio.h>
20 # include <stdlib.h>
21 # include <string.h>
22 # define _KERNEL
23 # define KERNEL
24 # include <sys/uio.h>
25 # undef _KERNEL
26 # undef KERNEL
27 #else
28 # include <sys/systm.h>
29 # if !defined(__SVR4)
30 #  include <sys/mbuf.h>
31 # endif
32 # include <sys/select.h>
33 # ifdef __FreeBSD__
34 #  include <sys/selinfo.h>
35 # endif
36 #endif
37 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104000000)
38 # include <sys/proc.h>
39 #endif
40 #if defined(_KERNEL) && defined(__FreeBSD__)
41 # include <sys/filio.h>
42 # include <sys/fcntl.h>
43 #else
44 # include <sys/ioctl.h>
45 #endif
46 #include <sys/time.h>
47 # include <sys/protosw.h>
48 #include <sys/socket.h>
49 #if defined(__SVR4)
50 # include <sys/filio.h>
51 # include <sys/byteorder.h>
52 # ifdef _KERNEL
53 #  include <sys/dditypes.h>
54 # endif
55 # include <sys/stream.h>
56 # include <sys/kmem.h>
57 #endif
58
59 #include <net/if.h>
60 #ifdef sun
61 # include <net/af.h>
62 #endif
63 #include <netinet/in.h>
64 #include <netinet/in_systm.h>
65 #include <netinet/ip.h>
66 #include <netinet/tcp.h>
67 # include <netinet/ip_var.h>
68 # include <netinet/tcp_fsm.h>
69 #include <netinet/udp.h>
70 #include <netinet/ip_icmp.h>
71 #include "netinet/ip_compat.h"
72 #include <netinet/tcpip.h>
73 #include "netinet/ip_fil.h"
74 #include "netinet/ip_nat.h"
75 #include "netinet/ip_frag.h"
76 #include "netinet/ip_state.h"
77 #include "netinet/ip_proxy.h"
78 #include "netinet/ip_sync.h"
79 #ifdef  USE_INET6
80 #include <netinet/icmp6.h>
81 #endif
82 #if defined(__FreeBSD__)
83 # include <sys/malloc.h>
84 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
85 #  include <sys/libkern.h>
86 #  include <sys/systm.h>
87 # endif
88 #endif
89 /* END OF INCLUDES */
90
/* RCS identification string; left out of the build when lint is defined. */
#ifndef lint
static const char rcsid[] = "@(#)$Id$";
#endif
94
95 #define SYNC_STATETABSZ 256
96 #define SYNC_NATTABSZ   256
97
98 typedef struct ipf_sync_softc_s {
99         ipfmutex_t      ipf_syncadd;
100         ipfmutex_t      ipsl_mutex;
101         ipfrwlock_t     ipf_syncstate;
102         ipfrwlock_t     ipf_syncnat;
103 #if SOLARIS && defined(_KERNEL)
104         kcondvar_t      ipslwait;
105 #endif
106         synclist_t      **syncstatetab;
107         synclist_t      **syncnattab;
108         synclogent_t    *synclog;
109         syncupdent_t    *syncupd;
110         u_int           ipf_sync_num;
111         u_int           ipf_sync_wrap;
112         u_int           sl_idx;         /* next available sync log entry */
113         u_int           su_idx;         /* next available sync update entry */
114         u_int           sl_tail;        /* next sync log entry to read */
115         u_int           su_tail;        /* next sync update entry to read */
116         int             ipf_sync_log_sz;
117         int             ipf_sync_nat_tab_sz;
118         int             ipf_sync_state_tab_sz;
119         int             ipf_sync_debug;
120         int             ipf_sync_events;
121         u_32_t          ipf_sync_lastwakeup;
122         int             ipf_sync_wake_interval;
123         int             ipf_sync_event_high_wm;
124         int             ipf_sync_queue_high_wm;
125         int             ipf_sync_inited;
126 } ipf_sync_softc_t;
127
128 static int ipf_sync_flush_table(ipf_sync_softc_t *, int, synclist_t **);
129 static void ipf_sync_wakeup(ipf_main_softc_t *);
130 static void ipf_sync_del(ipf_sync_softc_t *, synclist_t *);
131 static void ipf_sync_poll_wakeup(ipf_main_softc_t *);
132 static int ipf_sync_nat(ipf_main_softc_t *, synchdr_t *, void *);
133 static int ipf_sync_state(ipf_main_softc_t *, synchdr_t *, void *);
134
135 # if !defined(sparc) && !defined(__hppa)
136 void ipf_sync_tcporder(int, struct tcpdata *);
137 void ipf_sync_natorder(int, struct nat *);
138 void ipf_sync_storder(int, struct ipstate *);
139 # endif
140
141
142 void *
143 ipf_sync_soft_create(softc)
144         ipf_main_softc_t *softc;
145 {
146         ipf_sync_softc_t *softs;
147
148         KMALLOC(softs, ipf_sync_softc_t *);
149         if (softs == NULL) {
150                 IPFERROR(110024);
151                 return NULL;
152         }
153
154         bzero((char *)softs, sizeof(*softs));
155
156         softs->ipf_sync_log_sz = SYNCLOG_SZ;
157         softs->ipf_sync_nat_tab_sz = SYNC_STATETABSZ;
158         softs->ipf_sync_state_tab_sz = SYNC_STATETABSZ;
159         softs->ipf_sync_event_high_wm = SYNCLOG_SZ * 100 / 90;  /* 90% */
160         softs->ipf_sync_queue_high_wm = SYNCLOG_SZ * 100 / 90;  /* 90% */
161
162         return softs;
163 }
164
165
166 /* ------------------------------------------------------------------------ */
167 /* Function:    ipf_sync_init                                               */
168 /* Returns:     int - 0 == success, -1 == failure                           */
169 /* Parameters:  Nil                                                         */
170 /*                                                                          */
171 /* Initialise all of the locks required for the sync code and initialise    */
172 /* any data structures, as required.                                        */
173 /* ------------------------------------------------------------------------ */
174 int
175 ipf_sync_soft_init(softc, arg)
176         ipf_main_softc_t *softc;
177         void *arg;
178 {
179         ipf_sync_softc_t *softs = arg;
180
181         KMALLOCS(softs->synclog, synclogent_t *,
182                  softs->ipf_sync_log_sz * sizeof(*softs->synclog));
183         if (softs->synclog == NULL)
184                 return -1;
185         bzero((char *)softs->synclog,
186               softs->ipf_sync_log_sz * sizeof(*softs->synclog));
187
188         KMALLOCS(softs->syncupd, syncupdent_t *,
189                  softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
190         if (softs->syncupd == NULL)
191                 return -2;
192         bzero((char *)softs->syncupd,
193               softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
194
195         KMALLOCS(softs->syncstatetab, synclist_t **,
196                  softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
197         if (softs->syncstatetab == NULL)
198                 return -3;
199         bzero((char *)softs->syncstatetab,
200               softs->ipf_sync_state_tab_sz * sizeof(*softs->syncstatetab));
201
202         KMALLOCS(softs->syncnattab, synclist_t **,
203                  softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
204         if (softs->syncnattab == NULL)
205                 return -3;
206         bzero((char *)softs->syncnattab,
207               softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
208
209         softs->ipf_sync_num = 1;
210         softs->ipf_sync_wrap = 0;
211         softs->sl_idx = 0;
212         softs->su_idx = 0;
213         softs->sl_tail = 0;
214         softs->su_tail = 0;
215         softs->ipf_sync_events = 0;
216         softs->ipf_sync_lastwakeup = 0;
217
218
219 # if SOLARIS && defined(_KERNEL)
220         cv_init(&softs->ipslwait, "ipsl condvar", CV_DRIVER, NULL);
221 # endif
222         RWLOCK_INIT(&softs->ipf_syncstate, "add things to state sync table");
223         RWLOCK_INIT(&softs->ipf_syncnat, "add things to nat sync table");
224         MUTEX_INIT(&softs->ipf_syncadd, "add things to sync table");
225         MUTEX_INIT(&softs->ipsl_mutex, "read ring lock");
226
227         softs->ipf_sync_inited = 1;
228
229         return 0;
230 }
231
232
233 /* ------------------------------------------------------------------------ */
234 /* Function:    ipf_sync_unload                                             */
235 /* Returns:     int - 0 == success, -1 == failure                           */
236 /* Parameters:  Nil                                                         */
237 /*                                                                          */
238 /* Destroy the locks created when initialising and free any memory in use   */
239 /* with the synchronisation tables.                                         */
240 /* ------------------------------------------------------------------------ */
241 int
242 ipf_sync_soft_fini(softc, arg)
243         ipf_main_softc_t *softc;
244         void *arg;
245 {
246         ipf_sync_softc_t *softs = arg;
247
248         if (softs->syncnattab != NULL) {
249                 ipf_sync_flush_table(softs, softs->ipf_sync_nat_tab_sz,
250                                      softs->syncnattab);
251                 KFREES(softs->syncnattab,
252                        softs->ipf_sync_nat_tab_sz * sizeof(*softs->syncnattab));
253                 softs->syncnattab = NULL;
254         }
255
256         if (softs->syncstatetab != NULL) {
257                 ipf_sync_flush_table(softs, softs->ipf_sync_state_tab_sz,
258                                      softs->syncstatetab);
259                 KFREES(softs->syncstatetab,
260                        softs->ipf_sync_state_tab_sz *
261                        sizeof(*softs->syncstatetab));
262                 softs->syncstatetab = NULL;
263         }
264
265         if (softs->syncupd != NULL) {
266                 KFREES(softs->syncupd,
267                        softs->ipf_sync_log_sz * sizeof(*softs->syncupd));
268                 softs->syncupd = NULL;
269         }
270
271         if (softs->synclog != NULL) {
272                 KFREES(softs->synclog,
273                        softs->ipf_sync_log_sz * sizeof(*softs->synclog));
274                 softs->synclog = NULL;
275         }
276
277         if (softs->ipf_sync_inited == 1) {
278                 MUTEX_DESTROY(&softs->ipsl_mutex);
279                 MUTEX_DESTROY(&softs->ipf_syncadd);
280                 RW_DESTROY(&softs->ipf_syncnat);
281                 RW_DESTROY(&softs->ipf_syncstate);
282                 softs->ipf_sync_inited = 0;
283         }
284
285         return 0;
286 }
287
288 void
289 ipf_sync_soft_destroy(softc, arg)
290         ipf_main_softc_t *softc;
291         void *arg;
292 {
293         ipf_sync_softc_t *softs = arg;
294
295         KFREE(softs);
296 }
297
298
299 # if !defined(sparc)
300 /* ------------------------------------------------------------------------ */
301 /* Function:    ipf_sync_tcporder                                           */
302 /* Returns:     Nil                                                         */
303 /* Parameters:  way(I) - direction of byte order conversion.                */
304 /*              td(IO) - pointer to data to be converted.                   */
305 /*                                                                          */
306 /* Do byte swapping on values in the TCP state information structure that   */
307 /* need to be used at both ends by the host in their native byte order.     */
308 /* ------------------------------------------------------------------------ */
309 void
310 ipf_sync_tcporder(way, td)
311         int way;
312         tcpdata_t *td;
313 {
314         if (way) {
315                 td->td_maxwin = htons(td->td_maxwin);
316                 td->td_end = htonl(td->td_end);
317                 td->td_maxend = htonl(td->td_maxend);
318         } else {
319                 td->td_maxwin = ntohs(td->td_maxwin);
320                 td->td_end = ntohl(td->td_end);
321                 td->td_maxend = ntohl(td->td_maxend);
322         }
323 }
324
325
326 /* ------------------------------------------------------------------------ */
327 /* Function:    ipf_sync_natorder                                           */
328 /* Returns:     Nil                                                         */
329 /* Parameters:  way(I)  - direction of byte order conversion.               */
330 /*              nat(IO) - pointer to data to be converted.                  */
331 /*                                                                          */
332 /* Do byte swapping on values in the NAT data structure that need to be     */
333 /* used at both ends by the host in their native byte order.                */
334 /* ------------------------------------------------------------------------ */
335 void
336 ipf_sync_natorder(way, n)
337         int way;
338         nat_t *n;
339 {
340         if (way) {
341                 n->nat_age = htonl(n->nat_age);
342                 n->nat_flags = htonl(n->nat_flags);
343                 n->nat_ipsumd = htonl(n->nat_ipsumd);
344                 n->nat_use = htonl(n->nat_use);
345                 n->nat_dir = htonl(n->nat_dir);
346         } else {
347                 n->nat_age = ntohl(n->nat_age);
348                 n->nat_flags = ntohl(n->nat_flags);
349                 n->nat_ipsumd = ntohl(n->nat_ipsumd);
350                 n->nat_use = ntohl(n->nat_use);
351                 n->nat_dir = ntohl(n->nat_dir);
352         }
353 }
354
355
356 /* ------------------------------------------------------------------------ */
357 /* Function:    ipf_sync_storder                                            */
358 /* Returns:     Nil                                                         */
359 /* Parameters:  way(I)  - direction of byte order conversion.               */
360 /*              ips(IO) - pointer to data to be converted.                  */
361 /*                                                                          */
362 /* Do byte swapping on values in the IP state data structure that need to   */
363 /* be used at both ends by the host in their native byte order.             */
364 /* ------------------------------------------------------------------------ */
365 void
366 ipf_sync_storder(way, ips)
367         int way;
368         ipstate_t *ips;
369 {
370         ipf_sync_tcporder(way, &ips->is_tcp.ts_data[0]);
371         ipf_sync_tcporder(way, &ips->is_tcp.ts_data[1]);
372
373         if (way) {
374                 ips->is_hv = htonl(ips->is_hv);
375                 ips->is_die = htonl(ips->is_die);
376                 ips->is_pass = htonl(ips->is_pass);
377                 ips->is_flags = htonl(ips->is_flags);
378                 ips->is_opt[0] = htonl(ips->is_opt[0]);
379                 ips->is_opt[1] = htonl(ips->is_opt[1]);
380                 ips->is_optmsk[0] = htonl(ips->is_optmsk[0]);
381                 ips->is_optmsk[1] = htonl(ips->is_optmsk[1]);
382                 ips->is_sec = htons(ips->is_sec);
383                 ips->is_secmsk = htons(ips->is_secmsk);
384                 ips->is_auth = htons(ips->is_auth);
385                 ips->is_authmsk = htons(ips->is_authmsk);
386                 ips->is_s0[0] = htonl(ips->is_s0[0]);
387                 ips->is_s0[1] = htonl(ips->is_s0[1]);
388                 ips->is_smsk[0] = htons(ips->is_smsk[0]);
389                 ips->is_smsk[1] = htons(ips->is_smsk[1]);
390         } else {
391                 ips->is_hv = ntohl(ips->is_hv);
392                 ips->is_die = ntohl(ips->is_die);
393                 ips->is_pass = ntohl(ips->is_pass);
394                 ips->is_flags = ntohl(ips->is_flags);
395                 ips->is_opt[0] = ntohl(ips->is_opt[0]);
396                 ips->is_opt[1] = ntohl(ips->is_opt[1]);
397                 ips->is_optmsk[0] = ntohl(ips->is_optmsk[0]);
398                 ips->is_optmsk[1] = ntohl(ips->is_optmsk[1]);
399                 ips->is_sec = ntohs(ips->is_sec);
400                 ips->is_secmsk = ntohs(ips->is_secmsk);
401                 ips->is_auth = ntohs(ips->is_auth);
402                 ips->is_authmsk = ntohs(ips->is_authmsk);
403                 ips->is_s0[0] = ntohl(ips->is_s0[0]);
404                 ips->is_s0[1] = ntohl(ips->is_s0[1]);
405                 ips->is_smsk[0] = ntohl(ips->is_smsk[0]);
406                 ips->is_smsk[1] = ntohl(ips->is_smsk[1]);
407         }
408 }
409 # else /* !defined(sparc) */
410 #  define       ipf_sync_tcporder(x,y)
411 #  define       ipf_sync_natorder(x,y)
412 #  define       ipf_sync_storder(x,y)
413 # endif /* !defined(sparc) */
414
415
416 /* ------------------------------------------------------------------------ */
417 /* Function:    ipf_sync_write                                              */
418 /* Returns:     int    - 0 == success, else error value.                    */
419 /* Parameters:  uio(I) - pointer to information about data to write         */
420 /*                                                                          */
421 /* Moves data from user space into the kernel and uses it for updating data */
422 /* structures in the state/NAT tables.                                      */
423 /* ------------------------------------------------------------------------ */
424 int
425 ipf_sync_write(softc, uio)
426         ipf_main_softc_t *softc;
427         struct uio *uio;
428 {
429         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
430         synchdr_t sh;
431
432         /*
433          * THIS MUST BE SUFFICIENT LARGE TO STORE
434          * ANY POSSIBLE DATA TYPE
435          */
436         char data[2048];
437
438         int err = 0;
439
440 #  if defined(__NetBSD__) || defined(__FreeBSD__)
441         uio->uio_rw = UIO_WRITE;
442 #  endif
443
444         /* Try to get bytes */
445         while (uio->uio_resid > 0) {
446
447                 if (uio->uio_resid >= sizeof(sh)) {
448
449                         err = UIOMOVE(&sh, sizeof(sh), UIO_WRITE, uio);
450
451                         if (err) {
452                                 if (softs->ipf_sync_debug > 2)
453                                         printf("uiomove(header) failed: %d\n",
454                                                 err);
455                                 return err;
456                         }
457
458                         /* convert to host order */
459                         sh.sm_magic = ntohl(sh.sm_magic);
460                         sh.sm_len = ntohl(sh.sm_len);
461                         sh.sm_num = ntohl(sh.sm_num);
462
463                         if (softs->ipf_sync_debug > 8)
464                                 printf("[%d] Read v:%d p:%d cmd:%d table:%d rev:%d len:%d magic:%x\n",
465                                         sh.sm_num, sh.sm_v, sh.sm_p, sh.sm_cmd,
466                                         sh.sm_table, sh.sm_rev, sh.sm_len,
467                                         sh.sm_magic);
468
469                         if (sh.sm_magic != SYNHDRMAGIC) {
470                                 if (softs->ipf_sync_debug > 2)
471                                         printf("uiomove(header) invalid %s\n",
472                                                 "magic");
473                                 IPFERROR(110001);
474                                 return EINVAL;
475                         }
476
477                         if (sh.sm_v != 4 && sh.sm_v != 6) {
478                                 if (softs->ipf_sync_debug > 2)
479                                         printf("uiomove(header) invalid %s\n",
480                                                 "protocol");
481                                 IPFERROR(110002);
482                                 return EINVAL;
483                         }
484
485                         if (sh.sm_cmd > SMC_MAXCMD) {
486                                 if (softs->ipf_sync_debug > 2)
487                                         printf("uiomove(header) invalid %s\n",
488                                                 "command");
489                                 IPFERROR(110003);
490                                 return EINVAL;
491                         }
492
493
494                         if (sh.sm_table > SMC_MAXTBL) {
495                                 if (softs->ipf_sync_debug > 2)
496                                         printf("uiomove(header) invalid %s\n",
497                                                 "table");
498                                 IPFERROR(110004);
499                                 return EINVAL;
500                         }
501
502                 } else {
503                         /* unsufficient data, wait until next call */
504                         if (softs->ipf_sync_debug > 2)
505                                 printf("uiomove(header) insufficient data");
506                         IPFERROR(110005);
507                         return EAGAIN;
508                 }
509
510
511                 /*
512                  * We have a header, so try to read the amount of data
513                  * needed for the request
514                  */
515
516                 /* not supported */
517                 if (sh.sm_len == 0) {
518                         if (softs->ipf_sync_debug > 2)
519                                 printf("uiomove(data zero length %s\n",
520                                         "not supported");
521                         IPFERROR(110006);
522                         return EINVAL;
523                 }
524
525                 if (uio->uio_resid >= sh.sm_len) {
526
527                         err = UIOMOVE(data, sh.sm_len, UIO_WRITE, uio);
528
529                         if (err) {
530                                 if (softs->ipf_sync_debug > 2)
531                                         printf("uiomove(data) failed: %d\n",
532                                                 err);
533                                 return err;
534                         }
535
536                         if (softs->ipf_sync_debug > 7)
537                                 printf("uiomove(data) %d bytes read\n",
538                                         sh.sm_len);
539
540                         if (sh.sm_table == SMC_STATE)
541                                 err = ipf_sync_state(softc, &sh, data);
542                         else if (sh.sm_table == SMC_NAT)
543                                 err = ipf_sync_nat(softc, &sh, data);
544                         if (softs->ipf_sync_debug > 7)
545                                 printf("[%d] Finished with error %d\n",
546                                         sh.sm_num, err);
547
548                 } else {
549                         /* insufficient data, wait until next call */
550                         if (softs->ipf_sync_debug > 2)
551                                 printf("uiomove(data) %s %d bytes, got %d\n",
552                                         "insufficient data, need",
553                                         sh.sm_len, (int)uio->uio_resid);
554                         IPFERROR(110007);
555                         return EAGAIN;
556                 }
557         }
558
559         /* no more data */
560         return 0;
561 }
562
563
564 /* ------------------------------------------------------------------------ */
565 /* Function:    ipf_sync_read                                               */
566 /* Returns:     int    - 0 == success, else error value.                    */
567 /* Parameters:  uio(O) - pointer to information about where to store data   */
568 /*                                                                          */
569 /* This function is called when a user program wants to read some data      */
570 /* for pending state/NAT updates.  If no data is available, the caller is   */
571 /* put to sleep, pending a wakeup from the "lower half" of this code.       */
572 /* ------------------------------------------------------------------------ */
573 int
574 ipf_sync_read(softc, uio)
575         ipf_main_softc_t *softc;
576         struct uio *uio;
577 {
578         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
579         syncupdent_t *su;
580         synclogent_t *sl;
581         int err = 0;
582
583         if ((uio->uio_resid & 3) || (uio->uio_resid < 8)) {
584                 IPFERROR(110008);
585                 return EINVAL;
586         }
587
588 #  if defined(__NetBSD__) || defined(__FreeBSD__)
589         uio->uio_rw = UIO_READ;
590 #  endif
591
592         MUTEX_ENTER(&softs->ipsl_mutex);
593         while ((softs->sl_tail == softs->sl_idx) &&
594                (softs->su_tail == softs->su_idx)) {
595 #  if defined(_KERNEL)
596 #   if SOLARIS
597                 if (!cv_wait_sig(&softs->ipslwait, &softs->ipsl_mutex.ipf_lk)) {
598                         MUTEX_EXIT(&softs->ipsl_mutex);
599                         IPFERROR(110009);
600                         return EINTR;
601                 }
602 #   else
603                 MUTEX_EXIT(&softs->ipsl_mutex);
604                 err = SLEEP(&softs->sl_tail, "ipl sleep");
605                 if (err) {
606                         IPFERROR(110012);
607                         return EINTR;
608                 }
609                 MUTEX_ENTER(&softs->ipsl_mutex);
610 #   endif /* SOLARIS */
611 #  endif /* _KERNEL */
612         }
613
614         while ((softs->sl_tail < softs->sl_idx) &&
615                (uio->uio_resid > sizeof(*sl))) {
616                 sl = softs->synclog + softs->sl_tail++;
617                 MUTEX_EXIT(&softs->ipsl_mutex);
618                 err = UIOMOVE(sl, sizeof(*sl), UIO_READ, uio);
619                 if (err != 0)
620                         goto goterror;
621                 MUTEX_ENTER(&softs->ipsl_mutex);
622         }
623
624         while ((softs->su_tail < softs->su_idx) &&
625                (uio->uio_resid > sizeof(*su))) {
626                 su = softs->syncupd + softs->su_tail;
627                 softs->su_tail++;
628                 MUTEX_EXIT(&softs->ipsl_mutex);
629                 err = UIOMOVE(su, sizeof(*su), UIO_READ, uio);
630                 if (err != 0)
631                         goto goterror;
632                 MUTEX_ENTER(&softs->ipsl_mutex);
633                 if (su->sup_hdr.sm_sl != NULL)
634                         su->sup_hdr.sm_sl->sl_idx = -1;
635         }
636         if (softs->sl_tail == softs->sl_idx)
637                 softs->sl_tail = softs->sl_idx = 0;
638         if (softs->su_tail == softs->su_idx)
639                 softs->su_tail = softs->su_idx = 0;
640         MUTEX_EXIT(&softs->ipsl_mutex);
641 goterror:
642         return err;
643 }
644
645
646 /* ------------------------------------------------------------------------ */
647 /* Function:    ipf_sync_state                                              */
648 /* Returns:     int    - 0 == success, else error value.                    */
/* Parameters:  softc(I) - pointer to soft context main structure           */
/*              sp(I)    - pointer to sync packet data header               */
/*              data(I)  - pointer to data that followed the sync header    */
651 /*                                                                          */
652 /* Updates the state table according to information passed in the sync      */
653 /* header.  As required, more data is fetched from the uio structure but    */
654 /* varies depending on the contents of the sync header.  This function can  */
655 /* create a new state entry or update one.  Deletion is left to the state   */
656 /* structures being timed out correctly.                                    */
657 /* ------------------------------------------------------------------------ */
658 static int
659 ipf_sync_state(softc, sp, data)
660         ipf_main_softc_t *softc;
661         synchdr_t *sp;
662         void *data;
663 {
664         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
665         synctcp_update_t su;
666         ipstate_t *is, sn;
667         synclist_t *sl;
668         frentry_t *fr;
669         u_int hv;
670         int err = 0;
671
672         hv = sp->sm_num & (softs->ipf_sync_state_tab_sz - 1);
673
674         switch (sp->sm_cmd)
675         {
676         case SMC_CREATE :
677
678                 bcopy(data, &sn, sizeof(sn));
679                 KMALLOC(is, ipstate_t *);
680                 if (is == NULL) {
681                         IPFERROR(110013);
682                         err = ENOMEM;
683                         break;
684                 }
685
686                 KMALLOC(sl, synclist_t *);
687                 if (sl == NULL) {
688                         IPFERROR(110014);
689                         err = ENOMEM;
690                         KFREE(is);
691                         break;
692                 }
693
694                 bzero((char *)is, offsetof(ipstate_t, is_die));
695                 bcopy((char *)&sn.is_die, (char *)&is->is_die,
696                       sizeof(*is) - offsetof(ipstate_t, is_die));
697                 ipf_sync_storder(0, is);
698
699                 /*
700                  * We need to find the same rule on the slave as was used on
701                  * the master to create this state entry.
702                  */
703                 READ_ENTER(&softc->ipf_mutex);
704                 fr = ipf_getrulen(softc, IPL_LOGIPF, sn.is_group, sn.is_rulen);
705                 if (fr != NULL) {
706                         MUTEX_ENTER(&fr->fr_lock);
707                         fr->fr_ref++;
708                         fr->fr_statecnt++;
709                         MUTEX_EXIT(&fr->fr_lock);
710                 }
711                 RWLOCK_EXIT(&softc->ipf_mutex);
712
713                 if (softs->ipf_sync_debug > 4)
714                         printf("[%d] Filter rules = %p\n", sp->sm_num, fr);
715
716                 is->is_rule = fr;
717                 is->is_sync = sl;
718
719                 sl->sl_idx = -1;
720                 sl->sl_ips = is;
721                 bcopy(sp, &sl->sl_hdr, sizeof(struct synchdr));
722
723                 WRITE_ENTER(&softs->ipf_syncstate);
724                 WRITE_ENTER(&softc->ipf_state);
725
726                 sl->sl_pnext = softs->syncstatetab + hv;
727                 sl->sl_next = softs->syncstatetab[hv];
728                 if (softs->syncstatetab[hv] != NULL)
729                         softs->syncstatetab[hv]->sl_pnext = &sl->sl_next;
730                 softs->syncstatetab[hv] = sl;
731                 MUTEX_DOWNGRADE(&softs->ipf_syncstate);
732                 ipf_state_insert(softc, is, sp->sm_rev);
733                 /*
734                  * Do not initialise the interface pointers for the state
735                  * entry as the full complement of interface names may not
736                  * be present.
737                  *
738                  * Put this state entry on its timeout queue.
739                  */
740                 /*fr_setstatequeue(is, sp->sm_rev);*/
741                 break;
742
743         case SMC_UPDATE :
744                 bcopy(data, &su, sizeof(su));
745
746                 if (softs->ipf_sync_debug > 4)
747                         printf("[%d] Update age %lu state %d/%d \n",
748                                 sp->sm_num, su.stu_age, su.stu_state[0],
749                                 su.stu_state[1]);
750
751                 READ_ENTER(&softs->ipf_syncstate);
752                 for (sl = softs->syncstatetab[hv]; (sl != NULL);
753                      sl = sl->sl_next)
754                         if (sl->sl_hdr.sm_num == sp->sm_num)
755                                 break;
756                 if (sl == NULL) {
757                         if (softs->ipf_sync_debug > 1)
758                                 printf("[%d] State not found - can't update\n",
759                                         sp->sm_num);
760                         RWLOCK_EXIT(&softs->ipf_syncstate);
761                         IPFERROR(110015);
762                         err = ENOENT;
763                         break;
764                 }
765
766                 READ_ENTER(&softc->ipf_state);
767
768                 if (softs->ipf_sync_debug > 6)
769                         printf("[%d] Data from state v:%d p:%d cmd:%d table:%d rev:%d\n",
770                                 sp->sm_num, sl->sl_hdr.sm_v, sl->sl_hdr.sm_p,
771                                 sl->sl_hdr.sm_cmd, sl->sl_hdr.sm_table,
772                                 sl->sl_hdr.sm_rev);
773
774                 is = sl->sl_ips;
775
776                 MUTEX_ENTER(&is->is_lock);
777                 switch (sp->sm_p)
778                 {
779                 case IPPROTO_TCP :
780                         /* XXX FV --- shouldn't we do ntohl/htonl???? XXX */
781                         is->is_send = su.stu_data[0].td_end;
782                         is->is_maxsend = su.stu_data[0].td_maxend;
783                         is->is_maxswin = su.stu_data[0].td_maxwin;
784                         is->is_state[0] = su.stu_state[0];
785                         is->is_dend = su.stu_data[1].td_end;
786                         is->is_maxdend = su.stu_data[1].td_maxend;
787                         is->is_maxdwin = su.stu_data[1].td_maxwin;
788                         is->is_state[1] = su.stu_state[1];
789                         break;
790                 default :
791                         break;
792                 }
793
794                 if (softs->ipf_sync_debug > 6)
795                         printf("[%d] Setting timers for state\n", sp->sm_num);
796
797                 ipf_state_setqueue(softc, is, sp->sm_rev);
798
799                 MUTEX_EXIT(&is->is_lock);
800                 break;
801
802         default :
803                 IPFERROR(110016);
804                 err = EINVAL;
805                 break;
806         }
807
808         if (err == 0) {
809                 RWLOCK_EXIT(&softc->ipf_state);
810                 RWLOCK_EXIT(&softs->ipf_syncstate);
811         }
812
813         if (softs->ipf_sync_debug > 6)
814                 printf("[%d] Update completed with error %d\n",
815                         sp->sm_num, err);
816
817         return err;
818 }
819
820
821 /* ------------------------------------------------------------------------ */
822 /* Function:    ipf_sync_del                                                */
823 /* Returns:     Nil                                                         */
824 /* Parameters:  sl(I) - pointer to synclist object to delete                */
825 /*                                                                          */
826 /* Deletes an object from the synclist.                                     */
827 /* ------------------------------------------------------------------------ */
828 static void
829 ipf_sync_del(softs, sl)
830         ipf_sync_softc_t *softs;
831         synclist_t *sl;
832 {
833         *sl->sl_pnext = sl->sl_next;
834         if (sl->sl_next != NULL)
835                 sl->sl_next->sl_pnext = sl->sl_pnext;
836         if (sl->sl_idx != -1)
837                 softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
838 }
839
840
841 /* ------------------------------------------------------------------------ */
842 /* Function:    ipf_sync_del_state                                          */
843 /* Returns:     Nil                                                         */
844 /* Parameters:  sl(I) - pointer to synclist object to delete                */
845 /*                                                                          */
846 /* Deletes an object from the synclist state table and free's its memory.   */
847 /* ------------------------------------------------------------------------ */
848 void
849 ipf_sync_del_state(arg, sl)
850         void *arg;
851         synclist_t *sl;
852 {
853         ipf_sync_softc_t *softs = arg;
854
855         WRITE_ENTER(&softs->ipf_syncstate);
856         ipf_sync_del(softs, sl);
857         RWLOCK_EXIT(&softs->ipf_syncstate);
858         KFREE(sl);
859 }
860
861
862 /* ------------------------------------------------------------------------ */
863 /* Function:    ipf_sync_del_nat                                            */
864 /* Returns:     Nil                                                         */
865 /* Parameters:  sl(I) - pointer to synclist object to delete                */
866 /*                                                                          */
867 /* Deletes an object from the synclist nat table and free's its memory.     */
868 /* ------------------------------------------------------------------------ */
869 void
870 ipf_sync_del_nat(arg, sl)
871         void *arg;
872         synclist_t *sl;
873 {
874         ipf_sync_softc_t *softs = arg;
875
876         WRITE_ENTER(&softs->ipf_syncnat);
877         ipf_sync_del(softs, sl);
878         RWLOCK_EXIT(&softs->ipf_syncnat);
879         KFREE(sl);
880 }
881
882
883 /* ------------------------------------------------------------------------ */
884 /* Function:    ipf_sync_nat                                                */
885 /* Returns:     int    - 0 == success, else error value.                    */
886 /* Parameters:  sp(I)  - pointer to sync packet data header                 */
887 /*              uio(I) - pointer to user data for further information       */
888 /*                                                                          */
889 /* Updates the NAT  table according to information passed in the sync       */
890 /* header.  As required, more data is fetched from the uio structure but    */
891 /* varies depending on the contents of the sync header.  This function can  */
892 /* create a new NAT entry or update one.  Deletion is left to the NAT       */
893 /* structures being timed out correctly.                                    */
894 /* ------------------------------------------------------------------------ */
895 static int
896 ipf_sync_nat(softc, sp, data)
897         ipf_main_softc_t *softc;
898         synchdr_t *sp;
899         void *data;
900 {
901         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
902         syncupdent_t su;
903         nat_t *n, *nat;
904         synclist_t *sl;
905         u_int hv = 0;
906         int err = 0;
907
908         READ_ENTER(&softs->ipf_syncnat);
909
910         switch (sp->sm_cmd)
911         {
912         case SMC_CREATE :
913                 KMALLOC(n, nat_t *);
914                 if (n == NULL) {
915                         IPFERROR(110017);
916                         err = ENOMEM;
917                         break;
918                 }
919
920                 KMALLOC(sl, synclist_t *);
921                 if (sl == NULL) {
922                         IPFERROR(110018);
923                         err = ENOMEM;
924                         KFREE(n);
925                         break;
926                 }
927
928                 nat = (nat_t *)data;
929                 bzero((char *)n, offsetof(nat_t, nat_age));
930                 bcopy((char *)&nat->nat_age, (char *)&n->nat_age,
931                       sizeof(*n) - offsetof(nat_t, nat_age));
932                 ipf_sync_natorder(0, n);
933                 n->nat_sync = sl;
934                 n->nat_rev = sl->sl_rev;
935
936                 sl->sl_idx = -1;
937                 sl->sl_ipn = n;
938                 sl->sl_num = ntohl(sp->sm_num);
939
940                 WRITE_ENTER(&softc->ipf_nat);
941                 sl->sl_pnext = softs->syncnattab + hv;
942                 sl->sl_next = softs->syncnattab[hv];
943                 if (softs->syncnattab[hv] != NULL)
944                         softs->syncnattab[hv]->sl_pnext = &sl->sl_next;
945                 softs->syncnattab[hv] = sl;
946                 (void) ipf_nat_insert(softc, softc->ipf_nat_soft, n);
947                 RWLOCK_EXIT(&softc->ipf_nat);
948                 break;
949
950         case SMC_UPDATE :
951                 bcopy(data, &su, sizeof(su));
952
953                 for (sl = softs->syncnattab[hv]; (sl != NULL);
954                      sl = sl->sl_next)
955                         if (sl->sl_hdr.sm_num == sp->sm_num)
956                                 break;
957                 if (sl == NULL) {
958                         IPFERROR(110019);
959                         err = ENOENT;
960                         break;
961                 }
962
963                 READ_ENTER(&softc->ipf_nat);
964
965                 nat = sl->sl_ipn;
966                 nat->nat_rev = sl->sl_rev;
967
968                 MUTEX_ENTER(&nat->nat_lock);
969                 ipf_nat_setqueue(softc, softc->ipf_nat_soft, nat);
970                 MUTEX_EXIT(&nat->nat_lock);
971
972                 RWLOCK_EXIT(&softc->ipf_nat);
973
974                 break;
975
976         default :
977                 IPFERROR(110020);
978                 err = EINVAL;
979                 break;
980         }
981
982         RWLOCK_EXIT(&softs->ipf_syncnat);
983         return err;
984 }
985
986
987 /* ------------------------------------------------------------------------ */
988 /* Function:    ipf_sync_new                                                */
989 /* Returns:     synclist_t* - NULL == failure, else pointer to new synclist */
990 /*                            data structure.                               */
991 /* Parameters:  tab(I) - type of synclist_t to create                       */
992 /*              fin(I) - pointer to packet information                      */
993 /*              ptr(I) - pointer to owning object                           */
994 /*                                                                          */
995 /* Creates a new sync table entry and notifies any sleepers that it's there */
996 /* waiting to be processed.                                                 */
997 /* ------------------------------------------------------------------------ */
998 synclist_t *
999 ipf_sync_new(softc, tab, fin, ptr)
1000         ipf_main_softc_t *softc;
1001         int tab;
1002         fr_info_t *fin;
1003         void *ptr;
1004 {
1005         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1006         synclist_t *sl, *ss;
1007         synclogent_t *sle;
1008         u_int hv, sz;
1009
1010         if (softs->sl_idx == softs->ipf_sync_log_sz)
1011                 return NULL;
1012         KMALLOC(sl, synclist_t *);
1013         if (sl == NULL)
1014                 return NULL;
1015
1016         MUTEX_ENTER(&softs->ipf_syncadd);
1017         /*
1018          * Get a unique number for this synclist_t.  The number is only meant
1019          * to be unique for the lifetime of the structure and may be reused
1020          * later.
1021          */
1022         softs->ipf_sync_num++;
1023         if (softs->ipf_sync_num == 0) {
1024                 softs->ipf_sync_num = 1;
1025                 softs->ipf_sync_wrap++;
1026         }
1027
1028         /*
1029          * Use the synch number of the object as the hash key.  Should end up
1030          * with relatively even distribution over time.
1031          * XXX - an attacker could lunch an DoS attack, of sorts, if they are
1032          * the only one causing new table entries by only keeping open every
1033          * nth connection they make, where n is a value in the interval
1034          * [0, SYNC_STATETABSZ-1].
1035          */
1036         switch (tab)
1037         {
1038         case SMC_STATE :
1039                 hv = softs->ipf_sync_num & (softs->ipf_sync_state_tab_sz - 1);
1040                 while (softs->ipf_sync_wrap != 0) {
1041                         for (ss = softs->syncstatetab[hv]; ss; ss = ss->sl_next)
1042                                 if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1043                                         break;
1044                         if (ss == NULL)
1045                                 break;
1046                         softs->ipf_sync_num++;
1047                         hv = softs->ipf_sync_num &
1048                              (softs->ipf_sync_state_tab_sz - 1);
1049                 }
1050                 sl->sl_pnext = softs->syncstatetab + hv;
1051                 sl->sl_next = softs->syncstatetab[hv];
1052                 softs->syncstatetab[hv] = sl;
1053                 break;
1054
1055         case SMC_NAT :
1056                 hv = softs->ipf_sync_num & (softs->ipf_sync_nat_tab_sz - 1);
1057                 while (softs->ipf_sync_wrap != 0) {
1058                         for (ss = softs->syncnattab[hv]; ss; ss = ss->sl_next)
1059                                 if (ss->sl_hdr.sm_num == softs->ipf_sync_num)
1060                                         break;
1061                         if (ss == NULL)
1062                                 break;
1063                         softs->ipf_sync_num++;
1064                         hv = softs->ipf_sync_num &
1065                              (softs->ipf_sync_nat_tab_sz - 1);
1066                 }
1067                 sl->sl_pnext = softs->syncnattab + hv;
1068                 sl->sl_next = softs->syncnattab[hv];
1069                 softs->syncnattab[hv] = sl;
1070                 break;
1071
1072         default :
1073                 break;
1074         }
1075
1076         sl->sl_num = softs->ipf_sync_num;
1077         MUTEX_EXIT(&softs->ipf_syncadd);
1078
1079         sl->sl_magic = htonl(SYNHDRMAGIC);
1080         sl->sl_v = fin->fin_v;
1081         sl->sl_p = fin->fin_p;
1082         sl->sl_cmd = SMC_CREATE;
1083         sl->sl_idx = -1;
1084         sl->sl_table = tab;
1085         sl->sl_rev = fin->fin_rev;
1086         if (tab == SMC_STATE) {
1087                 sl->sl_ips = ptr;
1088                 sz = sizeof(*sl->sl_ips);
1089         } else if (tab == SMC_NAT) {
1090                 sl->sl_ipn = ptr;
1091                 sz = sizeof(*sl->sl_ipn);
1092         } else {
1093                 ptr = NULL;
1094                 sz = 0;
1095         }
1096         sl->sl_len = sz;
1097
1098         /*
1099          * Create the log entry to be read by a user daemon.  When it has been
1100          * finished and put on the queue, send a signal to wakeup any waiters.
1101          */
1102         MUTEX_ENTER(&softs->ipf_syncadd);
1103         sle = softs->synclog + softs->sl_idx++;
1104         bcopy((char *)&sl->sl_hdr, (char *)&sle->sle_hdr,
1105               sizeof(sle->sle_hdr));
1106         sle->sle_hdr.sm_num = htonl(sle->sle_hdr.sm_num);
1107         sle->sle_hdr.sm_len = htonl(sle->sle_hdr.sm_len);
1108         if (ptr != NULL) {
1109                 bcopy((char *)ptr, (char *)&sle->sle_un, sz);
1110                 if (tab == SMC_STATE) {
1111                         ipf_sync_storder(1, &sle->sle_un.sleu_ips);
1112                 } else if (tab == SMC_NAT) {
1113                         ipf_sync_natorder(1, &sle->sle_un.sleu_ipn);
1114                 }
1115         }
1116         MUTEX_EXIT(&softs->ipf_syncadd);
1117
1118         ipf_sync_wakeup(softc);
1119         return sl;
1120 }
1121
1122
1123 /* ------------------------------------------------------------------------ */
1124 /* Function:    ipf_sync_update                                             */
1125 /* Returns:     Nil                                                         */
1126 /* Parameters:  tab(I) - type of synclist_t to create                       */
1127 /*              fin(I) - pointer to packet information                      */
1128 /*              sl(I)  - pointer to synchronisation object                  */
1129 /*                                                                          */
1130 /* For outbound packets, only, create an sync update record for the user    */
1131 /* process to read.                                                         */
1132 /* ------------------------------------------------------------------------ */
1133 void
1134 ipf_sync_update(softc, tab, fin, sl)
1135         ipf_main_softc_t *softc;
1136         int tab;
1137         fr_info_t *fin;
1138         synclist_t *sl;
1139 {
1140         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1141         synctcp_update_t *st;
1142         syncupdent_t *slu;
1143         ipstate_t *ips;
1144         nat_t *nat;
1145         ipfrwlock_t *lock;
1146
1147         if (fin->fin_out == 0 || sl == NULL)
1148                 return;
1149
1150         if (tab == SMC_STATE) {
1151                 lock = &softs->ipf_syncstate;
1152         } else {
1153                 lock = &softs->ipf_syncnat;
1154         }
1155
1156         READ_ENTER(lock);
1157         if (sl->sl_idx == -1) {
1158                 MUTEX_ENTER(&softs->ipf_syncadd);
1159                 slu = softs->syncupd + softs->su_idx;
1160                 sl->sl_idx = softs->su_idx++;
1161                 MUTEX_EXIT(&softs->ipf_syncadd);
1162
1163                 bcopy((char *)&sl->sl_hdr, (char *)&slu->sup_hdr,
1164                       sizeof(slu->sup_hdr));
1165                 slu->sup_hdr.sm_magic = htonl(SYNHDRMAGIC);
1166                 slu->sup_hdr.sm_sl = sl;
1167                 slu->sup_hdr.sm_cmd = SMC_UPDATE;
1168                 slu->sup_hdr.sm_table = tab;
1169                 slu->sup_hdr.sm_num = htonl(sl->sl_num);
1170                 slu->sup_hdr.sm_len = htonl(sizeof(struct synctcp_update));
1171                 slu->sup_hdr.sm_rev = fin->fin_rev;
1172 # if 0
1173                 if (fin->fin_p == IPPROTO_TCP) {
1174                         st->stu_len[0] = 0;
1175                         st->stu_len[1] = 0;
1176                 }
1177 # endif
1178         } else
1179                 slu = softs->syncupd + sl->sl_idx;
1180
1181         /*
1182          * Only TCP has complex timeouts, others just use default timeouts.
1183          * For TCP, we only need to track the connection state and window.
1184          */
1185         if (fin->fin_p == IPPROTO_TCP) {
1186                 st = &slu->sup_tcp;
1187                 if (tab == SMC_STATE) {
1188                         ips = sl->sl_ips;
1189                         st->stu_age = htonl(ips->is_die);
1190                         st->stu_data[0].td_end = ips->is_send;
1191                         st->stu_data[0].td_maxend = ips->is_maxsend;
1192                         st->stu_data[0].td_maxwin = ips->is_maxswin;
1193                         st->stu_state[0] = ips->is_state[0];
1194                         st->stu_data[1].td_end = ips->is_dend;
1195                         st->stu_data[1].td_maxend = ips->is_maxdend;
1196                         st->stu_data[1].td_maxwin = ips->is_maxdwin;
1197                         st->stu_state[1] = ips->is_state[1];
1198                 } else if (tab == SMC_NAT) {
1199                         nat = sl->sl_ipn;
1200                         st->stu_age = htonl(nat->nat_age);
1201                 }
1202         }
1203         RWLOCK_EXIT(lock);
1204
1205         ipf_sync_wakeup(softc);
1206 }
1207
1208
1209 /* ------------------------------------------------------------------------ */
1210 /* Function:    ipf_sync_flush_table                                        */
1211 /* Returns:     int - number of entries freed by flushing table             */
1212 /* Parameters:  tabsize(I) - size of the array pointed to by table          */
1213 /*              table(I)   - pointer to sync table to empty                 */
1214 /*                                                                          */
1215 /* Walk through a table of sync entries and free each one.  It is assumed   */
1216 /* that some lock is held so that nobody else tries to access the table     */
1217 /* during this cleanup.                                                     */
1218 /* ------------------------------------------------------------------------ */
1219 static int
1220 ipf_sync_flush_table(softs, tabsize, table)
1221         ipf_sync_softc_t *softs;
1222         int tabsize;
1223         synclist_t **table;
1224 {
1225         synclist_t *sl;
1226         int i, items;
1227
1228         items = 0;
1229
1230         for (i = 0; i < tabsize; i++) {
1231                 while ((sl = table[i]) != NULL) {
1232                         switch (sl->sl_table) {
1233                         case SMC_STATE :
1234                                 if (sl->sl_ips != NULL)
1235                                         sl->sl_ips->is_sync = NULL;
1236                                 break;
1237                         case SMC_NAT :
1238                                 if (sl->sl_ipn != NULL)
1239                                         sl->sl_ipn->nat_sync = NULL;
1240                                 break;
1241                         }
1242                         if (sl->sl_next != NULL)
1243                                 sl->sl_next->sl_pnext = sl->sl_pnext;
1244                         table[i] = sl->sl_next;
1245                         if (sl->sl_idx != -1)
1246                                 softs->syncupd[sl->sl_idx].sup_hdr.sm_sl = NULL;
1247                         KFREE(sl);
1248                         items++;
1249                 }
1250         }
1251
1252         return items;
1253 }
1254
1255
1256 /* ------------------------------------------------------------------------ */
1257 /* Function:    ipf_sync_ioctl                                              */
1258 /* Returns:     int - 0 == success, != 0 == failure                         */
1259 /* Parameters:  data(I) - pointer to ioctl data                             */
1260 /*              cmd(I)  - ioctl command integer                             */
1261 /*              mode(I) - file mode bits used with open                     */
1262 /*                                                                          */
1263 /* This function currently does not handle any ioctls and so just returns   */
1264 /* EINVAL on all occasions.                                                 */
1265 /* ------------------------------------------------------------------------ */
1266 int
1267 ipf_sync_ioctl(softc, data, cmd, mode, uid, ctx)
1268         ipf_main_softc_t *softc;
1269         caddr_t data;
1270         ioctlcmd_t cmd;
1271         int mode, uid;
1272         void *ctx;
1273 {
1274         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1275         int error, i;
1276         SPL_INT(s);
1277
1278         switch (cmd)
1279         {
1280         case SIOCIPFFL:
1281                 error = BCOPYIN(data, &i, sizeof(i));
1282                 if (error != 0) {
1283                         IPFERROR(110023);
1284                         error = EFAULT;
1285                         break;
1286                 }
1287
1288                 switch (i)
1289                 {
1290                 case SMC_RLOG :
1291                         SPL_NET(s);
1292                         MUTEX_ENTER(&softs->ipsl_mutex);
1293                         i = (softs->sl_tail - softs->sl_idx) +
1294                             (softs->su_tail - softs->su_idx);
1295                         softs->sl_idx = 0;
1296                         softs->su_idx = 0;
1297                         softs->sl_tail = 0;
1298                         softs->su_tail = 0;
1299                         MUTEX_EXIT(&softs->ipsl_mutex);
1300                         SPL_X(s);
1301                         break;
1302
1303                 case SMC_NAT :
1304                         SPL_NET(s);
1305                         WRITE_ENTER(&softs->ipf_syncnat);
1306                         i = ipf_sync_flush_table(softs, SYNC_NATTABSZ,
1307                                                  softs->syncnattab);
1308                         RWLOCK_EXIT(&softs->ipf_syncnat);
1309                         SPL_X(s);
1310                         break;
1311
1312                 case SMC_STATE :
1313                         SPL_NET(s);
1314                         WRITE_ENTER(&softs->ipf_syncstate);
1315                         i = ipf_sync_flush_table(softs, SYNC_STATETABSZ,
1316                                                  softs->syncstatetab);
1317                         RWLOCK_EXIT(&softs->ipf_syncstate);
1318                         SPL_X(s);
1319                         break;
1320                 }
1321
1322                 error = BCOPYOUT(&i, data, sizeof(i));
1323                 if (error != 0) {
1324                         IPFERROR(110022);
1325                         error = EFAULT;
1326                 }
1327                 break;
1328
1329         default :
1330                 IPFERROR(110021);
1331                 error = EINVAL;
1332                 break;
1333         }
1334
1335         return error;
1336 }
1337
1338
1339 /* ------------------------------------------------------------------------ */
1340 /* Function:    ipf_sync_canread                                            */
1341 /* Returns:     int - 0 == success, != 0 == failure                         */
1342 /* Parameters:  Nil                                                         */
1343 /*                                                                          */
1344 /* This function provides input to the poll handler about whether or not    */
1345 /* there is data waiting to be read from the /dev/ipsync device.            */
1346 /* ------------------------------------------------------------------------ */
1347 int
1348 ipf_sync_canread(arg)
1349         void *arg;
1350 {
1351         ipf_sync_softc_t *softs = arg;
1352         return !((softs->sl_tail == softs->sl_idx) &&
1353                  (softs->su_tail == softs->su_idx));
1354 }
1355
1356
/* ------------------------------------------------------------------------ */
/* Function:    ipf_sync_canwrite                                           */
/* Returns:     int - 1 == can always write                                 */
/* Parameters:  arg(I) - not used                                           */
/*                                                                          */
/* This function lets the poll handler know that it is always ready willing */
/* to accept write events.                                                  */
/* XXX Maybe this should return false if the sync table is full?            */
/* ------------------------------------------------------------------------ */
int
ipf_sync_canwrite(arg)
	void *arg;
{
	(void)arg;	/* unused */

	return 1;
}
1372
1373
1374 /* ------------------------------------------------------------------------ */
1375 /* Function:    ipf_sync_wakeup                                             */
1376 /* Parameters:  Nil                                                         */
1377 /* Returns:     Nil                                                         */
1378 /*                                                                          */
1379 /* This function implements the heuristics that decide how often to         */
1380 /* generate a poll wakeup for programs that are waiting for information     */
1381 /* about when they can do a read on /dev/ipsync.                            */
1382 /*                                                                          */
1383 /* There are three different considerations here:                           */
1384 /* - do not keep a program waiting too long: ipf_sync_wake_interval is the  */
1385 /*   maximum number of ipf ticks to let pass by;                            */
1386 /* - do not let the queue of ouststanding things to generate notifies for   */
1387 /*   get too full (ipf_sync_queue_high_wm is the high water mark);          */
1388 /* - do not let too many events get collapsed in before deciding that the   */
1389 /*   other host(s) need an update (ipf_sync_event_high_wm is the high water */
1390 /*   mark for this counter.)                                                */
1391 /* ------------------------------------------------------------------------ */
1392 static void
1393 ipf_sync_wakeup(softc)
1394         ipf_main_softc_t *softc;
1395 {
1396         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1397
1398         softs->ipf_sync_events++;
1399         if ((softc->ipf_ticks >
1400             softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval) ||
1401             (softs->ipf_sync_events > softs->ipf_sync_event_high_wm) ||
1402             ((softs->sl_tail - softs->sl_idx) >
1403              softs->ipf_sync_queue_high_wm) ||
1404             ((softs->su_tail - softs->su_idx) >
1405              softs->ipf_sync_queue_high_wm)) {
1406
1407                 ipf_sync_poll_wakeup(softc);
1408         }
1409 }
1410
1411
1412 /* ------------------------------------------------------------------------ */
1413 /* Function:    ipf_sync_poll_wakeup                                        */
1414 /* Parameters:  Nil                                                         */
1415 /* Returns:     Nil                                                         */
1416 /*                                                                          */
1417 /* Deliver a poll wakeup and reset counters for two of the three heuristics */
1418 /* ------------------------------------------------------------------------ */
1419 static void
1420 ipf_sync_poll_wakeup(softc)
1421         ipf_main_softc_t *softc;
1422 {
1423         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1424
1425         softs->ipf_sync_events = 0;
1426         softs->ipf_sync_lastwakeup = softc->ipf_ticks;
1427
1428 # ifdef _KERNEL
1429 #  if SOLARIS
1430         MUTEX_ENTER(&softs->ipsl_mutex);
1431         cv_signal(&softs->ipslwait);
1432         MUTEX_EXIT(&softs->ipsl_mutex);
1433         pollwakeup(&softc->ipf_poll_head[IPL_LOGSYNC], POLLIN|POLLRDNORM);
1434 #  else
1435         WAKEUP(&softs->sl_tail, 0);
1436         POLLWAKEUP(IPL_LOGSYNC);
1437 #  endif
1438 # endif
1439 }
1440
1441
1442 /* ------------------------------------------------------------------------ */
1443 /* Function:    ipf_sync_expire                                             */
1444 /* Parameters:  Nil                                                         */
1445 /* Returns:     Nil                                                         */
1446 /*                                                                          */
1447 /* This is the function called even ipf_tick.  It implements one of the     */
1448 /* three heuristics above *IF* there are events waiting.                    */
1449 /* ------------------------------------------------------------------------ */
1450 void
1451 ipf_sync_expire(softc)
1452         ipf_main_softc_t *softc;
1453 {
1454         ipf_sync_softc_t *softs = softc->ipf_sync_soft;
1455
1456         if ((softs->ipf_sync_events > 0) &&
1457             (softc->ipf_ticks >
1458              softs->ipf_sync_lastwakeup + softs->ipf_sync_wake_interval)) {
1459                 ipf_sync_poll_wakeup(softc);
1460         }
1461 }