]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/cuse/cuse.c
fusefs: fix a memory leak regarding FUSE_INTERRUPT
[FreeBSD/FreeBSD.git] / sys / fs / cuse / cuse.c
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55
56 #include <machine/bus.h>
57
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66
67 MODULE_VERSION(cuse, 1);
68
69 /*
70  * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
71  * declaring support for the cuse4bsd interface in cuse.ko:
72  */
73 MODULE_VERSION(cuse4bsd, 1);
74
75 #ifdef FEATURE
76 FEATURE(cuse, "Userspace character devices");
77 #endif
78
79 struct cuse_command;
80 struct cuse_server;
81 struct cuse_client;
82
83 struct cuse_client_command {
84         TAILQ_ENTRY(cuse_client_command) entry;
85         struct cuse_command sub;
86         struct sx sx;
87         struct cv cv;
88         struct thread *entered;
89         struct cuse_client *client;
90         struct proc *proc_curr;
91         int     proc_refs;
92         int     got_signal;
93         int     error;
94         int     command;
95 };
96
97 struct cuse_memory {
98         TAILQ_ENTRY(cuse_memory) entry;
99         vm_object_t object;
100         uint32_t page_count;
101         uint32_t alloc_nr;
102 };
103
104 struct cuse_server_dev {
105         TAILQ_ENTRY(cuse_server_dev) entry;
106         struct cuse_server *server;
107         struct cdev *kern_dev;
108         struct cuse_dev *user_dev;
109 };
110
111 struct cuse_server {
112         TAILQ_ENTRY(cuse_server) entry;
113         TAILQ_HEAD(, cuse_client_command) head;
114         TAILQ_HEAD(, cuse_server_dev) hdev;
115         TAILQ_HEAD(, cuse_client) hcli;
116         TAILQ_HEAD(, cuse_memory) hmem;
117         struct cv cv;
118         struct selinfo selinfo;
119         pid_t   pid;
120         int     is_closing;
121         int     refs;
122 };
123
124 struct cuse_client {
125         TAILQ_ENTRY(cuse_client) entry;
126         TAILQ_ENTRY(cuse_client) entry_ref;
127         struct cuse_client_command cmds[CUSE_CMD_MAX];
128         struct cuse_server *server;
129         struct cuse_server_dev *server_dev;
130
131         uint8_t ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
132
133         int     fflags;                 /* file flags */
134         int     cflags;                 /* client flags */
135 #define CUSE_CLI_IS_CLOSING 0x01
136 #define CUSE_CLI_KNOTE_NEED_READ 0x02
137 #define CUSE_CLI_KNOTE_NEED_WRITE 0x04
138 #define CUSE_CLI_KNOTE_HAS_READ 0x08
139 #define CUSE_CLI_KNOTE_HAS_WRITE 0x10
140 };
141
/* Evaluates to non-zero when the given client has been marked as closing. */
#define	CUSE_CLIENT_CLOSING(pcc)					\
	((pcc)->cflags & CUSE_CLI_IS_CLOSING)
144
145 static  MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
146
147 static TAILQ_HEAD(, cuse_server) cuse_server_head;
148 static struct mtx cuse_mtx;
149 static struct cdev *cuse_dev;
150 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
151 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
152
153 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
154 static void cuse_client_kqfilter_read_detach(struct knote *kn);
155 static void cuse_client_kqfilter_write_detach(struct knote *kn);
156 static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
157 static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);
158
159 static struct filterops cuse_client_kqfilter_read_ops = {
160         .f_isfd = 1,
161         .f_detach = cuse_client_kqfilter_read_detach,
162         .f_event = cuse_client_kqfilter_read_event,
163 };
164
165 static struct filterops cuse_client_kqfilter_write_ops = {
166         .f_isfd = 1,
167         .f_detach = cuse_client_kqfilter_write_detach,
168         .f_event = cuse_client_kqfilter_write_event,
169 };
170
171 static d_open_t cuse_client_open;
172 static d_close_t cuse_client_close;
173 static d_ioctl_t cuse_client_ioctl;
174 static d_read_t cuse_client_read;
175 static d_write_t cuse_client_write;
176 static d_poll_t cuse_client_poll;
177 static d_mmap_single_t cuse_client_mmap_single;
178 static d_kqfilter_t cuse_client_kqfilter;
179
180 static struct cdevsw cuse_client_devsw = {
181         .d_version = D_VERSION,
182         .d_open = cuse_client_open,
183         .d_close = cuse_client_close,
184         .d_ioctl = cuse_client_ioctl,
185         .d_name = "cuse_client",
186         .d_flags = D_TRACKCLOSE,
187         .d_read = cuse_client_read,
188         .d_write = cuse_client_write,
189         .d_poll = cuse_client_poll,
190         .d_mmap_single = cuse_client_mmap_single,
191         .d_kqfilter = cuse_client_kqfilter,
192 };
193
194 static d_open_t cuse_server_open;
195 static d_close_t cuse_server_close;
196 static d_ioctl_t cuse_server_ioctl;
197 static d_read_t cuse_server_read;
198 static d_write_t cuse_server_write;
199 static d_poll_t cuse_server_poll;
200 static d_mmap_single_t cuse_server_mmap_single;
201
202 static struct cdevsw cuse_server_devsw = {
203         .d_version = D_VERSION,
204         .d_open = cuse_server_open,
205         .d_close = cuse_server_close,
206         .d_ioctl = cuse_server_ioctl,
207         .d_name = "cuse_server",
208         .d_flags = D_TRACKCLOSE,
209         .d_read = cuse_server_read,
210         .d_write = cuse_server_write,
211         .d_poll = cuse_server_poll,
212         .d_mmap_single = cuse_server_mmap_single,
213 };
214
/* Helpers that are used before their definitions appear. */
static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
static void cuse_client_is_closing(struct cuse_client *);
217
218 static void
219 cuse_lock(void)
220 {
221         mtx_lock(&cuse_mtx);
222 }
223
224 static void
225 cuse_unlock(void)
226 {
227         mtx_unlock(&cuse_mtx);
228 }
229
230 static void
231 cuse_cmd_lock(struct cuse_client_command *pccmd)
232 {
233         sx_xlock(&pccmd->sx);
234 }
235
236 static void
237 cuse_cmd_unlock(struct cuse_client_command *pccmd)
238 {
239         sx_xunlock(&pccmd->sx);
240 }
241
242 static void
243 cuse_kern_init(void *arg)
244 {
245         TAILQ_INIT(&cuse_server_head);
246
247         mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
248
249         cuse_dev = make_dev(&cuse_server_devsw, 0,
250             UID_ROOT, GID_OPERATOR, 0600, "cuse");
251
252         printf("Cuse v%d.%d.%d @ /dev/cuse\n",
253             (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
254             (CUSE_VERSION >> 0) & 0xFF);
255 }
256 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
257
258 static void
259 cuse_kern_uninit(void *arg)
260 {
261         void *ptr;
262
263         while (1) {
264
265                 printf("Cuse: Please exit all /dev/cuse instances "
266                     "and processes which have used this device.\n");
267
268                 pause("DRAIN", 2 * hz);
269
270                 cuse_lock();
271                 ptr = TAILQ_FIRST(&cuse_server_head);
272                 cuse_unlock();
273
274                 if (ptr == NULL)
275                         break;
276         }
277
278         if (cuse_dev != NULL)
279                 destroy_dev(cuse_dev);
280
281         mtx_destroy(&cuse_mtx);
282 }
283 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
284
285 static int
286 cuse_server_get(struct cuse_server **ppcs)
287 {
288         struct cuse_server *pcs;
289         int error;
290
291         error = devfs_get_cdevpriv((void **)&pcs);
292         if (error != 0) {
293                 *ppcs = NULL;
294                 return (error);
295         }
296         /* check if closing */
297         cuse_lock();
298         if (pcs->is_closing) {
299                 cuse_unlock();
300                 *ppcs = NULL;
301                 return (EINVAL);
302         }
303         cuse_unlock();
304         *ppcs = pcs;
305         return (0);
306 }
307
308 static void
309 cuse_server_is_closing(struct cuse_server *pcs)
310 {
311         struct cuse_client *pcc;
312
313         if (pcs->is_closing)
314                 return;
315
316         pcs->is_closing = 1;
317
318         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
319                 cuse_client_is_closing(pcc);
320         }
321 }
322
323 static struct cuse_client_command *
324 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
325 {
326         struct cuse_client *pcc;
327         int n;
328
329         if (pcs->is_closing)
330                 goto done;
331
332         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
333                 if (CUSE_CLIENT_CLOSING(pcc))
334                         continue;
335                 for (n = 0; n != CUSE_CMD_MAX; n++) {
336                         if (pcc->cmds[n].entered == td)
337                                 return (&pcc->cmds[n]);
338                 }
339         }
340 done:
341         return (NULL);
342 }
343
/*
 * Sanitise a NUL-terminated string in place: every character which is
 * not an ASCII letter, an ASCII digit, '.', '_' or '/' is replaced by
 * an underscore.
 */
static void
cuse_str_filter(char *ptr)
{
	char *p;
	int ch;
	int keep;

	for (p = ptr; (ch = *p) != 0; p++) {
		keep = (ch >= 'a' && ch <= 'z') ||
		    (ch >= 'A' && ch <= 'Z') ||
		    (ch >= '0' && ch <= '9') ||
		    ch == '.' || ch == '_' || ch == '/';

		if (!keep)
			*p = '_';
	}
}
372
373 static int
374 cuse_convert_error(int error)
375 {
376         ;                               /* indent fix */
377         switch (error) {
378         case CUSE_ERR_NONE:
379                 return (0);
380         case CUSE_ERR_BUSY:
381                 return (EBUSY);
382         case CUSE_ERR_WOULDBLOCK:
383                 return (EWOULDBLOCK);
384         case CUSE_ERR_INVALID:
385                 return (EINVAL);
386         case CUSE_ERR_NO_MEMORY:
387                 return (ENOMEM);
388         case CUSE_ERR_FAULT:
389                 return (EFAULT);
390         case CUSE_ERR_SIGNAL:
391                 return (EINTR);
392         case CUSE_ERR_NO_DEVICE:
393                 return (ENODEV);
394         default:
395                 return (ENXIO);
396         }
397 }
398
399 static void
400 cuse_vm_memory_free(struct cuse_memory *mem)
401 {
402         /* last user is gone - free */
403         vm_object_deallocate(mem->object);
404
405         /* free CUSE memory */
406         free(mem, M_CUSE);
407 }
408
409 static int
410 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
411     uint32_t page_count)
412 {
413         struct cuse_memory *temp;
414         struct cuse_memory *mem;
415         vm_object_t object;
416         int error;
417
418         mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
419         if (mem == NULL)
420                 return (ENOMEM);
421
422         object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
423             VM_PROT_DEFAULT, 0, curthread->td_ucred);
424         if (object == NULL) {
425                 error = ENOMEM;
426                 goto error_0;
427         }
428
429         cuse_lock();
430         /* check if allocation number already exists */
431         TAILQ_FOREACH(temp, &pcs->hmem, entry) {
432                 if (temp->alloc_nr == alloc_nr)
433                         break;
434         }
435         if (temp != NULL) {
436                 cuse_unlock();
437                 error = EBUSY;
438                 goto error_1;
439         }
440         mem->object = object;
441         mem->page_count = page_count;
442         mem->alloc_nr = alloc_nr;
443         TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
444         cuse_unlock();
445
446         return (0);
447
448 error_1:
449         vm_object_deallocate(object);
450 error_0:
451         free(mem, M_CUSE);
452         return (error);
453 }
454
455 static int
456 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
457 {
458         struct cuse_memory *mem;
459
460         cuse_lock();
461         TAILQ_FOREACH(mem, &pcs->hmem, entry) {
462                 if (mem->alloc_nr == alloc_nr)
463                         break;
464         }
465         if (mem == NULL) {
466                 cuse_unlock();
467                 return (EINVAL);
468         }
469         TAILQ_REMOVE(&pcs->hmem, mem, entry);
470         cuse_unlock();
471
472         cuse_vm_memory_free(mem);
473
474         return (0);
475 }
476
477 static int
478 cuse_client_get(struct cuse_client **ppcc)
479 {
480         struct cuse_client *pcc;
481         int error;
482
483         /* try to get private data */
484         error = devfs_get_cdevpriv((void **)&pcc);
485         if (error != 0) {
486                 *ppcc = NULL;
487                 return (error);
488         }
489         /* check if closing */
490         cuse_lock();
491         if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
492                 cuse_unlock();
493                 *ppcc = NULL;
494                 return (EINVAL);
495         }
496         cuse_unlock();
497         *ppcc = pcc;
498         return (0);
499 }
500
501 static void
502 cuse_client_is_closing(struct cuse_client *pcc)
503 {
504         struct cuse_client_command *pccmd;
505         uint32_t n;
506
507         if (CUSE_CLIENT_CLOSING(pcc))
508                 return;
509
510         pcc->cflags |= CUSE_CLI_IS_CLOSING;
511         pcc->server_dev = NULL;
512
513         for (n = 0; n != CUSE_CMD_MAX; n++) {
514
515                 pccmd = &pcc->cmds[n];
516
517                 if (pccmd->entry.tqe_prev != NULL) {
518                         TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
519                         pccmd->entry.tqe_prev = NULL;
520                 }
521                 cv_broadcast(&pccmd->cv);
522         }
523 }
524
525 static void
526 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
527     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
528 {
529         unsigned long cuse_fflags = 0;
530         struct cuse_server *pcs;
531
532         if (fflags & FREAD)
533                 cuse_fflags |= CUSE_FFLAG_READ;
534
535         if (fflags & FWRITE)
536                 cuse_fflags |= CUSE_FFLAG_WRITE;
537
538         if (ioflag & IO_NDELAY)
539                 cuse_fflags |= CUSE_FFLAG_NONBLOCK;
540 #if defined(__LP64__)
541         if (SV_CURPROC_FLAG(SV_ILP32))
542                 cuse_fflags |= CUSE_FFLAG_COMPAT32;
543 #endif
544         pccmd->sub.fflags = cuse_fflags;
545         pccmd->sub.data_pointer = data_ptr;
546         pccmd->sub.argument = arg;
547
548         pcs = pccmd->client->server;
549
550         if ((pccmd->entry.tqe_prev == NULL) &&
551             (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
552             (pcs->is_closing == 0)) {
553                 TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
554                 cv_signal(&pcs->cv);
555         }
556 }
557
558 static void
559 cuse_client_got_signal(struct cuse_client_command *pccmd)
560 {
561         struct cuse_server *pcs;
562
563         pccmd->got_signal = 1;
564
565         pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
566
567         pcs = pccmd->client->server;
568
569         if ((pccmd->entry.tqe_prev == NULL) &&
570             (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
571             (pcs->is_closing == 0)) {
572                 TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
573                 cv_signal(&pcs->cv);
574         }
575 }
576
577 static int
578 cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
579     uint8_t *arg_ptr, uint32_t arg_len)
580 {
581         int error;
582
583         error = 0;
584
585         pccmd->proc_curr = curthread->td_proc;
586
587         if (CUSE_CLIENT_CLOSING(pccmd->client) ||
588             pccmd->client->server->is_closing) {
589                 error = CUSE_ERR_OTHER;
590                 goto done;
591         }
592         while (pccmd->command == CUSE_CMD_NONE) {
593                 if (error != 0) {
594                         cv_wait(&pccmd->cv, &cuse_mtx);
595                 } else {
596                         error = cv_wait_sig(&pccmd->cv, &cuse_mtx);
597
598                         if (error != 0)
599                                 cuse_client_got_signal(pccmd);
600                 }
601                 if (CUSE_CLIENT_CLOSING(pccmd->client) ||
602                     pccmd->client->server->is_closing) {
603                         error = CUSE_ERR_OTHER;
604                         goto done;
605                 }
606         }
607
608         error = pccmd->error;
609         pccmd->command = CUSE_CMD_NONE;
610         cv_signal(&pccmd->cv);
611
612 done:
613
614         /* wait until all process references are gone */
615
616         pccmd->proc_curr = NULL;
617
618         while (pccmd->proc_refs != 0)
619                 cv_wait(&pccmd->cv, &cuse_mtx);
620
621         return (error);
622 }
623
624 /*------------------------------------------------------------------------*
625  *      CUSE SERVER PART
626  *------------------------------------------------------------------------*/
627
628 static void
629 cuse_server_free_dev(struct cuse_server_dev *pcsd)
630 {
631         struct cuse_server *pcs;
632         struct cuse_client *pcc;
633
634         /* get server pointer */
635         pcs = pcsd->server;
636
637         /* prevent creation of more devices */
638         cuse_lock();
639         if (pcsd->kern_dev != NULL)
640                 pcsd->kern_dev->si_drv1 = NULL;
641
642         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
643                 if (pcc->server_dev == pcsd)
644                         cuse_client_is_closing(pcc);
645         }
646         cuse_unlock();
647
648         /* destroy device, if any */
649         if (pcsd->kern_dev != NULL) {
650                 /* destroy device synchronously */
651                 destroy_dev(pcsd->kern_dev);
652         }
653         free(pcsd, M_CUSE);
654 }
655
656 static void
657 cuse_server_unref(struct cuse_server *pcs)
658 {
659         struct cuse_server_dev *pcsd;
660         struct cuse_memory *mem;
661
662         cuse_lock();
663         pcs->refs--;
664         if (pcs->refs != 0) {
665                 cuse_unlock();
666                 return;
667         }
668         cuse_server_is_closing(pcs);
669         /* final client wakeup, if any */
670         cuse_server_wakeup_all_client_locked(pcs);
671
672         TAILQ_REMOVE(&cuse_server_head, pcs, entry);
673
674         cuse_free_unit_by_id_locked(pcs, -1);
675
676         while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
677                 TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
678                 cuse_unlock();
679                 cuse_server_free_dev(pcsd);
680                 cuse_lock();
681         }
682
683         while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
684                 TAILQ_REMOVE(&pcs->hmem, mem, entry);
685                 cuse_unlock();
686                 cuse_vm_memory_free(mem);
687                 cuse_lock();
688         }
689
690         knlist_clear(&pcs->selinfo.si_note, 1);
691         knlist_destroy(&pcs->selinfo.si_note);
692
693         cuse_unlock();
694
695         seldrain(&pcs->selinfo);
696
697         cv_destroy(&pcs->cv);
698
699         free(pcs, M_CUSE);
700 }
701
/*
 * cdevpriv destructor registered by cuse_server_open(): drops the
 * reference on the server passed in "arg".
 */
static void
cuse_server_free(void *arg)
{

	cuse_server_unref((struct cuse_server *)arg);
}
710
711 static int
712 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
713 {
714         struct cuse_server *pcs;
715
716         pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
717         if (pcs == NULL)
718                 return (ENOMEM);
719
720         if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
721                 printf("Cuse: Cannot set cdevpriv.\n");
722                 free(pcs, M_CUSE);
723                 return (ENOMEM);
724         }
725         /* store current process ID */
726         pcs->pid = curproc->p_pid;
727
728         TAILQ_INIT(&pcs->head);
729         TAILQ_INIT(&pcs->hdev);
730         TAILQ_INIT(&pcs->hcli);
731         TAILQ_INIT(&pcs->hmem);
732
733         cv_init(&pcs->cv, "cuse-server-cv");
734
735         knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
736
737         cuse_lock();
738         pcs->refs++;
739         TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
740         cuse_unlock();
741
742         return (0);
743 }
744
745 static int
746 cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
747 {
748         struct cuse_server *pcs;
749         int error;
750
751         error = cuse_server_get(&pcs);
752         if (error != 0)
753                 goto done;
754
755         cuse_lock();
756         cuse_server_is_closing(pcs);
757         /* final client wakeup, if any */
758         cuse_server_wakeup_all_client_locked(pcs);
759
760         knlist_clear(&pcs->selinfo.si_note, 1);
761         cuse_unlock();
762
763 done:
764         return (0);
765 }
766
/* The control device does not support read(2); always fails with ENXIO. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{

	return (ENXIO);
}
772
/* The control device does not support write(2); always fails with ENXIO. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{

	return (ENXIO);
}
778
779 static int
780 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
781     struct cuse_data_chunk *pchk, int isread)
782 {
783         struct proc *p_proc;
784         uint32_t offset;
785         int error;
786
787         offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
788
789         if (pchk->length > CUSE_BUFFER_MAX)
790                 return (EFAULT);
791
792         if (offset >= CUSE_BUFFER_MAX)
793                 return (EFAULT);
794
795         if ((offset + pchk->length) > CUSE_BUFFER_MAX)
796                 return (EFAULT);
797
798         p_proc = pccmd->proc_curr;
799         if (p_proc == NULL)
800                 return (ENXIO);
801
802         if (pccmd->proc_refs < 0)
803                 return (ENOMEM);
804
805         pccmd->proc_refs++;
806
807         cuse_unlock();
808
809         if (isread == 0) {
810                 error = copyin(
811                     (void *)pchk->local_ptr,
812                     pccmd->client->ioctl_buffer + offset,
813                     pchk->length);
814         } else {
815                 error = copyout(
816                     pccmd->client->ioctl_buffer + offset,
817                     (void *)pchk->local_ptr,
818                     pchk->length);
819         }
820
821         cuse_lock();
822
823         pccmd->proc_refs--;
824
825         if (pccmd->proc_curr == NULL)
826                 cv_signal(&pccmd->cv);
827
828         return (error);
829 }
830
831 static int
832 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
833     struct proc *proc_d, vm_offset_t data_d, size_t len)
834 {
835         struct thread *td;
836         struct proc *proc_cur;
837         int error;
838
839         td = curthread;
840         proc_cur = td->td_proc;
841
842         if (proc_cur == proc_d) {
843                 struct iovec iov = {
844                         .iov_base = (caddr_t)data_d,
845                         .iov_len = len,
846                 };
847                 struct uio uio = {
848                         .uio_iov = &iov,
849                         .uio_iovcnt = 1,
850                         .uio_offset = (off_t)data_s,
851                         .uio_resid = len,
852                         .uio_segflg = UIO_USERSPACE,
853                         .uio_rw = UIO_READ,
854                         .uio_td = td,
855                 };
856
857                 PHOLD(proc_s);
858                 error = proc_rwmem(proc_s, &uio);
859                 PRELE(proc_s);
860
861         } else if (proc_cur == proc_s) {
862                 struct iovec iov = {
863                         .iov_base = (caddr_t)data_s,
864                         .iov_len = len,
865                 };
866                 struct uio uio = {
867                         .uio_iov = &iov,
868                         .uio_iovcnt = 1,
869                         .uio_offset = (off_t)data_d,
870                         .uio_resid = len,
871                         .uio_segflg = UIO_USERSPACE,
872                         .uio_rw = UIO_WRITE,
873                         .uio_td = td,
874                 };
875
876                 PHOLD(proc_d);
877                 error = proc_rwmem(proc_d, &uio);
878                 PRELE(proc_d);
879         } else {
880                 error = EINVAL;
881         }
882         return (error);
883 }
884
885 static int
886 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
887     struct cuse_data_chunk *pchk, int isread)
888 {
889         struct proc *p_proc;
890         int error;
891
892         p_proc = pccmd->proc_curr;
893         if (p_proc == NULL)
894                 return (ENXIO);
895
896         if (pccmd->proc_refs < 0)
897                 return (ENOMEM);
898
899         pccmd->proc_refs++;
900
901         cuse_unlock();
902
903         if (isread == 0) {
904                 error = cuse_proc2proc_copy(
905                     curthread->td_proc, pchk->local_ptr,
906                     p_proc, pchk->peer_ptr,
907                     pchk->length);
908         } else {
909                 error = cuse_proc2proc_copy(
910                     p_proc, pchk->peer_ptr,
911                     curthread->td_proc, pchk->local_ptr,
912                     pchk->length);
913         }
914
915         cuse_lock();
916
917         pccmd->proc_refs--;
918
919         if (pccmd->proc_curr == NULL)
920                 cv_signal(&pccmd->cv);
921
922         return (error);
923 }
924
925 static int
926 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
927 {
928         int n;
929         int x = 0;
930         int match;
931
932         do {
933                 for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
934                         if (cuse_alloc_unit[n] != NULL) {
935                                 if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
936                                         continue;
937                                 if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
938                                         x++;
939                                         match = 1;
940                                 }
941                         }
942                 }
943         } while (match);
944
945         if (x < 256) {
946                 for (n = 0; n != CUSE_DEVICES_MAX; n++) {
947                         if (cuse_alloc_unit[n] == NULL) {
948                                 cuse_alloc_unit[n] = pcs;
949                                 cuse_alloc_unit_id[n] = id | x;
950                                 return (x);
951                         }
952                 }
953         }
954         return (-1);
955 }
956
957 static void
958 cuse_server_wakeup_locked(struct cuse_server *pcs)
959 {
960         selwakeup(&pcs->selinfo);
961         KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
962 }
963
964 static void
965 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
966 {
967         struct cuse_client *pcc;
968
969         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
970                 pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
971                     CUSE_CLI_KNOTE_NEED_WRITE);
972         }
973         cuse_server_wakeup_locked(pcs);
974 }
975
976 static int
977 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
978 {
979         int n;
980         int found = 0;
981
982         for (n = 0; n != CUSE_DEVICES_MAX; n++) {
983                 if (cuse_alloc_unit[n] == pcs) {
984                         if (cuse_alloc_unit_id[n] == id || id == -1) {
985                                 cuse_alloc_unit[n] = NULL;
986                                 cuse_alloc_unit_id[n] = 0;
987                                 found = 1;
988                         }
989                 }
990         }
991
992         return (found ? 0 : EINVAL);
993 }
994
995 static int
996 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
997     caddr_t data, int fflag, struct thread *td)
998 {
999         struct cuse_server *pcs;
1000         int error;
1001
1002         error = cuse_server_get(&pcs);
1003         if (error != 0)
1004                 return (error);
1005
1006         switch (cmd) {
1007                 struct cuse_client_command *pccmd;
1008                 struct cuse_client *pcc;
1009                 struct cuse_command *pcmd;
1010                 struct cuse_alloc_info *pai;
1011                 struct cuse_create_dev *pcd;
1012                 struct cuse_server_dev *pcsd;
1013                 struct cuse_data_chunk *pchk;
1014                 int n;
1015
1016         case CUSE_IOCTL_GET_COMMAND:
1017                 pcmd = (void *)data;
1018
1019                 cuse_lock();
1020
1021                 while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1022                         error = cv_wait_sig(&pcs->cv, &cuse_mtx);
1023
1024                         if (pcs->is_closing)
1025                                 error = ENXIO;
1026
1027                         if (error) {
1028                                 cuse_unlock();
1029                                 return (error);
1030                         }
1031                 }
1032
1033                 TAILQ_REMOVE(&pcs->head, pccmd, entry);
1034                 pccmd->entry.tqe_prev = NULL;
1035
1036                 pccmd->entered = curthread;
1037
1038                 *pcmd = pccmd->sub;
1039
1040                 cuse_unlock();
1041
1042                 break;
1043
1044         case CUSE_IOCTL_SYNC_COMMAND:
1045
1046                 cuse_lock();
1047                 while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1048
1049                         /* send sync command */
1050                         pccmd->entered = NULL;
1051                         pccmd->error = *(int *)data;
1052                         pccmd->command = CUSE_CMD_SYNC;
1053
1054                         /* signal peer, if any */
1055                         cv_signal(&pccmd->cv);
1056                 }
1057                 cuse_unlock();
1058
1059                 break;
1060
1061         case CUSE_IOCTL_ALLOC_UNIT:
1062
1063                 cuse_lock();
1064                 n = cuse_alloc_unit_by_id_locked(pcs,
1065                     CUSE_ID_DEFAULT(0));
1066                 cuse_unlock();
1067
1068                 if (n < 0)
1069                         error = ENOMEM;
1070                 else
1071                         *(int *)data = n;
1072                 break;
1073
1074         case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1075
1076                 n = *(int *)data;
1077
1078                 n = (n & CUSE_ID_MASK);
1079
1080                 cuse_lock();
1081                 n = cuse_alloc_unit_by_id_locked(pcs, n);
1082                 cuse_unlock();
1083
1084                 if (n < 0)
1085                         error = ENOMEM;
1086                 else
1087                         *(int *)data = n;
1088                 break;
1089
1090         case CUSE_IOCTL_FREE_UNIT:
1091
1092                 n = *(int *)data;
1093
1094                 n = CUSE_ID_DEFAULT(n);
1095
1096                 cuse_lock();
1097                 error = cuse_free_unit_by_id_locked(pcs, n);
1098                 cuse_unlock();
1099                 break;
1100
1101         case CUSE_IOCTL_FREE_UNIT_BY_ID:
1102
1103                 n = *(int *)data;
1104
1105                 cuse_lock();
1106                 error = cuse_free_unit_by_id_locked(pcs, n);
1107                 cuse_unlock();
1108                 break;
1109
1110         case CUSE_IOCTL_ALLOC_MEMORY:
1111
1112                 pai = (void *)data;
1113
1114                 if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1115                         error = ENOMEM;
1116                         break;
1117                 }
1118                 if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
1119                         error = ENOMEM;
1120                         break;
1121                 }
1122                 error = cuse_server_alloc_memory(pcs,
1123                     pai->alloc_nr, pai->page_count);
1124                 break;
1125
1126         case CUSE_IOCTL_FREE_MEMORY:
1127                 pai = (void *)data;
1128
1129                 if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1130                         error = ENOMEM;
1131                         break;
1132                 }
1133                 error = cuse_server_free_memory(pcs, pai->alloc_nr);
1134                 break;
1135
1136         case CUSE_IOCTL_GET_SIG:
1137
1138                 cuse_lock();
1139                 pccmd = cuse_server_find_command(pcs, curthread);
1140
1141                 if (pccmd != NULL) {
1142                         n = pccmd->got_signal;
1143                         pccmd->got_signal = 0;
1144                 } else {
1145                         n = 0;
1146                 }
1147                 cuse_unlock();
1148
1149                 *(int *)data = n;
1150
1151                 break;
1152
1153         case CUSE_IOCTL_SET_PFH:
1154
1155                 cuse_lock();
1156                 pccmd = cuse_server_find_command(pcs, curthread);
1157
1158                 if (pccmd != NULL) {
1159                         pcc = pccmd->client;
1160                         for (n = 0; n != CUSE_CMD_MAX; n++) {
1161                                 pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1162                         }
1163                 } else {
1164                         error = ENXIO;
1165                 }
1166                 cuse_unlock();
1167                 break;
1168
1169         case CUSE_IOCTL_CREATE_DEV:
1170
1171                 error = priv_check(curthread, PRIV_DRIVER);
1172                 if (error)
1173                         break;
1174
1175                 pcd = (void *)data;
1176
1177                 /* filter input */
1178
1179                 pcd->devname[sizeof(pcd->devname) - 1] = 0;
1180
1181                 if (pcd->devname[0] == 0) {
1182                         error = EINVAL;
1183                         break;
1184                 }
1185                 cuse_str_filter(pcd->devname);
1186
1187                 pcd->permissions &= 0777;
1188
1189                 /* try to allocate a character device */
1190
1191                 pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1192
1193                 if (pcsd == NULL) {
1194                         error = ENOMEM;
1195                         break;
1196                 }
1197                 pcsd->server = pcs;
1198
1199                 pcsd->user_dev = pcd->dev;
1200
1201                 pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1202                     &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1203                     pcd->permissions, "%s", pcd->devname);
1204
1205                 if (pcsd->kern_dev == NULL) {
1206                         free(pcsd, M_CUSE);
1207                         error = ENOMEM;
1208                         break;
1209                 }
1210                 pcsd->kern_dev->si_drv1 = pcsd;
1211
1212                 cuse_lock();
1213                 TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1214                 cuse_unlock();
1215
1216                 break;
1217
1218         case CUSE_IOCTL_DESTROY_DEV:
1219
1220                 error = priv_check(curthread, PRIV_DRIVER);
1221                 if (error)
1222                         break;
1223
1224                 cuse_lock();
1225
1226                 error = EINVAL;
1227
1228                 pcsd = TAILQ_FIRST(&pcs->hdev);
1229                 while (pcsd != NULL) {
1230                         if (pcsd->user_dev == *(struct cuse_dev **)data) {
1231                                 TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1232                                 cuse_unlock();
1233                                 cuse_server_free_dev(pcsd);
1234                                 cuse_lock();
1235                                 error = 0;
1236                                 pcsd = TAILQ_FIRST(&pcs->hdev);
1237                         } else {
1238                                 pcsd = TAILQ_NEXT(pcsd, entry);
1239                         }
1240                 }
1241
1242                 cuse_unlock();
1243                 break;
1244
1245         case CUSE_IOCTL_WRITE_DATA:
1246         case CUSE_IOCTL_READ_DATA:
1247
1248                 cuse_lock();
1249                 pchk = (struct cuse_data_chunk *)data;
1250
1251                 pccmd = cuse_server_find_command(pcs, curthread);
1252
1253                 if (pccmd == NULL) {
1254                         error = ENXIO;  /* invalid request */
1255                 } else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1256                         error = EFAULT; /* NULL pointer */
1257                 } else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1258                         error = cuse_server_ioctl_copy_locked(pccmd,
1259                             pchk, cmd == CUSE_IOCTL_READ_DATA);
1260                 } else {
1261                         error = cuse_server_data_copy_locked(pccmd,
1262                             pchk, cmd == CUSE_IOCTL_READ_DATA);
1263                 }
1264                 cuse_unlock();
1265                 break;
1266
1267         case CUSE_IOCTL_SELWAKEUP:
1268                 cuse_lock();
1269                 /*
1270                  * We don't know which direction caused the event.
1271                  * Wakeup both!
1272                  */
1273                 cuse_server_wakeup_all_client_locked(pcs);
1274                 cuse_unlock();
1275                 break;
1276
1277         default:
1278                 error = ENXIO;
1279                 break;
1280         }
1281         return (error);
1282 }
1283
1284 static int
1285 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1286 {
1287         return (events & (POLLHUP | POLLPRI | POLLIN |
1288             POLLRDNORM | POLLOUT | POLLWRNORM));
1289 }
1290
1291 static int
1292 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1293     vm_size_t size, struct vm_object **object, int nprot)
1294 {
1295         uint32_t page_nr = *offset / PAGE_SIZE;
1296         uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1297         struct cuse_memory *mem;
1298         struct cuse_server *pcs;
1299         int error;
1300
1301         error = cuse_server_get(&pcs);
1302         if (error != 0)
1303                 return (error);
1304
1305         cuse_lock();
1306         /* lookup memory structure */
1307         TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1308                 if (mem->alloc_nr == alloc_nr)
1309                         break;
1310         }
1311         if (mem == NULL) {
1312                 cuse_unlock();
1313                 return (ENOMEM);
1314         }
1315         /* verify page offset */
1316         page_nr %= CUSE_ALLOC_PAGES_MAX;
1317         if (page_nr >= mem->page_count) {
1318                 cuse_unlock();
1319                 return (ENXIO);
1320         }
1321         /* verify mmap size */
1322         if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1323             (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1324                 cuse_unlock();
1325                 return (EINVAL);
1326         }
1327         vm_object_reference(mem->object);
1328         *object = mem->object;
1329         cuse_unlock();
1330
1331         /* set new VM object offset to use */
1332         *offset = page_nr * PAGE_SIZE;
1333
1334         /* success */
1335         return (0);
1336 }
1337
1338 /*------------------------------------------------------------------------*
1339  *      CUSE CLIENT PART
1340  *------------------------------------------------------------------------*/
1341 static void
1342 cuse_client_free(void *arg)
1343 {
1344         struct cuse_client *pcc = arg;
1345         struct cuse_client_command *pccmd;
1346         struct cuse_server *pcs;
1347         int n;
1348
1349         cuse_lock();
1350         cuse_client_is_closing(pcc);
1351         TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1352         cuse_unlock();
1353
1354         for (n = 0; n != CUSE_CMD_MAX; n++) {
1355
1356                 pccmd = &pcc->cmds[n];
1357
1358                 sx_destroy(&pccmd->sx);
1359                 cv_destroy(&pccmd->cv);
1360         }
1361
1362         pcs = pcc->server;
1363
1364         free(pcc, M_CUSE);
1365
1366         /* drop reference on server */
1367         cuse_server_unref(pcs);
1368 }
1369
1370 static int
1371 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1372 {
1373         struct cuse_client_command *pccmd;
1374         struct cuse_server_dev *pcsd;
1375         struct cuse_client *pcc;
1376         struct cuse_server *pcs;
1377         struct cuse_dev *pcd;
1378         int error;
1379         int n;
1380
1381         cuse_lock();
1382         pcsd = dev->si_drv1;
1383         if (pcsd != NULL) {
1384                 pcs = pcsd->server;
1385                 pcd = pcsd->user_dev;
1386                 /*
1387                  * Check that the refcount didn't wrap and that the
1388                  * same process is not both client and server. This
1389                  * can easily lead to deadlocks when destroying the
1390                  * CUSE character device nodes:
1391                  */
1392                 pcs->refs++;
1393                 if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1394                         /* overflow or wrong PID */
1395                         pcs->refs--;
1396                         pcsd = NULL;
1397                 }
1398         } else {
1399                 pcs = NULL;
1400                 pcd = NULL;
1401         }
1402         cuse_unlock();
1403
1404         if (pcsd == NULL)
1405                 return (EINVAL);
1406
1407         pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1408         if (pcc == NULL) {
1409                 /* drop reference on server */
1410                 cuse_server_unref(pcs);
1411                 return (ENOMEM);
1412         }
1413         if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1414                 printf("Cuse: Cannot set cdevpriv.\n");
1415                 /* drop reference on server */
1416                 cuse_server_unref(pcs);
1417                 free(pcc, M_CUSE);
1418                 return (ENOMEM);
1419         }
1420         pcc->fflags = fflags;
1421         pcc->server_dev = pcsd;
1422         pcc->server = pcs;
1423
1424         for (n = 0; n != CUSE_CMD_MAX; n++) {
1425
1426                 pccmd = &pcc->cmds[n];
1427
1428                 pccmd->sub.dev = pcd;
1429                 pccmd->sub.command = n;
1430                 pccmd->client = pcc;
1431
1432                 sx_init(&pccmd->sx, "cuse-client-sx");
1433                 cv_init(&pccmd->cv, "cuse-client-cv");
1434         }
1435
1436         cuse_lock();
1437
1438         /* cuse_client_free() assumes that the client is listed somewhere! */
1439         /* always enqueue */
1440
1441         TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1442
1443         /* check if server is closing */
1444         if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1445                 error = EINVAL;
1446         } else {
1447                 error = 0;
1448         }
1449         cuse_unlock();
1450
1451         if (error) {
1452                 devfs_clear_cdevpriv(); /* XXX bugfix */
1453                 return (error);
1454         }
1455         pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1456
1457         cuse_cmd_lock(pccmd);
1458
1459         cuse_lock();
1460         cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1461
1462         error = cuse_client_receive_command_locked(pccmd, 0, 0);
1463         cuse_unlock();
1464
1465         if (error < 0) {
1466                 error = cuse_convert_error(error);
1467         } else {
1468                 error = 0;
1469         }
1470
1471         cuse_cmd_unlock(pccmd);
1472
1473         if (error)
1474                 devfs_clear_cdevpriv(); /* XXX bugfix */
1475
1476         return (error);
1477 }
1478
1479 static int
1480 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1481 {
1482         struct cuse_client_command *pccmd;
1483         struct cuse_client *pcc;
1484         int error;
1485
1486         error = cuse_client_get(&pcc);
1487         if (error != 0)
1488                 return (0);
1489
1490         pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1491
1492         cuse_cmd_lock(pccmd);
1493
1494         cuse_lock();
1495         cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1496
1497         error = cuse_client_receive_command_locked(pccmd, 0, 0);
1498         cuse_unlock();
1499
1500         cuse_cmd_unlock(pccmd);
1501
1502         cuse_lock();
1503         cuse_client_is_closing(pcc);
1504         cuse_unlock();
1505
1506         return (0);
1507 }
1508
1509 static void
1510 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1511 {
1512         int temp;
1513
1514         cuse_lock();
1515         temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1516             CUSE_CLI_KNOTE_HAS_WRITE));
1517         pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1518             CUSE_CLI_KNOTE_NEED_WRITE);
1519         cuse_unlock();
1520
1521         if (temp != 0) {
1522                 /* get the latest polling state from the server */
1523                 temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1524
1525                 if (temp & (POLLIN | POLLOUT)) {
1526                         cuse_lock();
1527                         if (temp & POLLIN)
1528                                 pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1529                         if (temp & POLLOUT)
1530                                 pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1531
1532                         /* make sure the "knote" gets woken up */
1533                         cuse_server_wakeup_locked(pcc->server);
1534                         cuse_unlock();
1535                 }
1536         }
1537 }
1538
1539 static int
1540 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1541 {
1542         struct cuse_client_command *pccmd;
1543         struct cuse_client *pcc;
1544         int error;
1545         int len;
1546
1547         error = cuse_client_get(&pcc);
1548         if (error != 0)
1549                 return (error);
1550
1551         pccmd = &pcc->cmds[CUSE_CMD_READ];
1552
1553         if (uio->uio_segflg != UIO_USERSPACE) {
1554                 return (EINVAL);
1555         }
1556         uio->uio_segflg = UIO_NOCOPY;
1557
1558         cuse_cmd_lock(pccmd);
1559
1560         while (uio->uio_resid != 0) {
1561
1562                 if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1563                         error = ENOMEM;
1564                         break;
1565                 }
1566                 len = uio->uio_iov->iov_len;
1567
1568                 cuse_lock();
1569                 cuse_client_send_command_locked(pccmd,
1570                     (uintptr_t)uio->uio_iov->iov_base,
1571                     (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1572
1573                 error = cuse_client_receive_command_locked(pccmd, 0, 0);
1574                 cuse_unlock();
1575
1576                 if (error < 0) {
1577                         error = cuse_convert_error(error);
1578                         break;
1579                 } else if (error == len) {
1580                         error = uiomove(NULL, error, uio);
1581                         if (error)
1582                                 break;
1583                 } else {
1584                         error = uiomove(NULL, error, uio);
1585                         break;
1586                 }
1587         }
1588         cuse_cmd_unlock(pccmd);
1589
1590         uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1591
1592         if (error == EWOULDBLOCK)
1593                 cuse_client_kqfilter_poll(dev, pcc);
1594
1595         return (error);
1596 }
1597
1598 static int
1599 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1600 {
1601         struct cuse_client_command *pccmd;
1602         struct cuse_client *pcc;
1603         int error;
1604         int len;
1605
1606         error = cuse_client_get(&pcc);
1607         if (error != 0)
1608                 return (error);
1609
1610         pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1611
1612         if (uio->uio_segflg != UIO_USERSPACE) {
1613                 return (EINVAL);
1614         }
1615         uio->uio_segflg = UIO_NOCOPY;
1616
1617         cuse_cmd_lock(pccmd);
1618
1619         while (uio->uio_resid != 0) {
1620
1621                 if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1622                         error = ENOMEM;
1623                         break;
1624                 }
1625                 len = uio->uio_iov->iov_len;
1626
1627                 cuse_lock();
1628                 cuse_client_send_command_locked(pccmd,
1629                     (uintptr_t)uio->uio_iov->iov_base,
1630                     (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1631
1632                 error = cuse_client_receive_command_locked(pccmd, 0, 0);
1633                 cuse_unlock();
1634
1635                 if (error < 0) {
1636                         error = cuse_convert_error(error);
1637                         break;
1638                 } else if (error == len) {
1639                         error = uiomove(NULL, error, uio);
1640                         if (error)
1641                                 break;
1642                 } else {
1643                         error = uiomove(NULL, error, uio);
1644                         break;
1645                 }
1646         }
1647         cuse_cmd_unlock(pccmd);
1648
1649         uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1650
1651         if (error == EWOULDBLOCK)
1652                 cuse_client_kqfilter_poll(dev, pcc);
1653
1654         return (error);
1655 }
1656
1657 int
1658 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1659     caddr_t data, int fflag, struct thread *td)
1660 {
1661         struct cuse_client_command *pccmd;
1662         struct cuse_client *pcc;
1663         int error;
1664         int len;
1665
1666         error = cuse_client_get(&pcc);
1667         if (error != 0)
1668                 return (error);
1669
1670         len = IOCPARM_LEN(cmd);
1671         if (len > CUSE_BUFFER_MAX)
1672                 return (ENOMEM);
1673
1674         pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1675
1676         cuse_cmd_lock(pccmd);
1677
1678         if (cmd & (IOC_IN | IOC_VOID))
1679                 memcpy(pcc->ioctl_buffer, data, len);
1680
1681         /*
1682          * When the ioctl-length is zero drivers can pass information
1683          * through the data pointer of the ioctl. Make sure this information
1684          * is forwarded to the driver.
1685          */
1686
1687         cuse_lock();
1688         cuse_client_send_command_locked(pccmd,
1689             (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1690             (unsigned long)cmd, pcc->fflags,
1691             (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1692
1693         error = cuse_client_receive_command_locked(pccmd, data, len);
1694         cuse_unlock();
1695
1696         if (error < 0) {
1697                 error = cuse_convert_error(error);
1698         } else {
1699                 error = 0;
1700         }
1701
1702         if (cmd & IOC_OUT)
1703                 memcpy(data, pcc->ioctl_buffer, len);
1704
1705         cuse_cmd_unlock(pccmd);
1706
1707         if (error == EWOULDBLOCK)
1708                 cuse_client_kqfilter_poll(dev, pcc);
1709
1710         return (error);
1711 }
1712
1713 static int
1714 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1715 {
1716         struct cuse_client_command *pccmd;
1717         struct cuse_client *pcc;
1718         unsigned long temp;
1719         int error;
1720         int revents;
1721
1722         error = cuse_client_get(&pcc);
1723         if (error != 0)
1724                 goto pollnval;
1725
1726         temp = 0;
1727
1728         if (events & (POLLPRI | POLLIN | POLLRDNORM))
1729                 temp |= CUSE_POLL_READ;
1730
1731         if (events & (POLLOUT | POLLWRNORM))
1732                 temp |= CUSE_POLL_WRITE;
1733
1734         if (events & POLLHUP)
1735                 temp |= CUSE_POLL_ERROR;
1736
1737         pccmd = &pcc->cmds[CUSE_CMD_POLL];
1738
1739         cuse_cmd_lock(pccmd);
1740
1741         /* Need to selrecord() first to not loose any events. */
1742         if (temp != 0 && td != NULL)
1743                 selrecord(td, &pcc->server->selinfo);
1744
1745         cuse_lock();
1746         cuse_client_send_command_locked(pccmd,
1747             0, temp, pcc->fflags, IO_NDELAY);
1748
1749         error = cuse_client_receive_command_locked(pccmd, 0, 0);
1750         cuse_unlock();
1751
1752         cuse_cmd_unlock(pccmd);
1753
1754         if (error < 0) {
1755                 goto pollnval;
1756         } else {
1757                 revents = 0;
1758                 if (error & CUSE_POLL_READ)
1759                         revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1760                 if (error & CUSE_POLL_WRITE)
1761                         revents |= (events & (POLLOUT | POLLWRNORM));
1762                 if (error & CUSE_POLL_ERROR)
1763                         revents |= (events & POLLHUP);
1764         }
1765         return (revents);
1766
1767 pollnval:
1768         /* XXX many clients don't understand POLLNVAL */
1769         return (events & (POLLHUP | POLLPRI | POLLIN |
1770             POLLRDNORM | POLLOUT | POLLWRNORM));
1771 }
1772
1773 static int
1774 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1775     vm_size_t size, struct vm_object **object, int nprot)
1776 {
1777         uint32_t page_nr = *offset / PAGE_SIZE;
1778         uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1779         struct cuse_memory *mem;
1780         struct cuse_client *pcc;
1781         int error;
1782
1783         error = cuse_client_get(&pcc);
1784         if (error != 0)
1785                 return (error);
1786
1787         cuse_lock();
1788         /* lookup memory structure */
1789         TAILQ_FOREACH(mem, &pcc->server->hmem, entry) {
1790                 if (mem->alloc_nr == alloc_nr)
1791                         break;
1792         }
1793         if (mem == NULL) {
1794                 cuse_unlock();
1795                 return (ENOMEM);
1796         }
1797         /* verify page offset */
1798         page_nr %= CUSE_ALLOC_PAGES_MAX;
1799         if (page_nr >= mem->page_count) {
1800                 cuse_unlock();
1801                 return (ENXIO);
1802         }
1803         /* verify mmap size */
1804         if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1805             (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1806                 cuse_unlock();
1807                 return (EINVAL);
1808         }
1809         vm_object_reference(mem->object);
1810         *object = mem->object;
1811         cuse_unlock();
1812
1813         /* set new VM object offset to use */
1814         *offset = page_nr * PAGE_SIZE;
1815
1816         /* success */
1817         return (0);
1818 }
1819
1820 static void
1821 cuse_client_kqfilter_read_detach(struct knote *kn)
1822 {
1823         struct cuse_client *pcc;
1824
1825         cuse_lock();
1826         pcc = kn->kn_hook;
1827         knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1828         cuse_unlock();
1829 }
1830
1831 static void
1832 cuse_client_kqfilter_write_detach(struct knote *kn)
1833 {
1834         struct cuse_client *pcc;
1835
1836         cuse_lock();
1837         pcc = kn->kn_hook;
1838         knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1839         cuse_unlock();
1840 }
1841
1842 static int
1843 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1844 {
1845         struct cuse_client *pcc;
1846
1847         mtx_assert(&cuse_mtx, MA_OWNED);
1848
1849         pcc = kn->kn_hook;
1850         return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1851 }
1852
1853 static int
1854 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1855 {
1856         struct cuse_client *pcc;
1857
1858         mtx_assert(&cuse_mtx, MA_OWNED);
1859
1860         pcc = kn->kn_hook;
1861         return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1862 }
1863
1864 static int
1865 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1866 {
1867         struct cuse_client *pcc;
1868         struct cuse_server *pcs;
1869         int error;
1870
1871         error = cuse_client_get(&pcc);
1872         if (error != 0)
1873                 return (error);
1874
1875         cuse_lock();
1876         pcs = pcc->server;
1877         switch (kn->kn_filter) {
1878         case EVFILT_READ:
1879                 pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1880                 kn->kn_hook = pcc;
1881                 kn->kn_fop = &cuse_client_kqfilter_read_ops;
1882                 knlist_add(&pcs->selinfo.si_note, kn, 1);
1883                 break;
1884         case EVFILT_WRITE:
1885                 pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1886                 kn->kn_hook = pcc;
1887                 kn->kn_fop = &cuse_client_kqfilter_write_ops;
1888                 knlist_add(&pcs->selinfo.si_note, kn, 1);
1889                 break;
1890         default:
1891                 error = EINVAL;
1892                 break;
1893         }
1894         cuse_unlock();
1895
1896         if (error == 0)
1897                 cuse_client_kqfilter_poll(dev, pcc);
1898         return (error);
1899 }