]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/fs/cuse/cuse.c
Merge clang trunk r366426, resolve conflicts, and update FREEBSD-Xlist.
[FreeBSD/FreeBSD.git] / sys / fs / cuse / cuse.c
1 /* $FreeBSD$ */
2 /*-
3  * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 #include <sys/stdint.h>
28 #include <sys/stddef.h>
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/linker_set.h>
36 #include <sys/module.h>
37 #include <sys/lock.h>
38 #include <sys/mutex.h>
39 #include <sys/condvar.h>
40 #include <sys/sysctl.h>
41 #include <sys/unistd.h>
42 #include <sys/malloc.h>
43 #include <sys/priv.h>
44 #include <sys/uio.h>
45 #include <sys/poll.h>
46 #include <sys/sx.h>
47 #include <sys/rwlock.h>
48 #include <sys/queue.h>
49 #include <sys/fcntl.h>
50 #include <sys/proc.h>
51 #include <sys/vnode.h>
52 #include <sys/selinfo.h>
53 #include <sys/ptrace.h>
54 #include <sys/sysent.h>
55
56 #include <machine/bus.h>
57
58 #include <vm/vm.h>
59 #include <vm/pmap.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_pager.h>
63
64 #include <fs/cuse/cuse_defs.h>
65 #include <fs/cuse/cuse_ioctl.h>
66
/* Declare version 1 of the "cuse" module. */
MODULE_VERSION(cuse, 1);

/*
 * Prevent cuse4bsd.ko and cuse.ko from loading at the same time by
 * declaring support for the cuse4bsd interface in cuse.ko:
 */
MODULE_VERSION(cuse4bsd, 1);

/* Only announce the feature when the FEATURE() macro is available. */
#ifdef FEATURE
FEATURE(cuse, "Userspace character devices");
#endif
78
/* Forward declarations of the structure tags used by the definitions below. */
struct cuse_command;
struct cuse_server;
struct cuse_client;
82
/*
 * One outstanding request slot of a client. Each client owns
 * CUSE_CMD_MAX of these (see struct cuse_client).
 */
struct cuse_client_command {
	/* linkage on the server's "head" queue; tqe_prev == NULL means "not queued" */
	TAILQ_ENTRY(cuse_client_command) entry;
	/* request as handed to the server by CUSE_IOCTL_GET_COMMAND */
	struct cuse_command sub;
	/* per-command lock, see cuse_cmd_lock() / cuse_cmd_unlock() */
	struct sx sx;
	/* slept on together with cuse_mtx while waiting for completion */
	struct cv cv;
	/* server thread that fetched this command, NULL once it was synced */
	struct thread *entered;
	/* client this command slot belongs to */
	struct cuse_client *client;
	/* process currently waiting for the command, NULL when nobody waits */
	struct proc *proc_curr;
	/* number of server-side copy operations currently in flight */
	int	proc_refs;
	/* set to 1 when the waiting thread was interrupted by a signal */
	int	got_signal;
	/* result code stored by CUSE_IOCTL_SYNC_COMMAND */
	int	error;
	/* CUSE_CMD_NONE while pending, CUSE_CMD_SYNC when completed */
	int	command;
};
96
/* One memory allocation made on behalf of a server. */
struct cuse_memory {
	/* linkage on the owning server's "hmem" list */
	TAILQ_ENTRY(cuse_memory) entry;
	/* VM object providing the pages (allocated as OBJT_SWAP) */
	vm_object_t object;
	/* size of the allocation in pages */
	uint32_t page_count;
	/* allocation number; unique within one server's "hmem" list */
	uint32_t alloc_nr;
};
103
/* One character device created by a server. */
struct cuse_server_dev {
	/* linkage on the owning server's "hdev" list */
	TAILQ_ENTRY(cuse_server_dev) entry;
	/* server that owns this device */
	struct cuse_server *server;
	/* kernel device node; destroyed in cuse_server_free_dev() */
	struct cdev *kern_dev;
	/* NOTE(review): presumably the server's own handle for the device - not used in this part of the file */
	struct cuse_dev *user_dev;
};
110
/* State of one open instance of the /dev/cuse control device. */
struct cuse_server {
	/* linkage on the global cuse_server_head list */
	TAILQ_ENTRY(cuse_server) entry;
	/* commands queued by clients and not yet fetched by the server */
	TAILQ_HEAD(, cuse_client_command) head;
	/* devices created by this server */
	TAILQ_HEAD(, cuse_server_dev) hdev;
	/* clients attached to this server */
	TAILQ_HEAD(, cuse_client) hcli;
	/* memory allocations owned by this server */
	TAILQ_HEAD(, cuse_memory) hmem;
	/* signalled whenever a command is appended to "head" */
	struct cv cv;
	/* select/kqueue bookkeeping, see cuse_server_wakeup_locked() */
	struct selinfo selinfo;
	/* ID of the process that opened the server */
	pid_t	pid;
	/* non-zero once teardown has started */
	int	is_closing;
	/* reference count; the structure is freed when it drops to zero */
	int	refs;
};
123
/* State of one open instance of a client device. */
struct cuse_client {
	/* linkage on the owning server's "hcli" list */
	TAILQ_ENTRY(cuse_client) entry;
	/* NOTE(review): second list linkage - its list is not visible in this part of the file */
	TAILQ_ENTRY(cuse_client) entry_ref;
	/* one request slot per command type */
	struct cuse_client_command cmds[CUSE_CMD_MAX];
	/* server this client talks to */
	struct cuse_server *server;
	/* device the client was opened on; set to NULL when closing */
	struct cuse_server_dev *server_dev;

	/* kernel staging buffer the server reads/writes via cuse_server_ioctl_copy_locked() */
	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);

	int	fflags;			/* file flags */
	int	cflags;			/* client flags */
#define	CUSE_CLI_IS_CLOSING 0x01
#define	CUSE_CLI_KNOTE_NEED_READ 0x02
#define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
#define	CUSE_CLI_KNOTE_HAS_READ 0x08
#define	CUSE_CLI_KNOTE_HAS_WRITE 0x10
};
141
/* Non-zero when the client "pcc" has been marked as closing. */
#define	CUSE_CLIENT_CLOSING(pcc) \
    ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
144
/* malloc(9) type used for every allocation made by this file */
static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");

/* list of all currently open servers */
static TAILQ_HEAD(, cuse_server) cuse_server_head;
/* global mutex, taken through cuse_lock() / cuse_unlock() */
static struct mtx cuse_mtx;
/* the /dev/cuse control device created in cuse_kern_init() */
static struct cdev *cuse_dev;
/* unit allocation table: owning server per slot, NULL when the slot is free */
static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
/* unit allocation table: ID stored for the slot with the same index */
static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
152
/* Prototypes for functions that are referenced before their definitions. */
static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
static void cuse_client_kqfilter_read_detach(struct knote *kn);
static void cuse_client_kqfilter_write_detach(struct knote *kn);
static int cuse_client_kqfilter_read_event(struct knote *kn, long hint);
static int cuse_client_kqfilter_write_event(struct knote *kn, long hint);

/* kqueue filter callbacks for the read side of a client device */
static struct filterops cuse_client_kqfilter_read_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_read_detach,
	.f_event = cuse_client_kqfilter_read_event,
};

/* kqueue filter callbacks for the write side of a client device */
static struct filterops cuse_client_kqfilter_write_ops = {
	.f_isfd = 1,
	.f_detach = cuse_client_kqfilter_write_detach,
	.f_event = cuse_client_kqfilter_write_event,
};
170
/* Entry points of the client character devices. */
static d_open_t cuse_client_open;
static d_close_t cuse_client_close;
static d_ioctl_t cuse_client_ioctl;
static d_read_t cuse_client_read;
static d_write_t cuse_client_write;
static d_poll_t cuse_client_poll;
static d_mmap_single_t cuse_client_mmap_single;
static d_kqfilter_t cuse_client_kqfilter;

/* Device switch table wiring up the cuse_client_* entry points. */
static struct cdevsw cuse_client_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_client_open,
	.d_close = cuse_client_close,
	.d_ioctl = cuse_client_ioctl,
	.d_name = "cuse_client",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_client_read,
	.d_write = cuse_client_write,
	.d_poll = cuse_client_poll,
	.d_mmap_single = cuse_client_mmap_single,
	.d_kqfilter = cuse_client_kqfilter,
};
193
/* Entry points of the /dev/cuse control device. */
static d_open_t cuse_server_open;
static d_close_t cuse_server_close;
static d_ioctl_t cuse_server_ioctl;
static d_read_t cuse_server_read;
static d_write_t cuse_server_write;
static d_poll_t cuse_server_poll;
static d_mmap_single_t cuse_server_mmap_single;

/* Device switch table used by make_dev() in cuse_kern_init(). */
static struct cdevsw cuse_server_devsw = {
	.d_version = D_VERSION,
	.d_open = cuse_server_open,
	.d_close = cuse_server_close,
	.d_ioctl = cuse_server_ioctl,
	.d_name = "cuse_server",
	.d_flags = D_TRACKCLOSE,
	.d_read = cuse_server_read,
	.d_write = cuse_server_write,
	.d_poll = cuse_server_poll,
	.d_mmap_single = cuse_server_mmap_single,
};
214
/* Prototypes for helpers that are called before their definitions. */
static void cuse_client_is_closing(struct cuse_client *);
static int cuse_free_unit_by_id_locked(struct cuse_server *, int);
217
218 static void
219 cuse_lock(void)
220 {
221         mtx_lock(&cuse_mtx);
222 }
223
224 static void
225 cuse_unlock(void)
226 {
227         mtx_unlock(&cuse_mtx);
228 }
229
230 static void
231 cuse_cmd_lock(struct cuse_client_command *pccmd)
232 {
233         sx_xlock(&pccmd->sx);
234 }
235
236 static void
237 cuse_cmd_unlock(struct cuse_client_command *pccmd)
238 {
239         sx_xunlock(&pccmd->sx);
240 }
241
242 static void
243 cuse_kern_init(void *arg)
244 {
245         TAILQ_INIT(&cuse_server_head);
246
247         mtx_init(&cuse_mtx, "cuse-mtx", NULL, MTX_DEF);
248
249         cuse_dev = make_dev(&cuse_server_devsw, 0,
250             UID_ROOT, GID_OPERATOR, 0600, "cuse");
251
252         printf("Cuse v%d.%d.%d @ /dev/cuse\n",
253             (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
254             (CUSE_VERSION >> 0) & 0xFF);
255 }
256 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, NULL);
257
258 static void
259 cuse_kern_uninit(void *arg)
260 {
261         void *ptr;
262
263         while (1) {
264
265                 printf("Cuse: Please exit all /dev/cuse instances "
266                     "and processes which have used this device.\n");
267
268                 pause("DRAIN", 2 * hz);
269
270                 cuse_lock();
271                 ptr = TAILQ_FIRST(&cuse_server_head);
272                 cuse_unlock();
273
274                 if (ptr == NULL)
275                         break;
276         }
277
278         if (cuse_dev != NULL)
279                 destroy_dev(cuse_dev);
280
281         mtx_destroy(&cuse_mtx);
282 }
283 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
284
285 static int
286 cuse_server_get(struct cuse_server **ppcs)
287 {
288         struct cuse_server *pcs;
289         int error;
290
291         error = devfs_get_cdevpriv((void **)&pcs);
292         if (error != 0) {
293                 *ppcs = NULL;
294                 return (error);
295         }
296         /* check if closing */
297         cuse_lock();
298         if (pcs->is_closing) {
299                 cuse_unlock();
300                 *ppcs = NULL;
301                 return (EINVAL);
302         }
303         cuse_unlock();
304         *ppcs = pcs;
305         return (0);
306 }
307
308 static void
309 cuse_server_is_closing(struct cuse_server *pcs)
310 {
311         struct cuse_client *pcc;
312
313         if (pcs->is_closing)
314                 return;
315
316         pcs->is_closing = 1;
317
318         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
319                 cuse_client_is_closing(pcc);
320         }
321 }
322
323 static struct cuse_client_command *
324 cuse_server_find_command(struct cuse_server *pcs, struct thread *td)
325 {
326         struct cuse_client *pcc;
327         int n;
328
329         if (pcs->is_closing)
330                 goto done;
331
332         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
333                 if (CUSE_CLIENT_CLOSING(pcc))
334                         continue;
335                 for (n = 0; n != CUSE_CMD_MAX; n++) {
336                         if (pcc->cmds[n].entered == td)
337                                 return (&pcc->cmds[n]);
338                 }
339         }
340 done:
341         return (NULL);
342 }
343
/*
 * Sanitise the NUL-terminated string "ptr" in place: every character
 * that is not an ASCII letter, an ASCII digit, '.', '_' or '/' is
 * overwritten with '_'.
 */
static void
cuse_str_filter(char *ptr)
{
	char *cur;

	for (cur = ptr; *cur != 0; cur++) {
		const int ch = *cur;
		const int keep =
		    (ch >= 'a' && ch <= 'z') ||
		    (ch >= 'A' && ch <= 'Z') ||
		    (ch >= '0' && ch <= '9') ||
		    ch == '.' || ch == '_' || ch == '/';

		if (!keep)
			*cur = '_';
	}
}
372
373 static int
374 cuse_convert_error(int error)
375 {
376         ;                               /* indent fix */
377         switch (error) {
378         case CUSE_ERR_NONE:
379                 return (0);
380         case CUSE_ERR_BUSY:
381                 return (EBUSY);
382         case CUSE_ERR_WOULDBLOCK:
383                 return (EWOULDBLOCK);
384         case CUSE_ERR_INVALID:
385                 return (EINVAL);
386         case CUSE_ERR_NO_MEMORY:
387                 return (ENOMEM);
388         case CUSE_ERR_FAULT:
389                 return (EFAULT);
390         case CUSE_ERR_SIGNAL:
391                 return (EINTR);
392         case CUSE_ERR_NO_DEVICE:
393                 return (ENODEV);
394         default:
395                 return (ENXIO);
396         }
397 }
398
399 static void
400 cuse_vm_memory_free(struct cuse_memory *mem)
401 {
402         /* last user is gone - free */
403         vm_object_deallocate(mem->object);
404
405         /* free CUSE memory */
406         free(mem, M_CUSE);
407 }
408
409 static int
410 cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
411     uint32_t page_count)
412 {
413         struct cuse_memory *temp;
414         struct cuse_memory *mem;
415         vm_object_t object;
416         int error;
417
418         mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
419         if (mem == NULL)
420                 return (ENOMEM);
421
422         object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * page_count,
423             VM_PROT_DEFAULT, 0, curthread->td_ucred);
424         if (object == NULL) {
425                 error = ENOMEM;
426                 goto error_0;
427         }
428
429         cuse_lock();
430         /* check if allocation number already exists */
431         TAILQ_FOREACH(temp, &pcs->hmem, entry) {
432                 if (temp->alloc_nr == alloc_nr)
433                         break;
434         }
435         if (temp != NULL) {
436                 cuse_unlock();
437                 error = EBUSY;
438                 goto error_1;
439         }
440         mem->object = object;
441         mem->page_count = page_count;
442         mem->alloc_nr = alloc_nr;
443         TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
444         cuse_unlock();
445
446         return (0);
447
448 error_1:
449         vm_object_deallocate(object);
450 error_0:
451         free(mem, M_CUSE);
452         return (error);
453 }
454
455 static int
456 cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
457 {
458         struct cuse_memory *mem;
459
460         cuse_lock();
461         TAILQ_FOREACH(mem, &pcs->hmem, entry) {
462                 if (mem->alloc_nr == alloc_nr)
463                         break;
464         }
465         if (mem == NULL) {
466                 cuse_unlock();
467                 return (EINVAL);
468         }
469         TAILQ_REMOVE(&pcs->hmem, mem, entry);
470         cuse_unlock();
471
472         cuse_vm_memory_free(mem);
473
474         return (0);
475 }
476
477 static int
478 cuse_client_get(struct cuse_client **ppcc)
479 {
480         struct cuse_client *pcc;
481         int error;
482
483         /* try to get private data */
484         error = devfs_get_cdevpriv((void **)&pcc);
485         if (error != 0) {
486                 *ppcc = NULL;
487                 return (error);
488         }
489         /* check if closing */
490         cuse_lock();
491         if (CUSE_CLIENT_CLOSING(pcc) || pcc->server->is_closing) {
492                 cuse_unlock();
493                 *ppcc = NULL;
494                 return (EINVAL);
495         }
496         cuse_unlock();
497         *ppcc = pcc;
498         return (0);
499 }
500
501 static void
502 cuse_client_is_closing(struct cuse_client *pcc)
503 {
504         struct cuse_client_command *pccmd;
505         uint32_t n;
506
507         if (CUSE_CLIENT_CLOSING(pcc))
508                 return;
509
510         pcc->cflags |= CUSE_CLI_IS_CLOSING;
511         pcc->server_dev = NULL;
512
513         for (n = 0; n != CUSE_CMD_MAX; n++) {
514
515                 pccmd = &pcc->cmds[n];
516
517                 if (pccmd->entry.tqe_prev != NULL) {
518                         TAILQ_REMOVE(&pcc->server->head, pccmd, entry);
519                         pccmd->entry.tqe_prev = NULL;
520                 }
521                 cv_broadcast(&pccmd->cv);
522         }
523 }
524
525 static void
526 cuse_client_send_command_locked(struct cuse_client_command *pccmd,
527     uintptr_t data_ptr, unsigned long arg, int fflags, int ioflag)
528 {
529         unsigned long cuse_fflags = 0;
530         struct cuse_server *pcs;
531
532         if (fflags & FREAD)
533                 cuse_fflags |= CUSE_FFLAG_READ;
534
535         if (fflags & FWRITE)
536                 cuse_fflags |= CUSE_FFLAG_WRITE;
537
538         if (ioflag & IO_NDELAY)
539                 cuse_fflags |= CUSE_FFLAG_NONBLOCK;
540 #if defined(__LP64__)
541         if (SV_CURPROC_FLAG(SV_ILP32))
542                 cuse_fflags |= CUSE_FFLAG_COMPAT32;
543 #endif
544         pccmd->sub.fflags = cuse_fflags;
545         pccmd->sub.data_pointer = data_ptr;
546         pccmd->sub.argument = arg;
547
548         pcs = pccmd->client->server;
549
550         if ((pccmd->entry.tqe_prev == NULL) &&
551             (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
552             (pcs->is_closing == 0)) {
553                 TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
554                 cv_signal(&pcs->cv);
555         }
556 }
557
558 static void
559 cuse_client_got_signal(struct cuse_client_command *pccmd)
560 {
561         struct cuse_server *pcs;
562
563         pccmd->got_signal = 1;
564
565         pccmd = &pccmd->client->cmds[CUSE_CMD_SIGNAL];
566
567         pcs = pccmd->client->server;
568
569         if ((pccmd->entry.tqe_prev == NULL) &&
570             (CUSE_CLIENT_CLOSING(pccmd->client) == 0) &&
571             (pcs->is_closing == 0)) {
572                 TAILQ_INSERT_TAIL(&pcs->head, pccmd, entry);
573                 cv_signal(&pcs->cv);
574         }
575 }
576
/*
 * Wait for the server to complete the command "pccmd" and return the
 * result code the server stored for it. The sleeps use cuse_mtx as
 * their interlock.
 *
 * The first sleep is interruptible. If it is interrupted, the signal
 * is forwarded to the server through cuse_client_got_signal() and all
 * further sleeps are uninterruptible, so the function keeps waiting
 * for the server's answer. CUSE_ERR_OTHER is returned when the client
 * or its server is closing.
 *
 * "arg_ptr" and "arg_len" are not used by this function.
 */
static int
cuse_client_receive_command_locked(struct cuse_client_command *pccmd,
    uint8_t *arg_ptr, uint32_t arg_len)
{
	int error;

	error = 0;

	/* publish the waiting process for the server-side copy routines */
	pccmd->proc_curr = curthread->td_proc;

	if (CUSE_CLIENT_CLOSING(pccmd->client) ||
	    pccmd->client->server->is_closing) {
		error = CUSE_ERR_OTHER;
		goto done;
	}
	while (pccmd->command == CUSE_CMD_NONE) {
		if (error != 0) {
			/* a signal was already reported; wait uninterruptibly */
			cv_wait(&pccmd->cv, &cuse_mtx);
		} else {
			error = cv_wait_sig(&pccmd->cv, &cuse_mtx);

			if (error != 0)
				cuse_client_got_signal(pccmd);
		}
		if (CUSE_CLIENT_CLOSING(pccmd->client) ||
		    pccmd->client->server->is_closing) {
			error = CUSE_ERR_OTHER;
			goto done;
		}
	}

	/* take over the server's result and mark the slot idle again */
	error = pccmd->error;
	pccmd->command = CUSE_CMD_NONE;
	cv_signal(&pccmd->cv);

done:

	/* wait until all process references are gone */

	pccmd->proc_curr = NULL;

	while (pccmd->proc_refs != 0)
		cv_wait(&pccmd->cv, &cuse_mtx);

	return (error);
}
623
624 /*------------------------------------------------------------------------*
625  *      CUSE SERVER PART
626  *------------------------------------------------------------------------*/
627
628 static void
629 cuse_server_free_dev(struct cuse_server_dev *pcsd)
630 {
631         struct cuse_server *pcs;
632         struct cuse_client *pcc;
633
634         /* get server pointer */
635         pcs = pcsd->server;
636
637         /* prevent creation of more devices */
638         cuse_lock();
639         if (pcsd->kern_dev != NULL)
640                 pcsd->kern_dev->si_drv1 = NULL;
641
642         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
643                 if (pcc->server_dev == pcsd)
644                         cuse_client_is_closing(pcc);
645         }
646         cuse_unlock();
647
648         /* destroy device, if any */
649         if (pcsd->kern_dev != NULL) {
650                 /* destroy device synchronously */
651                 destroy_dev(pcsd->kern_dev);
652         }
653         free(pcsd, M_CUSE);
654 }
655
/*
 * Drop one reference on the server. When the last reference goes away
 * the server is marked as closing, unlinked from the global server
 * list and all of its devices, unit numbers, memory allocations and
 * select/kqueue state are released before the structure is freed.
 */
static void
cuse_server_unref(struct cuse_server *pcs)
{
	struct cuse_server_dev *pcsd;
	struct cuse_memory *mem;

	cuse_lock();
	pcs->refs--;
	if (pcs->refs != 0) {
		/* other references remain; nothing more to do */
		cuse_unlock();
		return;
	}
	cuse_server_is_closing(pcs);
	/* final client wakeup, if any */
	cuse_server_wakeup_all_client_locked(pcs);

	TAILQ_REMOVE(&cuse_server_head, pcs, entry);

	/* the mutex is dropped around cuse_server_free_dev(), which locks it itself */
	while ((pcsd = TAILQ_FIRST(&pcs->hdev)) != NULL) {
		TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
		cuse_unlock();
		cuse_server_free_dev(pcsd);
		cuse_lock();
	}

	/* an ID of -1 releases every unit owned by this server */
	cuse_free_unit_by_id_locked(pcs, -1);

	/* each allocation is unlinked under the mutex and freed without it */
	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
		TAILQ_REMOVE(&pcs->hmem, mem, entry);
		cuse_unlock();
		cuse_vm_memory_free(mem);
		cuse_lock();
	}

	/* the CUSE mutex is still held while the knote list is torn down */
	knlist_clear(&pcs->selinfo.si_note, 1);
	knlist_destroy(&pcs->selinfo.si_note);

	cuse_unlock();

	seldrain(&pcs->selinfo);

	cv_destroy(&pcs->cv);

	free(pcs, M_CUSE);
}
701
702 static int
703 cuse_server_do_close(struct cuse_server *pcs)
704 {
705         int retval;
706
707         cuse_lock();
708         cuse_server_is_closing(pcs);
709         /* final client wakeup, if any */
710         cuse_server_wakeup_all_client_locked(pcs);
711
712         knlist_clear(&pcs->selinfo.si_note, 1);
713
714         retval = pcs->refs;
715         cuse_unlock();
716
717         return (retval);
718 }
719
720 static void
721 cuse_server_free(void *arg)
722 {
723         struct cuse_server *pcs = arg;
724
725         /*
726          * The final server unref should be done by the server thread
727          * to prevent deadlock in the client cdevpriv destructor,
728          * which cannot destroy itself.
729          */
730         while (cuse_server_do_close(pcs) != 1)
731                 pause("W", hz);
732
733         /* drop final refcount */
734         cuse_server_unref(pcs);
735 }
736
737 static int
738 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
739 {
740         struct cuse_server *pcs;
741
742         pcs = malloc(sizeof(*pcs), M_CUSE, M_WAITOK | M_ZERO);
743         if (pcs == NULL)
744                 return (ENOMEM);
745
746         if (devfs_set_cdevpriv(pcs, &cuse_server_free)) {
747                 printf("Cuse: Cannot set cdevpriv.\n");
748                 free(pcs, M_CUSE);
749                 return (ENOMEM);
750         }
751         /* store current process ID */
752         pcs->pid = curproc->p_pid;
753
754         TAILQ_INIT(&pcs->head);
755         TAILQ_INIT(&pcs->hdev);
756         TAILQ_INIT(&pcs->hcli);
757         TAILQ_INIT(&pcs->hmem);
758
759         cv_init(&pcs->cv, "cuse-server-cv");
760
761         knlist_init_mtx(&pcs->selinfo.si_note, &cuse_mtx);
762
763         cuse_lock();
764         pcs->refs++;
765         TAILQ_INSERT_TAIL(&cuse_server_head, pcs, entry);
766         cuse_unlock();
767
768         return (0);
769 }
770
/*
 * Close handler of /dev/cuse: if the server behind the file handle
 * can still be looked up, start closing it. Always returns 0.
 */
static int
cuse_server_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct cuse_server *server;
	int err;

	err = cuse_server_get(&server);
	if (err == 0)
		(void)cuse_server_do_close(server);

	return (0);
}
781
/* Reading from the control device is not supported; always fails with ENXIO. */
static int
cuse_server_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int unsupported = ENXIO;

	return (unsupported);
}
787
/* Writing to the control device is not supported; always fails with ENXIO. */
static int
cuse_server_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	const int unsupported = ENXIO;

	return (unsupported);
}
793
794 static int
795 cuse_server_ioctl_copy_locked(struct cuse_client_command *pccmd,
796     struct cuse_data_chunk *pchk, int isread)
797 {
798         struct proc *p_proc;
799         uint32_t offset;
800         int error;
801
802         offset = pchk->peer_ptr - CUSE_BUF_MIN_PTR;
803
804         if (pchk->length > CUSE_BUFFER_MAX)
805                 return (EFAULT);
806
807         if (offset >= CUSE_BUFFER_MAX)
808                 return (EFAULT);
809
810         if ((offset + pchk->length) > CUSE_BUFFER_MAX)
811                 return (EFAULT);
812
813         p_proc = pccmd->proc_curr;
814         if (p_proc == NULL)
815                 return (ENXIO);
816
817         if (pccmd->proc_refs < 0)
818                 return (ENOMEM);
819
820         pccmd->proc_refs++;
821
822         cuse_unlock();
823
824         if (isread == 0) {
825                 error = copyin(
826                     (void *)pchk->local_ptr,
827                     pccmd->client->ioctl_buffer + offset,
828                     pchk->length);
829         } else {
830                 error = copyout(
831                     pccmd->client->ioctl_buffer + offset,
832                     (void *)pchk->local_ptr,
833                     pchk->length);
834         }
835
836         cuse_lock();
837
838         pccmd->proc_refs--;
839
840         if (pccmd->proc_curr == NULL)
841                 cv_signal(&pccmd->cv);
842
843         return (error);
844 }
845
846 static int
847 cuse_proc2proc_copy(struct proc *proc_s, vm_offset_t data_s,
848     struct proc *proc_d, vm_offset_t data_d, size_t len)
849 {
850         struct thread *td;
851         struct proc *proc_cur;
852         int error;
853
854         td = curthread;
855         proc_cur = td->td_proc;
856
857         if (proc_cur == proc_d) {
858                 struct iovec iov = {
859                         .iov_base = (caddr_t)data_d,
860                         .iov_len = len,
861                 };
862                 struct uio uio = {
863                         .uio_iov = &iov,
864                         .uio_iovcnt = 1,
865                         .uio_offset = (off_t)data_s,
866                         .uio_resid = len,
867                         .uio_segflg = UIO_USERSPACE,
868                         .uio_rw = UIO_READ,
869                         .uio_td = td,
870                 };
871
872                 PHOLD(proc_s);
873                 error = proc_rwmem(proc_s, &uio);
874                 PRELE(proc_s);
875
876         } else if (proc_cur == proc_s) {
877                 struct iovec iov = {
878                         .iov_base = (caddr_t)data_s,
879                         .iov_len = len,
880                 };
881                 struct uio uio = {
882                         .uio_iov = &iov,
883                         .uio_iovcnt = 1,
884                         .uio_offset = (off_t)data_d,
885                         .uio_resid = len,
886                         .uio_segflg = UIO_USERSPACE,
887                         .uio_rw = UIO_WRITE,
888                         .uio_td = td,
889                 };
890
891                 PHOLD(proc_d);
892                 error = proc_rwmem(proc_d, &uio);
893                 PRELE(proc_d);
894         } else {
895                 error = EINVAL;
896         }
897         return (error);
898 }
899
900 static int
901 cuse_server_data_copy_locked(struct cuse_client_command *pccmd,
902     struct cuse_data_chunk *pchk, int isread)
903 {
904         struct proc *p_proc;
905         int error;
906
907         p_proc = pccmd->proc_curr;
908         if (p_proc == NULL)
909                 return (ENXIO);
910
911         if (pccmd->proc_refs < 0)
912                 return (ENOMEM);
913
914         pccmd->proc_refs++;
915
916         cuse_unlock();
917
918         if (isread == 0) {
919                 error = cuse_proc2proc_copy(
920                     curthread->td_proc, pchk->local_ptr,
921                     p_proc, pchk->peer_ptr,
922                     pchk->length);
923         } else {
924                 error = cuse_proc2proc_copy(
925                     p_proc, pchk->peer_ptr,
926                     curthread->td_proc, pchk->local_ptr,
927                     pchk->length);
928         }
929
930         cuse_lock();
931
932         pccmd->proc_refs--;
933
934         if (pccmd->proc_curr == NULL)
935                 cv_signal(&pccmd->cv);
936
937         return (error);
938 }
939
940 static int
941 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
942 {
943         int n;
944         int x = 0;
945         int match;
946
947         do {
948                 for (match = n = 0; n != CUSE_DEVICES_MAX; n++) {
949                         if (cuse_alloc_unit[n] != NULL) {
950                                 if ((cuse_alloc_unit_id[n] ^ id) & CUSE_ID_MASK)
951                                         continue;
952                                 if ((cuse_alloc_unit_id[n] & ~CUSE_ID_MASK) == x) {
953                                         x++;
954                                         match = 1;
955                                 }
956                         }
957                 }
958         } while (match);
959
960         if (x < 256) {
961                 for (n = 0; n != CUSE_DEVICES_MAX; n++) {
962                         if (cuse_alloc_unit[n] == NULL) {
963                                 cuse_alloc_unit[n] = pcs;
964                                 cuse_alloc_unit_id[n] = id | x;
965                                 return (x);
966                         }
967                 }
968         }
969         return (-1);
970 }
971
972 static void
973 cuse_server_wakeup_locked(struct cuse_server *pcs)
974 {
975         selwakeup(&pcs->selinfo);
976         KNOTE_LOCKED(&pcs->selinfo.si_note, 0);
977 }
978
979 static void
980 cuse_server_wakeup_all_client_locked(struct cuse_server *pcs)
981 {
982         struct cuse_client *pcc;
983
984         TAILQ_FOREACH(pcc, &pcs->hcli, entry) {
985                 pcc->cflags |= (CUSE_CLI_KNOTE_NEED_READ |
986                     CUSE_CLI_KNOTE_NEED_WRITE);
987         }
988         cuse_server_wakeup_locked(pcs);
989 }
990
991 static int
992 cuse_free_unit_by_id_locked(struct cuse_server *pcs, int id)
993 {
994         int n;
995         int found = 0;
996
997         for (n = 0; n != CUSE_DEVICES_MAX; n++) {
998                 if (cuse_alloc_unit[n] == pcs) {
999                         if (cuse_alloc_unit_id[n] == id || id == -1) {
1000                                 cuse_alloc_unit[n] = NULL;
1001                                 cuse_alloc_unit_id[n] = 0;
1002                                 found = 1;
1003                         }
1004                 }
1005         }
1006
1007         return (found ? 0 : EINVAL);
1008 }
1009
1010 static int
1011 cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
1012     caddr_t data, int fflag, struct thread *td)
1013 {
1014         struct cuse_server *pcs;
1015         int error;
1016
1017         error = cuse_server_get(&pcs);
1018         if (error != 0)
1019                 return (error);
1020
1021         switch (cmd) {
1022                 struct cuse_client_command *pccmd;
1023                 struct cuse_client *pcc;
1024                 struct cuse_command *pcmd;
1025                 struct cuse_alloc_info *pai;
1026                 struct cuse_create_dev *pcd;
1027                 struct cuse_server_dev *pcsd;
1028                 struct cuse_data_chunk *pchk;
1029                 int n;
1030
1031         case CUSE_IOCTL_GET_COMMAND:
1032                 pcmd = (void *)data;
1033
1034                 cuse_lock();
1035
1036                 while ((pccmd = TAILQ_FIRST(&pcs->head)) == NULL) {
1037                         error = cv_wait_sig(&pcs->cv, &cuse_mtx);
1038
1039                         if (pcs->is_closing)
1040                                 error = ENXIO;
1041
1042                         if (error) {
1043                                 cuse_unlock();
1044                                 return (error);
1045                         }
1046                 }
1047
1048                 TAILQ_REMOVE(&pcs->head, pccmd, entry);
1049                 pccmd->entry.tqe_prev = NULL;
1050
1051                 pccmd->entered = curthread;
1052
1053                 *pcmd = pccmd->sub;
1054
1055                 cuse_unlock();
1056
1057                 break;
1058
1059         case CUSE_IOCTL_SYNC_COMMAND:
1060
1061                 cuse_lock();
1062                 while ((pccmd = cuse_server_find_command(pcs, curthread)) != NULL) {
1063
1064                         /* send sync command */
1065                         pccmd->entered = NULL;
1066                         pccmd->error = *(int *)data;
1067                         pccmd->command = CUSE_CMD_SYNC;
1068
1069                         /* signal peer, if any */
1070                         cv_signal(&pccmd->cv);
1071                 }
1072                 cuse_unlock();
1073
1074                 break;
1075
1076         case CUSE_IOCTL_ALLOC_UNIT:
1077
1078                 cuse_lock();
1079                 n = cuse_alloc_unit_by_id_locked(pcs,
1080                     CUSE_ID_DEFAULT(0));
1081                 cuse_unlock();
1082
1083                 if (n < 0)
1084                         error = ENOMEM;
1085                 else
1086                         *(int *)data = n;
1087                 break;
1088
1089         case CUSE_IOCTL_ALLOC_UNIT_BY_ID:
1090
1091                 n = *(int *)data;
1092
1093                 n = (n & CUSE_ID_MASK);
1094
1095                 cuse_lock();
1096                 n = cuse_alloc_unit_by_id_locked(pcs, n);
1097                 cuse_unlock();
1098
1099                 if (n < 0)
1100                         error = ENOMEM;
1101                 else
1102                         *(int *)data = n;
1103                 break;
1104
1105         case CUSE_IOCTL_FREE_UNIT:
1106
1107                 n = *(int *)data;
1108
1109                 n = CUSE_ID_DEFAULT(n);
1110
1111                 cuse_lock();
1112                 error = cuse_free_unit_by_id_locked(pcs, n);
1113                 cuse_unlock();
1114                 break;
1115
1116         case CUSE_IOCTL_FREE_UNIT_BY_ID:
1117
1118                 n = *(int *)data;
1119
1120                 cuse_lock();
1121                 error = cuse_free_unit_by_id_locked(pcs, n);
1122                 cuse_unlock();
1123                 break;
1124
1125         case CUSE_IOCTL_ALLOC_MEMORY:
1126
1127                 pai = (void *)data;
1128
1129                 if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1130                         error = ENOMEM;
1131                         break;
1132                 }
1133                 if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
1134                         error = ENOMEM;
1135                         break;
1136                 }
1137                 error = cuse_server_alloc_memory(pcs,
1138                     pai->alloc_nr, pai->page_count);
1139                 break;
1140
1141         case CUSE_IOCTL_FREE_MEMORY:
1142                 pai = (void *)data;
1143
1144                 if (pai->alloc_nr >= CUSE_ALLOC_UNIT_MAX) {
1145                         error = ENOMEM;
1146                         break;
1147                 }
1148                 error = cuse_server_free_memory(pcs, pai->alloc_nr);
1149                 break;
1150
1151         case CUSE_IOCTL_GET_SIG:
1152
1153                 cuse_lock();
1154                 pccmd = cuse_server_find_command(pcs, curthread);
1155
1156                 if (pccmd != NULL) {
1157                         n = pccmd->got_signal;
1158                         pccmd->got_signal = 0;
1159                 } else {
1160                         n = 0;
1161                 }
1162                 cuse_unlock();
1163
1164                 *(int *)data = n;
1165
1166                 break;
1167
1168         case CUSE_IOCTL_SET_PFH:
1169
1170                 cuse_lock();
1171                 pccmd = cuse_server_find_command(pcs, curthread);
1172
1173                 if (pccmd != NULL) {
1174                         pcc = pccmd->client;
1175                         for (n = 0; n != CUSE_CMD_MAX; n++) {
1176                                 pcc->cmds[n].sub.per_file_handle = *(uintptr_t *)data;
1177                         }
1178                 } else {
1179                         error = ENXIO;
1180                 }
1181                 cuse_unlock();
1182                 break;
1183
1184         case CUSE_IOCTL_CREATE_DEV:
1185
1186                 error = priv_check(curthread, PRIV_DRIVER);
1187                 if (error)
1188                         break;
1189
1190                 pcd = (void *)data;
1191
1192                 /* filter input */
1193
1194                 pcd->devname[sizeof(pcd->devname) - 1] = 0;
1195
1196                 if (pcd->devname[0] == 0) {
1197                         error = EINVAL;
1198                         break;
1199                 }
1200                 cuse_str_filter(pcd->devname);
1201
1202                 pcd->permissions &= 0777;
1203
1204                 /* try to allocate a character device */
1205
1206                 pcsd = malloc(sizeof(*pcsd), M_CUSE, M_WAITOK | M_ZERO);
1207
1208                 if (pcsd == NULL) {
1209                         error = ENOMEM;
1210                         break;
1211                 }
1212                 pcsd->server = pcs;
1213
1214                 pcsd->user_dev = pcd->dev;
1215
1216                 pcsd->kern_dev = make_dev_credf(MAKEDEV_CHECKNAME,
1217                     &cuse_client_devsw, 0, NULL, pcd->user_id, pcd->group_id,
1218                     pcd->permissions, "%s", pcd->devname);
1219
1220                 if (pcsd->kern_dev == NULL) {
1221                         free(pcsd, M_CUSE);
1222                         error = ENOMEM;
1223                         break;
1224                 }
1225                 pcsd->kern_dev->si_drv1 = pcsd;
1226
1227                 cuse_lock();
1228                 TAILQ_INSERT_TAIL(&pcs->hdev, pcsd, entry);
1229                 cuse_unlock();
1230
1231                 break;
1232
1233         case CUSE_IOCTL_DESTROY_DEV:
1234
1235                 error = priv_check(curthread, PRIV_DRIVER);
1236                 if (error)
1237                         break;
1238
1239                 cuse_lock();
1240
1241                 error = EINVAL;
1242
1243                 pcsd = TAILQ_FIRST(&pcs->hdev);
1244                 while (pcsd != NULL) {
1245                         if (pcsd->user_dev == *(struct cuse_dev **)data) {
1246                                 TAILQ_REMOVE(&pcs->hdev, pcsd, entry);
1247                                 cuse_unlock();
1248                                 cuse_server_free_dev(pcsd);
1249                                 cuse_lock();
1250                                 error = 0;
1251                                 pcsd = TAILQ_FIRST(&pcs->hdev);
1252                         } else {
1253                                 pcsd = TAILQ_NEXT(pcsd, entry);
1254                         }
1255                 }
1256
1257                 cuse_unlock();
1258                 break;
1259
1260         case CUSE_IOCTL_WRITE_DATA:
1261         case CUSE_IOCTL_READ_DATA:
1262
1263                 cuse_lock();
1264                 pchk = (struct cuse_data_chunk *)data;
1265
1266                 pccmd = cuse_server_find_command(pcs, curthread);
1267
1268                 if (pccmd == NULL) {
1269                         error = ENXIO;  /* invalid request */
1270                 } else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
1271                         error = EFAULT; /* NULL pointer */
1272                 } else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
1273                         error = cuse_server_ioctl_copy_locked(pccmd,
1274                             pchk, cmd == CUSE_IOCTL_READ_DATA);
1275                 } else {
1276                         error = cuse_server_data_copy_locked(pccmd,
1277                             pchk, cmd == CUSE_IOCTL_READ_DATA);
1278                 }
1279                 cuse_unlock();
1280                 break;
1281
1282         case CUSE_IOCTL_SELWAKEUP:
1283                 cuse_lock();
1284                 /*
1285                  * We don't know which direction caused the event.
1286                  * Wakeup both!
1287                  */
1288                 cuse_server_wakeup_all_client_locked(pcs);
1289                 cuse_unlock();
1290                 break;
1291
1292         default:
1293                 error = ENXIO;
1294                 break;
1295         }
1296         return (error);
1297 }
1298
1299 static int
1300 cuse_server_poll(struct cdev *dev, int events, struct thread *td)
1301 {
1302         return (events & (POLLHUP | POLLPRI | POLLIN |
1303             POLLRDNORM | POLLOUT | POLLWRNORM));
1304 }
1305
1306 static int
1307 cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1308     vm_size_t size, struct vm_object **object, int nprot)
1309 {
1310         uint32_t page_nr = *offset / PAGE_SIZE;
1311         uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1312         struct cuse_memory *mem;
1313         struct cuse_server *pcs;
1314         int error;
1315
1316         error = cuse_server_get(&pcs);
1317         if (error != 0)
1318                 return (error);
1319
1320         cuse_lock();
1321         /* lookup memory structure */
1322         TAILQ_FOREACH(mem, &pcs->hmem, entry) {
1323                 if (mem->alloc_nr == alloc_nr)
1324                         break;
1325         }
1326         if (mem == NULL) {
1327                 cuse_unlock();
1328                 return (ENOMEM);
1329         }
1330         /* verify page offset */
1331         page_nr %= CUSE_ALLOC_PAGES_MAX;
1332         if (page_nr >= mem->page_count) {
1333                 cuse_unlock();
1334                 return (ENXIO);
1335         }
1336         /* verify mmap size */
1337         if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1338             (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1339                 cuse_unlock();
1340                 return (EINVAL);
1341         }
1342         vm_object_reference(mem->object);
1343         *object = mem->object;
1344         cuse_unlock();
1345
1346         /* set new VM object offset to use */
1347         *offset = page_nr * PAGE_SIZE;
1348
1349         /* success */
1350         return (0);
1351 }
1352
1353 /*------------------------------------------------------------------------*
1354  *      CUSE CLIENT PART
1355  *------------------------------------------------------------------------*/
1356 static void
1357 cuse_client_free(void *arg)
1358 {
1359         struct cuse_client *pcc = arg;
1360         struct cuse_client_command *pccmd;
1361         struct cuse_server *pcs;
1362         int n;
1363
1364         cuse_lock();
1365         cuse_client_is_closing(pcc);
1366         TAILQ_REMOVE(&pcc->server->hcli, pcc, entry);
1367         cuse_unlock();
1368
1369         for (n = 0; n != CUSE_CMD_MAX; n++) {
1370
1371                 pccmd = &pcc->cmds[n];
1372
1373                 sx_destroy(&pccmd->sx);
1374                 cv_destroy(&pccmd->cv);
1375         }
1376
1377         pcs = pcc->server;
1378
1379         free(pcc, M_CUSE);
1380
1381         /* drop reference on server */
1382         cuse_server_unref(pcs);
1383 }
1384
1385 static int
1386 cuse_client_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
1387 {
1388         struct cuse_client_command *pccmd;
1389         struct cuse_server_dev *pcsd;
1390         struct cuse_client *pcc;
1391         struct cuse_server *pcs;
1392         struct cuse_dev *pcd;
1393         int error;
1394         int n;
1395
1396         cuse_lock();
1397         pcsd = dev->si_drv1;
1398         if (pcsd != NULL) {
1399                 pcs = pcsd->server;
1400                 pcd = pcsd->user_dev;
1401                 /*
1402                  * Check that the refcount didn't wrap and that the
1403                  * same process is not both client and server. This
1404                  * can easily lead to deadlocks when destroying the
1405                  * CUSE character device nodes:
1406                  */
1407                 pcs->refs++;
1408                 if (pcs->refs < 0 || pcs->pid == curproc->p_pid) {
1409                         /* overflow or wrong PID */
1410                         pcs->refs--;
1411                         pcsd = NULL;
1412                 }
1413         } else {
1414                 pcs = NULL;
1415                 pcd = NULL;
1416         }
1417         cuse_unlock();
1418
1419         if (pcsd == NULL)
1420                 return (EINVAL);
1421
1422         pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
1423         if (pcc == NULL) {
1424                 /* drop reference on server */
1425                 cuse_server_unref(pcs);
1426                 return (ENOMEM);
1427         }
1428         if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
1429                 printf("Cuse: Cannot set cdevpriv.\n");
1430                 /* drop reference on server */
1431                 cuse_server_unref(pcs);
1432                 free(pcc, M_CUSE);
1433                 return (ENOMEM);
1434         }
1435         pcc->fflags = fflags;
1436         pcc->server_dev = pcsd;
1437         pcc->server = pcs;
1438
1439         for (n = 0; n != CUSE_CMD_MAX; n++) {
1440
1441                 pccmd = &pcc->cmds[n];
1442
1443                 pccmd->sub.dev = pcd;
1444                 pccmd->sub.command = n;
1445                 pccmd->client = pcc;
1446
1447                 sx_init(&pccmd->sx, "cuse-client-sx");
1448                 cv_init(&pccmd->cv, "cuse-client-cv");
1449         }
1450
1451         cuse_lock();
1452
1453         /* cuse_client_free() assumes that the client is listed somewhere! */
1454         /* always enqueue */
1455
1456         TAILQ_INSERT_TAIL(&pcs->hcli, pcc, entry);
1457
1458         /* check if server is closing */
1459         if ((pcs->is_closing != 0) || (dev->si_drv1 == NULL)) {
1460                 error = EINVAL;
1461         } else {
1462                 error = 0;
1463         }
1464         cuse_unlock();
1465
1466         if (error) {
1467                 devfs_clear_cdevpriv(); /* XXX bugfix */
1468                 return (error);
1469         }
1470         pccmd = &pcc->cmds[CUSE_CMD_OPEN];
1471
1472         cuse_cmd_lock(pccmd);
1473
1474         cuse_lock();
1475         cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1476
1477         error = cuse_client_receive_command_locked(pccmd, 0, 0);
1478         cuse_unlock();
1479
1480         if (error < 0) {
1481                 error = cuse_convert_error(error);
1482         } else {
1483                 error = 0;
1484         }
1485
1486         cuse_cmd_unlock(pccmd);
1487
1488         if (error)
1489                 devfs_clear_cdevpriv(); /* XXX bugfix */
1490
1491         return (error);
1492 }
1493
1494 static int
1495 cuse_client_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
1496 {
1497         struct cuse_client_command *pccmd;
1498         struct cuse_client *pcc;
1499         int error;
1500
1501         error = cuse_client_get(&pcc);
1502         if (error != 0)
1503                 return (0);
1504
1505         pccmd = &pcc->cmds[CUSE_CMD_CLOSE];
1506
1507         cuse_cmd_lock(pccmd);
1508
1509         cuse_lock();
1510         cuse_client_send_command_locked(pccmd, 0, 0, pcc->fflags, 0);
1511
1512         error = cuse_client_receive_command_locked(pccmd, 0, 0);
1513         cuse_unlock();
1514
1515         cuse_cmd_unlock(pccmd);
1516
1517         cuse_lock();
1518         cuse_client_is_closing(pcc);
1519         cuse_unlock();
1520
1521         return (0);
1522 }
1523
1524 static void
1525 cuse_client_kqfilter_poll(struct cdev *dev, struct cuse_client *pcc)
1526 {
1527         int temp;
1528
1529         cuse_lock();
1530         temp = (pcc->cflags & (CUSE_CLI_KNOTE_HAS_READ |
1531             CUSE_CLI_KNOTE_HAS_WRITE));
1532         pcc->cflags &= ~(CUSE_CLI_KNOTE_NEED_READ |
1533             CUSE_CLI_KNOTE_NEED_WRITE);
1534         cuse_unlock();
1535
1536         if (temp != 0) {
1537                 /* get the latest polling state from the server */
1538                 temp = cuse_client_poll(dev, POLLIN | POLLOUT, NULL);
1539
1540                 if (temp & (POLLIN | POLLOUT)) {
1541                         cuse_lock();
1542                         if (temp & POLLIN)
1543                                 pcc->cflags |= CUSE_CLI_KNOTE_NEED_READ;
1544                         if (temp & POLLOUT)
1545                                 pcc->cflags |= CUSE_CLI_KNOTE_NEED_WRITE;
1546
1547                         /* make sure the "knote" gets woken up */
1548                         cuse_server_wakeup_locked(pcc->server);
1549                         cuse_unlock();
1550                 }
1551         }
1552 }
1553
1554 static int
1555 cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
1556 {
1557         struct cuse_client_command *pccmd;
1558         struct cuse_client *pcc;
1559         int error;
1560         int len;
1561
1562         error = cuse_client_get(&pcc);
1563         if (error != 0)
1564                 return (error);
1565
1566         pccmd = &pcc->cmds[CUSE_CMD_READ];
1567
1568         if (uio->uio_segflg != UIO_USERSPACE) {
1569                 return (EINVAL);
1570         }
1571         uio->uio_segflg = UIO_NOCOPY;
1572
1573         cuse_cmd_lock(pccmd);
1574
1575         while (uio->uio_resid != 0) {
1576
1577                 if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1578                         error = ENOMEM;
1579                         break;
1580                 }
1581                 len = uio->uio_iov->iov_len;
1582
1583                 cuse_lock();
1584                 cuse_client_send_command_locked(pccmd,
1585                     (uintptr_t)uio->uio_iov->iov_base,
1586                     (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1587
1588                 error = cuse_client_receive_command_locked(pccmd, 0, 0);
1589                 cuse_unlock();
1590
1591                 if (error < 0) {
1592                         error = cuse_convert_error(error);
1593                         break;
1594                 } else if (error == len) {
1595                         error = uiomove(NULL, error, uio);
1596                         if (error)
1597                                 break;
1598                 } else {
1599                         error = uiomove(NULL, error, uio);
1600                         break;
1601                 }
1602         }
1603         cuse_cmd_unlock(pccmd);
1604
1605         uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1606
1607         if (error == EWOULDBLOCK)
1608                 cuse_client_kqfilter_poll(dev, pcc);
1609
1610         return (error);
1611 }
1612
1613 static int
1614 cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
1615 {
1616         struct cuse_client_command *pccmd;
1617         struct cuse_client *pcc;
1618         int error;
1619         int len;
1620
1621         error = cuse_client_get(&pcc);
1622         if (error != 0)
1623                 return (error);
1624
1625         pccmd = &pcc->cmds[CUSE_CMD_WRITE];
1626
1627         if (uio->uio_segflg != UIO_USERSPACE) {
1628                 return (EINVAL);
1629         }
1630         uio->uio_segflg = UIO_NOCOPY;
1631
1632         cuse_cmd_lock(pccmd);
1633
1634         while (uio->uio_resid != 0) {
1635
1636                 if (uio->uio_iov->iov_len > CUSE_LENGTH_MAX) {
1637                         error = ENOMEM;
1638                         break;
1639                 }
1640                 len = uio->uio_iov->iov_len;
1641
1642                 cuse_lock();
1643                 cuse_client_send_command_locked(pccmd,
1644                     (uintptr_t)uio->uio_iov->iov_base,
1645                     (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
1646
1647                 error = cuse_client_receive_command_locked(pccmd, 0, 0);
1648                 cuse_unlock();
1649
1650                 if (error < 0) {
1651                         error = cuse_convert_error(error);
1652                         break;
1653                 } else if (error == len) {
1654                         error = uiomove(NULL, error, uio);
1655                         if (error)
1656                                 break;
1657                 } else {
1658                         error = uiomove(NULL, error, uio);
1659                         break;
1660                 }
1661         }
1662         cuse_cmd_unlock(pccmd);
1663
1664         uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
1665
1666         if (error == EWOULDBLOCK)
1667                 cuse_client_kqfilter_poll(dev, pcc);
1668
1669         return (error);
1670 }
1671
1672 int
1673 cuse_client_ioctl(struct cdev *dev, unsigned long cmd,
1674     caddr_t data, int fflag, struct thread *td)
1675 {
1676         struct cuse_client_command *pccmd;
1677         struct cuse_client *pcc;
1678         int error;
1679         int len;
1680
1681         error = cuse_client_get(&pcc);
1682         if (error != 0)
1683                 return (error);
1684
1685         len = IOCPARM_LEN(cmd);
1686         if (len > CUSE_BUFFER_MAX)
1687                 return (ENOMEM);
1688
1689         pccmd = &pcc->cmds[CUSE_CMD_IOCTL];
1690
1691         cuse_cmd_lock(pccmd);
1692
1693         if (cmd & (IOC_IN | IOC_VOID))
1694                 memcpy(pcc->ioctl_buffer, data, len);
1695
1696         /*
1697          * When the ioctl-length is zero drivers can pass information
1698          * through the data pointer of the ioctl. Make sure this information
1699          * is forwarded to the driver.
1700          */
1701
1702         cuse_lock();
1703         cuse_client_send_command_locked(pccmd,
1704             (len == 0) ? *(long *)data : CUSE_BUF_MIN_PTR,
1705             (unsigned long)cmd, pcc->fflags,
1706             (fflag & O_NONBLOCK) ? IO_NDELAY : 0);
1707
1708         error = cuse_client_receive_command_locked(pccmd, data, len);
1709         cuse_unlock();
1710
1711         if (error < 0) {
1712                 error = cuse_convert_error(error);
1713         } else {
1714                 error = 0;
1715         }
1716
1717         if (cmd & IOC_OUT)
1718                 memcpy(data, pcc->ioctl_buffer, len);
1719
1720         cuse_cmd_unlock(pccmd);
1721
1722         if (error == EWOULDBLOCK)
1723                 cuse_client_kqfilter_poll(dev, pcc);
1724
1725         return (error);
1726 }
1727
1728 static int
1729 cuse_client_poll(struct cdev *dev, int events, struct thread *td)
1730 {
1731         struct cuse_client_command *pccmd;
1732         struct cuse_client *pcc;
1733         unsigned long temp;
1734         int error;
1735         int revents;
1736
1737         error = cuse_client_get(&pcc);
1738         if (error != 0)
1739                 goto pollnval;
1740
1741         temp = 0;
1742
1743         if (events & (POLLPRI | POLLIN | POLLRDNORM))
1744                 temp |= CUSE_POLL_READ;
1745
1746         if (events & (POLLOUT | POLLWRNORM))
1747                 temp |= CUSE_POLL_WRITE;
1748
1749         if (events & POLLHUP)
1750                 temp |= CUSE_POLL_ERROR;
1751
1752         pccmd = &pcc->cmds[CUSE_CMD_POLL];
1753
1754         cuse_cmd_lock(pccmd);
1755
1756         /* Need to selrecord() first to not loose any events. */
1757         if (temp != 0 && td != NULL)
1758                 selrecord(td, &pcc->server->selinfo);
1759
1760         cuse_lock();
1761         cuse_client_send_command_locked(pccmd,
1762             0, temp, pcc->fflags, IO_NDELAY);
1763
1764         error = cuse_client_receive_command_locked(pccmd, 0, 0);
1765         cuse_unlock();
1766
1767         cuse_cmd_unlock(pccmd);
1768
1769         if (error < 0) {
1770                 goto pollnval;
1771         } else {
1772                 revents = 0;
1773                 if (error & CUSE_POLL_READ)
1774                         revents |= (events & (POLLPRI | POLLIN | POLLRDNORM));
1775                 if (error & CUSE_POLL_WRITE)
1776                         revents |= (events & (POLLOUT | POLLWRNORM));
1777                 if (error & CUSE_POLL_ERROR)
1778                         revents |= (events & POLLHUP);
1779         }
1780         return (revents);
1781
1782 pollnval:
1783         /* XXX many clients don't understand POLLNVAL */
1784         return (events & (POLLHUP | POLLPRI | POLLIN |
1785             POLLRDNORM | POLLOUT | POLLWRNORM));
1786 }
1787
1788 static int
1789 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
1790     vm_size_t size, struct vm_object **object, int nprot)
1791 {
1792         uint32_t page_nr = *offset / PAGE_SIZE;
1793         uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
1794         struct cuse_memory *mem;
1795         struct cuse_client *pcc;
1796         int error;
1797
1798         error = cuse_client_get(&pcc);
1799         if (error != 0)
1800                 return (error);
1801
1802         cuse_lock();
1803         /* lookup memory structure */
1804         TAILQ_FOREACH(mem, &pcc->server->hmem, entry) {
1805                 if (mem->alloc_nr == alloc_nr)
1806                         break;
1807         }
1808         if (mem == NULL) {
1809                 cuse_unlock();
1810                 return (ENOMEM);
1811         }
1812         /* verify page offset */
1813         page_nr %= CUSE_ALLOC_PAGES_MAX;
1814         if (page_nr >= mem->page_count) {
1815                 cuse_unlock();
1816                 return (ENXIO);
1817         }
1818         /* verify mmap size */
1819         if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
1820             (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
1821                 cuse_unlock();
1822                 return (EINVAL);
1823         }
1824         vm_object_reference(mem->object);
1825         *object = mem->object;
1826         cuse_unlock();
1827
1828         /* set new VM object offset to use */
1829         *offset = page_nr * PAGE_SIZE;
1830
1831         /* success */
1832         return (0);
1833 }
1834
1835 static void
1836 cuse_client_kqfilter_read_detach(struct knote *kn)
1837 {
1838         struct cuse_client *pcc;
1839
1840         cuse_lock();
1841         pcc = kn->kn_hook;
1842         knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1843         cuse_unlock();
1844 }
1845
1846 static void
1847 cuse_client_kqfilter_write_detach(struct knote *kn)
1848 {
1849         struct cuse_client *pcc;
1850
1851         cuse_lock();
1852         pcc = kn->kn_hook;
1853         knlist_remove(&pcc->server->selinfo.si_note, kn, 1);
1854         cuse_unlock();
1855 }
1856
1857 static int
1858 cuse_client_kqfilter_read_event(struct knote *kn, long hint)
1859 {
1860         struct cuse_client *pcc;
1861
1862         mtx_assert(&cuse_mtx, MA_OWNED);
1863
1864         pcc = kn->kn_hook;
1865         return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_READ) ? 1 : 0);
1866 }
1867
1868 static int
1869 cuse_client_kqfilter_write_event(struct knote *kn, long hint)
1870 {
1871         struct cuse_client *pcc;
1872
1873         mtx_assert(&cuse_mtx, MA_OWNED);
1874
1875         pcc = kn->kn_hook;
1876         return ((pcc->cflags & CUSE_CLI_KNOTE_NEED_WRITE) ? 1 : 0);
1877 }
1878
1879 static int
1880 cuse_client_kqfilter(struct cdev *dev, struct knote *kn)
1881 {
1882         struct cuse_client *pcc;
1883         struct cuse_server *pcs;
1884         int error;
1885
1886         error = cuse_client_get(&pcc);
1887         if (error != 0)
1888                 return (error);
1889
1890         cuse_lock();
1891         pcs = pcc->server;
1892         switch (kn->kn_filter) {
1893         case EVFILT_READ:
1894                 pcc->cflags |= CUSE_CLI_KNOTE_HAS_READ;
1895                 kn->kn_hook = pcc;
1896                 kn->kn_fop = &cuse_client_kqfilter_read_ops;
1897                 knlist_add(&pcs->selinfo.si_note, kn, 1);
1898                 break;
1899         case EVFILT_WRITE:
1900                 pcc->cflags |= CUSE_CLI_KNOTE_HAS_WRITE;
1901                 kn->kn_hook = pcc;
1902                 kn->kn_fop = &cuse_client_kqfilter_write_ops;
1903                 knlist_add(&pcs->selinfo.si_note, kn, 1);
1904                 break;
1905         default:
1906                 error = EINVAL;
1907                 break;
1908         }
1909         cuse_unlock();
1910
1911         if (error == 0)
1912                 cuse_client_kqfilter_poll(dev, pcc);
1913         return (error);
1914 }