2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 2007-2009 Google Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
17 * * Neither the name of Google Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * Copyright (C) 2005 Csaba Henk.
34 * All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
45 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 #include <sys/cdefs.h>
59 __FBSDID("$FreeBSD$");
61 #include <sys/types.h>
62 #include <sys/module.h>
63 #include <sys/systm.h>
64 #include <sys/errno.h>
65 #include <sys/param.h>
66 #include <sys/kernel.h>
69 #include <sys/malloc.h>
70 #include <sys/queue.h>
73 #include <sys/mutex.h>
75 #include <sys/mount.h>
78 #include <sys/fcntl.h>
79 #include <sys/sysctl.h>
81 #include <sys/selinfo.h>
84 #include "fuse_internal.h"
/* DTrace provider for the fusefs module; probes defined below and elsewhere. */
87 SDT_PROVIDER_DECLARE(fusefs);
/*
 * Generic trace probe for the device layer.
90 * arg0: verbosity. Higher numbers give more verbose messages
91 * arg1: Textual message
 */
93 SDT_PROBE_DEFINE2(fusefs, , device, trace, "int", "char*");
/* The single /dev/fuse node created by fuse_device_init(). */
95 static struct cdev *fuse_dev;
/* Forward declarations of the character-device entry points. */
97 static d_kqfilter_t fuse_device_filter;
98 static d_open_t fuse_device_open;
99 static d_poll_t fuse_device_poll;
100 static d_read_t fuse_device_read;
101 static d_write_t fuse_device_write;
/*
 * Switch table for /dev/fuse.  No d_close: per-open teardown is done by
 * the cdevpriv destructor (fdata_dtor) registered in fuse_device_open().
 */
103 static struct cdevsw fuse_device_cdevsw = {
104 .d_kqfilter = fuse_device_filter,
105 .d_open = fuse_device_open,
107 .d_poll = fuse_device_poll,
108 .d_read = fuse_device_read,
109 .d_write = fuse_device_write,
110 .d_version = D_VERSION,
/* kqueue EVFILT_READ support for /dev/fuse. */
113 static int fuse_device_filt_read(struct knote *kn, long hint);
114 static void fuse_device_filt_detach(struct knote *kn);
116 struct filterops fuse_device_rfiltops = {
118 .f_detach = fuse_device_filt_detach,
119 .f_event = fuse_device_filt_read,
122 /****************************
124 * >>> Fuse device op defs
126 ****************************/
/*
 * cdevpriv destructor: runs when the last reference to a /dev/fuse open
 * goes away.  Marks the session dead, wakes poll()/select() sleepers,
 * fails every ticket still on the answer-wait queue with ENOTCONN so no
 * syscall handler blocks forever, then drops this open's hold on fdata.
 */
129 fdata_dtor(void *arg)
131 struct fuse_data *fdata;
132 struct fuse_ticket *tick;
/* No new messages will be queued or answered after this point. */
138 fdata_set_dead(fdata);
141 fuse_lck_mtx_lock(fdata->aw_mtx);
142 /* wake up poll()ers */
143 selwakeuppri(&fdata->ks_rsel, PZERO + 1);
144 /* Don't let syscall handlers wait in vain */
145 while ((tick = fuse_aw_pop(fdata))) {
146 fuse_lck_mtx_lock(tick->tk_aw_mtx);
147 fticket_set_answered(tick);
/* ENOTCONN tells the blocked caller that the daemon is gone. */
148 tick->tk_aw_errno = ENOTCONN;
150 fuse_lck_mtx_unlock(tick->tk_aw_mtx);
151 FUSE_ASSERT_AW_DONE(tick);
152 fuse_ticket_drop(tick);
154 fuse_lck_mtx_unlock(fdata->aw_mtx);
/* Presumably frees fdata once its refcount hits zero — see fuse_ipc. */
157 fdata_trydestroy(fdata);
/*
 * d_kqfilter entry point: attach a knote to this open's fuse_data.
 * Only EVFILT_READ gets a filterops/knlist; the handling of other
 * filters falls on the branch at the end (not fully visible here).
 */
161 fuse_device_filter(struct cdev *dev, struct knote *kn)
163 struct fuse_data *data;
/* Recover the per-open fuse_data stashed by fuse_device_open(). */
166 error = devfs_get_cdevpriv((void **)&data);
168 /* EVFILT_WRITE is not supported; the device is always ready to write */
169 if (error == 0 && kn->kn_filter == EVFILT_READ) {
170 kn->kn_fop = &fuse_device_rfiltops;
/* NOTE(review): kn_hook is read back in detach/event handlers below. */
172 knlist_add(&data->ks_rsel.si_note, kn, 0);
174 } else if (error == 0) {
/*
 * kqueue detach: unhook the knote from the fuse_data recorded in
 * kn_hook at attach time.
 */
183 fuse_device_filt_detach(struct knote *kn)
185 struct fuse_data *data;
187 data = (struct fuse_data*)kn->kn_hook;
189 knlist_remove(&data->ks_rsel.si_note, kn, 0);
/*
 * kqueue EVFILT_READ event test: readable when the session is dead
 * (report EOF with ENODEV in fflags) or when at least one message is
 * queued for the daemon (report the queue depth in kn_data).
 */
194 fuse_device_filt_read(struct knote *kn, long hint)
196 struct fuse_data *data;
199 data = (struct fuse_data*)kn->kn_hook;
/* Caller must hold the message-queue mutex. */
202 mtx_assert(&data->ms_mtx, MA_OWNED);
203 if (fdata_get_dead(data)) {
204 kn->kn_flags |= EV_EOF;
205 kn->kn_fflags = ENODEV;
208 } else if (STAILQ_FIRST(&data->ms_head)) {
209 MPASS(data->ms_count >= 1);
210 kn->kn_data = data->ms_count;
/*
 * d_open entry point.
220 * Resources are set up on a per-open basis
 * A fresh fuse_data is allocated for each open of /dev/fuse and stored
 * as cdevpriv; fdata_dtor tears it down on last close.
 */
223 fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
225 struct fuse_data *fdata;
228 SDT_PROBE2(fusefs, , device, trace, 1, "device open");
/* Credentials of the opening thread become the session owner. */
230 fdata = fdata_alloc(dev, td->td_ucred);
231 error = devfs_set_cdevpriv(fdata, fdata_dtor);
/* On registration failure, give back the allocation ourselves. */
233 fdata_trydestroy(fdata);
235 SDT_PROBE2(fusefs, , device, trace, 1, "device open success");
/*
 * d_poll entry point.  Readable when the session is dead or a message
 * awaits the daemon; always writable.  Records the selector otherwise.
 */
240 fuse_device_poll(struct cdev *dev, int events, struct thread *td)
242 struct fuse_data *data;
243 int error, revents = 0;
245 error = devfs_get_cdevpriv((void **)&data);
248 (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
250 if (events & (POLLIN | POLLRDNORM)) {
251 fuse_lck_mtx_lock(data->ms_mtx);
252 if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head))
253 revents |= events & (POLLIN | POLLRDNORM);
/* Nothing queued: remember this thread so selwakeup can find it. */
255 selrecord(td, &data->ks_rsel);
256 fuse_lck_mtx_unlock(data->ms_mtx);
/* Writes to /dev/fuse never block, so writability is unconditional. */
258 if (events & (POLLOUT | POLLWRNORM)) {
259 revents |= events & (POLLOUT | POLLWRNORM);
/*
 * d_read entry point.
265 * fuse_device_read hangs on the queue of VFS messages.
266 * When it's notified that there is a new one, it picks that and
267 * passes up to the daemon
 * Returns ENODEV once the session is dead, EAGAIN for an empty queue in
 * non-blocking mode, or the msleep error if the wait was interrupted.
 */
270 fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag)
273 struct fuse_data *data;
274 struct fuse_ticket *tick;
/* Up to two segments (header fiov + optional data buffer) plus a NULL stop. */
275 void *buf[] = {NULL, NULL, NULL};
279 SDT_PROBE2(fusefs, , device, trace, 1, "fuse device read");
281 err = devfs_get_cdevpriv((void **)&data);
285 fuse_lck_mtx_lock(data->ms_mtx);
287 if (fdata_get_dead(data)) {
288 SDT_PROBE2(fusefs, , device, trace, 2,
289 "we know early on that reader should be kicked so we "
290 "don't wait for news");
291 fuse_lck_mtx_unlock(data->ms_mtx);
294 if (!(tick = fuse_ms_pop(data))) {
295 /* check if we may block */
296 if (ioflag & O_NONBLOCK) {
297 /* get outa here soon */
298 fuse_lck_mtx_unlock(data->ms_mtx);
/* Sleep until a message is queued; PCATCH lets signals wake us. */
301 err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0);
303 fuse_lck_mtx_unlock(data->ms_mtx);
304 return (fdata_get_dead(data) ? ENODEV : err);
306 tick = fuse_ms_pop(data);
/*
 * Spurious wakeup with an empty queue:
311 * We can get here if fuse daemon suddenly terminates,
312 * eg, by being hit by a SIGKILL
313 * -- and some other cases, too, tho not totally clear, when
314 * (cv_signal/wakeup_one signals the whole process ?)
 */
316 SDT_PROBE2(fusefs, , device, trace, 1, "no message on thread");
319 fuse_lck_mtx_unlock(data->ms_mtx);
321 if (fdata_get_dead(data)) {
/*
323 * somebody somewhere -- eg., umount routine --
324 * wants this liaison finished off
 */
326 SDT_PROBE2(fusefs, , device, trace, 2,
327 "reader is to be sacked");
329 SDT_PROBE2(fusefs, , device, trace, 2, "weird -- "
330 "\"kick\" is set tho there is message");
/* Drop the popped ticket before bailing out. */
331 FUSE_ASSERT_MS_DONE(tick);
332 fuse_ticket_drop(tick);
334 return (ENODEV); /* This should make the daemon get off
337 SDT_PROBE2(fusefs, , device, trace, 1,
338 "fuse device read message successfully");
340 KASSERT(tick->tk_ms_bufdata || tick->tk_ms_bufsize == 0,
341 ("non-null buf pointer with positive size"));
/* Gather the ticket's segments; layout depends on the message type. */
343 switch (tick->tk_ms_type) {
345 buf[0] = tick->tk_ms_fiov.base;
346 buflen[0] = tick->tk_ms_fiov.len;
349 buf[0] = tick->tk_ms_fiov.base;
350 buflen[0] = tick->tk_ms_fiov.len;
351 buf[1] = tick->tk_ms_bufdata;
352 buflen[1] = tick->tk_ms_bufsize;
355 panic("unknown message type for fuse_ticket %p", tick);
/* Copy each segment out to the daemon; a short read kills the session. */
358 for (i = 0; buf[i]; i++) {
/*
360 * Why not ban mercilessly stupid daemons who can't keep up
361 * with us? (There is no much use of a partial read here...)
364 * XXX note that in such cases Linux FUSE throws EIO at the
365 * syscall invoker and stands back to the message queue. The
366 * rationale should be made clear (and possibly adopt that
367 * behaviour). Keeping the current scheme at least makes
368 * fallacy as loud as possible...
 */
370 if (uio->uio_resid < buflen[i]) {
371 fdata_set_dead(data);
372 SDT_PROBE2(fusefs, , device, trace, 2,
373 "daemon is stupid, kick it off...");
377 err = uiomove(buf[i], buflen[i], uio);
382 FUSE_ASSERT_MS_DONE(tick);
383 fuse_ticket_drop(tick);
/*
 * Sanity-check a fuse_out_header written by the daemon against the
 * amount of body data actually supplied in the uio.  Non-zero return
 * (paths not fully visible here) makes fuse_device_write discard the
 * whole answer.
 */
389 fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio)
/* The header's len field must cover header + remaining body exactly. */
391 if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) {
392 SDT_PROBE2(fusefs, , device, trace, 1,
393 "Format error: body size "
394 "differs from size claimed by header");
/* An error reply to a normal request must carry no body. */
397 if (uio->uio_resid && ohead->unique != 0 && ohead->error) {
398 SDT_PROBE2(fusefs, , device, trace, 1,
399 "Format error: non zero error but message had a body");
/* Probes fired from fuse_device_write for the three dispatch outcomes:
 * asynchronous notification, no matching ticket, and ticket found. */
406 SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_notify,
407 "struct fuse_out_header*");
408 SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_missing_ticket,
410 SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_found,
411 "struct fuse_ticket*");
/*
 * d_write entry point.
413 * fuse_device_write first reads the header sent by the daemon.
414 * If that's OK, looks up ticket/callback node by the unique id seen in header.
415 * If the callback node contains a handler function, the uio is passed over
 * to it; unique == 0 instead denotes an unsolicited notification.
 */
419 fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
421 struct fuse_out_header ohead;
423 struct fuse_data *data;
425 struct fuse_ticket *tick, *itick, *x_tick;
428 err = devfs_get_cdevpriv((void **)&data);
/* A write smaller than the header is a protocol violation: kill session. */
433 if (uio->uio_resid < sizeof(struct fuse_out_header)) {
434 SDT_PROBE2(fusefs, , device, trace, 1,
435 "fuse_device_write got less than a header!");
436 fdata_set_dead(data);
439 if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0)
/*
443 * We check header information (which is redundant) and compare it
444 * with what we see. If we see some inconsistency we discard the
445 * whole answer and proceed on as if it had never existed. In
446 * particular, no pretender will be woken up, regardless the
447 * "unique" value in the header.
 */
449 if ((err = fuse_ohead_audit(&ohead, uio))) {
450 fdata_set_dead(data);
453 /* Pass stuff over to callback if there is one installed */
455 /* Looking for ticket with the unique id of header */
456 fuse_lck_mtx_lock(data->aw_mtx);
457 TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link,
459 if (tick->tk_unique == ohead.unique) {
460 SDT_PROBE1(fusefs, , device, fuse_device_write_found,
463 fuse_aw_remove(tick);
/*
 * The answered operation may have had an outstanding FUSE_INTERRUPT:
467 */
467 if (found && tick->irq_unique > 0) {
/*
469 * Discard the FUSE_INTERRUPT ticket that tried to interrupt
 * this operation; its answer (if any) is now irrelevant.
 */
472 TAILQ_FOREACH_SAFE(itick, &data->aw_head, tk_aw_link,
474 if (itick->tk_unique == tick->irq_unique) {
475 fuse_aw_remove(itick);
479 tick->irq_unique = 0;
481 fuse_lck_mtx_unlock(data->aw_mtx);
484 if (tick->tk_aw_handler) {
/*
486 * We found a callback with proper handler. In this
487 * case the out header will be 0wnd by the callback,
488 * so the fun of freeing that is left for her.
489 * (Then, by all chance, she'll just get that's done
490 * via ticket_drop(), so no manual mucking
 */
493 SDT_PROBE2(fusefs, , device, trace, 1,
494 "pass ticket to a callback");
495 /* Sanitize the linuxism of negative errnos */
497 memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead));
498 err = tick->tk_aw_handler(tick, uio);
500 /* pretender doesn't wanna do anything with answer */
501 SDT_PROBE2(fusefs, , device, trace, 1,
502 "stuff devalidated, so we drop it");
/*
506 * As aw_mtx was not held during the callback execution the
507 * ticket may have been inserted again. However, this is safe
508 * because fuse_ticket_drop() will deal with refcount anyway.
 */
510 fuse_ticket_drop(tick);
511 } else if (ohead.unique == 0){
512 /* unique == 0 means asynchronous notification */
513 SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead);
/* For notifications the "error" field carries the notify code instead. */
514 switch (ohead.error) {
515 case FUSE_NOTIFY_INVAL_ENTRY:
516 err = fuse_internal_invalidate_entry(mp, uio);
518 case FUSE_NOTIFY_POLL:
519 case FUSE_NOTIFY_INVAL_INODE:
521 /* Not implemented */
525 /* no callback at all! */
526 SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket,
528 if (ohead.error == -EAGAIN) {
/*
530 * This was probably a response to a FUSE_INTERRUPT
531 * operation whose original operation is already
532 * complete. We can't store FUSE_INTERRUPT tickets
533 * indefinitely because their responses are optional.
534 * So we delete them when the original operation
535 * completes. And sadly the fuse_header_out doesn't
536 * identify the opcode, so we have to guess.
 */
/*
 * Module initialization: create the world-read/writable /dev/fuse node.
 * Permissions are intentionally 0666 so unprivileged daemons can attach.
 */
548 fuse_device_init(void)
551 fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR,
552 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, "fuse");
553 if (fuse_dev == NULL)
/* Module teardown: remove the /dev/fuse node created by fuse_device_init(). */
559 fuse_device_destroy(void)
562 MPASS(fuse_dev != NULL);
563 destroy_dev(fuse_dev);