2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 2007-2009 Google Inc. and Amit Singh
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
17 * * Neither the name of Google Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * Copyright (C) 2005 Csaba Henk.
34 * All rights reserved.
36 * Copyright (c) 2019 The FreeBSD Foundation
38 * Portions of this software were developed by BFF Storage Systems, LLC under
39 * sponsorship from the FreeBSD Foundation.
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
50 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 #include <sys/cdefs.h>
64 __FBSDID("$FreeBSD$");
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/counter.h>
69 #include <sys/module.h>
70 #include <sys/errno.h>
71 #include <sys/kernel.h>
74 #include <sys/malloc.h>
75 #include <sys/queue.h>
77 #include <sys/mutex.h>
81 #include <sys/mount.h>
82 #include <sys/vnode.h>
83 #include <sys/namei.h>
85 #include <sys/unistd.h>
86 #include <sys/filedesc.h>
88 #include <sys/fcntl.h>
89 #include <sys/dirent.h>
92 #include <sys/sysctl.h>
96 #include "fuse_file.h"
97 #include "fuse_internal.h"
100 #include "fuse_node.h"
101 #include "fuse_file.h"
/*
 * File-scope declarations for this translation unit: a reference to the
 * fusefs SDT provider, a two-argument "trace" probe (int, char*), a
 * conditional prototype for isbzero(), and the two lookup-cache counters
 * that are exported read-only (CTLFLAG_RD) under _vfs_fusefs_stats.
 * NOTE(review): comment delimiters and the #endif that should close
 * ZERO_PAD_INCOMPLETE_BUFS are not visible in this view -- verify against
 * the full file.
 */
103 SDT_PROVIDER_DECLARE(fusefs);
106 * arg0: verbosity. Higher numbers give more verbose messages
107 * arg1: Textual message
109 SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");
111 #ifdef ZERO_PAD_INCOMPLETE_BUFS
112 static int isbzero(void *buf, size_t len);
116 counter_u64_t fuse_lookup_cache_hits;
117 counter_u64_t fuse_lookup_cache_misses;
119 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
120 &fuse_lookup_cache_hits, "number of positive cache hits in lookup");
122 SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
123 &fuse_lookup_cache_misses, "number of cache misses in lookup");
/*
 * Look up the vnode for inode 'ino' on mount 'mp' through vfs_hash_get(),
 * hashing with fuse_vnode_hash(nodeid) and matching with fuse_vnode_cmp.
 * The found vnode's entry_cache_timeout is compared against 'now': when
 * the timeout is still later, fuse_lookup_cache_hits is incremented; the
 * other visible path ("Entry cache timeout") increments
 * fuse_lookup_cache_misses.
 * NOTE(review): the return type, the last parameter(s), how 'now' is
 * obtained, and what is returned on each path are not visible in this
 * view -- verify against the full file.
 */
126 fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
130 struct thread *td = curthread;
131 uint64_t nodeid = ino;
136 error = vfs_hash_get(mp, fuse_vnode_hash(nodeid), flags, td, vpp,
137 fuse_vnode_cmp, &nodeid);
141 * Check the entry cache timeout. We have to do this within fusefs
142 * instead of by using cache_enter_time/cache_lookup because those
143 * routines are only intended to work with pathnames, not inodes
147 if (bintime_cmp(&(VTOFUD(*vpp)->entry_cache_timeout), &now, >)){
148 counter_u64_add(fuse_lookup_cache_hits, 1);
151 /* Entry cache timeout */
152 counter_u64_add(fuse_lookup_cache_misses, 1);
/*
 * Access check for 'vp'.  Visible steps, in order:
 *  - a request containing VMODIFY_PERMS on a read-only mount is handled by
 *    a switch on the vnode type;
 *  - unless FSESS_DAEMON_CAN_SPY is set, the caller's credentials are
 *    matched against the daemon's via fuse_match_cred();
 *  - with FSESS_DEFAULT_PERMISSIONS the decision is made locally by
 *    vaccess() on attributes from fuse_internal_getattr();
 *  - otherwise, unless FUSE_ACCESS is already marked not implemented, a
 *    FUSE_ACCESS request is built and waited for, and FUSE_ACCESS may be
 *    marked not implemented afterwards.
 * The access_vadmin probe fires on the path described by the VADMIN
 * comment below.
 * NOTE(review): the remaining parameters, the switch cases, the return
 * statements and the assignments to 'mask' are not visible in this view.
 */
161 SDT_PROBE_DEFINE0(fusefs, , internal, access_vadmin);
162 /* Synchronously send a FUSE_ACCESS operation */
164 fuse_internal_access(struct vnode *vp,
170 uint32_t mask = F_OK;
174 struct fuse_dispatcher fdi;
175 struct fuse_access_in *fai;
176 struct fuse_data *data;
178 mp = vnode_mount(vp);
179 vtype = vnode_vtype(vp);
181 data = fuse_get_mpdata(mp);
182 dataflags = data->dataflags;
187 if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
188 switch (vp->v_type) {
200 /* Unless explicitly permitted, deny everyone except the fs owner. */
201 if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
202 if (fuse_match_cred(data->daemoncred, cred))
206 if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
/*
 * NOTE(review): the result of fuse_internal_getattr() is discarded on the
 * next line, yet 'va' is passed to vaccess() right after -- confirm 'va'
 * cannot be consumed uninitialized when the getattr fails.
 */
209 fuse_internal_getattr(vp, &va, cred, td);
210 return vaccess(vp->v_type, va.va_mode, va.va_uid,
211 va.va_gid, mode, cred);
216 * The FUSE protocol doesn't have an equivalent of VADMIN, so
217 * it's a bug if we ever reach this point with that bit set.
219 SDT_PROBE0(fusefs, , internal, access_vadmin);
222 if (fsess_not_impl(mp, FUSE_ACCESS))
225 if ((mode & (VWRITE | VAPPEND)) != 0)
227 if ((mode & VREAD) != 0)
229 if ((mode & VEXEC) != 0)
232 fdisp_init(&fdi, sizeof(*fai));
233 fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);
238 err = fdisp_wait_answ(&fdi);
242 fsess_set_notimpl(mp, FUSE_ACCESS);
/*
 * Summary of the visible flow of fuse_internal_cache_attrs():
 *  1. Requires the vnode to be exclusively locked (ASSERT_VOP_ELOCKED).
 *  2. Converts (attr_valid, attr_valid_nsec) into the vnode's
 *     attr_cache_timeout.
 *  3. For regular files whose server-reported size differs from a cached,
 *     non-VNOVAL size, issues a cache-incoherence warning via fuse_warn()
 *     (writeback case or generic cached case).
 *  4. For regular files whose size differs from the cached size, calls
 *     fuse_vnode_setsize() and updates the cached size.
 *  5. Chooses the destination vattr (the vnode's cached_attrs when the
 *     validity is non-zero, otherwise a branch depending on 'vap'), fills
 *     it from 'attr', and copies it into 'vap' when 'vap' is a different,
 *     non-NULL buffer.
 * NOTE(review): the last parameter(s), the assignment of 'fvdat', several
 * else branches and the early return are not visible in this view.
 */
249 * Cache FUSE attributes from attr, in attribute cache associated with vnode
250 * 'vp'. Optionally, if argument 'vap' is not NULL, store a copy of the
251 * converted attributes there as well.
253 * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
254 * return the result to the caller).
257 fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
258 uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap,
262 struct fuse_vnode_data *fvdat;
263 struct fuse_data *data;
264 struct vattr *vp_cache_at;
266 mp = vnode_mount(vp);
268 data = fuse_get_mpdata(mp);
270 ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
272 fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
273 &fvdat->attr_cache_timeout);
275 if (vnode_isreg(vp) &&
276 fvdat->cached_attrs.va_size != VNOVAL &&
277 attr->size != fvdat->cached_attrs.va_size)
279 if ( data->cache_mode == FUSE_CACHE_WB &&
280 fvdat->flag & FN_SIZECHANGE)
285 * The server changed the file's size even though we're
286 * using writeback cacheing and and we have outstanding
287 * dirty writes! That's a server bug.
/*
 * NOTE(review): the adjacent string literals below concatenate to
 * "...incoherent!.To prevent..." (stray '.' and no space).  This looks
 * like a message typo; it is runtime text and is left untouched here.
 */
289 if (fuse_libabi_geq(data, 7, 23)) {
290 msg = "writeback cache incoherent!."
291 "To prevent data corruption, disable "
292 "the writeback cache according to your "
293 "FUSE server's documentation.";
295 msg = "writeback cache incoherent!."
296 "To prevent data corruption, disable "
297 "the writeback cache by setting "
298 "vfs.fusefs.data_cache_mode to 0 or 1.";
300 fuse_warn(data, FSESS_WARN_WB_CACHE_INCOHERENT, msg);
302 if (fuse_vnode_attr_cache_valid(vp) &&
303 data->cache_mode != FUSE_CACHE_UC)
306 * The server changed the file's size even though we
307 * have it cached and our cache has not yet expired.
310 fuse_warn(data, FSESS_WARN_CACHE_INCOHERENT,
313 "data corruption, disable the data cache "
314 "by mounting with -o direct_io, or as "
315 "directed otherwise by your FUSE server's "
320 /* Fix our buffers if the filesize changed without us knowing */
321 if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
322 (void)fuse_vnode_setsize(vp, attr->size, from_server);
323 fvdat->cached_attrs.va_size = attr->size;
326 if (attr_valid > 0 || attr_valid_nsec > 0)
327 vp_cache_at = &(fvdat->cached_attrs);
328 else if (vap != NULL)
/* Convert the wire-format fuse_attr into a struct vattr. */
333 vattr_null(vp_cache_at);
334 vp_cache_at->va_fsid = mp->mnt_stat.f_fsid.val[0];
335 vp_cache_at->va_fileid = attr->ino;
/* Permission bits only; the file type goes into va_type further down. */
336 vp_cache_at->va_mode = attr->mode & ~S_IFMT;
337 vp_cache_at->va_nlink = attr->nlink;
338 vp_cache_at->va_uid = attr->uid;
339 vp_cache_at->va_gid = attr->gid;
340 vp_cache_at->va_rdev = attr->rdev;
341 vp_cache_at->va_size = attr->size;
342 /* XXX on i386, seconds are truncated to 32 bits */
343 vp_cache_at->va_atime.tv_sec = attr->atime;
344 vp_cache_at->va_atime.tv_nsec = attr->atimensec;
345 vp_cache_at->va_mtime.tv_sec = attr->mtime;
346 vp_cache_at->va_mtime.tv_nsec = attr->mtimensec;
347 vp_cache_at->va_ctime.tv_sec = attr->ctime;
348 vp_cache_at->va_ctime.tv_nsec = attr->ctimensec;
/* attr->blksize is only consulted for protocol 7.9 or later. */
349 if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0)
350 vp_cache_at->va_blocksize = attr->blksize;
352 vp_cache_at->va_blocksize = PAGE_SIZE;
353 vp_cache_at->va_type = IFTOVT(attr->mode);
354 vp_cache_at->va_bytes = attr->blocks * S_BLKSIZE;
355 vp_cache_at->va_flags = 0;
/* Hand a copy to the caller when it supplied its own, distinct buffer. */
357 if (vap != vp_cache_at && vap != NULL)
358 memcpy(vap, vp_cache_at, sizeof(*vap));
/*
 * Ticket callback installed by fuse_internal_fsync() for requests it does
 * not wait on.  When the reply header carries ENOSYS, the ticket's opcode
 * is marked not implemented for the ticket's mount.
 * NOTE(review): the return type and return statement are not visible in
 * this view.
 */
364 fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
366 if (tick->tk_aw_ohead.error == ENOSYS) {
367 fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
/*
 * Send an fsync request (FUSE_FSYNC, or FUSE_FSYNCDIR for directories) for
 * every file handle on the vnode's handle list.  With waitfor == MNT_WAIT
 * each request is waited for; otherwise the request is queued with
 * fuse_internal_fsync_callback attached.  The visible ENOSYS handling
 * marks the opcode not implemented for the mount.
 * NOTE(review): the remaining parameters, the assignment of 'op', the
 * conditions selecting fdisp_make_vp vs fdisp_refresh_vp and the
 * FUSE_FSYNC_FDATASYNC flag, and the cleanup/return are not visible here.
 */
373 fuse_internal_fsync(struct vnode *vp,
378 struct fuse_fsync_in *ffsi = NULL;
379 struct fuse_dispatcher fdi;
380 struct fuse_filehandle *fufh;
381 struct fuse_vnode_data *fvdat = VTOFUD(vp);
382 struct mount *mp = vnode_mount(vp);
/*
 * NOTE(review): the not-implemented check appears twice (here with an
 * inline opcode selection, and again below using 'op'); the first looks
 * redundant -- confirm.
 */
386 if (fsess_not_impl(vnode_mount(vp),
387 (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
393 if (fsess_not_impl(mp, op))
396 fdisp_init(&fdi, sizeof(*ffsi));
398 * fsync every open file handle for this file, because we can't be sure
399 * which file handle the caller is really referring to.
401 LIST_FOREACH(fufh, &fvdat->handles, next) {
402 fdi.iosize = sizeof(*ffsi);
404 fdisp_make_vp(&fdi, op, vp, td, NULL);
406 fdisp_refresh_vp(&fdi, op, vp, td, NULL);
408 ffsi->fh = fufh->fh_id;
409 ffsi->fsync_flags = 0;
412 ffsi->fsync_flags = FUSE_FSYNC_FDATASYNC;
414 if (waitfor == MNT_WAIT) {
415 err = fdisp_wait_answ(&fdi);
417 fuse_insert_callback(fdi.tick,
418 fuse_internal_fsync_callback);
419 fuse_insert_message(fdi.tick, false);
422 /* ENOSYS means "success, and don't call again" */
423 fsess_set_notimpl(mp, op);
/*
 * Handle an "invalidate entry" notification read from 'uio':
 *  - copy in the fixed-size header, then reject a name length that would
 *    not fit (with its terminator) in the local 'name' buffer;
 *  - copy in the name and NUL-terminate it; "." and ".." are special-cased
 *    because fusefs does not cache them;
 *  - obtain the parent directory vnode (VFS_ROOT for FUSE_ROOT_ID,
 *    otherwise the cached-vnode lookup);
 *  - do a cache_lookup() with cn_flags == 0 so the cached name entry is
 *    freed, then clear the parent's attribute cache.
 * NOTE(review): the declaration of 'name', the return statements and the
 * release of the vnodes are not visible in this view.
 */
433 /* Asynchronous invalidation */
434 SDT_PROBE_DEFINE3(fusefs, , internal, invalidate_entry,
435 "struct vnode*", "struct fuse_notify_inval_entry_out*", "char*");
437 fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
439 struct fuse_notify_inval_entry_out fnieo;
440 struct componentname cn;
441 struct vnode *dvp, *vp;
445 if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
448 if (fnieo.namelen >= sizeof(name))
451 if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
453 name[fnieo.namelen] = '\0';
454 /* fusefs does not cache "." or ".." entries */
455 if (strncmp(name, ".", sizeof(".")) == 0 ||
456 strncmp(name, "..", sizeof("..")) == 0)
459 if (fnieo.parent == FUSE_ROOT_ID)
460 err = VFS_ROOT(mp, LK_SHARED, &dvp);
462 err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
464 SDT_PROBE3(fusefs, , internal, invalidate_entry, dvp, &fnieo, name);
466 * If dvp is not in the cache, then it must've been reclaimed. And
467 * since fuse_vnop_reclaim does a cache_purge, name's entry must've
468 * been invalidated already. So we can safely return if dvp == NULL
470 if (err != 0 || dvp == NULL)
473 * XXX we can't check dvp's generation because the FUSE invalidate
474 * entry message doesn't include it. Worse case is that we invalidate
475 * an entry that didn't need to be invalidated.
478 cn.cn_nameiop = LOOKUP;
479 cn.cn_flags = 0; /* !MAKEENTRY means free cached entry */
480 cn.cn_thread = curthread;
481 cn.cn_cred = curthread->td_ucred;
482 cn.cn_lkflags = LK_SHARED;
484 cn.cn_nameptr = name;
485 cn.cn_namelen = fnieo.namelen;
486 err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
488 fuse_vnode_clear_attr_cache(dvp);
/*
 * Handle an "invalidate inode" notification read from 'uio': copy in the
 * header, find the vnode (VFS_ROOT for FUSE_ROOT_ID, otherwise the
 * cached-vnode lookup), fire the invalidate_inode probe, and on the
 * visible path invalidate the vnode's buffers (fuse_io_invalbuf) and its
 * attribute cache.
 * NOTE(review): the root vnode is requested with LK_EXCLUSIVE while the
 * cached lookup uses LK_SHARED -- confirm the differing lock modes are
 * intended.  The condition guarding fuse_io_invalbuf(), the vnode release
 * and the return statements are not visible in this view.
 */
493 SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_inode,
494 "struct vnode*", "struct fuse_notify_inval_inode_out *");
496 fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
498 struct fuse_notify_inval_inode_out fniio;
502 if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
505 if (fniio.ino == FUSE_ROOT_ID)
506 err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
508 err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
510 SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
511 if (err != 0 || vp == NULL)
514 * XXX we can't check vp's generation because the FUSE invalidate
515 * entry message doesn't include it. Worse case is that we invalidate
516 * an inode that didn't need to be invalidated.
520 * Flush and invalidate buffers if off >= 0. Technically we only need
521 * to flush and invalidate the range of offsets [off, off + len), but
522 * for simplicity's sake we do everything.
525 fuse_io_invalbuf(vp, curthread);
526 fuse_vnode_clear_attr_cache(vp);
/*
 * Create a node in directory 'dvp' by sending FUSE_MKNOD through
 * fuse_internal_newentry().  The request carries the mode built with
 * MAKEIMODE(va_type, va_mode) and the device number from 'vap'.  For
 * protocol 7.12 or later the full fuse_mknod_in (including the calling
 * process's umask) is sent; otherwise only FUSE_COMPAT_MKNOD_IN_SIZE
 * bytes are sent.
 * Returns whatever fuse_internal_newentry() returns.
 * NOTE(review): the return type and the declaration of 'insize' are not
 * visible in this view.
 */
533 fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
534 struct componentname *cnp, struct vattr *vap)
536 struct fuse_data *data;
537 struct fuse_mknod_in fmni;
540 data = fuse_get_mpdata(dvp->v_mount);
542 fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
543 fmni.rdev = vap->va_rdev;
544 if (fuse_libabi_geq(data, 7, 12)) {
545 insize = sizeof(fmni);
546 fmni.umask = curthread->td_proc->p_pd->pd_cmask;
548 insize = FUSE_COMPAT_MKNOD_IN_SIZE;
550 return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
551 insize, vap->va_type));
/*
 * Read directory entries for 'vp' into 'uio'.  While the uio still has
 * room, a FUSE_READDIR request is issued for file handle 'fufh' at the
 * uio's current offset, asking for at most MIN(uio_resid, max_read)
 * bytes, and the answer is passed to fuse_internal_readdir_processdata().
 * An error of -1 from the processing step is translated to 0 on return;
 * any other value is returned as-is.
 * NOTE(review): the remaining parameters, the fdisp_init/fdisp_destroy
 * calls, the condition selecting fdisp_make_vp vs fdisp_refresh_vp, and
 * the loop exits are not visible in this view.
 */
557 fuse_internal_readdir(struct vnode *vp,
560 struct fuse_filehandle *fufh,
561 struct fuse_iov *cookediov,
566 struct fuse_dispatcher fdi;
567 struct fuse_read_in *fri = NULL;
570 if (uio_resid(uio) == 0)
575 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
580 * fnd_start is set non-zero once the offset in the directory gets
581 * to the startoff. This is done because directories must be read
582 * from the beginning (offset == 0) when fuse_vnop_readdir() needs
583 * to do an open of the directory.
584 * If it is not set non-zero here, it will be set non-zero in
585 * fuse_internal_readdir_processdata() when uio_offset == startoff.
588 if (uio->uio_offset == startoff)
590 while (uio_resid(uio) > 0) {
591 fdi.iosize = sizeof(*fri);
593 fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
595 fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
598 fri->fh = fufh->fh_id;
599 fri->offset = uio_offset(uio);
600 fri->size = MIN(uio->uio_resid,
601 fuse_get_mpdata(vp->v_mount)->max_read);
603 if ((err = fdisp_wait_answ(&fdi)))
605 if ((err = fuse_internal_readdir_processdata(uio, startoff,
606 &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov,
607 ncookies, &cookies)))
612 return ((err == -1) ? 0 : err);
/*
 * Convert a buffer of FUSE directory entries ('buf') into struct dirent
 * records and copy them out through 'uio'.  For each fuse_dirent the code
 * validates the remaining buffer size and the name length, computes the
 * output record length, and -- once *fnd_start is non-zero -- builds the
 * dirent in 'cookediov', uiomove()s it out and optionally records a
 * cookie.  Until then it only watches for the entry whose 'off' equals
 * 'startoff'.  The uio offset is advanced to each entry's 'off'.
 * NOTE(review): most parameters, the loop header, every error assignment
 * and break, and the cookie bookkeeping are not visible in this view; the
 * summary above covers only what the visible lines show.
 */
616 * Return -1 to indicate that this readdir is finished, 0 if it copied
617 * all the directory data read in and it may be possible to read more
618 * and greater than 0 for a failure.
621 fuse_internal_readdir_processdata(struct uio *uio,
627 struct fuse_iov *cookediov,
636 struct fuse_dirent *fudge;
640 if (bufsize < FUSE_NAME_OFFSET)
643 if (bufsize < FUSE_NAME_OFFSET) {
647 fudge = (struct fuse_dirent *)buf;
648 freclen = FUSE_DIRENT_SIZE(fudge);
650 if (bufsize < freclen) {
652 * This indicates a partial directory entry at the
653 * end of the directory data.
658 #ifdef ZERO_PAD_INCOMPLETE_BUFS
659 if (isbzero(buf, FUSE_NAME_OFFSET)) {
665 if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
669 oreclen = GENERIC_DIRSIZ((struct pseudo_dirent *)
672 if (oreclen > uio_resid(uio)) {
673 /* Out of space for the dir so we are done. */
678 * Don't start to copy the directory entries out until
679 * the requested offset in the directory is found.
681 if (*fnd_start != 0) {
682 fiov_adjust(cookediov, oreclen);
683 bzero(cookediov->base, oreclen);
685 de = (struct dirent *)cookediov->base;
686 de->d_fileno = fudge->ino;
687 de->d_off = fudge->off;
688 de->d_reclen = oreclen;
689 de->d_type = fudge->type;
690 de->d_namlen = fudge->namelen;
691 memcpy((char *)cookediov->base + sizeof(struct dirent) -
693 (char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
694 dirent_terminate(de);
696 err = uiomove(cookediov->base, cookediov->len, uio);
699 if (cookies != NULL) {
700 if (*ncookies == 0) {
704 *cookies = fudge->off;
708 } else if (startoff == fudge->off)
710 buf = (char *)buf + freclen;
712 uio_setoffset(uio, fudge->off);
/*
 * Remove the entry named by 'cnp' from directory 'dvp' by sending opcode
 * 'op' with the NUL-terminated component name as payload and waiting for
 * the answer.  Afterwards the cached link count of 'vp' is decremented,
 * the parent's attribute cache is cleared, and 'vp' is either revoked via
 * fuse_internal_vnode_disappear() (last link, or a directory) or flagged
 * with FN_CTIMECHANGE.
 * NOTE(review): the remaining parameters, the error handling after
 * fdisp_wait_answ() and the cleanup/return are not visible in this view.
 */
722 fuse_internal_remove(struct vnode *dvp,
724 struct componentname *cnp,
727 struct fuse_dispatcher fdi;
731 fdisp_init(&fdi, cnp->cn_namelen + 1);
732 fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);
734 memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
735 ((char *)fdi.indata)[cnp->cn_namelen] = '\0';
737 err = fdisp_wait_answ(&fdi);
744 * Access the cached nlink even if the attr cached has expired. If
745 * it's inaccurate, the worst that will happen is:
746 * 1) We'll recycle the vnode even though the file has another link we
747 * don't know about, costing a bit of cpu time, or
748 * 2) We won't recycle the vnode even though all of its links are gone.
749 * It will linger around until vnlru reclaims it, costing a bit of
/*
 * Post-decrement: 'nlink' receives the value from before the decrement.
 * NOTE(review): a later comment says the cached nlink may be zero; in
 * that case the stored field is decremented past zero -- confirm that is
 * harmless for the field's type.
 */
752 nlink = VTOFUD(vp)->cached_attrs.va_nlink--;
755 * Purge the parent's attribute cache because the daemon
756 * should've updated its mtime and ctime.
758 fuse_vnode_clear_attr_cache(dvp);
760 /* NB: nlink could be zero if it was never cached */
761 if (nlink <= 1 || vnode_vtype(vp) == VDIR) {
762 fuse_internal_vnode_disappear(vp);
765 fuse_vnode_update(vp, FN_CTIMECHANGE);
/*
 * Send a FUSE_RENAME for the entry 'fcnp' in directory 'fdvp' to the name
 * 'tcnp' in directory 'tdvp', and wait for the answer.  Payload layout as
 * built by the visible lines:
 *   struct fuse_rename_in | old name | '\0' | new name | '\0'
 * which is why the buffer is sized sizeof(*fri) + both name lengths + 2.
 * The request is issued with the thread and credentials from 'tcnp'.
 * NOTE(review): the 'tdvp' parameter line, the assignment of 'fri', the
 * post-answer handling and the cleanup/return are not visible here.
 */
774 fuse_internal_rename(struct vnode *fdvp,
775 struct componentname *fcnp,
777 struct componentname *tcnp)
779 struct fuse_dispatcher fdi;
780 struct fuse_rename_in *fri;
783 fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
784 fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred);
787 fri->newdir = VTOI(tdvp);
788 memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr,
790 ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0';
791 memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1,
792 tcnp->cn_nameptr, tcnp->cn_namelen);
793 ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen +
794 tcnp->cn_namelen + 1] = '\0';
796 err = fdisp_wait_answ(&fdi);
/*
 * Build (but do not send) an entry-creation request in 'fdip'.  The
 * payload is the caller's opcode-specific buffer ('buf', 'bufsize' bytes)
 * followed by the component name from 'cnp' and a terminating NUL, so the
 * dispatcher's iosize is bufsize + name length + 1.  The request uses the
 * thread and credentials from 'cnp'.
 * NOTE(review): the parameter lines for 'dnid', 'op', 'buf' and 'bufsize'
 * are not visible in this view.
 */
803 /* entity creation */
806 fuse_internal_newentry_makerequest(struct mount *mp,
808 struct componentname *cnp,
812 struct fuse_dispatcher *fdip)
814 fdip->iosize = bufsize + cnp->cn_namelen + 1;
816 fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);
817 memcpy(fdip->indata, buf, bufsize);
818 memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen);
819 ((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0';
/*
 * Send a previously built entry-creation request and consume the reply:
 * wait for the answer, validate the returned fuse_entry_out with
 * fuse_internal_checkentry(), obtain a vnode for the new node with
 * fuse_vnode_get(), clear the parent's attribute cache, and cache the new
 * node's attributes with fuse_internal_cache_attrs().  A FUSE_FORGET is
 * sent on one visible path after fuse_vnode_get().
 * NOTE(review): the 'vpp' and 'vtyp' parameter lines, the assignment of
 * 'feo', the condition guarding the forget, and the return statements are
 * not visible in this view.
 */
823 fuse_internal_newentry_core(struct vnode *dvp,
825 struct componentname *cnp,
827 struct fuse_dispatcher *fdip)
830 struct fuse_entry_out *feo;
831 struct mount *mp = vnode_mount(dvp);
833 if ((err = fdisp_wait_answ(fdip))) {
838 if ((err = fuse_internal_checkentry(feo, vtyp))) {
841 err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vtyp);
843 fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
849 * Purge the parent's attribute cache because the daemon should've
850 * updated its mtime and ctime
852 fuse_vnode_clear_attr_cache(dvp);
854 fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
855 feo->attr_valid_nsec, NULL, true);
/*
 * Convenience wrapper for entry creation: builds the request with
 * fuse_internal_newentry_makerequest() against the parent's node id
 * (VTOI(dvp)) and then runs fuse_internal_newentry_core() on it.
 * NOTE(review): several parameter lines, the dispatcher init/destroy
 * calls and the return statement are not visible in this view.
 */
861 fuse_internal_newentry(struct vnode *dvp,
863 struct componentname *cnp,
870 struct fuse_dispatcher fdi;
871 struct mount *mp = vnode_mount(dvp);
874 fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
876 err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi);
/*
 * Ticket callback that sends a FUSE_FORGET with nlookup == 1 for the node
 * id found in the request header stored in the ticket (tk_ms_fiov),
 * using curthread and NULL credentials.
 * NOTE(review): the return type and return statement are not visible in
 * this view.
 */
882 /* entity destruction */
885 fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
887 fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
888 ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);
/*
 * Queue a FUSE_FORGET for 'nodeid' on mount 'mp' carrying 'nlookup'.
 * The message is handed to fuse_insert_message(); no wait for an answer
 * appears in the visible lines.
 * NOTE(review): the parameter lines for 'td', 'cred', 'nodeid' and
 * 'nlookup', the assignment of 'ffi', and any dispatcher cleanup are not
 * visible in this view.
 */
894 fuse_internal_forget_send(struct mount *mp,
901 struct fuse_dispatcher fdi;
902 struct fuse_forget_in *ffi;
905 * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
906 * (long long unsigned) nodeid));
909 fdisp_init(&fdi, sizeof(*ffi));
910 fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred);
913 ffi->nlookup = nlookup;
915 fuse_insert_message(fdi.tick, false);
/*
 * Unconditionally ask the daemon for the attributes of 'vp' with
 * FUSE_GETATTR.  The cached size, ctime and mtime are snapshotted before
 * the request; after the reply they replace the server's values whenever
 * the matching FN_SIZECHANGE / FN_CTIMECHANGE / FN_MTIMECHANGE flag is
 * set on the vnode.  The result is then stored through
 * fuse_internal_cache_attrs(), which also fills 'vap'.  The vnode is
 * revoked via fuse_internal_vnode_disappear() when the reported file type
 * differs from the vnode's type, and on one visible error path after the
 * request.
 * NOTE(review): the assignment of 'fgai', the condition guarding the
 * first fuse_internal_vnode_disappear(), and the cleanup/return are not
 * visible in this view.
 */
919 /* Fetch the vnode's attributes from the daemon*/
921 fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
922 struct ucred *cred, struct thread *td)
924 struct fuse_dispatcher fdi;
925 struct fuse_vnode_data *fvdat = VTOFUD(vp);
926 struct fuse_getattr_in *fgai;
927 struct fuse_attr_out *fao;
928 off_t old_filesize = fvdat->cached_attrs.va_size;
929 struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
930 struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
934 fdisp_init(&fdi, sizeof(*fgai));
935 fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
938 * We could look up a file handle and set it in fgai->fh, but that
939 * involves extra runtime work and I'm unaware of any file systems that
942 fgai->getattr_flags = 0;
943 if ((err = fdisp_wait_answ(&fdi))) {
945 fuse_internal_vnode_disappear(vp);
949 fao = (struct fuse_attr_out *)fdi.answ;
950 vtyp = IFTOVT(fao->attr.mode);
951 if (fvdat->flag & FN_SIZECHANGE)
952 fao->attr.size = old_filesize;
953 if (fvdat->flag & FN_CTIMECHANGE) {
954 fao->attr.ctime = old_ctime.tv_sec;
955 fao->attr.ctimensec = old_ctime.tv_nsec;
957 if (fvdat->flag & FN_MTIMECHANGE) {
958 fao->attr.mtime = old_mtime.tv_sec;
959 fao->attr.mtimensec = old_mtime.tv_nsec;
961 fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
962 fao->attr_valid_nsec, vap, true);
963 if (vtyp != vnode_vtype(vp)) {
964 fuse_internal_vnode_disappear(vp);
/*
 * Cache-first attribute fetch: when VTOVA(vp) yields a non-NULL cached
 * vattr it is struct-copied into '*vap'; otherwise the request is
 * forwarded to fuse_internal_do_getattr() and its result is returned.
 * NOTE(review): the last parameter line and the return on the cached path
 * are not visible in this view.
 */
973 /* Read a vnode's attributes from cache or fetch them from the fuse daemon */
975 fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
980 if ((attrs = VTOVA(vp)) != NULL) {
981 *vap = *attrs; /* struct copy */
985 return fuse_internal_do_getattr(vp, vap, cred, td);
/*
 * Mark 'vp' as revoked by setting FN_REVOKED in its fuse vnode data.
 * The caller must hold the vnode exclusively locked (asserted below).
 * NOTE(review): any further statements of this function are not visible
 * in this view.
 */
989 fuse_internal_vnode_disappear(struct vnode *vp)
991 struct fuse_vnode_data *fvdat = VTOFUD(vp);
993 ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
994 fvdat->flag |= FN_REVOKED;
/*
 * Callback for the FUSE_INIT reply.  Visible behaviour:
 *  - bail out on an error in the reply header or from fticket_pull();
 *  - record the server's protocol major/minor; servers older than 7.4 are
 *    rejected with EPROTONOSUPPORT;
 *  - for 7.5+ replies of a recognised length, take max_write and translate
 *    the negotiated FUSE_* flags into FSESS_* session flags; otherwise
 *    fall back to a fixed max_write of 4096;
 *  - derive max_readahead_blocks (7.6+), time_gran (7.23+, accepted only
 *    in the range 1..1000000000, else 1) and the cache mode;
 *  - mark opcodes the server's protocol version predates as not
 *    implemented (INTERRUPT, BMAP, DESTROY, LSEEK, COPY_FILE_RANGE);
 *  - finally set FSESS_INITED, fire the init_done probe and wake up
 *    sleepers on data->ticketer.  fdata_set_dead() is called on a visible
 *    path just before that.
 * NOTE(review): the goto/label structure, the else branches and the
 * return statement are not visible in this view.
 */
998 /* fuse start/stop */
1000 SDT_PROBE_DEFINE2(fusefs, , internal, init_done,
1001 "struct fuse_data*", "struct fuse_init_out*");
1003 fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
1006 struct fuse_data *data = tick->tk_data;
1007 struct fuse_init_out *fiio;
1009 if ((err = tick->tk_aw_ohead.error)) {
1012 if ((err = fticket_pull(tick, uio))) {
1015 fiio = fticket_resp(tick)->base;
1017 data->fuse_libabi_major = fiio->major;
1018 data->fuse_libabi_minor = fiio->minor;
1019 if (!fuse_libabi_geq(data, 7, 4)) {
1021 * With a little work we could support servers as old as 7.1.
1022 * But there would be little payoff.
1024 SDT_PROBE2(fusefs, , internal, trace, 1,
1025 "userpace version too low");
1026 err = EPROTONOSUPPORT;
1030 if (fuse_libabi_geq(data, 7, 5)) {
1031 if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
1032 fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
1033 data->max_write = fiio->max_write;
1034 if (fiio->flags & FUSE_ASYNC_READ)
1035 data->dataflags |= FSESS_ASYNC_READ;
1036 if (fiio->flags & FUSE_POSIX_LOCKS)
1037 data->dataflags |= FSESS_POSIX_LOCKS;
1038 if (fiio->flags & FUSE_EXPORT_SUPPORT)
1039 data->dataflags |= FSESS_EXPORT_SUPPORT;
1040 if (fiio->flags & FUSE_NO_OPEN_SUPPORT)
1041 data->dataflags |= FSESS_NO_OPEN_SUPPORT;
1042 if (fiio->flags & FUSE_NO_OPENDIR_SUPPORT)
1043 data->dataflags |= FSESS_NO_OPENDIR_SUPPORT;
1045 * Don't bother to check FUSE_BIG_WRITES, because it's
1046 * redundant with max_write
1049 * max_background and congestion_threshold are not
1056 /* Old fixed values */
1057 data->max_write = 4096;
1060 if (fuse_libabi_geq(data, 7, 6))
1061 data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf;
1063 if (!fuse_libabi_geq(data, 7, 7))
1064 fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
1066 if (!fuse_libabi_geq(data, 7, 8)) {
1067 fsess_set_notimpl(data->mp, FUSE_BMAP);
1068 fsess_set_notimpl(data->mp, FUSE_DESTROY);
1071 if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 &&
1072 fiio->time_gran <= 1000000000)
1073 data->time_gran = fiio->time_gran;
1075 data->time_gran = 1;
1077 if (!fuse_libabi_geq(data, 7, 23))
1078 data->cache_mode = fuse_data_cache_mode;
1079 else if (fiio->flags & FUSE_WRITEBACK_CACHE)
1080 data->cache_mode = FUSE_CACHE_WB;
1082 data->cache_mode = FUSE_CACHE_WT;
1084 if (!fuse_libabi_geq(data, 7, 24))
1085 fsess_set_notimpl(data->mp, FUSE_LSEEK);
1087 if (!fuse_libabi_geq(data, 7, 28))
1088 fsess_set_notimpl(data->mp, FUSE_COPY_FILE_RANGE);
1092 fdata_set_dead(data);
1095 data->dataflags |= FSESS_INITED;
1096 SDT_PROBE2(fusefs, , internal, init_done, data, fiio);
1097 wakeup(&data->ticketer);
/*
 * Queue the FUSE_INIT handshake for session 'data'.  The request
 * advertises this kernel's protocol version (FUSE_KERNEL_VERSION /
 * FUSE_KERNEL_MINOR_VERSION), a max_readahead of maxbcachebuf, and the
 * feature flags listed in the assignment to fiii->flags.  The reply is
 * handled asynchronously by fuse_internal_init_callback; this function
 * queues the message and destroys its dispatcher without waiting.
 * NOTE(review): the assignment of 'fiii' is not visible in this view.
 */
1104 fuse_internal_send_init(struct fuse_data *data, struct thread *td)
1106 struct fuse_init_in *fiii;
1107 struct fuse_dispatcher fdi;
1109 fdisp_init(&fdi, sizeof(*fiii));
1110 fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
1112 fiii->major = FUSE_KERNEL_VERSION;
1113 fiii->minor = FUSE_KERNEL_MINOR_VERSION;
1115 * fusefs currently reads ahead no more than one cache block at a time.
1116 * See fuse_read_biobackend
1118 fiii->max_readahead = maxbcachebuf;
1120 * Unsupported features:
1121 * FUSE_FILE_OPS: No known FUSE server or client supports it
1122 * FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
1123 * FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even
1124 * when default ACLs are in use.
1125 * FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD
1126 * doesn't have splice(2).
1127 * FUSE_FLOCK_LOCKS: not yet implemented
1128 * FUSE_HAS_IOCTL_DIR: not yet implemented
1129 * FUSE_AUTO_INVAL_DATA: not yet implemented
1130 * FUSE_DO_READDIRPLUS: not yet implemented
1131 * FUSE_READDIRPLUS_AUTO: not yet implemented
1132 * FUSE_ASYNC_DIO: not yet implemented
1133 * FUSE_PARALLEL_DIROPS: not yet implemented
1134 * FUSE_HANDLE_KILLPRIV: not yet implemented
1135 * FUSE_POSIX_ACL: not yet implemented
1136 * FUSE_ABORT_ERROR: not yet implemented
1137 * FUSE_CACHE_SYMLINKS: not yet implemented
1138 * FUSE_MAX_PAGES: not yet implemented
1140 fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
1141 | FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE
1142 | FUSE_NO_OPEN_SUPPORT | FUSE_NO_OPENDIR_SUPPORT;
1144 fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
1145 fuse_insert_message(fdi.tick, false);
1146 fdisp_destroy(&fdi);
/*
 * Outline of the visible logic of fuse_internal_setattr():
 *  - every field of 'vap' that is not VNOVAL (uid, gid, size, atime,
 *    mtime, mode) is copied into the fuse_setattr_in and its FATTR_* bit
 *    is set in 'valid';
 *  - a size change also looks up a writable file handle
 *    (fuse_filehandle_getrw with FWRITE) to pass along as FATTR_FH, and
 *    clears FN_SIZECHANGE on the vnode;
 *  - VA_UTIMES_NULL adds FATTR_ATIME_NOW / FATTR_MTIME_NOW;
 *  - locally dirtied timestamps (FN_MTIMECHANGE, and FN_CTIMECHANGE for
 *    protocol 7.23+) are flushed from the cached attributes;
 *  - after the reply, a change of file type revokes the vnode via
 *    fuse_internal_vnode_disappear(); otherwise the cached timestamps are
 *    undirtied and the returned attributes are cached.
 * NOTE(review): 'td' is dereferenced unconditionally to obtain the pid.
 * The assignments of 'fvdat' and 'fsai', the conditions around the file
 * handle use, the error assignments and the return are not visible in
 * this view.
 */
1150 * Send a FUSE_SETATTR operation with no permissions checks. If cred is NULL,
1151 * send the request with root credentials
1153 int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
1154 struct thread *td, struct ucred *cred)
1156 struct fuse_vnode_data *fvdat;
1157 struct fuse_dispatcher fdi;
1158 struct fuse_setattr_in *fsai;
1160 pid_t pid = td->td_proc->p_pid;
1161 struct fuse_data *data;
1165 int sizechanged = -1;
1166 uint64_t newsize = 0;
1168 mp = vnode_mount(vp);
1170 data = fuse_get_mpdata(mp);
1171 dataflags = data->dataflags;
1173 fdisp_init(&fdi, sizeof(*fsai));
1174 fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
1182 if (vap->va_uid != (uid_t)VNOVAL) {
1183 fsai->uid = vap->va_uid;
1184 fsai->valid |= FATTR_UID;
1186 if (vap->va_gid != (gid_t)VNOVAL) {
1187 fsai->gid = vap->va_gid;
1188 fsai->valid |= FATTR_GID;
1190 if (vap->va_size != VNOVAL) {
1191 struct fuse_filehandle *fufh = NULL;
1193 /*Truncate to a new value. */
1194 fsai->size = vap->va_size;
1196 newsize = vap->va_size;
1197 fsai->valid |= FATTR_SIZE;
1199 fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
1201 fsai->fh = fufh->fh_id;
1202 fsai->valid |= FATTR_FH;
1204 VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
1206 if (vap->va_atime.tv_sec != VNOVAL) {
1207 fsai->atime = vap->va_atime.tv_sec;
1208 fsai->atimensec = vap->va_atime.tv_nsec;
1209 fsai->valid |= FATTR_ATIME;
1210 if (vap->va_vaflags & VA_UTIMES_NULL)
1211 fsai->valid |= FATTR_ATIME_NOW;
1213 if (vap->va_mtime.tv_sec != VNOVAL) {
1214 fsai->mtime = vap->va_mtime.tv_sec;
1215 fsai->mtimensec = vap->va_mtime.tv_nsec;
1216 fsai->valid |= FATTR_MTIME;
1217 if (vap->va_vaflags & VA_UTIMES_NULL)
1218 fsai->valid |= FATTR_MTIME_NOW;
1219 } else if (fvdat->flag & FN_MTIMECHANGE) {
1220 fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec;
1221 fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
1222 fsai->valid |= FATTR_MTIME;
1224 if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) {
1225 fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec;
1226 fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
1227 fsai->valid |= FATTR_CTIME;
1229 if (vap->va_mode != (mode_t)VNOVAL) {
1230 fsai->mode = vap->va_mode & ALLPERMS;
1231 fsai->valid |= FATTR_MODE;
1237 if ((err = fdisp_wait_answ(&fdi)))
1239 vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);
1241 if (vnode_vtype(vp) != vtyp) {
1242 if (vnode_vtype(vp) == VNON && vtyp != VNON) {
1243 SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
1244 "vnode_vtype is VNON and vtype isn't.");
1247 * STALE vnode, ditch
1249 * The vnode has changed its type "behind our back".
1250 * There's nothing really we can do, so let us just
1251 * force an internal revocation and tell the caller to
1252 * try again, if interested.
1254 fuse_internal_vnode_disappear(vp);
1259 struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;
1260 fuse_vnode_undirty_cached_timestamps(vp);
1261 fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
1262 fao->attr_valid_nsec, NULL, false);
1266 fdisp_destroy(&fdi);
/*
 * After a write, strip the set-uid/set-gid bits from 'vp' when the mount
 * uses FSESS_DEFAULT_PERMISSIONS and priv_check_cred() with
 * PRIV_VFS_RETAINSUGID returns non-zero for 'cred'.  The current mode is
 * read with fuse_internal_getattr(); if either bit is set, a setattr
 * carrying the mode without those bits is sent with NULL credentials.
 * The setattr result is deliberately ignored (see the comment below).
 * The vnode must be locked (asserted).
 * NOTE(review): the last parameter line, the re-initialisation of 'va'
 * before the setattr, and the return are not visible in this view.
 */
1271 * FreeBSD clears the SUID and SGID bits on any write by a non-root user.
1274 fuse_internal_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
1277 struct fuse_data *data;
1282 mp = vnode_mount(vp);
1283 data = fuse_get_mpdata(mp);
1284 dataflags = data->dataflags;
1286 ASSERT_VOP_LOCKED(vp, __func__);
1288 if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
1289 if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
1290 fuse_internal_getattr(vp, &va, cred, td);
1291 if (va.va_mode & (S_ISUID | S_ISGID)) {
1292 mode_t mode = va.va_mode & ~(S_ISUID | S_ISGID);
1293 /* Clear all vattr fields except mode */
1298 * Ignore fuse_internal_setattr's return value,
1299 * because at this point the write operation has
1300 * already succeeded and we don't want to return
1301 * failing status for that.
1303 (void)fuse_internal_setattr(vp, &va, td, NULL);
/*
 * Helper compiled only with ZERO_PAD_INCOMPLETE_BUFS: scans the first
 * 'len' bytes of 'buf' one byte at a time, testing each for a non-zero
 * value.
 * NOTE(review): the return statements and the closing #endif are not
 * visible in this view; presumably the result reports whether the buffer
 * is all zero -- confirm against the full file.
 */
1309 #ifdef ZERO_PAD_INCOMPLETE_BUFS
1311 isbzero(void *buf, size_t len)
1315 for (i = 0; i < len; i++) {
1316 if (((char *)buf)[i])
/*
 * Module-level setup for this file: allocates the two lookup-cache
 * counters with M_WAITOK.
 */
1326 fuse_internal_init(void)
1328 fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK);
1329 fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK);
/*
 * Module-level teardown for this file: frees the two lookup-cache
 * counters allocated by fuse_internal_init().
 */
1333 fuse_internal_destroy(void)
1335 counter_u64_free(fuse_lookup_cache_hits);
1336 counter_u64_free(fuse_lookup_cache_misses);