2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
37 #include <sys/param.h>
44 #include <sys/errno.h>
45 #include <ufs/ufs/dinode.h>
46 #include <ufs/ffs/fs.h>
48 uint32_t calculate_crc32c(uint32_t, const void *, size_t);
49 uint32_t ffs_calc_sbhash(struct fs *);
/*
 * Allocation shims, first variant: only the size reaches malloc() and only
 * the pointer reaches free(); the type and flags arguments are discarded.
 * NOTE(review): these three macros are defined a second time further down;
 * the conditional that selects between the two sets is not visible here.
 */
51 #define UFS_MALLOC(size, type, flags) malloc(size)
52 #define UFS_FREE(ptr, type) free(ptr)
/* Current time obtained by calling time(NULL). */
53 #define UFS_TIME time(NULL)
55 * Request standard superblock location in ffs_sbget
57 #define STDSB -1 /* Fail if check-hash is bad */
58 #define STDSB_NOHASHFAIL -2 /* Ignore check-hash failure */
61 #include <sys/systm.h>
62 #include <sys/gsb_crc32.h>
64 #include <sys/malloc.h>
65 #include <sys/mount.h>
66 #include <sys/vnode.h>
69 #include <sys/ucred.h>
70 #include <sys/taskqueue.h>
72 #include <ufs/ufs/quota.h>
73 #include <ufs/ufs/inode.h>
74 #include <ufs/ufs/extattr.h>
75 #include <ufs/ufs/ufsmount.h>
76 #include <ufs/ufs/ufs_extern.h>
77 #include <ufs/ffs/ffs_extern.h>
78 #include <ufs/ffs/fs.h>
/*
 * Allocation shims, second variant: size, type and flags are all forwarded
 * to malloc(), and both the pointer and the type are forwarded to free().
 */
80 #define UFS_MALLOC(size, type, flags) malloc(size, type, flags)
81 #define UFS_FREE(ptr, type) free(ptr, type)
/* Current time taken from time_second. */
82 #define UFS_TIME time_second
85 * Return buffer with the contents of block "offset" from the beginning of
86 * directory "ip". If "res" is non-zero, fill it in with a pointer to the
87 * remaining space in the directory.
/*
 * ffs_blkatoff: read the block of vnode vp that contains byte "offset" and
 * store in *res a pointer to that byte inside the buffer's data.
 */
90 ffs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp)
/* Logical block number that contains "offset". */
100 lbn = lblkno(fs, offset);
/* Size to read for that block, as computed by blksize() for this inode. */
101 bsize = blksize(fs, ip, lbn);
/* Read the block without credentials (NOCRED); the buffer comes back in bp. */
104 error = bread(vp, lbn, bsize, NOCRED, &bp);
/* Buffer data start plus the offset's position within the block. */
109 *res = (char *)bp->b_data + blkoff(fs, offset);
115 * Load up the contents of an inode and copy the appropriate pieces
116 * to the incore copy.
/*
 * ffs_load_inode: copy selected fields of on-disk inode "ino", found in
 * buffer bp, into the in-core inode ip.  Both the ufs1_dinode and the
 * ufs2_dinode layouts are handled.
 */
119 ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
121 struct ufs1_dinode *dip1;
122 struct ufs2_dinode *dip2;
/* The ufs1_dinode at index ino_to_fsbo(fs, ino) within the buffer data. */
128 *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
/* Mirror the frequently used dinode fields into the in-core inode. */
129 ip->i_mode = dip1->di_mode;
130 ip->i_nlink = dip1->di_nlink;
/* The effective link count starts out equal to the on-disk link count. */
131 ip->i_effnlink = dip1->di_nlink;
132 ip->i_size = dip1->di_size;
133 ip->i_flags = dip1->di_flags;
134 ip->i_gen = dip1->di_gen;
135 ip->i_uid = dip1->di_uid;
136 ip->i_gid = dip1->di_gid;
/* ufs2_dinode layout: the on-disk inode is located the same way. */
139 dip2 = ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
/*
 * Verify the inode check-hash.  The message below is printed only when
 * verification fails and ffs_fsfail_cleanup() returns zero for the error.
 */
140 if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0 &&
141 !ffs_fsfail_cleanup(ITOUMP(ip), error)) {
142 printf("%s: inode %jd: check-hash failed\n", fs->fs_fsmnt,
/* Same set of fields as the ufs1 case, taken from the ufs2 dinode. */
148 ip->i_mode = dip2->di_mode;
149 ip->i_nlink = dip2->di_nlink;
150 ip->i_effnlink = dip2->di_nlink;
151 ip->i_size = dip2->di_size;
152 ip->i_flags = dip2->di_flags;
153 ip->i_gen = dip2->di_gen;
154 ip->i_uid = dip2->di_uid;
155 ip->i_gid = dip2->di_gid;
160 * Verify that a filesystem block number is a valid data block.
161 * This routine is only called on untrusted filesystems.
/*
 * ffs_check_blkno: validate block address daddr, spanning blksize bytes and
 * referenced by inode inum, against the layout of the filesystem on mp.
 */
164 ffs_check_blkno(struct mount *mp, ino_t inum, ufs2_daddr_t daddr, int blksize)
167 struct ufsmount *ump;
168 ufs2_daddr_t end_daddr;
/* Asserts that the mount carries the MNT_UNTRUSTED flag. */
171 KASSERT((mp->mnt_flag & MNT_UNTRUSTED) != 0,
172 ("ffs_check_blkno called on a trusted file system"));
/* Cylinder group that contains daddr. */
175 cg = dtog(fs, daddr);
/* End of the range: daddr plus the number of fragments in blksize bytes. */
176 end_daddr = daddr + numfrags(fs, blksize);
178 * Verify that the block number is a valid data block. Also check
179 * that it does not point to an inode block or a superblock. Accept
180 * blocks that are unallocated (0) or part of snapshot metadata
181 * (BLK_NOCOPY or BLK_SNAP).
183 * Thus, the block must be in a valid range for the filesystem and
184 * either in the space before a backup superblock (except the first
185 * cylinder group where that space is used by the bootstrap code) or
186 * after the inode blocks and before the end of the cylinder group.
/*
 * Range test: true when daddr is at or below BLK_SNAP, or when the range
 * ends within fs_size and either ends at or before cgsblock() of a non-zero
 * cylinder group, or lies between cgdmin() and cgbase() + fs_fpg.
 */
188 if ((uint64_t)daddr <= BLK_SNAP ||
189 ((uint64_t)end_daddr <= fs->fs_size &&
190 ((cg > 0 && end_daddr <= cgsblock(fs, cg)) ||
191 (daddr >= cgdmin(fs, cg) &&
192 end_daddr <= cgbase(fs, cg) + fs->fs_fpg))))
/* Record in havemtx whether this thread already owns the UFS mutex. */
194 if ((havemtx = mtx_owned(UFS_MTX(ump))) == 0)
/* ppsratecheck() gates the message using per-mount state and a limit of 1. */
196 if (ppsratecheck(&ump->um_last_integritymsg,
197 &ump->um_secs_integritymsg, 1)) {
/* Report the mount point, the inode number and the offending block number. */
199 uprintf("\n%s: inode %jd, out-of-range indirect block "
200 "number %jd\n", mp->mnt_stat.f_mntonname, inum, daddr);
209 * Initiate a forcible unmount.
210 * Used to unmount filesystems whose underlying media has gone away.
/*
 * ffs_fsfail_unmount: handler taking (v, pending) that looks a mount up by
 * filesystem id and requests a forced unmount of it.
 */
213 ffs_fsfail_unmount(void *v, int pending)
215 struct fsfail_task *etp;
221 * Find our mount and get a ref on it, then try to unmount.
223 mp = vfs_getvfs(&etp->fsid);
/* MNT_FORCE requests a forced unmount, issued on behalf of curthread. */
225 dounmount(mp, MNT_FORCE, curthread);
230 * On first ENXIO error, start a task that forcibly unmounts the filesystem.
232 * Return true if a cleanup is in progress.
/*
 * ffs_fsfail_cleanup: entry point that obtains its result from
 * ffs_fsfail_cleanup_locked() for the same mount and error.
 */
235 ffs_fsfail_cleanup(struct ufsmount *ump, int error)
/* The _locked variant asserts that the UFS mutex is owned when called. */
240 retval = ffs_fsfail_cleanup_locked(ump, error);
/*
 * ffs_fsfail_cleanup_locked: on an ENXIO error for a mount that is not yet
 * flagged UM_FSFAIL_CLEANUP, set the flag and queue the forced-unmount task.
 * Returns non-zero whenever UM_FSFAIL_CLEANUP is set on the mount.
 */
246 ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error)
248 struct fsfail_task *etp;
/* The UFS mutex must be owned on entry. */
251 mtx_assert(UFS_MTX(ump), MA_OWNED);
/* Only ENXIO starts cleanup, and only if the flag is not already set. */
252 if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) {
253 ump->um_flags |= UM_FSFAIL_CLEANUP;
255 * Queue an async forced unmount.
257 etp = ump->um_fsfail_task;
/* etp now holds the task record; clear the mount's pointer to it. */
258 ump->um_fsfail_task = NULL;
/* ffs_fsfail_unmount() receives etp and is queued on taskqueue_thread. */
261 TASK_INIT(tp, 0, ffs_fsfail_unmount, etp);
262 taskqueue_enqueue(taskqueue_thread, tp);
263 printf("UFS: forcibly unmounting %s from %s\n",
264 ump->um_mountp->mnt_stat.f_mntfromname,
265 ump->um_mountp->mnt_stat.f_mntonname);
/* Non-zero if the flag was set just now or by an earlier call. */
268 return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0);
272 * Wrapper used during ENXIO cleanup to allocate empty buffers when
273 * the kernel is unable to read the real one. They are needed so that
274 * the soft updates code can use them to unwind its dependencies.
/*
 * ffs_breadz: breadn_flags() wrapper that falls back to a zero-filled buffer
 * when the read fails and ffs_fsfail_cleanup() returns non-zero.
 */
277 ffs_breadz(struct ufsmount *ump, struct vnode *vp, daddr_t lblkno,
278 daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt,
279 struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *),
/* Add GB_CVTENXIO to the caller's flags before issuing the read. */
284 flags |= GB_CVTENXIO;
285 error = breadn_flags(vp, lblkno, dblkno, size, rablkno, rabsize, cnt,
286 cred, flags, ckhashfunc, bpp);
/* The read failed and fsfail cleanup is in progress for this mount. */
287 if (error != 0 && ffs_fsfail_cleanup(ump, error)) {
/* Obtain the buffer via getblkx(); this is asserted to succeed. */
288 error = getblkx(vp, lblkno, dblkno, size, 0, 0, flags, bpp);
289 KASSERT(error == 0, ("getblkx failed"));
/* Zero-fill the buffer (offset 0, length size). */
290 vfs_bio_bzero_buf(*bpp, 0, size);
297 * Verify an inode check-hash.
/*
 * ffs_verify_dinode_ckhash: compute the CRC32C of UFS2 on-disk inode dip and
 * compare it with the value stored in di_ckhash.
 */
300 ffs_verify_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip)
302 uint32_t ckhash, save_ckhash;
305 * Return success if unallocated or we are not doing inode check-hash.
307 if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0)
310 * Exclude di_ckhash from the crc32 calculation, e.g., always use
311 * a check-hash value of zero when calculating the check-hash.
313 save_ckhash = dip->di_ckhash;
/* Hash the entire dinode structure, seeded with ~0L. */
315 ckhash = calculate_crc32c(~0L, (void *)dip, sizeof(*dip));
/* Put the saved hash value back into the dinode. */
316 dip->di_ckhash = save_ckhash;
/* The stored and the computed hash agree. */
317 if (save_ckhash == ckhash)
323 * Update an inode check-hash.
/*
 * ffs_update_dinode_ckhash: store a newly computed CRC32C of UFS2 on-disk
 * inode dip in its di_ckhash field.
 */
326 ffs_update_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip)
/* Condition: the inode has mode 0, or CK_INODE is not set in fs_metackhash. */
329 if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0)
332 * Exclude old di_ckhash from the crc32 calculation, e.g., always use
333 * a check-hash value of zero when calculating the new check-hash.
336 dip->di_ckhash = calculate_crc32c(~0L, (void *)dip, sizeof(*dip));
340 * These are the low-level functions that actually read and write
341 * the superblock and its associated data.
/*
 * Candidate superblock offsets, initialized from SBLOCKSEARCH.  The search
 * loop in ffs_sbget() stops at an entry equal to -1.
 */
343 static off_t sblock_try[] = SBLOCKSEARCH;
/* Forward declaration: ffs_sbget() calls readsuper() before its definition. */
344 static int readsuper(void *, struct fs **, off_t, int, int,
345 int (*)(void *, off_t, void **, int));
348 * Read a superblock from the devfd device.
350 * If an alternate superblock is specified, it is read. Otherwise the
351 * set of locations given in the SBLOCKSEARCH list is searched for a
352 * superblock. Memory is allocated for the superblock by the readfunc and
353 * is returned. If filltype is non-NULL, additional memory is allocated
354 * of type filltype and filled in with the superblock summary information.
355 * All memory is freed when any error is returned.
357 * If a superblock is found, zero is returned. Otherwise one of the
358 * following error values is returned:
359 * EIO: non-existent or truncated superblock.
360 * EIO: error reading summary information.
361 * ENOENT: no usable known superblock found.
362 * ENOSPC: failed to allocate space for the superblock.
363 * EINVAL: The previous newfs operation on this volume did not complete.
364 * The administrator must complete newfs before using this volume.
/*
 * ffs_sbget: read a superblock through readfunc, then read its summary
 * information into memory allocated with UFS_MALLOC().
 */
367 ffs_sbget(void *devfd, struct fs **fsp, off_t altsblock,
368 struct malloc_type *filltype,
369 int (*readfunc)(void *devfd, off_t loc, void **bufp, int size))
372 int i, error, size, blks;
/* A non-negative altsblock is an explicit location (isaltsblk argument 1). */
381 if (altsblock >= 0) {
382 if ((error = readsuper(devfd, &fs, altsblock, 1, chkhash,
385 UFS_FREE(fs, filltype);
/* STDSB_NOHASHFAIL selects the mode that ignores check-hash failures. */
389 if (altsblock == STDSB_NOHASHFAIL)
/* Try each location in sblock_try[] in order (isaltsblk argument 0). */
391 for (i = 0; sblock_try[i] != -1; i++) {
392 if ((error = readsuper(devfd, &fs, sblock_try[i], 0,
393 chkhash, readfunc)) == 0)
396 UFS_FREE(fs, filltype);
/* The loop reached the -1 terminator of the list. */
403 if (sblock_try[i] == -1)
407 * Read in the superblock summary information.
409 size = fs->fs_cssize;
/* Number of fragments occupied by the summary area. */
410 blks = howmany(size, fs->fs_fsize);
/*
 * Extra space: one int32_t per cylinder group when fs_contigsumsize > 0,
 * plus one byte per cylinder group in every case.
 */
411 if (fs->fs_contigsumsize > 0)
412 size += fs->fs_ncg * sizeof(int32_t);
413 size += fs->fs_ncg * sizeof(u_int8_t);
414 /* When running in libufs or libsa, UFS_MALLOC may fail */
415 if ((space = UFS_MALLOC(size, filltype, M_WAITOK)) == NULL) {
416 UFS_FREE(fs, filltype);
/* The summary array lives at the start of the allocation. */
419 fs->fs_csp = (struct csum *)space;
/* Walk the summary area fs_frag fragments at a time. */
420 for (i = 0; i < blks; i += fs->fs_frag) {
/* Final partial step: only the remaining fragments are read. */
422 if (i + fs->fs_frag > blks)
423 size = (blks - i) * fs->fs_fsize;
425 error = (*readfunc)(devfd,
426 dbtob(fsbtodb(fs, fs->fs_csaddr + i)), (void **)&buf, size);
/* Release the read buffer, the summary space and the superblock. */
429 UFS_FREE(buf, filltype);
430 UFS_FREE(fs->fs_csp, filltype);
431 UFS_FREE(fs, filltype);
/* Copy the data just read into the summary space, then free the buffer. */
434 memcpy(space, buf, size);
435 UFS_FREE(buf, filltype);
/* Set every cylinder group's fs_maxcluster entry to fs_contigsumsize. */
438 if (fs->fs_contigsumsize > 0) {
439 fs->fs_maxcluster = lp = (int32_t *)space;
440 for (i = 0; i < fs->fs_ncg; i++)
441 *lp++ = fs->fs_contigsumsize;
442 space = (uint8_t *)lp;
/* fs_contigdirs: one zeroed byte per cylinder group. */
444 size = fs->fs_ncg * sizeof(u_int8_t);
445 fs->fs_contigdirs = (u_int8_t *)space;
446 bzero(fs->fs_contigdirs, size);
452 * Try to read a superblock from the location specified by sblockloc.
453 * Return zero on success or an errno on failure.
/*
 * readsuper: read SBLOCKSIZE bytes at sblockloc through readfunc and check
 * the result's magic number, sizes and check-hash.
 */
456 readsuper(void *devfd, struct fs **fsp, off_t sblockloc, int isaltsblk,
457 int chkhash, int (*readfunc)(void *devfd, off_t loc, void **bufp, int size))
/* readfunc hands the buffer back through *fsp. */
463 error = (*readfunc)(devfd, sblockloc, (void **)fsp, SBLOCKSIZE);
/* Test for the FS_BAD_MAGIC marker. */
467 if (fs->fs_magic == FS_BAD_MAGIC)
/*
 * Plausibility test: UFS1 magic at an alternate location or at/below
 * SBLOCK_UFS1, or UFS2 magic at an alternate location or where
 * fs_sblockloc matches sblockloc; fs_bsize and fs_sbsize must be in range.
 */
469 if (((fs->fs_magic == FS_UFS1_MAGIC && (isaltsblk ||
470 sblockloc <= SBLOCK_UFS1)) ||
471 (fs->fs_magic == FS_UFS2_MAGIC && (isaltsblk ||
472 sblockloc == fs->fs_sblockloc))) &&
474 fs->fs_bsize >= MINBSIZE &&
475 fs->fs_bsize <= MAXBSIZE &&
476 fs->fs_bsize >= roundup(sizeof(struct fs), DEV_BSIZE) &&
477 fs->fs_sbsize <= SBLOCKSIZE) {
479 * If the filesystem has been run on a kernel without
480 * metadata check hashes, disable them.
482 if ((fs->fs_flags & FS_METACKHASH) == 0)
483 fs->fs_metackhash = 0;
/* Compare the recorded hash with a freshly computed one. */
484 if (fs->fs_ckhash != (ckhash = ffs_calc_sbhash(fs))) {
/* " (Ignored)" is appended to the message when chkhash is zero. */
486 res = uprintf("Superblock check-hash failed: recorded "
487 "check-hash 0x%x != computed check-hash 0x%x%s\n",
488 fs->fs_ckhash, ckhash,
489 chkhash == 0 ? " (Ignored)" : "");
494 * Print check-hash failure if no controlling terminal
495 * in kernel or always if in user-mode (libufs).
498 printf("Superblock check-hash failed: recorded "
499 "check-hash 0x%x != computed check-hash "
500 "0x%x%s\n", fs->fs_ckhash, ckhash,
501 chkhash == 0 ? " (Ignored)" : "");
/* Set the FS_NEEDSFSCK flag in the superblock. */
503 fs->fs_flags |= FS_NEEDSFSCK;
510 /* Have to set for old filesystems that predate this field */
511 fs->fs_sblockactualloc = sblockloc;
512 /* Not yet any summary information */
520 * Write a superblock to the devfd device from the memory pointed to by fs.
521 * Write out the superblock summary information if it is present.
523 * If the write is successful, zero is returned. Otherwise one of the
524 * following error values is returned:
525 * EIO: failed to write superblock.
526 * EIO: failed to write superblock summary information.
/*
 * ffs_sbput: write the summary information (when fs_csp is set) and then
 * the superblock itself through writefunc.
 */
529 ffs_sbput(void *devfd, struct fs *fs, off_t loc,
530 int (*writefunc)(void *devfd, off_t loc, void *buf, int size))
532 int i, error, blks, size;
536 * If there is summary information, write it first, so if there
537 * is an error, the superblock will not be marked as clean.
539 if (fs->fs_csp != NULL) {
/* Number of fragments occupied by the summary area. */
540 blks = howmany(fs->fs_cssize, fs->fs_fsize);
541 space = (uint8_t *)fs->fs_csp;
/* Walk the summary area fs_frag fragments at a time. */
542 for (i = 0; i < blks; i += fs->fs_frag) {
/* Final partial step: only the remaining fragments are written. */
544 if (i + fs->fs_frag > blks)
545 size = (blks - i) * fs->fs_fsize;
546 if ((error = (*writefunc)(devfd,
547 dbtob(fsbtodb(fs, fs->fs_csaddr + i)),
/* Stamp the time first, so the hash computed next covers the new value. */
554 fs->fs_time = UFS_TIME;
555 fs->fs_ckhash = ffs_calc_sbhash(fs);
/* Write fs_sbsize bytes of the superblock at loc. */
556 if ((error = (*writefunc)(devfd, loc, fs, fs->fs_sbsize)) != 0)
562 * Calculate the check-hash for a superblock.
/*
 * ffs_calc_sbhash: compute the check-hash value for superblock fs.
 */
565 ffs_calc_sbhash(struct fs *fs)
/* ckhash receives the computed value; save_ckhash keeps the stored one. */
567 uint32_t ckhash, save_ckhash;
570 * A filesystem that was using a superblock ckhash may be moved
571 * to an older kernel that does not support ckhashes. The
572 * older kernel will clear the FS_METACKHASH flag indicating
573 * that it does not update hashes. When the disk is moved back
574 * to a kernel capable of ckhashes it disables them on mount:
576 * if ((fs->fs_flags & FS_METACKHASH) == 0)
577 * fs->fs_metackhash = 0;
579 * This leaves ((fs->fs_metackhash & CK_SUPERBLOCK) == 0) with an
580 * old stale value in the fs->fs_ckhash field. Thus the need to
581 * just accept what is there.
/* CK_SUPERBLOCK is not set: return the stored value without recomputing. */
583 if ((fs->fs_metackhash & CK_SUPERBLOCK) == 0)
584 return (fs->fs_ckhash);
/* Keep the stored hash so that it can be put back after the computation. */
586 save_ckhash = fs->fs_ckhash;
589 * If newly read from disk, the caller is responsible for
590 * verifying that fs->fs_sbsize <= SBLOCKSIZE.
592 ckhash = calculate_crc32c(~0L, (void *)fs, fs->fs_sbsize);
/* Put the saved hash value back into the superblock. */
593 fs->fs_ckhash = save_ckhash;
598 * Update the frsum fields to reflect addition or deletion
/*
 * ffs_fragacct: add cnt to the fraglist[] entries selected by matching
 * fragment-size patterns against fragmap.
 */
602 ffs_fragacct(struct fs *fs, int fragmap, int32_t fraglist[], int cnt)
/* Table lookup keyed by fs_frag and fragmap, shifted left by one bit. */
608 inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1;
/* Consider every size below fs_frag. */
610 for (siz = 1; siz < fs->fs_frag; siz++) {
/* Test the bit in inblk that corresponds to this size. */
611 if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0)
614 subfield = inside[siz];
/* At each position, an exact match of the masked map adds cnt. */
615 for (pos = siz; pos <= fs->fs_frag; pos++) {
616 if ((fragmap & field) == subfield) {
617 fraglist[siz] += cnt;
631 * check if a block is available
/*
 * ffs_isblock: test whether every map bit belonging to block h is set in
 * the bitmap cp; the bits-per-block layout is selected by fs_frag.
 */
634 ffs_isblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h)
638 switch ((int)fs->fs_frag) {
/* One byte per block: all eight bits must be set. */
640 return (cp[h] == 0xff);
/* Two blocks per byte: h's low bit selects the low or the high nibble. */
642 mask = 0x0f << ((h & 0x1) << 2);
643 return ((cp[h >> 1] & mask) == mask);
/* Four blocks per byte: two bits per block. */
645 mask = 0x03 << ((h & 0x3) << 1);
646 return ((cp[h >> 2] & mask) == mask);
/* Eight blocks per byte: one bit per block. */
648 mask = 0x01 << (h & 0x7);
649 return ((cp[h >> 3] & mask) == mask);
652 panic("ffs_isblock");
660 * check if a block is free
/*
 * ffs_isfreeblock: test whether every map bit belonging to block h is clear
 * in the bitmap cp; the bits-per-block layout is selected by fs_frag.
 */
663 ffs_isfreeblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
666 switch ((int)fs->fs_frag) {
/* Two blocks per byte: the selected nibble must be zero. */
670 return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
/* Four blocks per byte: the selected bit pair must be zero. */
672 return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
/* Eight blocks per byte: the selected bit must be zero. */
674 return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
677 panic("ffs_isfreeblock");
685 * take a block out of the map
/*
 * ffs_clrblock: clear the map bits belonging to block h in the bitmap cp;
 * the bits-per-block layout is selected by fs_frag.
 */
688 ffs_clrblock(struct fs *fs, u_char *cp, ufs1_daddr_t h)
691 switch ((int)fs->fs_frag) {
/* Two blocks per byte: clear the selected nibble. */
696 cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
/* Four blocks per byte: clear the selected bit pair. */
699 cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
/* Eight blocks per byte: clear the selected bit. */
702 cp[h >> 3] &= ~(0x01 << (h & 0x7));
706 panic("ffs_clrblock");
713 * put a block into the map
/*
 * ffs_setblock: set the map bits belonging to block h in the bitmap cp;
 * the bits-per-block layout is selected by fs_frag.
 */
716 ffs_setblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h)
719 switch ((int)fs->fs_frag) {
/* Two blocks per byte: set the selected nibble. */
725 cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
/* Four blocks per byte: set the selected bit pair. */
728 cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
/* Eight blocks per byte: set the selected bit. */
731 cp[h >> 3] |= (0x01 << (h & 0x7));
735 panic("ffs_setblock");
742 * Update the cluster map because of an allocation or free.
744 * Cnt == 1 means free; cnt == -1 means allocating.
/*
 * ffs_clusteracct: update the cluster free map and cluster summary of
 * cylinder group cgp for block blkno, then record a value in this group's
 * fs_maxcluster entry.
 */
747 ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs1_daddr_t blkno, int cnt)
751 u_char *freemapp, *mapp;
752 int i, start, end, forw, back, map;
/* Guard on fs_contigsumsize being zero or negative. */
755 if (fs->fs_contigsumsize <= 0)
/* Cluster free bitmap and cluster summary array of this cylinder group. */
757 freemapp = cg_clustersfree(cgp);
758 sump = cg_clustersum(cgp);
760 * Allocate or clear the actual block.
763 setbit(freemapp, blkno);
765 clrbit(freemapp, blkno);
767 * Find the size of the cluster going forward.
/* Look ahead at most fs_contigsumsize blocks, clamped to cg_nclusterblks. */
770 end = start + fs->fs_contigsumsize;
771 if (end >= cgp->cg_nclusterblks)
772 end = cgp->cg_nclusterblks;
/* Byte that holds the start bit, and the mask for that bit. */
773 mapp = &freemapp[start / NBBY];
775 bit = 1U << (start % NBBY);
776 for (i = start; i < end; i++) {
/* Test for a clear bit in the map. */
777 if ((map & bit) == 0)
/* Distinguishes the last bit of a byte from the others. */
779 if ((i & (NBBY - 1)) != (NBBY - 1)) {
788 * Find the size of the cluster going backward.
/* Look back at most fs_contigsumsize blocks. */
791 end = start - fs->fs_contigsumsize;
794 mapp = &freemapp[start / NBBY];
796 bit = 1U << (start % NBBY);
797 for (i = start; i > end; i--) {
798 if ((map & bit) == 0)
/* Distinguishes the first bit of a byte from the others. */
800 if ((i & (NBBY - 1)) != 0) {
/* Mask for the highest bit of a byte. */
804 bit = 1U << (NBBY - 1);
809 * Account for old cluster and the possibly new forward and
/* Clamp the size to fs_contigsumsize. */
813 if (i > fs->fs_contigsumsize)
814 i = fs->fs_contigsumsize;
821 * Update cluster summary information.
/* Walk the sizes from fs_contigsumsize downward; i is stored afterwards. */
823 lp = &sump[fs->fs_contigsumsize];
824 for (i = fs->fs_contigsumsize; i > 0; i--)
827 fs->fs_maxcluster[cgp->cg_cgx] = i;