2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1980, 1986, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95";
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
40 #include <sys/param.h>
42 #include <sys/stdint.h>
43 #include <sys/sysctl.h>
45 #include <ufs/ufs/dinode.h>
46 #include <ufs/ufs/dir.h>
47 #include <ufs/ffs/fs.h>
57 struct bufarea *icachebp; /* inode cache buffer */
59 static int iblock(struct inodesc *, off_t isize, int type);
60 static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
62 static int snapclean(struct inodesc *idesc);
63 static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
64 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long));
67 ckinode(union dinode *dp, struct inodesc *idesc)
69 off_t remsize, sizepb;
75 char pathbuf[MAXPATHLEN + 1];
77 if (idesc->id_fix != IGNORE)
78 idesc->id_fix = DONTKNOW;
81 idesc->id_lballoc = -1;
83 idesc->id_entryno = 0;
84 idesc->id_filesize = DIP(dp, di_size);
85 mode = DIP(dp, di_mode) & IFMT;
86 if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
87 DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
89 if (sblock.fs_magic == FS_UFS1_MAGIC)
93 if (DIP(&dino, di_size) < 0) {
94 pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size));
97 ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
98 for (i = 0; i < UFS_NDADDR; i++) {
101 (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
103 numfrags(&sblock, fragroundup(&sblock, offset));
105 idesc->id_numfrags = sblock.fs_frag;
106 if (DIP(&dino, di_db[i]) == 0) {
107 if (idesc->id_type == DATA && ndb >= 0) {
108 /* An empty block in a directory XXX */
109 getpathname(pathbuf, idesc->id_number,
111 pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
113 if (reply("ADJUST LENGTH") == 1) {
114 ginode(idesc->id_number, &ip);
115 DIP_SET(ip.i_dp, di_size,
116 i * sblock.fs_bsize);
118 "YOU MUST RERUN FSCK AFTERWARDS\n");
127 idesc->id_blkno = DIP(&dino, di_db[i]);
128 if (idesc->id_type != DATA)
129 ret = (*idesc->id_func)(idesc);
131 ret = dirscan(idesc);
135 idesc->id_numfrags = sblock.fs_frag;
136 remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR;
137 sizepb = sblock.fs_bsize;
138 for (i = 0; i < UFS_NIADDR; i++) {
139 sizepb *= NINDIR(&sblock);
140 idesc->id_level = i + 1;
141 if (DIP(&dino, di_ib[i])) {
142 idesc->id_blkno = DIP(&dino, di_ib[i]);
143 ret = iblock(idesc, remsize, BT_LEVEL1 + i);
146 } else if (remsize > 0) {
147 idesc->id_lbn += sizepb / sblock.fs_bsize;
148 if (idesc->id_type == DATA) {
149 /* An empty block in a directory XXX */
150 getpathname(pathbuf, idesc->id_number,
152 pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
154 if (reply("ADJUST LENGTH") == 1) {
155 ginode(idesc->id_number, &ip);
156 DIP_SET(ip.i_dp, di_size,
157 DIP(ip.i_dp, di_size) - remsize);
160 "YOU MUST RERUN FSCK AFTERWARDS\n");
174 iblock(struct inodesc *idesc, off_t isize, int type)
178 int i, n, (*func)(struct inodesc *), nif;
181 char pathbuf[MAXPATHLEN + 1];
183 if (idesc->id_type != DATA) {
184 func = idesc->id_func;
185 if (((n = (*func)(idesc)) & KEEPON) == 0)
189 bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
190 if (bp->b_errs != 0) {
196 for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++)
197 sizepb *= NINDIR(&sblock);
198 if (howmany(isize, sizepb) > NINDIR(&sblock))
199 nif = NINDIR(&sblock);
201 nif = howmany(isize, sizepb);
202 if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
203 for (i = nif; i < NINDIR(&sblock); i++) {
204 if (IBLK(bp, i) == 0)
206 (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
207 (u_long)idesc->id_number);
210 } else if (dofix(idesc, buf)) {
215 flush(fswritefd, bp);
217 for (i = 0; i < nif; i++) {
219 idesc->id_blkno = IBLK(bp, i);
221 if (idesc->id_level == 0) {
225 n = iblock(idesc, isize, type - 1);
233 idesc->id_lbn += sizepb / sblock.fs_bsize;
234 if (idesc->id_type == DATA && isize > 0) {
235 /* An empty block in a directory XXX */
236 getpathname(pathbuf, idesc->id_number,
238 pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
240 if (reply("ADJUST LENGTH") == 1) {
241 ginode(idesc->id_number, &ip);
242 DIP_SET(ip.i_dp, di_size,
243 DIP(ip.i_dp, di_size) - isize);
246 "YOU MUST RERUN FSCK AFTERWARDS\n");
261 * Finds the disk block address at the specified lbn within the inode
262 * specified by dp. This follows the whole tree and honors di_size and
263 * di_extsize so it is a true test of reachability. The lbn may be
264 * negative if an extattr or indirect block is requested.
267 ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags,
268 struct bufarea **bpp)
279 * Handle extattr blocks first.
281 if (lbn < 0 && lbn >= -UFS_NXADDR) {
283 if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1))
285 *frags = numfrags(&sblock,
286 sblksize(&sblock, dp->dp2.di_extsize, lbn));
287 return (dp->dp2.di_extb[lbn]);
290 * Now direct and indirect.
292 if (DIP(dp, di_mode) == IFLNK &&
293 DIP(dp, di_size) < sblock.fs_maxsymlinklen)
295 if (lbn >= 0 && lbn < UFS_NDADDR) {
296 *frags = numfrags(&sblock,
297 sblksize(&sblock, DIP(dp, di_size), lbn));
298 return (DIP(dp, di_db[lbn]));
300 *frags = sblock.fs_frag;
302 for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR;
303 i++, tmpval *= NINDIR(&sblock), cur = next) {
306 return (DIP(dp, di_ib[i]));
308 * Determine whether the lbn in question is within this tree.
310 if (lbn < 0 && -lbn >= next)
312 if (lbn > 0 && lbn >= next)
314 if (DIP(dp, di_ib[i]) == 0)
316 return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn,
319 pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino);
324 * Fetch an indirect block to find the block at a given lbn. The lbn
325 * may be negative to fetch a specific indirect block pointer or positive
326 * to fetch a specific block.
329 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn,
330 struct bufarea **bpp)
337 level = lbn_level(cur);
339 pfatal("Invalid indir lbn %jd in ino %ju\n",
340 lbn, (uintmax_t)ino);
341 if (level == 0 && lbn < 0)
342 pfatal("Invalid lbn %jd in ino %ju\n",
343 lbn, (uintmax_t)ino);
345 base = -(cur + level);
346 for (i = level; i > 0; i--)
347 lbnadd *= NINDIR(&sblock);
349 i = (lbn - base) / lbnadd;
351 i = (-lbn - base) / lbnadd;
352 if (i < 0 || i >= NINDIR(&sblock)) {
353 pfatal("Invalid indirect index %d produced by lbn %jd "
354 "in ino %ju\n", i, lbn, (uintmax_t)ino);
358 cur = base + (i * lbnadd);
360 cur = -(base + (i * lbnadd)) - (level - 1);
361 bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level);
366 if (cur == lbn || blk == 0) {
375 pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn,
377 return (indir_blkatoff(blk, ino, cur, lbn, bpp));
381 * Check that a block is a legal block number.
382 * Return 0 if in range, 1 if out of range.
/*
 * chkrange: sanity-check the run of cnt fragments starting at fragment
 * address blk.
 *
 * The visible tests are, in order:
 *   1. cnt and blk must both be positive, blk must be below maxfsblock and
 *      the run must not extend past maxfsblock ("out of range" message).
 *   2. The run must be no longer than one block (sblock.fs_frag fragments)
 *      and must not cross a block boundary ("bad size" message).
 *   3. With c = dtog(&sblock, blk), the cylinder group holding blk:
 *      a run that starts below cgdmin(c) must not extend past cgsblock(c);
 *      the other test requires the run not to extend past cgbase(c + 1).
 *
 * NOTE(review): the return statements, the conditions guarding the
 * printf() calls and the else joining the two cylinder-group tests are
 * not visible in this excerpt; confirm them against the full file.
 */
385 chkrange(ufs2_daddr_t blk, int cnt)
/* Written as cnt > maxfsblock - blk, presumably so blk + cnt is never formed here. */
389 if (cnt <= 0 || blk <= 0 || blk >= maxfsblock ||
390 cnt > maxfsblock - blk) {
392 printf("out of range: blk %ld, offset %i, size %d\n",
393 (long)blk, (int)fragnum(&sblock, blk), cnt);
/* fragnum() is blk's offset within its block; offset + cnt must fit in one block. */
396 if (cnt > sblock.fs_frag ||
397 fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
399 printf("bad size: blk %ld, offset %i, size %d\n",
400 (long)blk, (int)fragnum(&sblock, blk), cnt);
/* Remaining tests are relative to the cylinder group that contains blk. */
403 c = dtog(&sblock, blk);
404 if (blk < cgdmin(&sblock, c)) {
405 if ((blk + cnt) > cgsblock(&sblock, c)) {
407 printf("blk %ld < cgdmin %ld;",
408 (long)blk, (long)cgdmin(&sblock, c));
409 printf(" blk + cnt %ld > cgsbase %ld\n",
411 (long)cgsblock(&sblock, c));
/* Run starts at or after cgdmin (per the message below): it must end before the next group. */
416 if ((blk + cnt) > cgbase(&sblock, c+1)) {
418 printf("blk %ld >= cgdmin %ld;",
419 (long)blk, (long)cgdmin(&sblock, c));
420 printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
421 (long)(blk + cnt), (long)sblock.fs_fpg);
430 * General purpose interface for reading inodes.
432 * firstinum and lastinum track contents of getnextinode() cache (below).
434 static ino_t firstinum, lastinum;
435 static struct bufarea inobuf;
438 ginode(ino_t inumber, struct inode *ip)
441 struct ufs2_dinode *dp;
443 if (inumber < UFS_ROOTINO || inumber >= maxino)
444 errx(EEXIT, "bad inode number %ju to ginode",
446 ip->i_number = inumber;
447 if (inumber >= firstinum && inumber < lastinum) {
448 /* contents in getnextino() cache */
451 inobuf.b_index = firstinum;
452 } else if (icachebp != NULL &&
453 inumber >= icachebp->b_index &&
454 inumber < icachebp->b_index + INOPB(&sblock)) {
455 /* take an additional reference for the returned inode */
456 icachebp->b_refcnt++;
459 iblk = ino_to_fsba(&sblock, inumber);
460 /* release our cache-hold reference on old icachebp */
461 if (icachebp != NULL)
463 icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
464 if (icachebp->b_errs != 0) {
470 /* take a cache-hold reference on new icachebp */
471 icachebp->b_refcnt++;
472 icachebp->b_index = rounddown(inumber, INOPB(&sblock));
475 if (sblock.fs_magic == FS_UFS1_MAGIC) {
476 ip->i_dp = (union dinode *)
477 &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index];
480 ip->i_dp = (union dinode *)
481 &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index];
482 dp = (struct ufs2_dinode *)ip->i_dp;
483 /* Do not check hash of inodes being created */
484 if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) {
485 pwarn("INODE CHECK-HASH FAILED");
487 if (preen || reply("FIX") != 0) {
489 printf(" (FIXED)\n");
490 ffs_update_dinode_ckhash(&sblock, dp);
497 * Release a held inode.
/*
 * irelse: release the inode described by ip.
 *
 * Visible behavior:
 *   - ip->i_bp == NULL is treated as the mark of a failed inode read.
 *   - When debug is set on a UFS2 filesystem, a warning is issued if
 *     ffs_verify_dinode_ckhash() returns non-zero for the dinode.
 *   - It is a fatal error if the backing buffer's reference count is
 *     already zero or negative.
 *
 * NOTE(review): the action taken for a failed read, the remainder of the
 * bad check-hash branch and the statement that finally drops the buffer
 * reference are not visible in this excerpt.
 */
500 irelse(struct inode *ip)
503 /* Check for failed inode read */
504 if (ip->i_bp == NULL)
/* Debug-only consistency check; only UFS2 dinodes are passed to the check-hash routine. */
506 if (debug && sblock.fs_magic == FS_UFS2_MAGIC &&
507 ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
508 pwarn("irelse: releasing inode with bad check-hash");
/* Catch a release that is not matched by an earlier reference. */
511 if (ip->i_bp->b_refcnt <= 0)
512 pfatal("irelse: releasing unreferenced ino %ju\n",
513 (uintmax_t) ip->i_number);
518 * Special purpose version of ginode used to optimize first pass
519 * over all the inodes in numerical order.
521 static ino_t nextinum, lastvalidinum;
522 static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;
525 getnextinode(ino_t inumber, int rebuiltcg)
530 ufs2_daddr_t ndb, blk;
533 static caddr_t nextinop;
535 if (inumber != nextinum++ || inumber > lastvalidinum)
536 errx(EEXIT, "bad inode number %ju to nextinode",
538 if (inumber >= lastinum) {
540 firstinum = lastinum;
541 blk = ino_to_fsba(&sblock, lastinum);
542 if (readcount % readpercg == 0) {
544 lastinum += partialcnt;
550 * Flush old contents in case they have been updated.
551 * If getblk encounters an error, it will already have zeroed
552 * out the buffer, so we do not need to do so here.
554 if (inobuf.b_refcnt != 0)
555 pfatal("Non-zero getnextinode() ref count %d\n",
557 flush(fswritefd, &inobuf);
558 getblk(&inobuf, blk, size);
559 nextinop = inobuf.b_un.b_buf;
561 dp = (union dinode *)nextinop;
562 if (sblock.fs_magic == FS_UFS1_MAGIC)
563 nextinop += sizeof(struct ufs1_dinode);
565 nextinop += sizeof(struct ufs2_dinode);
566 if ((ckhashadd & CK_INODE) != 0) {
567 ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
570 if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
571 pwarn("INODE CHECK-HASH FAILED");
574 ip.i_number = inumber;
576 if (preen || reply("FIX") != 0) {
578 printf(" (FIXED)\n");
579 ffs_update_dinode_ckhash(&sblock,
580 (struct ufs2_dinode *)dp);
584 if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) {
586 * Try to determine if we have reached the end of the
589 mode = DIP(dp, di_mode) & IFMT;
591 if (memcmp(dp->dp2.di_db, zino.dp2.di_db,
592 UFS_NDADDR * sizeof(ufs2_daddr_t)) ||
593 memcmp(dp->dp2.di_ib, zino.dp2.di_ib,
594 UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
595 dp->dp2.di_mode || dp->dp2.di_size)
601 ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
604 if (mode == IFBLK || mode == IFCHR)
608 * Fake ndb value so direct/indirect block checks below
609 * will detect any garbage after symlink string.
611 if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
612 ndb = howmany(DIP(dp, di_size),
613 sizeof(ufs2_daddr_t));
614 if (ndb > UFS_NDADDR) {
615 j = ndb - UFS_NDADDR;
616 for (ndb = 1; j > 1; j--)
617 ndb *= NINDIR(&sblock);
622 for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++)
623 if (DIP(dp, di_db[j]) != 0)
625 for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++)
626 ndb /= NINDIR(&sblock);
627 for (; j < UFS_NIADDR; j++)
628 if (DIP(dp, di_ib[j]) != 0)
/*
 * setinodebuf: set up the file-scope read-ahead state (lastvalidinum,
 * inobuf, inobufsize, fullcnt, readpercg, partialcnt, partialsize) for
 * cylinder group cg, of which the first inosused inodes are to be read.
 *
 *   cg       - cylinder group number; its first inode is cg * fs_ipg.
 *   inosused - number of inodes of that group to cover.
 *
 * The inode buffer is allocated on first use (when inobuf.b_un.b_buf is
 * NULL) with a size of MAX(INOBUFSIZE, fs_bsize) rounded up to a block;
 * allocation failure terminates the program via errx().
 *
 * NOTE(review): several statements are not visible in this excerpt,
 * including the body of the "partialcnt != 0" branch and the else that
 * presumably guards the last two assignments.
 */
635 setinodebuf(int cg, ino_t inosused)
639 inum = cg * sblock.fs_ipg;
/* Last inode number of this group that is covered by inosused. */
640 lastvalidinum = inum + inosused - 1;
644 /* Flush old contents in case they have been updated */
645 flush(fswritefd, &inobuf);
/* Allocate the buffer only once; a non-NULL b_buf skips this. */
647 if (inobuf.b_un.b_buf == NULL) {
648 inobufsize = blkroundup(&sblock,
649 MAX(INOBUFSIZE, sblock.fs_bsize));
650 initbarea(&inobuf, BT_INODES);
651 if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL)
652 errx(EEXIT, "cannot allocate space for inode buffer");
/* fullcnt: how many on-disk inodes (UFS1 or UFS2 sized) fit in one full buffer. */
654 fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
655 sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
/* readpercg: count of full buffers; partialcnt: inodes left over after them. */
656 readpercg = inosused / fullcnt;
657 partialcnt = inosused % fullcnt;
/* Byte size of the leftover inodes, rounded up to a whole fragment. */
658 partialsize = fragroundup(&sblock,
659 partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
660 sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
661 if (partialcnt != 0) {
/* Presumably the no-remainder case: the final read is a full buffer. */
664 partialcnt = fullcnt;
665 partialsize = inobufsize;
670 freeblock(struct inodesc *idesc)
673 struct bufarea *cgbp;
678 blkno = idesc->id_blkno;
679 if (idesc->id_type == SNAP) {
680 pfatal("clearing a snapshot dinode\n");
683 size = lfragtosize(&sblock, idesc->id_numfrags);
684 if (snapblkfree(&sblock, blkno, size, idesc->id_number,
687 for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
688 if (chkrange(blkno, 1)) {
690 } else if (testbmap(blkno)) {
691 for (dlp = duplist; dlp; dlp = dlp->next) {
692 if (dlp->dup != blkno)
694 dlp->dup = duplist->dup;
696 duplist = duplist->next;
707 * If all successfully returned, account for them.
710 cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
711 cgp = cgbp->b_un.b_cg;
712 if (idesc->id_numfrags == sblock.fs_frag)
713 cgp->cg_cs.cs_nbfree++;
715 cgp->cg_cs.cs_nffree += idesc->id_numfrags;
722 * Prepare a snapshot file for being removed.
725 snapremove(ino_t inum)
727 struct inodesc idesc;
731 for (i = 0; i < snapcnt; i++)
732 if (snaplist[i].i_number == inum)
738 if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
739 printf("snapremove: inode %jd is not a snapshot\n",
746 printf("snapremove: remove %sactive snapshot %jd\n",
747 i == snapcnt ? "in" : "", (intmax_t)inum);
749 * If on active snapshot list, remove it.
752 for (i++; i < FSMAXSNAP; i++) {
753 if (sblock.fs_snapinum[i] == 0)
755 snaplist[i - 1] = snaplist[i];
756 sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
758 sblock.fs_snapinum[i - 1] = 0;
759 bzero(&snaplist[i - 1], sizeof(struct inode));
762 memset(&idesc, 0, sizeof(struct inodesc));
763 idesc.id_type = SNAP;
764 idesc.id_func = snapclean;
765 idesc.id_number = inum;
766 (void)ckinode(ip.i_dp, &idesc);
767 DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
773 snapclean(struct inodesc *idesc)
779 blkno = idesc->id_blkno;
784 if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
785 if (idesc->id_lbn < UFS_NDADDR) {
786 DIP_SET(dp, di_db[idesc->id_lbn], 0);
789 IBLK_SET(bp, bp->b_index, 0);
797 * Notification that a block is being freed. Return zero if the free
798 * should be allowed to proceed. Return non-zero if the snapshot file
799 * wants to claim the block. The block will be claimed if it is an
800 * uncopied part of one of the snapshots. It will be freed if it is
801 * either a BLK_NOCOPY or has already been copied in all of the snapshots.
802 * If a fragment is being freed, then all snapshots that care about
803 * it must make a copy since a snapshot file can only claim full sized
804 * blocks. Note that if more than one snapshot file maps the block,
805 * we can pick one at random to claim it. Since none of the snapshots
806 * can change, we are assured that they will all see the same unmodified
807 * image. When deleting a snapshot file (see ino_trunc above), we
808 * must push any of these claimed blocks to one of the other snapshots
809 * that maps it. These claimed blocks are easily identified as they will
810 * have a block number equal to their logical block number within the
811 * snapshot. A copied block can never have this property because it
812 * must always have been allocated from a BLK_NOCOPY location.
815 snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
816 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
820 struct bufarea *snapbp;
822 ufs2_daddr_t blkno, relblkno;
823 int i, frags, claimedblk, copydone;
825 /* If no snapshots, nothing to do */
829 printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
830 (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
831 relblkno = blknum(fs, bno);
832 lbn = fragstoblks(fs, relblkno);
833 /* Direct blocks are always pre-copied */
834 if (lbn < UFS_NDADDR)
838 for (i = 0; i < snapcnt; i++) {
840 * Lookup block being freed.
844 blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
845 lbn, &frags, &snapbp);
847 * Check to see if block needs to be copied.
851 * A block that we map is being freed. If it has not
852 * been claimed yet, we will claim or copy it (below).
855 } else if (blkno == BLK_SNAP) {
857 * No previous snapshot claimed the block,
858 * so it will be freed and become a BLK_NOCOPY
859 * (don't care) for us.
862 pfatal("snapblkfree: inconsistent block type");
863 IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
867 } else /* BLK_NOCOPY or default */ {
869 * If the snapshot has already copied the block
870 * (default), or does not care about the block,
877 * If this is a full size block, we will just grab it
878 * and assign it to the snapshot inode. Otherwise we
879 * will proceed to copy it. See explanation for this
880 * routine as to why only a single snapshot needs to
883 if (size == fs->fs_bsize) {
885 printf("Grabonremove snapshot %ju lbn %jd "
886 "from inum %ju\n", (intmax_t)ip.i_number,
887 (intmax_t)lbn, (uintmax_t)inum);
888 IBLK_SET(snapbp, snapbp->b_index, relblkno);
891 DIP_SET(dp, di_blocks,
892 DIP(dp, di_blocks) + btodb(size));
897 /* First time through, read the contents of the old block. */
900 if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
901 fs->fs_bsize) != 0) {
902 pfatal("Could not read snapshot %ju block "
903 "%jd\n", (intmax_t)ip.i_number,
909 * This allocation will never require any additional
910 * allocations for the snapshot inode.
912 blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
915 pfatal("Could not allocate block for snapshot %ju\n",
916 (intmax_t)ip.i_number);
920 printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
921 "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
922 (intmax_t)lbn, (uintmax_t)inum, size,
924 blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
925 IBLK_SET(snapbp, snapbp->b_index, blkno);
928 DIP_SET(dp, di_blocks,
929 DIP(dp, di_blocks) + btodb(fs->fs_bsize));
936 * Notification that a block is being written. Return if the block
937 * is part of a snapshot as snapshots never track other snapshots.
938 * The block will be copied in all of the snapshots that are tracking
939 * it and have not yet copied it. Some buffers may hold more than one
940 * block. Here we need to check each block in the buffer.
/*
 * copyonwrite: run the per-block copy-on-write check on every filesystem
 * block covered by buffer bp.
 *
 *   fs            - superblock used for the size and address conversions.
 *   bp            - buffer about to be written; b_size and b_bno are read.
 *   checkblkavail - callback handed through unchanged to chkcopyonwrite().
 *
 * The number of blocks is bp->b_size rounded up to a block boundary and
 * divided by fs_bsize; the first block address is bp->b_bno converted
 * with dbtofsb() and aligned down with blknum(). Each block is passed to
 * chkcopyonwrite() in turn, stepping by fs_frag fragments.
 *
 * NOTE(review): the early-exit tests (no snapshots, and the snapshot case
 * mentioned in the comment preceding this function) and the condition
 * guarding the prtbuf() call are not visible in this excerpt.
 */
943 copyonwrite(struct fs *fs, struct bufarea *bp,
944 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
946 ufs2_daddr_t copyblkno;
949 /* If no snapshots, nothing to do. */
/* A buffer may span several blocks; a partial trailing block counts as one. */
952 numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
954 prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
955 (intmax_t)numblks, numblks > 1 ? "s" : "");
/* Fragment address of the first block covered by the buffer. */
956 copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
957 for (i = 0; i < numblks; i++) {
958 chkcopyonwrite(fs, copyblkno, checkblkavail);
/* Advance one full block, expressed in fragments. */
959 copyblkno += fs->fs_frag;
964 chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
965 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
969 struct bufarea *snapbp;
971 int i, frags, copydone;
974 lbn = fragstoblks(fs, copyblkno);
975 /* Direct blocks are always pre-copied */
976 if (lbn < UFS_NDADDR)
979 for (i = 0; i < snapcnt; i++) {
981 * Lookup block being written.
985 blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
987 * Check to see if block needs to be copied.
991 * A block that we have already copied or don't track.
996 /* First time through, read the contents of the old block. */
999 if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
1000 fs->fs_bsize) != 0) {
1001 pfatal("Could not read snapshot %ju block "
1002 "%jd\n", (intmax_t)ip.i_number,
1003 (intmax_t)copyblkno);
1008 * This allocation will never require any additional
1009 * allocations for the snapshot inode.
1011 if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
1012 checkblkavail)) == 0) {
1013 pfatal("Could not allocate block for snapshot %ju\n",
1014 (intmax_t)ip.i_number);
1018 prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
1019 "blkno %ju setting in buffer",
1020 (intmax_t)ip.i_number, (intmax_t)lbn,
1022 blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
1023 IBLK_SET(snapbp, snapbp->b_index, blkno);
1026 DIP_SET(dp, di_blocks,
1027 DIP(dp, di_blocks) + btodb(fs->fs_bsize));
1034 * Traverse an inode and check that its block count is correct
1035 * fixing it if necessary.
1038 check_blkcnt(struct inode *ip)
1040 struct inodesc idesc;
1046 memset(&idesc, 0, sizeof(struct inodesc));
1047 idesc.id_func = pass1check;
1048 idesc.id_number = ip->i_number;
1049 idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
1050 (void)ckinode(dp, &idesc);
1051 if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
1052 ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
1053 for (j = 0; j < UFS_NXADDR; j++) {
1055 (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
1056 idesc.id_numfrags = numfrags(&sblock,
1057 fragroundup(&sblock, offset));
1059 idesc.id_numfrags = sblock.fs_frag;
1060 if (dp->dp2.di_extb[j] == 0)
1062 idesc.id_blkno = dp->dp2.di_extb[j];
1063 ret = (*idesc.id_func)(&idesc);
1068 idesc.id_entryno *= btodb(sblock.fs_fsize);
1069 if (DIP(dp, di_blocks) != idesc.id_entryno) {
1070 if (!(sujrecovery && preen)) {
1071 pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
1072 (u_long)idesc.id_number,
1073 (uintmax_t)DIP(dp, di_blocks),
1074 (uintmax_t)idesc.id_entryno);
1076 printf(" (CORRECTED)\n");
1077 else if (reply("CORRECT") == 0)
1080 if (bkgrdflag == 0) {
1081 DIP_SET(dp, di_blocks, idesc.id_entryno);
1084 cmd.value = idesc.id_number;
1085 cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
1087 printf("adjblkcnt ino %ju amount %lld\n",
1088 (uintmax_t)cmd.value, (long long)cmd.size);
1089 if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
1090 &cmd, sizeof cmd) == -1)
1091 rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
1103 * Flush old contents in case they have been updated.
1105 flush(fswritefd, &inobuf);
1106 if (inobuf.b_un.b_buf != NULL)
1107 free((char *)inobuf.b_un.b_buf);
1108 inobuf.b_un.b_buf = NULL;
1109 firstinum = lastinum = 0;
1111 * Reload the snapshot inodes in case any of them changed.
1113 for (i = 0; i < snapcnt; i++) {
1114 bp = snaplist[i].i_bp;
1115 bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
1121 * Routines to maintain information about directory inodes.
1122 * This is built during the first pass and used during the
1123 * second and third passes.
1125 * Enter inodes into the cache.
/*
 * cacheino: create an inoinfo record for inode inumber from its on-disk
 * inode dp and enter it in both the inphash[] hash chains and the inpsort[]
 * array.
 *
 *   dp      - on-disk inode; di_size, di_dirdepth, di_db[] and di_ib[]
 *             are read through DIP().
 *   inumber - inode number used as the hash key (inumber % dirhash).
 *
 * A record that already exists for inumber is reported with pfatal().
 * The record stores blks block pointers: the direct pointers needed to
 * cover di_size, or all UFS_NDADDR direct plus UFS_NIADDR indirect
 * pointers when the file is larger than the direct blocks can map.
 * Allocation failures terminate the program via errx().
 *
 * NOTE(review): the value of blks for a size of zero or less, the test
 * guarding the first errx() and the statement that grows listmax are not
 * visible in this excerpt.
 */
1128 cacheino(union dinode *dp, ino_t inumber)
1130 struct inoinfo *inp;
1133 if (getinoinfo(inumber) != NULL)
1134 pfatal("cacheino: duplicate entry for ino %jd\n",
/* More blocks than the direct pointers cover: keep every direct and indirect pointer. */
1136 if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
1137 blks = UFS_NDADDR + UFS_NIADDR;
1138 else if (DIP(dp, di_size) > 0)
1139 blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
/* Sized as the struct plus blks - 1 extra pointers; presumably i_blks[] is declared with one element. */
1142 inp = (struct inoinfo *)
1143 Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
1145 errx(EEXIT, "cannot increase directory list");
1146 SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
/* Only the root inode starts with a known parent (itself); all others start at 0. */
1148 inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
1149 inp->i_dotdot = (ino_t)0;
1150 inp->i_number = inumber;
1151 inp->i_isize = DIP(dp, di_size);
1152 inp->i_depth = DIP(dp, di_dirdepth);
1153 inp->i_numblks = blks;
/* Direct pointers first, then the indirect pointers in the slots after them. */
1154 for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
1155 inp->i_blks[i] = DIP(dp, di_db[i]);
1156 if (blks > UFS_NDADDR)
1157 for (i = 0; i < UFS_NIADDR; i++)
1158 inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
/* inpsort[] is full: resize it to listmax entries before appending. */
1159 if (inplast == listmax) {
1161 inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
1162 listmax, sizeof(struct inoinfo *));
1163 if (inpsort == NULL)
1164 errx(EEXIT, "cannot increase directory list");
1166 inpsort[inplast++] = inp;
1171 * Look up an inode cache structure.
/*
 * getinoinfo: search the hash chain inphash[inumber % dirhash] for the
 * inoinfo record of inode inumber. Entries whose i_number differs from
 * inumber are passed over.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * cacheino() compares the result against NULL, so presumably the matching
 * record is returned and NULL means "not cached".
 */
1174 getinoinfo(ino_t inumber)
1176 struct inoinfo *inp;
1178 SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1179 if (inp->i_number != inumber)
1187 * Remove an entry from the inode cache and disk-order sorted list.
1188 * Return 0 on success and 1 on failure.
1191 removecachedino(ino_t inumber)
1193 struct inoinfo *inp, **inpp;
1197 SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1198 if (inp->i_number != inumber)
1200 SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash);
1201 for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
1204 *inpp = inpsort[inplast - 1];
1212 pfatal("removecachedino: entry for ino %jd not found on %s list\n",
1213 (intmax_t)inumber, listtype);
1218 * Clean up all the inode cache structures.
1223 struct inoinfo **inpp;
1225 if (inphash == NULL)
1227 for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
1228 free((char *)(*inpp));
1229 free((char *)inphash);
1231 free((char *)inpsort);
/*
 * inodirty: note that the in-memory copy of inode ip has been modified.
 * On a UFS2 filesystem the dinode check-hash is recomputed with
 * ffs_update_dinode_ckhash(); this is not done for other formats.
 *
 * NOTE(review): any statement following the check-hash update (for example
 * marking the backing buffer dirty) is not visible in this excerpt.
 */
1236 inodirty(struct inode *ip)
1239 if (sblock.fs_magic == FS_UFS2_MAGIC)
1240 ffs_update_dinode_ckhash(&sblock,
1241 (struct ufs2_dinode *)ip->i_dp);
1246 clri(struct inodesc *idesc, const char *type, int flag)
1251 ginode(idesc->id_number, &ip);
1254 pwarn("%s %s", type,
1255 (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
1259 if (preen || reply("CLEAR") == 1) {
1261 printf(" (CLEARED)\n");
1263 if (bkgrdflag == 0) {
1264 if (idesc->id_type == SNAP) {
1265 snapremove(idesc->id_number);
1266 idesc->id_type = ADDR;
1268 (void)ckinode(dp, idesc);
1269 inoinfo(idesc->id_number)->ino_state = USTATE;
1273 cmd.value = idesc->id_number;
1274 cmd.size = -DIP(dp, di_nlink);
1276 printf("adjrefcnt ino %ld amt %lld\n",
1277 (long)cmd.value, (long long)cmd.size);
1278 if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
1279 &cmd, sizeof cmd) == -1)
1280 rwerror("ADJUST INODE", cmd.value);
/*
 * findname: examine the directory entry idesc->id_dirp while looking for
 * the entry that refers to inode idesc->id_parent (presumably called once
 * per entry of a directory scan).
 *
 * An entry is passed over, and idesc->id_entryno incremented, when its
 * d_ino differs from id_parent or while id_entryno is still below 2.
 * Otherwise d_namlen + 1 bytes of the entry's name (presumably the name
 * plus its terminating NUL) are copied into idesc->id_name and STOP|FOUND
 * is returned.
 *
 * NOTE(review): the return statement of the passed-over path is not
 * visible in this excerpt.
 */
1287 findname(struct inodesc *idesc)
1289 struct direct *dirp = idesc->id_dirp;
1291 if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1292 idesc->id_entryno++;
/* memmove() rather than memcpy(): safe even if the two name buffers overlap. */
1295 memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
1296 return (STOP|FOUND);
/*
 * findino: examine the directory entry idesc->id_dirp while looking for
 * the entry whose name equals idesc->id_name (presumably called once per
 * entry of a directory scan).
 *
 * Entries with d_ino == 0 get separate handling. When the name matches
 * and the entry's inode number lies in [UFS_ROOTINO, maxino), that inode
 * number is stored in idesc->id_parent and STOP|FOUND is returned.
 *
 * NOTE(review): the statements executed for d_ino == 0 and for a
 * non-matching entry are not visible in this excerpt.
 */
1300 findino(struct inodesc *idesc)
1302 struct direct *dirp = idesc->id_dirp;
1304 if (dirp->d_ino == 0)
/* A name match only counts if the inode number it carries is in the valid range. */
1306 if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1307 dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) {
1308 idesc->id_parent = dirp->d_ino;
1309 return (STOP|FOUND);
/*
 * clearentry: examine the directory entry idesc->id_dirp while looking for
 * the entry that refers to inode idesc->id_parent, using the same selection
 * test as findname(): an entry is passed over, and idesc->id_entryno
 * incremented, when its d_ino differs from id_parent or while id_entryno
 * is still below 2. For the selected entry STOP|FOUND|ALTERED is returned.
 *
 * NOTE(review): the return statement of the passed-over path and the
 * statement that modifies the selected entry (implied by ALTERED and the
 * function name) are not visible in this excerpt.
 */
1315 clearentry(struct inodesc *idesc)
1317 struct direct *dirp = idesc->id_dirp;
1319 if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1320 idesc->id_entryno++;
1324 return (STOP|FOUND|ALTERED);
1328 prtinode(struct inode *ip)
1336 printf(" I=%lu ", (u_long)ip->i_number);
1337 if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino)
1340 if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
1341 printf("%s ", pw->pw_name);
1343 printf("%u ", (unsigned)DIP(dp, di_uid));
1344 printf("MODE=%o\n", DIP(dp, di_mode));
1346 printf("%s: ", cdevname);
1347 printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
1348 t = DIP(dp, di_mtime);
1349 if ((p = ctime(&t)) != NULL)
1350 printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
1354 blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
1357 pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
1359 switch (inoinfo(ino)->ino_state) {
1363 inoinfo(ino)->ino_state = FCLEAR;
1368 inoinfo(ino)->ino_state = DCLEAR;
1376 errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state);
1382 * allocate an unused inode
1385 allocino(ino_t request, int type)
1390 struct bufarea *cgbp;
1396 request = UFS_ROOTINO;
1398 } else if (inoinfo(request)->ino_state != USTATE)
1401 for (ino = request; ino < maxino; ino++)
1402 if (inoinfo(ino)->ino_state == USTATE)
1406 cg = ino_to_cg(&sblock, ino);
1407 cgbp = cglookup(cg);
1408 cgp = cgbp->b_un.b_cg;
1409 if (!check_cgmagic(cg, cgbp)) {
1412 request = (cg + 1) * sblock.fs_ipg;
1415 setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1416 cgp->cg_cs.cs_nifree--;
1417 switch (type & IFMT) {
1419 inoinfo(ino)->ino_state = DSTATE;
1420 cgp->cg_cs.cs_ndir++;
1424 inoinfo(ino)->ino_state = FSTATE;
1432 memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ?
1433 sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
1434 DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
1435 std_checkblkavail));
1436 if (DIP(dp, di_db[0]) == 0) {
1437 inoinfo(ino)->ino_state = USTATE;
1442 DIP_SET(dp, di_mode, type);
1443 DIP_SET(dp, di_atime, time(NULL));
1444 DIP_SET(dp, di_ctime, DIP(dp, di_atime));
1445 DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
1446 DIP_SET(dp, di_size, sblock.fs_fsize);
1447 DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
1451 inoinfo(ino)->ino_type = IFTODT(type);
1456 * deallocate an inode
1461 struct inodesc idesc;
1465 memset(&idesc, 0, sizeof(struct inodesc));
1466 idesc.id_type = ADDR;
1467 idesc.id_func = freeblock;
1468 idesc.id_number = ino;
1471 (void)ckinode(dp, &idesc);
1475 inoinfo(ino)->ino_state = USTATE;