module/zfs/zpl_super.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
  23  */
  24
  25
  26 #include <sys/zfs_vfsops.h>
  27 #include <sys/zfs_vnops.h>
  28 #include <sys/zfs_znode.h>
  29 #include <sys/zfs_ctldir.h>
  30 #include <sys/zpl.h>
  31
  32
/*
 * ->alloc_inode() hook: obtain a new inode for the given super block
 * from zfs_inode_alloc().  The allocator's return value is checked with
 * VERIFY3S to be zero before the inode is used.
 */
static struct inode *
zpl_inode_alloc(struct super_block *sb)
{
        struct inode *ip;

        VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
        /* Every newly allocated inode starts out with i_version 1. */
        ip->i_version = 1;

        return (ip);
}
  43
/*
 * ->destroy_inode() hook: hand the inode to zfs_inode_destroy().  The
 * assertion documents the expectation that no references (i_count)
 * remain at this point.
 */
static void
zpl_inode_destroy(struct inode *ip)
{
        ASSERT(atomic_read(&ip->i_count) == 0);
        zfs_inode_destroy(ip);
}
  50
  51 /*
  52  * Called from __mark_inode_dirty() to reflect that something in the
 * inode has changed.  We use it to ensure the znode system attributes
 * are always strictly up to date with respect to the inode.
  55  */
  56 #ifdef HAVE_DIRTY_INODE_WITH_FLAGS
  57 static void
  58 zpl_dirty_inode(struct inode *ip, int flags)
  59 {
  60         zfs_dirty_inode(ip, flags);
  61 }
  62 #else
  63 static void
  64 zpl_dirty_inode(struct inode *ip)
  65 {
  66         zfs_dirty_inode(ip, 0);
  67 }
  68 #endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
  69
  70 /*
  71  * When ->drop_inode() is called its return value indicates if the
  72  * inode should be evicted from the inode cache.  If the inode is
  73  * unhashed and has no links the default policy is to evict it
  74  * immediately.
  75  *
  76  * Prior to 2.6.36 this eviction was accomplished by the vfs calling
  77  * ->delete_inode().  It was ->delete_inode()'s responsibility to
  78  * truncate the inode pages and call clear_inode().  The call to
  79  * clear_inode() synchronously invalidates all the buffers and
 * calls ->clear_inode().  It was ->clear_inode()'s responsibility
 * to clean up any filesystem specific data before freeing the inode.
  82  *
  83  * This elaborate mechanism was replaced by ->evict_inode() which
  84  * does the job of both ->delete_inode() and ->clear_inode().  It
  85  * will be called exactly once, and when it returns the inode must
 * be in a state where it can simply be freed.
  87  *
  88  * The ->evict_inode() callback must minimally truncate the inode pages,
  89  * and call clear_inode().  For 2.6.35 and later kernels this will
  90  * simply update the inode state, with the sync occurring before the
  91  * truncate in evict().  For earlier kernels clear_inode() maps to
  92  * end_writeback() which is responsible for completing all outstanding
 * write back.  In either case, once this is done it is safe to clean up
 * any remaining inode or filesystem specific data via zfs_inactive().
  96  */
  97 #ifdef HAVE_EVICT_INODE
/*
 * ->evict_inode() callback.  The steps run in a fixed order: the inode
 * is truncated to size zero, clear_inode() is called, and only then is
 * the inode passed to zfs_inactive().
 */
static void
zpl_evict_inode(struct inode *ip)
{
        /* Drop the inode's pages by truncating it to zero length. */
        truncate_setsize(ip, 0);
        clear_inode(ip);
        /* Last step: release the remaining ZFS-side state for the inode. */
        zfs_inactive(ip);
}
 105
 106 #else
 107
/*
 * ->clear_inode() callback, used when HAVE_EVICT_INODE is not defined.
 * It simply passes the inode to zfs_inactive().
 */
static void
zpl_clear_inode(struct inode *ip)
{
        zfs_inactive(ip);
}
 113
/*
 * ->delete_inode() callback, used when HAVE_EVICT_INODE is not defined.
 * Truncates the inode to size zero and then calls clear_inode().
 */
static void
zpl_inode_delete(struct inode *ip)
{
        /* Truncation must come before clear_inode(). */
        truncate_setsize(ip, 0);
        clear_inode(ip);
}
 120
 121 #endif /* HAVE_EVICT_INODE */
 122
 123 static void
 124 zpl_put_super(struct super_block *sb)
 125 {
 126         int error;
 127
 128         error = -zfs_umount(sb);
 129         ASSERT3S(error, <=, 0);
 130 }
 131
 132 static int
 133 zpl_sync_fs(struct super_block *sb, int wait)
 134 {
 135         cred_t *cr = CRED();
 136         int error;
 137
 138         crhold(cr);
 139         error = -zfs_sync(sb, wait, cr);
 140         crfree(cr);
 141         ASSERT3S(error, <=, 0);
 142
 143         return (error);
 144 }
 145
 146 static int
 147 zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
 148 {
 149         int error;
 150
 151         error = -zfs_statvfs(dentry, statp);
 152         ASSERT3S(error, <=, 0);
 153
 154         return (error);
 155 }
 156
 157 static int
 158 zpl_remount_fs(struct super_block *sb, int *flags, char *data)
 159 {
 160         int error;
 161         error = -zfs_remount(sb, flags, data);
 162         ASSERT3S(error, <=, 0);
 163
 164         return (error);
 165 }
 166
 167 static void
 168 zpl_umount_begin(struct super_block *sb)
 169 {
 170         zfs_sb_t *zsb = sb->s_fs_info;
 171         int count;
 172
 173         /*
 174          * Best effort to unmount snapshots in .zfs/snapshot/.  Normally this
 175          * isn't required because snapshots have the MNT_SHRINKABLE flag set.
 176          */
 177         if (zsb->z_ctldir)
 178                 (void) zfsctl_unmount_snapshots(zsb, MNT_FORCE, &count);
 179 }
 180
 181 /*
 * ZFS specific features must be explicitly handled here; the VFS will
 * automatically handle the following generic functionality.
 184  *
 185  *   MNT_NOSUID,
 186  *   MNT_NODEV,
 187  *   MNT_NOEXEC,
 188  *   MNT_NOATIME,
 189  *   MNT_NODIRATIME,
 190  *   MNT_READONLY,
 191  *   MNT_STRICTATIME,
 192  *   MS_SYNCHRONOUS,
 193  *   MS_DIRSYNC,
 194  *   MS_MANDLOCK.
 195  */
 196 static int
 197 __zpl_show_options(struct seq_file *seq, zfs_sb_t *zsb)
 198 {
 199         seq_printf(seq, ",%s", zsb->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
 200
 201 #ifdef CONFIG_FS_POSIX_ACL
 202         switch (zsb->z_acl_type) {
 203         case ZFS_ACLTYPE_POSIXACL:
 204                 seq_puts(seq, ",posixacl");
 205                 break;
 206         default:
 207                 seq_puts(seq, ",noacl");
 208                 break;
 209         }
 210 #endif /* CONFIG_FS_POSIX_ACL */
 211
 212         return (0);
 213 }
 214
 215 #ifdef HAVE_SHOW_OPTIONS_WITH_DENTRY
 216 static int
 217 zpl_show_options(struct seq_file *seq, struct dentry *root)
 218 {
 219         return __zpl_show_options(seq, root->d_sb->s_fs_info);
 220 }
 221 #else
 222 static int
 223 zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp)
 224 {
 225         return __zpl_show_options(seq, vfsp->mnt_sb->s_fs_info);
 226 }
 227 #endif /* HAVE_SHOW_OPTIONS_WITH_DENTRY */
 228
 229 static int
 230 zpl_fill_super(struct super_block *sb, void *data, int silent)
 231 {
 232         int error;
 233
 234         error = -zfs_domount(sb, data, silent);
 235         ASSERT3S(error, <=, 0);
 236
 237         return (error);
 238 }
 239
 240 #ifdef HAVE_MOUNT_NODEV
 241 static struct dentry *
 242 zpl_mount(struct file_system_type *fs_type, int flags,
 243     const char *osname, void *data)
 244 {
 245         zpl_mount_data_t zmd = { osname, data };
 246
 247         return mount_nodev(fs_type, flags, &zmd, zpl_fill_super);
 248 }
 249 #else
 250 static int
 251 zpl_get_sb(struct file_system_type *fs_type, int flags,
 252     const char *osname, void *data, struct vfsmount *mnt)
 253 {
 254         zpl_mount_data_t zmd = { osname, data };
 255
 256         return get_sb_nodev(fs_type, flags, &zmd, zpl_fill_super, mnt);
 257 }
 258 #endif /* HAVE_MOUNT_NODEV */
 259
 260 static void
 261 zpl_kill_sb(struct super_block *sb)
 262 {
 263         zfs_preumount(sb);
 264         kill_anon_super(sb);
 265
 266 #ifdef HAVE_S_INSTANCES_LIST_HEAD
 267         sb->s_instances.next = &(zpl_fs_type.fs_supers);
 268 #endif /* HAVE_S_INSTANCES_LIST_HEAD */
 269 }
 270
 271 #ifdef HAVE_SHRINK
 272 /*
 273  * Linux 3.1 - 3.x API
 274  *
 275  * The Linux 3.1 API introduced per-sb cache shrinkers to replace the
 276  * global ones.  This allows us a mechanism to cleanly target a specific
 277  * zfs file system when the dnode and inode caches grow too large.
 278  *
 279  * In addition, the 3.0 kernel added the iterate_supers_type() helper
 280  * function which is used to safely walk all of the zfs file systems.
 281  */
 282 static void
 283 zpl_prune_sb(struct super_block *sb, void *arg)
 284 {
 285         int objects = 0;
 286         int error;
 287
 288         error = -zfs_sb_prune(sb, *(unsigned long *)arg, &objects);
 289         ASSERT3S(error, <=, 0);
 290
 291         return;
 292 }
 293
 294 void
 295 zpl_prune_sbs(int64_t bytes_to_scan, void *private)
 296 {
 297         unsigned long nr_to_scan = (bytes_to_scan / sizeof(znode_t));
 298
 299         iterate_supers_type(&zpl_fs_type, zpl_prune_sb, &nr_to_scan);
 300         kmem_reap();
 301 }
 302 #else
 303 /*
 304  * Linux 2.6.x - 3.0 API
 305  *
 * These best effort interfaces are provided by the SPL to induce
 * the Linux VM subsystem to reclaim a fraction of both the dnode and
 * inode caches.  Ideally, we want to just target the zfs file systems;
 * however, our only option is to reclaim from them all.
 310  */
 311 void
 312 zpl_prune_sbs(int64_t bytes_to_scan, void *private)
 313 {
 314         unsigned long nr_to_scan = (bytes_to_scan / sizeof(znode_t));
 315
 316         shrink_dcache_memory(nr_to_scan, GFP_KERNEL);
 317         shrink_icache_memory(nr_to_scan, GFP_KERNEL);
 318         kmem_reap();
 319 }
 320 #endif /* HAVE_SHRINK */
 321
 322 #ifdef HAVE_NR_CACHED_OBJECTS
 323 static int
 324 zpl_nr_cached_objects(struct super_block *sb)
 325 {
 326         zfs_sb_t *zsb = sb->s_fs_info;
 327         int nr;
 328
 329         mutex_enter(&zsb->z_znodes_lock);
 330         nr = zsb->z_nr_znodes;
 331         mutex_exit(&zsb->z_znodes_lock);
 332
 333         return (nr);
 334 }
 335 #endif /* HAVE_NR_CACHED_OBJECTS */
 336
 337 #ifdef HAVE_FREE_CACHED_OBJECTS
 338 /*
 * Attempt to evict some meta data from the cache.  The ARC operates in
 * terms of bytes while the Linux VFS uses objects.  Because this is
 * just a best effort eviction and the exact values aren't critical, we
 * extrapolate from an object count to a byte size using the znode_t size.
 343  */
 344 static void
 345 zpl_free_cached_objects(struct super_block *sb, int nr_to_scan)
 346 {
 347         arc_adjust_meta(nr_to_scan * sizeof(znode_t), B_FALSE);
 348 }
 349 #endif /* HAVE_FREE_CACHED_OBJECTS */
 350
 351 const struct super_operations zpl_super_operations = {
 352         .alloc_inode            = zpl_inode_alloc,
 353         .destroy_inode          = zpl_inode_destroy,
 354         .dirty_inode            = zpl_dirty_inode,
 355         .write_inode            = NULL,
 356         .drop_inode             = NULL,
 357 #ifdef HAVE_EVICT_INODE
 358         .evict_inode            = zpl_evict_inode,
 359 #else
 360         .clear_inode            = zpl_clear_inode,
 361         .delete_inode           = zpl_inode_delete,
 362 #endif /* HAVE_EVICT_INODE */
 363         .put_super              = zpl_put_super,
 364         .sync_fs                = zpl_sync_fs,
 365         .statfs                 = zpl_statfs,
 366         .remount_fs             = zpl_remount_fs,
 367         .umount_begin           = zpl_umount_begin,
 368         .show_options           = zpl_show_options,
 369         .show_stats             = NULL,
 370 #ifdef HAVE_NR_CACHED_OBJECTS
 371         .nr_cached_objects      = zpl_nr_cached_objects,
 372 #endif /* HAVE_NR_CACHED_OBJECTS */
 373 #ifdef HAVE_FREE_CACHED_OBJECTS
 374         .free_cached_objects    = zpl_free_cached_objects,
 375 #endif /* HAVE_FREE_CACHED_OBJECTS */
 376 };
 377
 378 struct file_system_type zpl_fs_type = {
 379         .owner                  = THIS_MODULE,
 380         .name                   = ZFS_DRIVER,
 381 #ifdef HAVE_MOUNT_NODEV
 382         .mount                  = zpl_mount,
 383 #else
 384         .get_sb                 = zpl_get_sb,
 385 #endif /* HAVE_MOUNT_NODEV */
 386         .kill_sb                = zpl_kill_sb,
 387 };