sys/boot/zfs/zfs.c

   1 /*-
   2  * Copyright (c) 2007 Doug Rabson
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  *
  26  *      $FreeBSD$
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 /*
  33  *      Stand-alone file reading package.
  34  */
  35
  36 #include <sys/param.h>
  37 #include <sys/disklabel.h>
  38 #include <sys/time.h>
  39 #include <sys/queue.h>
  40 #include <stddef.h>
  41 #include <stdarg.h>
  42 #include <string.h>
  43 #include <stand.h>
  44 #include <bootstrap.h>
  45
  46 #include "zfsimpl.c"
  47
  48 #define MAXBDDEV        31
  49
  50 static int      zfs_open(const char *path, struct open_file *f);
  51 static int      zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
  52 static int      zfs_close(struct open_file *f);
  53 static int      zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
  54 static off_t    zfs_seek(struct open_file *f, off_t offset, int where);
  55 static int      zfs_stat(struct open_file *f, struct stat *sb);
  56 static int      zfs_readdir(struct open_file *f, struct dirent *d);
  57
  58 struct devsw zfs_dev;
  59
  60 struct fs_ops zfs_fsops = {
  61         "zfs",
  62         zfs_open,
  63         zfs_close,
  64         zfs_read,
  65         zfs_write,
  66         zfs_seek,
  67         zfs_stat,
  68         zfs_readdir
  69 };
  70
  71 /*
  72  * In-core open file.
  73  */
  74 struct file {
  75         off_t           f_seekp;        /* seek pointer */
  76         dnode_phys_t    f_dnode;
  77         uint64_t        f_zap_type;     /* zap type for readdir */
  78         uint64_t        f_num_leafs;    /* number of fzap leaf blocks */
  79         zap_leaf_phys_t *f_zap_leaf;    /* zap leaf buffer */
  80 };
  81
  82 /*
  83  * Open a file.
  84  */
  85 static int
  86 zfs_open(const char *upath, struct open_file *f)
  87 {
  88         spa_t *spa = (spa_t *) f->f_devdata;
  89         struct file *fp;
  90         int rc;
  91
  92         if (f->f_dev != &zfs_dev)
  93                 return (EINVAL);
  94
  95         rc = zfs_mount_pool(spa);
  96         if (rc)
  97                 return (rc);
  98
  99         /* allocate file system specific data structure */
 100         fp = malloc(sizeof(struct file));
 101         bzero(fp, sizeof(struct file));
 102         f->f_fsdata = (void *)fp;
 103
 104         if (spa->spa_root_objset.os_type != DMU_OST_ZFS) {
 105                 printf("Unexpected object set type %llu\n",
 106                     spa->spa_root_objset.os_type);
 107                 rc = EIO;
 108                 goto out;
 109         }
 110
 111         rc = zfs_lookup(spa, upath, &fp->f_dnode);
 112         if (rc)
 113                 goto out;
 114
 115         fp->f_seekp = 0;
 116 out:
 117         if (rc) {
 118                 f->f_fsdata = NULL;
 119                 free(fp);
 120         }
 121         return (rc);
 122 }
 123
 124 static int
 125 zfs_close(struct open_file *f)
 126 {
 127         struct file *fp = (struct file *)f->f_fsdata;
 128
 129         dnode_cache_obj = 0;
 130         f->f_fsdata = (void *)0;
 131         if (fp == (struct file *)0)
 132                 return (0);
 133
 134         free(fp);
 135         return (0);
 136 }
 137
 138 /*
 139  * Copy a portion of a file into kernel memory.
 140  * Cross block boundaries when necessary.
 141  */
 142 static int
 143 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid   /* out */)
 144 {
 145         spa_t *spa = (spa_t *) f->f_devdata;
 146         struct file *fp = (struct file *)f->f_fsdata;
 147         struct stat sb;
 148         size_t n;
 149         int rc;
 150
 151         rc = zfs_stat(f, &sb);
 152         if (rc)
 153                 return (rc);
 154         n = size;
 155         if (fp->f_seekp + n > sb.st_size)
 156                 n = sb.st_size - fp->f_seekp;
 157
 158         rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
 159         if (rc)
 160                 return (rc);
 161
 162         if (0) {
 163             int i;
 164             for (i = 0; i < n; i++)
 165                 putchar(((char*) start)[i]);
 166         }
 167         fp->f_seekp += n;
 168         if (resid)
 169                 *resid = size - n;
 170
 171         return (0);
 172 }
 173
 174 /*
 175  * Don't be silly - the bootstrap has no business writing anything.
 176  */
 177 static int
 178 zfs_write(struct open_file *f, void *start, size_t size, size_t *resid  /* out */)
 179 {
 180
 181         return (EROFS);
 182 }
 183
 184 static off_t
 185 zfs_seek(struct open_file *f, off_t offset, int where)
 186 {
 187         struct file *fp = (struct file *)f->f_fsdata;
 188
 189         switch (where) {
 190         case SEEK_SET:
 191                 fp->f_seekp = offset;
 192                 break;
 193         case SEEK_CUR:
 194                 fp->f_seekp += offset;
 195                 break;
 196         case SEEK_END:
 197             {
 198                 struct stat sb;
 199                 int error;
 200
 201                 error = zfs_stat(f, &sb);
 202                 if (error != 0) {
 203                         errno = error;
 204                         return (-1);
 205                 }
 206                 fp->f_seekp = sb.st_size - offset;
 207                 break;
 208             }
 209         default:
 210                 errno = EINVAL;
 211                 return (-1);
 212         }
 213         return (fp->f_seekp);
 214 }
 215
 216 static int
 217 zfs_stat(struct open_file *f, struct stat *sb)
 218 {
 219         spa_t *spa = (spa_t *) f->f_devdata;
 220         struct file *fp = (struct file *)f->f_fsdata;
 221
 222         return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
 223 }
 224
 225 static int
 226 zfs_readdir(struct open_file *f, struct dirent *d)
 227 {
 228         spa_t *spa = (spa_t *) f->f_devdata;
 229         struct file *fp = (struct file *)f->f_fsdata;
 230         mzap_ent_phys_t mze;
 231         struct stat sb;
 232         size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
 233         int rc;
 234
 235         rc = zfs_stat(f, &sb);
 236         if (rc)
 237                 return (rc);
 238         if (!S_ISDIR(sb.st_mode))
 239                 return (ENOTDIR);
 240
 241         /*
 242          * If this is the first read, get the zap type.
 243          */
 244         if (fp->f_seekp == 0) {
 245                 rc = dnode_read(spa, &fp->f_dnode,
 246                                 0, &fp->f_zap_type, sizeof(fp->f_zap_type));
 247                 if (rc)
 248                         return (rc);
 249
 250                 if (fp->f_zap_type == ZBT_MICRO) {
 251                         fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
 252                 } else {
 253                         rc = dnode_read(spa, &fp->f_dnode,
 254                                         offsetof(zap_phys_t, zap_num_leafs),
 255                                         &fp->f_num_leafs,
 256                                         sizeof(fp->f_num_leafs));
 257                         if (rc)
 258                                 return (rc);
 259
 260                         fp->f_seekp = bsize;
 261                         fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
 262                         rc = dnode_read(spa, &fp->f_dnode,
 263                                         fp->f_seekp,
 264                                         fp->f_zap_leaf,
 265                                         bsize);
 266                         if (rc)
 267                                 return (rc);
 268                 }
 269         }
 270
 271         if (fp->f_zap_type == ZBT_MICRO) {
 272         mzap_next:
 273                 if (fp->f_seekp >= bsize)
 274                         return (ENOENT);
 275
 276                 rc = dnode_read(spa, &fp->f_dnode,
 277                                 fp->f_seekp, &mze, sizeof(mze));
 278                 if (rc)
 279                         return (rc);
 280                 fp->f_seekp += sizeof(mze);
 281
 282                 if (!mze.mze_name[0])
 283                         goto mzap_next;
 284
 285                 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
 286                 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
 287                 strcpy(d->d_name, mze.mze_name);
 288                 d->d_namlen = strlen(d->d_name);
 289                 return (0);
 290         } else {
 291                 zap_leaf_t zl;
 292                 zap_leaf_chunk_t *zc, *nc;
 293                 int chunk;
 294                 size_t namelen;
 295                 char *p;
 296                 uint64_t value;
 297
 298                 /*
 299                  * Initialise this so we can use the ZAP size
 300                  * calculating macros.
 301                  */
 302                 zl.l_bs = ilog2(bsize);
 303                 zl.l_phys = fp->f_zap_leaf;
 304
 305                 /*
 306                  * Figure out which chunk we are currently looking at
 307                  * and consider seeking to the next leaf. We use the
 308                  * low bits of f_seekp as a simple chunk index.
 309                  */
 310         fzap_next:
 311                 chunk = fp->f_seekp & (bsize - 1);
 312                 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
 313                         fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
 314                         chunk = 0;
 315
 316                         /*
 317                          * Check for EOF and read the new leaf.
 318                          */
 319                         if (fp->f_seekp >= bsize * fp->f_num_leafs)
 320                                 return (ENOENT);
 321
 322                         rc = dnode_read(spa, &fp->f_dnode,
 323                                         fp->f_seekp,
 324                                         fp->f_zap_leaf,
 325                                         bsize);
 326                         if (rc)
 327                                 return (rc);
 328                 }
 329
 330                 zc = &ZAP_LEAF_CHUNK(&zl, chunk);
 331                 fp->f_seekp++;
 332                 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
 333                         goto fzap_next;
 334
 335                 namelen = zc->l_entry.le_name_length;
 336                 if (namelen > sizeof(d->d_name))
 337                         namelen = sizeof(d->d_name);
 338
 339                 /*
 340                  * Paste the name back together.
 341                  */
 342                 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
 343                 p = d->d_name;
 344                 while (namelen > 0) {
 345                         int len;
 346                         len = namelen;
 347                         if (len > ZAP_LEAF_ARRAY_BYTES)
 348                                 len = ZAP_LEAF_ARRAY_BYTES;
 349                         memcpy(p, nc->l_array.la_array, len);
 350                         p += len;
 351                         namelen -= len;
 352                         nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
 353                 }
 354                 d->d_name[sizeof(d->d_name) - 1] = 0;
 355
 356                 /*
 357                  * Assume the first eight bytes of the value are
 358                  * a uint64_t.
 359                  */
 360                 value = fzap_leaf_value(&zl, zc);
 361
 362                 d->d_fileno = ZFS_DIRENT_OBJ(value);
 363                 d->d_type = ZFS_DIRENT_TYPE(value);
 364                 d->d_namlen = strlen(d->d_name);
 365
 366                 return (0);
 367         }
 368 }
 369
 370 static int
 371 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
 372 {
 373         int fd;
 374
 375         fd = (uintptr_t) priv;
 376         lseek(fd, offset, SEEK_SET);
 377         if (read(fd, buf, size) == size) {
 378                 return 0;
 379         } else {
 380                 return (EIO);
 381         }
 382 }
 383
 384 /*
 385  * Convert a pool guid to a 'unit number' suitable for use with zfs_dev_open.
 386  */
 387 int
 388 zfs_guid_to_unit(uint64_t guid)
 389 {
 390         spa_t *spa;
 391         int unit;
 392
 393         unit = 0;
 394         STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 395                 if (spa->spa_guid == guid)
 396                         return unit;
 397                 unit++;
 398         }
 399         return (-1);
 400 }
 401
 402 static int
 403 zfs_dev_init(void)
 404 {
 405         char devname[512];
 406         int unit, slice;
 407         int fd;
 408
 409         /*
 410          * Open all the disks we can find and see if we can reconstruct
 411          * ZFS pools from them. Bogusly assumes that the disks are named
 412          * diskN, diskNpM or diskNsM.
 413          */
 414         zfs_init();
 415         for (unit = 0; unit < MAXBDDEV; unit++) {
 416                 sprintf(devname, "disk%d:", unit);
 417                 fd = open(devname, O_RDONLY);
 418                 if (fd == -1)
 419                         continue;
 420
 421                 /*
 422                  * If we find a vdev, the zfs code will eat the fd, otherwise
 423                  * we close it.
 424                  */
 425                 if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0))
 426                         close(fd);
 427
 428                 for (slice = 1; slice <= 128; slice++) {
 429                         sprintf(devname, "disk%dp%d:", unit, slice);
 430                         fd = open(devname, O_RDONLY);
 431                         if (fd == -1) {
 432                                 sprintf(devname, "disk%ds%d:", unit, slice);
 433                                 fd = open(devname, O_RDONLY);
 434                                 if (fd == -1)
 435                                         continue;
 436                         }
 437                         if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0))
 438                                 close(fd);
 439                 }
 440         }
 441
 442         return (0);
 443 }
 444
 445 /*
 446  * Print information about ZFS pools
 447  */
 448 static void
 449 zfs_dev_print(int verbose)
 450 {
 451         spa_t *spa;
 452         char line[80];
 453         int unit;
 454
 455         if (verbose) {
 456                 spa_all_status();
 457                 return;
 458         }
 459         unit = 0;
 460         STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 461                 sprintf(line, "    zfs%d:   %s\n", unit, spa->spa_name);
 462                 pager_output(line);
 463                 unit++;
 464         }
 465 }
 466
 467 /*
 468  * Attempt to open the pool described by (dev) for use by (f).
 469  */
 470 static int
 471 zfs_dev_open(struct open_file *f, ...)
 472 {
 473         va_list         args;
 474         struct devdesc  *dev;
 475         int             unit, i;
 476         spa_t           *spa;
 477
 478         va_start(args, f);
 479         dev = va_arg(args, struct devdesc*);
 480         va_end(args);
 481
 482         /*
 483          * We mostly ignore the stuff that devopen sends us. For now,
 484          * use the unit to find a pool - later we will override the
 485          * devname parsing so that we can name a pool and a fs within
 486          * the pool.
 487          */
 488         unit = dev->d_unit;
 489
 490         i = 0;
 491         STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 492                 if (i == unit)
 493                         break;
 494                 i++;
 495         }
 496         if (!spa) {
 497                 return (ENXIO);
 498         }
 499
 500         f->f_devdata = spa;
 501         free(dev);
 502         return (0);
 503 }
 504
 505 static int
 506 zfs_dev_close(struct open_file *f)
 507 {
 508
 509         f->f_devdata = NULL;
 510         return (0);
 511 }
 512
 513 static int
 514 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
 515 {
 516
 517         return (ENOSYS);
 518 }
 519
 520 struct devsw zfs_dev = {
 521         .dv_name = "zfs",
 522         .dv_type = DEVT_ZFS,
 523         .dv_init = zfs_dev_init,
 524         .dv_strategy = zfs_dev_strategy,
 525         .dv_open = zfs_dev_open,
 526         .dv_close = zfs_dev_close,
 527         .dv_ioctl = noioctl,
 528         .dv_print = zfs_dev_print,
 529         .dv_cleanup = NULL
 530 };