sys/boot/zfs/zfs.c

   1 /*-
   2  * Copyright (c) 2007 Doug Rabson
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  *
  26  *      $FreeBSD$
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 /*
  33  *      Stand-alone file reading package.
  34  */
  35
  36 #include <sys/param.h>
  37 #include <sys/disklabel.h>
  38 #include <sys/time.h>
  39 #include <sys/queue.h>
  40 #include <stddef.h>
  41 #include <stdarg.h>
  42 #include <string.h>
  43 #include <stand.h>
  44 #include <bootstrap.h>
  45
  46 #include "zfsimpl.c"
  47
  48 static int      zfs_open(const char *path, struct open_file *f);
  49 static int      zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
  50 static int      zfs_close(struct open_file *f);
  51 static int      zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
  52 static off_t    zfs_seek(struct open_file *f, off_t offset, int where);
  53 static int      zfs_stat(struct open_file *f, struct stat *sb);
  54 static int      zfs_readdir(struct open_file *f, struct dirent *d);
  55
  56 struct devsw zfs_dev;
  57
  58 struct fs_ops zfs_fsops = {
  59         "zfs",
  60         zfs_open,
  61         zfs_close,
  62         zfs_read,
  63         zfs_write,
  64         zfs_seek,
  65         zfs_stat,
  66         zfs_readdir
  67 };
  68
  69 /*
  70  * In-core open file.
  71  */
  72 struct file {
  73         off_t           f_seekp;        /* seek pointer */
  74         dnode_phys_t    f_dnode;
  75         uint64_t        f_zap_type;     /* zap type for readdir */
  76         uint64_t        f_num_leafs;    /* number of fzap leaf blocks */
  77         zap_leaf_phys_t *f_zap_leaf;    /* zap leaf buffer */
  78 };
  79
  80 /*
  81  * Open a file.
  82  */
  83 static int
  84 zfs_open(const char *upath, struct open_file *f)
  85 {
  86         spa_t *spa = (spa_t *) f->f_devdata;
  87         struct file *fp;
  88         int rc;
  89
  90         if (f->f_dev != &zfs_dev)
  91                 return (EINVAL);
  92
  93         rc = zfs_mount_pool(spa);
  94         if (rc)
  95                 return (rc);
  96
  97         /* allocate file system specific data structure */
  98         fp = malloc(sizeof(struct file));
  99         bzero(fp, sizeof(struct file));
 100         f->f_fsdata = (void *)fp;
 101
 102         if (spa->spa_root_objset.os_type != DMU_OST_ZFS) {
 103                 printf("Unexpected object set type %llu\n",
 104                     spa->spa_root_objset.os_type);
 105                 rc = EIO;
 106                 goto out;
 107         }
 108
 109         rc = zfs_lookup(spa, upath, &fp->f_dnode);
 110         if (rc)
 111                 goto out;
 112
 113         fp->f_seekp = 0;
 114 out:
 115         if (rc) {
 116                 f->f_fsdata = NULL;
 117                 free(fp);
 118         }
 119         return (rc);
 120 }
 121
 122 static int
 123 zfs_close(struct open_file *f)
 124 {
 125         struct file *fp = (struct file *)f->f_fsdata;
 126
 127         dnode_cache_obj = 0;
 128         f->f_fsdata = (void *)0;
 129         if (fp == (struct file *)0)
 130                 return (0);
 131
 132         free(fp);
 133         return (0);
 134 }
 135
 136 /*
 137  * Copy a portion of a file into kernel memory.
 138  * Cross block boundaries when necessary.
 139  */
 140 static int
 141 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid   /* out */)
 142 {
 143         spa_t *spa = (spa_t *) f->f_devdata;
 144         struct file *fp = (struct file *)f->f_fsdata;
 145         struct stat sb;
 146         size_t n;
 147         int rc;
 148
 149         rc = zfs_stat(f, &sb);
 150         if (rc)
 151                 return (rc);
 152         n = size;
 153         if (fp->f_seekp + n > sb.st_size)
 154                 n = sb.st_size - fp->f_seekp;
 155
 156         rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
 157         if (rc)
 158                 return (rc);
 159
 160         if (0) {
 161             int i;
 162             for (i = 0; i < n; i++)
 163                 putchar(((char*) start)[i]);
 164         }
 165         fp->f_seekp += n;
 166         if (resid)
 167                 *resid = size - n;
 168
 169         return (0);
 170 }
 171
 172 /*
 173  * Don't be silly - the bootstrap has no business writing anything.
 174  */
 175 static int
 176 zfs_write(struct open_file *f, void *start, size_t size, size_t *resid  /* out */)
 177 {
 178
 179         return (EROFS);
 180 }
 181
 182 static off_t
 183 zfs_seek(struct open_file *f, off_t offset, int where)
 184 {
 185         struct file *fp = (struct file *)f->f_fsdata;
 186
 187         switch (where) {
 188         case SEEK_SET:
 189                 fp->f_seekp = offset;
 190                 break;
 191         case SEEK_CUR:
 192                 fp->f_seekp += offset;
 193                 break;
 194         case SEEK_END:
 195             {
 196                 struct stat sb;
 197                 int error;
 198
 199                 error = zfs_stat(f, &sb);
 200                 if (error != 0) {
 201                         errno = error;
 202                         return (-1);
 203                 }
 204                 fp->f_seekp = sb.st_size - offset;
 205                 break;
 206             }
 207         default:
 208                 errno = EINVAL;
 209                 return (-1);
 210         }
 211         return (fp->f_seekp);
 212 }
 213
 214 static int
 215 zfs_stat(struct open_file *f, struct stat *sb)
 216 {
 217         spa_t *spa = (spa_t *) f->f_devdata;
 218         struct file *fp = (struct file *)f->f_fsdata;
 219
 220         return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
 221 }
 222
 223 static int
 224 zfs_readdir(struct open_file *f, struct dirent *d)
 225 {
 226         spa_t *spa = (spa_t *) f->f_devdata;
 227         struct file *fp = (struct file *)f->f_fsdata;
 228         mzap_ent_phys_t mze;
 229         struct stat sb;
 230         size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
 231         int rc;
 232
 233         rc = zfs_stat(f, &sb);
 234         if (rc)
 235                 return (rc);
 236         if (!S_ISDIR(sb.st_mode))
 237                 return (ENOTDIR);
 238
 239         /*
 240          * If this is the first read, get the zap type.
 241          */
 242         if (fp->f_seekp == 0) {
 243                 rc = dnode_read(spa, &fp->f_dnode,
 244                                 0, &fp->f_zap_type, sizeof(fp->f_zap_type));
 245                 if (rc)
 246                         return (rc);
 247
 248                 if (fp->f_zap_type == ZBT_MICRO) {
 249                         fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
 250                 } else {
 251                         rc = dnode_read(spa, &fp->f_dnode,
 252                                         offsetof(zap_phys_t, zap_num_leafs),
 253                                         &fp->f_num_leafs,
 254                                         sizeof(fp->f_num_leafs));
 255                         if (rc)
 256                                 return (rc);
 257
 258                         fp->f_seekp = bsize;
 259                         fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
 260                         rc = dnode_read(spa, &fp->f_dnode,
 261                                         fp->f_seekp,
 262                                         fp->f_zap_leaf,
 263                                         bsize);
 264                         if (rc)
 265                                 return (rc);
 266                 }
 267         }
 268
 269         if (fp->f_zap_type == ZBT_MICRO) {
 270         mzap_next:
 271                 if (fp->f_seekp >= bsize)
 272                         return (ENOENT);
 273
 274                 rc = dnode_read(spa, &fp->f_dnode,
 275                                 fp->f_seekp, &mze, sizeof(mze));
 276                 if (rc)
 277                         return (rc);
 278                 fp->f_seekp += sizeof(mze);
 279
 280                 if (!mze.mze_name[0])
 281                         goto mzap_next;
 282
 283                 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
 284                 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
 285                 strcpy(d->d_name, mze.mze_name);
 286                 d->d_namlen = strlen(d->d_name);
 287                 return (0);
 288         } else {
 289                 zap_leaf_t zl;
 290                 zap_leaf_chunk_t *zc, *nc;
 291                 int chunk;
 292                 size_t namelen;
 293                 char *p;
 294                 uint64_t value;
 295
 296                 /*
 297                  * Initialise this so we can use the ZAP size
 298                  * calculating macros.
 299                  */
 300                 zl.l_bs = ilog2(bsize);
 301                 zl.l_phys = fp->f_zap_leaf;
 302
 303                 /*
 304                  * Figure out which chunk we are currently looking at
 305                  * and consider seeking to the next leaf. We use the
 306                  * low bits of f_seekp as a simple chunk index.
 307                  */
 308         fzap_next:
 309                 chunk = fp->f_seekp & (bsize - 1);
 310                 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
 311                         fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
 312                         chunk = 0;
 313
 314                         /*
 315                          * Check for EOF and read the new leaf.
 316                          */
 317                         if (fp->f_seekp >= bsize * fp->f_num_leafs)
 318                                 return (ENOENT);
 319
 320                         rc = dnode_read(spa, &fp->f_dnode,
 321                                         fp->f_seekp,
 322                                         fp->f_zap_leaf,
 323                                         bsize);
 324                         if (rc)
 325                                 return (rc);
 326                 }
 327
 328                 zc = &ZAP_LEAF_CHUNK(&zl, chunk);
 329                 fp->f_seekp++;
 330                 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
 331                         goto fzap_next;
 332
 333                 namelen = zc->l_entry.le_name_length;
 334                 if (namelen > sizeof(d->d_name))
 335                         namelen = sizeof(d->d_name);
 336
 337                 /*
 338                  * Paste the name back together.
 339                  */
 340                 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
 341                 p = d->d_name;
 342                 while (namelen > 0) {
 343                         int len;
 344                         len = namelen;
 345                         if (len > ZAP_LEAF_ARRAY_BYTES)
 346                                 len = ZAP_LEAF_ARRAY_BYTES;
 347                         memcpy(p, nc->l_array.la_array, len);
 348                         p += len;
 349                         namelen -= len;
 350                         nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
 351                 }
 352                 d->d_name[sizeof(d->d_name) - 1] = 0;
 353
 354                 /*
 355                  * Assume the first eight bytes of the value are
 356                  * a uint64_t.
 357                  */
 358                 value = fzap_leaf_value(&zl, zc);
 359
 360                 d->d_fileno = ZFS_DIRENT_OBJ(value);
 361                 d->d_type = ZFS_DIRENT_TYPE(value);
 362                 d->d_namlen = strlen(d->d_name);
 363
 364                 return (0);
 365         }
 366 }
 367
 368 static int
 369 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
 370 {
 371         int fd;
 372
 373         fd = (uintptr_t) priv;
 374         lseek(fd, offset, SEEK_SET);
 375         if (read(fd, buf, size) == size) {
 376                 return 0;
 377         } else {
 378                 return (EIO);
 379         }
 380 }
 381
 382 /*
 383  * Convert a pool guid to a 'unit number' suitable for use with zfs_dev_open.
 384  */
 385 int
 386 zfs_guid_to_unit(uint64_t guid)
 387 {
 388         spa_t *spa;
 389         int unit;
 390
 391         unit = 0;
 392         STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 393                 if (spa->spa_guid == guid)
 394                         return unit;
 395                 unit++;
 396         }
 397         return (-1);
 398 }
 399
 400 static int
 401 zfs_dev_init(void)
 402 {
 403         char devname[512];
 404         int unit, slice;
 405         int fd;
 406
 407         /*
 408          * Open all the disks we can find and see if we can reconstruct
 409          * ZFS pools from them. Bogusly assumes that the disks are named
 410          * diskN, diskNpM or diskNsM.
 411          */
 412         zfs_init();
 413         for (unit = 0; unit < 32 /* XXX */; unit++) {
 414                 sprintf(devname, "disk%d:", unit);
 415                 fd = open(devname, O_RDONLY);
 416                 if (fd == -1)
 417                         continue;
 418
 419                 /*
 420                  * If we find a vdev, the zfs code will eat the fd, otherwise
 421                  * we close it.
 422                  */
 423                 if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0))
 424                         close(fd);
 425
 426                 for (slice = 1; slice <= 128; slice++) {
 427                         sprintf(devname, "disk%dp%d:", unit, slice);
 428                         fd = open(devname, O_RDONLY);
 429                         if (fd == -1) {
 430                                 sprintf(devname, "disk%ds%d:", unit, slice);
 431                                 fd = open(devname, O_RDONLY);
 432                                 if (fd == -1)
 433                                         continue;
 434                         }
 435                         if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0))
 436                                 close(fd);
 437                 }
 438         }
 439
 440         return (0);
 441 }
 442
 443 /*
 444  * Print information about ZFS pools
 445  */
 446 static void
 447 zfs_dev_print(int verbose)
 448 {
 449         spa_t *spa;
 450         char line[80];
 451         int unit;
 452
 453         if (verbose) {
 454                 spa_all_status();
 455                 return;
 456         }
 457         unit = 0;
 458         STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 459                 sprintf(line, "    zfs%d:   %s\n", unit, spa->spa_name);
 460                 pager_output(line);
 461                 unit++;
 462         }
 463 }
 464
 465 /*
 466  * Attempt to open the pool described by (dev) for use by (f).
 467  */
 468 static int
 469 zfs_dev_open(struct open_file *f, ...)
 470 {
 471         va_list         args;
 472         struct devdesc  *dev;
 473         int             unit, i;
 474         spa_t           *spa;
 475
 476         va_start(args, f);
 477         dev = va_arg(args, struct devdesc*);
 478         va_end(args);
 479
 480         /*
 481          * We mostly ignore the stuff that devopen sends us. For now,
 482          * use the unit to find a pool - later we will override the
 483          * devname parsing so that we can name a pool and a fs within
 484          * the pool.
 485          */
 486         unit = dev->d_unit;
 487
 488         i = 0;
 489         STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 490                 if (i == unit)
 491                         break;
 492                 i++;
 493         }
 494         if (!spa) {
 495                 return (ENXIO);
 496         }
 497
 498         f->f_devdata = spa;
 499         free(dev);
 500         return (0);
 501 }
 502
 503 static int
 504 zfs_dev_close(struct open_file *f)
 505 {
 506
 507         f->f_devdata = NULL;
 508         return (0);
 509 }
 510
 511 static int
 512 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
 513 {
 514
 515         return (ENOSYS);
 516 }
 517
 518 struct devsw zfs_dev = {
 519         .dv_name = "zfs",
 520         .dv_type = DEVT_ZFS,
 521         .dv_init = zfs_dev_init,
 522         .dv_strategy = zfs_dev_strategy,
 523         .dv_open = zfs_dev_open,
 524         .dv_close = zfs_dev_close,
 525         .dv_ioctl = noioctl,
 526         .dv_print = zfs_dev_print,
 527         .dv_cleanup = NULL
 528 };