2 * Copyright (c) 2007 Doug Rabson
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
33 * Stand-alone file reading package.
37 #include <sys/param.h>
39 #include <sys/queue.h>
45 #include <bootstrap.h>
/*
 * Lowest index used when numbering the per-entry boot environment variables
 * (e.g. "bootenvmenu_caption[%d]") later in this file.  The matching upper
 * bound, ZFS_BE_LAST, is used alongside it but is defined on a line that is
 * not part of this excerpt.
 */
51 /* Define the range of indexes to be populated with ZFS Boot Environments */
52 #define ZFS_BE_FIRST 4
/*
 * Forward declarations of the per-file handlers defined later in this file,
 * followed by the opening line of the zfs_fsops table.  The table's
 * initialisers are not visible in this excerpt; presumably they list the
 * handlers declared here -- confirm against the full file.
 */
55 static int zfs_open(const char *path, struct open_file *f);
56 static int zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
57 static int zfs_close(struct open_file *f);
58 static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
59 static off_t zfs_seek(struct open_file *f, off_t offset, int where);
60 static int zfs_stat(struct open_file *f, struct stat *sb);
61 static int zfs_readdir(struct open_file *f, struct dirent *d);
65 struct fs_ops zfs_fsops = {
/*
 * Members of the per-open-file state.  The handlers below reach them through
 * a `struct file *` stored in open_file.f_fsdata.  The struct header and its
 * f_dnode member (used by the handlers) are on lines not shown here.
 */
80 off_t f_seekp; /* seek pointer */
82 uint64_t f_zap_type; /* zap type for readdir */
83 uint64_t f_num_leafs; /* number of fzap leaf blocks */
84 zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */
/*
 * File-scope state for the boot environment menu.
 *
 * zfs_env_index - index substituted into the "...[%d]" environment variable
 *                 names while the menu variables are being written.
 * zfs_env_count - number of boot environments; it feeds the page-count
 *                 calculation in zfs_bootenv().  Where it is updated is not
 *                 visible in this excerpt.
 * zfs_be_head   - singly-linked list of boot environment entries.
 * zfs_be_headp  - not referenced by any line visible in this excerpt.
 * zfs_be, zfs_be_tmp - cursors used when inserting into, iterating over and
 *                 draining the list.
 *
 * Only the tail of the entry structure is visible: its list linkage
 * (`entries`).  Later code also reads a `name` member from it.
 */
87 static int zfs_env_index;
88 static int zfs_env_count;
90 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
91 struct zfs_be_list *zfs_be_headp;
94 SLIST_ENTRY(zfs_be_entry) entries;
95 } *zfs_be, *zfs_be_tmp;
/*
 * Open handler: bind the path `upath` to the open_file `f`.
 *
 * The mount state is taken from f->f_devdata.  The handler compares f->f_dev
 * against zfs_dev (the action on mismatch is on a line not shown), allocates
 * and zeroes a struct file, publishes it through f->f_fsdata and resolves
 * `upath` into fp->f_dnode with zfs_lookup().
 *
 * NOTE(review): the malloc() result is handed straight to bzero(); no NULL
 * check is visible between the two calls.
 */
101 zfs_open(const char *upath, struct open_file *f)
103 struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
107 if (f->f_dev != &zfs_dev)
110 /* allocate file system specific data structure */
111 fp = malloc(sizeof(struct file));
112 bzero(fp, sizeof(struct file));
113 f->f_fsdata = (void *)fp;
115 rc = zfs_lookup(mount, upath, &fp->f_dnode);
/*
 * Close handler: detach the per-file state from `f`.
 *
 * Resets the global dnode_cache_obj to NULL and clears f->f_fsdata before
 * testing whether any per-file state was attached.  What happens to `fp`
 * afterwards is on lines not shown in this excerpt.
 */
125 zfs_close(struct open_file *f)
127 struct file *fp = (struct file *)f->f_fsdata;
129 dnode_cache_obj = NULL;
130 f->f_fsdata = (void *)0;
131 if (fp == (struct file *)0)
139 * Copy a portion of a file into kernel memory.
140 * Cross block boundaries when necessary.
/*
 * Read handler: copy file data at the current seek pointer into `start`.
 *
 * The file is stat'ed to learn its size and the byte count `n` is clamped so
 * the read never extends past sb.st_size.  The data is then fetched with
 * dnode_read() from offset fp->f_seekp.  The initial value of `n`, and the
 * updates of the seek pointer and *resid, are on lines not shown here.
 */
143 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */)
145 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
146 struct file *fp = (struct file *)f->f_fsdata;
151 rc = zfs_stat(f, &sb);
/* Clamp the request to the bytes remaining before end of file. */
155 if (fp->f_seekp + n > sb.st_size)
156 n = sb.st_size - fp->f_seekp;
158 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
/*
 * Echoes every byte just read to the console.  NOTE(review): presumably
 * debug-only; any conditional-compilation guard around this loop is not
 * visible in this excerpt -- confirm.
 */
164 for (i = 0; i < n; i++)
165 putchar(((char*) start)[i]);
175 * Don't be silly - the bootstrap has no business writing anything.
/*
 * Write handler.  Only the signature is visible in this excerpt; the body
 * (and therefore the value it returns) is on lines not shown.
 */
178 zfs_write(struct open_file *f, void *start, size_t size, size_t *resid /* out */)
/*
 * Seek handler: update fp->f_seekp and return the new position.
 *
 * Three different updates of the seek pointer are visible: an absolute
 * assignment, a relative adjustment, and one computed from the file size
 * obtained via zfs_stat().  The statements that choose between them based on
 * `where` are on lines not shown in this excerpt.
 *
 * NOTE(review): the size-based case computes `st_size - offset`.  If it is
 * the SEEK_END case, POSIX lseek() adds the offset to the file size instead;
 * confirm that the subtraction is what callers expect.
 */
185 zfs_seek(struct open_file *f, off_t offset, int where)
187 struct file *fp = (struct file *)f->f_fsdata;
191 fp->f_seekp = offset;
194 fp->f_seekp += offset;
201 error = zfs_stat(f, &sb);
206 fp->f_seekp = sb.st_size - offset;
213 return (fp->f_seekp);
/*
 * Stat handler: fill `sb` for the open file by delegating to
 * zfs_dnode_stat() with the pool taken from the mount (f->f_devdata) and the
 * file's dnode.  The result of zfs_dnode_stat() is returned unchanged.
 */
217 zfs_stat(struct open_file *f, struct stat *sb)
219 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
220 struct file *fp = (struct file *)f->f_fsdata;
222 return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
/*
 * Directory-read handler: produce one directory entry in `d`.
 *
 * The file is stat'ed and tested with S_ISDIR().  On the first call
 * (f_seekp == 0) the ZAP type word is read from offset 0 of the object:
 *   - micro ZAP: the seek pointer is moved to the first mzap entry
 *     (offsetof(mzap_phys_t, mz_chunk));
 *   - otherwise: the leaf count is read from the zap_phys_t header and a
 *     buffer of one block (bsize bytes) is allocated for leaf data.
 * Afterwards entries are decoded according to the ZAP type and the seek
 * pointer is used as the iteration cursor.
 *
 * Error returns, loop structure and several declarations are on lines not
 * shown in this excerpt.
 */
226 zfs_readdir(struct open_file *f, struct dirent *d)
228 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
229 struct file *fp = (struct file *)f->f_fsdata;
/* Block size of the directory object in bytes. */
232 size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
235 rc = zfs_stat(f, &sb);
238 if (!S_ISDIR(sb.st_mode))
242 * If this is the first read, get the zap type.
244 if (fp->f_seekp == 0) {
245 rc = dnode_read(spa, &fp->f_dnode,
246 0, &fp->f_zap_type, sizeof(fp->f_zap_type));
250 if (fp->f_zap_type == ZBT_MICRO) {
251 fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
253 rc = dnode_read(spa, &fp->f_dnode,
254 offsetof(zap_phys_t, zap_num_leafs),
256 sizeof(fp->f_num_leafs));
/*
 * NOTE(review): no NULL check of the allocation is visible before the
 * dnode_read() call that immediately follows.
 */
261 fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
262 rc = dnode_read(spa, &fp->f_dnode,
/* Micro ZAP: fixed-size entries read one at a time at f_seekp. */
271 if (fp->f_zap_type == ZBT_MICRO) {
273 if (fp->f_seekp >= bsize)
276 rc = dnode_read(spa, &fp->f_dnode,
277 fp->f_seekp, &mze, sizeof(mze));
280 fp->f_seekp += sizeof(mze);
/* An entry whose name starts with NUL is tested for here. */
282 if (!mze.mze_name[0])
285 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
286 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
/*
 * NOTE(review): unbounded strcpy(); this relies on mze_name being
 * NUL-terminated and no longer than d->d_name -- confirm.
 */
287 strcpy(d->d_name, mze.mze_name);
288 d->d_namlen = strlen(d->d_name);
292 zap_leaf_chunk_t *zc, *nc;
299 * Initialise this so we can use the ZAP size
300 * calculating macros.
302 zl.l_bs = ilog2(bsize);
303 zl.l_phys = fp->f_zap_leaf;
306 * Figure out which chunk we are currently looking at
307 * and consider seeking to the next leaf. We use the
308 * low bits of f_seekp as a simple chunk index.
311 chunk = fp->f_seekp & (bsize - 1);
/* Past the last chunk of this leaf: advance to the start of the next leaf. */
312 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
313 fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
317 * Check for EOF and read the new leaf.
319 if (fp->f_seekp >= bsize * fp->f_num_leafs)
322 rc = dnode_read(spa, &fp->f_dnode,
330 zc = &ZAP_LEAF_CHUNK(&zl, chunk);
332 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
/* Clamp the name length to the size of the destination buffer. */
335 namelen = zc->l_entry.le_name_numints;
336 if (namelen > sizeof(d->d_name))
337 namelen = sizeof(d->d_name);
340 * Paste the name back together.
342 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
/* The name is spread over a chain of array chunks linked by la_next. */
344 while (namelen > 0) {
347 if (len > ZAP_LEAF_ARRAY_BYTES)
348 len = ZAP_LEAF_ARRAY_BYTES;
349 memcpy(p, nc->l_array.la_array, len);
352 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
/* Force termination even when the name filled the whole buffer. */
354 d->d_name[sizeof(d->d_name) - 1] = 0;
357 * Assume the first eight bytes of the value are
360 value = fzap_leaf_value(&zl, zc);
362 d->d_fileno = ZFS_DIRENT_OBJ(value);
363 d->d_type = ZFS_DIRENT_TYPE(value);
364 d->d_namlen = strlen(d->d_name);
/*
 * Device read callback: read `bytes` bytes at byte offset `offset` from the
 * file descriptor carried in `priv` into `buf`.
 *
 * The sector size is queried with DIOCGSECTORSIZE and the descriptor is
 * positioned at the sector containing `offset`.  When the request does not
 * start on a sector boundary or is not a whole number of sectors, a
 * one-sector bounce buffer is allocated and data is copied out of it into
 * `buf`.  The bounce buffer is released before returning.
 *
 * The loop structure, the assignments of rb_buf/rb_size and all return
 * statements are on lines not shown in this excerpt.
 */
371 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
374 size_t res, size, remainder, rb_size, blksz;
377 char *bouncebuf, *rb_buf;
/* `priv` is an integer descriptor smuggled through a pointer. */
379 fd = (uintptr_t) priv;
382 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
/* Split the byte offset into a sector number and an offset within it. */
386 off = offset / secsz;
387 remainder = offset % secsz;
388 if (lseek(fd, off * secsz, SEEK_SET) == -1)
/* Total bytes to transfer once rounded up to whole sectors. */
393 size = roundup2(bytes + remainder, secsz);
395 if (remainder != 0 || size != bytes) {
396 bouncebuf = zfs_alloc(secsz);
397 if (bouncebuf == NULL) {
398 printf("vdev_read: out of memory\n");
402 blksz = rb_size - remainder;
406 res = read(fd, rb_buf, rb_size);
/* A short read is tested for here. */
407 if (res != rb_size) {
/* Skip the leading `remainder` bytes that precede the requested offset. */
413 if (bouncebuf != NULL)
414 memcpy(buf, rb_buf + remainder, blksz);
415 buf = (void *)((uintptr_t)buf + blksz);
423 if (bouncebuf != NULL)
424 zfs_free(bouncebuf, secsz);
/*
 * Pool discovery and initialisation.  NOTE(review): the function header is
 * not part of this excerpt; presumably this is the body of zfs_dev_init(),
 * which the zfs_dev table installs as .dv_init -- confirm.
 *
 * The architecture probe hook is called when one is installed.  Then every
 * pool on zfs_pools is passed to zfs_spa_init(); a pool for which it returns
 * non-zero is unlinked from the list, from the head or after `prev`.
 */
436 if (archsw.arch_zfs_probe == NULL)
438 archsw.arch_zfs_probe();
441 spa = STAILQ_FIRST(&zfs_pools);
442 while (spa != NULL) {
/* Fetch the successor first: `spa` may be unlinked below. */
443 next = STAILQ_NEXT(spa, spa_link);
444 if (zfs_spa_init(spa)) {
446 STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
448 STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
/*
 * Argument bundle threaded through the probe callbacks below.  The member
 * declarations are not visible in this excerpt; the code that uses it
 * accesses fd, devname, pool_guid and secsz.
 */
456 struct zfs_probe_args {
/*
 * Sector-based read callback handed to ptable_open().
 *
 * `arg` is a struct zfs_probe_args.  `offset` and `blocks` are expressed in
 * sectors and are converted to bytes with ppa->secsz before the request is
 * forwarded to vdev_read().  vdev_read() receives a NULL vdev and the probe
 * descriptor as its private data; its result is returned unchanged.
 */
464 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
466 struct zfs_probe_args *ppa;
468 ppa = (struct zfs_probe_args *)arg;
469 return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
470 offset * ppa->secsz, buf, blocks * ppa->secsz));
/*
 * Probe the device behind `fd` with vdev_probe(), using vdev_read as the
 * read callback.  When the probe returns 0 and `pool_guid` is non-NULL, the
 * GUID of the pool that was found is stored in *pool_guid.  The return
 * statement is on a line not shown in this excerpt.
 */
474 zfs_probe(int fd, uint64_t *pool_guid)
479 ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
480 if (ret == 0 && pool_guid != NULL)
481 *pool_guid = spa->spa_guid;
/*
 * ptable_iterate() callback: probe one partition for a ZFS pool.
 *
 * Only partitions of type PART_FREEBSD or PART_FREEBSD_ZFS are considered.
 * A device name is built from the parent name in the probe arguments, minus
 * its last character, followed by the partition name and ':'.  That device
 * is opened and probed.  For PART_FREEBSD partitions a nested partition
 * table is opened and this function is applied to its entries as well.
 *
 * Return statements, descriptor handling and the declaration of `devname`
 * are on lines not shown in this excerpt.
 */
486 zfs_probe_partition(void *arg, const char *partname,
487 const struct ptable_entry *part)
489 struct zfs_probe_args *ppa, pa;
490 struct ptable *table;
494 /* Probe only freebsd-zfs and freebsd partitions */
495 if (part->type != PART_FREEBSD &&
496 part->type != PART_FREEBSD_ZFS)
499 ppa = (struct zfs_probe_args *)arg;
/*
 * Copy the parent name without its final character, then append the
 * partition name and ':'.
 * NOTE(review): the copy length comes from the source string, not from the
 * size of `devname`; and the sprintf() below passes `devname` as both the
 * destination and a "%s" argument.  The C standard leaves copying between
 * overlapping objects in sprintf() undefined -- worth rewriting as a single
 * bounded snprintf() from ppa->devname.
 */
500 strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
501 devname[strlen(ppa->devname) - 1] = '\0';
502 sprintf(devname, "%s%s:", devname, partname);
503 pa.fd = open(devname, O_RDONLY);
506 ret = zfs_probe(pa.fd, ppa->pool_guid);
509 /* Do we have BSD label here? */
510 if (part->type == PART_FREEBSD) {
511 pa.devname = devname;
512 pa.pool_guid = ppa->pool_guid;
513 pa.secsz = ppa->secsz;
/* Partition size in sectors: end and start are both inclusive. */
514 table = ptable_open(&pa, part->end - part->start + 1,
515 ppa->secsz, zfs_diskread);
517 ptable_iterate(table, &pa, zfs_probe_partition);
/*
 * Probe the device `devname` for ZFS pools.
 *
 * The device is opened and probed as a whole first.  Its media size and
 * sector size are then queried, a partition table is opened over the whole
 * device (media size divided by sector size gives its length in sectors)
 * and every partition is handed to zfs_probe_partition().
 *
 * Finally *pool_guid is tested for still being zero.  The action taken, the
 * other return paths and the cleanup are on lines not shown here.
 */
526 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
528 struct ptable *table;
529 struct zfs_probe_args pa;
535 pa.fd = open(devname, O_RDONLY);
538 /* Probe the whole disk */
539 ret = zfs_probe(pa.fd, pool_guid);
543 /* Probe each partition */
544 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
546 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
548 pa.devname = devname;
549 pa.pool_guid = pool_guid;
550 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
553 ptable_iterate(table, &pa, zfs_probe_partition);
558 if (pool_guid && *pool_guid == 0)
564 * Print information about ZFS pools
/*
 * Print the known ZFS pools through the pager.
 *
 * The list is tested for being empty first.  Otherwise a
 * "<device name> devices:" header is printed, followed by either the output
 * of spa_all_status() or one "zfs:<pool name>" line per pool.
 * NOTE(review): the condition selecting spa_all_status() is on a line not
 * shown here; presumably it tests `verbose` -- confirm.
 */
567 zfs_dev_print(int verbose)
573 if (STAILQ_EMPTY(&zfs_pools))
576 printf("%s devices:", zfs_dev.dv_name);
577 if ((ret = pager_output("\n")) != 0)
581 return (spa_all_status());
583 STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
584 snprintf(line, sizeof(line), " zfs:%s\n", spa->spa_name);
585 ret = pager_output(line);
593 * Attempt to open the pool described by (dev) for use by (f).
/*
 * Device open handler: mount the dataset described by a struct zfs_devdesc
 * taken from the variadic arguments, and attach the mount to `f`.
 *
 * A pool GUID of 0 selects the first pool on zfs_pools; any other value is
 * looked up with spa_find_by_guid().  A zfsmount is allocated and filled by
 * zfs_mount().  The object set type is compared against DMU_OST_ZFS, with a
 * diagnostic printed on mismatch.  The mount is stored in f->f_devdata.
 *
 * NOTE(review): the malloc() result is passed to zfs_mount() with no NULL
 * check visible between the two calls.  Error returns and any freeing of
 * `mount` on failure are on lines not shown in this excerpt.
 */
596 zfs_dev_open(struct open_file *f, ...)
599 struct zfs_devdesc *dev;
600 struct zfsmount *mount;
605 dev = va_arg(args, struct zfs_devdesc *);
608 if (dev->pool_guid == 0)
609 spa = STAILQ_FIRST(&zfs_pools);
611 spa = spa_find_by_guid(dev->pool_guid);
614 mount = malloc(sizeof(*mount));
615 rv = zfs_mount(spa, dev->root_guid, mount);
620 if (mount->objset.os_type != DMU_OST_ZFS) {
621 printf("Unexpected object set type %ju\n",
622 (uintmax_t)mount->objset.os_type);
626 f->f_devdata = mount;
/*
 * Device close handler, installed as .dv_close in zfs_dev.  Only the
 * signature is visible in this excerpt.
 */
632 zfs_dev_close(struct open_file *f)
/*
 * Device strategy handler, installed as .dv_strategy in zfs_dev.  Only the
 * signature is visible in this excerpt.
 */
641 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
/*
 * Device switch entry for ZFS.  It wires up the init, strategy, open, close
 * and print handlers defined in this file.  Its remaining members (the code
 * above reads dv_name and dv_type from it) are initialised on lines not
 * shown in this excerpt.
 */
647 struct devsw zfs_dev = {
650 .dv_init = zfs_dev_init,
651 .dv_strategy = zfs_dev_strategy,
652 .dv_open = zfs_dev_open,
653 .dv_close = zfs_dev_close,
655 .dv_print = zfs_dev_print,
/*
 * Parse a ZFS device specification into `dev`.
 *
 * The text at `np` (derived from `devspec` on lines not shown) is split at
 * the first ':' into a pool name and, after the first '/', a dataset name.
 * The pool is looked up by name, its GUID stored in dev->pool_guid, and the
 * dataset resolved into dev->root_guid.  *path is set to the text following
 * the ':' terminator, or to the terminating NUL when `end` already points at
 * one.
 *
 * The name buffers are function-static, so their contents are overwritten by
 * the next call.
 * NOTE(review): both memcpy() lengths come from the input; no bound against
 * ZFS_MAXNAMELEN is visible in this excerpt -- confirm that one exists on the
 * hidden lines.
 */
660 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
662 static char rootname[ZFS_MAXNAMELEN];
663 static char poolname[ZFS_MAXNAMELEN];
674 end = strchr(np, ':');
677 sep = strchr(np, '/');
/* A '/' located at or beyond the ':' is not part of the pool/dataset part. */
678 if (sep == NULL || sep >= end)
680 memcpy(poolname, np, sep - np);
681 poolname[sep - np] = '\0';
684 memcpy(rootname, sep, end - sep);
685 rootname[end - sep] = '\0';
690 spa = spa_find_by_name(poolname);
693 dev->pool_guid = spa->spa_guid;
694 rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
698 *path = (*end == '\0') ? end : end + 1;
699 dev->d_dev = &zfs_dev;
700 dev->d_type = zfs_dev.dv_type;
/*
 * Format a ZFS device descriptor as text: "<dev>:<pool>:" or
 * "<dev>:<pool>/<dataset>:".
 *
 * Side effects on the descriptor: a pool GUID of 0 is replaced by the GUID
 * of the first known pool, and a root GUID of 0 is filled in through
 * zfs_get_root().  The dataset name is recovered from the root GUID with
 * zfs_rlookup().
 *
 * The text is built in a function-static buffer, so it is overwritten by the
 * next call.  Return statements are on lines not shown in this excerpt.
 *
 * NOTE(review): in the pool_guid == 0 path the result of STAILQ_FIRST() is
 * dereferenced on the very next line with no NULL check, so an empty pool
 * list would be a NULL dereference.  The sprintf() calls are unbounded; they
 * rely on `buf` being sized for two names plus punctuation.
 */
705 zfs_fmtdev(void *vdev)
707 static char rootname[ZFS_MAXNAMELEN];
708 static char buf[2 * ZFS_MAXNAMELEN + 8];
709 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
713 if (dev->d_type != DEVT_ZFS)
716 if (dev->pool_guid == 0) {
717 spa = STAILQ_FIRST(&zfs_pools);
718 dev->pool_guid = spa->spa_guid;
720 spa = spa_find_by_guid(dev->pool_guid);
722 printf("ZFS: can't find pool by guid\n");
725 if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
726 printf("ZFS: can't find root filesystem\n");
729 if (zfs_rlookup(spa, dev->root_guid, rootname)) {
730 printf("ZFS: can't find filesystem by guid\n");
/* An empty dataset name is formatted as "<dev>:<pool>:". */
734 if (rootname[0] == '\0')
735 sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name);
737 sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name,
/*
 * List the datasets below `name`, given as "pool" or "pool/dataset".
 *
 * The pool part is copied into a function-static buffer and looked up by
 * name; the dataset part is resolved to an object id, and the listing is
 * delegated to zfs_list_dataset(), whose result is returned.
 *
 * The computation of `len`, the adjustment of `dsname` and the error returns
 * are on lines not shown in this excerpt.
 */
743 zfs_list(const char *name)
745 static char poolname[ZFS_MAXNAMELEN];
753 dsname = strchr(name, '/');
754 if (dsname != NULL) {
759 memcpy(poolname, name, len);
760 poolname[len] = '\0';
762 spa = spa_find_by_name(poolname);
765 rv = zfs_lookup_dataset(spa, dsname, &objid);
769 return (zfs_list_dataset(spa, objid));
/*
 * Seed the boot environment variables from the current device string.
 *
 * `currdev` is modified in place: its last character is overwritten with
 * NUL.  The string is tested for being empty and for starting with "zfs:";
 * the actions on failure are on lines not shown here.  The trimmed string
 * becomes "zfs_be_active" and "zfs_be_currpage" is reset to "1".  The part
 * after the first ':' is then searched for its last '/', and `beroot` is
 * stored as "zfs_be_root".
 *
 * NOTE(review): how the last path element is removed, and whether `beroot`
 * is reassigned before the final setenv(), is on lines not shown -- confirm
 * against the full file.
 */
773 init_zfs_bootenv(char *currdev)
777 if (strlen(currdev) == 0)
779 if(strncmp(currdev, "zfs:", 4) != 0)
781 /* Remove the trailing : */
782 currdev[strlen(currdev) - 1] = '\0';
783 setenv("zfs_be_active", currdev, 1);
784 setenv("zfs_be_currpage", "1", 1);
785 /* Forward past zfs: */
786 currdev = strchr(currdev, ':');
788 /* Remove the last element (current bootenv) */
789 beroot = strrchr(currdev, '/');
793 setenv("zfs_be_root", beroot, 1);
/*
 * Rebuild the boot environment menu for the dataset `name`.
 *
 * Steps visible in this excerpt:
 *  1. Read "zfs_be_root"; when `name` differs from it, store `name` there.
 *  2. Reset the boot environment list, split `name` into pool and dataset,
 *     look both up, and call zfs_belist_add() for each child dataset through
 *     zfs_callback_dataset().
 *  3. Compute the number of menu pages (zfs_env_count divided by entries per
 *     page, rounded up) and publish it as "zfs_be_pages".
 *  4. Reset "zfs_be_currpage" to "1" when the current page is beyond the
 *     last page.
 *  5. Drain the list.
 *
 * Error returns, the call that populates the menu variables and the release
 * of list entries are on lines not shown in this excerpt.
 */
797 zfs_bootenv(const char *name)
799 static char poolname[ZFS_MAXNAMELEN], *dsname, *root;
803 int len, rv, pages, perpage, currpage;
807 if ((root = getenv("zfs_be_root")) == NULL)
810 if (strcmp(name, root) != 0) {
811 if (setenv("zfs_be_root", name, 1) != 0)
815 SLIST_INIT(&zfs_be_head);
818 dsname = strchr(name, '/');
819 if (dsname != NULL) {
824 memcpy(poolname, name, len);
825 poolname[len] = '\0';
827 spa = spa_find_by_name(poolname);
830 rv = zfs_lookup_dataset(spa, dsname, &objid);
833 rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
835 /* Calculate and store the number of pages of BEs */
836 perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
837 pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
/*
 * NOTE(review): the size 4 is hard-coded; the declaration of `becount` is
 * not visible here, so confirm the buffer is at least that large.
 */
838 snprintf(becount, 4, "%d", pages);
839 if (setenv("zfs_be_pages", becount, 1) != 0)
842 /* Roll over the page counter if it has exceeded the maximum */
/*
 * NOTE(review): getenv() returns NULL when "zfs_be_currpage" is unset, and
 * that pointer is passed straight to strtol() here.
 */
843 currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
844 if (currpage > pages) {
845 if (setenv("zfs_be_currpage", "1", 1) != 0)
849 /* Populate the menu environment variables */
852 /* Clean up the SLIST of ZFS BEs */
853 while (!SLIST_EMPTY(&zfs_be_head)) {
854 zfs_be = SLIST_FIRST(&zfs_be_head);
855 SLIST_REMOVE_HEAD(&zfs_be_head, entries);
/*
 * Dataset callback (passed to zfs_callback_dataset() by zfs_bootenv()):
 * record one boot environment.
 *
 * Names beginning with '$' are tested for and skipped, as the existing
 * comment states.  For other names a list entry is allocated, checked for
 * allocation failure and inserted at the head of the boot environment list.
 * `value` is unused.
 *
 * How the name is stored in the entry, any counter update and the return
 * values are on lines not shown in this excerpt.
 */
863 zfs_belist_add(const char *name, uint64_t value __unused)
866 /* Skip special datasets that start with a $ character */
867 if (strncmp(name, "$", 1) == 0) {
870 /* Add the boot environment to the head of the SLIST */
871 zfs_be = malloc(sizeof(struct zfs_be_entry));
872 if (zfs_be == NULL) {
876 SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
/*
 * Body of the routine that publishes one page of boot environments as
 * loader environment variables.  Its header line is not part of this
 * excerpt.
 *
 * Inputs are "zfs_be_root" (prefix for the dataset paths) and
 * "zfs_be_currpage" (page to show, parsed as a decimal number).  Starting at
 * index ZFS_BE_FIRST, each list entry on the requested page sets:
 *   bootenvmenu_caption[i] = entry name
 *   bootenvansi_caption[i] = entry name
 *   bootenvmenu_command[i] = "set_bootenv"
 *   bootenv_root[i]        = "zfs:<zfs_be_root>/<entry name>"
 * Once the list is exhausted, the same four variables are unset for every
 * remaining index up to ZFS_BE_LAST, so stale entries from an earlier page
 * do not linger.
 *
 * Error handling after each setenv(), the maintenance of `ctr` and the
 * increment of zfs_env_index inside the first loop are on lines not shown.
 */
885 char envname[32], envval[256];
886 char *beroot, *pagenum;
889 beroot = getenv("zfs_be_root");
890 if (beroot == NULL) {
894 pagenum = getenv("zfs_be_currpage");
895 if (pagenum != NULL) {
896 page = strtol(pagenum, NULL, 10);
903 zfs_env_index = ZFS_BE_FIRST;
904 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
905 /* Skip to the requested page number */
/* Entries per page times (page - 1) entries precede the requested page. */
906 if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
911 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
912 snprintf(envval, sizeof(envval), "%s", zfs_be->name);
913 rv = setenv(envname, envval, 1);
/* envval still holds the entry name, so the ANSI caption reuses it. */
918 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
919 rv = setenv(envname, envval, 1);
924 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
925 rv = setenv(envname, "set_bootenv", 1);
930 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
931 snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
932 rv = setenv(envname, envval, 1);
/* The index is tested for having moved past the last slot on the page. */
938 if (zfs_env_index > ZFS_BE_LAST) {
/* Clear the slots that were not filled on this page. */
944 for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
945 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
946 (void)unsetenv(envname);
947 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
948 (void)unsetenv(envname);
949 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
950 (void)unsetenv(envname);
951 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
952 (void)unsetenv(envname);