2 * Copyright (c) 2007 Doug Rabson
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
33 * Stand-alone file reading package.
38 #include <sys/param.h>
40 #include <sys/queue.h>
46 #include <bootstrap.h>
52 /* Define the range of indexes to be populated with ZFS Boot Environments */
53 #define ZFS_BE_FIRST 4
56 static int zfs_open(const char *path, struct open_file *f);
57 static int zfs_close(struct open_file *f);
58 static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
59 static off_t zfs_seek(struct open_file *f, off_t offset, int where);
60 static int zfs_stat(struct open_file *f, struct stat *sb);
61 static int zfs_readdir(struct open_file *f, struct dirent *d);
63 static void zfs_bootenv_initial(const char *);
67 struct fs_ops zfs_fsops = {
82 off_t f_seekp; /* seek pointer */
84 uint64_t f_zap_type; /* zap type for readdir */
85 uint64_t f_num_leafs; /* number of fzap leaf blocks */
86 zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */
89 static int zfs_env_index;
90 static int zfs_env_count;
92 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
93 struct zfs_be_list *zfs_be_headp;
96 SLIST_ENTRY(zfs_be_entry) entries;
97 } *zfs_be, *zfs_be_tmp;
/*
 * zfs_open: allocate per-file state for f and resolve upath to a dnode
 * on the ZFS mount kept in f->f_devdata.
 * NOTE(review): this view of the function has gaps; only the statements
 * shown below are described.
 */
103 zfs_open(const char *upath, struct open_file *f)
105 struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
/* Test whether the device behind f is the ZFS devsw (zfs_dev). */
109 if (f->f_dev != &zfs_dev)
112 /* allocate file system specific data structure */
113 fp = malloc(sizeof(struct file));
/* NOTE(review): fp is zeroed and published with no NULL check on malloc. */
114 bzero(fp, sizeof(struct file));
115 f->f_fsdata = (void *)fp;
/* Look the path up on the mount; the result lands in fp->f_dnode. */
117 rc = zfs_lookup(mount, upath, &fp->f_dnode);
/*
 * zfs_close: detach the per-file state from f.
 * NOTE(review): what happens to fp after the NULL test is not visible here.
 */
127 zfs_close(struct open_file *f)
129 struct file *fp = (struct file *)f->f_fsdata;
/*
 * Reset the dnode cache pointer and clear f_fsdata first; fp still holds
 * the old state pointer for the NULL test that follows.
 */
131 dnode_cache_obj = NULL;
132 f->f_fsdata = (void *)0;
133 if (fp == (struct file *)0)
141 * Copy a portion of a file into kernel memory.
142 * Cross block boundaries when necessary.
/*
 * zfs_read: read file data at the current seek pointer into start.
 * The byte count n is limited by the file size reported by zfs_stat and
 * the data comes from dnode_read.
 * NOTE(review): initialisation of n, the seek pointer update and the
 * handling of resid are not visible in this view.
 */
145 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */)
147 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
148 struct file *fp = (struct file *)f->f_fsdata;
/* The stat result supplies st_size for the clamp below. */
153 rc = zfs_stat(f, &sb);
/* Shorten n so the read does not extend past st_size. */
157 if (fp->f_seekp + n > sb.st_size)
158 n = sb.st_size - fp->f_seekp;
160 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
/*
 * Echo the n bytes just read with putchar (presumably a debug aid; the
 * enclosing condition is not visible).
 */
166 for (i = 0; i < n; i++)
167 putchar(((char*) start)[i]);
/*
 * zfs_seek: update the per-file seek pointer and return its new value.
 * Three updates are visible: set to offset, advance by offset, and
 * st_size minus offset (after a zfs_stat call). The dispatch on where
 * that selects among them is not visible in this view.
 */
177 zfs_seek(struct open_file *f, off_t offset, int where)
179 struct file *fp = (struct file *)f->f_fsdata;
183 fp->f_seekp = offset;
186 fp->f_seekp += offset;
193 error = zfs_stat(f, &sb);
/*
 * NOTE(review): offset is subtracted from the file size. If this is the
 * SEEK_END case, POSIX lseek adds the offset instead -- confirm the sign
 * is intended.
 */
198 fp->f_seekp = sb.st_size - offset;
205 return (fp->f_seekp);
/*
 * zfs_stat: fill *sb for the open file by delegating to zfs_dnode_stat
 * with the spa of the mount and the dnode stored in the per-file state.
 * The return value is whatever zfs_dnode_stat returns.
 */
209 zfs_stat(struct open_file *f, struct stat *sb)
211 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
212 struct file *fp = (struct file *)f->f_fsdata;
214 return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
/*
 * zfs_readdir: fetch the next directory entry of f into d, using
 * fp->f_seekp as the iteration cursor. Two on-disk layouts are handled:
 * the micro ZAP (f_zap_type == ZBT_MICRO) and the leaf-based ZAP that is
 * walked one leaf block at a time through fp->f_zap_leaf.
 * NOTE(review): this view of the function has gaps; branch bodies and
 * return statements that are not shown are not described.
 */
218 zfs_readdir(struct open_file *f, struct dirent *d)
220 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
221 struct file *fp = (struct file *)f->f_fsdata;
/* Block size of the directory dnode: dn_datablkszsec << SPA_MINBLOCKSHIFT. */
224 size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
227 rc = zfs_stat(f, &sb);
/* Test the mode reported by zfs_stat for directory-ness. */
230 if (!S_ISDIR(sb.st_mode))
234 * If this is the first read, get the zap type.
236 if (fp->f_seekp == 0) {
/* The ZAP type is the first 64-bit word of the object (offset 0). */
237 rc = dnode_read(spa, &fp->f_dnode,
238 0, &fp->f_zap_type, sizeof(fp->f_zap_type));
242 if (fp->f_zap_type == ZBT_MICRO) {
/* Micro ZAP: start the cursor at the first entry of the mz_chunk array. */
243 fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
/* Otherwise fetch the leaf count from the zap_num_leafs header field. */
245 rc = dnode_read(spa, &fp->f_dnode,
246 offsetof(zap_phys_t, zap_num_leafs),
248 sizeof(fp->f_num_leafs));
/*
 * NOTE(review): the leaf buffer comes from malloc(bsize); no NULL check
 * sits between the allocation and the next dnode_read.
 */
253 fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
254 rc = dnode_read(spa, &fp->f_dnode,
263 if (fp->f_zap_type == ZBT_MICRO) {
/*
 * Compare the cursor with bsize before reading the next micro ZAP entry
 * (presumably the end-of-directory test; the outcome is not visible).
 */
265 if (fp->f_seekp >= bsize)
268 rc = dnode_read(spa, &fp->f_dnode,
269 fp->f_seekp, &mze, sizeof(mze));
/* Advance the cursor by one micro ZAP entry. */
272 fp->f_seekp += sizeof(mze);
/* An entry whose name starts with NUL is tested for (presumably an unused slot). */
274 if (!mze.mze_name[0])
277 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
278 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
/* NOTE(review): unbounded strcpy; assumes mze_name always fits in d_name -- confirm. */
279 strcpy(d->d_name, mze.mze_name);
280 d->d_namlen = strlen(d->d_name);
284 zap_leaf_chunk_t *zc, *nc;
291 * Initialise this so we can use the ZAP size
292 * calculating macros.
294 zl.l_bs = ilog2(bsize);
295 zl.l_phys = fp->f_zap_leaf;
298 * Figure out which chunk we are currently looking at
299 * and consider seeking to the next leaf. We use the
300 * low bits of f_seekp as a simple chunk index.
303 chunk = fp->f_seekp & (bsize - 1);
/*
 * The chunk index has reached the chunk count of this leaf: move the
 * cursor to the start of the next bsize-aligned leaf.
 */
304 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
305 fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
309 * Check for EOF and read the new leaf.
311 if (fp->f_seekp >= bsize * fp->f_num_leafs)
314 rc = dnode_read(spa, &fp->f_dnode,
322 zc = &ZAP_LEAF_CHUNK(&zl, chunk);
/* Only chunks of type ZAP_CHUNK_ENTRY start a directory entry. */
324 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
/* Clamp the name length to the size of d_name. */
327 namelen = zc->l_entry.le_name_numints;
328 if (namelen > sizeof(d->d_name))
329 namelen = sizeof(d->d_name);
332 * Paste the name back together.
334 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
/* Copy at most ZAP_LEAF_ARRAY_BYTES per array chunk, then follow la_next. */
336 while (namelen > 0) {
339 if (len > ZAP_LEAF_ARRAY_BYTES)
340 len = ZAP_LEAF_ARRAY_BYTES;
341 memcpy(p, nc->l_array.la_array, len);
344 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
/* Force NUL termination in the last byte of d_name. */
346 d->d_name[sizeof(d->d_name) - 1] = 0;
349 * Assume the first eight bytes of the value are
352 value = fzap_leaf_value(&zl, zc);
/* Object number and type are both decoded from the same value word. */
354 d->d_fileno = ZFS_DIRENT_OBJ(value);
355 d->d_type = ZFS_DIRENT_TYPE(value);
356 d->d_namlen = strlen(d->d_name);
/*
 * vdev_read: read bytes from the descriptor carried in priv, starting at
 * byte offset, into buf. Requests need not be sector aligned: a partial
 * first or last sector goes through a one-sector bounce buffer, whole
 * sectors in between are read directly into the caller buffer.
 * The vdev argument is not referenced in the visible lines.
 * NOTE(review): this view of the function has gaps; error paths and
 * several guarding conditions are not shown.
 */
363 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
366 size_t res, head, tail, total_size, full_sec_size;
367 unsigned secsz, do_tail_read;
369 char *outbuf, *bouncebuf;
/* priv carries the file descriptor as an integer. */
371 fd = (uintptr_t) priv;
372 outbuf = (char *) buf;
/* Ask the device for its sector size; the arithmetic below is in units of secsz. */
375 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
380 * Handling reads of arbitrary offset and size - multi-sector case
381 * and single-sector case.
384 * (do_tail_read = true if tail > 0)
386 * |<----------------------total_size--------------------->|
388 * |<--head-->|<--------------bytes------------>|<--tail-->|
390 * | | |<~full_sec_size~>| | |
391 * +------------------+ +------------------+
392 * | |0101010| . . . |0101011| |
393 * +------------------+ +------------------+
394 * start_sec start_sec + n
398 * (do_tail_read = false)
400 * |<------total_size = secsz----->|
402 * |<-head->|<---bytes--->|<-tail->|
403 * +-------------------------------+
404 * | |0101010101010| |
405 * +-------------------------------+
/*
 * head: bytes to skip in the first sector.
 * tail: unused bytes at the end of the last sector.
 */
408 start_sec = offset / secsz;
409 head = offset % secsz;
410 total_size = roundup2(head + bytes, secsz);
411 tail = total_size - (head + bytes);
/*
 * NOTE(review): for an aligned read shorter than one sector (head == 0,
 * bytes < secsz) do_tail_read is false because head + bytes > secsz does
 * not hold, so the bounce-buffer condition below is false as well.
 * Confirm that the full-sector read further down cannot then place secsz
 * bytes into a caller buffer that is only bytes long.
 */
412 do_tail_read = ((tail > 0) && (head + bytes > secsz));
/*
 * full_sec_size starts at total_size and is reduced by one sector twice;
 * the guarding conditions are not visible (presumably head > 0 and
 * do_tail_read).
 */
413 full_sec_size = total_size;
415 full_sec_size -= secsz;
417 full_sec_size -= secsz;
419 /* Return of partial sector data requires a bounce buffer. */
420 if ((head > 0) || do_tail_read) {
421 bouncebuf = zfs_alloc(secsz);
422 if (bouncebuf == NULL) {
423 printf("vdev_read: out of memory\n");
/* Position the descriptor at the start of the first sector touched. */
428 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1)
431 /* Partial data return from first sector */
433 res = read(fd, bouncebuf, secsz);
/* Copy from head onwards, but never more than the caller asked for. */
438 memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
439 outbuf += min(secsz - head, bytes);
442 /* Full data return from read sectors */
443 if (full_sec_size > 0) {
444 res = read(fd, outbuf, full_sec_size);
445 if (res != full_sec_size) {
449 outbuf += full_sec_size;
452 /* Partial data return from last sector */
454 res = read(fd, bouncebuf, secsz);
/* Only the first secsz - tail bytes of the last sector belong to the request. */
459 memcpy(outbuf, bouncebuf, secsz - tail);
/* Release the bounce buffer if one was allocated. */
464 if (bouncebuf != NULL)
465 zfs_free(bouncebuf, secsz);
477 if (archsw.arch_zfs_probe == NULL)
479 archsw.arch_zfs_probe();
482 spa = STAILQ_FIRST(&zfs_pools);
483 while (spa != NULL) {
484 next = STAILQ_NEXT(spa, spa_link);
485 if (zfs_spa_init(spa)) {
487 STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
489 STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
497 struct zfs_probe_args {
/*
 * zfs_diskread: adapter around vdev_read that takes its position and
 * length in sectors. arg is a struct zfs_probe_args; both offset and
 * blocks are multiplied by ppa->secsz to obtain byte values, and the read
 * is issued on ppa->fd with a NULL vdev.
 */
505 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
507 struct zfs_probe_args *ppa;
509 ppa = (struct zfs_probe_args *)arg;
510 return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
511 offset * ppa->secsz, buf, blocks * ppa->secsz));
/*
 * zfs_probe: probe the device open on fd with vdev_probe, using vdev_read
 * as the read callback and the descriptor as its private argument.
 * When the probe succeeds and pool_guid is not NULL, the guid of the pool
 * that was found is stored through it.
 */
515 zfs_probe(int fd, uint64_t *pool_guid)
521 ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
522 if (ret == 0 && pool_guid != NULL)
523 *pool_guid = spa->spa_guid;
/*
 * zfs_probe_partition: partition-table iteration callback. Builds a device
 * name for the partition from ppa->devname and partname, opens it and
 * probes it with zfs_probe. For PART_FREEBSD partitions it also opens a
 * nested partition table and iterates it with this same callback.
 * NOTE(review): this view of the function has gaps; error handling and
 * the close of pa.fd are not visible.
 */
528 zfs_probe_partition(void *arg, const char *partname,
529 const struct ptable_entry *part)
531 struct zfs_probe_args *ppa, pa;
532 struct ptable *table;
536 /* Probe only freebsd-zfs and freebsd partitions */
537 if (part->type != PART_FREEBSD &&
538 part->type != PART_FREEBSD_ZFS)
541 ppa = (struct zfs_probe_args *)arg;
/*
 * Copy the parent device name minus its last character (presumably a
 * trailing colon), then append partname and a colon.
 * NOTE(review): strlen(ppa->devname) - 1 wraps around if the name is empty.
 */
542 strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
543 devname[strlen(ppa->devname) - 1] = '\0';
/*
 * NOTE(review): devname is both the destination and a %s source of this
 * sprintf; the C standard leaves copying between overlapping objects
 * undefined. The declaration and size of devname are not visible here,
 * so the write is also unbounded as far as this view shows.
 */
544 sprintf(devname, "%s%s:", devname, partname);
545 pa.fd = open(devname, O_RDONLY);
548 ret = zfs_probe(pa.fd, ppa->pool_guid);
551 /* Do we have BSD label here? */
552 if (part->type == PART_FREEBSD) {
/* The nested scan inherits the pool_guid pointer and sector size of the parent. */
553 pa.devname = devname;
554 pa.pool_guid = ppa->pool_guid;
555 pa.secsz = ppa->secsz;
/* Table size is the partition length in sectors (end - start + 1). */
556 table = ptable_open(&pa, part->end - part->start + 1,
557 ppa->secsz, zfs_diskread);
559 ptable_iterate(table, &pa, zfs_probe_partition);
/*
 * zfs_probe_dev: open devname and look for a ZFS pool on it.
 * Two probing paths are visible: a direct zfs_probe when the device
 * description names both a slice and a partition, and a walk of the
 * partition table with zfs_probe_partition as the callback.
 * NOTE(review): this view of the function has gaps; return statements and
 * the checks on the ioctl results are not visible.
 */
568 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
570 struct disk_devdesc *dev;
571 struct ptable *table;
572 struct zfs_probe_args pa;
578 pa.fd = open(devname, O_RDONLY);
582 * We will not probe the whole disk, we can not boot from such
583 * disks and some systems will misreport the disk sizes and will
584 * hang while accessing the disk.
586 if (archsw.arch_getdev((void **)&dev, devname, NULL) == 0) {
587 int partition = dev->d_partition;
588 int slice = dev->d_slice;
/* Both a partition and a slice were specified: probe that device directly. */
591 if (partition != D_PARTNONE && slice != D_SLICENONE) {
592 ret = zfs_probe(pa.fd, pool_guid);
598 /* Probe each partition */
/* Media size and sector size give the sector count handed to ptable_open. */
599 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
601 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
603 pa.devname = devname;
604 pa.pool_guid = pool_guid;
605 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
608 ptable_iterate(table, &pa, zfs_probe_partition);
/* A guid that is still zero is tested for (presumably meaning no pool was found). */
613 if (pool_guid && *pool_guid == 0)
619 * Print information about ZFS pools
/*
 * zfs_dev_print: print the known pools through the pager.
 * The verbose argument is not referenced in the visible lines (presumably
 * it selects the spa_all_status path).
 */
622 zfs_dev_print(int verbose)
/* Test for an empty pool list first. */
628 if (STAILQ_EMPTY(&zfs_pools))
/* Heading: the devsw name followed by the word devices. */
631 printf("%s devices:", zfs_dev.dv_name);
632 if ((ret = pager_output("\n")) != 0)
636 return (spa_all_status());
/* One pager line per pool, built in the local line buffer. */
638 STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
639 snprintf(line, sizeof(line), " zfs:%s\n", spa->spa_name);
640 ret = pager_output(line);
648 * Attempt to open the pool described by (dev) for use by (f).
/*
 * zfs_dev_open: the variadic argument is a struct zfs_devdesc pointer.
 * Selects a pool, mounts the dataset named by dev->root_guid with
 * zfs_mount and stores the resulting mount in f->f_devdata.
 * NOTE(review): this view of the function has gaps; error returns and any
 * release of mount on failure are not visible.
 */
651 zfs_dev_open(struct open_file *f, ...)
654 struct zfs_devdesc *dev;
655 struct zfsmount *mount;
660 dev = va_arg(args, struct zfs_devdesc *);
/*
 * A zero pool guid selects the first known pool; the guid lookup below is
 * presumably the else arm.
 */
663 if (dev->pool_guid == 0)
664 spa = STAILQ_FIRST(&zfs_pools);
666 spa = spa_find_by_guid(dev->pool_guid);
/* NOTE(review): mount goes to zfs_mount with no NULL check on malloc. */
669 mount = malloc(sizeof(*mount));
670 rv = zfs_mount(spa, dev->root_guid, mount);
/* Only object sets of type DMU_OST_ZFS are expected here. */
675 if (mount->objset.os_type != DMU_OST_ZFS) {
676 printf("Unexpected object set type %ju\n",
677 (uintmax_t)mount->objset.os_type);
681 f->f_devdata = mount;
687 zfs_dev_close(struct open_file *f)
696 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
702 struct devsw zfs_dev = {
705 .dv_init = zfs_dev_init,
706 .dv_strategy = zfs_dev_strategy,
707 .dv_open = zfs_dev_open,
708 .dv_close = zfs_dev_close,
710 .dv_print = zfs_dev_print,
/*
 * zfs_parsedev: parse a device specification into dev (pool_guid,
 * root_guid and d_dev) and set *path to the text that follows it.
 * The pool name is the text from np up to sep, the dataset name is the
 * text between sep and end. Both are staged in static buffers, so the
 * result of one call is overwritten by the next.
 * NOTE(review): how np is derived from devspec, and several adjustments
 * of sep and end, are not visible in this view.
 */
715 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
717 static char rootname[ZFS_MAXNAMELEN];
718 static char poolname[ZFS_MAXNAMELEN];
/* end: last colon in the specification; sep: first slash. */
729 end = strrchr(np, ':');
732 sep = strchr(np, '/');
733 if (sep == NULL || sep >= end)
/*
 * NOTE(review): no bound against sizeof(poolname) or sizeof(rootname) is
 * visible before these copies -- confirm the lengths are limited.
 */
735 memcpy(poolname, np, sep - np);
736 poolname[sep - np] = '\0';
739 memcpy(rootname, sep, end - sep);
740 rootname[end - sep] = '\0';
745 spa = spa_find_by_name(poolname);
748 dev->pool_guid = spa->spa_guid;
749 rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
/* Skip the separator at end unless end already points at the terminator. */
753 *path = (*end == '\0') ? end : end + 1;
754 dev->dd.d_dev = &zfs_dev;
/*
 * zfs_fmtdev: format the device description vdev as text in a static
 * buffer, as devname:pool: when the dataset name is empty and as
 * devname:pool/dataset: otherwise. Missing guids in dev are filled in
 * along the way (first pool, root filesystem of the pool).
 * NOTE(review): this view of the function has gaps; the return statements
 * are not visible.
 */
759 zfs_fmtdev(void *vdev)
761 static char rootname[ZFS_MAXNAMELEN];
762 static char buf[2 * ZFS_MAXNAMELEN + 8];
763 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
767 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
770 if (dev->pool_guid == 0) {
/*
 * NOTE(review): the result of STAILQ_FIRST is dereferenced on the very
 * next line; with an empty pool list that is a NULL dereference.
 */
771 spa = STAILQ_FIRST(&zfs_pools);
772 dev->pool_guid = spa->spa_guid;
774 spa = spa_find_by_guid(dev->pool_guid);
776 printf("ZFS: can't find pool by guid\n");
/* A zero root guid is replaced by the root filesystem of the pool. */
779 if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
780 printf("ZFS: can't find root filesystem\n");
/* Translate the root guid back into a dataset name. */
783 if (zfs_rlookup(spa, dev->root_guid, rootname)) {
784 printf("ZFS: can't find filesystem by guid\n");
/*
 * NOTE(review): both sprintf calls write into buf without a length limit;
 * the buffer is sized for two names of ZFS_MAXNAMELEN plus 8 bytes, so
 * the length of dv_name matters -- confirm.
 */
788 if (rootname[0] == '\0')
789 sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name);
791 sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name,
/*
 * zfs_list: split name at its first slash into a pool name and a dataset
 * name, look up the pool and the dataset, and return the result of
 * zfs_list_dataset for that dataset.
 * NOTE(review): the computation of len and the error returns are not
 * visible in this view.
 */
797 zfs_list(const char *name)
799 static char poolname[ZFS_MAXNAMELEN];
807 dsname = strchr(name, '/');
808 if (dsname != NULL) {
/* NOTE(review): no bound of len against sizeof(poolname) is visible -- confirm. */
813 memcpy(poolname, name, len);
814 poolname[len] = '\0';
816 spa = spa_find_by_name(poolname);
819 rv = zfs_lookup_dataset(spa, dsname, &objid);
823 return (zfs_list_dataset(spa, objid));
/*
 * init_zfs_bootenv: seed the boot environment variables from the current
 * device string. Sets zfs_be_active, zfs_be_currpage and zfs_be_root and
 * then hands the boot environment root to zfs_bootenv_initial.
 * NOTE(review): this view of the function has gaps; the early returns,
 * the check of the strdup result and the release of currdev are not
 * visible.
 */
827 init_zfs_bootenv(const char *currdev_in)
829 char *beroot, *currdev;
833 currdev_len = strlen(currdev_in);
834 if (currdev_len == 0)
/* Only device strings with the zfs: prefix are of interest. */
836 if (strncmp(currdev_in, "zfs:", 4) != 0)
/* Work on a private copy, which is edited in place below. */
838 currdev = strdup(currdev_in);
841 /* Remove the trailing : */
842 currdev[currdev_len - 1] = '\0';
843 setenv("zfs_be_active", currdev, 1);
844 setenv("zfs_be_currpage", "1", 1);
845 /* Remove the last element (current bootenv) */
846 beroot = strrchr(currdev, '/');
/* Step past the first colon so beroot starts after the zfs: prefix. */
849 beroot = strchr(currdev, ':') + 1;
850 setenv("zfs_be_root", beroot, 1);
851 zfs_bootenv_initial(beroot);
/*
 * zfs_bootenv_initial: collect the child datasets of name into the
 * zfs_be_head list through zfs_belist_add, export each one as a
 * bootenvs[N] variable, export the final index as bootenvs_count and
 * empty the list again.
 * NOTE(review): this view of the function has gaps; the initialisation
 * and increment of bootenvs_idx, the computation of len and the release
 * of list entries are not visible.
 */
856 zfs_bootenv_initial(const char *name)
858 char poolname[ZFS_MAXNAMELEN], *dsname;
859 char envname[32], envval[256];
862 int bootenvs_idx, len, rv;
864 SLIST_INIT(&zfs_be_head);
867 dsname = strchr(name, '/');
868 if (dsname != NULL) {
/*
 * NOTE(review): the size given to strlcpy is derived from len rather than
 * from sizeof(poolname) -- confirm len is bounded.
 */
873 strlcpy(poolname, name, len + 1);
874 spa = spa_find_by_name(poolname);
877 rv = zfs_lookup_dataset(spa, dsname, &objid);
/* zfs_belist_add is passed as the callback for this dataset walk. */
880 rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
882 /* Populate the initial environment variables */
883 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
884 /* Enumerate all bootenvs for general usage */
885 snprintf(envname, sizeof(envname), "bootenvs[%d]", bootenvs_idx);
886 snprintf(envval, sizeof(envval), "zfs:%s/%s", name, zfs_be->name);
887 rv = setenv(envname, envval, 1);
/* Export the index reached by the loop as the count. */
892 snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
893 setenv("bootenvs_count", envval, 1);
895 /* Clean up the SLIST of ZFS BEs */
896 while (!SLIST_EMPTY(&zfs_be_head)) {
897 zfs_be = SLIST_FIRST(&zfs_be_head);
898 SLIST_REMOVE_HEAD(&zfs_be_head, entries);
/*
 * zfs_bootenv: rebuild the boot environment list for name. Updates
 * zfs_be_root when name differs from it, collects the datasets through
 * zfs_belist_add, stores the page count in zfs_be_pages, resets
 * zfs_be_currpage to 1 when it exceeds the page count, and empties the
 * list again.
 * NOTE(review): this view of the function has gaps; error returns, the
 * computation of len, the declaration of becount and the release of list
 * entries are not visible.
 */
907 zfs_bootenv(const char *name)
909 static char poolname[ZFS_MAXNAMELEN], *dsname, *root;
913 int len, rv, pages, perpage, currpage;
917 if ((root = getenv("zfs_be_root")) == NULL)
920 if (strcmp(name, root) != 0) {
921 if (setenv("zfs_be_root", name, 1) != 0)
925 SLIST_INIT(&zfs_be_head);
928 dsname = strchr(name, '/');
929 if (dsname != NULL) {
/* NOTE(review): no bound of len against sizeof(poolname) is visible -- confirm. */
934 memcpy(poolname, name, len);
935 poolname[len] = '\0';
937 spa = spa_find_by_name(poolname);
940 rv = zfs_lookup_dataset(spa, dsname, &objid);
943 rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
945 /* Calculate and store the number of pages of BEs */
/* pages is zfs_env_count divided by perpage, rounded up. */
946 perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
947 pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
/*
 * NOTE(review): the size is the literal 4; the declaration of becount is
 * not visible here -- confirm the two agree.
 */
948 snprintf(becount, 4, "%d", pages);
949 if (setenv("zfs_be_pages", becount, 1) != 0)
952 /* Roll over the page counter if it has exceeded the maximum */
/*
 * NOTE(review): the getenv result goes straight into strtol; if
 * zfs_be_currpage is unset, getenv returns NULL and strtol would
 * dereference it.
 */
953 currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
954 if (currpage > pages) {
955 if (setenv("zfs_be_currpage", "1", 1) != 0)
959 /* Populate the menu environment variables */
962 /* Clean up the SLIST of ZFS BEs */
963 while (!SLIST_EMPTY(&zfs_be_head)) {
964 zfs_be = SLIST_FIRST(&zfs_be_head);
965 SLIST_REMOVE_HEAD(&zfs_be_head, entries);
/*
 * zfs_belist_add: dataset callback that records one boot environment.
 * Names starting with a dollar sign are tested for first; otherwise a new
 * zfs_be_entry is allocated and pushed on the head of zfs_be_head.
 * The value argument is marked unused.
 * NOTE(review): how the entry is filled in (its name field in particular)
 * and the return values are not visible in this view.
 */
973 zfs_belist_add(const char *name, uint64_t value __unused)
976 /* Skip special datasets that start with a $ character */
977 if (strncmp(name, "$", 1) == 0) {
980 /* Add the boot environment to the head of the SLIST */
981 zfs_be = malloc(sizeof(struct zfs_be_entry));
982 if (zfs_be == NULL) {
/* Head insertion means the list ends up in reverse order of the callbacks. */
986 SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
995 char envname[32], envval[256];
996 char *beroot, *pagenum;
999 beroot = getenv("zfs_be_root");
1000 if (beroot == NULL) {
1004 pagenum = getenv("zfs_be_currpage");
1005 if (pagenum != NULL) {
1006 page = strtol(pagenum, NULL, 10);
1013 zfs_env_index = ZFS_BE_FIRST;
1014 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1015 /* Skip to the requested page number */
1016 if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1021 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1022 snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1023 rv = setenv(envname, envval, 1);
1028 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1029 rv = setenv(envname, envval, 1);
1034 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1035 rv = setenv(envname, "set_bootenv", 1);
1040 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1041 snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1042 rv = setenv(envname, envval, 1);
1048 if (zfs_env_index > ZFS_BE_LAST) {
1054 for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
1055 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1056 (void)unsetenv(envname);
1057 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1058 (void)unsetenv(envname);
1059 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1060 (void)unsetenv(envname);
1061 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1062 (void)unsetenv(envname);