2 * Copyright (c) 2007 Doug Rabson
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
33 * Stand-alone file reading package.
38 #include <sys/param.h>
40 #include <sys/queue.h>
45 #include <bootstrap.h>
51 /* Define the range of indexes to be populated with ZFS Boot Environments */
52 #define ZFS_BE_FIRST 4
55 static int zfs_open(const char *path, struct open_file *f);
56 static int zfs_close(struct open_file *f);
57 static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
58 static off_t zfs_seek(struct open_file *f, off_t offset, int where);
59 static int zfs_stat(struct open_file *f, struct stat *sb);
60 static int zfs_readdir(struct open_file *f, struct dirent *d);
61 static int zfs_mount(const char *dev, const char *path, void **data);
62 static int zfs_unmount(const char *dev, void *data);
64 static void zfs_bootenv_initial(const char *envname, spa_t *spa,
65 const char *name, const char *dsname, int checkpoint);
66 static void zfs_checkpoints_initial(spa_t *spa, const char *name,
69 static int zfs_parsedev(struct devdesc **idev, const char *devspec,
74 struct fs_ops zfs_fsops = {
77 .fo_close = zfs_close,
79 .fo_write = null_write,
82 .fo_readdir = zfs_readdir,
83 .fo_mount = zfs_mount,
84 .fo_unmount = zfs_unmount
91 off_t f_seekp; /* seek pointer */
93 uint64_t f_zap_type; /* zap type for readdir */
94 uint64_t f_num_leafs; /* number of fzap leaf blocks */
95 zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */
98 static int zfs_env_index;
99 static int zfs_env_count;
101 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
102 struct zfs_be_list *zfs_be_headp;
103 struct zfs_be_entry {
105 SLIST_ENTRY(zfs_be_entry) entries;
106 } *zfs_be, *zfs_be_tmp;
112 zfs_open(const char *upath, struct open_file *f)
114 struct devdesc *dev = f->f_devdata;
115 struct zfsmount *mount = dev->d_opendata;
119 if (f->f_dev != &zfs_dev)
122 /* allocate file system specific data structure */
123 fp = calloc(1, sizeof(struct file));
128 rc = zfs_lookup(mount, upath, &fp->f_dnode);
138 zfs_close(struct open_file *f)
140 struct file *fp = (struct file *)f->f_fsdata;
142 dnode_cache_obj = NULL;
150 * Copy a portion of a file into kernel memory.
151 * Cross block boundaries when necessary.
154 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */)
156 struct devdesc *dev = f->f_devdata;
157 const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
158 struct file *fp = (struct file *)f->f_fsdata;
163 rc = zfs_stat(f, &sb);
167 if (fp->f_seekp + n > sb.st_size)
168 n = sb.st_size - fp->f_seekp;
170 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
176 for (i = 0; i < n; i++)
177 putchar(((char*) start)[i]);
/*
 * Move the file's seek pointer (fp->f_seekp) and return the new position.
 * Three positioning forms are visible below: absolute, relative to the
 * current pointer, and relative to the size reported by zfs_stat().
 */
187 zfs_seek(struct open_file *f, off_t offset, int where)
189 struct file *fp = (struct file *)f->f_fsdata;
/* Absolute: the seek pointer becomes offset (presumably SEEK_SET). */
193 fp->f_seekp = offset;
/* Relative: advance the current pointer by offset (presumably SEEK_CUR). */
196 fp->f_seekp += offset;
/* Size-relative (presumably SEEK_END): the size is taken from zfs_stat(). */
203 error = zfs_stat(f, &sb);
/*
 * NOTE(review): offset is subtracted from st_size here, whereas lseek(2)
 * defines SEEK_END as size plus offset -- confirm callers rely on this.
 */
208 fp->f_seekp = sb.st_size - offset;
215 return (fp->f_seekp);
219 zfs_stat(struct open_file *f, struct stat *sb)
221 struct devdesc *dev = f->f_devdata;
222 const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
223 struct file *fp = (struct file *)f->f_fsdata;
225 return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
229 zfs_readdir(struct open_file *f, struct dirent *d)
231 struct devdesc *dev = f->f_devdata;
232 const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
233 struct file *fp = (struct file *)f->f_fsdata;
236 size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
239 rc = zfs_stat(f, &sb);
242 if (!S_ISDIR(sb.st_mode))
246 * If this is the first read, get the zap type.
248 if (fp->f_seekp == 0) {
249 rc = dnode_read(spa, &fp->f_dnode,
250 0, &fp->f_zap_type, sizeof(fp->f_zap_type));
254 if (fp->f_zap_type == ZBT_MICRO) {
255 fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
257 rc = dnode_read(spa, &fp->f_dnode,
258 offsetof(zap_phys_t, zap_num_leafs),
260 sizeof(fp->f_num_leafs));
265 fp->f_zap_leaf = malloc(bsize);
266 if (fp->f_zap_leaf == NULL)
268 rc = dnode_read(spa, &fp->f_dnode,
277 if (fp->f_zap_type == ZBT_MICRO) {
279 if (fp->f_seekp >= bsize)
282 rc = dnode_read(spa, &fp->f_dnode,
283 fp->f_seekp, &mze, sizeof(mze));
286 fp->f_seekp += sizeof(mze);
288 if (!mze.mze_name[0])
291 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
292 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
293 strcpy(d->d_name, mze.mze_name);
294 d->d_namlen = strlen(d->d_name);
298 zap_leaf_chunk_t *zc, *nc;
305 * Initialise this so we can use the ZAP size
306 * calculating macros.
308 zl.l_bs = ilog2(bsize);
309 zl.l_phys = fp->f_zap_leaf;
312 * Figure out which chunk we are currently looking at
313 * and consider seeking to the next leaf. We use the
314 * low bits of f_seekp as a simple chunk index.
317 chunk = fp->f_seekp & (bsize - 1);
318 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
319 fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
323 * Check for EOF and read the new leaf.
325 if (fp->f_seekp >= bsize * fp->f_num_leafs)
328 rc = dnode_read(spa, &fp->f_dnode,
336 zc = &ZAP_LEAF_CHUNK(&zl, chunk);
338 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
341 namelen = zc->l_entry.le_name_numints;
342 if (namelen > sizeof(d->d_name))
343 namelen = sizeof(d->d_name);
346 * Paste the name back together.
348 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
350 while (namelen > 0) {
353 if (len > ZAP_LEAF_ARRAY_BYTES)
354 len = ZAP_LEAF_ARRAY_BYTES;
355 memcpy(p, nc->l_array.la_array, len);
358 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
360 d->d_name[sizeof(d->d_name) - 1] = 0;
363 * Assume the first eight bytes of the value are
366 value = fzap_leaf_value(&zl, zc);
368 d->d_fileno = ZFS_DIRENT_OBJ(value);
369 d->d_type = ZFS_DIRENT_TYPE(value);
370 d->d_namlen = strlen(d->d_name);
377 spa_find_by_dev(struct zfs_devdesc *dev)
380 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
383 if (dev->pool_guid == 0)
384 return (STAILQ_FIRST(&zfs_pools));
386 return (spa_find_by_guid(dev->pool_guid));
390 * If path is NULL, create the mount structure but do not add it to the list.
/*
 * Mount the dataset described by the device string dev.  The string is
 * parsed with zfs_parsedev(), the pool is looked up with spa_find_by_dev(),
 * and a zeroed struct zfsmount is filled in by zfs_mount_impl().  The
 * mount's path is a private strdup() copy of the caller's path.
 */
393 zfs_mount(const char *dev, const char *path, void **data)
395 struct zfs_devdesc *zfsdev = NULL;
397 struct zfsmount *mnt = NULL;
401 rv = zfs_parsedev((struct devdesc **)&zfsdev, dev, NULL);
406 spa = spa_find_by_dev(zfsdev);
412 mnt = calloc(1, sizeof(*mnt));
/*
 * Test the caller's path, not mnt->path: mnt was just zero-filled by
 * calloc(), so the old "mnt->path != NULL" test could never be true and
 * the mount point was never recorded.  This also keeps strdup() from
 * being handed a NULL path.
 */
418 if (path != NULL) {
419 mnt->path = strdup(path);
420 if (mnt->path == NULL) {
426 rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt);
/* Only object sets of type DMU_OST_ZFS are accepted as file systems. */
428 if (rv == 0 && mnt->objset.os_type != DMU_OST_ZFS) {
429 printf("Unexpected object set type %ju\n",
430 (uintmax_t)mnt->objset.os_type);
444 STAILQ_INSERT_TAIL(&zfsmount, mnt, next);
452 zfs_unmount(const char *dev, void *data)
454 struct zfsmount *mnt = data;
456 STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next);
463 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
466 size_t res, head, tail, total_size, full_sec_size;
467 unsigned secsz, do_tail_read;
469 char *outbuf, *bouncebuf;
471 fd = (uintptr_t) priv;
472 outbuf = (char *) buf;
475 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
480 * Handling reads of arbitrary offset and size - multi-sector case
481 * and single-sector case.
484 * (do_tail_read = true if tail > 0)
486 * |<----------------------total_size--------------------->|
488 * |<--head-->|<--------------bytes------------>|<--tail-->|
490 * | | |<~full_sec_size~>| | |
491 * +------------------+ +------------------+
492 * | |0101010| . . . |0101011| |
493 * +------------------+ +------------------+
494 * start_sec start_sec + n
498 * (do_tail_read = false)
500 * |<------total_size = secsz----->|
502 * |<-head->|<---bytes--->|<-tail->|
503 * +-------------------------------+
504 * | |0101010101010| |
505 * +-------------------------------+
508 start_sec = offset / secsz;
509 head = offset % secsz;
510 total_size = roundup2(head + bytes, secsz);
511 tail = total_size - (head + bytes);
512 do_tail_read = ((tail > 0) && (head + bytes > secsz));
513 full_sec_size = total_size;
515 full_sec_size -= secsz;
517 full_sec_size -= secsz;
519 /* Return of partial sector data requires a bounce buffer. */
520 if ((head > 0) || do_tail_read || bytes < secsz) {
521 bouncebuf = malloc(secsz);
522 if (bouncebuf == NULL) {
523 printf("vdev_read: out of memory\n");
528 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
533 /* Partial data return from first sector */
535 res = read(fd, bouncebuf, secsz);
540 memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
541 outbuf += min(secsz - head, bytes);
545 * Full data return from read sectors.
546 * Note, there is still corner case where we read
547 * from sector boundary, but less than sector size, e.g. reading 512B
550 if (full_sec_size > 0) {
551 if (bytes < full_sec_size) {
552 res = read(fd, bouncebuf, secsz);
557 memcpy(outbuf, bouncebuf, bytes);
559 res = read(fd, outbuf, full_sec_size);
560 if (res != full_sec_size) {
564 outbuf += full_sec_size;
568 /* Partial data return from last sector */
570 res = read(fd, bouncebuf, secsz);
575 memcpy(outbuf, bouncebuf, secsz - tail);
/*
 * Write bytes from buf to the descriptor kept in vdev->v_priv, starting at
 * byte position offset.  The sector size is obtained with DIOCGSECTORSIZE.
 * Sectors that are only partly covered by the request (a leading "head",
 * a trailing "tail", or a request shorter than one sector) are handled as
 * read-modify-write through a one-sector bounce buffer; fully covered
 * sectors are written straight from the caller's buffer.
 */
585 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
588 size_t head, tail, total_size, full_sec_size;
589 unsigned secsz, do_tail_write;
592 char *outbuf, *bouncebuf;
594 fd = (uintptr_t)vdev->v_priv;
595 outbuf = (char *)buf;
598 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
602 start_sec = offset / secsz;
603 head = offset % secsz;
604 total_size = roundup2(head + bytes, secsz);
605 tail = total_size - (head + bytes);
606 do_tail_write = ((tail > 0) && (head + bytes > secsz));
607 full_sec_size = total_size;
609 full_sec_size -= secsz;
611 full_sec_size -= secsz;
613 /* Partial sector write requires a bounce buffer. */
614 if ((head > 0) || do_tail_write || bytes < secsz) {
615 bouncebuf = malloc(secsz);
616 if (bouncebuf == NULL) {
617 printf("vdev_write: out of memory\n");
622 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
627 /* Partial data for first sector */
629 res = read(fd, bouncebuf, secsz);
630 if ((unsigned)res != secsz) {
634 memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
/*
 * Step back over the sector just read.  secsz is unsigned, so a bare
 * "-secsz" is a large positive value that stays positive when widened to
 * off_t; convert to off_t first so the offset really is negative.
 */
635 (void) lseek(fd, -(off_t)secsz, SEEK_CUR);
636 res = write(fd, bouncebuf, secsz);
637 if ((unsigned)res != secsz) {
641 outbuf += min(secsz - head, bytes);
645 * Full data write to sectors.
646 * Note, there is still corner case where we write
647 * to sector boundary, but less than sector size, e.g. write 512B
650 if (full_sec_size > 0) {
651 if (bytes < full_sec_size) {
652 res = read(fd, bouncebuf, secsz);
653 if ((unsigned)res != secsz) {
657 memcpy(bouncebuf, outbuf, bytes);
/* Rewind one sector; see the off_t conversion note above. */
658 (void) lseek(fd, -(off_t)secsz, SEEK_CUR);
659 res = write(fd, bouncebuf, secsz);
660 if ((unsigned)res != secsz) {
665 res = write(fd, outbuf, full_sec_size);
666 if ((unsigned)res != full_sec_size) {
670 outbuf += full_sec_size;
674 /* Partial data write to last sector */
676 res = read(fd, bouncebuf, secsz);
677 if ((unsigned)res != secsz) {
681 memcpy(bouncebuf, outbuf, secsz - tail);
/* Rewind one sector; see the off_t conversion note above. */
682 (void) lseek(fd, -(off_t)secsz, SEEK_CUR);
683 res = write(fd, bouncebuf, secsz);
684 if ((unsigned)res != secsz) {
704 if (archsw.arch_zfs_probe == NULL)
706 archsw.arch_zfs_probe();
709 spa = STAILQ_FIRST(&zfs_pools);
710 while (spa != NULL) {
711 next = STAILQ_NEXT(spa, spa_link);
712 if (zfs_spa_init(spa)) {
714 STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
716 STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
724 struct zfs_probe_args {
732 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
734 struct zfs_probe_args *ppa;
736 ppa = (struct zfs_probe_args *)arg;
737 return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
738 offset * ppa->secsz, buf, blocks * ppa->secsz));
742 zfs_probe(int fd, uint64_t *pool_guid)
748 ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
749 if (ret == 0 && pool_guid != NULL)
751 *pool_guid = spa->spa_guid;
/*
 * Partition-table callback: probe one partition for a ZFS pool.
 * arg is the struct zfs_probe_args of the enclosing device, partname is
 * the partition's name and part its table entry.  Partitions of type
 * PART_FREEBSD are additionally opened as a nested table and iterated
 * with this same function as the callback.
 */
756 zfs_probe_partition(void *arg, const char *partname,
757 const struct ptable_entry *part)
759 struct zfs_probe_args *ppa, pa;
760 struct ptable *table;
764 /* Probe only freebsd-zfs and freebsd partitions */
765 if (part->type != PART_FREEBSD &&
766 part->type != PART_FREEBSD_ZFS)
769 ppa = (struct zfs_probe_args *)arg;
/*
 * Build "<parent name minus its last character><partname>:" (the dropped
 * character is presumably the parent's trailing ':').  This is done in a
 * single bounded snprintf(): the previous strncpy() length was derived
 * from the source rather than the destination size, and the following
 * snprintf() named devname as both output buffer and "%s" argument, which
 * is undefined behavior for overlapping objects.
 */
770 snprintf(devname, sizeof(devname), "%.*s%s:",
771 (int)(strlen(ppa->devname) - 1), ppa->devname, partname);
773 pa.fd = open(devname, O_RDWR);
776 ret = zfs_probe(pa.fd, ppa->pool_guid);
779 /* Do we have BSD label here? */
780 if (part->type == PART_FREEBSD) {
781 pa.devname = devname;
782 pa.pool_guid = ppa->pool_guid;
783 pa.secsz = ppa->secsz;
784 table = ptable_open(&pa, part->end - part->start + 1,
785 ppa->secsz, zfs_diskread);
787 ptable_iterate(table, &pa, zfs_probe_partition);
796 * Return bootenv nvlist from pool label.
799 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
803 if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
806 return (zfs_get_bootenv_spa(spa, benvp));
810 * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
813 zfs_set_bootenv(void *vdev, nvlist_t *benv)
815 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
819 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
822 if ((spa = spa_find_by_dev(dev)) == NULL)
825 STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) {
826 vdev_write_bootenv(vd, benv);
829 spa->spa_bootenv = benv;
834 * Get bootonce value by key. The bootonce <key, value> pair is removed
835 * from the bootenv nvlist and the remaining nvlist is committed back to disk.
838 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
844 if ((rv = zfs_get_bootenv(vdev, &benv)) != 0)
847 if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL,
848 &result, &result_size)) == 0) {
849 if (result_size == 0) {
850 /* ignore empty string */
853 size = MIN((size_t)result_size + 1, size);
854 strlcpy(buf, result, size);
856 (void) nvlist_remove(benv, key, DATA_TYPE_STRING);
857 (void) zfs_set_bootenv(vdev, benv);
867 static int zfs_nvstore_setter(void *, int, const char *,
868 const void *, size_t);
869 static int zfs_nvstore_setter_str(void *, const char *, const char *,
871 static int zfs_nvstore_unset_impl(void *, const char *, bool);
872 static int zfs_nvstore_setenv(void *, void *);
875 * nvstore is only present for current rootfs pool.
878 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
880 struct zfs_devdesc *dev;
883 archsw.arch_getdev((void **)&dev, NULL, NULL);
887 rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
894 * nvstore is only present for current rootfs pool.
897 zfs_nvstore_unsethook(struct env_var *ev)
899 struct zfs_devdesc *dev;
902 archsw.arch_getdev((void **)&dev, NULL, NULL);
906 rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
913 zfs_nvstore_getter(void *vdev, const char *name, void **data)
915 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
922 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
925 if ((spa = spa_find_by_dev(dev)) == NULL)
928 if (spa->spa_bootenv == NULL)
931 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
932 NULL, &nv, NULL) != 0)
935 rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
938 asprintf(ptr, "%.*s", size, str);
947 zfs_nvstore_setter(void *vdev, int type, const char *name,
948 const void *data, size_t size)
950 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
956 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
959 if ((spa = spa_find_by_dev(dev)) == NULL)
962 if (spa->spa_bootenv == NULL)
965 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
966 NULL, &nv, NULL) != 0) {
967 nv = nvlist_create(NV_UNIQUE_NAME);
975 if (size != sizeof (int8_t)) {
979 rv = nvlist_add_int8(nv, name, *(int8_t *)data);
982 case DATA_TYPE_INT16:
983 if (size != sizeof (int16_t)) {
987 rv = nvlist_add_int16(nv, name, *(int16_t *)data);
990 case DATA_TYPE_INT32:
991 if (size != sizeof (int32_t)) {
995 rv = nvlist_add_int32(nv, name, *(int32_t *)data);
998 case DATA_TYPE_INT64:
999 if (size != sizeof (int64_t)) {
1003 rv = nvlist_add_int64(nv, name, *(int64_t *)data);
1006 case DATA_TYPE_BYTE:
1007 if (size != sizeof (uint8_t)) {
1011 rv = nvlist_add_byte(nv, name, *(int8_t *)data);
1014 case DATA_TYPE_UINT8:
1015 if (size != sizeof (uint8_t)) {
1019 rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
1022 case DATA_TYPE_UINT16:
1023 if (size != sizeof (uint16_t)) {
1027 rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
1030 case DATA_TYPE_UINT32:
1031 if (size != sizeof (uint32_t)) {
1035 rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
1038 case DATA_TYPE_UINT64:
1039 if (size != sizeof (uint64_t)) {
1043 rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
1046 case DATA_TYPE_STRING:
1047 rv = nvlist_add_string(nv, name, data);
1050 case DATA_TYPE_BOOLEAN_VALUE:
1051 if (size != sizeof (boolean_t)) {
1055 rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
1064 rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
1066 rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1070 rv = zfs_nvstore_setenv(vdev,
1071 nvpair_find(nv, name));
1073 env_discard(env_getenv(name));
1084 get_int64(const char *data, int64_t *ip)
1090 val = strtoll(data, &end, 0);
1091 if (errno != 0 || *data == '\0' || *end != '\0')
1099 get_uint64(const char *data, uint64_t *ip)
1105 val = strtoull(data, &end, 0);
1106 if (errno != 0 || *data == '\0' || *end != '\0')
1114 * Translate textual data to data type. If type is not set, and we are
1115 * creating new pair, use DATA_TYPE_STRING.
1118 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1121 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1129 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1132 if ((spa = spa_find_by_dev(dev)) == NULL)
1135 if (spa->spa_bootenv == NULL)
1138 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1139 NULL, &nv, NULL) != 0) {
1147 * if there is no existing pair, default to string.
1148 * Otherwise, use type from existing pair.
1150 nvh = nvpair_find(nv, name);
1152 dt = DATA_TYPE_STRING;
1154 nv_string_t *nvp_name;
1155 nv_pair_data_t *nvp_data;
1157 nvp_name = (nv_string_t *)(nvh + 1);
1158 nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1159 NV_ALIGN4(nvp_name->nv_size));
1160 dt = nvp_data->nv_type;
1163 dt = nvpair_type_from_name(type);
1169 case DATA_TYPE_INT8:
1170 rv = get_int64(data, &val);
1174 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1177 case DATA_TYPE_INT16:
1178 rv = get_int64(data, &val);
1182 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1185 case DATA_TYPE_INT32:
1186 rv = get_int64(data, &val);
1190 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1193 case DATA_TYPE_INT64:
1194 rv = get_int64(data, &val);
1196 rv = zfs_nvstore_setter(vdev, dt, name, &val,
1201 case DATA_TYPE_BYTE:
1202 rv = get_uint64(data, &uval);
1206 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1210 case DATA_TYPE_UINT8:
1211 rv = get_uint64(data, &uval);
1215 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1219 case DATA_TYPE_UINT16:
1220 rv = get_uint64(data, &uval);
1224 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1228 case DATA_TYPE_UINT32:
1229 rv = get_uint64(data, &uval);
1233 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1237 case DATA_TYPE_UINT64:
1238 rv = get_uint64(data, &uval);
1240 rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1245 case DATA_TYPE_STRING:
1246 rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1249 case DATA_TYPE_BOOLEAN_VALUE:
1250 rv = get_int64(data, &val);
1254 rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1264 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1266 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1271 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1274 if ((spa = spa_find_by_dev(dev)) == NULL)
1277 if (spa->spa_bootenv == NULL)
1280 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1281 NULL, &nv, NULL) != 0)
1284 rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1286 if (nvlist_next_nvpair(nv, NULL) == NULL) {
1287 rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1290 rv = nvlist_add_nvlist(spa->spa_bootenv,
1294 rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1298 env_discard(env_getenv(name));
1303 zfs_nvstore_unset(void *vdev, const char *name)
1305 return (zfs_nvstore_unset_impl(vdev, name, true));
1309 zfs_nvstore_print(void *vdev __unused, void *ptr)
1312 nvpair_print(ptr, 0);
1317 * Create environment variable from nvpair.
1318 * set hook will update nvstore with new value, unset hook will remove
1319 * variable from nvstore.
/*
 * Turn one nvpair (ptr points at its nvp_header_t) into a loader
 * environment variable.  The pair's name string follows the header and
 * its typed data follows the 4-byte-aligned name.  The value is rendered
 * as text according to nv_type and installed with env_setenv(), with
 * zfs_nvstore_sethook / zfs_nvstore_unsethook attached to the variable.
 */
1322 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1324 nvp_header_t *nvh = ptr;
1325 nv_string_t *nvp_name, *nvp_value;
1326 nv_pair_data_t *nvp_data;
1333 nvp_name = (nv_string_t *)(nvh + 1);
1334 nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1335 NV_ALIGN4(nvp_name->nv_size));
1337 if ((name = nvstring_get(nvp_name)) == NULL)
1341 switch (nvp_data->nv_type) {
1342 case DATA_TYPE_BYTE:
1343 case DATA_TYPE_UINT8:
/*
 * Plain "%u": the former "%uc" printed the number followed by a literal
 * 'c', which get_uint64() in zfs_nvstore_setter_str() rejects when the
 * variable is later written back through the set hook.
 */
1344 (void) asprintf(&value, "%u",
1345 *(unsigned *)&nvp_data->nv_data[0]);
1350 case DATA_TYPE_INT8:
/*
 * Render as a decimal number like every other integer type; "%c" emitted
 * a raw character, which get_int64() cannot parse on the way back.
 */
1351 (void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1356 case DATA_TYPE_INT16:
1357 (void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1362 case DATA_TYPE_UINT16:
1363 (void) asprintf(&value, "%hu",
1364 *(unsigned short *)&nvp_data->nv_data[0]);
1369 case DATA_TYPE_BOOLEAN_VALUE:
1370 case DATA_TYPE_INT32:
1371 (void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1376 case DATA_TYPE_UINT32:
1377 (void) asprintf(&value, "%u",
1378 *(unsigned *)&nvp_data->nv_data[0]);
1383 case DATA_TYPE_INT64:
1384 (void) asprintf(&value, "%jd",
1385 (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1390 case DATA_TYPE_UINT64:
1391 (void) asprintf(&value, "%ju",
1392 (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1397 case DATA_TYPE_STRING:
1398 nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1399 if ((value = nvstring_get(nvp_value)) == NULL) {
1410 if (value != NULL) {
1411 rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1412 zfs_nvstore_sethook, zfs_nvstore_unsethook);
1420 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1422 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1428 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1431 if ((spa = spa_find_by_dev(dev)) == NULL)
1434 if (spa->spa_bootenv == NULL)
1437 if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1438 NULL, &nv, NULL) != 0)
1443 while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1451 nvs_callbacks_t nvstore_zfs_cb = {
1452 .nvs_getter = zfs_nvstore_getter,
1453 .nvs_setter = zfs_nvstore_setter,
1454 .nvs_setter_str = zfs_nvstore_setter_str,
1455 .nvs_unset = zfs_nvstore_unset,
1456 .nvs_print = zfs_nvstore_print,
1457 .nvs_iterate = zfs_nvstore_iterate
1461 zfs_attach_nvstore(void *vdev)
1463 struct zfs_devdesc *dev = vdev;
1468 if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1471 if ((spa = spa_find_by_dev(dev)) == NULL)
1474 rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1475 NULL, &version, NULL);
1477 if (rv != 0 || version != VB_NVLIST) {
1481 dev = malloc(sizeof (*dev));
1484 memcpy(dev, vdev, sizeof (*dev));
1486 rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1490 rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1495 zfs_probe_dev(const char *devname, uint64_t *pool_guid, bool parts_too)
1497 struct ptable *table;
1498 struct zfs_probe_args pa;
1504 pa.fd = open(devname, O_RDWR);
1507 /* Probe the whole disk */
1508 ret = zfs_probe(pa.fd, pool_guid);
1514 /* Probe each partition */
1515 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1517 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1519 pa.devname = devname;
1520 pa.pool_guid = pool_guid;
1521 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1523 if (table != NULL) {
1524 ptable_iterate(table, &pa, zfs_probe_partition);
1525 ptable_close(table);
1529 if (pool_guid && *pool_guid == 0)
1535 * Print information about ZFS pools
1538 zfs_dev_print(int verbose)
1544 if (STAILQ_EMPTY(&zfs_pools))
1547 printf("%s devices:", zfs_dev.dv_name);
1548 if ((ret = pager_output("\n")) != 0)
1552 return (spa_all_status());
1554 STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1555 snprintf(line, sizeof(line), " zfs:%s\n", spa->spa_name);
1556 ret = pager_output(line);
1564 * Attempt to open the pool described by (dev) for use by (f).
/*
 * Device open hook.  The struct zfs_devdesc is taken from the variadic
 * arguments, its pool is resolved with spa_find_by_dev(), and the resulting
 * mount is stored in dev->dd.d_opendata.
 */
1567 zfs_dev_open(struct open_file *f, ...)
1570 struct zfs_devdesc *dev;
1571 struct zfsmount *mount;
1576 dev = va_arg(args, struct zfs_devdesc *);
1579 if ((spa = spa_find_by_dev(dev)) == NULL)
/* Look for an already registered mount that belongs to the same pool. */
1582 STAILQ_FOREACH(mount, &zfsmount, next) {
1583 if (spa->spa_guid == mount->spa->spa_guid)
1588 /* This device is not set as currdev; mount a private copy for ourselves. */
1590 rv = zfs_mount(devformat(&dev->dd), NULL, (void **)&mount);
1593 dev->dd.d_opendata = mount;
1599 zfs_dev_close(struct open_file *f)
1601 struct devdesc *dev;
1602 struct zfsmount *mnt, *mount;
1605 mnt = dev->d_opendata;
1607 STAILQ_FOREACH(mount, &zfsmount, next) {
1608 if (mnt->spa->spa_guid == mount->spa->spa_guid)
1617 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1623 struct devsw zfs_dev = {
1625 .dv_type = DEVT_ZFS,
1626 .dv_init = zfs_dev_init,
1627 .dv_strategy = zfs_dev_strategy,
1628 .dv_open = zfs_dev_open,
1629 .dv_close = zfs_dev_close,
1630 .dv_ioctl = noioctl,
1631 .dv_print = zfs_dev_print,
1632 .dv_cleanup = nullsys,
1633 .dv_fmtdev = zfs_fmtdev,
1634 .dv_parsedev = zfs_parsedev,
/*
 * Parse a device specification that starts with "zfs" into a newly
 * malloc()ed struct zfs_devdesc.  The text after the prefix is split into
 * a pool name (up to the first '/') and a dataset name (up to the last
 * ':'); the pool is resolved with spa_find_by_name() and the dataset with
 * zfs_lookup_dataset().  Whatever follows the last ':' is handed back
 * through *path.
 */
1638 zfs_parsedev(struct devdesc **idev, const char *devspec, const char **path)
1640 static char rootname[ZFS_MAXNAMELEN];
1641 static char poolname[ZFS_MAXNAMELEN];
1647 struct zfs_devdesc *dev;
1649 np = devspec + 3; /* Skip the leading 'zfs' */
1653 end = strrchr(np, ':');
1656 sep = strchr(np, '/');
1657 if (sep == NULL || sep >= end)
/*
 * NOTE(review): no check of (sep - np) or (end - sep) against
 * ZFS_MAXNAMELEN is visible before these copies into the fixed-size
 * static buffers -- confirm the length of devspec is bounded elsewhere.
 */
1659 memcpy(poolname, np, sep - np);
1660 poolname[sep - np] = '\0';
1663 memcpy(rootname, sep, end - sep);
1664 rootname[end - sep] = '\0';
1669 spa = spa_find_by_name(poolname);
1672 dev = malloc(sizeof(*dev));
1675 dev->pool_guid = spa->spa_guid;
1676 rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
/*
 * NOTE(review): zfs_mount() calls this function with path == NULL, so this
 * store presumably sits behind a NULL guard on a line not shown -- verify.
 */
1682 *path = (*end == '\0') ? end : end + 1;
1683 dev->dd.d_dev = &zfs_dev;
1689 zfs_fmtdev(struct devdesc *vdev)
1691 static char rootname[ZFS_MAXNAMELEN];
1692 static char buf[2 * ZFS_MAXNAMELEN + 8];
1693 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1697 if (vdev->d_dev->dv_type != DEVT_ZFS)
1700 /* Do we have any pools? */
1701 spa = STAILQ_FIRST(&zfs_pools);
1705 if (dev->pool_guid == 0)
1706 dev->pool_guid = spa->spa_guid;
1708 spa = spa_find_by_guid(dev->pool_guid);
1711 printf("ZFS: can't find pool by guid\n");
1714 if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1715 printf("ZFS: can't find root filesystem\n");
1718 if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1719 printf("ZFS: can't find filesystem by guid\n");
1723 if (rootname[0] == '\0')
1724 snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1727 snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1728 spa->spa_name, rootname);
1733 split_devname(const char *name, char *poolname, size_t size,
1734 const char **dsnamep)
1739 ASSERT(name != NULL);
1740 ASSERT(poolname != NULL);
1743 dsname = strchr(name, '/');
1744 if (dsname != NULL) {
1745 len = dsname - name;
1753 strlcpy(poolname, name, len + 1);
1755 if (dsnamep != NULL)
1762 zfs_list(const char *name)
1764 static char poolname[ZFS_MAXNAMELEN];
1770 if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1773 spa = spa_find_by_name(poolname);
1776 rv = zfs_lookup_dataset(spa, dsname, &objid);
1780 return (zfs_list_dataset(spa, objid));
1784 init_zfs_boot_options(const char *currdev_in)
1786 char poolname[ZFS_MAXNAMELEN];
1787 char *beroot, *currdev;
1793 currdev_len = strlen(currdev_in);
1794 if (currdev_len == 0)
1796 if (strncmp(currdev_in, "zfs:", 4) != 0)
1798 currdev = strdup(currdev_in);
1799 if (currdev == NULL)
1801 /* Remove the trailing : */
1802 currdev[currdev_len - 1] = '\0';
1804 setenv("zfs_be_active", currdev, 1);
1805 setenv("zfs_be_currpage", "1", 1);
1806 /* Remove the last element (current bootenv) */
1807 beroot = strrchr(currdev, '/');
1810 beroot = strchr(currdev, ':') + 1;
1811 setenv("zfs_be_root", beroot, 1);
1813 if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1816 spa = spa_find_by_name(poolname);
1820 zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1821 zfs_checkpoints_initial(spa, beroot, dsname);
1827 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1831 if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1832 snprintf(envname, sizeof(envname), "zpool_checkpoint");
1833 setenv(envname, name, 1);
1835 spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1836 spa->spa_mos = &spa->spa_mos_checkpoint;
1838 zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1840 spa->spa_uberblock = &spa->spa_uberblock_master;
1841 spa->spa_mos = &spa->spa_mos_master;
1846 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1847 const char *dsname, int checkpoint)
1849 char envname[32], envval[256];
1851 int bootenvs_idx, rv;
1853 SLIST_INIT(&zfs_be_head);
1856 rv = zfs_lookup_dataset(spa, dsname, &objid);
1860 rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1862 /* Populate the initial environment variables */
1863 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1864 /* Enumerate all bootenvs for general usage */
1865 snprintf(envname, sizeof(envname), "%s[%d]",
1866 envprefix, bootenvs_idx);
1867 snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1868 checkpoint ? "!" : "", rootname, zfs_be->name);
1869 rv = setenv(envname, envval, 1);
1874 snprintf(envname, sizeof(envname), "%s_count", envprefix);
1875 snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1876 setenv(envname, envval, 1);
1878 /* Clean up the SLIST of ZFS BEs */
1879 while (!SLIST_EMPTY(&zfs_be_head)) {
1880 zfs_be = SLIST_FIRST(&zfs_be_head);
1881 SLIST_REMOVE_HEAD(&zfs_be_head, entries);
/*
 * Rebuild the boot-environment menu state for the dataset named by name.
 * Updates zfs_be_root when name differs from it, collects the child
 * datasets into the zfs_be_head list through the zfs_belist_add callback,
 * publishes the page count in zfs_be_pages, resets zfs_be_currpage to "1"
 * when it exceeds that count, and finally empties the list again.
 */
1888 zfs_bootenv(const char *name)
1890 char poolname[ZFS_MAXNAMELEN], *root;
1895 int rv, pages, perpage, currpage;
1899 if ((root = getenv("zfs_be_root")) == NULL)
1902 if (strcmp(name, root) != 0) {
1903 if (setenv("zfs_be_root", name, 1) != 0)
1907 SLIST_INIT(&zfs_be_head);
1910 if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1913 spa = spa_find_by_name(poolname);
1916 rv = zfs_lookup_dataset(spa, dsname, &objid);
1919 rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1921 /* Calculate and store the number of pages of BEs */
1922 perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1923 pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1924 snprintf(becount, 4, "%d", pages);
1925 if (setenv("zfs_be_pages", becount, 1) != 0)
1928 /* Roll over the page counter if it has exceeded the maximum */
/*
 * getenv() returns NULL when zfs_be_currpage is unset and strtol() must
 * not be given a NULL string; treat an unset variable as page 1.
 */
1929 currpage = (getenv("zfs_be_currpage") != NULL) ? strtol(getenv("zfs_be_currpage"), NULL, 10) : 1;
1930 if (currpage > pages) {
1931 if (setenv("zfs_be_currpage", "1", 1) != 0)
1935 /* Populate the menu environment variables */
1938 /* Clean up the SLIST of ZFS BEs */
1939 while (!SLIST_EMPTY(&zfs_be_head)) {
1940 zfs_be = SLIST_FIRST(&zfs_be_head);
1941 SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1950 zfs_belist_add(const char *name, uint64_t value __unused)
1953 /* Skip special datasets that start with a $ character */
1954 if (strncmp(name, "$", 1) == 0) {
1957 /* Add the boot environment to the head of the SLIST */
1958 zfs_be = malloc(sizeof(struct zfs_be_entry));
1959 if (zfs_be == NULL) {
1962 zfs_be->name = strdup(name);
1963 if (zfs_be->name == NULL) {
1967 SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1976 char envname[32], envval[256];
1977 char *beroot, *pagenum;
1980 beroot = getenv("zfs_be_root");
1981 if (beroot == NULL) {
1985 pagenum = getenv("zfs_be_currpage");
1986 if (pagenum != NULL) {
1987 page = strtol(pagenum, NULL, 10);
1994 zfs_env_index = ZFS_BE_FIRST;
1995 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1996 /* Skip to the requested page number */
1997 if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
2002 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2003 snprintf(envval, sizeof(envval), "%s", zfs_be->name);
2004 rv = setenv(envname, envval, 1);
2009 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2010 rv = setenv(envname, envval, 1);
2015 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2016 rv = setenv(envname, "set_bootenv", 1);
2021 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2022 snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
2023 rv = setenv(envname, envval, 1);
2029 if (zfs_env_index > ZFS_BE_LAST) {
2035 for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
2036 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2037 (void)unsetenv(envname);
2038 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2039 (void)unsetenv(envname);
2040 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2041 (void)unsetenv(envname);
2042 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2043 (void)unsetenv(envname);