4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
37 #include <sys/processor.h>
38 #include <sys/zfs_context.h>
40 #include <sys/utsname.h>
43 * Emulation of kernel services in userland.
/*
 * Global state standing in for kernel globals.
 *   hz      - tick rate used when converting ticks to wall-clock time
 *             (see cv_timedwait() and the poll() based delay).
 *   rootdir - fixed dummy vnode pointer; vn_openat() only compares its
 *             startvp against this value.
 *   utsname - fixed identification strings for the userland build.
 */
46 int hz = 119; /* frequency when using gethrtime() >> 23 for lbolt */
48 vnode_t *rootdir = (vnode_t *)0xabcd1234;
51 struct utsname utsname = {
52 "userland", "libzpool", "1", "1", "na"
56 * =========================================================================
58 * =========================================================================
/*
 * Start a detached thread (THR_DETACHED) running func(arg) through
 * thr_create() and return the new thread id cast to a pointer.
 * The thr_create() call is wrapped in VERIFY.
 * NOTE(review): the declaration of tid and the tail of the thr_create()
 * call are not visible in this listing.
 */
62 zk_thread_create(void (*func)(), void *arg)
66 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
69 return ((void *)(uintptr_t)tid);
73 * =========================================================================
75 * =========================================================================
/*
 * Userland counterpart of the kernel kstat_create() entry point.
 * NOTE(review): only the signature is visible in this listing; the body
 * is presumably a stub - confirm against the full file.
 */
79 kstat_create(char *module, int instance, char *name, char *class,
80 uchar_t type, ulong_t ndata, uchar_t ks_flag)
/*
 * Userland counterpart of kstat_install().
 * NOTE(review): body not visible in this listing; presumably a no-op.
 */
87 kstat_install(kstat_t *ksp)
/*
 * Userland counterpart of kstat_delete().
 * NOTE(review): body not visible in this listing; presumably a no-op.
 */
92 kstat_delete(kstat_t *ksp)
96 * =========================================================================
98 * =========================================================================
/*
 * Initialize the lock embedded in a kmutex_t as a USYNC_THREAD mutex.
 * The result of _mutex_init() is deliberately discarded.
 * NOTE(review): mutex_enter() asserts m_owner == NULL after the first
 * acquisition, so m_owner is presumably cleared on a line not shown here.
 */
101 zmutex_init(kmutex_t *mp)
104 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
/*
 * Destroy a kmutex_t.  The mutex must not be owned.  After the underlying
 * lock is destroyed, m_owner is poisoned with (void *)-1UL so that the
 * assertions in mutex_enter() and mutex_tryenter() catch use after destroy.
 */
108 zmutex_destroy(kmutex_t *mp)
110 ASSERT(mp->m_owner == NULL);
111 (void) _mutex_destroy(&(mp)->m_lock);
112 mp->m_owner = (void *)-1UL;
/*
 * Acquire mp, blocking until it is available, and record the calling
 * thread as owner.  Asserts that the mutex has not been destroyed and
 * that the caller does not already own it (no recursive entry).
 */
116 mutex_enter(kmutex_t *mp)
118 ASSERT(mp->m_owner != (void *)-1UL);
119 ASSERT(mp->m_owner != curthread);
120 VERIFY(mutex_lock(&mp->m_lock) == 0);
/* Once the lock is held nobody else may be recorded as owner. */
121 ASSERT(mp->m_owner == NULL);
122 mp->m_owner = curthread;
/*
 * Try to acquire mp without blocking.  When mutex_trylock() succeeds
 * (returns 0) the calling thread is recorded as owner.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably nonzero is returned on success and 0 otherwise - confirm.
 */
126 mutex_tryenter(kmutex_t *mp)
128 ASSERT(mp->m_owner != (void *)-1UL);
129 if (mutex_trylock(&mp->m_lock) == 0) {
130 ASSERT(mp->m_owner == NULL);
131 mp->m_owner = curthread;
/*
 * Release mp.  Only the owning thread may call this.
 * NOTE(review): m_owner is presumably reset to NULL between the assertion
 * and the unlock on a line not visible here (mutex_enter() relies on it).
 */
139 mutex_exit(kmutex_t *mp)
141 ASSERT(mp->m_owner == curthread);
143 VERIFY(mutex_unlock(&mp->m_lock) == 0);
/*
 * Return the recorded owner of mp: NULL when unowned, or (void *)-1UL
 * once zmutex_destroy() has poisoned it.
 */
147 mutex_owner(kmutex_t *mp)
149 return (mp->m_owner);
153 * =========================================================================
155 * =========================================================================
/*
 * Initialize a krwlock_t: set up the underlying USYNC_THREAD rwlock and
 * clear the recorded writer.  name, type and arg are not used by the
 * visible lines.
 * NOTE(review): rw_count is presumably zeroed on a line not shown here.
 */
159 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
161 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
162 rwlp->rw_owner = NULL;
/*
 * Destroy a krwlock_t and poison rw_owner with (void *)-1UL so that the
 * assertions in the other rw_*() routines catch use after destroy.
 */
167 rw_destroy(krwlock_t *rwlp)
169 rwlock_destroy(&rwlp->rw_lock);
170 rwlp->rw_owner = (void *)-1UL;
/*
 * Acquire rwlp as reader (rw == RW_READER) or as writer (otherwise),
 * blocking as needed.
 *
 * Readers take the read lock and atomically bump rw_count.  Writers take
 * the write lock, at which point no reader may be counted.
 * NOTE(review): rw_exit() expects rw_count == -1 and rw_owner == curthread
 * for a writer, so the writer branch presumably sets rw_count to -1 on a
 * line not shown, and the rw_owner assignment presumably belongs to the
 * writer branch only - confirm brace placement in the full file.
 */
175 rw_enter(krwlock_t *rwlp, krw_t rw)
177 //ASSERT(!RW_LOCK_HELD(rwlp));
178 ASSERT(rwlp->rw_owner != (void *)-1UL);
179 ASSERT(rwlp->rw_owner != curthread);
181 if (rw == RW_READER) {
182 (void) rw_rdlock(&rwlp->rw_lock);
183 ASSERT(rwlp->rw_count >= 0);
184 atomic_add_int(&rwlp->rw_count, 1);
186 (void) rw_wrlock(&rwlp->rw_lock);
187 ASSERT(rwlp->rw_count == 0);
189 rwlp->rw_owner = curthread;
/*
 * Release rwlp.  The caller is treated as the writer when it is the
 * recorded rw_owner; otherwise it is treated as one of the readers and
 * rw_count is atomically decremented.  The underlying lock is released
 * with rw_unlock() in both cases.
 * NOTE(review): the writer path presumably resets rw_count to 0 on a line
 * not visible in this listing.
 */
194 rw_exit(krwlock_t *rwlp)
196 ASSERT(rwlp->rw_owner != (void *)-1UL);
198 if (rwlp->rw_owner == curthread) {
200 ASSERT(rwlp->rw_count == -1);
202 rwlp->rw_owner = NULL;
205 ASSERT(rwlp->rw_count > 0);
206 atomic_add_int(&rwlp->rw_count, -1);
208 (void) rw_unlock(&rwlp->rw_lock);
/*
 * Non-blocking variant of rw_enter(): attempts rw_tryrdlock() or
 * rw_trywrlock() and, on success, performs the same bookkeeping as
 * rw_enter() (reader count bump, or writer ownership).
 * NOTE(review): the conditions selecting the try call and testing rv, and
 * the return statements, are not visible in this listing.
 */
212 rw_tryenter(krwlock_t *rwlp, krw_t rw)
216 ASSERT(rwlp->rw_owner != (void *)-1UL);
217 ASSERT(rwlp->rw_owner != curthread);
220 rv = rw_tryrdlock(&rwlp->rw_lock);
222 rv = rw_trywrlock(&rwlp->rw_lock);
/* No writer can be recorded while we hold the lock in either mode. */
225 ASSERT(rwlp->rw_owner == NULL);
226 if (rw == RW_READER) {
227 ASSERT(rwlp->rw_count >= 0);
228 atomic_add_int(&rwlp->rw_count, 1);
230 ASSERT(rwlp->rw_count == 0);
232 rwlp->rw_owner = curthread;
/*
 * Attempt to upgrade a held read lock to a write lock.
 * NOTE(review): only the destroyed-lock assertion is visible in this
 * listing; the return value is not shown - confirm against the full file.
 */
242 rw_tryupgrade(krwlock_t *rwlp)
244 ASSERT(rwlp->rw_owner != (void *)-1UL);
/*
 * Report whether rwlp is held by anyone: true when rw_count is nonzero.
 * The result does not say whether the caller is the holder.
 */
250 rw_lock_held(krwlock_t *rwlp)
253 return (rwlp->rw_count != 0);
257 * =========================================================================
258 * condition variables
259 * =========================================================================
/*
 * Initialize condition variable cv through cond_init(); the call must
 * succeed.
 * NOTE(review): name (a char *) is passed as the second argument of
 * cond_init() while the type parameter is unused - confirm this matches
 * the cond_init() definition in use on this platform.
 */
263 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
265 VERIFY(cond_init(cv, name, NULL) == 0);
/*
 * Destroy condition variable cv; cond_destroy() must succeed.
 */
269 cv_destroy(kcondvar_t *cv)
271 VERIFY(cond_destroy(cv) == 0);
/*
 * Wait on cv with mp held by the caller.  cond_wait() may return 0 or
 * EINTR; both are accepted.  Ownership of mp is re-recorded for the
 * calling thread once the wait returns.
 * NOTE(review): m_owner is presumably cleared before cond_wait() on a
 * line not visible in this listing.
 */
275 cv_wait(kcondvar_t *cv, kmutex_t *mp)
277 ASSERT(mutex_owner(mp) == curthread);
279 int ret = cond_wait(cv, &mp->m_lock);
280 VERIFY(ret == 0 || ret == EINTR);
281 mp->m_owner = curthread;
/*
 * Wait on cv with mp held, giving up after a timeout expressed in ticks.
 *
 * The tick count delta is converted into an absolute timespec relative to
 * gettimeofday() and handed to pthread_cond_timedwait().
 * NOTE(review): the computation of delta (presumably abstime minus the
 * current tick count), the early-out for a deadline already passed and the
 * return statements are not visible in this listing.
 * NOTE(review): the gettimeofday() failure check uses assert(), which is
 * compiled out when NDEBUG is defined.
 */
285 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
298 if (gettimeofday(&tv, NULL) != 0)
299 assert(!"gettimeofday() failed");
301 ts.tv_sec = tv.tv_sec + delta / hz;
302 ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz);
303 ASSERT(ts.tv_nsec >= 0);
/*
 * Each of the two addends above is below NANOSEC, so a single
 * subtraction is enough to normalize tv_nsec.
 */
305 if(ts.tv_nsec >= NANOSEC) {
307 ts.tv_nsec -= NANOSEC;
310 ASSERT(mutex_owner(mp) == curthread);
312 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
/* The mutex is held again on return, whatever the outcome. */
313 mp->m_owner = curthread;
318 if (error == ETIMEDOUT)
/*
 * Signal cv through cond_signal(); the call must succeed.
 */
327 cv_signal(kcondvar_t *cv)
329 VERIFY(cond_signal(cv) == 0);
/*
 * Broadcast on cv through cond_broadcast(); the call must succeed.
 */
333 cv_broadcast(kcondvar_t *cv)
335 VERIFY(cond_broadcast(cv) == 0);
339 * =========================================================================
341 * =========================================================================
344 * Note: for the xxxat() versions of these functions, we assume that the
345 * starting vp is always rootdir (which is true for spa_directory.c, the only
346 * ZFS consumer of these interfaces). We assert this is true, and then emulate
347 * them by adding '/' in front of the path.
/*
 * Open path and return a freshly allocated vnode_t describing it in *vpp.
 *
 * Paths under /dev/ are probed first and then rewritten so that a /dsk/
 * component becomes /rdsk/ (the character device).  Other paths are used
 * as given and, unless FCREAT is set, must already exist.  The file is
 * opened with open64(), marked close-on-exec, and its size and a copy of
 * the caller-supplied path are stored in the vnode.
 * x1, x2 and x3 are not used by the visible lines.
 *
 * NOTE(review): the error-return paths, the close of the probe descriptor
 * and the assignment of v_fd are not visible in this listing.
 * NOTE(review): path is copied into realpath[MAXPATHLEN] with sprintf()
 * and no length check; a path of MAXPATHLEN or more bytes would overflow.
 * NOTE(review): the comment below describes taking the size from the
 * block device, while the visible code queries DIOCGMEDIASIZE on the
 * character device - confirm which description is current.
 */
352 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
357 char realpath[MAXPATHLEN];
361 * If we're accessing a real disk from userland, we need to use
362 * the character interface to avoid caching. This is particularly
363 * important if we're trying to look at a real in-kernel storage
364 * pool from userland, e.g. via zdb, because otherwise we won't
365 * see the changes occurring under the segmap cache.
366 * On the other hand, the stupid character device returns zero
367 * for its size. So -- gag -- we open the block device to get
368 * its size, and remember it for subsequent VOP_GETATTR().
370 if (strncmp(path, "/dev/", 5) == 0) {
372 fd = open64(path, O_RDONLY);
375 if (fstat64(fd, &st) == -1) {
/* Start from the original path, then splice an r in after the slash. */
380 (void) sprintf(realpath, "%s", path);
381 dsk = strstr(path, "/dsk/");
383 (void) sprintf(realpath + (dsk - path) + 1, "r%s",
386 (void) sprintf(realpath, "%s", path);
387 if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
392 old_umask = umask(0);
395 * The construct 'flags - FREAD' conveniently maps combinations of
396 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
398 fd = open64(realpath, flags - FREAD, mode);
401 (void) umask(old_umask);
406 if (fstat64(fd, &st) == -1) {
411 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
413 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
/* Character devices get their size from the ioctl, not from st_size. */
416 if (S_ISCHR(st.st_mode))
417 ioctl(fd, DIOCGMEDIASIZE, &vp->v_size);
419 vp->v_size = st.st_size;
420 vp->v_path = spa_strdup(path);
/*
 * Open path relative to startvp, which must be rootdir.  The relative
 * lookup is emulated by prefixing path with a slash and calling vn_open().
 * The temporary buffer holds the slash, the path and the terminating NUL,
 * hence strlen(path) + 2, and is released with the same size.
 */
426 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
427 int x3, vnode_t *startvp)
429 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
432 ASSERT(startvp == rootdir);
433 (void) sprintf(realpath, "/%s", path);
435 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
437 umem_free(realpath, strlen(path) + 2);
444 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
445 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
447 ssize_t iolen, split;
449 if (uio == UIO_READ) {
450 iolen = pread64(vp->v_fd, addr, len, offset);
453 * To simulate partial disk writes, we split writes into two
454 * system calls so that the process can be killed in between.
456 split = (len > 0 ? rand() % len : 0);
457 iolen = pwrite64(vp->v_fd, addr, split, offset);
458 iolen += pwrite64(vp->v_fd, (char *)addr + split,
459 len - split, offset + split);
465 *residp = len - iolen;
466 else if (iolen != len)
/*
 * Release a vnode obtained from vn_open(): free the duplicated path and
 * the vnode itself.
 * NOTE(review): the close of vp->v_fd is presumably on a line not visible
 * in this listing - confirm.
 */
472 vn_close(vnode_t *vp)
475 spa_strfree(vp->v_path);
476 umem_free(vp, sizeof (vnode_t));
482 * =========================================================================
483 * Figure out which debugging statements to print
484 * =========================================================================
/*
 * dprintf_string holds the comma separated list of debug selectors set up
 * by dprintf_setup(); dprintf_print_all is set when that list contains on.
 */
487 static char *dprintf_string;
488 static int dprintf_print_all;
/*
 * Search the comma separated selector list in dprintf_string for an entry
 * equal to string.  An entry matches only when it agrees with string over
 * its whole length and is followed by a comma or the end of the list, so
 * a selector that merely starts with string does not match.
 * A NULL dprintf_string matches nothing (the loop does not run).
 * NOTE(review): the return statements are not visible in this listing;
 * presumably nonzero on a match and 0 otherwise.
 */
491 dprintf_find_string(const char *string)
493 char *tmp_str = dprintf_string;
494 int len = strlen(string);
497 * Find out if this is a string we want to print.
498 * String format: file1.c,function_name1,file2.c,file3.c
501 while (tmp_str != NULL) {
502 if (strncmp(tmp_str, string, len) == 0 &&
503 (tmp_str[len] == ',' || tmp_str[len] == '\0'))
505 tmp_str = strchr(tmp_str, ',');
507 tmp_str++; /* Get rid of , */
/*
 * Determine which debug selectors are active.  The argument vector is
 * scanned for an entry beginning with debug=; when found, the text after
 * the prefix becomes the selector list and the entry is removed from
 * argv.  If no such argument exists the ZFS_DEBUG environment variable is
 * used.  Finally, a selector of on enables all debug output.
 * NOTE(review): the body of the argv shifting loop and the update of
 * *argc are not visible in this listing.
 */
513 dprintf_setup(int *argc, char **argv)
518 * Debugging can be specified two ways: by setting the
519 * environment variable ZFS_DEBUG, or by including a
520 * "debug=..." argument on the command line. The command
521 * line setting overrides the environment variable.
524 for (i = 1; i < *argc; i++) {
525 int len = strlen("debug=");
526 /* First look for a command line argument */
527 if (strncmp("debug=", argv[i], len) == 0) {
528 dprintf_string = argv[i] + len;
529 /* Remove from args */
530 for (j = i; j < *argc; j++)
537 if (dprintf_string == NULL) {
538 /* Look for ZFS_DEBUG environment variable */
539 dprintf_string = getenv("ZFS_DEBUG");
543 * Are we just turning on all debugging?
545 if (dprintf_find_string("on"))
546 dprintf_print_all = 1;
550 * =========================================================================
552 * =========================================================================
/*
 * Emit one debug message if it is selected.
 *
 * A message is printed when all debugging is on, or when the selector
 * list names the source file (directory part stripped) or the calling
 * function.  The selectors pid, tid, cpu, time and long add the process
 * id, thread id, cpu id, gethrtime() value and file/line respectively in
 * front of the function name.
 * NOTE(review): the fallback for a file name without a slash, the
 * va_start/va_end pair and any locking are not visible in this listing.
 */
555 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
561 * Get rid of annoying "../common/" prefix to filename.
563 newfile = strrchr(file, '/');
564 if (newfile != NULL) {
565 newfile = newfile + 1; /* Get rid of leading / */
570 if (dprintf_print_all ||
571 dprintf_find_string(newfile) ||
572 dprintf_find_string(func)) {
573 /* Print out just the function name if requested */
575 if (dprintf_find_string("pid"))
576 (void) printf("%d ", getpid());
577 if (dprintf_find_string("tid"))
578 (void) printf("%u ", thr_self());
580 if (dprintf_find_string("cpu"))
581 (void) printf("%u ", getcpuid());
583 if (dprintf_find_string("time"))
584 (void) printf("%llu ", gethrtime());
585 if (dprintf_find_string("long"))
586 (void) printf("%s, line %d: ", newfile, line);
587 (void) printf("%s: ", func);
589 (void) vprintf(fmt, adx);
595 #endif /* ZFS_DEBUG */
598 * =========================================================================
599 * cmn_err() and panic()
600 * =========================================================================
/*
 * Per-level text placed before and after a cmn_err() message; vcmn_err()
 * indexes both tables with the ce level.  The row width of 10 in
 * ce_prefix is exactly the length of the longest entry plus its NUL.
 */
602 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
603 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
/*
 * Print a formatted message to stderr, framed by an error: prefix and a
 * newline, and then terminate the process with abort().
 */
606 vpanic(const char *fmt, va_list adx)
608 (void) fprintf(stderr, "error: ");
609 (void) vfprintf(stderr, fmt, adx);
610 (void) fprintf(stderr, "\n");
612 abort(); /* think of it as a "user-level crash dump" */
/*
 * Variadic front end for panic messages.
 * NOTE(review): the body is not visible in this listing; presumably it
 * forwards its arguments to vpanic().
 */
616 panic(const char *fmt, ...)
/*
 * Print a cmn_err() style message to stderr with the prefix and suffix
 * belonging to level ce.  CE_NOTE messages are dropped.
 * NOTE(review): ce indexes tables with CE_IGNORE rows; levels at or above
 * CE_IGNORE must be filtered out on lines not visible in this listing
 * (presumably the panic level is diverted to vpanic()) - confirm.
 */
626 vcmn_err(int ce, const char *fmt, va_list adx)
630 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */
631 (void) fprintf(stderr, "%s", ce_prefix[ce]);
632 (void) vfprintf(stderr, fmt, adx);
633 (void) fprintf(stderr, "%s", ce_suffix[ce]);
/*
 * Variadic front end for vcmn_err().
 * NOTE(review): the va_start/va_end pair around the call is not visible
 * in this listing.
 */
639 cmn_err(int ce, const char *fmt, ...)
644 vcmn_err(ce, fmt, adx);
649 * =========================================================================
651 * =========================================================================
/*
 * Open name read-only relative to rootdir and wrap the resulting vnode in
 * a zero-filled struct _buf, storing the vnode pointer in its _fd field.
 * Returns (void *)-1UL when the open fails.
 * NOTE(review): the success return is not visible in this listing;
 * presumably the new struct _buf is returned.
 */
654 kobj_open_file(char *name)
659 /* set vp as the _fd field of the file */
660 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0)
661 return ((void *)-1UL);
663 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
664 file->_fd = (intptr_t)vp;
669 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
673 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
674 UIO_SYSSPACE, 0, 0, 0, &resid);
676 return (size - resid);
/*
 * Close a file opened with kobj_open_file(): release the vnode stored in
 * _fd and free the struct _buf wrapper.
 */
680 kobj_close_file(struct _buf *file)
682 vn_close((vnode_t *)file->_fd);
683 umem_free(file, sizeof (struct _buf));
/*
 * Report the size of a file opened with kobj_open_file(), based on
 * fstat64() of the descriptor held by its vnode.
 * NOTE(review): the error handling, the store through size and the return
 * statements are not visible in this listing.
 */
687 kobj_get_filesize(struct _buf *file, uint64_t *size)
690 vnode_t *vp = (vnode_t *)file->_fd;
692 if (fstat64(vp->v_fd, &st) == -1) {
701 * =========================================================================
703 * =========================================================================
/*
 * Sleep for the given number of ticks by calling poll() with no
 * descriptors and a millisecond timeout.
 * NOTE(review): the enclosing function header is not visible in this
 * listing (presumably delay()).
 * NOTE(review): 1000 / hz is an integer division; with hz = 119 each tick
 * counts as 8 ms rather than about 8.4 ms.  ticks * 1000 / hz would be
 * more accurate if overflow is not a concern.
 */
709 poll(0, 0, ticks * (1000 / hz));
/*
 * NOTE(review): the function header and most of the body are not visible
 * in this listing (presumably highbit()).  The visible tests narrow the
 * search by halves: first the upper 32 bits, then the upper 16 bits of
 * what remains.  The 64-bit mask is presumably guarded for LP64 builds.
 */
714 * Find highest one bit set.
715 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
716 * High order bit is 31 (or 63 in _LP64 kernel).
726 if (i & 0xffffffff00000000ul) {
730 if (i & 0xffff0000) {
/*
 * Fill ptr with len bytes read from the device named by devname.
 * NOTE(review): the read loop, its bookkeeping, the close of fd and the
 * return value are not visible in this listing.  Confirm that a failed
 * open() and a read() returning 0 or -1 are handled rather than looping
 * or reading from an invalid descriptor.
 */
750 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname)
752 int fd = open(devname, O_RDONLY);
759 bytes = read(fd, ptr, resid);
/*
 * Fill ptr with len bytes taken from /dev/random.
 */
771 random_get_bytes(uint8_t *ptr, size_t len)
773 return (random_get_bytes_common(ptr, len, "/dev/random"));
/*
 * Fill ptr with len bytes taken from /dev/urandom.
 */
777 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
779 return (random_get_bytes_common(ptr, len, "/dev/urandom"));
/*
 * Convert the string hw_serial to an unsigned long in the given base with
 * strtoul() and store the value through result.
 * NOTE(review): the handling of the end pointer, of nptr and of the
 * return value is not visible in this listing.
 */
783 ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
787 *result = strtoul(hw_serial, &end, base);
794 * =========================================================================
795 * kernel emulation setup & teardown
796 * =========================================================================
/*
 * Callback registered by kernel_init() with umem_nofail_callback().  It
 * reports the condition on stderr with a raw write().
 * NOTE(review): what follows the write (presumably abort()) is not
 * visible in this listing.
 * NOTE(review): sizeof (errmsg) counts the terminating NUL, so a NUL byte
 * is written after the newline; sizeof (errmsg) - 1 would emit only the
 * text.  The result of write() is also ignored.
 */
799 umem_out_of_memory(void)
801 char errmsg[] = "out of memory -- generating core dump\n";
803 write(fileno(stderr), errmsg, sizeof (errmsg));
/*
 * Set up the userland kernel emulation: register the out-of-memory
 * callback, record the number of physical pages in physmem, and fill
 * hw_serial with the decimal host id.
 * NOTE(review): the use of mode and any further initialization steps are
 * not visible in this listing.
 */
809 kernel_init(int mode)
811 umem_nofail_callback(umem_out_of_memory);
813 physmem = sysconf(_SC_PHYS_PAGES);
815 dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
816 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
818 snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid());
/*
 * Wrapper around zlib uncompress().  *dstlen supplies the capacity of dst
 * on entry and is updated with the number of bytes produced only when
 * uncompress() returns Z_OK.  A local uLongf is used because size_t and
 * uLongf need not be the same type.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the zlib status is returned.
 */
830 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
833 uLongf len = *dstlen;
835 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
836 *dstlen = (size_t)len;
/*
 * Wrapper around zlib compress2().  *dstlen supplies the capacity of dst
 * on entry and is updated with the compressed length only when
 * compress2() returns Z_OK.
 * NOTE(review): the last parameter (the level passed to compress2()) and
 * the return statement are not visible in this listing.
 */
842 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
846 uLongf len = *dstlen;
848 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
849 *dstlen = (size_t)len;