4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 #include <stdio_ext.h>
32 #include <sys/zfs_context.h>
34 #include <sys/spa_impl.h>
37 #include <sys/fs/zfs.h>
38 #include <sys/zfs_znode.h>
40 #include <sys/vdev_impl.h>
41 #include <sys/metaslab_impl.h>
42 #include <sys/dmu_objset.h>
43 #include <sys/dsl_dir.h>
44 #include <sys/dsl_dataset.h>
45 #include <sys/dsl_pool.h>
48 #include <sys/zil_impl.h>
50 #include <sys/resource.h>
51 #include <sys/dmu_traverse.h>
52 #include <sys/zio_checksum.h>
53 #include <sys/zio_compress.h>
55 const char cmdname[] = "zdb";
56 uint8_t dump_opt[256];
58 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
60 extern void dump_intent_log(zilog_t *);
61 uint64_t *zopt_object = NULL;
63 int zdb_advance = ADVANCE_PRE;
64 zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/* Default value for the $UMEM_DEBUG setting. */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}

/* Default value for the $UMEM_LOGGING setting. */
const char *
_umem_logging_init(void)
{
	return ("fail,contents"); /* $UMEM_LOGGING setting */
}
85 (void) fprintf(stderr,
86 "Usage: %s [-udibcsvLU] [-O order] [-B os:obj:level:blkid] "
87 "dataset [object...]\n"
90 " %s -R vdev:offset:size:flags\n",
91 cmdname, cmdname, cmdname, cmdname);
93 (void) fprintf(stderr, " -u uberblock\n");
94 (void) fprintf(stderr, " -d datasets\n");
95 (void) fprintf(stderr, " -C cached pool configuration\n");
96 (void) fprintf(stderr, " -i intent logs\n");
97 (void) fprintf(stderr, " -b block statistics\n");
98 (void) fprintf(stderr, " -c checksum all data blocks\n");
99 (void) fprintf(stderr, " -s report stats on zdb's I/O\n");
100 (void) fprintf(stderr, " -v verbose (applies to all others)\n");
101 (void) fprintf(stderr, " -l dump label contents\n");
102 (void) fprintf(stderr, " -L live pool (allows some errors)\n");
103 (void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> "
104 "visitation order\n");
105 (void) fprintf(stderr, " -U use zpool.cache in /tmp\n");
106 (void) fprintf(stderr, " -B objset:object:level:blkid -- "
107 "simulate bad block\n");
108 (void) fprintf(stderr, " -R read and display block from a"
110 (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
111 "to make only that option verbose\n");
112 (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
117 fatal(const char *fmt, ...)
122 (void) fprintf(stderr, "%s: ", cmdname);
123 (void) vfprintf(stderr, fmt, ap);
125 (void) fprintf(stderr, "\n");
131 dump_nvlist(nvlist_t *list, int indent)
133 nvpair_t *elem = NULL;
135 while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
136 switch (nvpair_type(elem)) {
137 case DATA_TYPE_STRING:
141 VERIFY(nvpair_value_string(elem, &value) == 0);
142 (void) printf("%*s%s='%s'\n", indent, "",
143 nvpair_name(elem), value);
147 case DATA_TYPE_UINT64:
151 VERIFY(nvpair_value_uint64(elem, &value) == 0);
152 (void) printf("%*s%s=%llu\n", indent, "",
153 nvpair_name(elem), (u_longlong_t)value);
157 case DATA_TYPE_NVLIST:
161 VERIFY(nvpair_value_nvlist(elem, &value) == 0);
162 (void) printf("%*s%s\n", indent, "",
164 dump_nvlist(value, indent + 4);
168 case DATA_TYPE_NVLIST_ARRAY:
173 VERIFY(nvpair_value_nvlist_array(elem, &value,
176 for (c = 0; c < count; c++) {
177 (void) printf("%*s%s[%u]\n", indent, "",
178 nvpair_name(elem), c);
179 dump_nvlist(value[c], indent + 8);
186 (void) printf("bad config type %d for %s\n",
187 nvpair_type(elem), nvpair_name(elem));
194 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
197 size_t nvsize = *(uint64_t *)data;
198 char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
200 VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
202 VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
204 umem_free(packed, nvsize);
211 const char dump_zap_stars[] = "****************************************";
212 const int dump_zap_width = sizeof (dump_zap_stars) - 1;
215 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
218 int minidx = ZAP_HISTOGRAM_SIZE - 1;
222 for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
225 if (histo[i] > 0 && i > maxidx)
227 if (histo[i] > 0 && i < minidx)
231 if (max < dump_zap_width)
232 max = dump_zap_width;
234 for (i = minidx; i <= maxidx; i++)
235 (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
236 &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
240 dump_zap_stats(objset_t *os, uint64_t object)
245 error = zap_get_stats(os, object, &zs);
249 if (zs.zs_ptrtbl_len == 0) {
250 ASSERT(zs.zs_num_blocks == 1);
251 (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
252 (u_longlong_t)zs.zs_blocksize,
253 (u_longlong_t)zs.zs_num_entries);
257 (void) printf("\tFat ZAP stats:\n");
259 (void) printf("\t\tPointer table:\n");
260 (void) printf("\t\t\t%llu elements\n",
261 (u_longlong_t)zs.zs_ptrtbl_len);
262 (void) printf("\t\t\tzt_blk: %llu\n",
263 (u_longlong_t)zs.zs_ptrtbl_zt_blk);
264 (void) printf("\t\t\tzt_numblks: %llu\n",
265 (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
266 (void) printf("\t\t\tzt_shift: %llu\n",
267 (u_longlong_t)zs.zs_ptrtbl_zt_shift);
268 (void) printf("\t\t\tzt_blks_copied: %llu\n",
269 (u_longlong_t)zs.zs_ptrtbl_blks_copied);
270 (void) printf("\t\t\tzt_nextblk: %llu\n",
271 (u_longlong_t)zs.zs_ptrtbl_nextblk);
273 (void) printf("\t\tZAP entries: %llu\n",
274 (u_longlong_t)zs.zs_num_entries);
275 (void) printf("\t\tLeaf blocks: %llu\n",
276 (u_longlong_t)zs.zs_num_leafs);
277 (void) printf("\t\tTotal blocks: %llu\n",
278 (u_longlong_t)zs.zs_num_blocks);
279 (void) printf("\t\tzap_block_type: 0x%llx\n",
280 (u_longlong_t)zs.zs_block_type);
281 (void) printf("\t\tzap_magic: 0x%llx\n",
282 (u_longlong_t)zs.zs_magic);
283 (void) printf("\t\tzap_salt: 0x%llx\n",
284 (u_longlong_t)zs.zs_salt);
286 (void) printf("\t\tLeafs with 2^n pointers:\n");
287 dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
289 (void) printf("\t\tBlocks with n*5 entries:\n");
290 dump_zap_histogram(zs.zs_blocks_with_n5_entries);
292 (void) printf("\t\tBlocks n/10 full:\n");
293 dump_zap_histogram(zs.zs_blocks_n_tenths_full);
295 (void) printf("\t\tEntries with n chunks:\n");
296 dump_zap_histogram(zs.zs_entries_using_n_chunks);
298 (void) printf("\t\tBuckets with n entries:\n");
299 dump_zap_histogram(zs.zs_buckets_with_n_entries);
304 dump_none(objset_t *os, uint64_t object, void *data, size_t size)
310 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
316 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
322 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
325 zap_attribute_t attr;
329 dump_zap_stats(os, object);
332 for (zap_cursor_init(&zc, os, object);
333 zap_cursor_retrieve(&zc, &attr) == 0;
334 zap_cursor_advance(&zc)) {
335 (void) printf("\t\t%s = ", attr.za_name);
336 if (attr.za_num_integers == 0) {
340 prop = umem_zalloc(attr.za_num_integers *
341 attr.za_integer_length, UMEM_NOFAIL);
342 (void) zap_lookup(os, object, attr.za_name,
343 attr.za_integer_length, attr.za_num_integers, prop);
344 if (attr.za_integer_length == 1) {
345 (void) printf("%s", (char *)prop);
347 for (i = 0; i < attr.za_num_integers; i++) {
348 switch (attr.za_integer_length) {
351 ((uint16_t *)prop)[i]);
355 ((uint32_t *)prop)[i]);
358 (void) printf("%lld ",
359 (u_longlong_t)((int64_t *)prop)[i]);
365 umem_free(prop, attr.za_num_integers * attr.za_integer_length);
367 zap_cursor_fini(&zc);
371 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
373 uint64_t alloc, offset, entry;
374 uint8_t mapshift = sm->sm_shift;
375 uint64_t mapstart = sm->sm_start;
376 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
377 "INVALID", "INVALID", "INVALID", "INVALID" };
379 if (smo->smo_object == 0)
383 * Print out the freelist entries in both encoded and decoded form.
386 for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
387 VERIFY(0 == dmu_read(os, smo->smo_object, offset,
388 sizeof (entry), &entry));
389 if (SM_DEBUG_DECODE(entry)) {
390 (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
391 (u_longlong_t)(offset / sizeof (entry)),
392 ddata[SM_DEBUG_ACTION_DECODE(entry)],
393 (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
394 (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
396 (void) printf("\t\t[%4llu] %c range:"
397 " %08llx-%08llx size: %06llx\n",
398 (u_longlong_t)(offset / sizeof (entry)),
399 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
400 (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
401 mapshift) + mapstart),
402 (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
403 mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
405 (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
406 if (SM_TYPE_DECODE(entry) == SM_ALLOC)
407 alloc += SM_RUN_DECODE(entry) << mapshift;
409 alloc -= SM_RUN_DECODE(entry) << mapshift;
412 if (alloc != smo->smo_alloc) {
413 (void) printf("space_map_object alloc (%llu) INCONSISTENT "
414 "with space map summary (%llu)\n",
415 (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
420 dump_metaslab(metaslab_t *msp)
423 space_map_obj_t *smo = &msp->ms_smo;
424 vdev_t *vd = msp->ms_group->mg_vd;
425 spa_t *spa = vd->vdev_spa;
427 nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
429 if (dump_opt['d'] <= 5) {
430 (void) printf("\t%10llx %10llu %5s\n",
431 (u_longlong_t)msp->ms_map.sm_start,
432 (u_longlong_t)smo->smo_object,
438 "\tvdev %llu offset %08llx spacemap %4llu free %5s\n",
439 (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
440 (u_longlong_t)smo->smo_object, freebuf);
442 ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
444 dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
448 dump_metaslabs(spa_t *spa)
450 vdev_t *rvd = spa->spa_root_vdev;
454 (void) printf("\nMetaslabs:\n");
456 for (c = 0; c < rvd->vdev_children; c++) {
457 vd = rvd->vdev_child[c];
459 spa_config_enter(spa, RW_READER, FTAG);
460 (void) printf("\n vdev %llu = %s\n\n",
461 (u_longlong_t)vd->vdev_id, vdev_description(vd));
462 spa_config_exit(spa, FTAG);
464 if (dump_opt['d'] <= 5) {
465 (void) printf("\t%10s %10s %5s\n",
466 "offset", "spacemap", "free");
467 (void) printf("\t%10s %10s %5s\n",
468 "------", "--------", "----");
470 for (m = 0; m < vd->vdev_ms_count; m++)
471 dump_metaslab(vd->vdev_ms[m]);
477 dump_dtl(vdev_t *vd, int indent)
479 avl_tree_t *t = &vd->vdev_dtl_map.sm_root;
480 spa_t *spa = vd->vdev_spa;
486 (void) printf("\nDirty time logs:\n\n");
488 spa_config_enter(spa, RW_READER, FTAG);
489 (void) printf("\t%*s%s\n", indent, "", vdev_description(vd));
490 spa_config_exit(spa, FTAG);
492 for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
494 * Everything in this DTL must appear in all parent DTL unions.
496 for (pvd = vd; pvd; pvd = pvd->vdev_parent)
497 ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map,
498 ss->ss_start, ss->ss_end - ss->ss_start));
499 (void) printf("\t%*soutage [%llu,%llu] length %llu\n",
501 (u_longlong_t)ss->ss_start,
502 (u_longlong_t)ss->ss_end - 1,
503 (u_longlong_t)(ss->ss_end - ss->ss_start));
508 if (dump_opt['d'] > 5 && vd->vdev_children == 0) {
509 dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl,
514 for (c = 0; c < vd->vdev_children; c++)
515 dump_dtl(vd->vdev_child[c], indent + 4);
520 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
525 blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid)
530 return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
531 dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
535 sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
537 dva_t *dva = bp->blk_dva;
538 int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
543 for (i = 0; i < ndvas; i++)
544 (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
545 (u_longlong_t)DVA_GET_VDEV(&dva[i]),
546 (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
547 (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
549 (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
550 (u_longlong_t)BP_GET_LSIZE(bp),
551 (u_longlong_t)BP_GET_PSIZE(bp),
552 (u_longlong_t)bp->blk_fill,
553 (u_longlong_t)bp->blk_birth);
558 zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
560 zbookmark_t *zb = &bc->bc_bookmark;
561 blkptr_t *bp = &bc->bc_blkptr;
562 void *data = bc->bc_data;
563 dnode_phys_t *dnp = bc->bc_dnode;
564 char blkbuf[BP_SPRINTF_LEN + 80];
568 (void) sprintf(blkbuf,
569 "Error %d reading <%llu, %llu, %lld, %llu>: ",
571 (u_longlong_t)zb->zb_objset,
572 (u_longlong_t)zb->zb_object,
573 (u_longlong_t)zb->zb_level,
574 (u_longlong_t)zb->zb_blkid);
578 if (zb->zb_level == -1) {
579 ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
580 ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
582 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
583 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
586 if (zb->zb_level > 0) {
588 blkptr_t *bpx, *bpend;
590 for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
591 bpx < bpend; bpx++) {
592 if (bpx->blk_birth != 0) {
593 fill += bpx->blk_fill;
595 ASSERT(bpx->blk_fill == 0);
598 ASSERT3U(fill, ==, bp->blk_fill);
601 if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
603 dnode_phys_t *dnx, *dnend;
605 for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
606 dnx < dnend; dnx++) {
607 if (dnx->dn_type != DMU_OT_NONE)
610 ASSERT3U(fill, ==, bp->blk_fill);
613 (void) sprintf(blkbuf, "%16llx ",
614 (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
616 ASSERT(zb->zb_level >= 0);
618 for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
619 if (l == zb->zb_level) {
620 (void) sprintf(blkbuf + strlen(blkbuf), "L%llx",
621 (u_longlong_t)zb->zb_level);
623 (void) sprintf(blkbuf + strlen(blkbuf), " ");
628 if (bp->blk_birth == 0) {
629 (void) sprintf(blkbuf + strlen(blkbuf), "<hole>");
630 (void) printf("%s\n", blkbuf);
632 sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp,
633 dump_opt['d'] > 5 ? 1 : 0);
634 (void) printf("%s\n", blkbuf);
637 return (bc->bc_errno ? ERESTART : 0);
642 dump_indirect(objset_t *os, uint64_t object, void *data, size_t size)
644 traverse_handle_t *th;
645 uint64_t objset = dmu_objset_id(os);
646 int advance = zdb_advance;
648 (void) printf("Indirect blocks:\n");
651 advance |= ADVANCE_DATA;
653 th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance,
655 th->th_noread = zdb_noread;
657 traverse_add_dnode(th, 0, -1ULL, objset, object);
659 while (traverse_more(th) == EAGAIN)
669 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
671 dsl_dir_phys_t *dd = data;
673 char used[6], compressed[6], uncompressed[6], quota[6], resv[6];
678 ASSERT(size == sizeof (*dd));
680 crtime = dd->dd_creation_time;
681 nicenum(dd->dd_used_bytes, used);
682 nicenum(dd->dd_compressed_bytes, compressed);
683 nicenum(dd->dd_uncompressed_bytes, uncompressed);
684 nicenum(dd->dd_quota, quota);
685 nicenum(dd->dd_reserved, resv);
687 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
688 (void) printf("\t\thead_dataset_obj = %llu\n",
689 (u_longlong_t)dd->dd_head_dataset_obj);
690 (void) printf("\t\tparent_dir_obj = %llu\n",
691 (u_longlong_t)dd->dd_parent_obj);
692 (void) printf("\t\tclone_parent_obj = %llu\n",
693 (u_longlong_t)dd->dd_clone_parent_obj);
694 (void) printf("\t\tchild_dir_zapobj = %llu\n",
695 (u_longlong_t)dd->dd_child_dir_zapobj);
696 (void) printf("\t\tused_bytes = %s\n", used);
697 (void) printf("\t\tcompressed_bytes = %s\n", compressed);
698 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
699 (void) printf("\t\tquota = %s\n", quota);
700 (void) printf("\t\treserved = %s\n", resv);
701 (void) printf("\t\tprops_zapobj = %llu\n",
702 (u_longlong_t)dd->dd_props_zapobj);
707 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
709 dsl_dataset_phys_t *ds = data;
711 char used[6], compressed[6], uncompressed[6], unique[6];
712 char blkbuf[BP_SPRINTF_LEN];
717 ASSERT(size == sizeof (*ds));
718 crtime = ds->ds_creation_time;
719 nicenum(ds->ds_used_bytes, used);
720 nicenum(ds->ds_compressed_bytes, compressed);
721 nicenum(ds->ds_uncompressed_bytes, uncompressed);
722 nicenum(ds->ds_unique_bytes, unique);
723 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
725 (void) printf("\t\tdataset_obj = %llu\n",
726 (u_longlong_t)ds->ds_dir_obj);
727 (void) printf("\t\tprev_snap_obj = %llu\n",
728 (u_longlong_t)ds->ds_prev_snap_obj);
729 (void) printf("\t\tprev_snap_txg = %llu\n",
730 (u_longlong_t)ds->ds_prev_snap_txg);
731 (void) printf("\t\tnext_snap_obj = %llu\n",
732 (u_longlong_t)ds->ds_next_snap_obj);
733 (void) printf("\t\tsnapnames_zapobj = %llu\n",
734 (u_longlong_t)ds->ds_snapnames_zapobj);
735 (void) printf("\t\tnum_children = %llu\n",
736 (u_longlong_t)ds->ds_num_children);
737 (void) printf("\t\tcreation_time = %s", ctime(&crtime));
738 (void) printf("\t\tcreation_txg = %llu\n",
739 (u_longlong_t)ds->ds_creation_txg);
740 (void) printf("\t\tdeadlist_obj = %llu\n",
741 (u_longlong_t)ds->ds_deadlist_obj);
742 (void) printf("\t\tused_bytes = %s\n", used);
743 (void) printf("\t\tcompressed_bytes = %s\n", compressed);
744 (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
745 (void) printf("\t\tunique = %s\n", unique);
746 (void) printf("\t\tfsid_guid = %llu\n",
747 (u_longlong_t)ds->ds_fsid_guid);
748 (void) printf("\t\tguid = %llu\n",
749 (u_longlong_t)ds->ds_guid);
750 (void) printf("\t\tflags = %llx\n",
751 (u_longlong_t)ds->ds_flags);
752 (void) printf("\t\tbp = %s\n", blkbuf);
756 dump_bplist(objset_t *mos, uint64_t object, char *name)
758 bplist_t bpl = { 0 };
759 blkptr_t blk, *bp = &blk;
765 if (dump_opt['d'] < 3)
768 VERIFY(0 == bplist_open(&bpl, mos, object));
769 if (bplist_empty(&bpl)) {
774 nicenum(bpl.bpl_phys->bpl_bytes, bytes);
775 if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
776 nicenum(bpl.bpl_phys->bpl_comp, comp);
777 nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
778 (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n",
779 name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
780 bytes, comp, uncomp);
782 (void) printf("\n %s: %llu entries, %s\n",
783 name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
786 if (dump_opt['d'] < 5) {
793 while (bplist_iterate(&bpl, &itor, bp) == 0) {
794 char blkbuf[BP_SPRINTF_LEN];
796 ASSERT(bp->blk_birth != 0);
797 sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
798 (void) printf("\tItem %3llu: %s\n",
799 (u_longlong_t)itor - 1, blkbuf);
807 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
809 znode_phys_t *zp = data;
810 time_t z_crtime, z_atime, z_mtime, z_ctime;
811 char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */
814 ASSERT(size >= sizeof (znode_phys_t));
816 error = zfs_obj_to_path(os, object, path, sizeof (path));
818 (void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
819 (u_longlong_t)object);
822 if (dump_opt['d'] < 3) {
823 (void) printf("\t%s\n", path);
827 z_crtime = (time_t)zp->zp_crtime[0];
828 z_atime = (time_t)zp->zp_atime[0];
829 z_mtime = (time_t)zp->zp_mtime[0];
830 z_ctime = (time_t)zp->zp_ctime[0];
832 (void) printf("\tpath %s\n", path);
833 (void) printf("\tatime %s", ctime(&z_atime));
834 (void) printf("\tmtime %s", ctime(&z_mtime));
835 (void) printf("\tctime %s", ctime(&z_ctime));
836 (void) printf("\tcrtime %s", ctime(&z_crtime));
837 (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen);
838 (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode);
839 (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size);
840 (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent);
841 (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links);
842 (void) printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr);
843 (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev);
848 dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
854 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
858 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
859 dump_none, /* unallocated */
860 dump_zap, /* object directory */
861 dump_uint64, /* object array */
862 dump_none, /* packed nvlist */
863 dump_packed_nvlist, /* packed nvlist size */
864 dump_none, /* bplist */
865 dump_none, /* bplist header */
866 dump_none, /* SPA space map header */
867 dump_none, /* SPA space map */
868 dump_none, /* ZIL intent log */
869 dump_dnode, /* DMU dnode */
870 dump_dmu_objset, /* DMU objset */
871 dump_dsl_dir, /* DSL directory */
872 dump_zap, /* DSL directory child map */
873 dump_zap, /* DSL dataset snap map */
874 dump_zap, /* DSL props */
875 dump_dsl_dataset, /* DSL dataset */
876 dump_znode, /* ZFS znode */
877 dump_acl, /* ZFS ACL */
878 dump_uint8, /* ZFS plain file */
879 dump_zap, /* ZFS directory */
880 dump_zap, /* ZFS master node */
881 dump_zap, /* ZFS delete queue */
882 dump_uint8, /* zvol object */
883 dump_zap, /* zvol prop */
884 dump_uint8, /* other uint8[] */
885 dump_uint64, /* other uint64[] */
886 dump_zap, /* other ZAP */
887 dump_zap, /* persistent error log */
888 dump_uint8, /* SPA history */
889 dump_uint64, /* SPA history offsets */
890 dump_zap, /* Pool properties */
894 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
896 dmu_buf_t *db = NULL;
897 dmu_object_info_t doi;
901 char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
906 (void) printf("\n Object lvl iblk dblk lsize"
912 dn = os->os->os_meta_dnode;
914 error = dmu_bonus_hold(os, object, FTAG, &db);
916 fatal("dmu_bonus_hold(%llu) failed, errno %u",
920 dn = ((dmu_buf_impl_t *)db)->db_dnode;
922 dmu_object_info_from_dnode(dn, &doi);
924 nicenum(doi.doi_metadata_block_size, iblk);
925 nicenum(doi.doi_data_block_size, dblk);
926 nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
928 nicenum(doi.doi_physical_blks << 9, asize);
929 nicenum(doi.doi_bonus_size, bonus_size);
933 if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6)
934 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
935 zio_checksum_table[doi.doi_checksum].ci_name);
937 if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6)
938 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
939 zio_compress_table[doi.doi_compress].ci_name);
941 (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n",
942 (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
943 asize, dmu_ot[doi.doi_type].ot_name, aux);
945 if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
946 (void) printf("%10s %3s %5s %5s %5s %5s %s\n",
947 "", "", "", "", bonus_size, "bonus",
948 dmu_ot[doi.doi_bonus_type].ot_name);
951 if (verbosity >= 4) {
952 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
953 object_viewer[doi.doi_type](os, object, NULL, 0);
958 dump_indirect(os, object, NULL, 0);
960 if (verbosity >= 5) {
962 * Report the list of segments that comprise the object.
966 uint64_t blkfill = 1;
969 if (dn->dn_type == DMU_OT_DNODE) {
971 blkfill = DNODES_PER_BLOCK;
975 error = dnode_next_offset(dn, B_FALSE, &start, minlvl,
980 error = dnode_next_offset(dn, B_TRUE, &end, minlvl,
982 nicenum(end - start, segsize);
983 (void) printf("\t\tsegment [%016llx, %016llx)"
984 " size %5s\n", (u_longlong_t)start,
985 (u_longlong_t)end, segsize);
993 dmu_buf_rele(db, FTAG);
996 static char *objset_types[DMU_OST_NUMTYPES] = {
997 "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1001 dump_dir(objset_t *os)
1003 dmu_objset_stats_t dds;
1004 uint64_t object, object_count;
1005 uint64_t refdbytes, usedobjs, scratch;
1007 char blkbuf[BP_SPRINTF_LEN];
1008 char osname[MAXNAMELEN];
1009 char *type = "UNKNOWN";
1010 int verbosity = dump_opt['d'];
1011 int print_header = 1;
1014 dmu_objset_fast_stat(os, &dds);
1016 if (dds.dds_type < DMU_OST_NUMTYPES)
1017 type = objset_types[dds.dds_type];
1019 if (dds.dds_type == DMU_OST_META) {
1020 dds.dds_creation_txg = TXG_INITIAL;
1021 usedobjs = os->os->os_rootbp->blk_fill;
1023 os->os->os_spa->spa_dsl_pool->dp_mos_dir->dd_used_bytes;
1025 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1028 ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
1030 nicenum(refdbytes, numbuf);
1032 if (verbosity >= 4) {
1033 (void) strcpy(blkbuf, ", rootbp ");
1034 sprintf_blkptr(blkbuf + strlen(blkbuf),
1035 BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
1040 dmu_objset_name(os, osname);
1042 (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1043 "%s, %llu objects%s\n",
1044 osname, type, (u_longlong_t)dmu_objset_id(os),
1045 (u_longlong_t)dds.dds_creation_txg,
1046 numbuf, (u_longlong_t)usedobjs, blkbuf);
1048 dump_intent_log(dmu_objset_zil(os));
1050 if (dmu_objset_ds(os) != NULL)
1051 dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
1052 dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
1057 if (zopt_objects != 0) {
1058 for (i = 0; i < zopt_objects; i++)
1059 dump_object(os, zopt_object[i], verbosity,
1061 (void) printf("\n");
1065 dump_object(os, 0, verbosity, &print_header);
1069 while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1070 dump_object(os, object, verbosity, &print_header);
1074 ASSERT3U(object_count, ==, usedobjs);
1076 (void) printf("\n");
1079 fatal("dmu_object_next() = %d", error);
1083 dump_uberblock(uberblock_t *ub)
1085 time_t timestamp = ub->ub_timestamp;
1087 (void) printf("Uberblock\n\n");
1088 (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1089 (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1090 (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1091 (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1092 (void) printf("\ttimestamp = %llu UTC = %s",
1093 (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp)));
1094 if (dump_opt['u'] >= 3) {
1095 char blkbuf[BP_SPRINTF_LEN];
1096 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
1097 (void) printf("\trootbp = %s\n", blkbuf);
1099 (void) printf("\n");
1103 dump_config(const char *pool)
1107 mutex_enter(&spa_namespace_lock);
1108 while ((spa = spa_next(spa)) != NULL) {
1110 (void) printf("%s\n", spa_name(spa));
1111 if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
1112 dump_nvlist(spa->spa_config, 4);
1114 mutex_exit(&spa_namespace_lock);
1118 dump_label(const char *dev)
1122 char *buf = label.vl_vdev_phys.vp_nvlist;
1123 size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1124 struct stat64 statbuf;
1128 if ((fd = open64(dev, O_RDONLY)) < 0) {
1129 (void) printf("cannot open '%s': %s\n", dev, strerror(errno));
1133 if (fstat64(fd, &statbuf) != 0) {
1134 (void) printf("failed to stat '%s': %s\n", dev,
1139 if (S_ISCHR(statbuf.st_mode)) {
1140 if (ioctl(fd, DIOCGMEDIASIZE, &psize) != 0) {
1141 (void) printf("failed to get size '%s': %s\n", dev,
1146 psize = statbuf.st_size;
1148 psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1150 for (l = 0; l < VDEV_LABELS; l++) {
1152 nvlist_t *config = NULL;
1154 (void) printf("--------------------------------------------\n");
1155 (void) printf("LABEL %d\n", l);
1156 (void) printf("--------------------------------------------\n");
1158 if (pread64(fd, &label, sizeof (label),
1159 vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1160 (void) printf("failed to read label %d\n", l);
1164 if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1165 (void) printf("failed to unpack label %d\n", l);
1168 dump_nvlist(config, 4);
1169 nvlist_free(config);
1175 dump_one_dir(char *dsname, void *arg)
1180 error = dmu_objset_open(dsname, DMU_OST_ANY,
1181 DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1183 (void) printf("Could not open %s\n", dsname);
1187 dmu_objset_close(os);
1192 zdb_space_map_load(spa_t *spa)
1194 vdev_t *rvd = spa->spa_root_vdev;
1198 for (c = 0; c < rvd->vdev_children; c++) {
1199 vd = rvd->vdev_child[c];
1200 for (m = 0; m < vd->vdev_ms_count; m++) {
1201 metaslab_t *msp = vd->vdev_ms[m];
1202 mutex_enter(&msp->ms_lock);
1203 error = space_map_load(&msp->ms_allocmap[0], NULL,
1204 SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset);
1205 mutex_exit(&msp->ms_lock);
1207 fatal("%s bad space map #%d, error %d",
1208 spa->spa_name, c, error);
1214 zdb_space_map_claim(spa_t *spa, blkptr_t *bp, zbookmark_t *zb)
1216 dva_t *dva = bp->blk_dva;
1219 space_map_t *allocmap, *freemap;
1224 for (d = 0; d < BP_GET_NDVAS(bp); d++) {
1225 uint64_t vdev = DVA_GET_VDEV(&dva[d]);
1226 uint64_t offset = DVA_GET_OFFSET(&dva[d]);
1227 uint64_t size = DVA_GET_ASIZE(&dva[d]);
1229 if ((vd = vdev_lookup_top(spa, vdev)) == NULL)
1232 if ((offset >> vd->vdev_ms_shift) >= vd->vdev_ms_count)
1235 msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
1236 allocmap = &msp->ms_allocmap[0];
1237 freemap = &msp->ms_freemap[0];
1239 /* Prepare our copy of the bp in case we need to read GBHs */
1240 if (DVA_GET_GANG(&dva[d])) {
1241 size = vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE);
1242 DVA_SET_ASIZE(&blk.blk_dva[d], size);
1243 DVA_SET_GANG(&blk.blk_dva[d], 0);
1246 mutex_enter(&msp->ms_lock);
1247 if (space_map_contains(freemap, offset, size)) {
1248 mutex_exit(&msp->ms_lock);
1249 return (EAGAIN); /* allocated more than once */
1252 if (!space_map_contains(allocmap, offset, size)) {
1253 mutex_exit(&msp->ms_lock);
1254 return (ESTALE); /* not allocated at all */
1257 space_map_remove(allocmap, offset, size);
1258 space_map_add(freemap, offset, size);
1260 mutex_exit(&msp->ms_lock);
1263 if (BP_IS_GANG(bp)) {
1267 /* LINTED - compile time assert */
1268 ASSERT(sizeof (zio_gbh_phys_t) == SPA_GANGBLOCKSIZE);
1270 BP_SET_CHECKSUM(&blk, ZIO_CHECKSUM_GANG_HEADER);
1271 BP_SET_PSIZE(&blk, SPA_GANGBLOCKSIZE);
1272 BP_SET_LSIZE(&blk, SPA_GANGBLOCKSIZE);
1273 BP_SET_COMPRESS(&blk, ZIO_COMPRESS_OFF);
1274 error = zio_wait(zio_read(NULL, spa, &blk, &gbh,
1275 SPA_GANGBLOCKSIZE, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
1276 ZIO_FLAG_CANFAIL | ZIO_FLAG_CONFIG_HELD, zb));
1279 if (BP_SHOULD_BYTESWAP(&blk))
1280 byteswap_uint64_array(&gbh, SPA_GANGBLOCKSIZE);
1281 for (g = 0; g < SPA_GBH_NBLKPTRS; g++) {
1282 if (BP_IS_HOLE(&gbh.zg_blkptr[g]))
1284 error = zdb_space_map_claim(spa, &gbh.zg_blkptr[g], zb);
1294 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
1299 msp = (metaslab_t *)((char *)sm - offsetof(metaslab_t, ms_allocmap[0]));
1301 (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
1302 (u_longlong_t)msp->ms_group->mg_vd->vdev_id,
1303 (u_longlong_t)start,
1304 (u_longlong_t)size);
1308 zdb_space_map_unload(spa_t *spa)
1310 vdev_t *rvd = spa->spa_root_vdev;
1314 for (c = 0; c < rvd->vdev_children; c++) {
1315 vd = rvd->vdev_child[c];
1316 for (m = 0; m < vd->vdev_ms_count; m++) {
1317 metaslab_t *msp = vd->vdev_ms[m];
1318 mutex_enter(&msp->ms_lock);
1319 space_map_vacate(&msp->ms_allocmap[0], zdb_leak,
1320 &msp->ms_allocmap[0]);
1321 space_map_unload(&msp->ms_allocmap[0]);
1322 space_map_vacate(&msp->ms_freemap[0], NULL, NULL);
1323 mutex_exit(&msp->ms_lock);
1329 zdb_refresh_ubsync(spa_t *spa)
1331 uberblock_t ub = { 0 };
1332 vdev_t *rvd = spa->spa_root_vdev;
1336 * Reload the uberblock.
1338 zio = zio_root(spa, NULL, NULL,
1339 ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
1340 vdev_uberblock_load(zio, rvd, &ub);
1341 (void) zio_wait(zio);
1344 spa->spa_ubsync = ub;
1348 * Verify that the sum of the sizes of all blocks in the pool adds up
1349 * to the SPA's sa_alloc total.
1351 typedef struct zdb_blkstats {
/* Type index under which deferred-free blocks are counted. */
1358 #define DMU_OT_DEFERRED DMU_OT_NONE
/* Type index under which the totals across all types are accumulated. */
1359 #define DMU_OT_TOTAL DMU_OT_NUMTYPES
/* Level index under which the totals across all levels are accumulated. */
1361 #define ZB_TOTAL ZB_MAXLEVEL
/*
 * State shared between dump_block_stats() and the block-counting callbacks.
 * NOTE(review): zcb_readfails and zcb_haderrors are used elsewhere in this
 * file but their declarations are not visible in this view.
 */
1363 typedef struct zdb_cb {
/* Statistics indexed [level][type]; the +1 makes room for the *_TOTAL index. */
1364 zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
/* Number of traversal errors seen, indexed by the bc_errno value. */
1365 uint64_t zcb_errors[256];
/* Cache entry whose bookmark is passed to zdb_space_map_claim(). */
1366 traverse_blk_cache_t *zcb_cache;
/*
 * Account for one block pointer in 'zcb' and claim its space.
 *
 * The asize, lsize and psize of 'bp' are added to four buckets: the block's
 * own level and the ZB_TOTAL level, each under both 'type' and DMU_OT_TOTAL.
 * The block is then handed to zdb_space_map_claim() using the bookmark in
 * zcb->zcb_cache; the visible error paths all end in fatal().
 * NOTE(review): several short statements of this function are not visible in
 * this view, so the exact conditions guarding each step should be confirmed.
 */
1372 zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
1376 for (i = 0; i < 4; i++) {
/* i = 0,1 select the block's own level; i = 2,3 select ZB_TOTAL. */
1377 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
/* Odd i selects the caller's type; even i selects DMU_OT_TOTAL. */
1378 int t = (i & 1) ? type : DMU_OT_TOTAL;
1379 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1381 zb->zb_asize += BP_GET_ASIZE(bp);
1382 zb->zb_lsize += BP_GET_LSIZE(bp);
1383 zb->zb_psize += BP_GET_PSIZE(bp);
1390 error = zdb_space_map_claim(spa, bp, &zcb->zcb_cache->bc_bookmark);
/* EAGAIN from the claim is reported as a double allocation. */
1395 if (error == EAGAIN)
1396 (void) fatal("double-allocation, bp=%p", bp);
/* ESTALE from the claim is reported as a reference to a freed block. */
1398 if (error == ESTALE)
1399 (void) fatal("reference to freed block, bp=%p", bp);
1401 (void) fatal("fatal error %d in bp %p", error, bp);
/*
 * Per-block callback registered with traverse_init() by dump_block_stats().
 * 'bc' carries the block pointer, its bookmark and an error number; 'arg' is
 * the zdb_cb_t being filled in.
 *
 * The visible code records read errors in zcb_haderrors and zcb_errors[],
 * may refresh the uberblock through zdb_refresh_ubsync(), prints a
 * diagnostic naming the bookmark, optionally prints each block at -b >= 4,
 * and finally passes the block to zdb_count_block().
 * NOTE(review): the branches and return statements that tie these steps
 * together are not visible in this view.
 */
1405 zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
1407 zbookmark_t *zb = &bc->bc_bookmark;
1408 zdb_cb_t *zcb = arg;
1409 blkptr_t *bp = &bc->bc_blkptr;
1410 dmu_object_type_t type = BP_GET_TYPE(bp);
1411 char blkbuf[BP_SPRINTF_LEN];
/* Only the first ten consecutive failures, and only with -L, refresh. */
1415 if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
1416 zdb_refresh_ubsync(spa);
1419 zcb->zcb_haderrors = 1;
/*
 * NOTE(review): bc_errno indexes a 256-entry array; no range check is
 * visible here.
 */
1420 zcb->zcb_errors[bc->bc_errno]++;
/* Format the blkptr text only when the verbosity level will print it. */
1424 if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno))
1425 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1429 (void) printf("zdb_blkptr_cb: Got error %d reading "
1430 "<%llu, %llu, %lld, %llx> %s -- %s\n",
1432 (u_longlong_t)zb->zb_objset,
1433 (u_longlong_t)zb->zb_object,
1434 (u_longlong_t)zb->zb_level,
1435 (u_longlong_t)zb->zb_blkid,
1437 error == EAGAIN ? "retrying" : "skipping");
/* Reset the consecutive-failure counter tested above. */
1442 zcb->zcb_readfails = 0;
1444 ASSERT(!BP_IS_HOLE(bp));
1446 if (dump_opt['b'] >= 4) {
1447 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1448 (void) printf("objset %llu object %llu offset 0x%llx %s\n",
1449 (u_longlong_t)zb->zb_objset,
1450 (u_longlong_t)zb->zb_object,
1451 (u_longlong_t)blkid2offset(bc->bc_dnode,
1452 zb->zb_level, zb->zb_blkid),
1456 zdb_count_block(spa, zcb, bp, type);
/*
 * Traverse the pool's blocks, gather per-level and per-type statistics, and
 * check them against the space maps.
 *
 * Visible steps, in order: load the space maps, count every entry of the
 * deferred-free bplist, traverse the pool with zdb_blkptr_cb() as the
 * callback, print per-errno error counts if any were recorded, report
 * leaked segments through zdb_space_map_unload(), compare the traversed
 * allocated size with spa_get_alloc(), and print a summary.  At -b >= 2 a
 * per-type table is printed as well.
 * NOTE(review): many short statements (conditions, returns, else branches)
 * are not visible in this view; the return value is used by dump_zpool().
 */
1462 dump_block_stats(spa_t *spa)
1464 traverse_handle_t *th;
1465 zdb_cb_t zcb = { 0 };
1466 traverse_blk_cache_t dummy_cache = { 0 };
1467 zdb_blkstats_t *zb, *tzb;
1468 uint64_t alloc, space;
1470 int advance = zdb_advance;
/* zdb_count_block() reads zcb_cache->bc_bookmark; point it at a zeroed entry. */
1474 zcb.zcb_cache = &dummy_cache;
1477 advance |= ADVANCE_DATA;
1479 advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
1481 (void) printf("\nTraversing all blocks to %sverify"
1482 " nothing leaked ...\n",
1483 dump_opt['c'] ? "verify checksums and " : "");
1486 * Load all space maps. As we traverse the pool, if we find a block
1487 * that's not in its space map, that indicates a double-allocation,
1488 * reference to a freed block, or an unclaimed block. Otherwise we
1489 * remove the block from the space map. If the space maps are not
1490 * empty when we're done, that indicates leaked blocks.
1493 zdb_space_map_load(spa);
1496 * If there's a deferred-free bplist, process that first.
1498 if (spa->spa_sync_bplist_obj != 0) {
1499 bplist_t *bpl = &spa->spa_sync_bplist;
1503 VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
1504 spa->spa_sync_bplist_obj));
1506 while (bplist_iterate(bpl, &itor, &blk) == 0) {
1507 if (dump_opt['b'] >= 4) {
1508 char blkbuf[BP_SPRINTF_LEN];
1509 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
1510 (void) printf("[%s] %s\n",
1511 "deferred free", blkbuf);
1513 zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
1520 * Now traverse the pool. If we're reading all data to verify
1521 * checksums, do a scrubbing read so that we validate all copies.
1523 flags = ZIO_FLAG_CANFAIL;
1524 if (advance & ADVANCE_DATA)
1525 flags |= ZIO_FLAG_SCRUB;
1526 th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
1527 th->th_noread = zdb_noread;
1529 traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES);
/* Keep stepping the traversal for as long as it asks to be called again. */
1531 while (traverse_more(th) == EAGAIN)
1536 if (zcb.zcb_haderrors) {
1537 (void) printf("\nError counts:\n\n");
1538 (void) printf("\t%5s %s\n", "errno", "count");
1539 for (e = 0; e < 256; e++) {
1540 if (zcb.zcb_errors[e] != 0) {
1541 (void) printf("\t%5d %llu\n",
1542 e, (u_longlong_t)zcb.zcb_errors[e]);
1548 * Report any leaked segments.
1551 zdb_space_map_unload(spa);
1554 (void) printf("\n\n *** Live pool traversal; "
1555 "block counts are only approximate ***\n\n");
1557 alloc = spa_get_alloc(spa);
1558 space = spa_get_space(spa);
/* Grand total bucket: all levels, all types. */
1560 tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
/* An exact match between traversed and allocated size means no leaks. */
1562 if (tzb->zb_asize == alloc) {
1563 (void) printf("\n\tNo leaks (block sum matches space"
1564 " maps exactly)\n");
1566 (void) printf("block traversal size %llu != alloc %llu "
1568 (u_longlong_t)tzb->zb_asize,
1569 (u_longlong_t)alloc,
1570 (u_longlong_t)(alloc - tzb->zb_asize));
/*
 * NOTE(review): the averages below divide by zb_count; the statement taken
 * when the count is zero is not visible in this view.
 */
1574 if (tzb->zb_count == 0)
1577 (void) printf("\n");
1578 (void) printf("\tbp count: %10llu\n",
1579 (u_longlong_t)tzb->zb_count);
1580 (void) printf("\tbp logical: %10llu\t avg: %6llu\n",
1581 (u_longlong_t)tzb->zb_lsize,
1582 (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
1583 (void) printf("\tbp physical: %10llu\t avg:"
1584 " %6llu\tcompression: %6.2f\n",
1585 (u_longlong_t)tzb->zb_psize,
1586 (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
1587 (double)tzb->zb_lsize / tzb->zb_psize);
1588 (void) printf("\tbp allocated: %10llu\t avg:"
1589 " %6llu\tcompression: %6.2f\n",
1590 (u_longlong_t)tzb->zb_asize,
1591 (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
1592 (double)tzb->zb_lsize / tzb->zb_asize);
1593 (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
1594 (u_longlong_t)alloc, 100.0 * alloc / space);
/* Per-type breakdown, one row per type plus the DMU_OT_TOTAL row. */
1596 if (dump_opt['b'] >= 2) {
1598 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
1599 "\t avg\t comp\t%%Total\tType\n");
1601 for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
1602 char csize[6], lsize[6], psize[6], asize[6], avg[6];
1605 typename = t == DMU_OT_DEFERRED ? "deferred free" :
1606 t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
1608 if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
1609 (void) printf("%6s\t%5s\t%5s\t%5s"
1610 "\t%5s\t%5s\t%6s\t%s\n",
/* l counts down through the real levels; -1 stands for the ZB_TOTAL row. */
1622 for (l = ZB_TOTAL - 1; l >= -1; l--) {
1623 level = (l == -1 ? ZB_TOTAL : l);
1624 zb = &zcb.zcb_type[level][t];
1626 if (zb->zb_asize == 0)
/* Per-level rows are only considered at -b >= 3. */
1629 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
1632 if (level == 0 && zb->zb_asize ==
1633 zcb.zcb_type[ZB_TOTAL][t].zb_asize)
1636 nicenum(zb->zb_count, csize);
1637 nicenum(zb->zb_lsize, lsize);
1638 nicenum(zb->zb_psize, psize);
1639 nicenum(zb->zb_asize, asize);
1640 nicenum(zb->zb_asize / zb->zb_count, avg);
1642 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
1644 csize, lsize, psize, asize, avg,
1645 (double)zb->zb_lsize / zb->zb_psize,
1646 100.0 * zb->zb_asize / tzb->zb_asize);
1648 if (level == ZB_TOTAL)
1649 (void) printf("%s\n", typename);
1651 (void) printf(" L%d %s\n",
1657 (void) printf("\n");
1662 if (zcb.zcb_haderrors)
/*
 * Dump pool-wide information for 'spa' as selected by dump_opt[].
 *
 * With -d or -i the meta objset is dumped and dmu_objset_find() runs
 * dump_one_dir() over the pool's datasets and snapshots; at -d >= 3 the
 * deferred-free bplist, the DTLs and the metaslabs are dumped too.  With -b
 * or -c block statistics are gathered through dump_block_stats().
 * NOTE(review): the conditions guarding dump_uberblock() and
 * show_pool_stats(), and what is done with 'rc', are not visible here.
 */
1669 dump_zpool(spa_t *spa)
1671 dsl_pool_t *dp = spa_get_dsl(spa);
1675 dump_uberblock(&spa->spa_uberblock);
1677 if (dump_opt['d'] || dump_opt['i']) {
1678 dump_dir(dp->dp_meta_objset);
1679 if (dump_opt['d'] >= 3) {
1680 dump_bplist(dp->dp_meta_objset,
1681 spa->spa_sync_bplist_obj, "Deferred frees");
1682 dump_dtl(spa->spa_root_vdev, 0);
1683 dump_metaslabs(spa);
/* The return value of the dataset walk is deliberately ignored. */
1685 (void) dmu_objset_find(spa->spa_name, dump_one_dir, NULL,
1686 DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
1689 if (dump_opt['b'] || dump_opt['c'])
1690 rc = dump_block_stats(spa);
1693 show_pool_stats(spa);
/*
 * Option bits for zdb_read_block().  main() maps the block-specifier flag
 * letters onto them through flagbits[]: 'c' CHECKSUM, 'd' DECOMPRESS,
 * 'e' BSWAP, 'g' GBH, 'i' INDIRECT, 'p' PHYS, 'r' RAW, 'b' PRINT_BLKPTR.
 */
1699 #define ZDB_FLAG_CHECKSUM 0x0001
1700 #define ZDB_FLAG_DECOMPRESS 0x0002
1701 #define ZDB_FLAG_BSWAP 0x0004
1702 #define ZDB_FLAG_GBH 0x0008
1703 #define ZDB_FLAG_INDIRECT 0x0010
1704 #define ZDB_FLAG_PHYS 0x0020
1705 #define ZDB_FLAG_RAW 0x0040
1706 #define ZDB_FLAG_PRINT_BLKPTR 0x0080
/*
 * Print the decoded fields of the block pointer 'bp' to stdout: every DVA
 * (vdev id, offset, gang bit, grid, asize, plus a compact colon-separated
 * form), the logical and physical sizes, byte order, object type, birth,
 * level, fill, checksum and compression names, and the four checksum words.
 *
 * If 'flags' contains ZDB_FLAG_BSWAP the block pointer is byteswapped in
 * place before it is decoded, so the caller's copy is modified.
 * NOTE(review): BP_GET_TYPE, BP_GET_CHECKSUM and BP_GET_COMPRESS index
 * dmu_ot[], zio_checksum_table[] and zio_compress_table[]; no range check is
 * visible here, and zdb_read_block() passes a pointer into raw data it read.
 */
1711 zdb_print_blkptr(blkptr_t *bp, int flags)
1713 dva_t *dva = bp->blk_dva;
1716 if (flags & ZDB_FLAG_BSWAP)
1717 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
1719 * Super-ick warning: This code is also duplicated in
1720 * cmd/mdb/common/modules/zfs/zfs.c . Yeah, I hate code
/* Three lines of output per DVA held in the block pointer. */
1723 for (d = 0; d < BP_GET_NDVAS(bp); d++) {
1724 (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
1725 (longlong_t)DVA_GET_VDEV(&dva[d]),
1726 (longlong_t)DVA_GET_OFFSET(&dva[d]));
1727 (void) printf("\tDVA[%d]: GANG: %-5s GRID: %04llx\t"
1729 DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
1730 (longlong_t)DVA_GET_GRID(&dva[d]),
1731 (longlong_t)DVA_GET_ASIZE(&dva[d]));
1732 (void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
1733 (u_longlong_t)DVA_GET_VDEV(&dva[d]),
1734 (longlong_t)DVA_GET_OFFSET(&dva[d]),
1735 (longlong_t)BP_GET_PSIZE(bp),
1736 BP_SHOULD_BYTESWAP(bp) ? "e" : "",
1737 !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
1739 DVA_GET_GANG(&dva[d]) ? "g" : "",
1740 BP_GET_COMPRESS(bp) != 0 ? "d" : "");
1742 (void) printf("\tLSIZE: %-16llx\t\tPSIZE: %llx\n",
1743 (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
1744 (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
1745 BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
1746 dmu_ot[BP_GET_TYPE(bp)].ot_name);
1747 (void) printf("\tBIRTH: %-16llx LEVEL: %-2llu\tFILL: %llx\n",
1748 (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
1749 (u_longlong_t)bp->blk_fill);
1750 (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\n",
1751 zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
1752 zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
1753 (void) printf("\tCKSUM: %llx:%llx:%llx:%llx\n",
1754 (u_longlong_t)bp->blk_cksum.zc_word[0],
1755 (u_longlong_t)bp->blk_cksum.zc_word[1],
1756 (u_longlong_t)bp->blk_cksum.zc_word[2],
1757 (u_longlong_t)bp->blk_cksum.zc_word[3]);
1761 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
1765 for (i = 0; i < nbps; i++)
1766 zdb_print_blkptr(&bp[i], flags);
1770 zdb_dump_gbh(void *buf, int flags)
1772 zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
1776 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
1778 if (flags & ZDB_FLAG_BSWAP)
1779 byteswap_uint64_array(buf, size);
1780 (void) write(2, buf, size);
/*
 * Print a hex dump of 'buf' under the heading 'label'.
 *
 * The buffer is treated as size / 8 64-bit words and printed two words per
 * row, each row prefixed with its byte offset and followed by the row's 16
 * bytes as characters ('.' replaces anything isprint() rejects).  With
 * ZDB_FLAG_BSWAP in 'flags' each word is byteswapped for display only; the
 * buffer itself is not modified.
 * NOTE(review): the statements choosing between the two column headers and
 * setting up 'c' are not visible in this view.  Each row reads d[i + 1], so
 * this assumes an even word count -- confirm against callers.
 */
1784 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
1786 uint64_t *d = (uint64_t *)buf;
1787 int nwords = size / sizeof (uint64_t);
/* Collapse the flag test to 0 or 1. */
1788 int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
1794 hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8";
1796 hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f";
1798 (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr);
1800 for (i = 0; i < nwords; i += 2) {
1801 (void) printf("%06llx: %016llx %016llx ",
1802 (u_longlong_t)(i * sizeof (uint64_t)),
1803 (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
1804 (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
/* Character column: the 16 bytes of this row. */
1807 for (j = 0; j < 2 * sizeof (uint64_t); j++)
1808 (void) printf("%c", isprint(c[j]) ? c[j] : '.');
1809 (void) printf("\n");
1814 * There are two acceptable formats:
1815 * leaf_name - For example: c1t0d0 or /tmp/ztest.0a
1816 * child[.child]* - For example: 0.1.1
1818 * The second form can be used to specify arbitrary vdevs anywhere
1819 * in the hierarchy. For example, in a pool with a mirror of
1820 * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
/*
 * Find the vdev below 'vdev' that 'path' names.
 *
 * 'path' is first tried as a dotted list of child indexes: the leading
 * number selects a child and any remainder after the '.' is resolved
 * recursively.  Otherwise the children are searched by name, recursing into
 * children that have no vdev_path of their own.
 * NOTE(review): the return statements and the branches between the two
 * forms are not visible in this view.
 */
1823 zdb_vdev_lookup(vdev_t *vdev, char *path)
1831 /* First, assume the x.x.x.x format */
1832 i = (int)strtoul(path, &s, 10);
/* No digits were consumed, or the number is not followed by '.' or the end. */
1833 if (s == path || (s && *s != '.' && *s != '\0'))
1835 if (i < 0 || i >= vdev->vdev_children)
1838 vdev = vdev->vdev_child[i];
1841 return (zdb_vdev_lookup(vdev, s+1));
1844 for (i = 0; i < vdev->vdev_children; i++) {
1845 vdev_t *vc = vdev->vdev_child[i];
1847 if (vc->vdev_path == NULL) {
1848 vc = zdb_vdev_lookup(vc, path);
/* p is the last path component; q points at the final two characters. */
1855 p = strrchr(vc->vdev_path, '/');
1856 p = p ? p + 1 : vc->vdev_path;
/* NOTE(review): assumes vdev_path is at least two characters long. */
1857 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
1859 if (strcmp(vc->vdev_path, path) == 0)
1861 if (strcmp(p, path) == 0)
/* Also compare the last component with a trailing "s0" left out. */
1863 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
1871 * Read a block from a pool and print it out. The syntax of the
1872 * block descriptor is:
1874 * pool:vdev_specifier:offset:size[:flags]
1876 * pool - The name of the pool you wish to read from
1877 * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
1878 * offset - offset, in hex, in bytes
1879 * size - Amount of data to read, in hex, in bytes
1880 * flags - A string of characters specifying options
1881 * b: Decode a blkptr at given offset within block
1882 * *c: Calculate and display checksums
1883 * *d: Decompress data before dumping
1884 * e: Byteswap data before dumping
1885 * *g: Display data as a gang block header
1886 * *i: Display as an indirect block
1887 * p: Do I/O to physical offset
1888 * r: Dump raw data to stdout
1890 * * = not yet implemented
/*
 * Parse the block descriptor 'thing', read the requested range from the
 * named vdev and print it in the form selected by the flag characters.
 * '*spap' is the caller's pool handle; the pool is opened with spa_open()
 * when the visible check on the current handle and its name does not match
 * the name in the descriptor.
 * NOTE(review): many short statements (assignments, returns, branches) are
 * not visible in this view; the notes below are limited to visible lines.
 */
1893 zdb_read_block(char *thing, spa_t **spap)
1897 uint64_t offset = 0, size = 0, blkptr_offset = 0;
1901 char *s, *p, *dup, *spa_name, *vdev, *flagstr;
1902 int i, error, zio_flags;
/*
 * strtok() modifies its argument, so the descriptor is split on a copy.
 * NOTE(review): no check of the strdup() result and no free of 'dup' are
 * visible in this view.
 */
1904 dup = strdup(thing);
1905 s = strtok(dup, ":");
1906 spa_name = s ? s : "";
1907 s = strtok(NULL, ":");
1909 s = strtok(NULL, ":");
/* Offset and size are hexadecimal; a missing field parses as 0. */
1910 offset = strtoull(s ? s : "", NULL, 16);
1911 s = strtok(NULL, ":");
1912 size = strtoull(s ? s : "", NULL, 16);
1913 s = strtok(NULL, ":");
1914 flagstr = s ? s : "";
1918 s = "size must not be zero";
1919 if (!IS_P2ALIGNED(size, DEV_BSIZE))
1920 s = "size must be a multiple of sector size";
1921 if (!IS_P2ALIGNED(offset, DEV_BSIZE))
1922 s = "offset must be a multiple of sector size";
1924 (void) printf("Invalid block specifier: %s - %s\n", thing, s);
/*
 * NOTE(review): the outer loop walks tokens in 's' while the inner loop
 * indexes 'flagstr' -- confirm this is intended.
 */
1929 for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
1930 for (i = 0; flagstr[i]; i++) {
1931 int bit = flagbits[(uchar_t)flagstr[i]];
1934 (void) printf("***Invalid flag: %c\n",
1940 /* If it's not something with an argument, keep going */
1941 if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
1942 ZDB_FLAG_PRINT_BLKPTR)) == 0)
1945 p = &flagstr[i + 1];
/* Only the 'b' flag's argument, a hex offset, is parsed here. */
1946 if (bit == ZDB_FLAG_PRINT_BLKPTR)
1947 blkptr_offset = strtoull(p, &p, 16);
1948 if (*p != ':' && *p != '\0') {
1949 (void) printf("***Invalid flag arg: '%s'\n", s);
/* The function's own address serves as the hold tag for open and close. */
1956 if (spa == NULL || spa->spa_name == NULL ||
1957 strcmp(spa->spa_name, spa_name)) {
1958 if (spa && spa->spa_name)
1959 spa_close(spa, (void *)zdb_read_block);
1960 error = spa_open(spa_name, spap, (void *)zdb_read_block);
1962 fatal("Failed to open pool '%s': %s",
1963 spa_name, strerror(error));
1967 vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
1969 (void) printf("***Invalid vdev: %s\n", vdev);
1974 (void) printf("Found vdev: %s\n", vd->vdev_path);
1976 (void) printf("Found vdev type: %s\n",
1977 vd->vdev_ops->vdev_op_type);
/* Released by the umem_free() at the end of the function. */
1980 buf = umem_alloc(size, UMEM_NOFAIL);
1982 zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
1983 ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK;
1985 if (flags & ZDB_FLAG_PHYS)
1986 zio_flags |= ZIO_FLAG_PHYSICAL;
/* Issue the read as a child of a root zio and wait for it to complete. */
1988 zio = zio_root(spa, NULL, NULL, 0);
1989 /* XXX todo - cons up a BP so RAID-Z will be happy */
1990 zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
1991 ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
1992 error = zio_wait(zio);
1995 (void) printf("Read of %s failed, error: %d\n", thing, error);
/*
 * Exactly one output form is used, chosen in this order of precedence.
 * NOTE(review): no check that blkptr_offset lies within 'size' is visible.
 */
1999 if (flags & ZDB_FLAG_PRINT_BLKPTR)
2000 zdb_print_blkptr((blkptr_t *)(void *)
2001 ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2002 else if (flags & ZDB_FLAG_RAW)
2003 zdb_dump_block_raw(buf, size, flags);
2004 else if (flags & ZDB_FLAG_INDIRECT)
2005 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2007 else if (flags & ZDB_FLAG_GBH)
2008 zdb_dump_gbh(buf, flags);
2010 zdb_dump_block(thing, buf, size, flags);
2013 umem_free(buf, size);
/*
 * Program entry point.
 *
 * Sets the open-file limit to 1024, parses the options named in the getopt()
 * string into dump_opt[] and the traversal settings, and then acts on the
 * remaining arguments: -l dumps a label, -R reads raw blocks through
 * zdb_read_block(), and otherwise argv[0] is opened as a dataset (if it
 * contains a '/') or as a pool, with further arguments parsed as object
 * numbers.
 * NOTE(review): the switch cases, most branches and the final cleanup are
 * not visible in this view; the comments below cover visible lines only.
 */
2018 main(int argc, char **argv)
2021 struct rlimit rl = { 1024, 1024 };
2023 objset_t *os = NULL;
2030 (void) setrlimit(RLIMIT_NOFILE, &rl);
2031 (void) enable_extended_FILE_stdio(-1, -1);
2033 dprintf_setup(&argc, argv);
2035 while ((c = getopt(argc, argv, "udibcsvCLO:B:UlR")) != -1) {
2054 if (endstr[0] == '!') {
/* Translate a traversal-order keyword into its ADVANCE_* flag. */
2060 if (strcmp(endstr, "post") == 0) {
2063 } else if (strcmp(endstr, "pre") == 0) {
2065 } else if (strcmp(endstr, "prune") == 0) {
2066 flag = ADVANCE_PRUNE;
2067 } else if (strcmp(endstr, "data") == 0) {
2068 flag = ADVANCE_DATA;
2069 } else if (strcmp(endstr, "holes") == 0) {
2070 flag = ADVANCE_HOLES;
2075 zdb_advance |= flag;
2077 zdb_advance &= ~flag;
/*
 * endstr starts one character before optarg so that every
 * strtoull(endstr + 1, ...) below steps over the preceding separator.
 * The first three fields use base 0; the block id is hexadecimal.
 */
2080 endstr = optarg - 1;
2081 zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0);
2082 zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0);
2083 zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
2084 zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
2085 (void) printf("simulating bad block "
2086 "<%llu, %llu, %lld, %llx>\n",
2087 (u_longlong_t)zdb_noread.zb_objset,
2088 (u_longlong_t)zdb_noread.zb_object,
2089 (u_longlong_t)zdb_noread.zb_level,
2090 (u_longlong_t)zdb_noread.zb_blkid);
2096 spa_config_dir = "/tmp";
2107 * Disable vdev caching. If we don't do this, live pool traversal
2108 * won't make progress because it will never see disk updates.
2110 zfs_vdev_cache_size = 0;
/* L, l and R are left out of the "dump everything" default. */
2112 for (c = 0; c < 256; c++) {
2113 if (dump_all && c != 'L' && c != 'l' && c != 'R')
2116 dump_opt[c] += verbose;
2123 if (dump_opt['C']) {
2130 if (dump_opt['l']) {
2131 dump_label(argv[0]);
/* -R: fill in the flag-letter table used by zdb_read_block(). */
2135 if (dump_opt['R']) {
2136 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
2137 flagbits['c'] = ZDB_FLAG_CHECKSUM;
2138 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
2139 flagbits['e'] = ZDB_FLAG_BSWAP;
2140 flagbits['g'] = ZDB_FLAG_GBH;
2141 flagbits['i'] = ZDB_FLAG_INDIRECT;
2142 flagbits['p'] = ZDB_FLAG_PHYS;
2143 flagbits['r'] = ZDB_FLAG_RAW;
2147 zdb_read_block(argv[0], &spa);
/* Same tag that zdb_read_block() passes to spa_open(). */
2152 spa_close(spa, (void *)zdb_read_block);
2157 dump_config(argv[0]);
/* A name containing '/' is opened as a dataset, anything else as a pool. */
2159 if (strchr(argv[0], '/') != NULL) {
2160 error = dmu_objset_open(argv[0], DMU_OST_ANY,
2161 DS_MODE_STANDARD | DS_MODE_READONLY, &os);
2163 error = spa_open(argv[0], &spa, FTAG);
2167 fatal("can't open %s: %s", argv[0], strerror(error));
/*
 * Remaining arguments are object numbers, parsed with base auto-detection.
 * NOTE(review): no check of the calloc() result is visible, and the line
 * (if any) that clears errno before strtoull() is not visible either.
 */
2171 zopt_objects = argc;
2172 zopt_object = calloc(zopt_objects, sizeof (uint64_t));
2173 for (i = 0; i < zopt_objects; i++) {
2175 zopt_object[i] = strtoull(argv[i], NULL, 0);
2176 if (zopt_object[i] == 0 && errno != 0)
2177 fatal("bad object number %s: %s",
2178 argv[i], strerror(errno));
2184 dmu_objset_close(os);
2187 spa_close(spa, FTAG);