]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - cddl/contrib/opensolaris/cmd/zdb/zdb.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / cddl / contrib / opensolaris / cmd / zdb / zdb.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 #include <stdio.h>
27 #include <stdio_ext.h>
28 #include <stdlib.h>
29 #include <ctype.h>
30 #include <sys/zfs_context.h>
31 #include <sys/spa.h>
32 #include <sys/spa_impl.h>
33 #include <sys/dmu.h>
34 #include <sys/zap.h>
35 #include <sys/fs/zfs.h>
36 #include <sys/zfs_znode.h>
37 #include <sys/vdev.h>
38 #include <sys/vdev_impl.h>
39 #include <sys/metaslab_impl.h>
40 #include <sys/dmu_objset.h>
41 #include <sys/dsl_dir.h>
42 #include <sys/dsl_dataset.h>
43 #include <sys/dsl_pool.h>
44 #include <sys/dbuf.h>
45 #include <sys/zil.h>
46 #include <sys/zil_impl.h>
47 #include <sys/stat.h>
48 #include <sys/resource.h>
49 #include <sys/dmu_traverse.h>
50 #include <sys/zio_checksum.h>
51 #include <sys/zio_compress.h>
52 #include <sys/zfs_fuid.h>
53 #include <sys/arc.h>
54 #undef ZFS_MAXNAMELEN
55 #undef verify
56 #include <libzfs.h>
57
58 const char cmdname[] = "zdb";
59 uint8_t dump_opt[256];
60
61 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
62
63 extern void dump_intent_log(zilog_t *);
64 uint64_t *zopt_object = NULL;
65 int zopt_objects = 0;
66 libzfs_handle_t *g_zfs;
67 boolean_t zdb_sig_user_data = B_TRUE;
68 int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
69
/*
 * These libumem hooks provide a reasonable set of defaults for the allocator's
 * debugging facilities.
 */

/*
 * Returns the string to be used as the $UMEM_DEBUG setting.
 *
 * Declared with an explicit (void) parameter list so the definition also
 * serves as a prototype, matching _umem_logging_init() below.
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}
79
/*
 * libumem hook: returns the string to be used as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging[] = "fail,contents";

	return (umem_logging);
}
85
86 static void
87 usage(void)
88 {
89         (void) fprintf(stderr,
90             "Usage: %s [-udibcsv] [-U cachefile_path] "
91             "[-S user:cksumalg] "
92             "dataset [object...]\n"
93             "       %s -C [pool]\n"
94             "       %s -l dev\n"
95             "       %s -R pool:vdev:offset:size:flags\n"
96             "       %s [-p path_to_vdev_dir]\n"
97             "       %s -e pool | GUID | devid ...\n",
98             cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
99
100         (void) fprintf(stderr, "        -u uberblock\n");
101         (void) fprintf(stderr, "        -d datasets\n");
102         (void) fprintf(stderr, "        -C cached pool configuration\n");
103         (void) fprintf(stderr, "        -i intent logs\n");
104         (void) fprintf(stderr, "        -b block statistics\n");
105         (void) fprintf(stderr, "        -c checksum all data blocks\n");
106         (void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
107         (void) fprintf(stderr, "        -S <user|all>:<cksum_alg|all> -- "
108             "dump blkptr signatures\n");
109         (void) fprintf(stderr, "        -v verbose (applies to all others)\n");
110         (void) fprintf(stderr, "        -l dump label contents\n");
111         (void) fprintf(stderr, "        -U cachefile_path -- use alternate "
112             "cachefile\n");
113         (void) fprintf(stderr, "        -R read and display block from a "
114             "device\n");
115         (void) fprintf(stderr, "        -e Pool is exported/destroyed/"
116             "has altroot\n");
117         (void) fprintf(stderr, "        -p <Path to vdev dir> (use with -e)\n");
118         (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
119             "to make only that option verbose\n");
120         (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
121         exit(1);
122 }
123
124 static void
125 fatal(const char *fmt, ...)
126 {
127         va_list ap;
128
129         va_start(ap, fmt);
130         (void) fprintf(stderr, "%s: ", cmdname);
131         (void) vfprintf(stderr, fmt, ap);
132         va_end(ap);
133         (void) fprintf(stderr, "\n");
134
135         abort();
136 }
137
138 static void
139 dump_nvlist(nvlist_t *list, int indent)
140 {
141         nvpair_t *elem = NULL;
142
143         while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
144                 switch (nvpair_type(elem)) {
145                 case DATA_TYPE_STRING:
146                         {
147                                 char *value;
148
149                                 VERIFY(nvpair_value_string(elem, &value) == 0);
150                                 (void) printf("%*s%s='%s'\n", indent, "",
151                                     nvpair_name(elem), value);
152                         }
153                         break;
154
155                 case DATA_TYPE_UINT64:
156                         {
157                                 uint64_t value;
158
159                                 VERIFY(nvpair_value_uint64(elem, &value) == 0);
160                                 (void) printf("%*s%s=%llu\n", indent, "",
161                                     nvpair_name(elem), (u_longlong_t)value);
162                         }
163                         break;
164
165                 case DATA_TYPE_NVLIST:
166                         {
167                                 nvlist_t *value;
168
169                                 VERIFY(nvpair_value_nvlist(elem, &value) == 0);
170                                 (void) printf("%*s%s\n", indent, "",
171                                     nvpair_name(elem));
172                                 dump_nvlist(value, indent + 4);
173                         }
174                         break;
175
176                 case DATA_TYPE_NVLIST_ARRAY:
177                         {
178                                 nvlist_t **value;
179                                 uint_t c, count;
180
181                                 VERIFY(nvpair_value_nvlist_array(elem, &value,
182                                     &count) == 0);
183
184                                 for (c = 0; c < count; c++) {
185                                         (void) printf("%*s%s[%u]\n", indent, "",
186                                             nvpair_name(elem), c);
187                                         dump_nvlist(value[c], indent + 8);
188                                 }
189                         }
190                         break;
191
192                 default:
193
194                         (void) printf("bad config type %d for %s\n",
195                             nvpair_type(elem), nvpair_name(elem));
196                 }
197         }
198 }
199
200 /* ARGSUSED */
201 static void
202 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
203 {
204         nvlist_t *nv;
205         size_t nvsize = *(uint64_t *)data;
206         char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
207
208         VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
209
210         VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
211
212         umem_free(packed, nvsize);
213
214         dump_nvlist(nv, 8);
215
216         nvlist_free(nv);
217 }
218
219 const char dump_zap_stars[] = "****************************************";
220 const int dump_zap_width = sizeof (dump_zap_stars) - 1;
221
222 static void
223 dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE])
224 {
225         int i;
226         int minidx = ZAP_HISTOGRAM_SIZE - 1;
227         int maxidx = 0;
228         uint64_t max = 0;
229
230         for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) {
231                 if (histo[i] > max)
232                         max = histo[i];
233                 if (histo[i] > 0 && i > maxidx)
234                         maxidx = i;
235                 if (histo[i] > 0 && i < minidx)
236                         minidx = i;
237         }
238
239         if (max < dump_zap_width)
240                 max = dump_zap_width;
241
242         for (i = minidx; i <= maxidx; i++)
243                 (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i],
244                     &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]);
245 }
246
247 static void
248 dump_zap_stats(objset_t *os, uint64_t object)
249 {
250         int error;
251         zap_stats_t zs;
252
253         error = zap_get_stats(os, object, &zs);
254         if (error)
255                 return;
256
257         if (zs.zs_ptrtbl_len == 0) {
258                 ASSERT(zs.zs_num_blocks == 1);
259                 (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
260                     (u_longlong_t)zs.zs_blocksize,
261                     (u_longlong_t)zs.zs_num_entries);
262                 return;
263         }
264
265         (void) printf("\tFat ZAP stats:\n");
266
267         (void) printf("\t\tPointer table:\n");
268         (void) printf("\t\t\t%llu elements\n",
269             (u_longlong_t)zs.zs_ptrtbl_len);
270         (void) printf("\t\t\tzt_blk: %llu\n",
271             (u_longlong_t)zs.zs_ptrtbl_zt_blk);
272         (void) printf("\t\t\tzt_numblks: %llu\n",
273             (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
274         (void) printf("\t\t\tzt_shift: %llu\n",
275             (u_longlong_t)zs.zs_ptrtbl_zt_shift);
276         (void) printf("\t\t\tzt_blks_copied: %llu\n",
277             (u_longlong_t)zs.zs_ptrtbl_blks_copied);
278         (void) printf("\t\t\tzt_nextblk: %llu\n",
279             (u_longlong_t)zs.zs_ptrtbl_nextblk);
280
281         (void) printf("\t\tZAP entries: %llu\n",
282             (u_longlong_t)zs.zs_num_entries);
283         (void) printf("\t\tLeaf blocks: %llu\n",
284             (u_longlong_t)zs.zs_num_leafs);
285         (void) printf("\t\tTotal blocks: %llu\n",
286             (u_longlong_t)zs.zs_num_blocks);
287         (void) printf("\t\tzap_block_type: 0x%llx\n",
288             (u_longlong_t)zs.zs_block_type);
289         (void) printf("\t\tzap_magic: 0x%llx\n",
290             (u_longlong_t)zs.zs_magic);
291         (void) printf("\t\tzap_salt: 0x%llx\n",
292             (u_longlong_t)zs.zs_salt);
293
294         (void) printf("\t\tLeafs with 2^n pointers:\n");
295         dump_zap_histogram(zs.zs_leafs_with_2n_pointers);
296
297         (void) printf("\t\tBlocks with n*5 entries:\n");
298         dump_zap_histogram(zs.zs_blocks_with_n5_entries);
299
300         (void) printf("\t\tBlocks n/10 full:\n");
301         dump_zap_histogram(zs.zs_blocks_n_tenths_full);
302
303         (void) printf("\t\tEntries with n chunks:\n");
304         dump_zap_histogram(zs.zs_entries_using_n_chunks);
305
306         (void) printf("\t\tBuckets with n entries:\n");
307         dump_zap_histogram(zs.zs_buckets_with_n_entries);
308 }
309
310 /*ARGSUSED*/
311 static void
312 dump_none(objset_t *os, uint64_t object, void *data, size_t size)
313 {
314 }
315
316 /*ARGSUSED*/
317 void
318 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
319 {
320 }
321
322 /*ARGSUSED*/
323 static void
324 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
325 {
326 }
327
328 /*ARGSUSED*/
329 static void
330 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
331 {
332         zap_cursor_t zc;
333         zap_attribute_t attr;
334         void *prop;
335         int i;
336
337         dump_zap_stats(os, object);
338         (void) printf("\n");
339
340         for (zap_cursor_init(&zc, os, object);
341             zap_cursor_retrieve(&zc, &attr) == 0;
342             zap_cursor_advance(&zc)) {
343                 (void) printf("\t\t%s = ", attr.za_name);
344                 if (attr.za_num_integers == 0) {
345                         (void) printf("\n");
346                         continue;
347                 }
348                 prop = umem_zalloc(attr.za_num_integers *
349                     attr.za_integer_length, UMEM_NOFAIL);
350                 (void) zap_lookup(os, object, attr.za_name,
351                     attr.za_integer_length, attr.za_num_integers, prop);
352                 if (attr.za_integer_length == 1) {
353                         (void) printf("%s", (char *)prop);
354                 } else {
355                         for (i = 0; i < attr.za_num_integers; i++) {
356                                 switch (attr.za_integer_length) {
357                                 case 2:
358                                         (void) printf("%u ",
359                                             ((uint16_t *)prop)[i]);
360                                         break;
361                                 case 4:
362                                         (void) printf("%u ",
363                                             ((uint32_t *)prop)[i]);
364                                         break;
365                                 case 8:
366                                         (void) printf("%lld ",
367                                             (u_longlong_t)((int64_t *)prop)[i]);
368                                         break;
369                                 }
370                         }
371                 }
372                 (void) printf("\n");
373                 umem_free(prop, attr.za_num_integers * attr.za_integer_length);
374         }
375         zap_cursor_fini(&zc);
376 }
377
378 /*ARGSUSED*/
379 static void
380 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
381 {
382         zap_cursor_t zc;
383         zap_attribute_t attr;
384         const char *typenames[] = {
385                 /* 0 */ "not specified",
386                 /* 1 */ "FIFO",
387                 /* 2 */ "Character Device",
388                 /* 3 */ "3 (invalid)",
389                 /* 4 */ "Directory",
390                 /* 5 */ "5 (invalid)",
391                 /* 6 */ "Block Device",
392                 /* 7 */ "7 (invalid)",
393                 /* 8 */ "Regular File",
394                 /* 9 */ "9 (invalid)",
395                 /* 10 */ "Symbolic Link",
396                 /* 11 */ "11 (invalid)",
397                 /* 12 */ "Socket",
398                 /* 13 */ "Door",
399                 /* 14 */ "Event Port",
400                 /* 15 */ "15 (invalid)",
401         };
402
403         dump_zap_stats(os, object);
404         (void) printf("\n");
405
406         for (zap_cursor_init(&zc, os, object);
407             zap_cursor_retrieve(&zc, &attr) == 0;
408             zap_cursor_advance(&zc)) {
409                 (void) printf("\t\t%s = %lld (type: %s)\n",
410                     attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
411                     typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
412         }
413         zap_cursor_fini(&zc);
414 }
415
416 static void
417 dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
418 {
419         uint64_t alloc, offset, entry;
420         uint8_t mapshift = sm->sm_shift;
421         uint64_t mapstart = sm->sm_start;
422         char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
423                             "INVALID", "INVALID", "INVALID", "INVALID" };
424
425         if (smo->smo_object == 0)
426                 return;
427
428         /*
429          * Print out the freelist entries in both encoded and decoded form.
430          */
431         alloc = 0;
432         for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
433                 VERIFY(0 == dmu_read(os, smo->smo_object, offset,
434                     sizeof (entry), &entry));
435                 if (SM_DEBUG_DECODE(entry)) {
436                         (void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
437                             (u_longlong_t)(offset / sizeof (entry)),
438                             ddata[SM_DEBUG_ACTION_DECODE(entry)],
439                             (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
440                             (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
441                 } else {
442                         (void) printf("\t\t[%4llu]    %c  range:"
443                             " %08llx-%08llx  size: %06llx\n",
444                             (u_longlong_t)(offset / sizeof (entry)),
445                             SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
446                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
447                             mapshift) + mapstart),
448                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
449                             mapshift) + mapstart + (SM_RUN_DECODE(entry) <<
450                             mapshift)),
451                             (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
452                         if (SM_TYPE_DECODE(entry) == SM_ALLOC)
453                                 alloc += SM_RUN_DECODE(entry) << mapshift;
454                         else
455                                 alloc -= SM_RUN_DECODE(entry) << mapshift;
456                 }
457         }
458         if (alloc != smo->smo_alloc) {
459                 (void) printf("space_map_object alloc (%llu) INCONSISTENT "
460                     "with space map summary (%llu)\n",
461                     (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc);
462         }
463 }
464
465 static void
466 dump_metaslab(metaslab_t *msp)
467 {
468         char freebuf[5];
469         space_map_obj_t *smo = &msp->ms_smo;
470         vdev_t *vd = msp->ms_group->mg_vd;
471         spa_t *spa = vd->vdev_spa;
472
473         nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
474
475         if (dump_opt['d'] <= 5) {
476                 (void) printf("\t%10llx   %10llu   %5s\n",
477                     (u_longlong_t)msp->ms_map.sm_start,
478                     (u_longlong_t)smo->smo_object,
479                     freebuf);
480                 return;
481         }
482
483         (void) printf(
484             "\tvdev %llu   offset %08llx   spacemap %4llu   free %5s\n",
485             (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
486             (u_longlong_t)smo->smo_object, freebuf);
487
488         ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
489
490         dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
491 }
492
493 static void
494 dump_metaslabs(spa_t *spa)
495 {
496         vdev_t *rvd = spa->spa_root_vdev;
497         vdev_t *vd;
498         int c, m;
499
500         (void) printf("\nMetaslabs:\n");
501
502         for (c = 0; c < rvd->vdev_children; c++) {
503                 vd = rvd->vdev_child[c];
504
505                 (void) printf("\n    vdev %llu\n\n", (u_longlong_t)vd->vdev_id);
506
507                 if (dump_opt['d'] <= 5) {
508                         (void) printf("\t%10s   %10s   %5s\n",
509                             "offset", "spacemap", "free");
510                         (void) printf("\t%10s   %10s   %5s\n",
511                             "------", "--------", "----");
512                 }
513                 for (m = 0; m < vd->vdev_ms_count; m++)
514                         dump_metaslab(vd->vdev_ms[m]);
515                 (void) printf("\n");
516         }
517 }
518
519 static void
520 dump_dtl(vdev_t *vd, int indent)
521 {
522         avl_tree_t *t = &vd->vdev_dtl_map.sm_root;
523         space_seg_t *ss;
524         vdev_t *pvd;
525         int c;
526
527         if (indent == 0)
528                 (void) printf("\nDirty time logs:\n\n");
529
530         (void) printf("\t%*s%s\n", indent, "",
531             vd->vdev_path ? vd->vdev_path :
532             vd->vdev_parent ? vd->vdev_ops->vdev_op_type :
533             spa_name(vd->vdev_spa));
534
535         for (ss = avl_first(t); ss; ss = AVL_NEXT(t, ss)) {
536                 /*
537                  * Everything in this DTL must appear in all parent DTL unions.
538                  */
539                 for (pvd = vd; pvd; pvd = pvd->vdev_parent)
540                         ASSERT(vdev_dtl_contains(&pvd->vdev_dtl_map,
541                             ss->ss_start, ss->ss_end - ss->ss_start));
542                 (void) printf("\t%*soutage [%llu,%llu] length %llu\n",
543                     indent, "",
544                     (u_longlong_t)ss->ss_start,
545                     (u_longlong_t)ss->ss_end - 1,
546                     (u_longlong_t)(ss->ss_end - ss->ss_start));
547         }
548
549         (void) printf("\n");
550
551         if (dump_opt['d'] > 5 && vd->vdev_children == 0) {
552                 dump_spacemap(vd->vdev_spa->spa_meta_objset, &vd->vdev_dtl,
553                     &vd->vdev_dtl_map);
554                 (void) printf("\n");
555         }
556
557         for (c = 0; c < vd->vdev_children; c++)
558                 dump_dtl(vd->vdev_child[c], indent + 4);
559 }
560
561 /*ARGSUSED*/
562 static void
563 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
564 {
565 }
566
567 static uint64_t
568 blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
569 {
570         if (level < 0)
571                 return (blkid);
572
573         return ((blkid << (level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
574             dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
575 }
576
577 static void
578 sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
579 {
580         dva_t *dva = bp->blk_dva;
581         int ndvas = alldvas ? BP_GET_NDVAS(bp) : 1;
582         int i;
583
584         blkbuf[0] = '\0';
585
586         for (i = 0; i < ndvas; i++)
587                 (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
588                     (u_longlong_t)DVA_GET_VDEV(&dva[i]),
589                     (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
590                     (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
591
592         (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu",
593             (u_longlong_t)BP_GET_LSIZE(bp),
594             (u_longlong_t)BP_GET_PSIZE(bp),
595             (u_longlong_t)bp->blk_fill,
596             (u_longlong_t)bp->blk_birth);
597 }
598
599 static void
600 print_indirect(blkptr_t *bp, const zbookmark_t *zb,
601     const dnode_phys_t *dnp)
602 {
603         char blkbuf[BP_SPRINTF_LEN];
604         int l;
605
606         ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
607         ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
608
609         (void) printf("%16llx ",
610             (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
611
612         ASSERT(zb->zb_level >= 0);
613
614         for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
615                 if (l == zb->zb_level) {
616                         (void) printf("L%llx", (u_longlong_t)zb->zb_level);
617                 } else {
618                         (void) printf(" ");
619                 }
620         }
621
622         sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
623         (void) printf("%s\n", blkbuf);
624 }
625
/*
 * Assign all four fields (zb_objset, zb_object, zb_level, zb_blkid) of the
 * bookmark pointed to by `zb'.
 *
 * Wrapped in do { } while (0) so that "SET_BOOKMARK(...);" is a single
 * statement and is safe in an unbraced if/else.  Every argument is
 * parenthesized.  `zb' is evaluated four times, so it must have no side
 * effects.  The invocation must be followed by a semicolon.
 */
#define	SET_BOOKMARK(zb, objset, object, level, blkid)	\
do {							\
	(zb)->zb_objset = (objset);			\
	(zb)->zb_object = (object);			\
	(zb)->zb_level = (level);			\
	(zb)->zb_blkid = (blkid);			\
} while (0)
633
634 static int
635 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
636     blkptr_t *bp, const zbookmark_t *zb)
637 {
638         int err;
639
640         if (bp->blk_birth == 0)
641                 return (0);
642
643         print_indirect(bp, zb, dnp);
644
645         if (BP_GET_LEVEL(bp) > 0) {
646                 uint32_t flags = ARC_WAIT;
647                 int i;
648                 blkptr_t *cbp;
649                 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
650                 arc_buf_t *buf;
651                 uint64_t fill = 0;
652
653                 err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
654                     ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
655                 if (err)
656                         return (err);
657
658                 /* recursively visit blocks below this */
659                 cbp = buf->b_data;
660                 for (i = 0; i < epb; i++, cbp++) {
661                         zbookmark_t czb;
662
663                         SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
664                             zb->zb_level - 1,
665                             zb->zb_blkid * epb + i);
666                         err = visit_indirect(spa, dnp, cbp, &czb);
667                         if (err)
668                                 break;
669                         fill += cbp->blk_fill;
670                 }
671                 ASSERT3U(fill, ==, bp->blk_fill);
672                 (void) arc_buf_remove_ref(buf, &buf);
673         }
674
675         return (err);
676 }
677
678 /*ARGSUSED*/
679 static void
680 dump_indirect(dnode_t *dn)
681 {
682         dnode_phys_t *dnp = dn->dn_phys;
683         int j;
684         zbookmark_t czb;
685
686         (void) printf("Indirect blocks:\n");
687
688         SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
689             dn->dn_object, dnp->dn_nlevels - 1, 0);
690         for (j = 0; j < dnp->dn_nblkptr; j++) {
691                 czb.zb_blkid = j;
692                 (void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
693                     &dnp->dn_blkptr[j], &czb);
694         }
695
696         (void) printf("\n");
697 }
698
699 /*ARGSUSED*/
700 static void
701 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
702 {
703         dsl_dir_phys_t *dd = data;
704         time_t crtime;
705         char nice[6];
706
707         if (dd == NULL)
708                 return;
709
710         ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
711
712         crtime = dd->dd_creation_time;
713         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
714         (void) printf("\t\thead_dataset_obj = %llu\n",
715             (u_longlong_t)dd->dd_head_dataset_obj);
716         (void) printf("\t\tparent_dir_obj = %llu\n",
717             (u_longlong_t)dd->dd_parent_obj);
718         (void) printf("\t\torigin_obj = %llu\n",
719             (u_longlong_t)dd->dd_origin_obj);
720         (void) printf("\t\tchild_dir_zapobj = %llu\n",
721             (u_longlong_t)dd->dd_child_dir_zapobj);
722         nicenum(dd->dd_used_bytes, nice);
723         (void) printf("\t\tused_bytes = %s\n", nice);
724         nicenum(dd->dd_compressed_bytes, nice);
725         (void) printf("\t\tcompressed_bytes = %s\n", nice);
726         nicenum(dd->dd_uncompressed_bytes, nice);
727         (void) printf("\t\tuncompressed_bytes = %s\n", nice);
728         nicenum(dd->dd_quota, nice);
729         (void) printf("\t\tquota = %s\n", nice);
730         nicenum(dd->dd_reserved, nice);
731         (void) printf("\t\treserved = %s\n", nice);
732         (void) printf("\t\tprops_zapobj = %llu\n",
733             (u_longlong_t)dd->dd_props_zapobj);
734         (void) printf("\t\tdeleg_zapobj = %llu\n",
735             (u_longlong_t)dd->dd_deleg_zapobj);
736         (void) printf("\t\tflags = %llx\n",
737             (u_longlong_t)dd->dd_flags);
738
739 #define DO(which) \
740         nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \
741         (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
742         DO(HEAD);
743         DO(SNAP);
744         DO(CHILD);
745         DO(CHILD_RSRV);
746         DO(REFRSRV);
747 #undef DO
748 }
749
750 /*ARGSUSED*/
751 static void
752 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
753 {
754         dsl_dataset_phys_t *ds = data;
755         time_t crtime;
756         char used[6], compressed[6], uncompressed[6], unique[6];
757         char blkbuf[BP_SPRINTF_LEN];
758
759         if (ds == NULL)
760                 return;
761
762         ASSERT(size == sizeof (*ds));
763         crtime = ds->ds_creation_time;
764         nicenum(ds->ds_used_bytes, used);
765         nicenum(ds->ds_compressed_bytes, compressed);
766         nicenum(ds->ds_uncompressed_bytes, uncompressed);
767         nicenum(ds->ds_unique_bytes, unique);
768         sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ds->ds_bp);
769
770         (void) printf("\t\tdir_obj = %llu\n",
771             (u_longlong_t)ds->ds_dir_obj);
772         (void) printf("\t\tprev_snap_obj = %llu\n",
773             (u_longlong_t)ds->ds_prev_snap_obj);
774         (void) printf("\t\tprev_snap_txg = %llu\n",
775             (u_longlong_t)ds->ds_prev_snap_txg);
776         (void) printf("\t\tnext_snap_obj = %llu\n",
777             (u_longlong_t)ds->ds_next_snap_obj);
778         (void) printf("\t\tsnapnames_zapobj = %llu\n",
779             (u_longlong_t)ds->ds_snapnames_zapobj);
780         (void) printf("\t\tnum_children = %llu\n",
781             (u_longlong_t)ds->ds_num_children);
782         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
783         (void) printf("\t\tcreation_txg = %llu\n",
784             (u_longlong_t)ds->ds_creation_txg);
785         (void) printf("\t\tdeadlist_obj = %llu\n",
786             (u_longlong_t)ds->ds_deadlist_obj);
787         (void) printf("\t\tused_bytes = %s\n", used);
788         (void) printf("\t\tcompressed_bytes = %s\n", compressed);
789         (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
790         (void) printf("\t\tunique = %s\n", unique);
791         (void) printf("\t\tfsid_guid = %llu\n",
792             (u_longlong_t)ds->ds_fsid_guid);
793         (void) printf("\t\tguid = %llu\n",
794             (u_longlong_t)ds->ds_guid);
795         (void) printf("\t\tflags = %llx\n",
796             (u_longlong_t)ds->ds_flags);
797         (void) printf("\t\tnext_clones_obj = %llu\n",
798             (u_longlong_t)ds->ds_next_clones_obj);
799         (void) printf("\t\tprops_obj = %llu\n",
800             (u_longlong_t)ds->ds_props_obj);
801         (void) printf("\t\tbp = %s\n", blkbuf);
802 }
803
804 static void
805 dump_bplist(objset_t *mos, uint64_t object, char *name)
806 {
807         bplist_t bpl = { 0 };
808         blkptr_t blk, *bp = &blk;
809         uint64_t itor = 0;
810         char bytes[6];
811         char comp[6];
812         char uncomp[6];
813
814         if (dump_opt['d'] < 3)
815                 return;
816
817         mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);
818         VERIFY(0 == bplist_open(&bpl, mos, object));
819         if (bplist_empty(&bpl)) {
820                 bplist_close(&bpl);
821                 mutex_destroy(&bpl.bpl_lock);
822                 return;
823         }
824
825         nicenum(bpl.bpl_phys->bpl_bytes, bytes);
826         if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
827                 nicenum(bpl.bpl_phys->bpl_comp, comp);
828                 nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
829                 (void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
830                     name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
831                     bytes, comp, uncomp);
832         } else {
833                 (void) printf("\n    %s: %llu entries, %s\n",
834                     name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
835         }
836
837         if (dump_opt['d'] < 5) {
838                 bplist_close(&bpl);
839                 mutex_destroy(&bpl.bpl_lock);
840                 return;
841         }
842
843         (void) printf("\n");
844
845         while (bplist_iterate(&bpl, &itor, bp) == 0) {
846                 char blkbuf[BP_SPRINTF_LEN];
847
848                 ASSERT(bp->blk_birth != 0);
849                 sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
850                 (void) printf("\tItem %3llu: %s\n",
851                     (u_longlong_t)itor - 1, blkbuf);
852         }
853
854         bplist_close(&bpl);
855         mutex_destroy(&bpl.bpl_lock);
856 }
857
858 static avl_tree_t idx_tree;
859 static avl_tree_t domain_tree;
860 static boolean_t fuid_table_loaded;
861
862 static void
863 fuid_table_destroy()
864 {
865         if (fuid_table_loaded) {
866                 zfs_fuid_table_destroy(&idx_tree, &domain_tree);
867                 fuid_table_loaded = B_FALSE;
868         }
869 }
870
871 /*
872  * print uid or gid information.
873  * For normal POSIX id just the id is printed in decimal format.
874  * For CIFS files with FUID the fuid is printed in hex followed by
875  * the doman-rid string.
876  */
877 static void
878 print_idstr(uint64_t id, const char *id_type)
879 {
880         if (FUID_INDEX(id)) {
881                 char *domain;
882
883                 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
884                 (void) printf("\t%s     %llx [%s-%d]\n", id_type,
885                     (u_longlong_t)id, domain, (int)FUID_RID(id));
886         } else {
887                 (void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
888         }
889
890 }
891
892 static void
893 dump_uidgid(objset_t *os, znode_phys_t *zp)
894 {
895         uint32_t uid_idx, gid_idx;
896
897         uid_idx = FUID_INDEX(zp->zp_uid);
898         gid_idx = FUID_INDEX(zp->zp_gid);
899
900         /* Load domain table, if not already loaded */
901         if (!fuid_table_loaded && (uid_idx || gid_idx)) {
902                 uint64_t fuid_obj;
903
904                 /* first find the fuid object.  It lives in the master node */
905                 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
906                     8, 1, &fuid_obj) == 0);
907                 (void) zfs_fuid_table_load(os, fuid_obj,
908                     &idx_tree, &domain_tree);
909                 fuid_table_loaded = B_TRUE;
910         }
911
912         print_idstr(zp->zp_uid, "uid");
913         print_idstr(zp->zp_gid, "gid");
914 }
915
916 /*ARGSUSED*/
917 static void
918 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
919 {
920         znode_phys_t *zp = data;
921         time_t z_crtime, z_atime, z_mtime, z_ctime;
922         char path[MAXPATHLEN * 2];      /* allow for xattr and failure prefix */
923         int error;
924
925         ASSERT(size >= sizeof (znode_phys_t));
926
927         error = zfs_obj_to_path(os, object, path, sizeof (path));
928         if (error != 0) {
929                 (void) snprintf(path, sizeof (path), "\?\?\?<object#%llu>",
930                     (u_longlong_t)object);
931         }
932
933         if (dump_opt['d'] < 3) {
934                 (void) printf("\t%s\n", path);
935                 return;
936         }
937
938         z_crtime = (time_t)zp->zp_crtime[0];
939         z_atime = (time_t)zp->zp_atime[0];
940         z_mtime = (time_t)zp->zp_mtime[0];
941         z_ctime = (time_t)zp->zp_ctime[0];
942
943         (void) printf("\tpath   %s\n", path);
944         dump_uidgid(os, zp);
945         (void) printf("\tatime  %s", ctime(&z_atime));
946         (void) printf("\tmtime  %s", ctime(&z_mtime));
947         (void) printf("\tctime  %s", ctime(&z_ctime));
948         (void) printf("\tcrtime %s", ctime(&z_crtime));
949         (void) printf("\tgen    %llu\n", (u_longlong_t)zp->zp_gen);
950         (void) printf("\tmode   %llo\n", (u_longlong_t)zp->zp_mode);
951         (void) printf("\tsize   %llu\n", (u_longlong_t)zp->zp_size);
952         (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent);
953         (void) printf("\tlinks  %llu\n", (u_longlong_t)zp->zp_links);
954         (void) printf("\txattr  %llu\n", (u_longlong_t)zp->zp_xattr);
955         (void) printf("\trdev   0x%016llx\n", (u_longlong_t)zp->zp_rdev);
956 }
957
/*
 * Object viewer registered for the ACL entries of object_viewer[].
 * It currently prints nothing; all arguments are unused.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
963
/*
 * Object viewer registered for the "DMU objset" entry of object_viewer[].
 * It currently prints nothing; all arguments are unused.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
969
/*
 * Per-type dump routines.  dump_object() indexes this table with both
 * doi_bonus_type (passing the bonus buffer) and doi_type (passing no
 * data), so the entries are positional: the order must follow the object
 * type numbering, and the trailing comment on each line names the type
 * that slot is meant for.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
        dump_none,              /* unallocated                  */
        dump_zap,               /* object directory             */
        dump_uint64,            /* object array                 */
        dump_none,              /* packed nvlist                */
        dump_packed_nvlist,     /* packed nvlist size           */
        dump_none,              /* bplist                       */
        dump_none,              /* bplist header                */
        dump_none,              /* SPA space map header         */
        dump_none,              /* SPA space map                */
        dump_none,              /* ZIL intent log               */
        dump_dnode,             /* DMU dnode                    */
        dump_dmu_objset,        /* DMU objset                   */
        dump_dsl_dir,           /* DSL directory                */
        dump_zap,               /* DSL directory child map      */
        dump_zap,               /* DSL dataset snap map         */
        dump_zap,               /* DSL props                    */
        dump_dsl_dataset,       /* DSL dataset                  */
        dump_znode,             /* ZFS znode                    */
        dump_acl,               /* ZFS V0 ACL                   */
        dump_uint8,             /* ZFS plain file               */
        dump_zpldir,            /* ZFS directory                */
        dump_zap,               /* ZFS master node              */
        dump_zap,               /* ZFS delete queue             */
        dump_uint8,             /* zvol object                  */
        dump_zap,               /* zvol prop                    */
        dump_uint8,             /* other uint8[]                */
        dump_uint64,            /* other uint64[]               */
        dump_zap,               /* other ZAP                    */
        dump_zap,               /* persistent error log         */
        dump_uint8,             /* SPA history                  */
        dump_uint64,            /* SPA history offsets          */
        dump_zap,               /* Pool properties              */
        dump_zap,               /* DSL permissions              */
        dump_acl,               /* ZFS ACL                      */
        dump_uint8,             /* ZFS SYSACL                   */
        dump_none,              /* FUID nvlist                  */
        dump_packed_nvlist,     /* FUID nvlist size             */
        dump_zap,               /* DSL dataset next clones      */
        dump_zap,               /* DSL scrub queue              */
};
1011
1012 static void
1013 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1014 {
1015         dmu_buf_t *db = NULL;
1016         dmu_object_info_t doi;
1017         dnode_t *dn;
1018         void *bonus = NULL;
1019         size_t bsize = 0;
1020         char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], segsize[6];
1021         char aux[50];
1022         int error;
1023
1024         if (*print_header) {
1025                 (void) printf("\n    Object  lvl   iblk   dblk  lsize"
1026                     "  asize  type\n");
1027                 *print_header = 0;
1028         }
1029
1030         if (object == 0) {
1031                 dn = os->os->os_meta_dnode;
1032         } else {
1033                 error = dmu_bonus_hold(os, object, FTAG, &db);
1034                 if (error)
1035                         fatal("dmu_bonus_hold(%llu) failed, errno %u",
1036                             object, error);
1037                 bonus = db->db_data;
1038                 bsize = db->db_size;
1039                 dn = ((dmu_buf_impl_t *)db)->db_dnode;
1040         }
1041         dmu_object_info_from_dnode(dn, &doi);
1042
1043         nicenum(doi.doi_metadata_block_size, iblk);
1044         nicenum(doi.doi_data_block_size, dblk);
1045         nicenum(doi.doi_data_block_size * (doi.doi_max_block_offset + 1),
1046             lsize);
1047         nicenum(doi.doi_physical_blks << 9, asize);
1048         nicenum(doi.doi_bonus_size, bonus_size);
1049
1050         aux[0] = '\0';
1051
1052         if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1053                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1054                     zio_checksum_table[doi.doi_checksum].ci_name);
1055         }
1056
1057         if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1058                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1059                     zio_compress_table[doi.doi_compress].ci_name);
1060         }
1061
1062         (void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %s%s\n",
1063             (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize,
1064             asize, dmu_ot[doi.doi_type].ot_name, aux);
1065
1066         if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1067                 (void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %s\n",
1068                     "", "", "", "", bonus_size, "bonus",
1069                     dmu_ot[doi.doi_bonus_type].ot_name);
1070         }
1071
1072         if (verbosity >= 4) {
1073                 object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
1074                 object_viewer[doi.doi_type](os, object, NULL, 0);
1075                 *print_header = 1;
1076         }
1077
1078         if (verbosity >= 5)
1079                 dump_indirect(dn);
1080
1081         if (verbosity >= 5) {
1082                 /*
1083                  * Report the list of segments that comprise the object.
1084                  */
1085                 uint64_t start = 0;
1086                 uint64_t end;
1087                 uint64_t blkfill = 1;
1088                 int minlvl = 1;
1089
1090                 if (dn->dn_type == DMU_OT_DNODE) {
1091                         minlvl = 0;
1092                         blkfill = DNODES_PER_BLOCK;
1093                 }
1094
1095                 for (;;) {
1096                         error = dnode_next_offset(dn,
1097                             0, &start, minlvl, blkfill, 0);
1098                         if (error)
1099                                 break;
1100                         end = start;
1101                         error = dnode_next_offset(dn,
1102                             DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
1103                         nicenum(end - start, segsize);
1104                         (void) printf("\t\tsegment [%016llx, %016llx)"
1105                             " size %5s\n", (u_longlong_t)start,
1106                             (u_longlong_t)end, segsize);
1107                         if (error)
1108                                 break;
1109                         start = end;
1110                 }
1111         }
1112
1113         if (db != NULL)
1114                 dmu_buf_rele(db, FTAG);
1115 }
1116
/*
 * Printable objset type names; dump_dir() indexes this array with
 * dds_type after checking it against DMU_OST_NUMTYPES.
 */
static char *objset_types[DMU_OST_NUMTYPES] = {
        "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
1119
1120 static void
1121 dump_dir(objset_t *os)
1122 {
1123         dmu_objset_stats_t dds;
1124         uint64_t object, object_count;
1125         uint64_t refdbytes, usedobjs, scratch;
1126         char numbuf[8];
1127         char blkbuf[BP_SPRINTF_LEN];
1128         char osname[MAXNAMELEN];
1129         char *type = "UNKNOWN";
1130         int verbosity = dump_opt['d'];
1131         int print_header = 1;
1132         int i, error;
1133
1134         dmu_objset_fast_stat(os, &dds);
1135
1136         if (dds.dds_type < DMU_OST_NUMTYPES)
1137                 type = objset_types[dds.dds_type];
1138
1139         if (dds.dds_type == DMU_OST_META) {
1140                 dds.dds_creation_txg = TXG_INITIAL;
1141                 usedobjs = os->os->os_rootbp->blk_fill;
1142                 refdbytes = os->os->os_spa->spa_dsl_pool->
1143                     dp_mos_dir->dd_phys->dd_used_bytes;
1144         } else {
1145                 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
1146         }
1147
1148         ASSERT3U(usedobjs, ==, os->os->os_rootbp->blk_fill);
1149
1150         nicenum(refdbytes, numbuf);
1151
1152         if (verbosity >= 4) {
1153                 (void) strcpy(blkbuf, ", rootbp ");
1154                 sprintf_blkptr(blkbuf + strlen(blkbuf),
1155                     BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
1156         } else {
1157                 blkbuf[0] = '\0';
1158         }
1159
1160         dmu_objset_name(os, osname);
1161
1162         (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
1163             "%s, %llu objects%s\n",
1164             osname, type, (u_longlong_t)dmu_objset_id(os),
1165             (u_longlong_t)dds.dds_creation_txg,
1166             numbuf, (u_longlong_t)usedobjs, blkbuf);
1167
1168         dump_intent_log(dmu_objset_zil(os));
1169
1170         if (dmu_objset_ds(os) != NULL)
1171                 dump_bplist(dmu_objset_pool(os)->dp_meta_objset,
1172                     dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist");
1173
1174         if (verbosity < 2)
1175                 return;
1176
1177         if (os->os->os_rootbp->blk_birth == 0)
1178                 return;
1179
1180         if (zopt_objects != 0) {
1181                 for (i = 0; i < zopt_objects; i++)
1182                         dump_object(os, zopt_object[i], verbosity,
1183                             &print_header);
1184                 (void) printf("\n");
1185                 return;
1186         }
1187
1188         dump_object(os, 0, verbosity, &print_header);
1189         object_count = 1;
1190
1191         object = 0;
1192         while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
1193                 dump_object(os, object, verbosity, &print_header);
1194                 object_count++;
1195         }
1196
1197         ASSERT3U(object_count, ==, usedobjs);
1198
1199         (void) printf("\n");
1200
1201         if (error != ESRCH)
1202                 fatal("dmu_object_next() = %d", error);
1203 }
1204
1205 static void
1206 dump_uberblock(uberblock_t *ub)
1207 {
1208         time_t timestamp = ub->ub_timestamp;
1209
1210         (void) printf("Uberblock\n\n");
1211         (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
1212         (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
1213         (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
1214         (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
1215         (void) printf("\ttimestamp = %llu UTC = %s",
1216             (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
1217         if (dump_opt['u'] >= 3) {
1218                 char blkbuf[BP_SPRINTF_LEN];
1219                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &ub->ub_rootbp);
1220                 (void) printf("\trootbp = %s\n", blkbuf);
1221         }
1222         (void) printf("\n");
1223 }
1224
1225 static void
1226 dump_config(const char *pool)
1227 {
1228         spa_t *spa = NULL;
1229
1230         mutex_enter(&spa_namespace_lock);
1231         while ((spa = spa_next(spa)) != NULL) {
1232                 if (pool == NULL)
1233                         (void) printf("%s\n", spa_name(spa));
1234                 if (pool == NULL || strcmp(pool, spa_name(spa)) == 0)
1235                         dump_nvlist(spa->spa_config, 4);
1236         }
1237         mutex_exit(&spa_namespace_lock);
1238 }
1239
1240 static void
1241 dump_cachefile(const char *cachefile)
1242 {
1243         int fd;
1244         struct stat64 statbuf;
1245         char *buf;
1246         nvlist_t *config;
1247
1248         if ((fd = open64(cachefile, O_RDONLY)) < 0) {
1249                 (void) printf("cannot open '%s': %s\n", cachefile,
1250                     strerror(errno));
1251                 exit(1);
1252         }
1253
1254         if (fstat64(fd, &statbuf) != 0) {
1255                 (void) printf("failed to stat '%s': %s\n", cachefile,
1256                     strerror(errno));
1257                 exit(1);
1258         }
1259
1260         if ((buf = malloc(statbuf.st_size)) == NULL) {
1261                 (void) fprintf(stderr, "failed to allocate %llu bytes\n",
1262                     (u_longlong_t)statbuf.st_size);
1263                 exit(1);
1264         }
1265
1266         if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
1267                 (void) fprintf(stderr, "failed to read %llu bytes\n",
1268                     (u_longlong_t)statbuf.st_size);
1269                 exit(1);
1270         }
1271
1272         (void) close(fd);
1273
1274         if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
1275                 (void) fprintf(stderr, "failed to unpack nvlist\n");
1276                 exit(1);
1277         }
1278
1279         free(buf);
1280
1281         dump_nvlist(config, 0);
1282
1283         nvlist_free(config);
1284 }
1285
1286 static void
1287 dump_label(const char *dev)
1288 {
1289         int fd;
1290         vdev_label_t label;
1291         char *buf = label.vl_vdev_phys.vp_nvlist;
1292         size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
1293         struct stat64 statbuf;
1294         uint64_t psize;
1295         int l;
1296
1297         if ((fd = open64(dev, O_RDONLY)) < 0) {
1298                 (void) printf("cannot open '%s': %s\n", dev, strerror(errno));
1299                 exit(1);
1300         }
1301
1302         if (fstat64(fd, &statbuf) != 0) {
1303                 (void) printf("failed to stat '%s': %s\n", dev,
1304                     strerror(errno));
1305                 exit(1);
1306         }
1307
1308         if (S_ISCHR(statbuf.st_mode)) {
1309                 if (ioctl(fd, DIOCGMEDIASIZE, &statbuf.st_size) == -1) {
1310                         (void) printf("failed to get size of '%s': %s\n", dev,
1311                             strerror(errno));
1312                         exit(1);
1313                 }
1314         }
1315
1316         psize = statbuf.st_size;
1317         psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
1318
1319         for (l = 0; l < VDEV_LABELS; l++) {
1320
1321                 nvlist_t *config = NULL;
1322
1323                 (void) printf("--------------------------------------------\n");
1324                 (void) printf("LABEL %d\n", l);
1325                 (void) printf("--------------------------------------------\n");
1326
1327                 if (pread64(fd, &label, sizeof (label),
1328                     vdev_label_offset(psize, l, 0)) != sizeof (label)) {
1329                         (void) printf("failed to read label %d\n", l);
1330                         continue;
1331                 }
1332
1333                 if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
1334                         (void) printf("failed to unpack label %d\n", l);
1335                         continue;
1336                 }
1337                 dump_nvlist(config, 4);
1338                 nvlist_free(config);
1339         }
1340 }
1341
1342 /*ARGSUSED*/
1343 static int
1344 dump_one_dir(char *dsname, void *arg)
1345 {
1346         int error;
1347         objset_t *os;
1348
1349         error = dmu_objset_open(dsname, DMU_OST_ANY,
1350             DS_MODE_USER | DS_MODE_READONLY, &os);
1351         if (error) {
1352                 (void) printf("Could not open %s\n", dsname);
1353                 return (0);
1354         }
1355         dump_dir(os);
1356         dmu_objset_close(os);
1357         fuid_table_destroy();
1358         return (0);
1359 }
1360
1361 static void
1362 zdb_leak(space_map_t *sm, uint64_t start, uint64_t size)
1363 {
1364         vdev_t *vd = sm->sm_ppd;
1365
1366         (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
1367             (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
1368 }
1369
/*
 * First entry of zdb_space_map_ops.  It does nothing; 'sm' is unused.
 */
/* ARGSUSED */
static void
zdb_space_map_load(space_map_t *sm)
{
}
1375
/*
 * Second entry of zdb_space_map_ops.  Vacates 'sm', passing zdb_leak as
 * the callback and the map itself as the callback argument.
 */
static void
zdb_space_map_unload(space_map_t *sm)
{
        space_map_vacate(sm, zdb_leak, sm);
}
1381
/*
 * "claim" entry of zdb_space_map_ops.  It does nothing; all arguments
 * are unused.
 */
/* ARGSUSED */
static void
zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
}
1387
/*
 * Space map operations vector handed to space_map_load() by
 * zdb_leak_init().  The initializer is positional; the alloc and free
 * slots are left NULL.
 */
static space_map_ops_t zdb_space_map_ops = {
        zdb_space_map_load,
        zdb_space_map_unload,
        NULL,   /* alloc */
        zdb_space_map_claim,
        NULL    /* free */
};
1395
1396 static void
1397 zdb_leak_init(spa_t *spa)
1398 {
1399         vdev_t *rvd = spa->spa_root_vdev;
1400
1401         for (int c = 0; c < rvd->vdev_children; c++) {
1402                 vdev_t *vd = rvd->vdev_child[c];
1403                 for (int m = 0; m < vd->vdev_ms_count; m++) {
1404                         metaslab_t *msp = vd->vdev_ms[m];
1405                         mutex_enter(&msp->ms_lock);
1406                         VERIFY(space_map_load(&msp->ms_map, &zdb_space_map_ops,
1407                             SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0);
1408                         msp->ms_map.sm_ppd = vd;
1409                         mutex_exit(&msp->ms_lock);
1410                 }
1411         }
1412 }
1413
1414 static void
1415 zdb_leak_fini(spa_t *spa)
1416 {
1417         vdev_t *rvd = spa->spa_root_vdev;
1418
1419         for (int c = 0; c < rvd->vdev_children; c++) {
1420                 vdev_t *vd = rvd->vdev_child[c];
1421                 for (int m = 0; m < vd->vdev_ms_count; m++) {
1422                         metaslab_t *msp = vd->vdev_ms[m];
1423                         mutex_enter(&msp->ms_lock);
1424                         space_map_unload(&msp->ms_map);
1425                         mutex_exit(&msp->ms_lock);
1426                 }
1427         }
1428 }
1429
/*
 * Verify that the sum of the sizes of all blocks in the pool adds up
 * to the SPA's sa_alloc total.
 */
/* Running totals for one (level, type) bucket; see zdb_count_block(). */
typedef struct zdb_blkstats {
        uint64_t        zb_asize;       /* sum of BP_GET_ASIZE() */
        uint64_t        zb_lsize;       /* sum of BP_GET_LSIZE() */
        uint64_t        zb_psize;       /* sum of BP_GET_PSIZE() */
        uint64_t        zb_count;       /* number of blocks counted */
} zdb_blkstats_t;

/* Blocks from the deferred-free bplist are counted under the NONE slot. */
#define DMU_OT_DEFERRED DMU_OT_NONE
/* Extra column, one past the real types, holding the all-types total. */
#define DMU_OT_TOTAL    DMU_OT_NUMTYPES

/* Extra row, one past the real levels, holding the all-levels total. */
#define ZB_TOTAL        DN_MAX_LEVELS

/* State shared by the block traversal callbacks. */
typedef struct zdb_cb {
        /* statistics indexed by [level or ZB_TOTAL][type or DMU_OT_TOTAL] */
        zdb_blkstats_t  zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
        /* read error counts, indexed by the error number */
        uint64_t        zcb_errors[256];
        /* reset to 0 by every zdb_blkptr_cb() call */
        int             zcb_readfails;
        /* non-zero once any error has been seen */
        int             zcb_haderrors;
} zdb_cb_t;
1452
1453 static void
1454 zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
1455 {
1456         for (int i = 0; i < 4; i++) {
1457                 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
1458                 int t = (i & 1) ? type : DMU_OT_TOTAL;
1459                 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
1460
1461                 zb->zb_asize += BP_GET_ASIZE(bp);
1462                 zb->zb_lsize += BP_GET_LSIZE(bp);
1463                 zb->zb_psize += BP_GET_PSIZE(bp);
1464                 zb->zb_count++;
1465         }
1466
1467         if (dump_opt['S']) {
1468                 boolean_t print_sig;
1469
1470                 print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
1471                     BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
1472
1473                 if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
1474                         print_sig = B_FALSE;
1475
1476                 if (print_sig) {
1477                         (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t"
1478                             "%llx:%llx:%llx:%llx\n",
1479                             (u_longlong_t)BP_GET_LEVEL(bp),
1480                             (longlong_t)BP_GET_PSIZE(bp),
1481                             (longlong_t)BP_GET_NDVAS(bp),
1482                             dmu_ot[BP_GET_TYPE(bp)].ot_name,
1483                             zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
1484                             zio_compress_table[BP_GET_COMPRESS(bp)].ci_name,
1485                             (u_longlong_t)bp->blk_cksum.zc_word[0],
1486                             (u_longlong_t)bp->blk_cksum.zc_word[1],
1487                             (u_longlong_t)bp->blk_cksum.zc_word[2],
1488                             (u_longlong_t)bp->blk_cksum.zc_word[3]);
1489                 }
1490         }
1491
1492         VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
1493             NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
1494 }
1495
1496 static int
1497 zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
1498     const dnode_phys_t *dnp, void *arg)
1499 {
1500         zdb_cb_t *zcb = arg;
1501         char blkbuf[BP_SPRINTF_LEN];
1502
1503         if (bp == NULL)
1504                 return (0);
1505
1506         zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
1507
1508         if (dump_opt['c'] || dump_opt['S']) {
1509                 int ioerr, size;
1510                 void *data;
1511
1512                 size = BP_GET_LSIZE(bp);
1513                 data = malloc(size);
1514                 ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
1515                     NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
1516                     ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
1517                 free(data);
1518
1519                 /* We expect io errors on intent log */
1520                 if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
1521                         zcb->zcb_haderrors = 1;
1522                         zcb->zcb_errors[ioerr]++;
1523
1524                         if (dump_opt['b'] >= 2)
1525                                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1526                         else
1527                                 blkbuf[0] = '\0';
1528
1529                         if (!dump_opt['S']) {
1530                                 (void) printf("zdb_blkptr_cb: "
1531                                     "Got error %d reading "
1532                                     "<%llu, %llu, %lld, %llx> %s -- skipping\n",
1533                                     ioerr,
1534                                     (u_longlong_t)zb->zb_objset,
1535                                     (u_longlong_t)zb->zb_object,
1536                                     (u_longlong_t)zb->zb_level,
1537                                     (u_longlong_t)zb->zb_blkid,
1538                                     blkbuf);
1539                         }
1540                 }
1541         }
1542
1543         zcb->zcb_readfails = 0;
1544
1545         if (dump_opt['b'] >= 4) {
1546                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
1547                 (void) printf("objset %llu object %llu offset 0x%llx %s\n",
1548                     (u_longlong_t)zb->zb_objset,
1549                     (u_longlong_t)zb->zb_object,
1550                     (u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
1551                     blkbuf);
1552         }
1553
1554         return (0);
1555 }
1556
1557 static int
1558 dump_block_stats(spa_t *spa)
1559 {
1560         zdb_cb_t zcb = { 0 };
1561         zdb_blkstats_t *zb, *tzb;
1562         uint64_t alloc, space, logalloc;
1563         vdev_t *rvd = spa->spa_root_vdev;
1564         int leaks = 0;
1565         int c, e;
1566
1567         if (!dump_opt['S']) {
1568                 (void) printf("\nTraversing all blocks to %sverify"
1569                     " nothing leaked ...\n",
1570                     dump_opt['c'] ? "verify checksums and " : "");
1571         }
1572
1573         /*
1574          * Load all space maps as SM_ALLOC maps, then traverse the pool
1575          * claiming each block we discover.  If the pool is perfectly
1576          * consistent, the space maps will be empty when we're done.
1577          * Anything left over is a leak; any block we can't claim (because
1578          * it's not part of any space map) is a double allocation,
1579          * reference to a freed block, or an unclaimed log block.
1580          */
1581         zdb_leak_init(spa);
1582
1583         /*
1584          * If there's a deferred-free bplist, process that first.
1585          */
1586         if (spa->spa_sync_bplist_obj != 0) {
1587                 bplist_t *bpl = &spa->spa_sync_bplist;
1588                 blkptr_t blk;
1589                 uint64_t itor = 0;
1590
1591                 VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset,
1592                     spa->spa_sync_bplist_obj));
1593
1594                 while (bplist_iterate(bpl, &itor, &blk) == 0) {
1595                         if (dump_opt['b'] >= 4) {
1596                                 char blkbuf[BP_SPRINTF_LEN];
1597                                 sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, &blk);
1598                                 (void) printf("[%s] %s\n",
1599                                     "deferred free", blkbuf);
1600                         }
1601                         zdb_count_block(spa, &zcb, &blk, DMU_OT_DEFERRED);
1602                 }
1603
1604                 bplist_close(bpl);
1605         }
1606
1607         zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
1608
1609         if (zcb.zcb_haderrors && !dump_opt['S']) {
1610                 (void) printf("\nError counts:\n\n");
1611                 (void) printf("\t%5s  %s\n", "errno", "count");
1612                 for (e = 0; e < 256; e++) {
1613                         if (zcb.zcb_errors[e] != 0) {
1614                                 (void) printf("\t%5d  %llu\n",
1615                                     e, (u_longlong_t)zcb.zcb_errors[e]);
1616                         }
1617                 }
1618         }
1619
1620         /*
1621          * Report any leaked segments.
1622          */
1623         zdb_leak_fini(spa);
1624
1625         /*
1626          * If we're interested in printing out the blkptr signatures,
1627          * return now as we don't print out anything else (including
1628          * errors and leaks).
1629          */
1630         if (dump_opt['S'])
1631                 return (zcb.zcb_haderrors ? 3 : 0);
1632
1633         alloc = spa_get_alloc(spa);
1634         space = spa_get_space(spa);
1635
1636         /*
1637          * Log blocks allocated from a separate log device don't count
1638          * as part of the normal pool space; factor them in here.
1639          */
1640         logalloc = 0;
1641
1642         for (c = 0; c < rvd->vdev_children; c++)
1643                 if (rvd->vdev_child[c]->vdev_islog)
1644                         logalloc += rvd->vdev_child[c]->vdev_stat.vs_alloc;
1645
1646         tzb = &zcb.zcb_type[ZB_TOTAL][DMU_OT_TOTAL];
1647
1648         if (tzb->zb_asize == alloc + logalloc) {
1649                 (void) printf("\n\tNo leaks (block sum matches space"
1650                     " maps exactly)\n");
1651         } else {
1652                 (void) printf("block traversal size %llu != alloc %llu "
1653                     "(leaked %lld)\n",
1654                     (u_longlong_t)tzb->zb_asize,
1655                     (u_longlong_t)alloc + logalloc,
1656                     (u_longlong_t)(alloc + logalloc - tzb->zb_asize));
1657                 leaks = 1;
1658         }
1659
1660         if (tzb->zb_count == 0)
1661                 return (2);
1662
1663         (void) printf("\n");
1664         (void) printf("\tbp count:      %10llu\n",
1665             (u_longlong_t)tzb->zb_count);
1666         (void) printf("\tbp logical:    %10llu\t avg: %6llu\n",
1667             (u_longlong_t)tzb->zb_lsize,
1668             (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
1669         (void) printf("\tbp physical:   %10llu\t avg:"
1670             " %6llu\tcompression: %6.2f\n",
1671             (u_longlong_t)tzb->zb_psize,
1672             (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
1673             (double)tzb->zb_lsize / tzb->zb_psize);
1674         (void) printf("\tbp allocated:  %10llu\t avg:"
1675             " %6llu\tcompression: %6.2f\n",
1676             (u_longlong_t)tzb->zb_asize,
1677             (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
1678             (double)tzb->zb_lsize / tzb->zb_asize);
1679         (void) printf("\tSPA allocated: %10llu\tused: %5.2f%%\n",
1680             (u_longlong_t)alloc, 100.0 * alloc / space);
1681
1682         if (dump_opt['b'] >= 2) {
1683                 int l, t, level;
1684                 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
1685                     "\t  avg\t comp\t%%Total\tType\n");
1686
1687                 for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
1688                         char csize[6], lsize[6], psize[6], asize[6], avg[6];
1689                         char *typename;
1690
1691                         typename = t == DMU_OT_DEFERRED ? "deferred free" :
1692                             t == DMU_OT_TOTAL ? "Total" : dmu_ot[t].ot_name;
1693
1694                         if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
1695                                 (void) printf("%6s\t%5s\t%5s\t%5s"
1696                                     "\t%5s\t%5s\t%6s\t%s\n",
1697                                     "-",
1698                                     "-",
1699                                     "-",
1700                                     "-",
1701                                     "-",
1702                                     "-",
1703                                     "-",
1704                                     typename);
1705                                 continue;
1706                         }
1707
1708                         for (l = ZB_TOTAL - 1; l >= -1; l--) {
1709                                 level = (l == -1 ? ZB_TOTAL : l);
1710                                 zb = &zcb.zcb_type[level][t];
1711
1712                                 if (zb->zb_asize == 0)
1713                                         continue;
1714
1715                                 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
1716                                         continue;
1717
1718                                 if (level == 0 && zb->zb_asize ==
1719                                     zcb.zcb_type[ZB_TOTAL][t].zb_asize)
1720                                         continue;
1721
1722                                 nicenum(zb->zb_count, csize);
1723                                 nicenum(zb->zb_lsize, lsize);
1724                                 nicenum(zb->zb_psize, psize);
1725                                 nicenum(zb->zb_asize, asize);
1726                                 nicenum(zb->zb_asize / zb->zb_count, avg);
1727
1728                                 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
1729                                     "\t%5.2f\t%6.2f\t",
1730                                     csize, lsize, psize, asize, avg,
1731                                     (double)zb->zb_lsize / zb->zb_psize,
1732                                     100.0 * zb->zb_asize / tzb->zb_asize);
1733
1734                                 if (level == ZB_TOTAL)
1735                                         (void) printf("%s\n", typename);
1736                                 else
1737                                         (void) printf("    L%d %s\n",
1738                                             level, typename);
1739                         }
1740                 }
1741         }
1742
1743         (void) printf("\n");
1744
1745         if (leaks)
1746                 return (2);
1747
1748         if (zcb.zcb_haderrors)
1749                 return (3);
1750
1751         return (0);
1752 }
1753
1754 static void
1755 dump_zpool(spa_t *spa)
1756 {
1757         dsl_pool_t *dp = spa_get_dsl(spa);
1758         int rc = 0;
1759
1760         if (dump_opt['u'])
1761                 dump_uberblock(&spa->spa_uberblock);
1762
1763         if (dump_opt['d'] || dump_opt['i']) {
1764                 dump_dir(dp->dp_meta_objset);
1765                 if (dump_opt['d'] >= 3) {
1766                         dump_bplist(dp->dp_meta_objset,
1767                             spa->spa_sync_bplist_obj, "Deferred frees");
1768                         dump_dtl(spa->spa_root_vdev, 0);
1769                         dump_metaslabs(spa);
1770                 }
1771                 (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
1772                     DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
1773         }
1774
1775         if (dump_opt['b'] || dump_opt['c'] || dump_opt['S'])
1776                 rc = dump_block_stats(spa);
1777
1778         if (dump_opt['s'])
1779                 show_pool_stats(spa);
1780
1781         if (rc != 0)
1782                 exit(rc);
1783 }
1784
/*
 * Flag bits for the optional flags field of a block specifier handed to
 * zdb_read_block().  The letter noted beside each bit is the flag
 * character that main() maps to it through flagbits[] when -R is given.
 */
#define ZDB_FLAG_CHECKSUM       0x0001	/* 'c': calculate/display checksums */
#define ZDB_FLAG_DECOMPRESS     0x0002	/* 'd': decompress before dumping */
#define ZDB_FLAG_BSWAP          0x0004	/* 'e': byteswap before dumping */
#define ZDB_FLAG_GBH            0x0008	/* 'g': show as gang block header */
#define ZDB_FLAG_INDIRECT       0x0010	/* 'i': show as indirect block */
#define ZDB_FLAG_PHYS           0x0020	/* 'p': I/O to physical offset */
#define ZDB_FLAG_RAW            0x0040	/* 'r': dump raw data */
#define ZDB_FLAG_PRINT_BLKPTR   0x0080	/* 'b': decode a blkptr in the block */

/*
 * Flag character -> ZDB_FLAG_* bit, indexed by the character's value.
 * An entry of zero is reported as an invalid flag by zdb_read_block().
 */
int flagbits[256];
1795
1796 static void
1797 zdb_print_blkptr(blkptr_t *bp, int flags)
1798 {
1799         dva_t *dva = bp->blk_dva;
1800         int d;
1801
1802         if (flags & ZDB_FLAG_BSWAP)
1803                 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
1804         /*
1805          * Super-ick warning:  This code is also duplicated in
1806          * cmd/mdb/common/modules/zfs/zfs.c .  Yeah, I hate code
1807          * replication, too.
1808          */
1809         for (d = 0; d < BP_GET_NDVAS(bp); d++) {
1810                 (void) printf("\tDVA[%d]: vdev_id %lld / %llx\n", d,
1811                     (longlong_t)DVA_GET_VDEV(&dva[d]),
1812                     (longlong_t)DVA_GET_OFFSET(&dva[d]));
1813                 (void) printf("\tDVA[%d]:       GANG: %-5s  GRID:  %04llx\t"
1814                     "ASIZE: %llx\n", d,
1815                     DVA_GET_GANG(&dva[d]) ? "TRUE" : "FALSE",
1816                     (longlong_t)DVA_GET_GRID(&dva[d]),
1817                     (longlong_t)DVA_GET_ASIZE(&dva[d]));
1818                 (void) printf("\tDVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", d,
1819                     (u_longlong_t)DVA_GET_VDEV(&dva[d]),
1820                     (longlong_t)DVA_GET_OFFSET(&dva[d]),
1821                     (longlong_t)BP_GET_PSIZE(bp),
1822                     BP_SHOULD_BYTESWAP(bp) ? "e" : "",
1823                     !DVA_GET_GANG(&dva[d]) && BP_GET_LEVEL(bp) != 0 ?
1824                     "d" : "",
1825                     DVA_GET_GANG(&dva[d]) ? "g" : "",
1826                     BP_GET_COMPRESS(bp) != 0 ? "d" : "");
1827         }
1828         (void) printf("\tLSIZE:  %-16llx\t\tPSIZE: %llx\n",
1829             (longlong_t)BP_GET_LSIZE(bp), (longlong_t)BP_GET_PSIZE(bp));
1830         (void) printf("\tENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
1831             BP_GET_BYTEORDER(bp) ? "LITTLE" : "BIG",
1832             dmu_ot[BP_GET_TYPE(bp)].ot_name);
1833         (void) printf("\tBIRTH:  %-16llx   LEVEL: %-2llu\tFILL:  %llx\n",
1834             (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp),
1835             (u_longlong_t)bp->blk_fill);
1836         (void) printf("\tCKFUNC: %-16s\t\tCOMP:  %s\n",
1837             zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name,
1838             zio_compress_table[BP_GET_COMPRESS(bp)].ci_name);
1839         (void) printf("\tCKSUM:  %llx:%llx:%llx:%llx\n",
1840             (u_longlong_t)bp->blk_cksum.zc_word[0],
1841             (u_longlong_t)bp->blk_cksum.zc_word[1],
1842             (u_longlong_t)bp->blk_cksum.zc_word[2],
1843             (u_longlong_t)bp->blk_cksum.zc_word[3]);
1844 }
1845
1846 static void
1847 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
1848 {
1849         int i;
1850
1851         for (i = 0; i < nbps; i++)
1852                 zdb_print_blkptr(&bp[i], flags);
1853 }
1854
1855 static void
1856 zdb_dump_gbh(void *buf, int flags)
1857 {
1858         zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
1859 }
1860
1861 static void
1862 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
1863 {
1864         if (flags & ZDB_FLAG_BSWAP)
1865                 byteswap_uint64_array(buf, size);
1866         (void) write(2, buf, size);
1867 }
1868
1869 static void
1870 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
1871 {
1872         uint64_t *d = (uint64_t *)buf;
1873         int nwords = size / sizeof (uint64_t);
1874         int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
1875         int i, j;
1876         char *hdr, *c;
1877
1878
1879         if (do_bswap)
1880                 hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
1881         else
1882                 hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
1883
1884         (void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
1885
1886         for (i = 0; i < nwords; i += 2) {
1887                 (void) printf("%06llx:  %016llx  %016llx  ",
1888                     (u_longlong_t)(i * sizeof (uint64_t)),
1889                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
1890                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
1891
1892                 c = (char *)&d[i];
1893                 for (j = 0; j < 2 * sizeof (uint64_t); j++)
1894                         (void) printf("%c", isprint(c[j]) ? c[j] : '.');
1895                 (void) printf("\n");
1896         }
1897 }
1898
1899 /*
1900  * There are two acceptable formats:
1901  *      leaf_name         - For example: c1t0d0 or /tmp/ztest.0a
1902  *      child[.child]*    - For example: 0.1.1
1903  *
1904  * The second form can be used to specify arbitrary vdevs anywhere
1905  * in the heirarchy.  For example, in a pool with a mirror of
1906  * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
1907  */
1908 static vdev_t *
1909 zdb_vdev_lookup(vdev_t *vdev, char *path)
1910 {
1911         char *s, *p, *q;
1912         int i;
1913
1914         if (vdev == NULL)
1915                 return (NULL);
1916
1917         /* First, assume the x.x.x.x format */
1918         i = (int)strtoul(path, &s, 10);
1919         if (s == path || (s && *s != '.' && *s != '\0'))
1920                 goto name;
1921         if (i < 0 || i >= vdev->vdev_children)
1922                 return (NULL);
1923
1924         vdev = vdev->vdev_child[i];
1925         if (*s == '\0')
1926                 return (vdev);
1927         return (zdb_vdev_lookup(vdev, s+1));
1928
1929 name:
1930         for (i = 0; i < vdev->vdev_children; i++) {
1931                 vdev_t *vc = vdev->vdev_child[i];
1932
1933                 if (vc->vdev_path == NULL) {
1934                         vc = zdb_vdev_lookup(vc, path);
1935                         if (vc == NULL)
1936                                 continue;
1937                         else
1938                                 return (vc);
1939                 }
1940
1941                 p = strrchr(vc->vdev_path, '/');
1942                 p = p ? p + 1 : vc->vdev_path;
1943                 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
1944
1945                 if (strcmp(vc->vdev_path, path) == 0)
1946                         return (vc);
1947                 if (strcmp(p, path) == 0)
1948                         return (vc);
1949                 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
1950                         return (vc);
1951         }
1952
1953         return (NULL);
1954 }
1955
1956 /*
1957  * Read a block from a pool and print it out.  The syntax of the
1958  * block descriptor is:
1959  *
1960  *      pool:vdev_specifier:offset:size[:flags]
1961  *
1962  *      pool           - The name of the pool you wish to read from
1963  *      vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
1964  *      offset         - offset, in hex, in bytes
1965  *      size           - Amount of data to read, in hex, in bytes
1966  *      flags          - A string of characters specifying options
1967  *               b: Decode a blkptr at given offset within block
1968  *              *c: Calculate and display checksums
1969  *              *d: Decompress data before dumping
1970  *               e: Byteswap data before dumping
1971  *              *g: Display data as a gang block header
1972  *              *i: Display as an indirect block
1973  *               p: Do I/O to physical offset
1974  *               r: Dump raw data to stdout
1975  *
1976  *              * = not yet implemented
1977  */
1978 static void
1979 zdb_read_block(char *thing, spa_t **spap)
1980 {
1981         spa_t *spa = *spap;
1982         int flags = 0;
1983         uint64_t offset = 0, size = 0, blkptr_offset = 0;
1984         zio_t *zio;
1985         vdev_t *vd;
1986         void *buf;
1987         char *s, *p, *dup, *pool, *vdev, *flagstr;
1988         int i, error, zio_flags;
1989
1990         dup = strdup(thing);
1991         s = strtok(dup, ":");
1992         pool = s ? s : "";
1993         s = strtok(NULL, ":");
1994         vdev = s ? s : "";
1995         s = strtok(NULL, ":");
1996         offset = strtoull(s ? s : "", NULL, 16);
1997         s = strtok(NULL, ":");
1998         size = strtoull(s ? s : "", NULL, 16);
1999         s = strtok(NULL, ":");
2000         flagstr = s ? s : "";
2001
2002         s = NULL;
2003         if (size == 0)
2004                 s = "size must not be zero";
2005         if (!IS_P2ALIGNED(size, DEV_BSIZE))
2006                 s = "size must be a multiple of sector size";
2007         if (!IS_P2ALIGNED(offset, DEV_BSIZE))
2008                 s = "offset must be a multiple of sector size";
2009         if (s) {
2010                 (void) printf("Invalid block specifier: %s  - %s\n", thing, s);
2011                 free(dup);
2012                 return;
2013         }
2014
2015         for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
2016                 for (i = 0; flagstr[i]; i++) {
2017                         int bit = flagbits[(uchar_t)flagstr[i]];
2018
2019                         if (bit == 0) {
2020                                 (void) printf("***Invalid flag: %c\n",
2021                                     flagstr[i]);
2022                                 continue;
2023                         }
2024                         flags |= bit;
2025
2026                         /* If it's not something with an argument, keep going */
2027                         if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_DECOMPRESS |
2028                             ZDB_FLAG_PRINT_BLKPTR)) == 0)
2029                                 continue;
2030
2031                         p = &flagstr[i + 1];
2032                         if (bit == ZDB_FLAG_PRINT_BLKPTR)
2033                                 blkptr_offset = strtoull(p, &p, 16);
2034                         if (*p != ':' && *p != '\0') {
2035                                 (void) printf("***Invalid flag arg: '%s'\n", s);
2036                                 free(dup);
2037                                 return;
2038                         }
2039                 }
2040         }
2041
2042         if (spa == NULL || strcmp(spa_name(spa), pool) != 0) {
2043                 if (spa)
2044                         spa_close(spa, (void *)zdb_read_block);
2045                 error = spa_open(pool, spap, (void *)zdb_read_block);
2046                 if (error)
2047                         fatal("Failed to open pool '%s': %s",
2048                             pool, strerror(error));
2049                 spa = *spap;
2050         }
2051
2052         vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
2053         if (vd == NULL) {
2054                 (void) printf("***Invalid vdev: %s\n", vdev);
2055                 free(dup);
2056                 return;
2057         } else {
2058                 if (vd->vdev_path)
2059                         (void) printf("Found vdev: %s\n", vd->vdev_path);
2060                 else
2061                         (void) printf("Found vdev type: %s\n",
2062                             vd->vdev_ops->vdev_op_type);
2063         }
2064
2065         buf = umem_alloc(size, UMEM_NOFAIL);
2066
2067         zio_flags = ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
2068             ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY;
2069
2070         spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2071         zio = zio_root(spa, NULL, NULL, 0);
2072         /* XXX todo - cons up a BP so RAID-Z will be happy */
2073         zio_nowait(zio_vdev_child_io(zio, NULL, vd, offset, buf, size,
2074             ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, zio_flags, NULL, NULL));
2075         error = zio_wait(zio);
2076         spa_config_exit(spa, SCL_STATE, FTAG);
2077
2078         if (error) {
2079                 (void) printf("Read of %s failed, error: %d\n", thing, error);
2080                 goto out;
2081         }
2082
2083         if (flags & ZDB_FLAG_PRINT_BLKPTR)
2084                 zdb_print_blkptr((blkptr_t *)(void *)
2085                     ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
2086         else if (flags & ZDB_FLAG_RAW)
2087                 zdb_dump_block_raw(buf, size, flags);
2088         else if (flags & ZDB_FLAG_INDIRECT)
2089                 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
2090                     flags);
2091         else if (flags & ZDB_FLAG_GBH)
2092                 zdb_dump_gbh(buf, flags);
2093         else
2094                 zdb_dump_block(thing, buf, size, flags);
2095
2096 out:
2097         umem_free(buf, size);
2098         free(dup);
2099 }
2100
2101 static boolean_t
2102 nvlist_string_match(nvlist_t *config, char *name, char *tgt)
2103 {
2104         char *s;
2105
2106         if (nvlist_lookup_string(config, name, &s) != 0)
2107                 return (B_FALSE);
2108
2109         return (strcmp(s, tgt) == 0);
2110 }
2111
2112 static boolean_t
2113 nvlist_uint64_match(nvlist_t *config, char *name, uint64_t tgt)
2114 {
2115         uint64_t val;
2116
2117         if (nvlist_lookup_uint64(config, name, &val) != 0)
2118                 return (B_FALSE);
2119
2120         return (val == tgt);
2121 }
2122
2123 static boolean_t
2124 vdev_child_guid_match(nvlist_t *vdev, uint64_t guid)
2125 {
2126         nvlist_t **child;
2127         uint_t c, children;
2128
2129         verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
2130             &child, &children) == 0);
2131         for (c = 0; c < children; ++c)
2132                 if (nvlist_uint64_match(child[c], ZPOOL_CONFIG_GUID, guid))
2133                         return (B_TRUE);
2134         return (B_FALSE);
2135 }
2136
2137 static boolean_t
2138 vdev_child_string_match(nvlist_t *vdev, char *tgt)
2139 {
2140         nvlist_t **child;
2141         uint_t c, children;
2142
2143         verify(nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN,
2144             &child, &children) == 0);
2145         for (c = 0; c < children; ++c) {
2146                 if (nvlist_string_match(child[c], ZPOOL_CONFIG_PATH, tgt) ||
2147                     nvlist_string_match(child[c], ZPOOL_CONFIG_DEVID, tgt))
2148                         return (B_TRUE);
2149         }
2150         return (B_FALSE);
2151 }
2152
2153 static boolean_t
2154 vdev_guid_match(nvlist_t *config, uint64_t guid)
2155 {
2156         nvlist_t *nvroot;
2157
2158         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2159             &nvroot) == 0);
2160
2161         return (nvlist_uint64_match(nvroot, ZPOOL_CONFIG_GUID, guid) ||
2162             vdev_child_guid_match(nvroot, guid));
2163 }
2164
2165 static boolean_t
2166 vdev_string_match(nvlist_t *config, char *tgt)
2167 {
2168         nvlist_t *nvroot;
2169
2170         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2171             &nvroot) == 0);
2172
2173         return (vdev_child_string_match(nvroot, tgt));
2174 }
2175
2176 static boolean_t
2177 pool_match(nvlist_t *config, char *tgt)
2178 {
2179         uint64_t guid = strtoull(tgt, NULL, 0);
2180
2181         if (guid != 0) {
2182                 return (
2183                     nvlist_uint64_match(config, ZPOOL_CONFIG_POOL_GUID, guid) ||
2184                     vdev_guid_match(config, guid));
2185         } else {
2186                 return (
2187                     nvlist_string_match(config, ZPOOL_CONFIG_POOL_NAME, tgt) ||
2188                     vdev_string_match(config, tgt));
2189         }
2190 }
2191
2192 static int
2193 find_exported_zpool(char *pool_id, nvlist_t **configp, char *vdev_dir)
2194 {
2195         nvlist_t *pools;
2196         int error = ENOENT;
2197         nvlist_t *match = NULL;
2198
2199         if (vdev_dir != NULL)
2200                 pools = zpool_find_import_activeok(g_zfs, 1, &vdev_dir);
2201         else
2202                 pools = zpool_find_import_activeok(g_zfs, 0, NULL);
2203
2204         if (pools != NULL) {
2205                 nvpair_t *elem = NULL;
2206
2207                 while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
2208                         verify(nvpair_value_nvlist(elem, configp) == 0);
2209                         if (pool_match(*configp, pool_id)) {
2210                                 if (match != NULL) {
2211                                         (void) fatal(
2212                                             "More than one matching pool - "
2213                                             "specify guid/devid/device path.");
2214                                 } else {
2215                                         match = *configp;
2216                                         error = 0;
2217                                 }
2218                         }
2219                 }
2220         }
2221
2222         *configp = error ? NULL : match;
2223
2224         return (error);
2225 }
2226
2227 int
2228 main(int argc, char **argv)
2229 {
2230         int i, c;
2231         struct rlimit rl = { 1024, 1024 };
2232         spa_t *spa;
2233         objset_t *os = NULL;
2234         char *endstr;
2235         int dump_all = 1;
2236         int verbose = 0;
2237         int error;
2238         int exported = 0;
2239         char *vdev_dir = NULL;
2240
2241         (void) setrlimit(RLIMIT_NOFILE, &rl);
2242         (void) enable_extended_FILE_stdio(-1, -1);
2243
2244         dprintf_setup(&argc, argv);
2245
2246         while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) {
2247                 switch (c) {
2248                 case 'u':
2249                 case 'd':
2250                 case 'i':
2251                 case 'b':
2252                 case 'c':
2253                 case 's':
2254                 case 'C':
2255                 case 'l':
2256                 case 'R':
2257                         dump_opt[c]++;
2258                         dump_all = 0;
2259                         break;
2260                 case 'v':
2261                         verbose++;
2262                         break;
2263                 case 'U':
2264                         spa_config_path = optarg;
2265                         break;
2266                 case 'e':
2267                         exported = 1;
2268                         break;
2269                 case 'p':
2270                         vdev_dir = optarg;
2271                         break;
2272                 case 'S':
2273                         dump_opt[c]++;
2274                         dump_all = 0;
2275                         zdb_sig_user_data = (strncmp(optarg, "user:", 5) == 0);
2276                         if (!zdb_sig_user_data && strncmp(optarg, "all:", 4))
2277                                 usage();
2278                         endstr = strchr(optarg, ':') + 1;
2279                         if (strcmp(endstr, "fletcher2") == 0)
2280                                 zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
2281                         else if (strcmp(endstr, "fletcher4") == 0)
2282                                 zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4;
2283                         else if (strcmp(endstr, "sha256") == 0)
2284                                 zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
2285                         else if (strcmp(endstr, "all") == 0)
2286                                 zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2;
2287                         else
2288                                 usage();
2289                         break;
2290                 default:
2291                         usage();
2292                         break;
2293                 }
2294         }
2295
2296         if (vdev_dir != NULL && exported == 0) {
2297                 (void) fprintf(stderr, "-p option requires use of -e\n");
2298                 usage();
2299         }
2300
2301         kernel_init(FREAD);
2302         g_zfs = libzfs_init();
2303         ASSERT(g_zfs != NULL);
2304
2305         for (c = 0; c < 256; c++) {
2306                 if (dump_all && c != 'l' && c != 'R')
2307                         dump_opt[c] = 1;
2308                 if (dump_opt[c])
2309                         dump_opt[c] += verbose;
2310         }
2311
2312         argc -= optind;
2313         argv += optind;
2314
2315         if (argc < 1) {
2316                 if (dump_opt['C']) {
2317                         dump_cachefile(spa_config_path);
2318                         return (0);
2319                 }
2320                 usage();
2321         }
2322
2323         if (dump_opt['l']) {
2324                 dump_label(argv[0]);
2325                 return (0);
2326         }
2327
2328         if (dump_opt['R']) {
2329                 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
2330                 flagbits['c'] = ZDB_FLAG_CHECKSUM;
2331                 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
2332                 flagbits['e'] = ZDB_FLAG_BSWAP;
2333                 flagbits['g'] = ZDB_FLAG_GBH;
2334                 flagbits['i'] = ZDB_FLAG_INDIRECT;
2335                 flagbits['p'] = ZDB_FLAG_PHYS;
2336                 flagbits['r'] = ZDB_FLAG_RAW;
2337
2338                 spa = NULL;
2339                 while (argv[0]) {
2340                         zdb_read_block(argv[0], &spa);
2341                         argv++;
2342                         argc--;
2343                 }
2344                 if (spa)
2345                         spa_close(spa, (void *)zdb_read_block);
2346                 return (0);
2347         }
2348
2349         if (dump_opt['C'])
2350                 dump_config(argv[0]);
2351
2352         error = 0;
2353         if (exported) {
2354                 /*
2355                  * Check to see if the name refers to an exported zpool
2356                  */
2357                 char *slash;
2358                 nvlist_t *exported_conf = NULL;
2359
2360                 if ((slash = strchr(argv[0], '/')) != NULL)
2361                         *slash = '\0';
2362
2363                 error = find_exported_zpool(argv[0], &exported_conf, vdev_dir);
2364                 if (error == 0) {
2365                         nvlist_t *nvl = NULL;
2366
2367                         if (vdev_dir != NULL) {
2368                                 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
2369                                         error = ENOMEM;
2370                                 else if (nvlist_add_string(nvl,
2371                                     zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
2372                                     vdev_dir) != 0)
2373                                         error = ENOMEM;
2374                         }
2375
2376                         if (error == 0)
2377                                 error = spa_import_faulted(argv[0],
2378                                     exported_conf, nvl);
2379
2380                         nvlist_free(nvl);
2381                 }
2382
2383                 if (slash != NULL)
2384                         *slash = '/';
2385         }
2386
2387         if (error == 0) {
2388                 if (strchr(argv[0], '/') != NULL) {
2389                         error = dmu_objset_open(argv[0], DMU_OST_ANY,
2390                             DS_MODE_USER | DS_MODE_READONLY, &os);
2391                 } else {
2392                         error = spa_open(argv[0], &spa, FTAG);
2393                 }
2394         }
2395
2396         if (error)
2397                 fatal("can't open %s: %s", argv[0], strerror(error));
2398
2399         argv++;
2400         if (--argc > 0) {
2401                 zopt_objects = argc;
2402                 zopt_object = calloc(zopt_objects, sizeof (uint64_t));
2403                 for (i = 0; i < zopt_objects; i++) {
2404                         errno = 0;
2405                         zopt_object[i] = strtoull(argv[i], NULL, 0);
2406                         if (zopt_object[i] == 0 && errno != 0)
2407                                 fatal("bad object number %s: %s",
2408                                     argv[i], strerror(errno));
2409                 }
2410         }
2411
2412         if (os != NULL) {
2413                 dump_dir(os);
2414                 dmu_objset_close(os);
2415         } else {
2416                 dump_zpool(spa);
2417                 spa_close(spa, FTAG);
2418         }
2419
2420         fuid_table_destroy();
2421
2422         libzfs_fini(g_zfs);
2423         kernel_fini();
2424
2425         return (0);
2426 }