]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libzfs/libzfs_sendrecv.c
Prevent raw zfs recv -F if dataset is unencrypted
[FreeBSD/FreeBSD.git] / lib / libzfs / libzfs_sendrecv.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27  * All rights reserved
28  * Copyright (c) 2013 Steven Hartland. All rights reserved.
29  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31  * Copyright (c) 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32  */
33
34 #include <assert.h>
35 #include <ctype.h>
36 #include <errno.h>
37 #include <libintl.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <strings.h>
41 #include <unistd.h>
42 #include <stddef.h>
43 #include <fcntl.h>
44 #include <sys/mount.h>
45 #include <sys/mntent.h>
46 #include <sys/mnttab.h>
47 #include <sys/avl.h>
48 #include <sys/debug.h>
49 #include <sys/stat.h>
50 #include <stddef.h>
51 #include <pthread.h>
52 #include <umem.h>
53 #include <time.h>
54
55 #include <libzfs.h>
56 #include <libzfs_core.h>
57
58 #include "zfs_namecheck.h"
59 #include "zfs_prop.h"
60 #include "zfs_fletcher.h"
61 #include "libzfs_impl.h"
62 #include <zlib.h>
63 #include <sys/zio_checksum.h>
64 #include <sys/dsl_crypt.h>
65 #include <sys/ddt.h>
66 #include <sys/socket.h>
67 #include <sys/sha2.h>
68
/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);

/*
 * Forward declarations of file-local (static) functions, so that they can
 * be referenced before their definitions.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
    uint64_t *, const char *, nvlist_t *);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);

/*
 * An all-zero checksum.  cksummer() compares a write record's checksum
 * against it to decide whether a checksum must be computed.
 */
static const zio_cksum_t zero_cksum = { { 0 } };
79
/*
 * Argument block handed to the cksummer() thread.
 */
typedef struct dedup_arg {
        int     inputfd;        /* fd cksummer() reads the raw stream from */
        int     outputfd;       /* fd cksummer() writes the deduped stream to */
        libzfs_handle_t  *dedup_hdl;    /* handle used for allocation/errors */
} dedup_arg_t;
85
/*
 * Argument block for progress reporting.
 * NOTE(review): the consumer is not visible in this part of the file; the
 * field notes below are inferred from the names only -- confirm.
 */
typedef struct progress_arg {
        zfs_handle_t *pa_zhp;   /* presumably the dataset being sent */
        int pa_fd;              /* presumably the fd whose progress is polled */
        boolean_t pa_parsable;  /* presumably selects parsable output */
} progress_arg_t;
91
/*
 * Location of a data block within the stream.  cksummer() fills it from a
 * DRR_WRITE record (drr_toguid, drr_object, drr_offset) and, when a
 * duplicate is found, copies it into the DRR_WRITE_BYREF record.
 */
typedef struct dataref {
        uint64_t ref_guid;      /* from drr_toguid of the write record */
        uint64_t ref_object;    /* from drr_object of the write record */
        uint64_t ref_offset;    /* from drr_offset of the write record */
} dataref_t;
97
/*
 * One entry of the dedup table.  Entries that hash to the same bucket are
 * chained through dde_next; ddt_update() matches on checksum and prop.
 */
typedef struct dedup_entry {
        struct dedup_entry      *dde_next;      /* next entry in bucket chain */
        zio_cksum_t dde_chksum;                 /* checksum of the block */
        uint64_t dde_prop;                      /* must also match on lookup */
        dataref_t dde_ref;                      /* where the block was first seen */
} dedup_entry_t;
104
/*
 * Sizing limits for the dedup table built by cksummer(): the table may use
 * up to MAX_DDT_PHYSMEM_PERCENT percent of physical memory, but its limit
 * is never set below SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define MAX_DDT_PHYSMEM_PERCENT         20
#define SMALLEST_POSSIBLE_MAX_DDT_MB            128
107
/*
 * The dedup table: an array of hash buckets, each heading a chain of
 * dedup_entry_t, plus the accounting used to cap its memory use.
 */
typedef struct dedup_table {
        dedup_entry_t   **dedup_hash_array;     /* bucket array */
        umem_cache_t    *ddecache;      /* cache that entries come from */
        uint64_t        max_ddt_size;  /* max dedup table size in bytes */
        uint64_t        cur_ddt_size;  /* current dedup table size in bytes */
        uint64_t        ddt_count;      /* number of entries added */
        int             numhashbits;    /* checksum bits used as bucket index */
        boolean_t       ddt_full;       /* set once the "table full" note is issued */
} dedup_table_t;
117
/*
 * Return the 1-based position of the most significant set bit of n, i.e.
 * the number of bits needed to represent n.  Returns 0 when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
        int bits = 0;

        while (n != 0) {
                bits++;
                n >>= 1;
        }
        return (bits);
}
127
/*
 * Read one object of len bytes from stream into buf.
 *
 * Returns the item count reported by fread(): 1 when the whole object was
 * read, 0 otherwise (short read, EOF or error).  Note that this is an item
 * count, not a byte count.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
        return (fread(buf, len, 1, stream));
}
138
139 static void
140 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
141     zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
142 {
143         dedup_entry_t   *dde;
144
145         if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
146                 if (ddt->ddt_full == B_FALSE) {
147                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
148                             "Dedup table full.  Deduplication will continue "
149                             "with existing table entries"));
150                         ddt->ddt_full = B_TRUE;
151                 }
152                 return;
153         }
154
155         if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
156             != NULL) {
157                 assert(*ddepp == NULL);
158                 dde->dde_next = NULL;
159                 dde->dde_chksum = *cs;
160                 dde->dde_prop = prop;
161                 dde->dde_ref = *dr;
162                 *ddepp = dde;
163                 ddt->cur_ddt_size += sizeof (dedup_entry_t);
164                 ddt->ddt_count++;
165         }
166 }
167
168 /*
169  * Using the specified dedup table, do a lookup for an entry with
170  * the checksum cs.  If found, return the block's reference info
171  * in *dr. Otherwise, insert a new entry in the dedup table, using
172  * the reference information specified by *dr.
173  *
174  * return value:  true - entry was found
175  *                false - entry was not found
176  */
177 static boolean_t
178 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
179     uint64_t prop, dataref_t *dr)
180 {
181         uint32_t hashcode;
182         dedup_entry_t **ddepp;
183
184         hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
185
186         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
187             ddepp = &((*ddepp)->dde_next)) {
188                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
189                     (*ddepp)->dde_prop == prop) {
190                         *dr = (*ddepp)->dde_ref;
191                         return (B_TRUE);
192                 }
193         }
194         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
195         return (B_FALSE);
196 }
197
198 static int
199 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
200     zio_cksum_t *zc, int outfd)
201 {
202         ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
203             ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
204         fletcher_4_incremental_native(drr,
205             offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
206         if (drr->drr_type != DRR_BEGIN) {
207                 ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
208                     drr_checksum.drr_checksum));
209                 drr->drr_u.drr_checksum.drr_checksum = *zc;
210         }
211         fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
212             sizeof (zio_cksum_t), zc);
213         if (write(outfd, drr, sizeof (*drr)) == -1)
214                 return (errno);
215         if (payload_len != 0) {
216                 fletcher_4_incremental_native(payload, payload_len, zc);
217                 if (write(outfd, payload, payload_len) == -1)
218                         return (errno);
219         }
220         return (0);
221 }
222
223 /*
224  * This function is started in a separate thread when the dedup option
225  * has been requested.  The main send thread determines the list of
226  * snapshots to be included in the send stream and makes the ioctl calls
227  * for each one.  But instead of having the ioctl send the output to the
228  * the output fd specified by the caller of zfs_send()), the
229  * ioctl is told to direct the output to a pipe, which is read by the
230  * alternate thread running THIS function.  This function does the
231  * dedup'ing by:
232  *  1. building a dedup table (the DDT)
233  *  2. doing checksums on each data block and inserting a record in the DDT
234  *  3. looking for matching checksums, and
235  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
236  *      a duplicate block is found.
237  * The output of this function then goes to the output fd requested
238  * by the caller of zfs_send().
239  */
240 static void *
241 cksummer(void *arg)
242 {
243         dedup_arg_t *dda = arg;
244         char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
245         dmu_replay_record_t thedrr = { 0 };
246         dmu_replay_record_t *drr = &thedrr;
247         FILE *ofp;
248         int outfd;
249         dedup_table_t ddt;
250         zio_cksum_t stream_cksum;
251         uint64_t numbuckets;
252
253 #ifdef _ILP32
254         ddt.max_ddt_size = SMALLEST_POSSIBLE_MAX_DDT_MB << 20;
255 #else
256         uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
257         ddt.max_ddt_size =
258             MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
259             SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
260 #endif
261
262         numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
263
264         /*
265          * numbuckets must be a power of 2.  Increase number to
266          * a power of 2 if necessary.
267          */
268         if (!ISP2(numbuckets))
269                 numbuckets = 1ULL << high_order_bit(numbuckets);
270
271         ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
272         ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
273             NULL, NULL, NULL, NULL, NULL, 0);
274         ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
275         ddt.numhashbits = high_order_bit(numbuckets) - 1;
276         ddt.ddt_full = B_FALSE;
277
278         outfd = dda->outputfd;
279         ofp = fdopen(dda->inputfd, "r");
280         while (ssread(drr, sizeof (*drr), ofp) != 0) {
281
282                 /*
283                  * kernel filled in checksum, we are going to write same
284                  * record, but need to regenerate checksum.
285                  */
286                 if (drr->drr_type != DRR_BEGIN) {
287                         bzero(&drr->drr_u.drr_checksum.drr_checksum,
288                             sizeof (drr->drr_u.drr_checksum.drr_checksum));
289                 }
290
291                 switch (drr->drr_type) {
292                 case DRR_BEGIN:
293                 {
294                         struct drr_begin *drrb = &drr->drr_u.drr_begin;
295                         int fflags;
296                         int sz = 0;
297                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
298
299                         ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
300
301                         /* set the DEDUP feature flag for this stream */
302                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
303                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
304                             DMU_BACKUP_FEATURE_DEDUPPROPS);
305                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
306
307                         if (drr->drr_payloadlen != 0) {
308                                 sz = drr->drr_payloadlen;
309
310                                 if (sz > SPA_MAXBLOCKSIZE) {
311                                         buf = zfs_realloc(dda->dedup_hdl, buf,
312                                             SPA_MAXBLOCKSIZE, sz);
313                                 }
314                                 (void) ssread(buf, sz, ofp);
315                                 if (ferror(stdin))
316                                         perror("fread");
317                         }
318                         if (dump_record(drr, buf, sz, &stream_cksum,
319                             outfd) != 0)
320                                 goto out;
321                         break;
322                 }
323
324                 case DRR_END:
325                 {
326                         struct drr_end *drre = &drr->drr_u.drr_end;
327                         /* use the recalculated checksum */
328                         drre->drr_checksum = stream_cksum;
329                         if (dump_record(drr, NULL, 0, &stream_cksum,
330                             outfd) != 0)
331                                 goto out;
332                         break;
333                 }
334
335                 case DRR_OBJECT:
336                 {
337                         struct drr_object *drro = &drr->drr_u.drr_object;
338                         if (drro->drr_bonuslen > 0) {
339                                 (void) ssread(buf,
340                                     DRR_OBJECT_PAYLOAD_SIZE(drro), ofp);
341                         }
342                         if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro),
343                             &stream_cksum, outfd) != 0)
344                                 goto out;
345                         break;
346                 }
347
348                 case DRR_SPILL:
349                 {
350                         struct drr_spill *drrs = &drr->drr_u.drr_spill;
351                         (void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp);
352                         if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs),
353                             &stream_cksum, outfd) != 0)
354                                 goto out;
355                         break;
356                 }
357
358                 case DRR_FREEOBJECTS:
359                 {
360                         if (dump_record(drr, NULL, 0, &stream_cksum,
361                             outfd) != 0)
362                                 goto out;
363                         break;
364                 }
365
366                 case DRR_WRITE:
367                 {
368                         struct drr_write *drrw = &drr->drr_u.drr_write;
369                         dataref_t       dataref;
370                         uint64_t        payload_size;
371
372                         payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
373                         (void) ssread(buf, payload_size, ofp);
374
375                         /*
376                          * Use the existing checksum if it's dedup-capable,
377                          * else calculate a SHA256 checksum for it.
378                          */
379
380                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
381                             zero_cksum) ||
382                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) {
383                                 SHA2_CTX ctx;
384                                 zio_cksum_t tmpsha256;
385
386                                 SHA2Init(SHA256, &ctx);
387                                 SHA2Update(&ctx, buf, payload_size);
388                                 SHA2Final(&tmpsha256, &ctx);
389
390                                 drrw->drr_key.ddk_cksum.zc_word[0] =
391                                     BE_64(tmpsha256.zc_word[0]);
392                                 drrw->drr_key.ddk_cksum.zc_word[1] =
393                                     BE_64(tmpsha256.zc_word[1]);
394                                 drrw->drr_key.ddk_cksum.zc_word[2] =
395                                     BE_64(tmpsha256.zc_word[2]);
396                                 drrw->drr_key.ddk_cksum.zc_word[3] =
397                                     BE_64(tmpsha256.zc_word[3]);
398                                 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
399                                 drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
400                         }
401
402                         dataref.ref_guid = drrw->drr_toguid;
403                         dataref.ref_object = drrw->drr_object;
404                         dataref.ref_offset = drrw->drr_offset;
405
406                         if (ddt_update(dda->dedup_hdl, &ddt,
407                             &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
408                             &dataref)) {
409                                 dmu_replay_record_t wbr_drr = {0};
410                                 struct drr_write_byref *wbr_drrr =
411                                     &wbr_drr.drr_u.drr_write_byref;
412
413                                 /* block already present in stream */
414                                 wbr_drr.drr_type = DRR_WRITE_BYREF;
415
416                                 wbr_drrr->drr_object = drrw->drr_object;
417                                 wbr_drrr->drr_offset = drrw->drr_offset;
418                                 wbr_drrr->drr_length = drrw->drr_logical_size;
419                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
420                                 wbr_drrr->drr_refguid = dataref.ref_guid;
421                                 wbr_drrr->drr_refobject =
422                                     dataref.ref_object;
423                                 wbr_drrr->drr_refoffset =
424                                     dataref.ref_offset;
425
426                                 wbr_drrr->drr_checksumtype =
427                                     drrw->drr_checksumtype;
428                                 wbr_drrr->drr_flags = drrw->drr_flags;
429                                 wbr_drrr->drr_key.ddk_cksum =
430                                     drrw->drr_key.ddk_cksum;
431                                 wbr_drrr->drr_key.ddk_prop =
432                                     drrw->drr_key.ddk_prop;
433
434                                 if (dump_record(&wbr_drr, NULL, 0,
435                                     &stream_cksum, outfd) != 0)
436                                         goto out;
437                         } else {
438                                 /* block not previously seen */
439                                 if (dump_record(drr, buf, payload_size,
440                                     &stream_cksum, outfd) != 0)
441                                         goto out;
442                         }
443                         break;
444                 }
445
446                 case DRR_WRITE_EMBEDDED:
447                 {
448                         struct drr_write_embedded *drrwe =
449                             &drr->drr_u.drr_write_embedded;
450                         (void) ssread(buf,
451                             P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
452                         if (dump_record(drr, buf,
453                             P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
454                             &stream_cksum, outfd) != 0)
455                                 goto out;
456                         break;
457                 }
458
459                 case DRR_FREE:
460                 {
461                         if (dump_record(drr, NULL, 0, &stream_cksum,
462                             outfd) != 0)
463                                 goto out;
464                         break;
465                 }
466
467                 case DRR_OBJECT_RANGE:
468                 {
469                         if (dump_record(drr, NULL, 0, &stream_cksum,
470                             outfd) != 0)
471                                 goto out;
472                         break;
473                 }
474
475                 default:
476                         (void) fprintf(stderr, "INVALID record type 0x%x\n",
477                             drr->drr_type);
478                         /* should never happen, so assert */
479                         assert(B_FALSE);
480                 }
481         }
482 out:
483         umem_cache_destroy(ddt.ddecache);
484         free(ddt.dedup_hash_array);
485         free(buf);
486         (void) fclose(ofp);
487
488         return (NULL);
489 }
490
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 */

/*
 * One node of the snapshot tree built by fsavl_create().  The tree is keyed
 * by fn_guid.  fn_nvfs and fn_snapname point into the nvlist the tree was
 * built from and are not owned by the node (fsavl_destroy() frees only the
 * node itself).
 */
typedef struct fsavl_node {
        avl_node_t fn_node;     /* AVL linkage */
        nvlist_t *fn_nvfs;      /* fs nvlist that contains the snapshot */
        char *fn_snapname;      /* name of the snapshot's nvpair */
        uint64_t fn_guid;       /* snapshot guid; the tree key */
} fsavl_node_t;
500
501 static int
502 fsavl_compare(const void *arg1, const void *arg2)
503 {
504         const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
505         const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
506
507         return (AVL_CMP(fn1->fn_guid, fn2->fn_guid));
508 }
509
510 /*
511  * Given the GUID of a snapshot, find its containing filesystem and
512  * (optionally) name.
513  */
514 static nvlist_t *
515 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
516 {
517         fsavl_node_t fn_find;
518         fsavl_node_t *fn;
519
520         fn_find.fn_guid = snapguid;
521
522         fn = avl_find(avl, &fn_find, NULL);
523         if (fn) {
524                 if (snapname)
525                         *snapname = fn->fn_snapname;
526                 return (fn->fn_nvfs);
527         }
528         return (NULL);
529 }
530
531 static void
532 fsavl_destroy(avl_tree_t *avl)
533 {
534         fsavl_node_t *fn;
535         void *cookie;
536
537         if (avl == NULL)
538                 return;
539
540         cookie = NULL;
541         while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
542                 free(fn);
543         avl_destroy(avl);
544         free(avl);
545 }
546
547 /*
548  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
549  */
550 static avl_tree_t *
551 fsavl_create(nvlist_t *fss)
552 {
553         avl_tree_t *fsavl;
554         nvpair_t *fselem = NULL;
555
556         if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
557                 return (NULL);
558
559         avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
560             offsetof(fsavl_node_t, fn_node));
561
562         while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
563                 nvlist_t *nvfs, *snaps;
564                 nvpair_t *snapelem = NULL;
565
566                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
567                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
568
569                 while ((snapelem =
570                     nvlist_next_nvpair(snaps, snapelem)) != NULL) {
571                         fsavl_node_t *fn;
572                         uint64_t guid;
573
574                         VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
575                         if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
576                                 fsavl_destroy(fsavl);
577                                 return (NULL);
578                         }
579                         fn->fn_nvfs = nvfs;
580                         fn->fn_snapname = nvpair_name(snapelem);
581                         fn->fn_guid = guid;
582
583                         /*
584                          * Note: if there are multiple snaps with the
585                          * same GUID, we ignore all but one.
586                          */
587                         if (avl_find(fsavl, fn, NULL) == NULL)
588                                 avl_add(fsavl, fn);
589                         else
590                                 free(fn);
591                 }
592         }
593
594         return (fsavl);
595 }
596
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 */

/*
 * State shared by the send_iterate_*() callbacks while the header nvlist
 * of a send stream is being assembled.
 */
typedef struct send_data {
        /*
         * assigned inside every recursive call,
         * restored from *_save on return:
         *
         * guid of fromsnap snapshot in parent dataset
         * txg of fromsnap snapshot in current dataset
         * txg of tosnap snapshot in current dataset
         */

        uint64_t parent_fromsnap_guid;
        uint64_t fromsnap_txg;
        uint64_t tosnap_txg;

        /* the nvlists get accumulated during depth-first traversal */
        nvlist_t *parent_snaps; /* snapshot name -> guid, see send_iterate_snap() */
        nvlist_t *fss;
        nvlist_t *snapprops;    /* snapshot name -> property nvlist */

        /* send-receive configuration, does not change during traversal */
        const char *fsname;
        const char *fromsnap;   /* NULL when there is no fromsnap */
        const char *tosnap;
        boolean_t raw;
        boolean_t recursive;
        boolean_t verbose;      /* print a note for each skipped snapshot */
        boolean_t seenfrom;     /* set by send_iterate_snap() at fromsnap */
        boolean_t seento;       /* set by send_iterate_snap() at tosnap */

        /*
         * The header nvlist is of the following format:
         * {
         *   "tosnap" -> string
         *   "fromsnap" -> string (if incremental)
         *   "fss" -> {
         *      id -> {
         *
         *       "name" -> string (full name; for debugging)
         *       "parentfromsnap" -> number (guid of fromsnap in parent)
         *
         *       "props" -> { name -> value (only if set here) }
         *       "snaps" -> { name (lastname) -> number (guid) }
         *       "snapprops" -> { name (lastname) -> { name -> value } }
         *
         *       "origin" -> number (guid) (if clone)
         *       "is_encroot" -> boolean
         *       "sent" -> boolean (not on-disk)
         *      }
         *   }
         * }
         *
         */
} send_data_t;
653
654 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
655
656 static int
657 send_iterate_snap(zfs_handle_t *zhp, void *arg)
658 {
659         send_data_t *sd = arg;
660         uint64_t guid = zhp->zfs_dmustats.dds_guid;
661         uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
662         char *snapname;
663         nvlist_t *nv;
664         boolean_t isfromsnap, istosnap, istosnapwithnofrom;
665
666         snapname = strrchr(zhp->zfs_name, '@')+1;
667         isfromsnap = (sd->fromsnap != NULL &&
668             strcmp(sd->fromsnap, snapname) == 0);
669         istosnap = (sd->tosnap != NULL && (strcmp(sd->tosnap, snapname) == 0));
670         istosnapwithnofrom = (istosnap && sd->fromsnap == NULL);
671
672         if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
673                 if (sd->verbose) {
674                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
675                             "skipping snapshot %s because it was created "
676                             "after the destination snapshot (%s)\n"),
677                             zhp->zfs_name, sd->tosnap);
678                 }
679                 zfs_close(zhp);
680                 return (0);
681         }
682
683         VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
684         /*
685          * NB: if there is no fromsnap here (it's a newly created fs in
686          * an incremental replication), we will substitute the tosnap.
687          */
688         if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap)) {
689                 sd->parent_fromsnap_guid = guid;
690         }
691
692         if (!sd->recursive) {
693                 if (!sd->seenfrom && isfromsnap) {
694                         sd->seenfrom = B_TRUE;
695                         zfs_close(zhp);
696                         return (0);
697                 }
698
699                 if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
700                         zfs_close(zhp);
701                         return (0);
702                 }
703
704                 if (istosnap)
705                         sd->seento = B_TRUE;
706         }
707
708         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
709         send_iterate_prop(zhp, nv);
710         VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
711         nvlist_free(nv);
712
713         zfs_close(zhp);
714         return (0);
715 }
716
717 static void
718 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
719 {
720         nvpair_t *elem = NULL;
721
722         while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
723                 char *propname = nvpair_name(elem);
724                 zfs_prop_t prop = zfs_name_to_prop(propname);
725                 nvlist_t *propnv;
726
727                 if (!zfs_prop_user(propname)) {
728                         /*
729                          * Realistically, this should never happen.  However,
730                          * we want the ability to add DSL properties without
731                          * needing to make incompatible version changes.  We
732                          * need to ignore unknown properties to allow older
733                          * software to still send datasets containing these
734                          * properties, with the unknown properties elided.
735                          */
736                         if (prop == ZPROP_INVAL)
737                                 continue;
738
739                         if (zfs_prop_readonly(prop))
740                                 continue;
741                 }
742
743                 verify(nvpair_value_nvlist(elem, &propnv) == 0);
744                 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
745                     prop == ZFS_PROP_REFQUOTA ||
746                     prop == ZFS_PROP_REFRESERVATION) {
747                         char *source;
748                         uint64_t value;
749                         verify(nvlist_lookup_uint64(propnv,
750                             ZPROP_VALUE, &value) == 0);
751                         if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
752                                 continue;
753                         /*
754                          * May have no source before SPA_VERSION_RECVD_PROPS,
755                          * but is still modifiable.
756                          */
757                         if (nvlist_lookup_string(propnv,
758                             ZPROP_SOURCE, &source) == 0) {
759                                 if ((strcmp(source, zhp->zfs_name) != 0) &&
760                                     (strcmp(source,
761                                     ZPROP_SOURCE_VAL_RECVD) != 0))
762                                         continue;
763                         }
764                 } else {
765                         char *source;
766                         if (nvlist_lookup_string(propnv,
767                             ZPROP_SOURCE, &source) != 0)
768                                 continue;
769                         if ((strcmp(source, zhp->zfs_name) != 0) &&
770                             (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
771                                 continue;
772                 }
773
774                 if (zfs_prop_user(propname) ||
775                     zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
776                         char *value;
777                         verify(nvlist_lookup_string(propnv,
778                             ZPROP_VALUE, &value) == 0);
779                         VERIFY(0 == nvlist_add_string(nv, propname, value));
780                 } else {
781                         uint64_t value;
782                         verify(nvlist_lookup_uint64(propnv,
783                             ZPROP_VALUE, &value) == 0);
784                         VERIFY(0 == nvlist_add_uint64(nv, propname, value));
785                 }
786         }
787 }
788
789 /*
790  * returns snapshot creation txg
791  * and returns 0 if the snapshot does not exist
792  */
793 static uint64_t
794 get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
795 {
796         char name[ZFS_MAX_DATASET_NAME_LEN];
797         uint64_t txg = 0;
798
799         if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
800                 return (txg);
801
802         (void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
803         if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
804                 zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
805                 if (zhp != NULL) {
806                         txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
807                         zfs_close(zhp);
808                 }
809         }
810
811         return (txg);
812 }
813
814 /*
815  * recursively generate nvlists describing datasets.  See comment
816  * for the data structure send_data_t above for description of contents
817  * of the nvlist.
818  */
819 static int
820 send_iterate_fs(zfs_handle_t *zhp, void *arg)
821 {
822         send_data_t *sd = arg;
823         nvlist_t *nvfs = NULL, *nv = NULL;
824         int rv = 0;
825         uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
826         uint64_t fromsnap_txg_save = sd->fromsnap_txg;
827         uint64_t tosnap_txg_save = sd->tosnap_txg;
828         uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
829         uint64_t guid = zhp->zfs_dmustats.dds_guid;
830         uint64_t fromsnap_txg, tosnap_txg;
831         char guidstring[64];
832
833         fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
834         if (fromsnap_txg != 0)
835                 sd->fromsnap_txg = fromsnap_txg;
836
837         tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
838         if (tosnap_txg != 0)
839                 sd->tosnap_txg = tosnap_txg;
840
841         /*
842          * on the send side, if the current dataset does not have tosnap,
843          * perform two additional checks:
844          *
845          * - skip sending the current dataset if it was created later than
846          *   the parent tosnap
847          * - return error if the current dataset was created earlier than
848          *   the parent tosnap
849          */
850         if (sd->tosnap != NULL && tosnap_txg == 0) {
851                 if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
852                         if (sd->verbose) {
853                                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
854                                     "skipping dataset %s: snapshot %s does "
855                                     "not exist\n"), zhp->zfs_name, sd->tosnap);
856                         }
857                 } else {
858                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
859                             "cannot send %s@%s%s: snapshot %s@%s does not "
860                             "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
861                             dgettext(TEXT_DOMAIN, " recursively") : "",
862                             zhp->zfs_name, sd->tosnap);
863                         rv = -1;
864                 }
865                 goto out;
866         }
867
868         VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
869         VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
870         VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
871             sd->parent_fromsnap_guid));
872
873         if (zhp->zfs_dmustats.dds_origin[0]) {
874                 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
875                     zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
876                 if (origin == NULL) {
877                         rv = -1;
878                         goto out;
879                 }
880                 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
881                     origin->zfs_dmustats.dds_guid));
882
883                 zfs_close(origin);
884         }
885
886         /* iterate over props */
887         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
888         send_iterate_prop(zhp, nv);
889
890         if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
891                 boolean_t encroot;
892
893                 /* determine if this dataset is an encryption root */
894                 if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
895                         rv = -1;
896                         goto out;
897                 }
898
899                 if (encroot)
900                         VERIFY(0 == nvlist_add_boolean(nvfs, "is_encroot"));
901
902                 /*
903                  * Encrypted datasets can only be sent with properties if
904                  * the raw flag is specified because the receive side doesn't
905                  * currently have a mechanism for recursively asking the user
906                  * for new encryption parameters.
907                  */
908                 if (!sd->raw) {
909                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
910                             "cannot send %s@%s: encrypted dataset %s may not "
911                             "be sent with properties without the raw flag\n"),
912                             sd->fsname, sd->tosnap, zhp->zfs_name);
913                         rv = -1;
914                         goto out;
915                 }
916
917         }
918
919         VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
920
921         /* iterate over snaps, and set sd->parent_fromsnap_guid */
922         sd->parent_fromsnap_guid = 0;
923         VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
924         VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
925         (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
926         VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
927         VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
928         nvlist_free(sd->parent_snaps);
929         nvlist_free(sd->snapprops);
930
931         /* add this fs to nvlist */
932         (void) snprintf(guidstring, sizeof (guidstring),
933             "0x%llx", (longlong_t)guid);
934         VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
935
936         /* iterate over children */
937         if (sd->recursive)
938                 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
939
940 out:
941         sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
942         sd->fromsnap_txg = fromsnap_txg_save;
943         sd->tosnap_txg = tosnap_txg_save;
944         nvlist_free(nv);
945         nvlist_free(nvfs);
946
947         zfs_close(zhp);
948         return (rv);
949 }
950
951 static int
952 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
953     const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t verbose,
954     nvlist_t **nvlp, avl_tree_t **avlp)
955 {
956         zfs_handle_t *zhp;
957         send_data_t sd = { 0 };
958         int error;
959
960         zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
961         if (zhp == NULL)
962                 return (EZFS_BADTYPE);
963
964         VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
965         sd.fsname = fsname;
966         sd.fromsnap = fromsnap;
967         sd.tosnap = tosnap;
968         sd.recursive = recursive;
969         sd.raw = raw;
970         sd.verbose = verbose;
971
972         if ((error = send_iterate_fs(zhp, &sd)) != 0) {
973                 nvlist_free(sd.fss);
974                 if (avlp != NULL)
975                         *avlp = NULL;
976                 *nvlp = NULL;
977                 return (error);
978         }
979
980         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
981                 nvlist_free(sd.fss);
982                 *nvlp = NULL;
983                 return (EZFS_NOMEM);
984         }
985
986         *nvlp = sd.fss;
987         return (0);
988 }
989
990 /*
991  * Routines specific to "zfs send"
992  */
993 typedef struct send_dump_data {
994         /* these are all just the short snapname (the part after the @) */
995         const char *fromsnap;
996         const char *tosnap;
997         char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
998         uint64_t prevsnap_obj;
999         boolean_t seenfrom, seento, replicate, doall, fromorigin;
1000         boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
1001         boolean_t large_block, compress, raw;
1002         int outfd;
1003         boolean_t err;
1004         nvlist_t *fss;
1005         nvlist_t *snapholds;
1006         avl_tree_t *fsavl;
1007         snapfilter_cb_t *filter_cb;
1008         void *filter_cb_arg;
1009         nvlist_t *debugnv;
1010         char holdtag[ZFS_MAX_DATASET_NAME_LEN];
1011         int cleanup_fd;
1012         uint64_t size;
1013 } send_dump_data_t;
1014
1015 static int
1016 zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
1017     enum lzc_send_flags flags, uint64_t *spacep)
1018 {
1019         libzfs_handle_t *hdl = zhp->zfs_hdl;
1020         int error;
1021
1022         assert(snapname != NULL);
1023         error = lzc_send_space(snapname, from, flags, spacep);
1024
1025         if (error != 0) {
1026                 char errbuf[1024];
1027                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1028                     "warning: cannot estimate space for '%s'"), snapname);
1029
1030                 switch (error) {
1031                 case EXDEV:
1032                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1033                             "not an earlier snapshot from the same fs"));
1034                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1035
1036                 case ENOENT:
1037                         if (zfs_dataset_exists(hdl, snapname,
1038                             ZFS_TYPE_SNAPSHOT)) {
1039                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1040                                     "incremental source (%s) does not exist"),
1041                                     snapname);
1042                         }
1043                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
1044
1045                 case EDQUOT:
1046                 case EFBIG:
1047                 case EIO:
1048                 case ENOLINK:
1049                 case ENOSPC:
1050                 case ENOSTR:
1051                 case ENXIO:
1052                 case EPIPE:
1053                 case ERANGE:
1054                 case EFAULT:
1055                 case EROFS:
1056                 case EINVAL:
1057                         zfs_error_aux(hdl, strerror(error));
1058                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1059
1060                 default:
1061                         return (zfs_standard_error(hdl, error, errbuf));
1062                 }
1063         }
1064
1065         return (0);
1066 }
1067
1068 /*
1069  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
1070  * NULL) to the file descriptor specified by outfd.
1071  */
1072 static int
1073 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
1074     boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
1075     nvlist_t *debugnv)
1076 {
1077         zfs_cmd_t zc = {"\0"};
1078         libzfs_handle_t *hdl = zhp->zfs_hdl;
1079         nvlist_t *thisdbg;
1080
1081         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1082         assert(fromsnap_obj == 0 || !fromorigin);
1083
1084         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1085         zc.zc_cookie = outfd;
1086         zc.zc_obj = fromorigin;
1087         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1088         zc.zc_fromobj = fromsnap_obj;
1089         zc.zc_flags = flags;
1090
1091         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
1092         if (fromsnap && fromsnap[0] != '\0') {
1093                 VERIFY(0 == nvlist_add_string(thisdbg,
1094                     "fromsnap", fromsnap));
1095         }
1096
1097         if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1098                 char errbuf[1024];
1099                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1100                     "warning: cannot send '%s'"), zhp->zfs_name);
1101
1102                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
1103                 if (debugnv) {
1104                         VERIFY(0 == nvlist_add_nvlist(debugnv,
1105                             zhp->zfs_name, thisdbg));
1106                 }
1107                 nvlist_free(thisdbg);
1108
1109                 switch (errno) {
1110                 case EXDEV:
1111                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1112                             "not an earlier snapshot from the same fs"));
1113                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1114
1115                 case EACCES:
1116                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1117                             "source key must be loaded"));
1118                         return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1119
1120                 case ENOENT:
1121                         if (zfs_dataset_exists(hdl, zc.zc_name,
1122                             ZFS_TYPE_SNAPSHOT)) {
1123                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1124                                     "incremental source (@%s) does not exist"),
1125                                     zc.zc_value);
1126                         }
1127                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
1128
1129                 case EDQUOT:
1130                 case EFBIG:
1131                 case EIO:
1132                 case ENOLINK:
1133                 case ENOSPC:
1134                 case ENOSTR:
1135                 case ENXIO:
1136                 case EPIPE:
1137                 case ERANGE:
1138                 case EFAULT:
1139                 case EROFS:
1140                         zfs_error_aux(hdl, strerror(errno));
1141                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1142
1143                 default:
1144                         return (zfs_standard_error(hdl, errno, errbuf));
1145                 }
1146         }
1147
1148         if (debugnv)
1149                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
1150         nvlist_free(thisdbg);
1151
1152         return (0);
1153 }
1154
1155 static void
1156 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
1157 {
1158         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1159
1160         /*
1161          * zfs_send() only sets snapholds for sends that need them,
1162          * e.g. replication and doall.
1163          */
1164         if (sdd->snapholds == NULL)
1165                 return;
1166
1167         fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1168 }
1169
1170 static void *
1171 send_progress_thread(void *arg)
1172 {
1173         progress_arg_t *pa = arg;
1174         zfs_cmd_t zc = {"\0"};
1175         zfs_handle_t *zhp = pa->pa_zhp;
1176         libzfs_handle_t *hdl = zhp->zfs_hdl;
1177         unsigned long long bytes;
1178         char buf[16];
1179         time_t t;
1180         struct tm *tm;
1181
1182         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1183
1184         if (!pa->pa_parsable)
1185                 (void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1186
1187         /*
1188          * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1189          */
1190         for (;;) {
1191                 (void) sleep(1);
1192
1193                 zc.zc_cookie = pa->pa_fd;
1194                 if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1195                         return ((void *)-1);
1196
1197                 (void) time(&t);
1198                 tm = localtime(&t);
1199                 bytes = zc.zc_cookie;
1200
1201                 if (pa->pa_parsable) {
1202                         (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1203                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1204                             bytes, zhp->zfs_name);
1205                 } else {
1206                         zfs_nicebytes(bytes, buf, sizeof (buf));
1207                         (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1208                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1209                             buf, zhp->zfs_name);
1210                 }
1211         }
1212 }
1213
1214 static void
1215 send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1216     uint64_t size, boolean_t parsable)
1217 {
1218         if (parsable) {
1219                 if (fromsnap != NULL) {
1220                         (void) fprintf(fout, "incremental\t%s\t%s",
1221                             fromsnap, tosnap);
1222                 } else {
1223                         (void) fprintf(fout, "full\t%s",
1224                             tosnap);
1225                 }
1226         } else {
1227                 if (fromsnap != NULL) {
1228                         if (strchr(fromsnap, '@') == NULL &&
1229                             strchr(fromsnap, '#') == NULL) {
1230                                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1231                                     "send from @%s to %s"),
1232                                     fromsnap, tosnap);
1233                         } else {
1234                                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1235                                     "send from %s to %s"),
1236                                     fromsnap, tosnap);
1237                         }
1238                 } else {
1239                         (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1240                             "full send of %s"),
1241                             tosnap);
1242                 }
1243         }
1244
1245         if (parsable) {
1246                 (void) fprintf(fout, "\t%llu",
1247                     (longlong_t)size);
1248         } else if (size != 0) {
1249                 char buf[16];
1250                 zfs_nicebytes(size, buf, sizeof (buf));
1251                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1252                     " estimated size is %s"), buf);
1253         }
1254         (void) fprintf(fout, "\n");
1255 }
1256
1257 static int
1258 dump_snapshot(zfs_handle_t *zhp, void *arg)
1259 {
1260         send_dump_data_t *sdd = arg;
1261         progress_arg_t pa = { 0 };
1262         pthread_t tid;
1263         char *thissnap;
1264         enum lzc_send_flags flags = 0;
1265         int err;
1266         boolean_t isfromsnap, istosnap, fromorigin;
1267         boolean_t exclude = B_FALSE;
1268         FILE *fout = sdd->std_out ? stdout : stderr;
1269
1270         err = 0;
1271         thissnap = strchr(zhp->zfs_name, '@') + 1;
1272         isfromsnap = (sdd->fromsnap != NULL &&
1273             strcmp(sdd->fromsnap, thissnap) == 0);
1274
1275         if (!sdd->seenfrom && isfromsnap) {
1276                 gather_holds(zhp, sdd);
1277                 sdd->seenfrom = B_TRUE;
1278                 (void) strlcpy(sdd->prevsnap, thissnap,
1279                     sizeof (sdd->prevsnap));
1280                 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1281                 zfs_close(zhp);
1282                 return (0);
1283         }
1284
1285         if (sdd->seento || !sdd->seenfrom) {
1286                 zfs_close(zhp);
1287                 return (0);
1288         }
1289
1290         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1291         if (istosnap)
1292                 sdd->seento = B_TRUE;
1293
1294         if (sdd->large_block)
1295                 flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1296         if (sdd->embed_data)
1297                 flags |= LZC_SEND_FLAG_EMBED_DATA;
1298         if (sdd->compress)
1299                 flags |= LZC_SEND_FLAG_COMPRESS;
1300         if (sdd->raw)
1301                 flags |= LZC_SEND_FLAG_RAW;
1302
1303         if (!sdd->doall && !isfromsnap && !istosnap) {
1304                 if (sdd->replicate) {
1305                         char *snapname;
1306                         nvlist_t *snapprops;
1307                         /*
1308                          * Filter out all intermediate snapshots except origin
1309                          * snapshots needed to replicate clones.
1310                          */
1311                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1312                             zhp->zfs_dmustats.dds_guid, &snapname);
1313
1314                         VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1315                             "snapprops", &snapprops));
1316                         VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1317                             thissnap, &snapprops));
1318                         exclude = !nvlist_exists(snapprops, "is_clone_origin");
1319                 } else {
1320                         exclude = B_TRUE;
1321                 }
1322         }
1323
1324         /*
1325          * If a filter function exists, call it to determine whether
1326          * this snapshot will be sent.
1327          */
1328         if (exclude || (sdd->filter_cb != NULL &&
1329             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1330                 /*
1331                  * This snapshot is filtered out.  Don't send it, and don't
1332                  * set prevsnap_obj, so it will be as if this snapshot didn't
1333                  * exist, and the next accepted snapshot will be sent as
1334                  * an incremental from the last accepted one, or as the
1335                  * first (and full) snapshot in the case of a replication,
1336                  * non-incremental send.
1337                  */
1338                 zfs_close(zhp);
1339                 return (0);
1340         }
1341
1342         gather_holds(zhp, sdd);
1343         fromorigin = sdd->prevsnap[0] == '\0' &&
1344             (sdd->fromorigin || sdd->replicate);
1345
1346         if (sdd->verbose) {
1347                 uint64_t size = 0;
1348                 char fromds[ZFS_MAX_DATASET_NAME_LEN];
1349
1350                 if (sdd->prevsnap[0] != '\0') {
1351                         (void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds));
1352                         *(strchr(fromds, '@') + 1) = '\0';
1353                         (void) strlcat(fromds, sdd->prevsnap, sizeof (fromds));
1354                 }
1355                 if (zfs_send_space(zhp, zhp->zfs_name,
1356                     sdd->prevsnap[0] ? fromds : NULL, flags, &size) != 0) {
1357                         size = 0; /* cannot estimate send space */
1358                 } else {
1359                         send_print_verbose(fout, zhp->zfs_name,
1360                             sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1361                             size, sdd->parsable);
1362                 }
1363                 sdd->size += size;
1364         }
1365
1366         if (!sdd->dryrun) {
1367                 /*
1368                  * If progress reporting is requested, spawn a new thread to
1369                  * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1370                  */
1371                 if (sdd->progress) {
1372                         pa.pa_zhp = zhp;
1373                         pa.pa_fd = sdd->outfd;
1374                         pa.pa_parsable = sdd->parsable;
1375
1376                         if ((err = pthread_create(&tid, NULL,
1377                             send_progress_thread, &pa)) != 0) {
1378                                 zfs_close(zhp);
1379                                 return (err);
1380                         }
1381                 }
1382
1383                 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1384                     fromorigin, sdd->outfd, flags, sdd->debugnv);
1385
1386                 if (sdd->progress) {
1387                         (void) pthread_cancel(tid);
1388                         (void) pthread_join(tid, NULL);
1389                 }
1390         }
1391
1392         (void) strcpy(sdd->prevsnap, thissnap);
1393         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1394         zfs_close(zhp);
1395         return (err);
1396 }
1397
1398 static int
1399 dump_filesystem(zfs_handle_t *zhp, void *arg)
1400 {
1401         int rv = 0;
1402         send_dump_data_t *sdd = arg;
1403         boolean_t missingfrom = B_FALSE;
1404         zfs_cmd_t zc = {"\0"};
1405
1406         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1407             zhp->zfs_name, sdd->tosnap);
1408         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1409                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1410                     "WARNING: could not send %s@%s: does not exist\n"),
1411                     zhp->zfs_name, sdd->tosnap);
1412                 sdd->err = B_TRUE;
1413                 return (0);
1414         }
1415
1416         if (sdd->replicate && sdd->fromsnap) {
1417                 /*
1418                  * If this fs does not have fromsnap, and we're doing
1419                  * recursive, we need to send a full stream from the
1420                  * beginning (or an incremental from the origin if this
1421                  * is a clone).  If we're doing non-recursive, then let
1422                  * them get the error.
1423                  */
1424                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1425                     zhp->zfs_name, sdd->fromsnap);
1426                 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1427                     ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1428                         missingfrom = B_TRUE;
1429                 }
1430         }
1431
1432         sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1433         sdd->prevsnap_obj = 0;
1434         if (sdd->fromsnap == NULL || missingfrom)
1435                 sdd->seenfrom = B_TRUE;
1436
1437         rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1438         if (!sdd->seenfrom) {
1439                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1440                     "WARNING: could not send %s@%s:\n"
1441                     "incremental source (%s@%s) does not exist\n"),
1442                     zhp->zfs_name, sdd->tosnap,
1443                     zhp->zfs_name, sdd->fromsnap);
1444                 sdd->err = B_TRUE;
1445         } else if (!sdd->seento) {
1446                 if (sdd->fromsnap) {
1447                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1448                             "WARNING: could not send %s@%s:\n"
1449                             "incremental source (%s@%s) "
1450                             "is not earlier than it\n"),
1451                             zhp->zfs_name, sdd->tosnap,
1452                             zhp->zfs_name, sdd->fromsnap);
1453                 } else {
1454                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1455                             "WARNING: "
1456                             "could not send %s@%s: does not exist\n"),
1457                             zhp->zfs_name, sdd->tosnap);
1458                 }
1459                 sdd->err = B_TRUE;
1460         }
1461
1462         return (rv);
1463 }
1464
1465 static int
1466 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1467 {
1468         send_dump_data_t *sdd = arg;
1469         nvpair_t *fspair;
1470         boolean_t needagain, progress;
1471
1472         if (!sdd->replicate)
1473                 return (dump_filesystem(rzhp, sdd));
1474
1475         /* Mark the clone origin snapshots. */
1476         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1477             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1478                 nvlist_t *nvfs;
1479                 uint64_t origin_guid = 0;
1480
1481                 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1482                 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1483                 if (origin_guid != 0) {
1484                         char *snapname;
1485                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1486                             origin_guid, &snapname);
1487                         if (origin_nv != NULL) {
1488                                 nvlist_t *snapprops;
1489                                 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1490                                     "snapprops", &snapprops));
1491                                 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1492                                     snapname, &snapprops));
1493                                 VERIFY(0 == nvlist_add_boolean(
1494                                     snapprops, "is_clone_origin"));
1495                         }
1496                 }
1497         }
1498 again:
1499         needagain = progress = B_FALSE;
1500         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1501             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1502                 nvlist_t *fslist, *parent_nv;
1503                 char *fsname;
1504                 zfs_handle_t *zhp;
1505                 int err;
1506                 uint64_t origin_guid = 0;
1507                 uint64_t parent_guid = 0;
1508
1509                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1510                 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1511                         continue;
1512
1513                 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1514                 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1515                 (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1516                     &parent_guid);
1517
1518                 if (parent_guid != 0) {
1519                         parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1520                         if (!nvlist_exists(parent_nv, "sent")) {
1521                                 /* parent has not been sent; skip this one */
1522                                 needagain = B_TRUE;
1523                                 continue;
1524                         }
1525                 }
1526
1527                 if (origin_guid != 0) {
1528                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1529                             origin_guid, NULL);
1530                         if (origin_nv != NULL &&
1531                             !nvlist_exists(origin_nv, "sent")) {
1532                                 /*
1533                                  * origin has not been sent yet;
1534                                  * skip this clone.
1535                                  */
1536                                 needagain = B_TRUE;
1537                                 continue;
1538                         }
1539                 }
1540
1541                 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1542                 if (zhp == NULL)
1543                         return (-1);
1544                 err = dump_filesystem(zhp, sdd);
1545                 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1546                 progress = B_TRUE;
1547                 zfs_close(zhp);
1548                 if (err)
1549                         return (err);
1550         }
1551         if (needagain) {
1552                 assert(progress);
1553                 goto again;
1554         }
1555
1556         /* clean out the sent flags in case we reuse this fss */
1557         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1558             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1559                 nvlist_t *fslist;
1560
1561                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1562                 (void) nvlist_remove_all(fslist, "sent");
1563         }
1564
1565         return (0);
1566 }
1567
1568 nvlist_t *
1569 zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1570 {
1571         unsigned int version;
1572         int nread, i;
1573         unsigned long long checksum, packed_len;
1574
1575         /*
1576          * Decode token header, which is:
1577          *   <token version>-<checksum of payload>-<uncompressed payload length>
1578          * Note that the only supported token version is 1.
1579          */
1580         nread = sscanf(token, "%u-%llx-%llx-",
1581             &version, &checksum, &packed_len);
1582         if (nread != 3) {
1583                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1584                     "resume token is corrupt (invalid format)"));
1585                 return (NULL);
1586         }
1587
1588         if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1589                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1590                     "resume token is corrupt (invalid version %u)"),
1591                     version);
1592                 return (NULL);
1593         }
1594
1595         /* convert hexadecimal representation to binary */
1596         token = strrchr(token, '-') + 1;
1597         int len = strlen(token) / 2;
1598         unsigned char *compressed = zfs_alloc(hdl, len);
1599         for (i = 0; i < len; i++) {
1600                 nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1601                 if (nread != 1) {
1602                         free(compressed);
1603                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1604                             "resume token is corrupt "
1605                             "(payload is not hex-encoded)"));
1606                         return (NULL);
1607                 }
1608         }
1609
1610         /* verify checksum */
1611         zio_cksum_t cksum;
1612         fletcher_4_native_varsize(compressed, len, &cksum);
1613         if (cksum.zc_word[0] != checksum) {
1614                 free(compressed);
1615                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1616                     "resume token is corrupt (incorrect checksum)"));
1617                 return (NULL);
1618         }
1619
1620         /* uncompress */
1621         void *packed = zfs_alloc(hdl, packed_len);
1622         uLongf packed_len_long = packed_len;
1623         if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1624             packed_len_long != packed_len) {
1625                 free(packed);
1626                 free(compressed);
1627                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1628                     "resume token is corrupt (decompression failed)"));
1629                 return (NULL);
1630         }
1631
1632         /* unpack nvlist */
1633         nvlist_t *nv;
1634         int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1635         free(packed);
1636         free(compressed);
1637         if (error != 0) {
1638                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1639                     "resume token is corrupt (nvlist_unpack failed)"));
1640                 return (NULL);
1641         }
1642         return (nv);
1643 }
1644
1645 int
1646 zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1647     const char *resume_token)
1648 {
1649         char errbuf[1024];
1650         char *toname;
1651         char *fromname = NULL;
1652         uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1653         zfs_handle_t *zhp;
1654         int error = 0;
1655         char name[ZFS_MAX_DATASET_NAME_LEN];
1656         enum lzc_send_flags lzc_flags = 0;
1657         FILE *fout = (flags->verbose && flags->dryrun) ? stdout : stderr;
1658
1659         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1660             "cannot resume send"));
1661
1662         nvlist_t *resume_nvl =
1663             zfs_send_resume_token_to_nvlist(hdl, resume_token);
1664         if (resume_nvl == NULL) {
1665                 /*
1666                  * zfs_error_aux has already been set by
1667                  * zfs_send_resume_token_to_nvlist
1668                  */
1669                 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1670         }
1671         if (flags->verbose) {
1672                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1673                     "resume token contents:\n"));
1674                 nvlist_print(fout, resume_nvl);
1675         }
1676
1677         if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1678             nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1679             nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1680             nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1681             nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1682                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1683                     "resume token is corrupt"));
1684                 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1685         }
1686         fromguid = 0;
1687         (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1688
1689         if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
1690                 lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1691         if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1692                 lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1693         if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
1694                 lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1695         if (flags->raw || nvlist_exists(resume_nvl, "rawok"))
1696                 lzc_flags |= LZC_SEND_FLAG_RAW;
1697
1698         if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
1699                 if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1700                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1701                             "'%s' is no longer the same snapshot used in "
1702                             "the initial send"), toname);
1703                 } else {
1704                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1705                             "'%s' used in the initial send no longer exists"),
1706                             toname);
1707                 }
1708                 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1709         }
1710         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1711         if (zhp == NULL) {
1712                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1713                     "unable to access '%s'"), name);
1714                 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1715         }
1716
1717         if (fromguid != 0) {
1718                 if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
1719                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1720                             "incremental source %#llx no longer exists"),
1721                             (longlong_t)fromguid);
1722                         return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1723                 }
1724                 fromname = name;
1725         }
1726
1727         if (flags->verbose) {
1728                 uint64_t size = 0;
1729                 error = lzc_send_space(zhp->zfs_name, fromname,
1730                     lzc_flags, &size);
1731                 if (error == 0)
1732                         size = MAX(0, (int64_t)(size - bytes));
1733                 send_print_verbose(fout, zhp->zfs_name, fromname,
1734                     size, flags->parsable);
1735         }
1736
1737         if (!flags->dryrun) {
1738                 progress_arg_t pa = { 0 };
1739                 pthread_t tid;
1740                 /*
1741                  * If progress reporting is requested, spawn a new thread to
1742                  * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1743                  */
1744                 if (flags->progress) {
1745                         pa.pa_zhp = zhp;
1746                         pa.pa_fd = outfd;
1747                         pa.pa_parsable = flags->parsable;
1748
1749                         error = pthread_create(&tid, NULL,
1750                             send_progress_thread, &pa);
1751                         if (error != 0) {
1752                                 zfs_close(zhp);
1753                                 return (error);
1754                         }
1755                 }
1756
1757                 error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
1758                     lzc_flags, resumeobj, resumeoff);
1759
1760                 if (flags->progress) {
1761                         (void) pthread_cancel(tid);
1762                         (void) pthread_join(tid, NULL);
1763                 }
1764
1765                 char errbuf[1024];
1766                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1767                     "warning: cannot send '%s'"), zhp->zfs_name);
1768
1769                 zfs_close(zhp);
1770
1771                 switch (error) {
1772                 case 0:
1773                         return (0);
1774                 case EACCES:
1775                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1776                             "source key must be loaded"));
1777                         return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1778
1779                 case EXDEV:
1780                 case ENOENT:
1781                 case EDQUOT:
1782                 case EFBIG:
1783                 case EIO:
1784                 case ENOLINK:
1785                 case ENOSPC:
1786                 case ENOSTR:
1787                 case ENXIO:
1788                 case EPIPE:
1789                 case ERANGE:
1790                 case EFAULT:
1791                 case EROFS:
1792                         zfs_error_aux(hdl, strerror(errno));
1793                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1794
1795                 default:
1796                         return (zfs_standard_error(hdl, errno, errbuf));
1797                 }
1798         }
1799
1800
1801         zfs_close(zhp);
1802
1803         return (error);
1804 }
1805
1806 /*
1807  * Generate a send stream for the dataset identified by the argument zhp.
1808  *
1809  * The content of the send stream is the snapshot identified by
1810  * 'tosnap'.  Incremental streams are requested in two ways:
1811  *     - from the snapshot identified by "fromsnap" (if non-null) or
1812  *     - from the origin of the dataset identified by zhp, which must
1813  *       be a clone.  In this case, "fromsnap" is null and "fromorigin"
1814  *       is TRUE.
1815  *
1816  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1817  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1818  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1819  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1820  * case too. If "props" is set, send properties.
1821  */
1822 int
1823 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1824     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1825     void *cb_arg, nvlist_t **debugnvp)
1826 {
1827         char errbuf[1024];
1828         send_dump_data_t sdd = { 0 };
1829         int err = 0;
1830         nvlist_t *fss = NULL;
1831         avl_tree_t *fsavl = NULL;
1832         static uint64_t holdseq;
1833         int spa_version;
1834         pthread_t tid = 0;
1835         int pipefd[2];
1836         dedup_arg_t dda = { 0 };
1837         int featureflags = 0;
1838         FILE *fout;
1839
1840         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1841             "cannot send '%s'"), zhp->zfs_name);
1842
1843         if (fromsnap && fromsnap[0] == '\0') {
1844                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1845                     "zero-length incremental source"));
1846                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1847         }
1848
1849         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1850                 uint64_t version;
1851                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1852                 if (version >= ZPL_VERSION_SA) {
1853                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1854                 }
1855         }
1856
1857         /*
1858          * Start the dedup thread if this is a dedup stream. We do not bother
1859          * doing this if this a raw send of an encrypted dataset with dedup off
1860          * because normal encrypted blocks won't dedup.
1861          */
1862         if (flags->dedup && !flags->dryrun && !(flags->raw &&
1863             zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF &&
1864             zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) {
1865                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1866                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1867                 if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd)) != 0) {
1868                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1869                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1870                             errbuf));
1871                 }
1872                 dda.outputfd = outfd;
1873                 dda.inputfd = pipefd[1];
1874                 dda.dedup_hdl = zhp->zfs_hdl;
1875                 if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
1876                         (void) close(pipefd[0]);
1877                         (void) close(pipefd[1]);
1878                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1879                         return (zfs_error(zhp->zfs_hdl,
1880                             EZFS_THREADCREATEFAILED, errbuf));
1881                 }
1882         }
1883
1884         if (flags->replicate || flags->doall || flags->props) {
1885                 dmu_replay_record_t drr = { 0 };
1886                 char *packbuf = NULL;
1887                 size_t buflen = 0;
1888                 zio_cksum_t zc;
1889
1890                 ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0);
1891
1892                 if (flags->replicate || flags->props) {
1893                         nvlist_t *hdrnv;
1894
1895                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1896                         if (fromsnap) {
1897                                 VERIFY(0 == nvlist_add_string(hdrnv,
1898                                     "fromsnap", fromsnap));
1899                         }
1900                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1901                         if (!flags->replicate) {
1902                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1903                                     "not_recursive"));
1904                         }
1905                         if (flags->raw) {
1906                                 VERIFY(0 == nvlist_add_boolean(hdrnv, "raw"));
1907                         }
1908
1909                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1910                             fromsnap, tosnap, flags->replicate, flags->raw,
1911                             flags->verbose, &fss, &fsavl);
1912                         if (err)
1913                                 goto err_out;
1914                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1915                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1916                             NV_ENCODE_XDR, 0);
1917                         if (debugnvp)
1918                                 *debugnvp = hdrnv;
1919                         else
1920                                 nvlist_free(hdrnv);
1921                         if (err)
1922                                 goto stderr_out;
1923                 }
1924
1925                 if (!flags->dryrun) {
1926                         /* write first begin record */
1927                         drr.drr_type = DRR_BEGIN;
1928                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1929                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1930                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1931                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1932                             drr_versioninfo, featureflags);
1933                         if (snprintf(drr.drr_u.drr_begin.drr_toname,
1934                             sizeof (drr.drr_u.drr_begin.drr_toname),
1935                             "%s@%s", zhp->zfs_name, tosnap) >=
1936                             sizeof (drr.drr_u.drr_begin.drr_toname)) {
1937                                 err = EINVAL;
1938                                 goto stderr_out;
1939                         }
1940                         drr.drr_payloadlen = buflen;
1941
1942                         err = dump_record(&drr, packbuf, buflen, &zc, outfd);
1943                         free(packbuf);
1944                         if (err != 0)
1945                                 goto stderr_out;
1946
1947                         /* write end record */
1948                         bzero(&drr, sizeof (drr));
1949                         drr.drr_type = DRR_END;
1950                         drr.drr_u.drr_end.drr_checksum = zc;
1951                         err = write(outfd, &drr, sizeof (drr));
1952                         if (err == -1) {
1953                                 err = errno;
1954                                 goto stderr_out;
1955                         }
1956
1957                         err = 0;
1958                 }
1959         }
1960
1961         /* dump each stream */
1962         sdd.fromsnap = fromsnap;
1963         sdd.tosnap = tosnap;
1964         if (tid != 0)
1965                 sdd.outfd = pipefd[0];
1966         else
1967                 sdd.outfd = outfd;
1968         sdd.replicate = flags->replicate;
1969         sdd.doall = flags->doall;
1970         sdd.fromorigin = flags->fromorigin;
1971         sdd.fss = fss;
1972         sdd.fsavl = fsavl;
1973         sdd.verbose = flags->verbose;
1974         sdd.parsable = flags->parsable;
1975         sdd.progress = flags->progress;
1976         sdd.dryrun = flags->dryrun;
1977         sdd.large_block = flags->largeblock;
1978         sdd.embed_data = flags->embed_data;
1979         sdd.compress = flags->compress;
1980         sdd.raw = flags->raw;
1981         sdd.filter_cb = filter_func;
1982         sdd.filter_cb_arg = cb_arg;
1983         if (debugnvp)
1984                 sdd.debugnv = *debugnvp;
1985         if (sdd.verbose && sdd.dryrun)
1986                 sdd.std_out = B_TRUE;
1987         fout = sdd.std_out ? stdout : stderr;
1988
1989         /*
1990          * Some flags require that we place user holds on the datasets that are
1991          * being sent so they don't get destroyed during the send. We can skip
1992          * this step if the pool is imported read-only since the datasets cannot
1993          * be destroyed.
1994          */
1995         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1996             ZPOOL_PROP_READONLY, NULL) &&
1997             zfs_spa_version(zhp, &spa_version) == 0 &&
1998             spa_version >= SPA_VERSION_USERREFS &&
1999             (flags->doall || flags->replicate)) {
2000                 ++holdseq;
2001                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
2002                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
2003                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
2004                 if (sdd.cleanup_fd < 0) {
2005                         err = errno;
2006                         goto stderr_out;
2007                 }
2008                 sdd.snapholds = fnvlist_alloc();
2009         } else {
2010                 sdd.cleanup_fd = -1;
2011                 sdd.snapholds = NULL;
2012         }
2013         if (flags->verbose || sdd.snapholds != NULL) {
2014                 /*
2015                  * Do a verbose no-op dry run to get all the verbose output
2016                  * or to gather snapshot hold's before generating any data,
2017                  * then do a non-verbose real run to generate the streams.
2018                  */
2019                 sdd.dryrun = B_TRUE;
2020                 err = dump_filesystems(zhp, &sdd);
2021
2022                 if (err != 0)
2023                         goto stderr_out;
2024
2025                 if (flags->verbose) {
2026                         if (flags->parsable) {
2027                                 (void) fprintf(fout, "size\t%llu\n",
2028                                     (longlong_t)sdd.size);
2029                         } else {
2030                                 char buf[16];
2031                                 zfs_nicebytes(sdd.size, buf, sizeof (buf));
2032                                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
2033                                     "total estimated size is %s\n"), buf);
2034                         }
2035                 }
2036
2037                 /* Ensure no snaps found is treated as an error. */
2038                 if (!sdd.seento) {
2039                         err = ENOENT;
2040                         goto err_out;
2041                 }
2042
2043                 /* Skip the second run if dryrun was requested. */
2044                 if (flags->dryrun)
2045                         goto err_out;
2046
2047                 if (sdd.snapholds != NULL) {
2048                         err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
2049                         if (err != 0)
2050                                 goto stderr_out;
2051
2052                         fnvlist_free(sdd.snapholds);
2053                         sdd.snapholds = NULL;
2054                 }
2055
2056                 sdd.dryrun = B_FALSE;
2057                 sdd.verbose = B_FALSE;
2058         }
2059
2060         err = dump_filesystems(zhp, &sdd);
2061         fsavl_destroy(fsavl);
2062         nvlist_free(fss);
2063
2064         /* Ensure no snaps found is treated as an error. */
2065         if (err == 0 && !sdd.seento)
2066                 err = ENOENT;
2067
2068         if (tid != 0) {
2069                 if (err != 0)
2070                         (void) pthread_cancel(tid);
2071                 (void) close(pipefd[0]);
2072                 (void) pthread_join(tid, NULL);
2073         }
2074
2075         if (sdd.cleanup_fd != -1) {
2076                 VERIFY(0 == close(sdd.cleanup_fd));
2077                 sdd.cleanup_fd = -1;
2078         }
2079
2080         if (!flags->dryrun && (flags->replicate || flags->doall ||
2081             flags->props)) {
2082                 /*
2083                  * write final end record.  NB: want to do this even if
2084                  * there was some error, because it might not be totally
2085                  * failed.
2086                  */
2087                 dmu_replay_record_t drr = { 0 };
2088                 drr.drr_type = DRR_END;
2089                 if (write(outfd, &drr, sizeof (drr)) == -1) {
2090                         return (zfs_standard_error(zhp->zfs_hdl,
2091                             errno, errbuf));
2092                 }
2093         }
2094
2095         return (err || sdd.err);
2096
2097 stderr_out:
2098         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2099 err_out:
2100         fsavl_destroy(fsavl);
2101         nvlist_free(fss);
2102         fnvlist_free(sdd.snapholds);
2103
2104         if (sdd.cleanup_fd != -1)
2105                 VERIFY(0 == close(sdd.cleanup_fd));
2106         if (tid != 0) {
2107                 (void) pthread_cancel(tid);
2108                 (void) close(pipefd[0]);
2109                 (void) pthread_join(tid, NULL);
2110         }
2111         return (err);
2112 }
2113
2114 int
2115 zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t flags)
2116 {
2117         int err = 0;
2118         libzfs_handle_t *hdl = zhp->zfs_hdl;
2119         enum lzc_send_flags lzc_flags = 0;
2120         FILE *fout = (flags.verbose && flags.dryrun) ? stdout : stderr;
2121         char errbuf[1024];
2122
2123         if (flags.largeblock)
2124                 lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
2125         if (flags.embed_data)
2126                 lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
2127         if (flags.compress)
2128                 lzc_flags |= LZC_SEND_FLAG_COMPRESS;
2129         if (flags.raw)
2130                 lzc_flags |= LZC_SEND_FLAG_RAW;
2131
2132         if (flags.verbose) {
2133                 uint64_t size = 0;
2134                 err = lzc_send_space(zhp->zfs_name, from, lzc_flags, &size);
2135                 if (err == 0) {
2136                         send_print_verbose(fout, zhp->zfs_name, from, size,
2137                             flags.parsable);
2138                 } else {
2139                         (void) fprintf(stderr, "Cannot estimate send size: "
2140                             "%s\n", strerror(errno));
2141                 }
2142         }
2143
2144         if (flags.dryrun)
2145                 return (err);
2146
2147         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2148             "warning: cannot send '%s'"), zhp->zfs_name);
2149
2150         err = lzc_send(zhp->zfs_name, from, fd, lzc_flags);
2151         if (err != 0) {
2152                 switch (errno) {
2153                 case EXDEV:
2154                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2155                             "not an earlier snapshot from the same fs"));
2156                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2157
2158                 case ENOENT:
2159                 case ESRCH:
2160                         if (lzc_exists(zhp->zfs_name)) {
2161                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2162                                     "incremental source (%s) does not exist"),
2163                                     from);
2164                         }
2165                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2166
2167                 case EACCES:
2168                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2169                             "dataset key must be loaded"));
2170                         return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2171
2172                 case EBUSY:
2173                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2174                             "target is busy; if a filesystem, "
2175                             "it must not be mounted"));
2176                         return (zfs_error(hdl, EZFS_BUSY, errbuf));
2177
2178                 case EDQUOT:
2179                 case EFBIG:
2180                 case EIO:
2181                 case ENOLINK:
2182                 case ENOSPC:
2183                 case ENOSTR:
2184                 case ENXIO:
2185                 case EPIPE:
2186                 case ERANGE:
2187                 case EFAULT:
2188                 case EROFS:
2189                         zfs_error_aux(hdl, strerror(errno));
2190                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2191
2192                 default:
2193                         return (zfs_standard_error(hdl, errno, errbuf));
2194                 }
2195         }
2196         return (err != 0);
2197 }
2198
2199 /*
2200  * Routines specific to "zfs recv"
2201  */
2202
2203 static int
2204 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2205     boolean_t byteswap, zio_cksum_t *zc)
2206 {
2207         char *cp = buf;
2208         int rv;
2209         int len = ilen;
2210
2211         assert(ilen <= SPA_MAXBLOCKSIZE);
2212
2213         do {
2214                 rv = read(fd, cp, len);
2215                 cp += rv;
2216                 len -= rv;
2217         } while (rv > 0);
2218
2219         if (rv < 0 || len != 0) {
2220                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2221                     "failed to read from stream"));
2222                 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2223                     "cannot receive")));
2224         }
2225
2226         if (zc) {
2227                 if (byteswap)
2228                         fletcher_4_incremental_byteswap(buf, ilen, zc);
2229                 else
2230                         fletcher_4_incremental_native(buf, ilen, zc);
2231         }
2232         return (0);
2233 }
2234
2235 static int
2236 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2237     boolean_t byteswap, zio_cksum_t *zc)
2238 {
2239         char *buf;
2240         int err;
2241
2242         buf = zfs_alloc(hdl, len);
2243         if (buf == NULL)
2244                 return (ENOMEM);
2245
2246         err = recv_read(hdl, fd, buf, len, byteswap, zc);
2247         if (err != 0) {
2248                 free(buf);
2249                 return (err);
2250         }
2251
2252         err = nvlist_unpack(buf, len, nvp, 0);
2253         free(buf);
2254         if (err != 0) {
2255                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2256                     "stream (malformed nvlist)"));
2257                 return (EINVAL);
2258         }
2259         return (0);
2260 }
2261
2262 /*
2263  * Returns the grand origin (origin of origin of origin...) of a given handle.
2264  * If this dataset is not a clone, it simply returns a copy of the original
2265  * handle.
2266  */
2267 static zfs_handle_t *
2268 recv_open_grand_origin(zfs_handle_t *zhp)
2269 {
2270         char origin[ZFS_MAX_DATASET_NAME_LEN];
2271         zprop_source_t src;
2272         zfs_handle_t *ozhp = zfs_handle_dup(zhp);
2273
2274         while (ozhp != NULL) {
2275                 if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
2276                     sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
2277                         break;
2278
2279                 (void) zfs_close(ozhp);
2280                 ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
2281         }
2282
2283         return (ozhp);
2284 }
2285
2286 static int
2287 recv_rename_impl(zfs_handle_t *zhp, zfs_cmd_t *zc)
2288 {
2289         int err;
2290         zfs_handle_t *ozhp = NULL;
2291
2292         /*
2293          * Attempt to rename the dataset. If it fails with EACCES we have
2294          * attempted to rename the dataset outside of its encryption root.
2295          * Force the dataset to become an encryption root and try again.
2296          */
2297         err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2298         if (err == EACCES) {
2299                 ozhp = recv_open_grand_origin(zhp);
2300                 if (ozhp == NULL) {
2301                         err = ENOENT;
2302                         goto out;
2303                 }
2304
2305                 err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2306                     NULL, NULL, 0);
2307                 if (err != 0)
2308                         goto out;
2309
2310                 err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2311         }
2312
2313 out:
2314         if (ozhp != NULL)
2315                 zfs_close(ozhp);
2316         return (err);
2317 }
2318
2319 static int
2320 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2321     int baselen, char *newname, recvflags_t *flags)
2322 {
2323         static int seq;
2324         zfs_cmd_t zc = {"\0"};
2325         int err;
2326         prop_changelist_t *clp = NULL;
2327         zfs_handle_t *zhp = NULL;
2328
2329         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2330         if (zhp == NULL) {
2331                 err = -1;
2332                 goto out;
2333         }
2334         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2335             flags->force ? MS_FORCE : 0);
2336         if (clp == NULL) {
2337                 err = -1;
2338                 goto out;
2339         }
2340         err = changelist_prefix(clp);
2341         if (err)
2342                 goto out;
2343
2344         zc.zc_objset_type = DMU_OST_ZFS;
2345         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2346
2347         if (tryname) {
2348                 (void) strcpy(newname, tryname);
2349
2350                 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
2351
2352                 if (flags->verbose) {
2353                         (void) printf("attempting rename %s to %s\n",
2354                             zc.zc_name, zc.zc_value);
2355                 }
2356                 err = recv_rename_impl(zhp, &zc);
2357                 if (err == 0)
2358                         changelist_rename(clp, name, tryname);
2359         } else {
2360                 err = ENOENT;
2361         }
2362
2363         if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2364                 seq++;
2365
2366                 (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2367                     "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2368                 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
2369
2370                 if (flags->verbose) {
2371                         (void) printf("failed - trying rename %s to %s\n",
2372                             zc.zc_name, zc.zc_value);
2373                 }
2374                 err = recv_rename_impl(zhp, &zc);
2375                 if (err == 0)
2376                         changelist_rename(clp, name, newname);
2377                 if (err && flags->verbose) {
2378                         (void) printf("failed (%u) - "
2379                             "will try again on next pass\n", errno);
2380                 }
2381                 err = EAGAIN;
2382         } else if (flags->verbose) {
2383                 if (err == 0)
2384                         (void) printf("success\n");
2385                 else
2386                         (void) printf("failed (%u)\n", errno);
2387         }
2388
2389         (void) changelist_postfix(clp);
2390
2391 out:
2392         if (clp != NULL)
2393                 changelist_free(clp);
2394         if (zhp != NULL)
2395                 zfs_close(zhp);
2396
2397         return (err);
2398 }
2399
2400 static int
2401 recv_promote(libzfs_handle_t *hdl, const char *fsname,
2402     const char *origin_fsname, recvflags_t *flags)
2403 {
2404         int err;
2405         zfs_cmd_t zc = {"\0"};
2406         zfs_handle_t *zhp = NULL, *ozhp = NULL;
2407
2408         if (flags->verbose)
2409                 (void) printf("promoting %s\n", fsname);
2410
2411         (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
2412         (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
2413
2414         /*
2415          * Attempt to promote the dataset. If it fails with EACCES the
2416          * promotion would cause this dataset to leave its encryption root.
2417          * Force the origin to become an encryption root and try again.
2418          */
2419         err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2420         if (err == EACCES) {
2421                 zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
2422                 if (zhp == NULL) {
2423                         err = -1;
2424                         goto out;
2425                 }
2426
2427                 ozhp = recv_open_grand_origin(zhp);
2428                 if (ozhp == NULL) {
2429                         err = -1;
2430                         goto out;
2431                 }
2432
2433                 err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2434                     NULL, NULL, 0);
2435                 if (err != 0)
2436                         goto out;
2437
2438                 err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2439         }
2440
2441 out:
2442         if (zhp != NULL)
2443                 zfs_close(zhp);
2444         if (ozhp != NULL)
2445                 zfs_close(ozhp);
2446
2447         return (err);
2448 }
2449
2450 static int
2451 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2452     char *newname, recvflags_t *flags)
2453 {
2454         zfs_cmd_t zc = {"\0"};
2455         int err = 0;
2456         prop_changelist_t *clp;
2457         zfs_handle_t *zhp;
2458         boolean_t defer = B_FALSE;
2459         int spa_version;
2460
2461         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2462         if (zhp == NULL)
2463                 return (-1);
2464         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2465             flags->force ? MS_FORCE : 0);
2466         if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2467             zfs_spa_version(zhp, &spa_version) == 0 &&
2468             spa_version >= SPA_VERSION_USERREFS)
2469                 defer = B_TRUE;
2470         zfs_close(zhp);
2471         if (clp == NULL)
2472                 return (-1);
2473         err = changelist_prefix(clp);
2474         if (err)
2475                 return (err);
2476
2477         zc.zc_objset_type = DMU_OST_ZFS;
2478         zc.zc_defer_destroy = defer;
2479         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2480
2481         if (flags->verbose)
2482                 (void) printf("attempting destroy %s\n", zc.zc_name);
2483         err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
2484         if (err == 0) {
2485                 if (flags->verbose)
2486                         (void) printf("success\n");
2487                 changelist_remove(clp, zc.zc_name);
2488         }
2489
2490         (void) changelist_postfix(clp);
2491         changelist_free(clp);
2492
2493         /*
2494          * Deferred destroy might destroy the snapshot or only mark it to be
2495          * destroyed later, and it returns success in either case.
2496          */
2497         if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2498             ZFS_TYPE_SNAPSHOT))) {
2499                 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2500         }
2501
2502         return (err);
2503 }
2504
/*
 * Search state passed through the iteration callbacks by guid_to_name() and
 * guid_to_name_cb().
 */
typedef struct guid_to_name_data {
        uint64_t guid;          /* guid compared against ZFS_PROP_GUID */
        boolean_t bookmark_ok;  /* when set, bookmarks are iterated too */
        char *name;             /* out: receives the matching zfs_name */
        char *skip;             /* last name component to skip, or NULL */
} guid_to_name_data_t;
2511
2512 static int
2513 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
2514 {
2515         guid_to_name_data_t *gtnd = arg;
2516         const char *slash;
2517         int err;
2518
2519         if (gtnd->skip != NULL &&
2520             (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2521             strcmp(slash + 1, gtnd->skip) == 0) {
2522                 zfs_close(zhp);
2523                 return (0);
2524         }
2525
2526         if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
2527                 (void) strcpy(gtnd->name, zhp->zfs_name);
2528                 zfs_close(zhp);
2529                 return (EEXIST);
2530         }
2531
2532         err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2533         if (err != EEXIST && gtnd->bookmark_ok)
2534                 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2535         zfs_close(zhp);
2536         return (err);
2537 }
2538
2539 /*
2540  * Attempt to find the local dataset associated with this guid.  In the case of
2541  * multiple matches, we attempt to find the "best" match by searching
2542  * progressively larger portions of the hierarchy.  This allows one to send a
2543  * tree of datasets individually and guarantee that we will find the source
2544  * guid within that hierarchy, even if there are multiple matches elsewhere.
2545  */
2546 static int
2547 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
2548     boolean_t bookmark_ok, char *name)
2549 {
2550         char pname[ZFS_MAX_DATASET_NAME_LEN];
2551         guid_to_name_data_t gtnd;
2552
2553         gtnd.guid = guid;
2554         gtnd.bookmark_ok = bookmark_ok;
2555         gtnd.name = name;
2556         gtnd.skip = NULL;
2557
2558         /*
2559          * Search progressively larger portions of the hierarchy, starting
2560          * with the filesystem specified by 'parent'.  This will
2561          * select the "most local" version of the origin snapshot in the case
2562          * that there are multiple matching snapshots in the system.
2563          */
2564         (void) strlcpy(pname, parent, sizeof (pname));
2565         char *cp = strrchr(pname, '@');
2566         if (cp == NULL)
2567                 cp = strchr(pname, '\0');
2568         for (; cp != NULL; cp = strrchr(pname, '/')) {
2569                 /* Chop off the last component and open the parent */
2570                 *cp = '\0';
2571                 zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
2572
2573                 if (zhp == NULL)
2574                         continue;
2575                 int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
2576                 if (err != EEXIST)
2577                         err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
2578                 if (err != EEXIST && bookmark_ok)
2579                         err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
2580                 zfs_close(zhp);
2581                 if (err == EEXIST)
2582                         return (0);
2583
2584                 /*
2585                  * Remember the last portion of the dataset so we skip it next
2586                  * time through (as we've already searched that portion of the
2587                  * hierarchy).
2588                  */
2589                 gtnd.skip = strrchr(pname, '/') + 1;
2590         }
2591
2592         return (ENOENT);
2593 }
2594
2595 /*
2596  * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
2597  * guid1 is after guid2.
2598  */
2599 static int
2600 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
2601     uint64_t guid1, uint64_t guid2)
2602 {
2603         nvlist_t *nvfs;
2604         char *fsname = NULL, *snapname = NULL;
2605         char buf[ZFS_MAX_DATASET_NAME_LEN];
2606         int rv;
2607         zfs_handle_t *guid1hdl, *guid2hdl;
2608         uint64_t create1, create2;
2609
2610         if (guid2 == 0)
2611                 return (0);
2612         if (guid1 == 0)
2613                 return (1);
2614
2615         nvfs = fsavl_find(avl, guid1, &snapname);
2616         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2617         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2618         guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2619         if (guid1hdl == NULL)
2620                 return (-1);
2621
2622         nvfs = fsavl_find(avl, guid2, &snapname);
2623         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2624         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2625         guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2626         if (guid2hdl == NULL) {
2627                 zfs_close(guid1hdl);
2628                 return (-1);
2629         }
2630
2631         create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2632         create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2633
2634         if (create1 < create2)
2635                 rv = -1;
2636         else if (create1 > create2)
2637                 rv = +1;
2638         else
2639                 rv = 0;
2640
2641         zfs_close(guid1hdl);
2642         zfs_close(guid2hdl);
2643
2644         return (rv);
2645 }
2646
/*
 * This function reestablishes the hierarchy of encryption roots after a
 * recursive incremental receive has completed. This must be done after the
 * second call to recv_incremental_replication() has renamed and promoted all
 * sent datasets to their final locations in the dataset hierarchy.
 */
2653 static int
2654 recv_fix_encryption_heirarchy(libzfs_handle_t *hdl, const char *destname,
2655     nvlist_t *stream_nv, avl_tree_t *stream_avl)
2656 {
2657         int err;
2658         nvpair_t *fselem = NULL;
2659         nvlist_t *stream_fss;
2660         char *cp;
2661         char top_zfs[ZFS_MAX_DATASET_NAME_LEN];
2662
2663         (void) strcpy(top_zfs, destname);
2664         cp = strrchr(top_zfs, '@');
2665         if (cp != NULL)
2666                 *cp = '\0';
2667
2668         VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss));
2669
2670         while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
2671                 zfs_handle_t *zhp = NULL;
2672                 uint64_t crypt;
2673                 nvlist_t *snaps, *props, *stream_nvfs = NULL;
2674                 nvpair_t *snapel = NULL;
2675                 boolean_t is_encroot, is_clone, stream_encroot;
2676                 char *cp;
2677                 char *stream_keylocation = NULL;
2678                 char keylocation[MAXNAMELEN];
2679                 char fsname[ZFS_MAX_DATASET_NAME_LEN];
2680
2681                 keylocation[0] = '\0';
2682                 VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs));
2683                 VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps));
2684                 VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props));
2685                 stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
2686
2687                 /* find a snapshot from the stream that exists locally */
2688                 err = ENOENT;
2689                 while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
2690                         uint64_t guid;
2691
2692                         VERIFY(0 == nvpair_value_uint64(snapel, &guid));
2693                         err = guid_to_name(hdl, destname, guid, B_FALSE,
2694                             fsname);
2695                         if (err == 0)
2696                                 break;
2697                 }
2698
2699                 if (err != 0)
2700                         continue;
2701
2702                 cp = strchr(fsname, '@');
2703                 if (cp != NULL)
2704                         *cp = '\0';
2705
2706                 zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
2707                 if (zhp == NULL) {
2708                         err = ENOENT;
2709                         goto error;
2710                 }
2711
2712                 crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
2713                 is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
2714                 (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
2715
2716                 /* we don't need to do anything for unencrypted filesystems */
2717                 if (crypt == ZIO_CRYPT_OFF) {
2718                         zfs_close(zhp);
2719                         continue;
2720                 }
2721
2722                 /*
2723                  * If the dataset is flagged as an encryption root, was not
2724                  * received as a clone and is not currently an encryption root,
2725                  * force it to become one. Fixup the keylocation if necessary.
2726                  */
2727                 if (stream_encroot) {
2728                         if (!is_clone && !is_encroot) {
2729                                 err = lzc_change_key(fsname,
2730                                     DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
2731                                 if (err != 0) {
2732                                         zfs_close(zhp);
2733                                         goto error;
2734                                 }
2735                         }
2736
2737                         VERIFY(0 == nvlist_lookup_string(props,
2738                             zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
2739                             &stream_keylocation));
2740
2741                         /*
2742                          * Refresh the properties in case the call to
2743                          * lzc_change_key() changed the value.
2744                          */
2745                         zfs_refresh_properties(zhp);
2746                         err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
2747                             keylocation, sizeof (keylocation), NULL, NULL,
2748                             0, B_TRUE);
2749                         if (err != 0) {
2750                                 zfs_close(zhp);
2751                                 goto error;
2752                         }
2753
2754                         if (strcmp(keylocation, stream_keylocation) != 0) {
2755                                 err = zfs_prop_set(zhp,
2756                                     zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
2757                                     stream_keylocation);
2758                                 if (err != 0) {
2759                                         zfs_close(zhp);
2760                                         goto error;
2761                                 }
2762                         }
2763                 }
2764
2765                 /*
2766                  * If the dataset is not flagged as an encryption root and is
2767                  * currently an encryption root, force it to inherit from its
2768                  * parent. The root of a raw send should never be
2769                  * force-inherited.
2770                  */
2771                 if (!stream_encroot && is_encroot &&
2772                     strcmp(top_zfs, fsname) != 0) {
2773                         err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
2774                             NULL, NULL, 0);
2775                         if (err != 0) {
2776                                 zfs_close(zhp);
2777                                 goto error;
2778                         }
2779                 }
2780
2781                 zfs_close(zhp);
2782         }
2783
2784         return (0);
2785
2786 error:
2787         return (err);
2788 }
2789
2790 static int
2791 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2792     recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2793     nvlist_t *renamed)
2794 {
2795         nvlist_t *local_nv, *deleted = NULL;
2796         avl_tree_t *local_avl;
2797         nvpair_t *fselem, *nextfselem;
2798         char *fromsnap;
2799         char newname[ZFS_MAX_DATASET_NAME_LEN];
2800         char guidname[32];
2801         int error;
2802         boolean_t needagain, progress, recursive;
2803         char *s1, *s2;
2804
2805         VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2806
2807         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2808             ENOENT);
2809
2810         if (flags->dryrun)
2811                 return (0);
2812
2813 again:
2814         needagain = progress = B_FALSE;
2815
2816         VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2817
2818         if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2819             recursive, B_TRUE, B_FALSE, &local_nv, &local_avl)) != 0)
2820                 return (error);
2821
2822         /*
2823          * Process deletes and renames
2824          */
2825         for (fselem = nvlist_next_nvpair(local_nv, NULL);
2826             fselem; fselem = nextfselem) {
2827                 nvlist_t *nvfs, *snaps;
2828                 nvlist_t *stream_nvfs = NULL;
2829                 nvpair_t *snapelem, *nextsnapelem;
2830                 uint64_t fromguid = 0;
2831                 uint64_t originguid = 0;
2832                 uint64_t stream_originguid = 0;
2833                 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2834                 char *fsname, *stream_fsname;
2835
2836                 nextfselem = nvlist_next_nvpair(local_nv, fselem);
2837
2838                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2839                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2840                 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2841                 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2842                     &parent_fromsnap_guid));
2843                 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2844
2845                 /*
2846                  * First find the stream's fs, so we can check for
2847                  * a different origin (due to "zfs promote")
2848                  */
2849                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2850                     snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2851                         uint64_t thisguid;
2852
2853                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2854                         stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2855
2856                         if (stream_nvfs != NULL)
2857                                 break;
2858                 }
2859
2860                 /* check for promote */
2861                 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
2862                     &stream_originguid);
2863                 if (stream_nvfs && originguid != stream_originguid) {
2864                         switch (created_before(hdl, local_avl,
2865                             stream_originguid, originguid)) {
2866                         case 1: {
2867                                 /* promote it! */
2868                                 nvlist_t *origin_nvfs;
2869                                 char *origin_fsname;
2870
2871                                 origin_nvfs = fsavl_find(local_avl, originguid,
2872                                     NULL);
2873                                 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2874                                     "name", &origin_fsname));
2875                                 error = recv_promote(hdl, fsname, origin_fsname,
2876                                     flags);
2877                                 if (error == 0)
2878                                         progress = B_TRUE;
2879                                 break;
2880                         }
2881                         default:
2882                                 break;
2883                         case -1:
2884                                 fsavl_destroy(local_avl);
2885                                 nvlist_free(local_nv);
2886                                 return (-1);
2887                         }
2888                         /*
2889                          * We had/have the wrong origin, therefore our
2890                          * list of snapshots is wrong.  Need to handle
2891                          * them on the next pass.
2892                          */
2893                         needagain = B_TRUE;
2894                         continue;
2895                 }
2896
2897                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2898                     snapelem; snapelem = nextsnapelem) {
2899                         uint64_t thisguid;
2900                         char *stream_snapname;
2901                         nvlist_t *found, *props;
2902
2903                         nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2904
2905                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2906                         found = fsavl_find(stream_avl, thisguid,
2907                             &stream_snapname);
2908
2909                         /* check for delete */
2910                         if (found == NULL) {
2911                                 char name[ZFS_MAX_DATASET_NAME_LEN];
2912
2913                                 if (!flags->force)
2914                                         continue;
2915
2916                                 (void) snprintf(name, sizeof (name), "%s@%s",
2917                                     fsname, nvpair_name(snapelem));
2918
2919                                 error = recv_destroy(hdl, name,
2920                                     strlen(fsname)+1, newname, flags);
2921                                 if (error)
2922                                         needagain = B_TRUE;
2923                                 else
2924                                         progress = B_TRUE;
2925                                 sprintf(guidname, "%llu",
2926                                     (u_longlong_t)thisguid);
2927                                 nvlist_add_boolean(deleted, guidname);
2928                                 continue;
2929                         }
2930
2931                         stream_nvfs = found;
2932
2933                         if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2934                             &props) && 0 == nvlist_lookup_nvlist(props,
2935                             stream_snapname, &props)) {
2936                                 zfs_cmd_t zc = {"\0"};
2937
2938                                 zc.zc_cookie = B_TRUE; /* received */
2939                                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2940                                     "%s@%s", fsname, nvpair_name(snapelem));
2941                                 if (zcmd_write_src_nvlist(hdl, &zc,
2942                                     props) == 0) {
2943                                         (void) zfs_ioctl(hdl,
2944                                             ZFS_IOC_SET_PROP, &zc);
2945                                         zcmd_free_nvlists(&zc);
2946                                 }
2947                         }
2948
2949                         /* check for different snapname */
2950                         if (strcmp(nvpair_name(snapelem),
2951                             stream_snapname) != 0) {
2952                                 char name[ZFS_MAX_DATASET_NAME_LEN];
2953                                 char tryname[ZFS_MAX_DATASET_NAME_LEN];
2954
2955                                 (void) snprintf(name, sizeof (name), "%s@%s",
2956                                     fsname, nvpair_name(snapelem));
2957                                 (void) snprintf(tryname, sizeof (name), "%s@%s",
2958                                     fsname, stream_snapname);
2959
2960                                 error = recv_rename(hdl, name, tryname,
2961                                     strlen(fsname)+1, newname, flags);
2962                                 if (error)
2963                                         needagain = B_TRUE;
2964                                 else
2965                                         progress = B_TRUE;
2966                         }
2967
2968                         if (strcmp(stream_snapname, fromsnap) == 0)
2969                                 fromguid = thisguid;
2970                 }
2971
2972                 /* check for delete */
2973                 if (stream_nvfs == NULL) {
2974                         if (!flags->force)
2975                                 continue;
2976
2977                         error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2978                             newname, flags);
2979                         if (error)
2980                                 needagain = B_TRUE;
2981                         else
2982                                 progress = B_TRUE;
2983                         sprintf(guidname, "%llu",
2984                             (u_longlong_t)parent_fromsnap_guid);
2985                         nvlist_add_boolean(deleted, guidname);
2986                         continue;
2987                 }
2988
2989                 if (fromguid == 0) {
2990                         if (flags->verbose) {
2991                                 (void) printf("local fs %s does not have "
2992                                     "fromsnap (%s in stream); must have "
2993                                     "been deleted locally; ignoring\n",
2994                                     fsname, fromsnap);
2995                         }
2996                         continue;
2997                 }
2998
2999                 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
3000                     "name", &stream_fsname));
3001                 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
3002                     "parentfromsnap", &stream_parent_fromsnap_guid));
3003
3004                 s1 = strrchr(fsname, '/');
3005                 s2 = strrchr(stream_fsname, '/');
3006
3007                 /*
3008                  * Check if we're going to rename based on parent guid change
3009                  * and the current parent guid was also deleted. If it was then
3010                  * rename will fail and is likely unneeded, so avoid this and
3011                  * force an early retry to determine the new
3012                  * parent_fromsnap_guid.
3013                  */
3014                 if (stream_parent_fromsnap_guid != 0 &&
3015                     parent_fromsnap_guid != 0 &&
3016                     stream_parent_fromsnap_guid != parent_fromsnap_guid) {
3017                         sprintf(guidname, "%llu",
3018                             (u_longlong_t)parent_fromsnap_guid);
3019                         if (nvlist_exists(deleted, guidname)) {
3020                                 progress = B_TRUE;
3021                                 needagain = B_TRUE;
3022                                 goto doagain;
3023                         }
3024                 }
3025
3026                 /*
3027                  * Check for rename. If the exact receive path is specified, it
3028                  * does not count as a rename, but we still need to check the
3029                  * datasets beneath it.
3030                  */
3031                 if ((stream_parent_fromsnap_guid != 0 &&
3032                     parent_fromsnap_guid != 0 &&
3033                     stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
3034                     ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
3035                     (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
3036                         nvlist_t *parent;
3037                         char tryname[ZFS_MAX_DATASET_NAME_LEN];
3038
3039                         parent = fsavl_find(local_avl,
3040                             stream_parent_fromsnap_guid, NULL);
3041                         /*
3042                          * NB: parent might not be found if we used the
3043                          * tosnap for stream_parent_fromsnap_guid,
3044                          * because the parent is a newly-created fs;
3045                          * we'll be able to rename it after we recv the
3046                          * new fs.
3047                          */
3048                         if (parent != NULL) {
3049                                 char *pname;
3050
3051                                 VERIFY(0 == nvlist_lookup_string(parent, "name",
3052                                     &pname));
3053                                 (void) snprintf(tryname, sizeof (tryname),
3054                                     "%s%s", pname, strrchr(stream_fsname, '/'));
3055                         } else {
3056                                 tryname[0] = '\0';
3057                                 if (flags->verbose) {
3058                                         (void) printf("local fs %s new parent "
3059                                             "not found\n", fsname);
3060                                 }
3061                         }
3062
3063                         newname[0] = '\0';
3064
3065                         error = recv_rename(hdl, fsname, tryname,
3066                             strlen(tofs)+1, newname, flags);
3067
3068                         if (renamed != NULL && newname[0] != '\0') {
3069                                 VERIFY(0 == nvlist_add_boolean(renamed,
3070                                     newname));
3071                         }
3072
3073                         if (error)
3074                                 needagain = B_TRUE;
3075                         else
3076                                 progress = B_TRUE;
3077                 }
3078         }
3079
3080 doagain:
3081         fsavl_destroy(local_avl);
3082         nvlist_free(local_nv);
3083         nvlist_free(deleted);
3084
3085         if (needagain && progress) {
3086                 /* do another pass to fix up temporary names */
3087                 if (flags->verbose)
3088                         (void) printf("another pass:\n");
3089                 goto again;
3090         }
3091
3092         return (needagain || error != 0);
3093 }
3094
3095 static int
3096 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
3097     recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
3098     char **top_zfs, int cleanup_fd, uint64_t *action_handlep,
3099     nvlist_t *cmdprops)
3100 {
3101         nvlist_t *stream_nv = NULL;
3102         avl_tree_t *stream_avl = NULL;
3103         char *fromsnap = NULL;
3104         char *sendsnap = NULL;
3105         char *cp;
3106         char tofs[ZFS_MAX_DATASET_NAME_LEN];
3107         char sendfs[ZFS_MAX_DATASET_NAME_LEN];
3108         char errbuf[1024];
3109         dmu_replay_record_t drre;
3110         int error;
3111         boolean_t anyerr = B_FALSE;
3112         boolean_t softerr = B_FALSE;
3113         boolean_t recursive, raw;
3114
3115         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3116             "cannot receive"));
3117
3118         assert(drr->drr_type == DRR_BEGIN);
3119         assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
3120         assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
3121             DMU_COMPOUNDSTREAM);
3122
3123         /*
3124          * Read in the nvlist from the stream.
3125          */
3126         if (drr->drr_payloadlen != 0) {
3127                 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
3128                     &stream_nv, flags->byteswap, zc);
3129                 if (error) {
3130                         error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3131                         goto out;
3132                 }
3133         }
3134
3135         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3136             ENOENT);
3137         raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
3138
3139         if (recursive && strchr(destname, '@')) {
3140                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3141                     "cannot specify snapshot name for multi-snapshot stream"));
3142                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3143                 goto out;
3144         }
3145
3146         /*
3147          * Read in the end record and verify checksum.
3148          */
3149         if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
3150             flags->byteswap, NULL)))
3151                 goto out;
3152         if (flags->byteswap) {
3153                 drre.drr_type = BSWAP_32(drre.drr_type);
3154                 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
3155                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
3156                 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
3157                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
3158                 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
3159                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
3160                 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
3161                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
3162         }
3163         if (drre.drr_type != DRR_END) {
3164                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3165                 goto out;
3166         }
3167         if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
3168                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3169                     "incorrect header checksum"));
3170                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3171                 goto out;
3172         }
3173
3174         (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
3175
3176         if (drr->drr_payloadlen != 0) {
3177                 nvlist_t *stream_fss;
3178
3179                 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
3180                     &stream_fss));
3181                 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
3182                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3183                             "couldn't allocate avl tree"));
3184                         error = zfs_error(hdl, EZFS_NOMEM, errbuf);
3185                         goto out;
3186                 }
3187
3188                 if (fromsnap != NULL && recursive) {
3189                         nvlist_t *renamed = NULL;
3190                         nvpair_t *pair = NULL;
3191
3192                         (void) strlcpy(tofs, destname, sizeof (tofs));
3193                         if (flags->isprefix) {
3194                                 struct drr_begin *drrb = &drr->drr_u.drr_begin;
3195                                 int i;
3196
3197                                 if (flags->istail) {
3198                                         cp = strrchr(drrb->drr_toname, '/');
3199                                         if (cp == NULL) {
3200                                                 (void) strlcat(tofs, "/",
3201                                                     sizeof (tofs));
3202                                                 i = 0;
3203                                         } else {
3204                                                 i = (cp - drrb->drr_toname);
3205                                         }
3206                                 } else {
3207                                         i = strcspn(drrb->drr_toname, "/@");
3208                                 }
3209                                 /* zfs_receive_one() will create_parents() */
3210                                 (void) strlcat(tofs, &drrb->drr_toname[i],
3211                                     sizeof (tofs));
3212                                 *strchr(tofs, '@') = '\0';
3213                         }
3214
3215                         if (!flags->dryrun && !flags->nomount) {
3216                                 VERIFY(0 == nvlist_alloc(&renamed,
3217                                     NV_UNIQUE_NAME, 0));
3218                         }
3219
3220                         softerr = recv_incremental_replication(hdl, tofs, flags,
3221                             stream_nv, stream_avl, renamed);
3222
3223                         /* Unmount renamed filesystems before receiving. */
3224                         while ((pair = nvlist_next_nvpair(renamed,
3225                             pair)) != NULL) {
3226                                 zfs_handle_t *zhp;
3227                                 prop_changelist_t *clp = NULL;
3228
3229                                 zhp = zfs_open(hdl, nvpair_name(pair),
3230                                     ZFS_TYPE_FILESYSTEM);
3231                                 if (zhp != NULL) {
3232                                         clp = changelist_gather(zhp,
3233                                             ZFS_PROP_MOUNTPOINT, 0, 0);
3234                                         zfs_close(zhp);
3235                                         if (clp != NULL) {
3236                                                 softerr |=
3237                                                     changelist_prefix(clp);
3238                                                 changelist_free(clp);
3239                                         }
3240                                 }
3241                         }
3242
3243                         nvlist_free(renamed);
3244                 }
3245         }
3246
3247         /*
3248          * Get the fs specified by the first path in the stream (the top level
3249          * specified by 'zfs send') and pass it to each invocation of
3250          * zfs_receive_one().
3251          */
3252         (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
3253             sizeof (sendfs));
3254         if ((cp = strchr(sendfs, '@')) != NULL) {
3255                 *cp = '\0';
3256                 /*
3257                  * Find the "sendsnap", the final snapshot in a replication
3258                  * stream.  zfs_receive_one() handles certain errors
3259                  * differently, depending on if the contained stream is the
3260                  * last one or not.
3261                  */
3262                 sendsnap = (cp + 1);
3263         }
3264
3265         /* Finally, receive each contained stream */
3266         do {
3267                 /*
3268                  * we should figure out if it has a recoverable
3269                  * error, in which case do a recv_skip() and drive on.
3270                  * Note, if we fail due to already having this guid,
3271                  * zfs_receive_one() will take care of it (ie,
3272                  * recv_skip() and return 0).
3273                  */
3274                 error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
3275                     sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
3276                     action_handlep, sendsnap, cmdprops);
3277                 if (error == ENODATA) {
3278                         error = 0;
3279                         break;
3280                 }
3281                 anyerr |= error;
3282         } while (error == 0);
3283
3284         if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
3285                 /*
3286                  * Now that we have the fs's they sent us, try the
3287                  * renames again.
3288                  */
3289                 softerr = recv_incremental_replication(hdl, tofs, flags,
3290                     stream_nv, stream_avl, NULL);
3291         }
3292
3293         if (raw && softerr == 0) {
3294                 softerr = recv_fix_encryption_heirarchy(hdl, destname,
3295                     stream_nv, stream_avl);
3296         }
3297
3298 out:
3299         fsavl_destroy(stream_avl);
3300         nvlist_free(stream_nv);
3301         if (softerr)
3302                 error = -2;
3303         if (anyerr)
3304                 error = -1;
3305         return (error);
3306 }
3307
3308 static void
3309 trunc_prop_errs(int truncated)
3310 {
3311         ASSERT(truncated != 0);
3312
3313         if (truncated == 1)
3314                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3315                     "1 more property could not be set\n"));
3316         else
3317                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
3318                     "%d more properties could not be set\n"), truncated);
3319 }
3320
3321 static int
3322 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
3323 {
3324         dmu_replay_record_t *drr;
3325         void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
3326         char errbuf[1024];
3327
3328         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3329             "cannot receive:"));
3330
3331         /* XXX would be great to use lseek if possible... */
3332         drr = buf;
3333
3334         while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
3335             byteswap, NULL) == 0) {
3336                 if (byteswap)
3337                         drr->drr_type = BSWAP_32(drr->drr_type);
3338
3339                 switch (drr->drr_type) {
3340                 case DRR_BEGIN:
3341                         if (drr->drr_payloadlen != 0) {
3342                                 (void) recv_read(hdl, fd, buf,
3343                                     drr->drr_payloadlen, B_FALSE, NULL);
3344                         }
3345                         break;
3346
3347                 case DRR_END:
3348                         free(buf);
3349                         return (0);
3350
3351                 case DRR_OBJECT:
3352                         if (byteswap) {
3353                                 drr->drr_u.drr_object.drr_bonuslen =
3354                                     BSWAP_32(drr->drr_u.drr_object.
3355                                     drr_bonuslen);
3356                         }
3357                         (void) recv_read(hdl, fd, buf,
3358                             P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
3359                             B_FALSE, NULL);
3360                         break;
3361
3362                 case DRR_WRITE:
3363                         if (byteswap) {
3364                                 drr->drr_u.drr_write.drr_logical_size =
3365                                     BSWAP_64(
3366                                     drr->drr_u.drr_write.drr_logical_size);
3367                                 drr->drr_u.drr_write.drr_compressed_size =
3368                                     BSWAP_64(
3369                                     drr->drr_u.drr_write.drr_compressed_size);
3370                         }
3371                         uint64_t payload_size =
3372                             DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
3373                         (void) recv_read(hdl, fd, buf,
3374                             payload_size, B_FALSE, NULL);
3375                         break;
3376                 case DRR_SPILL:
3377                         if (byteswap) {
3378                                 drr->drr_u.drr_spill.drr_length =
3379                                     BSWAP_64(drr->drr_u.drr_spill.drr_length);
3380                         }
3381                         (void) recv_read(hdl, fd, buf,
3382                             drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
3383                         break;
3384                 case DRR_WRITE_EMBEDDED:
3385                         if (byteswap) {
3386                                 drr->drr_u.drr_write_embedded.drr_psize =
3387                                     BSWAP_32(drr->drr_u.drr_write_embedded.
3388                                     drr_psize);
3389                         }
3390                         (void) recv_read(hdl, fd, buf,
3391                             P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
3392                             8), B_FALSE, NULL);
3393                         break;
3394                 case DRR_WRITE_BYREF:
3395                 case DRR_FREEOBJECTS:
3396                 case DRR_FREE:
3397                         break;
3398
3399                 default:
3400                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3401                             "invalid record type"));
3402                         free(buf);
3403                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3404                 }
3405         }
3406
3407         free(buf);
3408         return (-1);
3409 }
3410
3411 static void
3412 recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
3413     boolean_t resumable)
3414 {
3415         char target_fs[ZFS_MAX_DATASET_NAME_LEN];
3416
3417         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3418             "checksum mismatch or incomplete stream"));
3419
3420         if (!resumable)
3421                 return;
3422         (void) strlcpy(target_fs, target_snap, sizeof (target_fs));
3423         *strchr(target_fs, '@') = '\0';
3424         zfs_handle_t *zhp = zfs_open(hdl, target_fs,
3425             ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3426         if (zhp == NULL)
3427                 return;
3428
3429         char token_buf[ZFS_MAXPROPLEN];
3430         int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
3431             token_buf, sizeof (token_buf),
3432             NULL, NULL, 0, B_TRUE);
3433         if (error == 0) {
3434                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3435                     "checksum mismatch or incomplete stream.\n"
3436                     "Partially received snapshot is saved.\n"
3437                     "A resuming stream can be generated on the sending "
3438                     "system by running:\n"
3439                     "    zfs send -t %s"),
3440                     token_buf);
3441         }
3442         zfs_close(zhp);
3443 }
3444
3445 /*
3446  * Prepare a new nvlist of properties that are to override (-o) or be excluded
3447  * (-x) from the received dataset
3448  * recvprops: received properties from the send stream
3449  * cmdprops: raw input properties from command line
3450  * origprops: properties, both locally-set and received, currently set on the
3451  *            target dataset if it exists, NULL otherwise.
3452  * oxprops: valid output override (-o) and excluded (-x) properties
3453  */
3454 static int
3455 zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type, boolean_t zoned,
3456     boolean_t recursive, boolean_t toplevel, nvlist_t *recvprops,
3457     nvlist_t *cmdprops, nvlist_t *origprops, nvlist_t **oxprops,
3458     const char *errbuf)
3459 {
3460         nvpair_t *nvp;
3461         nvlist_t *oprops, *voprops;
3462         zfs_handle_t *zhp = NULL;
3463         zpool_handle_t *zpool_hdl = NULL;
3464         int ret = 0;
3465
3466         if (nvlist_empty(cmdprops))
3467                 return (0); /* No properties to override or exclude */
3468
3469         *oxprops = fnvlist_alloc();
3470         oprops = fnvlist_alloc();
3471
3472         /*
3473          * first iteration: process excluded (-x) properties now and gather
3474          * added (-o) properties to be later processed by zfs_valid_proplist()
3475          */
3476         nvp = NULL;
3477         while ((nvp = nvlist_next_nvpair(cmdprops, nvp)) != NULL) {
3478                 const char *name = nvpair_name(nvp);
3479                 zfs_prop_t prop = zfs_name_to_prop(name);
3480
3481                 /* "origin" is processed separately, don't handle it here */
3482                 if (prop == ZFS_PROP_ORIGIN)
3483                         continue;
3484
3485                 /*
3486                  * we're trying to override or exclude a property that does not
3487                  * make sense for this type of dataset, but we don't want to
3488                  * fail if the receive is recursive: this comes in handy when
3489                  * the send stream contains, for instance, a child ZVOL and
3490                  * we're trying to receive it with "-o atime=on"
3491                  */
3492                 if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
3493                     !zfs_prop_user(name)) {
3494                         if (recursive)
3495                                 continue;
3496                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3497                             "property '%s' does not apply to datasets of this "
3498                             "type"), name);
3499                         ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3500                         goto error;
3501                 }
3502
3503                 switch (nvpair_type(nvp)) {
3504                 case DATA_TYPE_BOOLEAN: /* -x property */
3505                         /*
3506                          * DATA_TYPE_BOOLEAN is the way we're asked to "exclude"
3507                          * a property: this is done by forcing an explicit
3508                          * inherit on the destination so the effective value is
3509                          * not the one we received from the send stream.
3510                          * We do this only if the property is not already
3511                          * locally-set, in which case its value will take
3512                          * priority over the received anyway.
3513                          */
3514                         if (nvlist_exists(origprops, name)) {
3515                                 nvlist_t *attrs;
3516
3517                                 attrs = fnvlist_lookup_nvlist(origprops, name);
3518                                 if (strcmp(fnvlist_lookup_string(attrs,
3519                                     ZPROP_SOURCE), ZPROP_SOURCE_VAL_RECVD) != 0)
3520                                         continue;
3521                         }
3522                         /*
3523                          * We can't force an explicit inherit on non-inheritable
3524                          * properties: if we're asked to exclude this kind of
3525                          * values we remove them from "recvprops" input nvlist.
3526                          */
3527                         if (!zfs_prop_inheritable(prop) &&
3528                             !zfs_prop_user(name) && /* can be inherited too */
3529                             nvlist_exists(recvprops, name))
3530                                 fnvlist_remove(recvprops, name);
3531                         else
3532                                 fnvlist_add_nvpair(*oxprops, nvp);
3533                         break;
3534                 case DATA_TYPE_STRING: /* -o property=value */
3535                         fnvlist_add_nvpair(oprops, nvp);
3536                         break;
3537                 default:
3538                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3539                             "property '%s' must be a string or boolean"), name);
3540                         ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3541                         goto error;
3542                 }
3543         }
3544
3545         if (toplevel) {
3546                 /* convert override strings properties to native */
3547                 if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
3548                     oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
3549                         ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
3550                         goto error;
3551                 }
3552
3553                 /* second pass: process "-o" properties */
3554                 fnvlist_merge(*oxprops, voprops);
3555                 fnvlist_free(voprops);
3556         } else {
3557                 /* override props on child dataset are inherited */
3558                 nvp = NULL;
3559                 while ((nvp = nvlist_next_nvpair(oprops, nvp)) != NULL) {
3560                         const char *name = nvpair_name(nvp);
3561                         fnvlist_add_boolean(*oxprops, name);
3562                 }
3563         }
3564
3565 error:
3566         fnvlist_free(oprops);
3567         return (ret);
3568 }
3569
3570 /*
3571  * Restores a backup of tosnap from the file descriptor specified by infd.
3572  */
3573 static int
3574 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
3575     const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
3576     dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
3577     avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3578     uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
3579 {
3580         time_t begin_time;
3581         int ioctl_err, ioctl_errno, err;
3582         char *cp;
3583         struct drr_begin *drrb = &drr->drr_u.drr_begin;
3584         char errbuf[1024];
3585         const char *chopprefix;
3586         boolean_t newfs = B_FALSE;
3587         boolean_t stream_wantsnewfs;
3588         boolean_t newprops = B_FALSE;
3589         uint64_t read_bytes = 0;
3590         uint64_t errflags = 0;
3591         uint64_t parent_snapguid = 0;
3592         prop_changelist_t *clp = NULL;
3593         nvlist_t *snapprops_nvlist = NULL;
3594         zprop_errflags_t prop_errflags;
3595         nvlist_t *prop_errors = NULL;
3596         boolean_t recursive;
3597         char *snapname = NULL;
3598         char destsnap[MAXPATHLEN * 2];
3599         char origin[MAXNAMELEN];
3600         char name[MAXPATHLEN];
3601         char tmp_keylocation[MAXNAMELEN];
3602         nvlist_t *rcvprops = NULL; /* props received from the send stream */
3603         nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
3604         nvlist_t *origprops = NULL; /* original props (if destination exists) */
3605         zfs_type_t type;
3606         boolean_t toplevel = B_FALSE;
3607         boolean_t zoned = B_FALSE;
3608         boolean_t hastoken = B_FALSE;
3609
3610         begin_time = time(NULL);
3611         bzero(origin, MAXNAMELEN);
3612         bzero(tmp_keylocation, MAXNAMELEN);
3613
3614         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3615             "cannot receive"));
3616
3617         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3618             ENOENT);
3619
3620         if (stream_avl != NULL) {
3621                 char *keylocation = NULL;
3622                 nvlist_t *lookup = NULL;
3623                 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
3624                     &snapname);
3625
3626                 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
3627                     &parent_snapguid);
3628                 err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
3629                 if (err) {
3630                         VERIFY(0 == nvlist_alloc(&rcvprops, NV_UNIQUE_NAME, 0));
3631                         newprops = B_TRUE;
3632                 }
3633
3634                 /*
3635                  * The keylocation property may only be set on encryption roots,
3636                  * but this dataset might not become an encryption root until
3637                  * recv_fix_encryption_heirarchy() is called. That function
3638                  * will fixup the keylocation anyway, so we temporarily unset
3639                  * the keylocation for now to avoid any errors from the receive
3640                  * ioctl.
3641                  */
3642                 err = nvlist_lookup_string(rcvprops,
3643                     zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
3644                 if (err == 0) {
3645                         strcpy(tmp_keylocation, keylocation);
3646                         (void) nvlist_remove_all(rcvprops,
3647                             zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
3648                 }
3649
3650                 if (flags->canmountoff) {
3651                         VERIFY(0 == nvlist_add_uint64(rcvprops,
3652                             zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
3653                 }
3654                 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
3655                         VERIFY(0 == nvlist_lookup_nvlist(lookup,
3656                             snapname, &snapprops_nvlist));
3657                 }
3658         }
3659
3660         cp = NULL;
3661
3662         /*
3663          * Determine how much of the snapshot name stored in the stream
3664          * we are going to tack on to the name they specified on the
3665          * command line, and how much we are going to chop off.
3666          *
3667          * If they specified a snapshot, chop the entire name stored in
3668          * the stream.
3669          */
3670         if (flags->istail) {
3671                 /*
3672                  * A filesystem was specified with -e. We want to tack on only
3673                  * the tail of the sent snapshot path.
3674                  */
3675                 if (strchr(tosnap, '@')) {
3676                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3677                             "argument - snapshot not allowed with -e"));
3678                         err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3679                         goto out;
3680                 }
3681
3682                 chopprefix = strrchr(sendfs, '/');
3683
3684                 if (chopprefix == NULL) {
3685                         /*
3686                          * The tail is the poolname, so we need to
3687                          * prepend a path separator.
3688                          */
3689                         int len = strlen(drrb->drr_toname);
3690                         cp = malloc(len + 2);
3691                         cp[0] = '/';
3692                         (void) strcpy(&cp[1], drrb->drr_toname);
3693                         chopprefix = cp;
3694                 } else {
3695                         chopprefix = drrb->drr_toname + (chopprefix - sendfs);
3696                 }
3697         } else if (flags->isprefix) {
3698                 /*
3699                  * A filesystem was specified with -d. We want to tack on
3700                  * everything but the first element of the sent snapshot path
3701                  * (all but the pool name).
3702                  */
3703                 if (strchr(tosnap, '@')) {
3704                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3705                             "argument - snapshot not allowed with -d"));
3706                         err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3707                         goto out;
3708                 }
3709
3710                 chopprefix = strchr(drrb->drr_toname, '/');
3711                 if (chopprefix == NULL)
3712                         chopprefix = strchr(drrb->drr_toname, '@');
3713         } else if (strchr(tosnap, '@') == NULL) {
3714                 /*
3715                  * If a filesystem was specified without -d or -e, we want to
3716                  * tack on everything after the fs specified by 'zfs send'.
3717                  */
3718                 chopprefix = drrb->drr_toname + strlen(sendfs);
3719         } else {
3720                 /* A snapshot was specified as an exact path (no -d or -e). */
3721                 if (recursive) {
3722                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3723                             "cannot specify snapshot name for multi-snapshot "
3724                             "stream"));
3725                         err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3726                         goto out;
3727                 }
3728                 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
3729         }
3730
3731         ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
3732         ASSERT(chopprefix > drrb->drr_toname);
3733         ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
3734         ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
3735             chopprefix[0] == '\0');
3736
3737         /*
3738          * Determine name of destination snapshot.
3739          */
3740         (void) strlcpy(destsnap, tosnap, sizeof (destsnap));
3741         (void) strlcat(destsnap, chopprefix, sizeof (destsnap));
3742         free(cp);
3743         if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
3744                 err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
3745                 goto out;
3746         }
3747
3748         /*
3749          * Determine the name of the origin snapshot.
3750          */
3751         if (originsnap) {
3752                 (void) strncpy(origin, originsnap, sizeof (origin));
3753                 if (flags->verbose)
3754                         (void) printf("using provided clone origin %s\n",
3755                             origin);
3756         } else if (drrb->drr_flags & DRR_FLAG_CLONE) {
3757                 if (guid_to_name(hdl, destsnap,
3758                     drrb->drr_fromguid, B_FALSE, origin) != 0) {
3759                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3760                             "local origin for clone %s does not exist"),
3761                             destsnap);
3762                         err = zfs_error(hdl, EZFS_NOENT, errbuf);
3763                         goto out;
3764                 }
3765                 if (flags->verbose)
3766                         (void) printf("found clone origin %s\n", origin);
3767         }
3768
3769         boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3770             DMU_BACKUP_FEATURE_RESUMING;
3771         boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3772             DMU_BACKUP_FEATURE_RAW;
3773         boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3774             DMU_BACKUP_FEATURE_EMBED_DATA;
3775         stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
3776             (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
3777
3778         if (stream_wantsnewfs) {
3779                 /*
3780                  * if the parent fs does not exist, look for it based on
3781                  * the parent snap GUID
3782                  */
3783                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3784                     "cannot receive new filesystem stream"));
3785
3786                 (void) strcpy(name, destsnap);
3787                 cp = strrchr(name, '/');
3788                 if (cp)
3789                         *cp = '\0';
3790                 if (cp &&
3791                     !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3792                         char suffix[ZFS_MAX_DATASET_NAME_LEN];
3793                         (void) strcpy(suffix, strrchr(destsnap, '/'));
3794                         if (guid_to_name(hdl, name, parent_snapguid,
3795                             B_FALSE, destsnap) == 0) {
3796                                 *strchr(destsnap, '@') = '\0';
3797                                 (void) strcat(destsnap, suffix);
3798                         }
3799                 }
3800         } else {
3801                 /*
3802                  * if the fs does not exist, look for it based on the
3803                  * fromsnap GUID
3804                  */
3805                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3806                     "cannot receive incremental stream"));
3807
3808                 (void) strcpy(name, destsnap);
3809                 *strchr(name, '@') = '\0';
3810
3811                 /*
3812                  * If the exact receive path was specified and this is the
3813                  * topmost path in the stream, then if the fs does not exist we
3814                  * should look no further.
3815                  */
3816                 if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
3817                     strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
3818                     !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3819                         char snap[ZFS_MAX_DATASET_NAME_LEN];
3820                         (void) strcpy(snap, strchr(destsnap, '@'));
3821                         if (guid_to_name(hdl, name, drrb->drr_fromguid,
3822                             B_FALSE, destsnap) == 0) {
3823                                 *strchr(destsnap, '@') = '\0';
3824                                 (void) strcat(destsnap, snap);
3825                         }
3826                 }
3827         }
3828
3829         (void) strcpy(name, destsnap);
3830         *strchr(name, '@') = '\0';
3831
3832         if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
3833                 zfs_cmd_t zc = {"\0"};
3834                 zfs_handle_t *zhp;
3835                 boolean_t encrypted;
3836
3837                 (void) strcpy(zc.zc_name, name);
3838
3839                 /*
3840                  * Destination fs exists.  It must be one of these cases:
3841                  *  - an incremental send stream
3842                  *  - the stream specifies a new fs (full stream or clone)
3843                  *    and they want us to blow away the existing fs (and
3844                  *    have therefore specified -F and removed any snapshots)
3845                  *  - we are resuming a failed receive.
3846                  */
3847                 if (stream_wantsnewfs) {
3848                         if (!flags->force) {
3849                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3850                                     "destination '%s' exists\n"
3851                                     "must specify -F to overwrite it"), name);
3852                                 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3853                                 goto out;
3854                         }
3855                         if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
3856                             &zc) == 0) {
3857                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3858                                     "destination has snapshots (eg. %s)\n"
3859                                     "must destroy them to overwrite it"),
3860                                     name);
3861                                 err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3862                                 goto out;
3863                         }
3864                 }
3865
3866                 if ((zhp = zfs_open(hdl, name,
3867                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
3868                         err = -1;
3869                         goto out;
3870                 }
3871
3872                 if (stream_wantsnewfs &&
3873                     zhp->zfs_dmustats.dds_origin[0]) {
3874                         zfs_close(zhp);
3875                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3876                             "destination '%s' is a clone\n"
3877                             "must destroy it to overwrite it"), name);
3878                         err = zfs_error(hdl, EZFS_EXISTS, errbuf);
3879                         goto out;
3880                 }
3881
3882                 /*
3883                  * Raw sends can not be performed as an incremental on top
3884                  * of existing unencrypted datasets. zfs recv -F can't be
3885                  * used to blow away an existing encrypted filesystem. This
3886                  * is because it would require the dsl dir to point to the
3887                  * new key (or lack of a key) and the old key at the same
3888                  * time. The -F flag may still be used for deleting
3889                  * intermediate snapshots that would otherwise prevent the
3890                  * receive from working.
3891                  */
3892                 encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
3893                     ZIO_CRYPT_OFF;
3894                 if (!stream_wantsnewfs && !encrypted && raw) {
3895                         zfs_close(zhp);
3896                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3897                             "cannot perform raw receive on top of "
3898                             "existing unencrypted dataset"));
3899                         err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3900                         goto out;
3901                 }
3902
3903                 if (stream_wantsnewfs && flags->force &&
3904                     ((raw && !encrypted) || encrypted)) {
3905                         zfs_close(zhp);
3906                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3907                             "zfs receive -F cannot be used to destroy an "
3908                             "encrypted filesystem or overwrite an "
3909                             "unencrypted one with an encrypted one"));
3910                         err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3911                         goto out;
3912                 }
3913
3914                 if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
3915                     stream_wantsnewfs) {
3916                         /* We can't do online recv in this case */
3917                         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
3918                         if (clp == NULL) {
3919                                 zfs_close(zhp);
3920                                 err = -1;
3921                                 goto out;
3922                         }
3923                         if (changelist_prefix(clp) != 0) {
3924                                 changelist_free(clp);
3925                                 zfs_close(zhp);
3926                                 err = -1;
3927                                 goto out;
3928                         }
3929                 }
3930
3931                 /*
3932                  * If we are resuming a newfs, set newfs here so that we will
3933                  * mount it if the recv succeeds this time.  We can tell
3934                  * that it was a newfs on the first recv because the fs
3935                  * itself will be inconsistent (if the fs existed when we
3936                  * did the first recv, we would have received it into
3937                  * .../%recv).
3938                  */
3939                 if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
3940                         newfs = B_TRUE;
3941
3942                 /* we want to know if we're zoned when validating -o|-x props */
3943                 zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
3944
3945                 /* may need this info later, get it now we have zhp around */
3946                 if (zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0,
3947                     NULL, NULL, 0, B_TRUE) == 0)
3948                         hastoken = B_TRUE;
3949
3950                 /* gather existing properties on destination */
3951                 origprops = fnvlist_alloc();
3952                 fnvlist_merge(origprops, zhp->zfs_props);
3953                 fnvlist_merge(origprops, zhp->zfs_user_props);
3954
3955                 zfs_close(zhp);
3956         } else {
3957                 zfs_handle_t *zhp;
3958
3959                 /*
3960                  * Destination filesystem does not exist.  Therefore we better
3961                  * be creating a new filesystem (either from a full backup, or
3962                  * a clone).  It would therefore be invalid if the user
3963                  * specified only the pool name (i.e. if the destination name
3964                  * contained no slash character).
3965                  */
3966                 cp = strrchr(name, '/');
3967
3968                 if (!stream_wantsnewfs || cp == NULL) {
3969                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3970                             "destination '%s' does not exist"), name);
3971                         err = zfs_error(hdl, EZFS_NOENT, errbuf);
3972                         goto out;
3973                 }
3974
3975                 /*
3976                  * Trim off the final dataset component so we perform the
3977                  * recvbackup ioctl to the filesystem's parent.
3978                  */
3979                 *cp = '\0';
3980
3981                 if (flags->isprefix && !flags->istail && !flags->dryrun &&
3982                     create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
3983                         err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3984                         goto out;
3985                 }
3986
3987                 /*
3988                  * It is invalid to receive a properties stream that was
3989                  * unencrypted on the send side as a child of an encrypted
3990                  * parent. Technically there is nothing preventing this, but
3991                  * it would mean that the encryption=off property which is
3992                  * locally set on the send side would not be received correctly.
3993                  * We can infer encryption=off if the stream is not raw and
3994                  * properties were included since the send side will only ever
3995                  * send the encryption property in a raw nvlist header.
3996                  */
3997                 if (!raw && rcvprops != NULL) {
3998                         uint64_t crypt;
3999
4000                         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
4001                         if (zhp == NULL) {
4002                                 err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4003                                 goto out;
4004                         }
4005
4006                         crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
4007                         zfs_close(zhp);
4008
4009                         if (crypt != ZIO_CRYPT_OFF) {
4010                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4011                                     "parent '%s' must not be encrypted to "
4012                                     "receive unenecrypted property"), name);
4013                                 err = zfs_error(hdl, EZFS_BADPROP, errbuf);
4014                                 goto out;
4015                         }
4016                 }
4017
4018                 newfs = B_TRUE;
4019                 *cp = '/';
4020         }
4021
4022         if (flags->verbose) {
4023                 (void) printf("%s %s stream of %s into %s\n",
4024                     flags->dryrun ? "would receive" : "receiving",
4025                     drrb->drr_fromguid ? "incremental" : "full",
4026                     drrb->drr_toname, destsnap);
4027                 (void) fflush(stdout);
4028         }
4029
4030         if (flags->dryrun) {
4031                 err = recv_skip(hdl, infd, flags->byteswap);
4032                 goto out;
4033         }
4034
4035         if (top_zfs && *top_zfs == NULL)
4036                 toplevel = B_TRUE;
4037         if (drrb->drr_type == DMU_OST_ZVOL) {
4038                 type = ZFS_TYPE_VOLUME;
4039         } else if (drrb->drr_type == DMU_OST_ZFS) {
4040                 type = ZFS_TYPE_FILESYSTEM;
4041         } else {
4042                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4043                     "invalid record type: 0x%d"), drrb->drr_type);
4044                 err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4045                 goto out;
4046         }
4047         if ((err = zfs_setup_cmdline_props(hdl, type, zoned, recursive,
4048             toplevel, rcvprops, cmdprops, origprops, &oxprops, errbuf)) != 0)
4049                 goto out;
4050
4051         err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops, oxprops,
4052             origin, flags->force, flags->resumable, raw, infd, drr_noswap,
4053             cleanup_fd, &read_bytes, &errflags, action_handlep, &prop_errors);
4054         ioctl_errno = ioctl_err;
4055         prop_errflags = errflags;
4056
4057         if (err == 0) {
4058                 nvpair_t *prop_err = NULL;
4059
4060                 while ((prop_err = nvlist_next_nvpair(prop_errors,
4061                     prop_err)) != NULL) {
4062                         char tbuf[1024];
4063                         zfs_prop_t prop;
4064                         int intval;
4065
4066                         prop = zfs_name_to_prop(nvpair_name(prop_err));
4067                         (void) nvpair_value_int32(prop_err, &intval);
4068                         if (strcmp(nvpair_name(prop_err),
4069                             ZPROP_N_MORE_ERRORS) == 0) {
4070                                 trunc_prop_errs(intval);
4071                                 break;
4072                         } else if (snapname == NULL || finalsnap == NULL ||
4073                             strcmp(finalsnap, snapname) == 0 ||
4074                             strcmp(nvpair_name(prop_err),
4075                             zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
4076                                 /*
4077                                  * Skip the special case of, for example,
4078                                  * "refquota", errors on intermediate
4079                                  * snapshots leading up to a final one.
4080                                  * That's why we have all of the checks above.
4081                                  *
4082                                  * See zfs_ioctl.c's extract_delay_props() for
4083                                  * a list of props which can fail on
4084                                  * intermediate snapshots, but shouldn't
4085                                  * affect the overall receive.
4086                                  */
4087                                 (void) snprintf(tbuf, sizeof (tbuf),
4088                                     dgettext(TEXT_DOMAIN,
4089                                     "cannot receive %s property on %s"),
4090                                     nvpair_name(prop_err), name);
4091                                 zfs_setprop_error(hdl, prop, intval, tbuf);
4092                         }
4093                 }
4094         }
4095
4096         if (err == 0 && snapprops_nvlist) {
4097                 zfs_cmd_t zc = {"\0"};
4098
4099                 (void) strcpy(zc.zc_name, destsnap);
4100                 zc.zc_cookie = B_TRUE; /* received */
4101                 if (zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist) == 0) {
4102                         (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
4103                         zcmd_free_nvlists(&zc);
4104                 }
4105         }
4106
4107         if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
4108                 /*
4109                  * It may be that this snapshot already exists,
4110                  * in which case we want to consume & ignore it
4111                  * rather than failing.
4112                  */
4113                 avl_tree_t *local_avl;
4114                 nvlist_t *local_nv, *fs;
4115                 cp = strchr(destsnap, '@');
4116
4117                 /*
4118                  * XXX Do this faster by just iterating over snaps in
4119                  * this fs.  Also if zc_value does not exist, we will
4120                  * get a strange "does not exist" error message.
4121                  */
4122                 *cp = '\0';
4123                 if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
4124                     B_FALSE, &local_nv, &local_avl) == 0) {
4125                         *cp = '@';
4126                         fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
4127                         fsavl_destroy(local_avl);
4128                         nvlist_free(local_nv);
4129
4130                         if (fs != NULL) {
4131                                 if (flags->verbose) {
4132                                         (void) printf("snap %s already exists; "
4133                                             "ignoring\n", destsnap);
4134                                 }
4135                                 err = ioctl_err = recv_skip(hdl, infd,
4136                                     flags->byteswap);
4137                         }
4138                 }
4139                 *cp = '@';
4140         }
4141
4142         if (ioctl_err != 0) {
4143                 switch (ioctl_errno) {
4144                 case ENODEV:
4145                         cp = strchr(destsnap, '@');
4146                         *cp = '\0';
4147                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4148                             "most recent snapshot of %s does not\n"
4149                             "match incremental source"), destsnap);
4150                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4151                         *cp = '@';
4152                         break;
4153                 case ETXTBSY:
4154                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4155                             "destination %s has been modified\n"
4156                             "since most recent snapshot"), name);
4157                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4158                         break;
4159                 case EACCES:
4160                         if (raw && stream_wantsnewfs) {
4161                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4162                                     "failed to create encryption key"));
4163                         } else if (raw && !stream_wantsnewfs) {
4164                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4165                                     "encryption key does not match "
4166                                     "existing key"));
4167                         } else {
4168                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4169                                     "inherited key must be loaded"));
4170                         }
4171                         (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4172                         break;
4173                 case EEXIST:
4174                         cp = strchr(destsnap, '@');
4175                         if (newfs) {
4176                                 /* it's the containing fs that exists */
4177                                 *cp = '\0';
4178                         }
4179                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4180                             "destination already exists"));
4181                         (void) zfs_error_fmt(hdl, EZFS_EXISTS,
4182                             dgettext(TEXT_DOMAIN, "cannot restore to %s"),
4183                             destsnap);
4184                         *cp = '@';
4185                         break;
4186                 case EINVAL:
4187                         if (flags->resumable)
4188                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4189                                     "kernel modules must be upgraded to "
4190                                     "receive this stream."));
4191                         if (embedded && !raw)
4192                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4193                                     "incompatible embedded data stream "
4194                                     "feature with encrypted receive."));
4195                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4196                         break;
4197                 case ECKSUM:
4198                         recv_ecksum_set_aux(hdl, destsnap, flags->resumable);
4199                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4200                         break;
4201                 case ENOTSUP:
4202                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4203                             "pool must be upgraded to receive this stream."));
4204                         (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
4205                         break;
4206                 case EDQUOT:
4207                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4208                             "destination %s space quota exceeded."), name);
4209                         (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
4210                         break;
4211                 case EBUSY:
4212                         if (hastoken) {
4213                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4214                                     "destination %s contains "
4215                                     "partially-complete state from "
4216                                     "\"zfs receive -s\"."), name);
4217                                 (void) zfs_error(hdl, EZFS_BUSY, errbuf);
4218                                 break;
4219                         }
4220                         /* fallthru */
4221                 default:
4222                         (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
4223                 }
4224         }
4225
4226         /*
4227          * Mount the target filesystem (if created).  Also mount any
4228          * children of the target filesystem if we did a replication
4229          * receive (indicated by stream_avl being non-NULL).
4230          */
4231         cp = strchr(destsnap, '@');
4232         if (cp && (ioctl_err == 0 || !newfs)) {
4233                 zfs_handle_t *h;
4234
4235                 *cp = '\0';
4236                 h = zfs_open(hdl, destsnap,
4237                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
4238                 if (h != NULL) {
4239                         if (h->zfs_type == ZFS_TYPE_VOLUME) {
4240                                 *cp = '@';
4241                         } else if (newfs || stream_avl) {
4242                                 /*
4243                                  * Track the first/top of hierarchy fs,
4244                                  * for mounting and sharing later.
4245                                  */
4246                                 if (top_zfs && *top_zfs == NULL)
4247                                         *top_zfs = zfs_strdup(hdl, destsnap);
4248                         }
4249                         zfs_close(h);
4250                 }
4251                 *cp = '@';
4252         }
4253
4254         if (clp) {
4255                 if (!flags->nomount)
4256                         err |= changelist_postfix(clp);
4257                 changelist_free(clp);
4258         }
4259
4260         if (prop_errflags & ZPROP_ERR_NOCLEAR) {
4261                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
4262                     "failed to clear unreceived properties on %s"), name);
4263                 (void) fprintf(stderr, "\n");
4264         }
4265         if (prop_errflags & ZPROP_ERR_NORESTORE) {
4266                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
4267                     "failed to restore original properties on %s"), name);
4268                 (void) fprintf(stderr, "\n");
4269         }
4270
4271         if (err || ioctl_err) {
4272                 err = -1;
4273                 goto out;
4274         }
4275
4276         if (flags->verbose) {
4277                 char buf1[64];
4278                 char buf2[64];
4279                 uint64_t bytes = read_bytes;
4280                 time_t delta = time(NULL) - begin_time;
4281                 if (delta == 0)
4282                         delta = 1;
4283                 zfs_nicebytes(bytes, buf1, sizeof (buf1));
4284                 zfs_nicebytes(bytes/delta, buf2, sizeof (buf1));
4285
4286                 (void) printf("received %s stream in %lu seconds (%s/sec)\n",
4287                     buf1, delta, buf2);
4288         }
4289
4290         err = 0;
4291 out:
4292         if (prop_errors != NULL)
4293                 nvlist_free(prop_errors);
4294
4295         if (tmp_keylocation[0] != '\0') {
4296                 VERIFY(0 == nvlist_add_string(rcvprops,
4297                     zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation));
4298         }
4299
4300         if (newprops)
4301                 nvlist_free(rcvprops);
4302
4303         nvlist_free(oxprops);
4304         nvlist_free(origprops);
4305
4306         return (err);
4307 }
4308
4309 /*
4310  * Check properties we were asked to override (both -o|-x)
4311  */
4312 static boolean_t
4313 zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
4314     const char *errbuf)
4315 {
4316         nvpair_t *nvp;
4317         zfs_prop_t prop;
4318         const char *name;
4319
4320         nvp = NULL;
4321         while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
4322                 name = nvpair_name(nvp);
4323                 prop = zfs_name_to_prop(name);
4324
4325                 if (prop == ZPROP_INVAL) {
4326                         if (!zfs_prop_user(name)) {
4327                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4328                                     "invalid property '%s'"), name);
4329                                 return (B_FALSE);
4330                         }
4331                         continue;
4332                 }
4333                 /*
4334                  * "origin" is readonly but is used to receive datasets as
4335                  * clones so we don't raise an error here
4336                  */
4337                 if (prop == ZFS_PROP_ORIGIN)
4338                         continue;
4339
4340                 /*
4341                  * cannot override readonly, set-once and other specific
4342                  * settable properties
4343                  */
4344                 if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION ||
4345                     prop == ZFS_PROP_VOLSIZE) {
4346                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4347                             "invalid property '%s'"), name);
4348                         return (B_FALSE);
4349                 }
4350         }
4351
4352         return (B_TRUE);
4353 }
4354
4355 static int
4356 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
4357     const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
4358     nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
4359     uint64_t *action_handlep, const char *finalsnap, nvlist_t *cmdprops)
4360 {
4361         int err;
4362         dmu_replay_record_t drr, drr_noswap;
4363         struct drr_begin *drrb = &drr.drr_u.drr_begin;
4364         char errbuf[1024];
4365         zio_cksum_t zcksum = { { 0 } };
4366         uint64_t featureflags;
4367         int hdrtype;
4368
4369         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4370             "cannot receive"));
4371
4372         /* check cmdline props, raise an error if they cannot be received */
4373         if (!zfs_receive_checkprops(hdl, cmdprops, errbuf)) {
4374                 return (zfs_error(hdl, EZFS_BADPROP, errbuf));
4375         }
4376
4377         if (flags->isprefix &&
4378             !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
4379                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
4380                     "(%s) does not exist"), tosnap);
4381                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
4382         }
4383         if (originsnap &&
4384             !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
4385                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
4386                     "(%s) does not exist"), originsnap);
4387                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
4388         }
4389
4390         /* read in the BEGIN record */
4391         if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
4392             &zcksum)))
4393                 return (err);
4394
4395         if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
4396                 /* It's the double end record at the end of a package */
4397                 return (ENODATA);
4398         }
4399
4400         /* the kernel needs the non-byteswapped begin record */
4401         drr_noswap = drr;
4402
4403         flags->byteswap = B_FALSE;
4404         if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
4405                 /*
4406                  * We computed the checksum in the wrong byteorder in
4407                  * recv_read() above; do it again correctly.
4408                  */
4409                 bzero(&zcksum, sizeof (zio_cksum_t));
4410                 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
4411                 flags->byteswap = B_TRUE;
4412
4413                 drr.drr_type = BSWAP_32(drr.drr_type);
4414                 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
4415                 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
4416                 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
4417                 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
4418                 drrb->drr_type = BSWAP_32(drrb->drr_type);
4419                 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
4420                 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
4421                 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
4422         }
4423
4424         if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
4425                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4426                     "stream (bad magic number)"));
4427                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4428         }
4429
4430         featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
4431         hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
4432
4433         if (!DMU_STREAM_SUPPORTED(featureflags) ||
4434             (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
4435                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4436                     "stream has unsupported feature, feature flags = %lx"),
4437                     featureflags);
4438                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4439         }
4440
4441         if (strchr(drrb->drr_toname, '@') == NULL) {
4442                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4443                     "stream (bad snapshot name)"));
4444                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4445         }
4446
4447         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
4448                 char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
4449                 if (sendfs == NULL) {
4450                         /*
4451                          * We were not called from zfs_receive_package(). Get
4452                          * the fs specified by 'zfs send'.
4453                          */
4454                         char *cp;
4455                         (void) strlcpy(nonpackage_sendfs,
4456                             drr.drr_u.drr_begin.drr_toname,
4457                             sizeof (nonpackage_sendfs));
4458                         if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
4459                                 *cp = '\0';
4460                         sendfs = nonpackage_sendfs;
4461                         VERIFY(finalsnap == NULL);
4462                 }
4463                 return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
4464                     &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
4465                     cleanup_fd, action_handlep, finalsnap, cmdprops));
4466         } else {
4467                 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
4468                     DMU_COMPOUNDSTREAM);
4469                 return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
4470                     &zcksum, top_zfs, cleanup_fd, action_handlep, cmdprops));
4471         }
4472 }
4473
4474 /*
4475  * Restores a backup of tosnap from the file descriptor specified by infd.
4476  * Return 0 on total success, -2 if some things couldn't be
4477  * destroyed/renamed/promoted, -1 if some things couldn't be received.
4478  * (-1 will override -2, if -1 and the resumable flag was specified the
4479  * transfer can be resumed if the sending side supports it).
4480  */
4481 int
4482 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
4483     recvflags_t *flags, int infd, avl_tree_t *stream_avl)
4484 {
4485         char *top_zfs = NULL;
4486         int err;
4487         int cleanup_fd;
4488         uint64_t action_handle = 0;
4489         struct stat sb;
4490         char *originsnap = NULL;
4491
4492         /*
4493          * The only way fstat can fail is if we do not have a valid file
4494          * descriptor.
4495          */
4496         if (fstat(infd, &sb) == -1) {
4497                 perror("fstat");
4498                 return (-2);
4499         }
4500
4501 #ifdef __linux__
4502 #ifndef F_SETPIPE_SZ
4503 #define F_SETPIPE_SZ (F_SETLEASE + 7)
4504 #endif /* F_SETPIPE_SZ */
4505
4506 #ifndef F_GETPIPE_SZ
4507 #define F_GETPIPE_SZ (F_GETLEASE + 7)
4508 #endif /* F_GETPIPE_SZ */
4509
4510         /*
4511          * It is not uncommon for gigabytes to be processed in zfs receive.
4512          * Speculatively increase the buffer size via Linux-specific fcntl()
4513          * call.
4514          */
4515         if (S_ISFIFO(sb.st_mode)) {
4516                 FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "r");
4517
4518                 if (procf != NULL) {
4519                         unsigned long max_psize;
4520                         long cur_psize;
4521                         if (fscanf(procf, "%lu", &max_psize) > 0) {
4522                                 cur_psize = fcntl(infd, F_GETPIPE_SZ);
4523                                 if (cur_psize > 0 &&
4524                                     max_psize > (unsigned long) cur_psize)
4525                                         (void) fcntl(infd, F_SETPIPE_SZ,
4526                                             max_psize);
4527                         }
4528                         fclose(procf);
4529                 }
4530         }
4531 #endif /* __linux__ */
4532
4533         if (props) {
4534                 err = nvlist_lookup_string(props, "origin", &originsnap);
4535                 if (err && err != ENOENT)
4536                         return (err);
4537         }
4538
4539         cleanup_fd = open(ZFS_DEV, O_RDWR);
4540         VERIFY(cleanup_fd >= 0);
4541
4542         err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
4543             stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL, props);
4544
4545         VERIFY(0 == close(cleanup_fd));
4546
4547         if (err == 0 && !flags->nomount && top_zfs) {
4548                 zfs_handle_t *zhp = NULL;
4549                 prop_changelist_t *clp = NULL;
4550
4551                 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
4552                 if (zhp != NULL) {
4553                         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
4554                             CL_GATHER_MOUNT_ALWAYS, 0);
4555                         zfs_close(zhp);
4556                         if (clp != NULL) {
4557                                 /* mount and share received datasets */
4558                                 err = changelist_postfix(clp);
4559                                 changelist_free(clp);
4560                         }
4561                 }
4562                 if (zhp == NULL || clp == NULL || err)
4563                         err = -1;
4564         }
4565         if (top_zfs)
4566                 free(top_zfs);
4567
4568         return (err);
4569 }