]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
MFC r324348: MFV r316934: 7340 receive manual origin should override automatic origin
[FreeBSD/stable/10.git] / cddl / contrib / opensolaris / lib / libzfs / common / libzfs_sendrecv.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26  * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved.
27  * Copyright (c) 2013 Steven Hartland. All rights reserved.
28  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
29  * Copyright (c) 2014 Integros [integros.com]
30  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31  */
32
33 #include <assert.h>
34 #include <ctype.h>
35 #include <errno.h>
36 #include <libintl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 #include <unistd.h>
41 #include <stddef.h>
42 #include <fcntl.h>
43 #include <sys/param.h>
44 #include <sys/mount.h>
45 #include <pthread.h>
46 #include <umem.h>
47 #include <time.h>
48
49 #include <libzfs.h>
50 #include <libzfs_core.h>
51
52 #include "zfs_namecheck.h"
53 #include "zfs_prop.h"
54 #include "zfs_fletcher.h"
55 #include "libzfs_impl.h"
56 #include <zlib.h>
57 #include <sha2.h>
58 #include <sys/zio_checksum.h>
59 #include <sys/ddt.h>
60
61 #ifdef __FreeBSD__
62 extern int zfs_ioctl_version;
63 #endif
64
65 /* in libzfs_dataset.c */
66 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
67 /* We need to use something for ENODATA. */
68 #define ENODATA EIDRM
69
70 static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
71     recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
72     uint64_t *, const char *);
73 static int guid_to_name(libzfs_handle_t *, const char *,
74     uint64_t, boolean_t, char *);
75
76 static const zio_cksum_t zero_cksum = { 0 };
77
78 typedef struct dedup_arg {
79         int     inputfd;
80         int     outputfd;
81         libzfs_handle_t  *dedup_hdl;
82 } dedup_arg_t;
83
84 typedef struct progress_arg {
85         zfs_handle_t *pa_zhp;
86         int pa_fd;
87         boolean_t pa_parsable;
88 } progress_arg_t;
89
/*
 * Location of a block within the send stream: which snapshot (guid),
 * which object, and at what offset.  cksummer() fills one of these from
 * each DRR_WRITE record and stores it in the dedup table.
 */
typedef struct dataref {
        uint64_t ref_guid;      /* taken from drr_toguid of the write */
        uint64_t ref_object;    /* taken from drr_object of the write */
        uint64_t ref_offset;    /* taken from drr_offset of the write */
} dataref_t;
95
96 typedef struct dedup_entry {
97         struct dedup_entry      *dde_next;
98         zio_cksum_t dde_chksum;
99         uint64_t dde_prop;
100         dataref_t dde_ref;
101 } dedup_entry_t;
102
103 #define MAX_DDT_PHYSMEM_PERCENT         20
104 #define SMALLEST_POSSIBLE_MAX_DDT_MB            128
105
106 typedef struct dedup_table {
107         dedup_entry_t   **dedup_hash_array;
108         umem_cache_t    *ddecache;
109         uint64_t        max_ddt_size;  /* max dedup table size in bytes */
110         uint64_t        cur_ddt_size;  /* current dedup table size in bytes */
111         uint64_t        ddt_count;
112         int             numhashbits;
113         boolean_t       ddt_full;
114 } dedup_table_t;
115
/*
 * Return the 1-based position of the most significant set bit of n,
 * i.e. how many right shifts it takes to reduce n to zero.  Returns 0
 * when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
        int nbits = 0;

        while (n != 0) {
                n >>= 1;
                nbits++;
        }
        return (nbits);
}
125
/*
 * Read a single item of len bytes from stream into buf.
 *
 * Returns 1 when the whole item was read and 0 otherwise (short read,
 * end of file, error, or len == 0), exactly as fread() reports it for an
 * item count of one.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
        return (fread(buf, len, 1, stream));
}
136
137 static void
138 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
139     zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
140 {
141         dedup_entry_t   *dde;
142
143         if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
144                 if (ddt->ddt_full == B_FALSE) {
145                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
146                             "Dedup table full.  Deduplication will continue "
147                             "with existing table entries"));
148                         ddt->ddt_full = B_TRUE;
149                 }
150                 return;
151         }
152
153         if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
154             != NULL) {
155                 assert(*ddepp == NULL);
156                 dde->dde_next = NULL;
157                 dde->dde_chksum = *cs;
158                 dde->dde_prop = prop;
159                 dde->dde_ref = *dr;
160                 *ddepp = dde;
161                 ddt->cur_ddt_size += sizeof (dedup_entry_t);
162                 ddt->ddt_count++;
163         }
164 }
165
166 /*
167  * Using the specified dedup table, do a lookup for an entry with
168  * the checksum cs.  If found, return the block's reference info
169  * in *dr. Otherwise, insert a new entry in the dedup table, using
170  * the reference information specified by *dr.
171  *
172  * return value:  true - entry was found
173  *                false - entry was not found
174  */
175 static boolean_t
176 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
177     uint64_t prop, dataref_t *dr)
178 {
179         uint32_t hashcode;
180         dedup_entry_t **ddepp;
181
182         hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
183
184         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
185             ddepp = &((*ddepp)->dde_next)) {
186                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
187                     (*ddepp)->dde_prop == prop) {
188                         *dr = (*ddepp)->dde_ref;
189                         return (B_TRUE);
190                 }
191         }
192         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
193         return (B_FALSE);
194 }
195
196 static int
197 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
198     zio_cksum_t *zc, int outfd)
199 {
200         ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
201             ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
202         fletcher_4_incremental_native(drr,
203             offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
204         if (drr->drr_type != DRR_BEGIN) {
205                 ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
206                     drr_checksum.drr_checksum));
207                 drr->drr_u.drr_checksum.drr_checksum = *zc;
208         }
209         fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
210             sizeof (zio_cksum_t), zc);
211         if (write(outfd, drr, sizeof (*drr)) == -1)
212                 return (errno);
213         if (payload_len != 0) {
214                 fletcher_4_incremental_native(payload, payload_len, zc);
215                 if (write(outfd, payload, payload_len) == -1)
216                         return (errno);
217         }
218         return (0);
219 }
220
221 /*
222  * This function is started in a separate thread when the dedup option
223  * has been requested.  The main send thread determines the list of
224  * snapshots to be included in the send stream and makes the ioctl calls
225  * for each one.  But instead of having the ioctl send the output to the
226  * the output fd specified by the caller of zfs_send()), the
227  * ioctl is told to direct the output to a pipe, which is read by the
228  * alternate thread running THIS function.  This function does the
229  * dedup'ing by:
230  *  1. building a dedup table (the DDT)
231  *  2. doing checksums on each data block and inserting a record in the DDT
232  *  3. looking for matching checksums, and
233  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
234  *      a duplicate block is found.
235  * The output of this function then goes to the output fd requested
236  * by the caller of zfs_send().
237  */
238 static void *
239 cksummer(void *arg)
240 {
241         dedup_arg_t *dda = arg;
242         char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
243         dmu_replay_record_t thedrr;
244         dmu_replay_record_t *drr = &thedrr;
245         FILE *ofp;
246         int outfd;
247         dedup_table_t ddt;
248         zio_cksum_t stream_cksum;
249         uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
250         uint64_t numbuckets;
251
252         ddt.max_ddt_size =
253             MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
254             SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
255
256         numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
257
258         /*
259          * numbuckets must be a power of 2.  Increase number to
260          * a power of 2 if necessary.
261          */
262         if (!ISP2(numbuckets))
263                 numbuckets = 1 << high_order_bit(numbuckets);
264
265         ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
266         ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
267             NULL, NULL, NULL, NULL, NULL, 0);
268         ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
269         ddt.numhashbits = high_order_bit(numbuckets) - 1;
270         ddt.ddt_full = B_FALSE;
271
272         outfd = dda->outputfd;
273         ofp = fdopen(dda->inputfd, "r");
274         while (ssread(drr, sizeof (*drr), ofp) != 0) {
275
276                 switch (drr->drr_type) {
277                 case DRR_BEGIN:
278                 {
279                         struct drr_begin *drrb = &drr->drr_u.drr_begin;
280                         int fflags;
281                         int sz = 0;
282                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
283
284                         ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
285
286                         /* set the DEDUP feature flag for this stream */
287                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
288                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
289                             DMU_BACKUP_FEATURE_DEDUPPROPS);
290                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
291
292                         if (drr->drr_payloadlen != 0) {
293                                 sz = drr->drr_payloadlen;
294
295                                 if (sz > SPA_MAXBLOCKSIZE) {
296                                         buf = zfs_realloc(dda->dedup_hdl, buf,
297                                             SPA_MAXBLOCKSIZE, sz);
298                                 }
299                                 (void) ssread(buf, sz, ofp);
300                                 if (ferror(stdin))
301                                         perror("fread");
302                         }
303                         if (dump_record(drr, buf, sz, &stream_cksum,
304                             outfd) != 0)
305                                 goto out;
306                         break;
307                 }
308
309                 case DRR_END:
310                 {
311                         struct drr_end *drre = &drr->drr_u.drr_end;
312                         /* use the recalculated checksum */
313                         drre->drr_checksum = stream_cksum;
314                         if (dump_record(drr, NULL, 0, &stream_cksum,
315                             outfd) != 0)
316                                 goto out;
317                         break;
318                 }
319
320                 case DRR_OBJECT:
321                 {
322                         struct drr_object *drro = &drr->drr_u.drr_object;
323                         if (drro->drr_bonuslen > 0) {
324                                 (void) ssread(buf,
325                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
326                                     ofp);
327                         }
328                         if (dump_record(drr, buf,
329                             P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
330                             &stream_cksum, outfd) != 0)
331                                 goto out;
332                         break;
333                 }
334
335                 case DRR_SPILL:
336                 {
337                         struct drr_spill *drrs = &drr->drr_u.drr_spill;
338                         (void) ssread(buf, drrs->drr_length, ofp);
339                         if (dump_record(drr, buf, drrs->drr_length,
340                             &stream_cksum, outfd) != 0)
341                                 goto out;
342                         break;
343                 }
344
345                 case DRR_FREEOBJECTS:
346                 {
347                         if (dump_record(drr, NULL, 0, &stream_cksum,
348                             outfd) != 0)
349                                 goto out;
350                         break;
351                 }
352
353                 case DRR_WRITE:
354                 {
355                         struct drr_write *drrw = &drr->drr_u.drr_write;
356                         dataref_t       dataref;
357
358                         (void) ssread(buf, drrw->drr_length, ofp);
359
360                         /*
361                          * Use the existing checksum if it's dedup-capable,
362                          * else calculate a SHA256 checksum for it.
363                          */
364
365                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
366                             zero_cksum) ||
367                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
368                                 SHA256_CTX      ctx;
369                                 zio_cksum_t     tmpsha256;
370
371                                 SHA256Init(&ctx);
372                                 SHA256Update(&ctx, buf, drrw->drr_length);
373                                 SHA256Final(&tmpsha256, &ctx);
374                                 drrw->drr_key.ddk_cksum.zc_word[0] =
375                                     BE_64(tmpsha256.zc_word[0]);
376                                 drrw->drr_key.ddk_cksum.zc_word[1] =
377                                     BE_64(tmpsha256.zc_word[1]);
378                                 drrw->drr_key.ddk_cksum.zc_word[2] =
379                                     BE_64(tmpsha256.zc_word[2]);
380                                 drrw->drr_key.ddk_cksum.zc_word[3] =
381                                     BE_64(tmpsha256.zc_word[3]);
382                                 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
383                                 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
384                         }
385
386                         dataref.ref_guid = drrw->drr_toguid;
387                         dataref.ref_object = drrw->drr_object;
388                         dataref.ref_offset = drrw->drr_offset;
389
390                         if (ddt_update(dda->dedup_hdl, &ddt,
391                             &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
392                             &dataref)) {
393                                 dmu_replay_record_t wbr_drr = {0};
394                                 struct drr_write_byref *wbr_drrr =
395                                     &wbr_drr.drr_u.drr_write_byref;
396
397                                 /* block already present in stream */
398                                 wbr_drr.drr_type = DRR_WRITE_BYREF;
399
400                                 wbr_drrr->drr_object = drrw->drr_object;
401                                 wbr_drrr->drr_offset = drrw->drr_offset;
402                                 wbr_drrr->drr_length = drrw->drr_length;
403                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
404                                 wbr_drrr->drr_refguid = dataref.ref_guid;
405                                 wbr_drrr->drr_refobject =
406                                     dataref.ref_object;
407                                 wbr_drrr->drr_refoffset =
408                                     dataref.ref_offset;
409
410                                 wbr_drrr->drr_checksumtype =
411                                     drrw->drr_checksumtype;
412                                 wbr_drrr->drr_checksumflags =
413                                     drrw->drr_checksumtype;
414                                 wbr_drrr->drr_key.ddk_cksum =
415                                     drrw->drr_key.ddk_cksum;
416                                 wbr_drrr->drr_key.ddk_prop =
417                                     drrw->drr_key.ddk_prop;
418
419                                 if (dump_record(&wbr_drr, NULL, 0,
420                                     &stream_cksum, outfd) != 0)
421                                         goto out;
422                         } else {
423                                 /* block not previously seen */
424                                 if (dump_record(drr, buf, drrw->drr_length,
425                                     &stream_cksum, outfd) != 0)
426                                         goto out;
427                         }
428                         break;
429                 }
430
431                 case DRR_WRITE_EMBEDDED:
432                 {
433                         struct drr_write_embedded *drrwe =
434                             &drr->drr_u.drr_write_embedded;
435                         (void) ssread(buf,
436                             P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
437                         if (dump_record(drr, buf,
438                             P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
439                             &stream_cksum, outfd) != 0)
440                                 goto out;
441                         break;
442                 }
443
444                 case DRR_FREE:
445                 {
446                         if (dump_record(drr, NULL, 0, &stream_cksum,
447                             outfd) != 0)
448                                 goto out;
449                         break;
450                 }
451
452                 default:
453                         (void) fprintf(stderr, "INVALID record type 0x%x\n",
454                             drr->drr_type);
455                         /* should never happen, so assert */
456                         assert(B_FALSE);
457                 }
458         }
459 out:
460         umem_cache_destroy(ddt.ddecache);
461         free(ddt.dedup_hash_array);
462         free(buf);
463         (void) fclose(ofp);
464
465         return (NULL);
466 }
467
468 /*
469  * Routines for dealing with the AVL tree of fs-nvlists
470  */
471 typedef struct fsavl_node {
472         avl_node_t fn_node;
473         nvlist_t *fn_nvfs;
474         char *fn_snapname;
475         uint64_t fn_guid;
476 } fsavl_node_t;
477
478 static int
479 fsavl_compare(const void *arg1, const void *arg2)
480 {
481         const fsavl_node_t *fn1 = arg1;
482         const fsavl_node_t *fn2 = arg2;
483
484         if (fn1->fn_guid > fn2->fn_guid)
485                 return (+1);
486         else if (fn1->fn_guid < fn2->fn_guid)
487                 return (-1);
488         else
489                 return (0);
490 }
491
492 /*
493  * Given the GUID of a snapshot, find its containing filesystem and
494  * (optionally) name.
495  */
496 static nvlist_t *
497 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
498 {
499         fsavl_node_t fn_find;
500         fsavl_node_t *fn;
501
502         fn_find.fn_guid = snapguid;
503
504         fn = avl_find(avl, &fn_find, NULL);
505         if (fn) {
506                 if (snapname)
507                         *snapname = fn->fn_snapname;
508                 return (fn->fn_nvfs);
509         }
510         return (NULL);
511 }
512
513 static void
514 fsavl_destroy(avl_tree_t *avl)
515 {
516         fsavl_node_t *fn;
517         void *cookie;
518
519         if (avl == NULL)
520                 return;
521
522         cookie = NULL;
523         while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
524                 free(fn);
525         avl_destroy(avl);
526         free(avl);
527 }
528
529 /*
530  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
531  */
532 static avl_tree_t *
533 fsavl_create(nvlist_t *fss)
534 {
535         avl_tree_t *fsavl;
536         nvpair_t *fselem = NULL;
537
538         if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
539                 return (NULL);
540
541         avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
542             offsetof(fsavl_node_t, fn_node));
543
544         while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
545                 nvlist_t *nvfs, *snaps;
546                 nvpair_t *snapelem = NULL;
547
548                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
549                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
550
551                 while ((snapelem =
552                     nvlist_next_nvpair(snaps, snapelem)) != NULL) {
553                         fsavl_node_t *fn;
554                         uint64_t guid;
555
556                         VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
557                         if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
558                                 fsavl_destroy(fsavl);
559                                 return (NULL);
560                         }
561                         fn->fn_nvfs = nvfs;
562                         fn->fn_snapname = nvpair_name(snapelem);
563                         fn->fn_guid = guid;
564
565                         /*
566                          * Note: if there are multiple snaps with the
567                          * same GUID, we ignore all but one.
568                          */
569                         if (avl_find(fsavl, fn, NULL) == NULL)
570                                 avl_add(fsavl, fn);
571                         else
572                                 free(fn);
573                 }
574         }
575
576         return (fsavl);
577 }
578
579 /*
580  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
581  */
582 typedef struct send_data {
583         /*
584          * assigned inside every recursive call,
585          * restored from *_save on return:
586          *
587          * guid of fromsnap snapshot in parent dataset
588          * txg of fromsnap snapshot in current dataset
589          * txg of tosnap snapshot in current dataset
590          */
591
592         uint64_t parent_fromsnap_guid;
593         uint64_t fromsnap_txg;
594         uint64_t tosnap_txg;
595
596         /* the nvlists get accumulated during depth-first traversal */
597         nvlist_t *parent_snaps;
598         nvlist_t *fss;
599         nvlist_t *snapprops;
600
601         /* send-receive configuration, does not change during traversal */
602         const char *fsname;
603         const char *fromsnap;
604         const char *tosnap;
605         boolean_t recursive;
606         boolean_t verbose;
607
608         /*
609          * The header nvlist is of the following format:
610          * {
611          *   "tosnap" -> string
612          *   "fromsnap" -> string (if incremental)
613          *   "fss" -> {
614          *      id -> {
615          *
616          *       "name" -> string (full name; for debugging)
617          *       "parentfromsnap" -> number (guid of fromsnap in parent)
618          *
619          *       "props" -> { name -> value (only if set here) }
620          *       "snaps" -> { name (lastname) -> number (guid) }
621          *       "snapprops" -> { name (lastname) -> { name -> value } }
622          *
623          *       "origin" -> number (guid) (if clone)
624          *       "sent" -> boolean (not on-disk)
625          *      }
626          *   }
627          * }
628          *
629          */
630 } send_data_t;
631
632 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
633
634 static int
635 send_iterate_snap(zfs_handle_t *zhp, void *arg)
636 {
637         send_data_t *sd = arg;
638         uint64_t guid = zhp->zfs_dmustats.dds_guid;
639         uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
640         char *snapname;
641         nvlist_t *nv;
642
643         snapname = strrchr(zhp->zfs_name, '@')+1;
644
645         if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
646                 if (sd->verbose) {
647                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
648                             "skipping snapshot %s because it was created "
649                             "after the destination snapshot (%s)\n"),
650                             zhp->zfs_name, sd->tosnap);
651                 }
652                 zfs_close(zhp);
653                 return (0);
654         }
655
656         VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
657         /*
658          * NB: if there is no fromsnap here (it's a newly created fs in
659          * an incremental replication), we will substitute the tosnap.
660          */
661         if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
662             (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
663             strcmp(snapname, sd->tosnap) == 0)) {
664                 sd->parent_fromsnap_guid = guid;
665         }
666
667         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
668         send_iterate_prop(zhp, nv);
669         VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
670         nvlist_free(nv);
671
672         zfs_close(zhp);
673         return (0);
674 }
675
676 static void
677 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
678 {
679         nvpair_t *elem = NULL;
680
681         while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
682                 char *propname = nvpair_name(elem);
683                 zfs_prop_t prop = zfs_name_to_prop(propname);
684                 nvlist_t *propnv;
685
686                 if (!zfs_prop_user(propname)) {
687                         /*
688                          * Realistically, this should never happen.  However,
689                          * we want the ability to add DSL properties without
690                          * needing to make incompatible version changes.  We
691                          * need to ignore unknown properties to allow older
692                          * software to still send datasets containing these
693                          * properties, with the unknown properties elided.
694                          */
695                         if (prop == ZPROP_INVAL)
696                                 continue;
697
698                         if (zfs_prop_readonly(prop))
699                                 continue;
700                 }
701
702                 verify(nvpair_value_nvlist(elem, &propnv) == 0);
703                 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
704                     prop == ZFS_PROP_REFQUOTA ||
705                     prop == ZFS_PROP_REFRESERVATION) {
706                         char *source;
707                         uint64_t value;
708                         verify(nvlist_lookup_uint64(propnv,
709                             ZPROP_VALUE, &value) == 0);
710                         if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
711                                 continue;
712                         /*
713                          * May have no source before SPA_VERSION_RECVD_PROPS,
714                          * but is still modifiable.
715                          */
716                         if (nvlist_lookup_string(propnv,
717                             ZPROP_SOURCE, &source) == 0) {
718                                 if ((strcmp(source, zhp->zfs_name) != 0) &&
719                                     (strcmp(source,
720                                     ZPROP_SOURCE_VAL_RECVD) != 0))
721                                         continue;
722                         }
723                 } else {
724                         char *source;
725                         if (nvlist_lookup_string(propnv,
726                             ZPROP_SOURCE, &source) != 0)
727                                 continue;
728                         if ((strcmp(source, zhp->zfs_name) != 0) &&
729                             (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
730                                 continue;
731                 }
732
733                 if (zfs_prop_user(propname) ||
734                     zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
735                         char *value;
736                         verify(nvlist_lookup_string(propnv,
737                             ZPROP_VALUE, &value) == 0);
738                         VERIFY(0 == nvlist_add_string(nv, propname, value));
739                 } else {
740                         uint64_t value;
741                         verify(nvlist_lookup_uint64(propnv,
742                             ZPROP_VALUE, &value) == 0);
743                         VERIFY(0 == nvlist_add_uint64(nv, propname, value));
744                 }
745         }
746 }
747
748 /*
749  * returns snapshot creation txg
750  * and returns 0 if the snapshot does not exist
751  */
752 static uint64_t
753 get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
754 {
755         char name[ZFS_MAX_DATASET_NAME_LEN];
756         uint64_t txg = 0;
757
758         if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
759                 return (txg);
760
761         (void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
762         if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
763                 zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
764                 if (zhp != NULL) {
765                         txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
766                         zfs_close(zhp);
767                 }
768         }
769
770         return (txg);
771 }
772
773 /*
774  * recursively generate nvlists describing datasets.  See comment
775  * for the data structure send_data_t above for description of contents
776  * of the nvlist.
777  */
778 static int
779 send_iterate_fs(zfs_handle_t *zhp, void *arg)
780 {
781         send_data_t *sd = arg;
782         nvlist_t *nvfs, *nv;
783         int rv = 0;
784         uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
785         uint64_t fromsnap_txg_save = sd->fromsnap_txg;
786         uint64_t tosnap_txg_save = sd->tosnap_txg;
787         uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
788         uint64_t guid = zhp->zfs_dmustats.dds_guid;
789         uint64_t fromsnap_txg, tosnap_txg;
790         char guidstring[64];
791
792         fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
793         if (fromsnap_txg != 0)
794                 sd->fromsnap_txg = fromsnap_txg;
795
796         tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
797         if (tosnap_txg != 0)
798                 sd->tosnap_txg = tosnap_txg;
799
800         /*
801          * on the send side, if the current dataset does not have tosnap,
802          * perform two additional checks:
803          *
804          * - skip sending the current dataset if it was created later than
805          *   the parent tosnap
806          * - return error if the current dataset was created earlier than
807          *   the parent tosnap
808          */
809         if (sd->tosnap != NULL && tosnap_txg == 0) {
810                 if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
811                         if (sd->verbose) {
812                                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
813                                     "skipping dataset %s: snapshot %s does "
814                                     "not exist\n"), zhp->zfs_name, sd->tosnap);
815                         }
816                 } else {
817                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
818                             "cannot send %s@%s%s: snapshot %s@%s does not "
819                             "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
820                             dgettext(TEXT_DOMAIN, " recursively") : "",
821                             zhp->zfs_name, sd->tosnap);
822                         rv = -1;
823                 }
824                 goto out;
825         }
826
827         VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
828         VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
829         VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
830             sd->parent_fromsnap_guid));
831
832         if (zhp->zfs_dmustats.dds_origin[0]) {
833                 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
834                     zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
835                 if (origin == NULL) {
836                         rv = -1;
837                         goto out;
838                 }
839                 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
840                     origin->zfs_dmustats.dds_guid));
841         }
842
843         /* iterate over props */
844         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
845         send_iterate_prop(zhp, nv);
846         VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
847         nvlist_free(nv);
848
849         /* iterate over snaps, and set sd->parent_fromsnap_guid */
850         sd->parent_fromsnap_guid = 0;
851         VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
852         VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
853         (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
854         VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
855         VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
856         nvlist_free(sd->parent_snaps);
857         nvlist_free(sd->snapprops);
858
859         /* add this fs to nvlist */
860         (void) snprintf(guidstring, sizeof (guidstring),
861             "0x%llx", (longlong_t)guid);
862         VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
863         nvlist_free(nvfs);
864
865         /* iterate over children */
866         if (sd->recursive)
867                 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
868
869 out:
870         sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
871         sd->fromsnap_txg = fromsnap_txg_save;
872         sd->tosnap_txg = tosnap_txg_save;
873
874         zfs_close(zhp);
875         return (rv);
876 }
877
878 static int
879 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
880     const char *tosnap, boolean_t recursive, boolean_t verbose,
881     nvlist_t **nvlp, avl_tree_t **avlp)
882 {
883         zfs_handle_t *zhp;
884         send_data_t sd = { 0 };
885         int error;
886
887         zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
888         if (zhp == NULL)
889                 return (EZFS_BADTYPE);
890
891         VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
892         sd.fsname = fsname;
893         sd.fromsnap = fromsnap;
894         sd.tosnap = tosnap;
895         sd.recursive = recursive;
896         sd.verbose = verbose;
897
898         if ((error = send_iterate_fs(zhp, &sd)) != 0) {
899                 nvlist_free(sd.fss);
900                 if (avlp != NULL)
901                         *avlp = NULL;
902                 *nvlp = NULL;
903                 return (error);
904         }
905
906         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
907                 nvlist_free(sd.fss);
908                 *nvlp = NULL;
909                 return (EZFS_NOMEM);
910         }
911
912         *nvlp = sd.fss;
913         return (0);
914 }
915
916 /*
917  * Routines specific to "zfs send"
918  */
919 typedef struct send_dump_data {
920         /* these are all just the short snapname (the part after the @) */
921         const char *fromsnap;
922         const char *tosnap;
923         char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
924         uint64_t prevsnap_obj;
925         boolean_t seenfrom, seento, replicate, doall, fromorigin;
926         boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
927         boolean_t large_block;
928         int outfd;
929         boolean_t err;
930         nvlist_t *fss;
931         nvlist_t *snapholds;
932         avl_tree_t *fsavl;
933         snapfilter_cb_t *filter_cb;
934         void *filter_cb_arg;
935         nvlist_t *debugnv;
936         char holdtag[ZFS_MAX_DATASET_NAME_LEN];
937         int cleanup_fd;
938         uint64_t size;
939 } send_dump_data_t;
940
941 static int
942 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
943     boolean_t fromorigin, uint64_t *sizep)
944 {
945         zfs_cmd_t zc = { 0 };
946         libzfs_handle_t *hdl = zhp->zfs_hdl;
947
948         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
949         assert(fromsnap_obj == 0 || !fromorigin);
950
951         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
952         zc.zc_obj = fromorigin;
953         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
954         zc.zc_fromobj = fromsnap_obj;
955         zc.zc_guid = 1;  /* estimate flag */
956
957         if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
958                 char errbuf[1024];
959                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
960                     "warning: cannot estimate space for '%s'"), zhp->zfs_name);
961
962                 switch (errno) {
963                 case EXDEV:
964                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
965                             "not an earlier snapshot from the same fs"));
966                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
967
968                 case ENOENT:
969                         if (zfs_dataset_exists(hdl, zc.zc_name,
970                             ZFS_TYPE_SNAPSHOT)) {
971                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
972                                     "incremental source (@%s) does not exist"),
973                                     zc.zc_value);
974                         }
975                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
976
977                 case EDQUOT:
978                 case EFBIG:
979                 case EIO:
980                 case ENOLINK:
981                 case ENOSPC:
982                 case ENXIO:
983                 case EPIPE:
984                 case ERANGE:
985                 case EFAULT:
986                 case EROFS:
987                         zfs_error_aux(hdl, strerror(errno));
988                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
989
990                 default:
991                         return (zfs_standard_error(hdl, errno, errbuf));
992                 }
993         }
994
995         *sizep = zc.zc_objset_type;
996
997         return (0);
998 }
999
1000 /*
1001  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
1002  * NULL) to the file descriptor specified by outfd.
1003  */
1004 static int
1005 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
1006     boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
1007     nvlist_t *debugnv)
1008 {
1009         zfs_cmd_t zc = { 0 };
1010         libzfs_handle_t *hdl = zhp->zfs_hdl;
1011         nvlist_t *thisdbg;
1012
1013         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1014         assert(fromsnap_obj == 0 || !fromorigin);
1015
1016         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1017         zc.zc_cookie = outfd;
1018         zc.zc_obj = fromorigin;
1019         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1020         zc.zc_fromobj = fromsnap_obj;
1021         zc.zc_flags = flags;
1022
1023         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
1024         if (fromsnap && fromsnap[0] != '\0') {
1025                 VERIFY(0 == nvlist_add_string(thisdbg,
1026                     "fromsnap", fromsnap));
1027         }
1028
1029         if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
1030                 char errbuf[1024];
1031                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1032                     "warning: cannot send '%s'"), zhp->zfs_name);
1033
1034                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
1035                 if (debugnv) {
1036                         VERIFY(0 == nvlist_add_nvlist(debugnv,
1037                             zhp->zfs_name, thisdbg));
1038                 }
1039                 nvlist_free(thisdbg);
1040
1041                 switch (errno) {
1042                 case EXDEV:
1043                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1044                             "not an earlier snapshot from the same fs"));
1045                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1046
1047                 case ENOENT:
1048                         if (zfs_dataset_exists(hdl, zc.zc_name,
1049                             ZFS_TYPE_SNAPSHOT)) {
1050                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1051                                     "incremental source (@%s) does not exist"),
1052                                     zc.zc_value);
1053                         }
1054                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
1055
1056                 case EDQUOT:
1057                 case EFBIG:
1058                 case EIO:
1059                 case ENOLINK:
1060                 case ENOSPC:
1061 #ifdef illumos
1062                 case ENOSTR:
1063 #endif
1064                 case ENXIO:
1065                 case EPIPE:
1066                 case ERANGE:
1067                 case EFAULT:
1068                 case EROFS:
1069                         zfs_error_aux(hdl, strerror(errno));
1070                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1071
1072                 default:
1073                         return (zfs_standard_error(hdl, errno, errbuf));
1074                 }
1075         }
1076
1077         if (debugnv)
1078                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
1079         nvlist_free(thisdbg);
1080
1081         return (0);
1082 }
1083
1084 static void
1085 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
1086 {
1087         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1088
1089         /*
1090          * zfs_send() only sets snapholds for sends that need them,
1091          * e.g. replication and doall.
1092          */
1093         if (sdd->snapholds == NULL)
1094                 return;
1095
1096         fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1097 }
1098
1099 static void *
1100 send_progress_thread(void *arg)
1101 {
1102         progress_arg_t *pa = arg;
1103         zfs_cmd_t zc = { 0 };
1104         zfs_handle_t *zhp = pa->pa_zhp;
1105         libzfs_handle_t *hdl = zhp->zfs_hdl;
1106         unsigned long long bytes;
1107         char buf[16];
1108         time_t t;
1109         struct tm *tm;
1110
1111         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1112
1113         if (!pa->pa_parsable)
1114                 (void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1115
1116         /*
1117          * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1118          */
1119         for (;;) {
1120                 (void) sleep(1);
1121
1122                 zc.zc_cookie = pa->pa_fd;
1123                 if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1124                         return ((void *)-1);
1125
1126                 (void) time(&t);
1127                 tm = localtime(&t);
1128                 bytes = zc.zc_cookie;
1129
1130                 if (pa->pa_parsable) {
1131                         (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1132                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1133                             bytes, zhp->zfs_name);
1134                 } else {
1135                         zfs_nicenum(bytes, buf, sizeof (buf));
1136                         (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1137                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1138                             buf, zhp->zfs_name);
1139                 }
1140         }
1141 }
1142
1143 static void
1144 send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1145     uint64_t size, boolean_t parsable)
1146 {
1147         if (parsable) {
1148                 if (fromsnap != NULL) {
1149                         (void) fprintf(fout, "incremental\t%s\t%s",
1150                             fromsnap, tosnap);
1151                 } else {
1152                         (void) fprintf(fout, "full\t%s",
1153                             tosnap);
1154                 }
1155         } else {
1156                 if (fromsnap != NULL) {
1157                         if (strchr(fromsnap, '@') == NULL &&
1158                             strchr(fromsnap, '#') == NULL) {
1159                                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1160                                     "send from @%s to %s"),
1161                                     fromsnap, tosnap);
1162                         } else {
1163                                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1164                                     "send from %s to %s"),
1165                                     fromsnap, tosnap);
1166                         }
1167                 } else {
1168                         (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1169                             "full send of %s"),
1170                             tosnap);
1171                 }
1172         }
1173
1174         if (size != 0) {
1175                 if (parsable) {
1176                         (void) fprintf(fout, "\t%llu",
1177                             (longlong_t)size);
1178                 } else {
1179                         char buf[16];
1180                         zfs_nicenum(size, buf, sizeof (buf));
1181                         (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1182                             " estimated size is %s"), buf);
1183                 }
1184         }
1185         (void) fprintf(fout, "\n");
1186 }
1187
1188 static int
1189 dump_snapshot(zfs_handle_t *zhp, void *arg)
1190 {
1191         send_dump_data_t *sdd = arg;
1192         progress_arg_t pa = { 0 };
1193         pthread_t tid;
1194         char *thissnap;
1195         int err;
1196         boolean_t isfromsnap, istosnap, fromorigin;
1197         boolean_t exclude = B_FALSE;
1198         FILE *fout = sdd->std_out ? stdout : stderr;
1199
1200         err = 0;
1201         thissnap = strchr(zhp->zfs_name, '@') + 1;
1202         isfromsnap = (sdd->fromsnap != NULL &&
1203             strcmp(sdd->fromsnap, thissnap) == 0);
1204
1205         if (!sdd->seenfrom && isfromsnap) {
1206                 gather_holds(zhp, sdd);
1207                 sdd->seenfrom = B_TRUE;
1208                 (void) strcpy(sdd->prevsnap, thissnap);
1209                 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1210                 zfs_close(zhp);
1211                 return (0);
1212         }
1213
1214         if (sdd->seento || !sdd->seenfrom) {
1215                 zfs_close(zhp);
1216                 return (0);
1217         }
1218
1219         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1220         if (istosnap)
1221                 sdd->seento = B_TRUE;
1222
1223         if (!sdd->doall && !isfromsnap && !istosnap) {
1224                 if (sdd->replicate) {
1225                         char *snapname;
1226                         nvlist_t *snapprops;
1227                         /*
1228                          * Filter out all intermediate snapshots except origin
1229                          * snapshots needed to replicate clones.
1230                          */
1231                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1232                             zhp->zfs_dmustats.dds_guid, &snapname);
1233
1234                         VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1235                             "snapprops", &snapprops));
1236                         VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1237                             thissnap, &snapprops));
1238                         exclude = !nvlist_exists(snapprops, "is_clone_origin");
1239                 } else {
1240                         exclude = B_TRUE;
1241                 }
1242         }
1243
1244         /*
1245          * If a filter function exists, call it to determine whether
1246          * this snapshot will be sent.
1247          */
1248         if (exclude || (sdd->filter_cb != NULL &&
1249             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1250                 /*
1251                  * This snapshot is filtered out.  Don't send it, and don't
1252                  * set prevsnap_obj, so it will be as if this snapshot didn't
1253                  * exist, and the next accepted snapshot will be sent as
1254                  * an incremental from the last accepted one, or as the
1255                  * first (and full) snapshot in the case of a replication,
1256                  * non-incremental send.
1257                  */
1258                 zfs_close(zhp);
1259                 return (0);
1260         }
1261
1262         gather_holds(zhp, sdd);
1263         fromorigin = sdd->prevsnap[0] == '\0' &&
1264             (sdd->fromorigin || sdd->replicate);
1265
1266         if (sdd->verbose) {
1267                 uint64_t size = 0;
1268                 (void) estimate_ioctl(zhp, sdd->prevsnap_obj,
1269                     fromorigin, &size);
1270
1271                 send_print_verbose(fout, zhp->zfs_name,
1272                     sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1273                     size, sdd->parsable);
1274                 sdd->size += size;
1275         }
1276
1277         if (!sdd->dryrun) {
1278                 /*
1279                  * If progress reporting is requested, spawn a new thread to
1280                  * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1281                  */
1282                 if (sdd->progress) {
1283                         pa.pa_zhp = zhp;
1284                         pa.pa_fd = sdd->outfd;
1285                         pa.pa_parsable = sdd->parsable;
1286
1287                         if ((err = pthread_create(&tid, NULL,
1288                             send_progress_thread, &pa)) != 0) {
1289                                 zfs_close(zhp);
1290                                 return (err);
1291                         }
1292                 }
1293
1294                 enum lzc_send_flags flags = 0;
1295                 if (sdd->large_block)
1296                         flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1297                 if (sdd->embed_data)
1298                         flags |= LZC_SEND_FLAG_EMBED_DATA;
1299
1300                 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1301                     fromorigin, sdd->outfd, flags, sdd->debugnv);
1302
1303                 if (sdd->progress) {
1304                         (void) pthread_cancel(tid);
1305                         (void) pthread_join(tid, NULL);
1306                 }
1307         }
1308
1309         (void) strcpy(sdd->prevsnap, thissnap);
1310         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1311         zfs_close(zhp);
1312         return (err);
1313 }
1314
1315 static int
1316 dump_filesystem(zfs_handle_t *zhp, void *arg)
1317 {
1318         int rv = 0;
1319         send_dump_data_t *sdd = arg;
1320         boolean_t missingfrom = B_FALSE;
1321         zfs_cmd_t zc = { 0 };
1322
1323         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1324             zhp->zfs_name, sdd->tosnap);
1325         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1326                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1327                     "WARNING: could not send %s@%s: does not exist\n"),
1328                     zhp->zfs_name, sdd->tosnap);
1329                 sdd->err = B_TRUE;
1330                 return (0);
1331         }
1332
1333         if (sdd->replicate && sdd->fromsnap) {
1334                 /*
1335                  * If this fs does not have fromsnap, and we're doing
1336                  * recursive, we need to send a full stream from the
1337                  * beginning (or an incremental from the origin if this
1338                  * is a clone).  If we're doing non-recursive, then let
1339                  * them get the error.
1340                  */
1341                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1342                     zhp->zfs_name, sdd->fromsnap);
1343                 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1344                     ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1345                         missingfrom = B_TRUE;
1346                 }
1347         }
1348
1349         sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1350         sdd->prevsnap_obj = 0;
1351         if (sdd->fromsnap == NULL || missingfrom)
1352                 sdd->seenfrom = B_TRUE;
1353
1354         rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1355         if (!sdd->seenfrom) {
1356                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1357                     "WARNING: could not send %s@%s:\n"
1358                     "incremental source (%s@%s) does not exist\n"),
1359                     zhp->zfs_name, sdd->tosnap,
1360                     zhp->zfs_name, sdd->fromsnap);
1361                 sdd->err = B_TRUE;
1362         } else if (!sdd->seento) {
1363                 if (sdd->fromsnap) {
1364                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1365                             "WARNING: could not send %s@%s:\n"
1366                             "incremental source (%s@%s) "
1367                             "is not earlier than it\n"),
1368                             zhp->zfs_name, sdd->tosnap,
1369                             zhp->zfs_name, sdd->fromsnap);
1370                 } else {
1371                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1372                             "WARNING: "
1373                             "could not send %s@%s: does not exist\n"),
1374                             zhp->zfs_name, sdd->tosnap);
1375                 }
1376                 sdd->err = B_TRUE;
1377         }
1378
1379         return (rv);
1380 }
1381
1382 static int
1383 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1384 {
1385         send_dump_data_t *sdd = arg;
1386         nvpair_t *fspair;
1387         boolean_t needagain, progress;
1388
1389         if (!sdd->replicate)
1390                 return (dump_filesystem(rzhp, sdd));
1391
1392         /* Mark the clone origin snapshots. */
1393         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1394             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1395                 nvlist_t *nvfs;
1396                 uint64_t origin_guid = 0;
1397
1398                 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1399                 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1400                 if (origin_guid != 0) {
1401                         char *snapname;
1402                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1403                             origin_guid, &snapname);
1404                         if (origin_nv != NULL) {
1405                                 nvlist_t *snapprops;
1406                                 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1407                                     "snapprops", &snapprops));
1408                                 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1409                                     snapname, &snapprops));
1410                                 VERIFY(0 == nvlist_add_boolean(
1411                                     snapprops, "is_clone_origin"));
1412                         }
1413                 }
1414         }
1415 again:
1416         needagain = progress = B_FALSE;
1417         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1418             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1419                 nvlist_t *fslist, *parent_nv;
1420                 char *fsname;
1421                 zfs_handle_t *zhp;
1422                 int err;
1423                 uint64_t origin_guid = 0;
1424                 uint64_t parent_guid = 0;
1425
1426                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1427                 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1428                         continue;
1429
1430                 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1431                 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1432                 (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1433                     &parent_guid);
1434
1435                 if (parent_guid != 0) {
1436                         parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1437                         if (!nvlist_exists(parent_nv, "sent")) {
1438                                 /* parent has not been sent; skip this one */
1439                                 needagain = B_TRUE;
1440                                 continue;
1441                         }
1442                 }
1443
1444                 if (origin_guid != 0) {
1445                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1446                             origin_guid, NULL);
1447                         if (origin_nv != NULL &&
1448                             !nvlist_exists(origin_nv, "sent")) {
1449                                 /*
1450                                  * origin has not been sent yet;
1451                                  * skip this clone.
1452                                  */
1453                                 needagain = B_TRUE;
1454                                 continue;
1455                         }
1456                 }
1457
1458                 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1459                 if (zhp == NULL)
1460                         return (-1);
1461                 err = dump_filesystem(zhp, sdd);
1462                 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1463                 progress = B_TRUE;
1464                 zfs_close(zhp);
1465                 if (err)
1466                         return (err);
1467         }
1468         if (needagain) {
1469                 assert(progress);
1470                 goto again;
1471         }
1472
1473         /* clean out the sent flags in case we reuse this fss */
1474         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1475             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1476                 nvlist_t *fslist;
1477
1478                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1479                 (void) nvlist_remove_all(fslist, "sent");
1480         }
1481
1482         return (0);
1483 }
1484
1485 nvlist_t *
1486 zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1487 {
1488         unsigned int version;
1489         int nread;
1490         unsigned long long checksum, packed_len;
1491
1492         /*
1493          * Decode token header, which is:
1494          *   <token version>-<checksum of payload>-<uncompressed payload length>
1495          * Note that the only supported token version is 1.
1496          */
1497         nread = sscanf(token, "%u-%llx-%llx-",
1498             &version, &checksum, &packed_len);
1499         if (nread != 3) {
1500                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1501                     "resume token is corrupt (invalid format)"));
1502                 return (NULL);
1503         }
1504
1505         if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1506                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1507                     "resume token is corrupt (invalid version %u)"),
1508                     version);
1509                 return (NULL);
1510         }
1511
1512         /* convert hexadecimal representation to binary */
1513         token = strrchr(token, '-') + 1;
1514         int len = strlen(token) / 2;
1515         unsigned char *compressed = zfs_alloc(hdl, len);
1516         for (int i = 0; i < len; i++) {
1517                 nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1518                 if (nread != 1) {
1519                         free(compressed);
1520                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1521                             "resume token is corrupt "
1522                             "(payload is not hex-encoded)"));
1523                         return (NULL);
1524                 }
1525         }
1526
1527         /* verify checksum */
1528         zio_cksum_t cksum;
1529         fletcher_4_native(compressed, len, NULL, &cksum);
1530         if (cksum.zc_word[0] != checksum) {
1531                 free(compressed);
1532                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1533                     "resume token is corrupt (incorrect checksum)"));
1534                 return (NULL);
1535         }
1536
1537         /* uncompress */
1538         void *packed = zfs_alloc(hdl, packed_len);
1539         uLongf packed_len_long = packed_len;
1540         if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1541             packed_len_long != packed_len) {
1542                 free(packed);
1543                 free(compressed);
1544                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1545                     "resume token is corrupt (decompression failed)"));
1546                 return (NULL);
1547         }
1548
1549         /* unpack nvlist */
1550         nvlist_t *nv;
1551         int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1552         free(packed);
1553         free(compressed);
1554         if (error != 0) {
1555                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1556                     "resume token is corrupt (nvlist_unpack failed)"));
1557                 return (NULL);
1558         }
1559         return (nv);
1560 }
1561
1562 int
1563 zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
1564     const char *resume_token)
1565 {
1566         char errbuf[1024];
1567         char *toname;
1568         char *fromname = NULL;
1569         uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1570         zfs_handle_t *zhp;
1571         int error = 0;
1572         char name[ZFS_MAX_DATASET_NAME_LEN];
1573         enum lzc_send_flags lzc_flags = 0;
1574
1575         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1576             "cannot resume send"));
1577
1578         nvlist_t *resume_nvl =
1579             zfs_send_resume_token_to_nvlist(hdl, resume_token);
1580         if (resume_nvl == NULL) {
1581                 /*
1582                  * zfs_error_aux has already been set by
1583                  * zfs_send_resume_token_to_nvlist
1584                  */
1585                 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1586         }
1587         if (flags->verbose) {
1588                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1589                     "resume token contents:\n"));
1590                 nvlist_print(stderr, resume_nvl);
1591         }
1592
1593         if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1594             nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1595             nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1596             nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1597             nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1598                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1599                     "resume token is corrupt"));
1600                 return (zfs_error(hdl, EZFS_FAULT, errbuf));
1601         }
1602         fromguid = 0;
1603         (void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1604
1605         if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
1606                 lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1607
1608         if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
1609                 if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1610                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1611                             "'%s' is no longer the same snapshot used in "
1612                             "the initial send"), toname);
1613                 } else {
1614                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1615                             "'%s' used in the initial send no longer exists"),
1616                             toname);
1617                 }
1618                 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1619         }
1620         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1621         if (zhp == NULL) {
1622                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1623                     "unable to access '%s'"), name);
1624                 return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1625         }
1626
1627         if (fromguid != 0) {
1628                 if (guid_to_name(hdl, toname, fromguid, B_TRUE, name) != 0) {
1629                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1630                             "incremental source %#llx no longer exists"),
1631                             (longlong_t)fromguid);
1632                         return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1633                 }
1634                 fromname = name;
1635         }
1636
1637         if (flags->verbose) {
1638                 uint64_t size = 0;
1639                 error = lzc_send_space(zhp->zfs_name, fromname, &size);
1640                 if (error == 0)
1641                         size = MAX(0, (int64_t)(size - bytes));
1642                 send_print_verbose(stderr, zhp->zfs_name, fromname,
1643                     size, flags->parsable);
1644         }
1645
1646         if (!flags->dryrun) {
1647                 progress_arg_t pa = { 0 };
1648                 pthread_t tid;
1649                 /*
1650                  * If progress reporting is requested, spawn a new thread to
1651                  * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1652                  */
1653                 if (flags->progress) {
1654                         pa.pa_zhp = zhp;
1655                         pa.pa_fd = outfd;
1656                         pa.pa_parsable = flags->parsable;
1657
1658                         error = pthread_create(&tid, NULL,
1659                             send_progress_thread, &pa);
1660                         if (error != 0) {
1661                                 zfs_close(zhp);
1662                                 return (error);
1663                         }
1664                 }
1665
1666                 error = lzc_send_resume(zhp->zfs_name, fromname, outfd,
1667                     lzc_flags, resumeobj, resumeoff);
1668
1669                 if (flags->progress) {
1670                         (void) pthread_cancel(tid);
1671                         (void) pthread_join(tid, NULL);
1672                 }
1673
1674                 char errbuf[1024];
1675                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1676                     "warning: cannot send '%s'"), zhp->zfs_name);
1677
1678                 zfs_close(zhp);
1679
1680                 switch (error) {
1681                 case 0:
1682                         return (0);
1683                 case EXDEV:
1684                 case ENOENT:
1685                 case EDQUOT:
1686                 case EFBIG:
1687                 case EIO:
1688                 case ENOLINK:
1689                 case ENOSPC:
1690 #ifdef illumos
1691                 case ENOSTR:
1692 #endif
1693                 case ENXIO:
1694                 case EPIPE:
1695                 case ERANGE:
1696                 case EFAULT:
1697                 case EROFS:
1698                         zfs_error_aux(hdl, strerror(errno));
1699                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1700
1701                 default:
1702                         return (zfs_standard_error(hdl, errno, errbuf));
1703                 }
1704         }
1705
1706
1707         zfs_close(zhp);
1708
1709         return (error);
1710 }
1711
1712 /*
1713  * Generate a send stream for the dataset identified by the argument zhp.
1714  *
1715  * The content of the send stream is the snapshot identified by
1716  * 'tosnap'.  Incremental streams are requested in two ways:
1717  *     - from the snapshot identified by "fromsnap" (if non-null) or
1718  *     - from the origin of the dataset identified by zhp, which must
1719  *       be a clone.  In this case, "fromsnap" is null and "fromorigin"
1720  *       is TRUE.
1721  *
1722  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1723  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1724  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1725  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1726  * case too. If "props" is set, send properties.
1727  */
1728 int
1729 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1730     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1731     void *cb_arg, nvlist_t **debugnvp)
1732 {
1733         char errbuf[1024];
1734         send_dump_data_t sdd = { 0 };
1735         int err = 0;
1736         nvlist_t *fss = NULL;
1737         avl_tree_t *fsavl = NULL;
1738         static uint64_t holdseq;
1739         int spa_version;
1740         pthread_t tid = 0;
1741         int pipefd[2];
1742         dedup_arg_t dda = { 0 };
1743         int featureflags = 0;
1744         FILE *fout;
1745
1746         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1747             "cannot send '%s'"), zhp->zfs_name);
1748
1749         if (fromsnap && fromsnap[0] == '\0') {
1750                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1751                     "zero-length incremental source"));
1752                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1753         }
1754
1755         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1756                 uint64_t version;
1757                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1758                 if (version >= ZPL_VERSION_SA) {
1759                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1760                 }
1761         }
1762
1763         if (flags->dedup && !flags->dryrun) {
1764                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1765                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1766                 if ((err = pipe(pipefd)) != 0) {
1767                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1768                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1769                             errbuf));
1770                 }
1771                 dda.outputfd = outfd;
1772                 dda.inputfd = pipefd[1];
1773                 dda.dedup_hdl = zhp->zfs_hdl;
1774                 if ((err = pthread_create(&tid, NULL, cksummer, &dda)) != 0) {
1775                         (void) close(pipefd[0]);
1776                         (void) close(pipefd[1]);
1777                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1778                         return (zfs_error(zhp->zfs_hdl,
1779                             EZFS_THREADCREATEFAILED, errbuf));
1780                 }
1781         }
1782
1783         if (flags->replicate || flags->doall || flags->props) {
1784                 dmu_replay_record_t drr = { 0 };
1785                 char *packbuf = NULL;
1786                 size_t buflen = 0;
1787                 zio_cksum_t zc = { 0 };
1788
1789                 if (flags->replicate || flags->props) {
1790                         nvlist_t *hdrnv;
1791
1792                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1793                         if (fromsnap) {
1794                                 VERIFY(0 == nvlist_add_string(hdrnv,
1795                                     "fromsnap", fromsnap));
1796                         }
1797                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1798                         if (!flags->replicate) {
1799                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1800                                     "not_recursive"));
1801                         }
1802
1803                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1804                             fromsnap, tosnap, flags->replicate, flags->verbose,
1805                             &fss, &fsavl);
1806                         if (err)
1807                                 goto err_out;
1808                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1809                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1810                             NV_ENCODE_XDR, 0);
1811                         if (debugnvp)
1812                                 *debugnvp = hdrnv;
1813                         else
1814                                 nvlist_free(hdrnv);
1815                         if (err)
1816                                 goto stderr_out;
1817                 }
1818
1819                 if (!flags->dryrun) {
1820                         /* write first begin record */
1821                         drr.drr_type = DRR_BEGIN;
1822                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1823                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1824                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1825                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1826                             drr_versioninfo, featureflags);
1827                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1828                             sizeof (drr.drr_u.drr_begin.drr_toname),
1829                             "%s@%s", zhp->zfs_name, tosnap);
1830                         drr.drr_payloadlen = buflen;
1831
1832                         err = dump_record(&drr, packbuf, buflen, &zc, outfd);
1833                         free(packbuf);
1834                         if (err != 0)
1835                                 goto stderr_out;
1836
1837                         /* write end record */
1838                         bzero(&drr, sizeof (drr));
1839                         drr.drr_type = DRR_END;
1840                         drr.drr_u.drr_end.drr_checksum = zc;
1841                         err = write(outfd, &drr, sizeof (drr));
1842                         if (err == -1) {
1843                                 err = errno;
1844                                 goto stderr_out;
1845                         }
1846
1847                         err = 0;
1848                 }
1849         }
1850
1851         /* dump each stream */
1852         sdd.fromsnap = fromsnap;
1853         sdd.tosnap = tosnap;
1854         if (tid != 0)
1855                 sdd.outfd = pipefd[0];
1856         else
1857                 sdd.outfd = outfd;
1858         sdd.replicate = flags->replicate;
1859         sdd.doall = flags->doall;
1860         sdd.fromorigin = flags->fromorigin;
1861         sdd.fss = fss;
1862         sdd.fsavl = fsavl;
1863         sdd.verbose = flags->verbose;
1864         sdd.parsable = flags->parsable;
1865         sdd.progress = flags->progress;
1866         sdd.dryrun = flags->dryrun;
1867         sdd.large_block = flags->largeblock;
1868         sdd.embed_data = flags->embed_data;
1869         sdd.filter_cb = filter_func;
1870         sdd.filter_cb_arg = cb_arg;
1871         if (debugnvp)
1872                 sdd.debugnv = *debugnvp;
1873         if (sdd.verbose && sdd.dryrun)
1874                 sdd.std_out = B_TRUE;
1875         fout = sdd.std_out ? stdout : stderr;
1876
1877         /*
1878          * Some flags require that we place user holds on the datasets that are
1879          * being sent so they don't get destroyed during the send. We can skip
1880          * this step if the pool is imported read-only since the datasets cannot
1881          * be destroyed.
1882          */
1883         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1884             ZPOOL_PROP_READONLY, NULL) &&
1885             zfs_spa_version(zhp, &spa_version) == 0 &&
1886             spa_version >= SPA_VERSION_USERREFS &&
1887             (flags->doall || flags->replicate)) {
1888                 ++holdseq;
1889                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1890                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1891                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1892                 if (sdd.cleanup_fd < 0) {
1893                         err = errno;
1894                         goto stderr_out;
1895                 }
1896                 sdd.snapholds = fnvlist_alloc();
1897         } else {
1898                 sdd.cleanup_fd = -1;
1899                 sdd.snapholds = NULL;
1900         }
1901         if (flags->verbose || sdd.snapholds != NULL) {
1902                 /*
1903                  * Do a verbose no-op dry run to get all the verbose output
1904                  * or to gather snapshot hold's before generating any data,
1905                  * then do a non-verbose real run to generate the streams.
1906                  */
1907                 sdd.dryrun = B_TRUE;
1908                 err = dump_filesystems(zhp, &sdd);
1909
1910                 if (err != 0)
1911                         goto stderr_out;
1912
1913                 if (flags->verbose) {
1914                         if (flags->parsable) {
1915                                 (void) fprintf(fout, "size\t%llu\n",
1916                                     (longlong_t)sdd.size);
1917                         } else {
1918                                 char buf[16];
1919                                 zfs_nicenum(sdd.size, buf, sizeof (buf));
1920                                 (void) fprintf(fout, dgettext(TEXT_DOMAIN,
1921                                     "total estimated size is %s\n"), buf);
1922                         }
1923                 }
1924
1925                 /* Ensure no snaps found is treated as an error. */
1926                 if (!sdd.seento) {
1927                         err = ENOENT;
1928                         goto err_out;
1929                 }
1930
1931                 /* Skip the second run if dryrun was requested. */
1932                 if (flags->dryrun)
1933                         goto err_out;
1934
1935                 if (sdd.snapholds != NULL) {
1936                         err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1937                         if (err != 0)
1938                                 goto stderr_out;
1939
1940                         fnvlist_free(sdd.snapholds);
1941                         sdd.snapholds = NULL;
1942                 }
1943
1944                 sdd.dryrun = B_FALSE;
1945                 sdd.verbose = B_FALSE;
1946         }
1947
1948         err = dump_filesystems(zhp, &sdd);
1949         fsavl_destroy(fsavl);
1950         nvlist_free(fss);
1951
1952         /* Ensure no snaps found is treated as an error. */
1953         if (err == 0 && !sdd.seento)
1954                 err = ENOENT;
1955
1956         if (tid != 0) {
1957                 if (err != 0)
1958                         (void) pthread_cancel(tid);
1959                 (void) close(pipefd[0]);
1960                 (void) pthread_join(tid, NULL);
1961         }
1962
1963         if (sdd.cleanup_fd != -1) {
1964                 VERIFY(0 == close(sdd.cleanup_fd));
1965                 sdd.cleanup_fd = -1;
1966         }
1967
1968         if (!flags->dryrun && (flags->replicate || flags->doall ||
1969             flags->props)) {
1970                 /*
1971                  * write final end record.  NB: want to do this even if
1972                  * there was some error, because it might not be totally
1973                  * failed.
1974                  */
1975                 dmu_replay_record_t drr = { 0 };
1976                 drr.drr_type = DRR_END;
1977                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1978                         return (zfs_standard_error(zhp->zfs_hdl,
1979                             errno, errbuf));
1980                 }
1981         }
1982
1983         return (err || sdd.err);
1984
1985 stderr_out:
1986         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1987 err_out:
1988         fsavl_destroy(fsavl);
1989         nvlist_free(fss);
1990         fnvlist_free(sdd.snapholds);
1991
1992         if (sdd.cleanup_fd != -1)
1993                 VERIFY(0 == close(sdd.cleanup_fd));
1994         if (tid != 0) {
1995                 (void) pthread_cancel(tid);
1996                 (void) close(pipefd[0]);
1997                 (void) pthread_join(tid, NULL);
1998         }
1999         return (err);
2000 }
2001
2002 int
2003 zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
2004     enum lzc_send_flags flags)
2005 {
2006         int err;
2007         libzfs_handle_t *hdl = zhp->zfs_hdl;
2008
2009         char errbuf[1024];
2010         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2011             "warning: cannot send '%s'"), zhp->zfs_name);
2012
2013         err = lzc_send(zhp->zfs_name, from, fd, flags);
2014         if (err != 0) {
2015                 switch (errno) {
2016                 case EXDEV:
2017                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2018                             "not an earlier snapshot from the same fs"));
2019                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2020
2021                 case ENOENT:
2022                 case ESRCH:
2023                         if (lzc_exists(zhp->zfs_name)) {
2024                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2025                                     "incremental source (%s) does not exist"),
2026                                     from);
2027                         }
2028                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2029
2030                 case EBUSY:
2031                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2032                             "target is busy; if a filesystem, "
2033                             "it must not be mounted"));
2034                         return (zfs_error(hdl, EZFS_BUSY, errbuf));
2035
2036                 case EDQUOT:
2037                 case EFBIG:
2038                 case EIO:
2039                 case ENOLINK:
2040                 case ENOSPC:
2041 #ifdef illumos
2042                 case ENOSTR:
2043 #endif
2044                 case ENXIO:
2045                 case EPIPE:
2046                 case ERANGE:
2047                 case EFAULT:
2048                 case EROFS:
2049                         zfs_error_aux(hdl, strerror(errno));
2050                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2051
2052                 default:
2053                         return (zfs_standard_error(hdl, errno, errbuf));
2054                 }
2055         }
2056         return (err != 0);
2057 }
2058
2059 /*
2060  * Routines specific to "zfs recv"
2061  */
2062
2063 static int
2064 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2065     boolean_t byteswap, zio_cksum_t *zc)
2066 {
2067         char *cp = buf;
2068         int rv;
2069         int len = ilen;
2070
2071         assert(ilen <= SPA_MAXBLOCKSIZE);
2072
2073         do {
2074                 rv = read(fd, cp, len);
2075                 cp += rv;
2076                 len -= rv;
2077         } while (rv > 0);
2078
2079         if (rv < 0 || len != 0) {
2080                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2081                     "failed to read from stream"));
2082                 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2083                     "cannot receive")));
2084         }
2085
2086         if (zc) {
2087                 if (byteswap)
2088                         fletcher_4_incremental_byteswap(buf, ilen, zc);
2089                 else
2090                         fletcher_4_incremental_native(buf, ilen, zc);
2091         }
2092         return (0);
2093 }
2094
2095 static int
2096 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2097     boolean_t byteswap, zio_cksum_t *zc)
2098 {
2099         char *buf;
2100         int err;
2101
2102         buf = zfs_alloc(hdl, len);
2103         if (buf == NULL)
2104                 return (ENOMEM);
2105
2106         err = recv_read(hdl, fd, buf, len, byteswap, zc);
2107         if (err != 0) {
2108                 free(buf);
2109                 return (err);
2110         }
2111
2112         err = nvlist_unpack(buf, len, nvp, 0);
2113         free(buf);
2114         if (err != 0) {
2115                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2116                     "stream (malformed nvlist)"));
2117                 return (EINVAL);
2118         }
2119         return (0);
2120 }
2121
2122 static int
2123 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2124     int baselen, char *newname, recvflags_t *flags)
2125 {
2126         static int seq;
2127         zfs_cmd_t zc = { 0 };
2128         int err;
2129         prop_changelist_t *clp;
2130         zfs_handle_t *zhp;
2131
2132         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2133         if (zhp == NULL)
2134                 return (-1);
2135         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2136             flags->force ? MS_FORCE : 0);
2137         zfs_close(zhp);
2138         if (clp == NULL)
2139                 return (-1);
2140         err = changelist_prefix(clp);
2141         if (err)
2142                 return (err);
2143
2144         zc.zc_objset_type = DMU_OST_ZFS;
2145         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2146
2147         if (tryname) {
2148                 (void) strcpy(newname, tryname);
2149
2150                 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
2151
2152                 if (flags->verbose) {
2153                         (void) printf("attempting rename %s to %s\n",
2154                             zc.zc_name, zc.zc_value);
2155                 }
2156                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2157                 if (err == 0)
2158                         changelist_rename(clp, name, tryname);
2159         } else {
2160                 err = ENOENT;
2161         }
2162
2163         if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
2164                 seq++;
2165
2166                 (void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
2167                     "%.*srecv-%u-%u", baselen, name, getpid(), seq);
2168                 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
2169
2170                 if (flags->verbose) {
2171                         (void) printf("failed - trying rename %s to %s\n",
2172                             zc.zc_name, zc.zc_value);
2173                 }
2174                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
2175                 if (err == 0)
2176                         changelist_rename(clp, name, newname);
2177                 if (err && flags->verbose) {
2178                         (void) printf("failed (%u) - "
2179                             "will try again on next pass\n", errno);
2180                 }
2181                 err = EAGAIN;
2182         } else if (flags->verbose) {
2183                 if (err == 0)
2184                         (void) printf("success\n");
2185                 else
2186                         (void) printf("failed (%u)\n", errno);
2187         }
2188
2189         (void) changelist_postfix(clp);
2190         changelist_free(clp);
2191
2192         return (err);
2193 }
2194
2195 static int
2196 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
2197     char *newname, recvflags_t *flags)
2198 {
2199         zfs_cmd_t zc = { 0 };
2200         int err = 0;
2201         prop_changelist_t *clp;
2202         zfs_handle_t *zhp;
2203         boolean_t defer = B_FALSE;
2204         int spa_version;
2205
2206         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
2207         if (zhp == NULL)
2208                 return (-1);
2209         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
2210             flags->force ? MS_FORCE : 0);
2211         if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
2212             zfs_spa_version(zhp, &spa_version) == 0 &&
2213             spa_version >= SPA_VERSION_USERREFS)
2214                 defer = B_TRUE;
2215         zfs_close(zhp);
2216         if (clp == NULL)
2217                 return (-1);
2218         err = changelist_prefix(clp);
2219         if (err)
2220                 return (err);
2221
2222         zc.zc_objset_type = DMU_OST_ZFS;
2223         zc.zc_defer_destroy = defer;
2224         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
2225
2226         if (flags->verbose)
2227                 (void) printf("attempting destroy %s\n", zc.zc_name);
2228         err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
2229         if (err == 0) {
2230                 if (flags->verbose)
2231                         (void) printf("success\n");
2232                 changelist_remove(clp, zc.zc_name);
2233         }
2234
2235         (void) changelist_postfix(clp);
2236         changelist_free(clp);
2237
2238         /*
2239          * Deferred destroy might destroy the snapshot or only mark it to be
2240          * destroyed later, and it returns success in either case.
2241          */
2242         if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
2243             ZFS_TYPE_SNAPSHOT))) {
2244                 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
2245         }
2246
2247         return (err);
2248 }
2249
2250 typedef struct guid_to_name_data {
2251         uint64_t guid;
2252         boolean_t bookmark_ok;
2253         char *name;
2254         char *skip;
2255 } guid_to_name_data_t;
2256
2257 static int
2258 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
2259 {
2260         guid_to_name_data_t *gtnd = arg;
2261         const char *slash;
2262         int err;
2263
2264         if (gtnd->skip != NULL &&
2265             (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
2266             strcmp(slash + 1, gtnd->skip) == 0) {
2267                 zfs_close(zhp);
2268                 return (0);
2269         }
2270
2271         if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid) {
2272                 (void) strcpy(gtnd->name, zhp->zfs_name);
2273                 zfs_close(zhp);
2274                 return (EEXIST);
2275         }
2276
2277         err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
2278         if (err != EEXIST && gtnd->bookmark_ok)
2279                 err = zfs_iter_bookmarks(zhp, guid_to_name_cb, gtnd);
2280         zfs_close(zhp);
2281         return (err);
2282 }
2283
2284 /*
2285  * Attempt to find the local dataset associated with this guid.  In the case of
2286  * multiple matches, we attempt to find the "best" match by searching
2287  * progressively larger portions of the hierarchy.  This allows one to send a
2288  * tree of datasets individually and guarantee that we will find the source
2289  * guid within that hierarchy, even if there are multiple matches elsewhere.
2290  */
2291 static int
2292 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
2293     boolean_t bookmark_ok, char *name)
2294 {
2295         char pname[ZFS_MAX_DATASET_NAME_LEN];
2296         guid_to_name_data_t gtnd;
2297
2298         gtnd.guid = guid;
2299         gtnd.bookmark_ok = bookmark_ok;
2300         gtnd.name = name;
2301         gtnd.skip = NULL;
2302
2303         /*
2304          * Search progressively larger portions of the hierarchy, starting
2305          * with the filesystem specified by 'parent'.  This will
2306          * select the "most local" version of the origin snapshot in the case
2307          * that there are multiple matching snapshots in the system.
2308          */
2309         (void) strlcpy(pname, parent, sizeof (pname));
2310         char *cp = strrchr(pname, '@');
2311         if (cp == NULL)
2312                 cp = strchr(pname, '\0');
2313         for (; cp != NULL; cp = strrchr(pname, '/')) {
2314                 /* Chop off the last component and open the parent */
2315                 *cp = '\0';
2316                 zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
2317
2318                 if (zhp == NULL)
2319                         continue;
2320                 int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
2321                 if (err != EEXIST)
2322                         err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
2323                 if (err != EEXIST && bookmark_ok)
2324                         err = zfs_iter_bookmarks(zhp, guid_to_name_cb, &gtnd);
2325                 zfs_close(zhp);
2326                 if (err == EEXIST)
2327                         return (0);
2328
2329                 /*
2330                  * Remember the last portion of the dataset so we skip it next
2331                  * time through (as we've already searched that portion of the
2332                  * hierarchy).
2333                  */
2334                 gtnd.skip = strrchr(pname, '/') + 1;
2335         }
2336
2337         return (ENOENT);
2338 }
2339
2340 /*
2341  * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
2342  * guid1 is after guid2.
2343  */
2344 static int
2345 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
2346     uint64_t guid1, uint64_t guid2)
2347 {
2348         nvlist_t *nvfs;
2349         char *fsname, *snapname;
2350         char buf[ZFS_MAX_DATASET_NAME_LEN];
2351         int rv;
2352         zfs_handle_t *guid1hdl, *guid2hdl;
2353         uint64_t create1, create2;
2354
2355         if (guid2 == 0)
2356                 return (0);
2357         if (guid1 == 0)
2358                 return (1);
2359
2360         nvfs = fsavl_find(avl, guid1, &snapname);
2361         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2362         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2363         guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2364         if (guid1hdl == NULL)
2365                 return (-1);
2366
2367         nvfs = fsavl_find(avl, guid2, &snapname);
2368         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2369         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2370         guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2371         if (guid2hdl == NULL) {
2372                 zfs_close(guid1hdl);
2373                 return (-1);
2374         }
2375
2376         create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2377         create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2378
2379         if (create1 < create2)
2380                 rv = -1;
2381         else if (create1 > create2)
2382                 rv = +1;
2383         else
2384                 rv = 0;
2385
2386         zfs_close(guid1hdl);
2387         zfs_close(guid2hdl);
2388
2389         return (rv);
2390 }
2391
2392 static int
2393 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2394     recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2395     nvlist_t *renamed)
2396 {
2397         nvlist_t *local_nv, *deleted = NULL;
2398         avl_tree_t *local_avl;
2399         nvpair_t *fselem, *nextfselem;
2400         char *fromsnap;
2401         char newname[ZFS_MAX_DATASET_NAME_LEN];
2402         char guidname[32];
2403         int error;
2404         boolean_t needagain, progress, recursive;
2405         char *s1, *s2;
2406
2407         VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2408
2409         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2410             ENOENT);
2411
2412         if (flags->dryrun)
2413                 return (0);
2414
2415 again:
2416         needagain = progress = B_FALSE;
2417
2418         VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2419
2420         if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2421             recursive, B_FALSE, &local_nv, &local_avl)) != 0)
2422                 return (error);
2423
2424         /*
2425          * Process deletes and renames
2426          */
2427         for (fselem = nvlist_next_nvpair(local_nv, NULL);
2428             fselem; fselem = nextfselem) {
2429                 nvlist_t *nvfs, *snaps;
2430                 nvlist_t *stream_nvfs = NULL;
2431                 nvpair_t *snapelem, *nextsnapelem;
2432                 uint64_t fromguid = 0;
2433                 uint64_t originguid = 0;
2434                 uint64_t stream_originguid = 0;
2435                 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2436                 char *fsname, *stream_fsname;
2437
2438                 nextfselem = nvlist_next_nvpair(local_nv, fselem);
2439
2440                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2441                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2442                 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2443                 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2444                     &parent_fromsnap_guid));
2445                 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2446
2447                 /*
2448                  * First find the stream's fs, so we can check for
2449                  * a different origin (due to "zfs promote")
2450                  */
2451                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2452                     snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2453                         uint64_t thisguid;
2454
2455                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2456                         stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2457
2458                         if (stream_nvfs != NULL)
2459                                 break;
2460                 }
2461
2462                 /* check for promote */
2463                 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
2464                     &stream_originguid);
2465                 if (stream_nvfs && originguid != stream_originguid) {
2466                         switch (created_before(hdl, local_avl,
2467                             stream_originguid, originguid)) {
2468                         case 1: {
2469                                 /* promote it! */
2470                                 zfs_cmd_t zc = { 0 };
2471                                 nvlist_t *origin_nvfs;
2472                                 char *origin_fsname;
2473
2474                                 if (flags->verbose)
2475                                         (void) printf("promoting %s\n", fsname);
2476
2477                                 origin_nvfs = fsavl_find(local_avl, originguid,
2478                                     NULL);
2479                                 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2480                                     "name", &origin_fsname));
2481                                 (void) strlcpy(zc.zc_value, origin_fsname,
2482                                     sizeof (zc.zc_value));
2483                                 (void) strlcpy(zc.zc_name, fsname,
2484                                     sizeof (zc.zc_name));
2485                                 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2486                                 if (error == 0)
2487                                         progress = B_TRUE;
2488                                 break;
2489                         }
2490                         default:
2491                                 break;
2492                         case -1:
2493                                 fsavl_destroy(local_avl);
2494                                 nvlist_free(local_nv);
2495                                 return (-1);
2496                         }
2497                         /*
2498                          * We had/have the wrong origin, therefore our
2499                          * list of snapshots is wrong.  Need to handle
2500                          * them on the next pass.
2501                          */
2502                         needagain = B_TRUE;
2503                         continue;
2504                 }
2505
2506                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2507                     snapelem; snapelem = nextsnapelem) {
2508                         uint64_t thisguid;
2509                         char *stream_snapname;
2510                         nvlist_t *found, *props;
2511
2512                         nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2513
2514                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2515                         found = fsavl_find(stream_avl, thisguid,
2516                             &stream_snapname);
2517
2518                         /* check for delete */
2519                         if (found == NULL) {
2520                                 char name[ZFS_MAX_DATASET_NAME_LEN];
2521
2522                                 if (!flags->force)
2523                                         continue;
2524
2525                                 (void) snprintf(name, sizeof (name), "%s@%s",
2526                                     fsname, nvpair_name(snapelem));
2527
2528                                 error = recv_destroy(hdl, name,
2529                                     strlen(fsname)+1, newname, flags);
2530                                 if (error)
2531                                         needagain = B_TRUE;
2532                                 else
2533                                         progress = B_TRUE;
2534                                 sprintf(guidname, "%lu", thisguid);
2535                                 nvlist_add_boolean(deleted, guidname);
2536                                 continue;
2537                         }
2538
2539                         stream_nvfs = found;
2540
2541                         if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2542                             &props) && 0 == nvlist_lookup_nvlist(props,
2543                             stream_snapname, &props)) {
2544                                 zfs_cmd_t zc = { 0 };
2545
2546                                 zc.zc_cookie = B_TRUE; /* received */
2547                                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2548                                     "%s@%s", fsname, nvpair_name(snapelem));
2549                                 if (zcmd_write_src_nvlist(hdl, &zc,
2550                                     props) == 0) {
2551                                         (void) zfs_ioctl(hdl,
2552                                             ZFS_IOC_SET_PROP, &zc);
2553                                         zcmd_free_nvlists(&zc);
2554                                 }
2555                         }
2556
2557                         /* check for different snapname */
2558                         if (strcmp(nvpair_name(snapelem),
2559                             stream_snapname) != 0) {
2560                                 char name[ZFS_MAX_DATASET_NAME_LEN];
2561                                 char tryname[ZFS_MAX_DATASET_NAME_LEN];
2562
2563                                 (void) snprintf(name, sizeof (name), "%s@%s",
2564                                     fsname, nvpair_name(snapelem));
2565                                 (void) snprintf(tryname, sizeof (name), "%s@%s",
2566                                     fsname, stream_snapname);
2567
2568                                 error = recv_rename(hdl, name, tryname,
2569                                     strlen(fsname)+1, newname, flags);
2570                                 if (error)
2571                                         needagain = B_TRUE;
2572                                 else
2573                                         progress = B_TRUE;
2574                         }
2575
2576                         if (strcmp(stream_snapname, fromsnap) == 0)
2577                                 fromguid = thisguid;
2578                 }
2579
2580                 /* check for delete */
2581                 if (stream_nvfs == NULL) {
2582                         if (!flags->force)
2583                                 continue;
2584
2585                         error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2586                             newname, flags);
2587                         if (error)
2588                                 needagain = B_TRUE;
2589                         else
2590                                 progress = B_TRUE;
2591                         sprintf(guidname, "%lu", parent_fromsnap_guid);
2592                         nvlist_add_boolean(deleted, guidname);
2593                         continue;
2594                 }
2595
2596                 if (fromguid == 0) {
2597                         if (flags->verbose) {
2598                                 (void) printf("local fs %s does not have "
2599                                     "fromsnap (%s in stream); must have "
2600                                     "been deleted locally; ignoring\n",
2601                                     fsname, fromsnap);
2602                         }
2603                         continue;
2604                 }
2605
2606                 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2607                     "name", &stream_fsname));
2608                 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2609                     "parentfromsnap", &stream_parent_fromsnap_guid));
2610
2611                 s1 = strrchr(fsname, '/');
2612                 s2 = strrchr(stream_fsname, '/');
2613
2614                 /*
2615                  * Check if we're going to rename based on parent guid change
2616                  * and the current parent guid was also deleted. If it was then
2617                  * rename will fail and is likely unneeded, so avoid this and
2618                  * force an early retry to determine the new
2619                  * parent_fromsnap_guid.
2620                  */
2621                 if (stream_parent_fromsnap_guid != 0 &&
2622                     parent_fromsnap_guid != 0 &&
2623                     stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2624                         sprintf(guidname, "%lu", parent_fromsnap_guid);
2625                         if (nvlist_exists(deleted, guidname)) {
2626                                 progress = B_TRUE;
2627                                 needagain = B_TRUE;
2628                                 goto doagain;
2629                         }
2630                 }
2631
2632                 /*
2633                  * Check for rename. If the exact receive path is specified, it
2634                  * does not count as a rename, but we still need to check the
2635                  * datasets beneath it.
2636                  */
2637                 if ((stream_parent_fromsnap_guid != 0 &&
2638                     parent_fromsnap_guid != 0 &&
2639                     stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2640                     ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2641                     (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2642                         nvlist_t *parent;
2643                         char tryname[ZFS_MAX_DATASET_NAME_LEN];
2644
2645                         parent = fsavl_find(local_avl,
2646                             stream_parent_fromsnap_guid, NULL);
2647                         /*
2648                          * NB: parent might not be found if we used the
2649                          * tosnap for stream_parent_fromsnap_guid,
2650                          * because the parent is a newly-created fs;
2651                          * we'll be able to rename it after we recv the
2652                          * new fs.
2653                          */
2654                         if (parent != NULL) {
2655                                 char *pname;
2656
2657                                 VERIFY(0 == nvlist_lookup_string(parent, "name",
2658                                     &pname));
2659                                 (void) snprintf(tryname, sizeof (tryname),
2660                                     "%s%s", pname, strrchr(stream_fsname, '/'));
2661                         } else {
2662                                 tryname[0] = '\0';
2663                                 if (flags->verbose) {
2664                                         (void) printf("local fs %s new parent "
2665                                             "not found\n", fsname);
2666                                 }
2667                         }
2668
2669                         newname[0] = '\0';
2670
2671                         error = recv_rename(hdl, fsname, tryname,
2672                             strlen(tofs)+1, newname, flags);
2673
2674                         if (renamed != NULL && newname[0] != '\0') {
2675                                 VERIFY(0 == nvlist_add_boolean(renamed,
2676                                     newname));
2677                         }
2678
2679                         if (error)
2680                                 needagain = B_TRUE;
2681                         else
2682                                 progress = B_TRUE;
2683                 }
2684         }
2685
2686 doagain:
2687         fsavl_destroy(local_avl);
2688         nvlist_free(local_nv);
2689         nvlist_free(deleted);
2690
2691         if (needagain && progress) {
2692                 /* do another pass to fix up temporary names */
2693                 if (flags->verbose)
2694                         (void) printf("another pass:\n");
2695                 goto again;
2696         }
2697
2698         return (needagain);
2699 }
2700
2701 static int
2702 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2703     recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2704     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2705 {
2706         nvlist_t *stream_nv = NULL;
2707         avl_tree_t *stream_avl = NULL;
2708         char *fromsnap = NULL;
2709         char *sendsnap = NULL;
2710         char *cp;
2711         char tofs[ZFS_MAX_DATASET_NAME_LEN];
2712         char sendfs[ZFS_MAX_DATASET_NAME_LEN];
2713         char errbuf[1024];
2714         dmu_replay_record_t drre;
2715         int error;
2716         boolean_t anyerr = B_FALSE;
2717         boolean_t softerr = B_FALSE;
2718         boolean_t recursive;
2719
2720         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2721             "cannot receive"));
2722
2723         assert(drr->drr_type == DRR_BEGIN);
2724         assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2725         assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2726             DMU_COMPOUNDSTREAM);
2727
2728         /*
2729          * Read in the nvlist from the stream.
2730          */
2731         if (drr->drr_payloadlen != 0) {
2732                 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2733                     &stream_nv, flags->byteswap, zc);
2734                 if (error) {
2735                         error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2736                         goto out;
2737                 }
2738         }
2739
2740         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2741             ENOENT);
2742
2743         if (recursive && strchr(destname, '@')) {
2744                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2745                     "cannot specify snapshot name for multi-snapshot stream"));
2746                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2747                 goto out;
2748         }
2749
2750         /*
2751          * Read in the end record and verify checksum.
2752          */
2753         if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2754             flags->byteswap, NULL)))
2755                 goto out;
2756         if (flags->byteswap) {
2757                 drre.drr_type = BSWAP_32(drre.drr_type);
2758                 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2759                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2760                 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2761                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2762                 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2763                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2764                 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2765                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2766         }
2767         if (drre.drr_type != DRR_END) {
2768                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2769                 goto out;
2770         }
2771         if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2772                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2773                     "incorrect header checksum"));
2774                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2775                 goto out;
2776         }
2777
2778         (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2779
2780         if (drr->drr_payloadlen != 0) {
2781                 nvlist_t *stream_fss;
2782
2783                 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2784                     &stream_fss));
2785                 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2786                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2787                             "couldn't allocate avl tree"));
2788                         error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2789                         goto out;
2790                 }
2791
2792                 if (fromsnap != NULL) {
2793                         nvlist_t *renamed = NULL;
2794                         nvpair_t *pair = NULL;
2795
2796                         (void) strlcpy(tofs, destname, sizeof (tofs));
2797                         if (flags->isprefix) {
2798                                 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2799                                 int i;
2800
2801                                 if (flags->istail) {
2802                                         cp = strrchr(drrb->drr_toname, '/');
2803                                         if (cp == NULL) {
2804                                                 (void) strlcat(tofs, "/",
2805                                                     sizeof (tofs));
2806                                                 i = 0;
2807                                         } else {
2808                                                 i = (cp - drrb->drr_toname);
2809                                         }
2810                                 } else {
2811                                         i = strcspn(drrb->drr_toname, "/@");
2812                                 }
2813                                 /* zfs_receive_one() will create_parents() */
2814                                 (void) strlcat(tofs, &drrb->drr_toname[i],
2815                                     sizeof (tofs));
2816                                 *strchr(tofs, '@') = '\0';
2817                         }
2818
2819                         if (recursive && !flags->dryrun && !flags->nomount) {
2820                                 VERIFY(0 == nvlist_alloc(&renamed,
2821                                     NV_UNIQUE_NAME, 0));
2822                         }
2823
2824                         softerr = recv_incremental_replication(hdl, tofs, flags,
2825                             stream_nv, stream_avl, renamed);
2826
2827                         /* Unmount renamed filesystems before receiving. */
2828                         while ((pair = nvlist_next_nvpair(renamed,
2829                             pair)) != NULL) {
2830                                 zfs_handle_t *zhp;
2831                                 prop_changelist_t *clp = NULL;
2832
2833                                 zhp = zfs_open(hdl, nvpair_name(pair),
2834                                     ZFS_TYPE_FILESYSTEM);
2835                                 if (zhp != NULL) {
2836                                         clp = changelist_gather(zhp,
2837                                             ZFS_PROP_MOUNTPOINT, 0, 0);
2838                                         zfs_close(zhp);
2839                                         if (clp != NULL) {
2840                                                 softerr |=
2841                                                     changelist_prefix(clp);
2842                                                 changelist_free(clp);
2843                                         }
2844                                 }
2845                         }
2846
2847                         nvlist_free(renamed);
2848                 }
2849         }
2850
2851         /*
2852          * Get the fs specified by the first path in the stream (the top level
2853          * specified by 'zfs send') and pass it to each invocation of
2854          * zfs_receive_one().
2855          */
2856         (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2857             sizeof (sendfs));
2858         if ((cp = strchr(sendfs, '@')) != NULL) {
2859                 *cp = '\0';
2860                 /*
2861                  * Find the "sendsnap", the final snapshot in a replication
2862                  * stream.  zfs_receive_one() handles certain errors
2863                  * differently, depending on if the contained stream is the
2864                  * last one or not.
2865                  */
2866                 sendsnap = (cp + 1);
2867         }
2868
2869         /* Finally, receive each contained stream */
2870         do {
2871                 /*
2872                  * we should figure out if it has a recoverable
2873                  * error, in which case do a recv_skip() and drive on.
2874                  * Note, if we fail due to already having this guid,
2875                  * zfs_receive_one() will take care of it (ie,
2876                  * recv_skip() and return 0).
2877                  */
2878                 error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
2879                     sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2880                     action_handlep, sendsnap);
2881                 if (error == ENODATA) {
2882                         error = 0;
2883                         break;
2884                 }
2885                 anyerr |= error;
2886         } while (error == 0);
2887
2888         if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2889                 /*
2890                  * Now that we have the fs's they sent us, try the
2891                  * renames again.
2892                  */
2893                 softerr = recv_incremental_replication(hdl, tofs, flags,
2894                     stream_nv, stream_avl, NULL);
2895         }
2896
2897 out:
2898         fsavl_destroy(stream_avl);
2899         nvlist_free(stream_nv);
2900         if (softerr)
2901                 error = -2;
2902         if (anyerr)
2903                 error = -1;
2904         return (error);
2905 }
2906
2907 static void
2908 trunc_prop_errs(int truncated)
2909 {
2910         ASSERT(truncated != 0);
2911
2912         if (truncated == 1)
2913                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2914                     "1 more property could not be set\n"));
2915         else
2916                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2917                     "%d more properties could not be set\n"), truncated);
2918 }
2919
2920 static int
2921 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2922 {
2923         dmu_replay_record_t *drr;
2924         void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
2925         char errbuf[1024];
2926
2927         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2928             "cannot receive:"));
2929
2930         /* XXX would be great to use lseek if possible... */
2931         drr = buf;
2932
2933         while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2934             byteswap, NULL) == 0) {
2935                 if (byteswap)
2936                         drr->drr_type = BSWAP_32(drr->drr_type);
2937
2938                 switch (drr->drr_type) {
2939                 case DRR_BEGIN:
2940                         if (drr->drr_payloadlen != 0) {
2941                                 (void) recv_read(hdl, fd, buf,
2942                                     drr->drr_payloadlen, B_FALSE, NULL);
2943                         }
2944                         break;
2945
2946                 case DRR_END:
2947                         free(buf);
2948                         return (0);
2949
2950                 case DRR_OBJECT:
2951                         if (byteswap) {
2952                                 drr->drr_u.drr_object.drr_bonuslen =
2953                                     BSWAP_32(drr->drr_u.drr_object.
2954                                     drr_bonuslen);
2955                         }
2956                         (void) recv_read(hdl, fd, buf,
2957                             P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2958                             B_FALSE, NULL);
2959                         break;
2960
2961                 case DRR_WRITE:
2962                         if (byteswap) {
2963                                 drr->drr_u.drr_write.drr_length =
2964                                     BSWAP_64(drr->drr_u.drr_write.drr_length);
2965                         }
2966                         (void) recv_read(hdl, fd, buf,
2967                             drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2968                         break;
2969                 case DRR_SPILL:
2970                         if (byteswap) {
2971                                 drr->drr_u.drr_spill.drr_length =
2972                                     BSWAP_64(drr->drr_u.drr_spill.drr_length);
2973                         }
2974                         (void) recv_read(hdl, fd, buf,
2975                             drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2976                         break;
2977                 case DRR_WRITE_EMBEDDED:
2978                         if (byteswap) {
2979                                 drr->drr_u.drr_write_embedded.drr_psize =
2980                                     BSWAP_32(drr->drr_u.drr_write_embedded.
2981                                     drr_psize);
2982                         }
2983                         (void) recv_read(hdl, fd, buf,
2984                             P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
2985                             8), B_FALSE, NULL);
2986                         break;
2987                 case DRR_WRITE_BYREF:
2988                 case DRR_FREEOBJECTS:
2989                 case DRR_FREE:
2990                         break;
2991
2992                 default:
2993                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2994                             "invalid record type"));
2995                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2996                 }
2997         }
2998
2999         free(buf);
3000         return (-1);
3001 }
3002
3003 static void
3004 recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
3005     boolean_t resumable)
3006 {
3007         char target_fs[ZFS_MAX_DATASET_NAME_LEN];
3008
3009         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3010             "checksum mismatch or incomplete stream"));
3011
3012         if (!resumable)
3013                 return;
3014         (void) strlcpy(target_fs, target_snap, sizeof (target_fs));
3015         *strchr(target_fs, '@') = '\0';
3016         zfs_handle_t *zhp = zfs_open(hdl, target_fs,
3017             ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3018         if (zhp == NULL)
3019                 return;
3020
3021         char token_buf[ZFS_MAXPROPLEN];
3022         int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
3023             token_buf, sizeof (token_buf),
3024             NULL, NULL, 0, B_TRUE);
3025         if (error == 0) {
3026                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3027                     "checksum mismatch or incomplete stream.\n"
3028                     "Partially received snapshot is saved.\n"
3029                     "A resuming stream can be generated on the sending "
3030                     "system by running:\n"
3031                     "    zfs send -t %s"),
3032                     token_buf);
3033         }
3034         zfs_close(zhp);
3035 }
3036
3037 /*
3038  * Restores a backup of tosnap from the file descriptor specified by infd.
3039  */
3040 static int
3041 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
3042     const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
3043     dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
3044     avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3045     uint64_t *action_handlep, const char *finalsnap)
3046 {
3047         zfs_cmd_t zc = { 0 };
3048         time_t begin_time;
3049         int ioctl_err, ioctl_errno, err;
3050         char *cp;
3051         struct drr_begin *drrb = &drr->drr_u.drr_begin;
3052         char errbuf[1024];
3053         char prop_errbuf[1024];
3054         const char *chopprefix;
3055         boolean_t newfs = B_FALSE;
3056         boolean_t stream_wantsnewfs;
3057         uint64_t parent_snapguid = 0;
3058         prop_changelist_t *clp = NULL;
3059         nvlist_t *snapprops_nvlist = NULL;
3060         zprop_errflags_t prop_errflags;
3061         boolean_t recursive;
3062         char *snapname = NULL;
3063
3064         begin_time = time(NULL);
3065
3066         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3067             "cannot receive"));
3068
3069         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3070             ENOENT);
3071
3072         if (stream_avl != NULL) {
3073                 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
3074                     &snapname);
3075                 nvlist_t *props;
3076                 int ret;
3077
3078                 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
3079                     &parent_snapguid);
3080                 err = nvlist_lookup_nvlist(fs, "props", &props);
3081                 if (err)
3082                         VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
3083
3084                 if (flags->canmountoff) {
3085                         VERIFY(0 == nvlist_add_uint64(props,
3086                             zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
3087                 }
3088                 ret = zcmd_write_src_nvlist(hdl, &zc, props);
3089                 if (err)
3090                         nvlist_free(props);
3091
3092                 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
3093                         VERIFY(0 == nvlist_lookup_nvlist(props,
3094                             snapname, &snapprops_nvlist));
3095                 }
3096
3097                 if (ret != 0)
3098                         return (-1);
3099         }
3100
3101         cp = NULL;
3102
3103         /*
3104          * Determine how much of the snapshot name stored in the stream
3105          * we are going to tack on to the name they specified on the
3106          * command line, and how much we are going to chop off.
3107          *
3108          * If they specified a snapshot, chop the entire name stored in
3109          * the stream.
3110          */
3111         if (flags->istail) {
3112                 /*
3113                  * A filesystem was specified with -e. We want to tack on only
3114                  * the tail of the sent snapshot path.
3115                  */
3116                 if (strchr(tosnap, '@')) {
3117                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3118                             "argument - snapshot not allowed with -e"));
3119                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3120                 }
3121
3122                 chopprefix = strrchr(sendfs, '/');
3123
3124                 if (chopprefix == NULL) {
3125                         /*
3126                          * The tail is the poolname, so we need to
3127                          * prepend a path separator.
3128                          */
3129                         int len = strlen(drrb->drr_toname);
3130                         cp = malloc(len + 2);
3131                         cp[0] = '/';
3132                         (void) strcpy(&cp[1], drrb->drr_toname);
3133                         chopprefix = cp;
3134                 } else {
3135                         chopprefix = drrb->drr_toname + (chopprefix - sendfs);
3136                 }
3137         } else if (flags->isprefix) {
3138                 /*
3139                  * A filesystem was specified with -d. We want to tack on
3140                  * everything but the first element of the sent snapshot path
3141                  * (all but the pool name).
3142                  */
3143                 if (strchr(tosnap, '@')) {
3144                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3145                             "argument - snapshot not allowed with -d"));
3146                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3147                 }
3148
3149                 chopprefix = strchr(drrb->drr_toname, '/');
3150                 if (chopprefix == NULL)
3151                         chopprefix = strchr(drrb->drr_toname, '@');
3152         } else if (strchr(tosnap, '@') == NULL) {
3153                 /*
3154                  * If a filesystem was specified without -d or -e, we want to
3155                  * tack on everything after the fs specified by 'zfs send'.
3156                  */
3157                 chopprefix = drrb->drr_toname + strlen(sendfs);
3158         } else {
3159                 /* A snapshot was specified as an exact path (no -d or -e). */
3160                 if (recursive) {
3161                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3162                             "cannot specify snapshot name for multi-snapshot "
3163                             "stream"));
3164                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3165                 }
3166                 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
3167         }
3168
3169         ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
3170         ASSERT(chopprefix > drrb->drr_toname);
3171         ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
3172         ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
3173             chopprefix[0] == '\0');
3174
3175         /*
3176          * Determine name of destination snapshot, store in zc_value.
3177          */
3178         (void) strcpy(zc.zc_value, tosnap);
3179         (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
3180 #ifdef __FreeBSD__
3181         if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
3182                 zfs_ioctl_version = get_zfs_ioctl_version();
3183         /*
3184          * For forward compatibility hide tosnap in zc_value
3185          */
3186         if (zfs_ioctl_version < ZFS_IOCVER_LZC)
3187                 (void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
3188 #endif
3189         free(cp);
3190         if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
3191                 zcmd_free_nvlists(&zc);
3192                 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3193         }
3194
3195         /*
3196          * Determine the name of the origin snapshot, store in zc_string.
3197          */
3198         if (originsnap) {
3199                 (void) strncpy(zc.zc_string, originsnap, sizeof (zc.zc_string));
3200                 if (flags->verbose)
3201                         (void) printf("using provided clone origin %s\n",
3202                             zc.zc_string);
3203         } else if (drrb->drr_flags & DRR_FLAG_CLONE) {
3204                 if (guid_to_name(hdl, zc.zc_value,
3205                     drrb->drr_fromguid, B_FALSE, zc.zc_string) != 0) {
3206                         zcmd_free_nvlists(&zc);
3207                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3208                             "local origin for clone %s does not exist"),
3209                             zc.zc_value);
3210                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
3211                 }
3212                 if (flags->verbose)
3213                         (void) printf("found clone origin %s\n", zc.zc_string);
3214         }
3215
3216         boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
3217             DMU_BACKUP_FEATURE_RESUMING;
3218         stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
3219             (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
3220
3221         if (stream_wantsnewfs) {
3222                 /*
3223                  * if the parent fs does not exist, look for it based on
3224                  * the parent snap GUID
3225                  */
3226                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3227                     "cannot receive new filesystem stream"));
3228
3229                 (void) strcpy(zc.zc_name, zc.zc_value);
3230                 cp = strrchr(zc.zc_name, '/');
3231                 if (cp)
3232                         *cp = '\0';
3233                 if (cp &&
3234                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3235                         char suffix[ZFS_MAX_DATASET_NAME_LEN];
3236                         (void) strcpy(suffix, strrchr(zc.zc_value, '/'));
3237                         if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
3238                             B_FALSE, zc.zc_value) == 0) {
3239                                 *strchr(zc.zc_value, '@') = '\0';
3240                                 (void) strcat(zc.zc_value, suffix);
3241                         }
3242                 }
3243         } else {
3244                 /*
3245                  * if the fs does not exist, look for it based on the
3246                  * fromsnap GUID
3247                  */
3248                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3249                     "cannot receive incremental stream"));
3250
3251                 (void) strcpy(zc.zc_name, zc.zc_value);
3252                 *strchr(zc.zc_name, '@') = '\0';
3253
3254                 /*
3255                  * If the exact receive path was specified and this is the
3256                  * topmost path in the stream, then if the fs does not exist we
3257                  * should look no further.
3258                  */
3259                 if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
3260                     strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
3261                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3262                         char snap[ZFS_MAX_DATASET_NAME_LEN];
3263                         (void) strcpy(snap, strchr(zc.zc_value, '@'));
3264                         if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
3265                             B_FALSE, zc.zc_value) == 0) {
3266                                 *strchr(zc.zc_value, '@') = '\0';
3267                                 (void) strcat(zc.zc_value, snap);
3268                         }
3269                 }
3270         }
3271
3272         (void) strcpy(zc.zc_name, zc.zc_value);
3273         *strchr(zc.zc_name, '@') = '\0';
3274
3275         if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3276                 zfs_handle_t *zhp;
3277
3278                 /*
3279                  * Destination fs exists.  It must be one of these cases:
3280                  *  - an incremental send stream
3281                  *  - the stream specifies a new fs (full stream or clone)
3282                  *    and they want us to blow away the existing fs (and
3283                  *    have therefore specified -F and removed any snapshots)
3284                  *  - we are resuming a failed receive.
3285                  */
3286                 if (stream_wantsnewfs) {
3287                         if (!flags->force) {
3288                                 zcmd_free_nvlists(&zc);
3289                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3290                                     "destination '%s' exists\n"
3291                                     "must specify -F to overwrite it"),
3292                                     zc.zc_name);
3293                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3294                         }
3295                         if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
3296                             &zc) == 0) {
3297                                 zcmd_free_nvlists(&zc);
3298                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3299                                     "destination has snapshots (eg. %s)\n"
3300                                     "must destroy them to overwrite it"),
3301                                     zc.zc_name);
3302                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3303                         }
3304                 }
3305
3306                 if ((zhp = zfs_open(hdl, zc.zc_name,
3307                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
3308                         zcmd_free_nvlists(&zc);
3309                         return (-1);
3310                 }
3311
3312                 if (stream_wantsnewfs &&
3313                     zhp->zfs_dmustats.dds_origin[0]) {
3314                         zcmd_free_nvlists(&zc);
3315                         zfs_close(zhp);
3316                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3317                             "destination '%s' is a clone\n"
3318                             "must destroy it to overwrite it"),
3319                             zc.zc_name);
3320                         return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3321                 }
3322
3323                 if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
3324                     stream_wantsnewfs) {
3325                         /* We can't do online recv in this case */
3326                         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
3327                         if (clp == NULL) {
3328                                 zfs_close(zhp);
3329                                 zcmd_free_nvlists(&zc);
3330                                 return (-1);
3331                         }
3332                         if (changelist_prefix(clp) != 0) {
3333                                 changelist_free(clp);
3334                                 zfs_close(zhp);
3335                                 zcmd_free_nvlists(&zc);
3336                                 return (-1);
3337                         }
3338                 }
3339
3340                 /*
3341                  * If we are resuming a newfs, set newfs here so that we will
3342                  * mount it if the recv succeeds this time.  We can tell
3343                  * that it was a newfs on the first recv because the fs
3344                  * itself will be inconsistent (if the fs existed when we
3345                  * did the first recv, we would have received it into
3346                  * .../%recv).
3347                  */
3348                 if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
3349                         newfs = B_TRUE;
3350
3351                 zfs_close(zhp);
3352         } else {
3353                 /*
3354                  * Destination filesystem does not exist.  Therefore we better
3355                  * be creating a new filesystem (either from a full backup, or
3356                  * a clone).  It would therefore be invalid if the user
3357                  * specified only the pool name (i.e. if the destination name
3358                  * contained no slash character).
3359                  */
3360                 if (!stream_wantsnewfs ||
3361                     (cp = strrchr(zc.zc_name, '/')) == NULL) {
3362                         zcmd_free_nvlists(&zc);
3363                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3364                             "destination '%s' does not exist"), zc.zc_name);
3365                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
3366                 }
3367
3368                 /*
3369                  * Trim off the final dataset component so we perform the
3370                  * recvbackup ioctl to the filesystems's parent.
3371                  */
3372                 *cp = '\0';
3373
3374                 if (flags->isprefix && !flags->istail && !flags->dryrun &&
3375                     create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
3376                         zcmd_free_nvlists(&zc);
3377                         return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
3378                 }
3379
3380                 newfs = B_TRUE;
3381         }
3382
3383         zc.zc_begin_record = *drr_noswap;
3384         zc.zc_cookie = infd;
3385         zc.zc_guid = flags->force;
3386         zc.zc_resumable = flags->resumable;
3387         if (flags->verbose) {
3388                 (void) printf("%s %s stream of %s into %s\n",
3389                     flags->dryrun ? "would receive" : "receiving",
3390                     drrb->drr_fromguid ? "incremental" : "full",
3391                     drrb->drr_toname, zc.zc_value);
3392                 (void) fflush(stdout);
3393         }
3394
3395         if (flags->dryrun) {
3396                 zcmd_free_nvlists(&zc);
3397                 return (recv_skip(hdl, infd, flags->byteswap));
3398         }
3399
3400         zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
3401         zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
3402         zc.zc_cleanup_fd = cleanup_fd;
3403         zc.zc_action_handle = *action_handlep;
3404
3405         err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
3406         ioctl_errno = errno;
3407         prop_errflags = (zprop_errflags_t)zc.zc_obj;
3408
3409         if (err == 0) {
3410                 nvlist_t *prop_errors;
3411                 VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
3412                     zc.zc_nvlist_dst_size, &prop_errors, 0));
3413
3414                 nvpair_t *prop_err = NULL;
3415
3416                 while ((prop_err = nvlist_next_nvpair(prop_errors,
3417                     prop_err)) != NULL) {
3418                         char tbuf[1024];
3419                         zfs_prop_t prop;
3420                         int intval;
3421
3422                         prop = zfs_name_to_prop(nvpair_name(prop_err));
3423                         (void) nvpair_value_int32(prop_err, &intval);
3424                         if (strcmp(nvpair_name(prop_err),
3425                             ZPROP_N_MORE_ERRORS) == 0) {
3426                                 trunc_prop_errs(intval);
3427                                 break;
3428                         } else if (snapname == NULL || finalsnap == NULL ||
3429                             strcmp(finalsnap, snapname) == 0 ||
3430                             strcmp(nvpair_name(prop_err),
3431                             zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
3432                                 /*
3433                                  * Skip the special case of, for example,
3434                                  * "refquota", errors on intermediate
3435                                  * snapshots leading up to a final one.
3436                                  * That's why we have all of the checks above.
3437                                  *
3438                                  * See zfs_ioctl.c's extract_delay_props() for
3439                                  * a list of props which can fail on
3440                                  * intermediate snapshots, but shouldn't
3441                                  * affect the overall receive.
3442                                  */
3443                                 (void) snprintf(tbuf, sizeof (tbuf),
3444                                     dgettext(TEXT_DOMAIN,
3445                                     "cannot receive %s property on %s"),
3446                                     nvpair_name(prop_err), zc.zc_name);
3447                                 zfs_setprop_error(hdl, prop, intval, tbuf);
3448                         }
3449                 }
3450                 nvlist_free(prop_errors);
3451         }
3452
3453         zc.zc_nvlist_dst = 0;
3454         zc.zc_nvlist_dst_size = 0;
3455         zcmd_free_nvlists(&zc);
3456
3457         if (err == 0 && snapprops_nvlist) {
3458                 zfs_cmd_t zc2 = { 0 };
3459
3460                 (void) strcpy(zc2.zc_name, zc.zc_value);
3461                 zc2.zc_cookie = B_TRUE; /* received */
3462                 if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
3463                         (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
3464                         zcmd_free_nvlists(&zc2);
3465                 }
3466         }
3467
3468         if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3469                 /*
3470                  * It may be that this snapshot already exists,
3471                  * in which case we want to consume & ignore it
3472                  * rather than failing.
3473                  */
3474                 avl_tree_t *local_avl;
3475                 nvlist_t *local_nv, *fs;
3476                 cp = strchr(zc.zc_value, '@');
3477
3478                 /*
3479                  * XXX Do this faster by just iterating over snaps in
3480                  * this fs.  Also if zc_value does not exist, we will
3481                  * get a strange "does not exist" error message.
3482                  */
3483                 *cp = '\0';
3484                 if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
3485                     B_FALSE, &local_nv, &local_avl) == 0) {
3486                         *cp = '@';
3487                         fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3488                         fsavl_destroy(local_avl);
3489                         nvlist_free(local_nv);
3490
3491                         if (fs != NULL) {
3492                                 if (flags->verbose) {
3493                                         (void) printf("snap %s already exists; "
3494                                             "ignoring\n", zc.zc_value);
3495                                 }
3496                                 err = ioctl_err = recv_skip(hdl, infd,
3497                                     flags->byteswap);
3498                         }
3499                 }
3500                 *cp = '@';
3501         }
3502
3503         if (ioctl_err != 0) {
3504                 switch (ioctl_errno) {
3505                 case ENODEV:
3506                         cp = strchr(zc.zc_value, '@');
3507                         *cp = '\0';
3508                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3509                             "most recent snapshot of %s does not\n"
3510                             "match incremental source"), zc.zc_value);
3511                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3512                         *cp = '@';
3513                         break;
3514                 case ETXTBSY:
3515                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3516                             "destination %s has been modified\n"
3517                             "since most recent snapshot"), zc.zc_name);
3518                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3519                         break;
3520                 case EEXIST:
3521                         cp = strchr(zc.zc_value, '@');
3522                         if (newfs) {
3523                                 /* it's the containing fs that exists */
3524                                 *cp = '\0';
3525                         }
3526                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3527                             "destination already exists"));
3528                         (void) zfs_error_fmt(hdl, EZFS_EXISTS,
3529                             dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3530                             zc.zc_value);
3531                         *cp = '@';
3532                         break;
3533                 case EINVAL:
3534                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3535                         break;
3536                 case ECKSUM:
3537                         recv_ecksum_set_aux(hdl, zc.zc_value, flags->resumable);
3538                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3539                         break;
3540                 case ENOTSUP:
3541                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3542                             "pool must be upgraded to receive this stream."));
3543                         (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3544                         break;
3545                 case EDQUOT:
3546                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3547                             "destination %s space quota exceeded"), zc.zc_name);
3548                         (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3549                         break;
3550                 default:
3551                         (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3552                 }
3553         }
3554
3555         /*
3556          * Mount the target filesystem (if created).  Also mount any
3557          * children of the target filesystem if we did a replication
3558          * receive (indicated by stream_avl being non-NULL).
3559          */
3560         cp = strchr(zc.zc_value, '@');
3561         if (cp && (ioctl_err == 0 || !newfs)) {
3562                 zfs_handle_t *h;
3563
3564                 *cp = '\0';
3565                 h = zfs_open(hdl, zc.zc_value,
3566                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3567                 if (h != NULL) {
3568                         if (h->zfs_type == ZFS_TYPE_VOLUME) {
3569                                 *cp = '@';
3570                         } else if (newfs || stream_avl) {
3571                                 /*
3572                                  * Track the first/top of hierarchy fs,
3573                                  * for mounting and sharing later.
3574                                  */
3575                                 if (top_zfs && *top_zfs == NULL)
3576                                         *top_zfs = zfs_strdup(hdl, zc.zc_value);
3577                         }
3578                         zfs_close(h);
3579                 }
3580                 *cp = '@';
3581         }
3582
3583         if (clp) {
3584                 if (!flags->nomount)
3585                         err |= changelist_postfix(clp);
3586                 changelist_free(clp);
3587         }
3588
3589         if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3590                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3591                     "failed to clear unreceived properties on %s"),
3592                     zc.zc_name);
3593                 (void) fprintf(stderr, "\n");
3594         }
3595         if (prop_errflags & ZPROP_ERR_NORESTORE) {
3596                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3597                     "failed to restore original properties on %s"),
3598                     zc.zc_name);
3599                 (void) fprintf(stderr, "\n");
3600         }
3601
3602         if (err || ioctl_err)
3603                 return (-1);
3604
3605         *action_handlep = zc.zc_action_handle;
3606
3607         if (flags->verbose) {
3608                 char buf1[64];
3609                 char buf2[64];
3610                 uint64_t bytes = zc.zc_cookie;
3611                 time_t delta = time(NULL) - begin_time;
3612                 if (delta == 0)
3613                         delta = 1;
3614                 zfs_nicenum(bytes, buf1, sizeof (buf1));
3615                 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3616
3617                 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3618                     buf1, delta, buf2);
3619         }
3620
3621         return (0);
3622 }
3623
3624 static int
3625 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
3626     const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
3627     nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
3628     uint64_t *action_handlep, const char *finalsnap)
3629 {
3630         int err;
3631         dmu_replay_record_t drr, drr_noswap;
3632         struct drr_begin *drrb = &drr.drr_u.drr_begin;
3633         char errbuf[1024];
3634         zio_cksum_t zcksum = { 0 };
3635         uint64_t featureflags;
3636         int hdrtype;
3637
3638         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3639             "cannot receive"));
3640
3641         if (flags->isprefix &&
3642             !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3643                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3644                     "(%s) does not exist"), tosnap);
3645                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3646         }
3647         if (originsnap &&
3648             !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
3649                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
3650                     "(%s) does not exist"), originsnap);
3651                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3652         }
3653
3654         /* read in the BEGIN record */
3655         if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3656             &zcksum)))
3657                 return (err);
3658
3659         if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3660                 /* It's the double end record at the end of a package */
3661                 return (ENODATA);
3662         }
3663
3664         /* the kernel needs the non-byteswapped begin record */
3665         drr_noswap = drr;
3666
3667         flags->byteswap = B_FALSE;
3668         if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3669                 /*
3670                  * We computed the checksum in the wrong byteorder in
3671                  * recv_read() above; do it again correctly.
3672                  */
3673                 bzero(&zcksum, sizeof (zio_cksum_t));
3674                 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3675                 flags->byteswap = B_TRUE;
3676
3677                 drr.drr_type = BSWAP_32(drr.drr_type);
3678                 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3679                 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3680                 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3681                 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3682                 drrb->drr_type = BSWAP_32(drrb->drr_type);
3683                 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3684                 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3685                 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3686         }
3687
3688         if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3689                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3690                     "stream (bad magic number)"));
3691                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3692         }
3693
3694         featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3695         hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3696
3697         if (!DMU_STREAM_SUPPORTED(featureflags) ||
3698             (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3699                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3700                     "stream has unsupported feature, feature flags = %lx"),
3701                     featureflags);
3702                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3703         }
3704
3705         if (strchr(drrb->drr_toname, '@') == NULL) {
3706                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3707                     "stream (bad snapshot name)"));
3708                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3709         }
3710
3711         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3712                 char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
3713                 if (sendfs == NULL) {
3714                         /*
3715                          * We were not called from zfs_receive_package(). Get
3716                          * the fs specified by 'zfs send'.
3717                          */
3718                         char *cp;
3719                         (void) strlcpy(nonpackage_sendfs,
3720                             drr.drr_u.drr_begin.drr_toname,
3721                             sizeof (nonpackage_sendfs));
3722                         if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3723                                 *cp = '\0';
3724                         sendfs = nonpackage_sendfs;
3725                         VERIFY(finalsnap == NULL);
3726                 }
3727                 return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
3728                     &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
3729                     cleanup_fd, action_handlep, finalsnap));
3730         } else {
3731                 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3732                     DMU_COMPOUNDSTREAM);
3733                 return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
3734                     &zcksum, top_zfs, cleanup_fd, action_handlep));
3735         }
3736 }
3737
3738 /*
3739  * Restores a backup of tosnap from the file descriptor specified by infd.
3740  * Return 0 on total success, -2 if some things couldn't be
3741  * destroyed/renamed/promoted, -1 if some things couldn't be received.
3742  * (-1 will override -2, if -1 and the resumable flag was specified the
3743  * transfer can be resumed if the sending side supports it).
3744  */
3745 int
3746 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
3747     recvflags_t *flags, int infd, avl_tree_t *stream_avl)
3748 {
3749         char *top_zfs = NULL;
3750         int err;
3751         int cleanup_fd;
3752         uint64_t action_handle = 0;
3753         char *originsnap = NULL;
3754         if (props) {
3755                 err = nvlist_lookup_string(props, "origin", &originsnap);
3756                 if (err && err != ENOENT)
3757                         return (err);
3758         }
3759
3760         cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
3761         VERIFY(cleanup_fd >= 0);
3762
3763         err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
3764             stream_avl, &top_zfs, cleanup_fd, &action_handle, NULL);
3765
3766         VERIFY(0 == close(cleanup_fd));
3767
3768         if (err == 0 && !flags->nomount && top_zfs) {
3769                 zfs_handle_t *zhp;
3770                 prop_changelist_t *clp;
3771
3772                 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3773                 if (zhp != NULL) {
3774                         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3775                             CL_GATHER_MOUNT_ALWAYS, 0);
3776                         zfs_close(zhp);
3777                         if (clp != NULL) {
3778                                 /* mount and share received datasets */
3779                                 err = changelist_postfix(clp);
3780                                 changelist_free(clp);
3781                         }
3782                 }
3783                 if (zhp == NULL || clp == NULL || err)
3784                         err = -1;
3785         }
3786         if (top_zfs)
3787                 free(top_zfs);
3788
3789         return (err);
3790 }