]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/libzfs/libzfs_sendrecv.c
zfs send -p send properties only for snapshots that are actually sent
[FreeBSD/FreeBSD.git] / lib / libzfs / libzfs_sendrecv.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27  * All rights reserved
28  * Copyright (c) 2013 Steven Hartland. All rights reserved.
29  */
30
31 #include <assert.h>
32 #include <ctype.h>
33 #include <errno.h>
34 #include <libintl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <stddef.h>
40 #include <fcntl.h>
41 #include <sys/mount.h>
42 #include <sys/mntent.h>
43 #include <sys/mnttab.h>
44 #include <sys/avl.h>
45 #include <sys/debug.h>
46 #include <stddef.h>
47 #include <pthread.h>
48 #include <umem.h>
49 #include <time.h>
50
51 #include <libzfs.h>
52 #include <libzfs_core.h>
53
54 #include "zfs_namecheck.h"
55 #include "zfs_prop.h"
56 #include "zfs_fletcher.h"
57 #include "libzfs_impl.h"
58 #include <sys/zio_checksum.h>
59 #include <sys/ddt.h>
60 #include <sys/socket.h>
61
/* Defined in libzfs_dataset.c. */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);

/* Forward declaration of a file-local function. */
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
    int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);

/*
 * All-zero checksum value.  cksummer() compares the checksum carried by a
 * DRR_WRITE record against it.
 */
static const zio_cksum_t zero_cksum = { { 0 } };
69
/*
 * Argument handed to cksummer().
 */
typedef struct dedup_arg {
        /* descriptor cksummer() reads the incoming stream records from */
        int     inputfd;
        /* descriptor cksummer() writes the deduplicated stream to */
        int     outputfd;
        /* library handle passed on to ddt_update() for error reporting */
        libzfs_handle_t  *dedup_hdl;
} dedup_arg_t;
75
/*
 * NOTE(review): not referenced in this part of the file; judging by the
 * field names this is the argument of a send-progress reporter (dataset
 * handle, output descriptor, parsable-output switch) -- confirm against
 * the user of this type.
 */
typedef struct progress_arg {
        zfs_handle_t *pa_zhp;
        int pa_fd;
        boolean_t pa_parsable;
} progress_arg_t;
81
/*
 * Location of a block already present in the stream.  cksummer() fills it
 * from a DRR_WRITE record and copies it into the drr_ref* fields of a
 * DRR_WRITE_BYREF record when a duplicate is found.
 */
typedef struct dataref {
        uint64_t ref_guid;      /* drr_toguid of the original write */
        uint64_t ref_object;    /* drr_object of the original write */
        uint64_t ref_offset;    /* drr_offset of the original write */
} dataref_t;
87
/*
 * One dedup table entry.  Entries that hash to the same bucket are chained
 * through dde_next; a lookup matches on both dde_chksum and dde_prop.
 */
typedef struct dedup_entry {
        struct dedup_entry      *dde_next;      /* next entry in the bucket */
        zio_cksum_t dde_chksum;                 /* checksum of the block */
        uint64_t dde_prop;                      /* ddk_prop of the block */
        dataref_t dde_ref;                      /* where the block was sent */
} dedup_entry_t;
94
/*
 * Sizing of the send-side dedup table: cksummer() caps it at
 * MAX_DDT_PHYSMEM_PERCENT percent of physical memory, but never at less
 * than SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define MAX_DDT_PHYSMEM_PERCENT         20
#define SMALLEST_POSSIBLE_MAX_DDT_MB            128
97
/*
 * The dedup table built by cksummer(): a chained hash table keyed by block
 * checksum.
 */
typedef struct dedup_table {
        /* bucket heads, indexed by the low numhashbits bits of the checksum */
        dedup_entry_t   **dedup_hash_array;
        /* umem cache that entries are allocated from */
        umem_cache_t    *ddecache;
        uint64_t        max_ddt_size;  /* max dedup table size in bytes */
        uint64_t        cur_ddt_size;  /* current dedup table size in bytes */
        /* incremented by ddt_hash_append() for every entry added */
        uint64_t        ddt_count;
        /* number of checksum bits used to select a bucket */
        int             numhashbits;
        /* set once the "table full" message has been recorded */
        boolean_t       ddt_full;
} dedup_table_t;
107
/*
 * Return the 1-based position of the most significant set bit of n, i.e.
 * the number of bits needed to represent n.  Returns 0 when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
        int nbits = 0;

        while (n != 0) {
                nbits++;
                n >>= 1;
        }
        return (nbits);
}
117
/*
 * Read one item of exactly len bytes from stream into buf.
 *
 * Returns 1 when the whole item was read and 0 otherwise (end of file,
 * read error, or fewer than len bytes available).
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
        return (fread(buf, len, 1, stream));
}
128
129 static void
130 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
131     zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
132 {
133         dedup_entry_t   *dde;
134
135         if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
136                 if (ddt->ddt_full == B_FALSE) {
137                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
138                             "Dedup table full.  Deduplication will continue "
139                             "with existing table entries"));
140                         ddt->ddt_full = B_TRUE;
141                 }
142                 return;
143         }
144
145         if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
146             != NULL) {
147                 assert(*ddepp == NULL);
148                 dde->dde_next = NULL;
149                 dde->dde_chksum = *cs;
150                 dde->dde_prop = prop;
151                 dde->dde_ref = *dr;
152                 *ddepp = dde;
153                 ddt->cur_ddt_size += sizeof (dedup_entry_t);
154                 ddt->ddt_count++;
155         }
156 }
157
158 /*
159  * Using the specified dedup table, do a lookup for an entry with
160  * the checksum cs.  If found, return the block's reference info
161  * in *dr. Otherwise, insert a new entry in the dedup table, using
162  * the reference information specified by *dr.
163  *
164  * return value:  true - entry was found
165  *                false - entry was not found
166  */
167 static boolean_t
168 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
169     uint64_t prop, dataref_t *dr)
170 {
171         uint32_t hashcode;
172         dedup_entry_t **ddepp;
173
174         hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
175
176         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
177             ddepp = &((*ddepp)->dde_next)) {
178                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
179                     (*ddepp)->dde_prop == prop) {
180                         *dr = (*ddepp)->dde_ref;
181                         return (B_TRUE);
182                 }
183         }
184         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
185         return (B_FALSE);
186 }
187
188 static int
189 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
190 {
191         fletcher_4_incremental_native(buf, len, zc);
192         return (write(outfd, buf, len));
193 }
194
195 /*
196  * This function is started in a separate thread when the dedup option
197  * has been requested.  The main send thread determines the list of
198  * snapshots to be included in the send stream and makes the ioctl calls
199  * for each one.  But instead of having the ioctl send the output to the
200  * the output fd specified by the caller of zfs_send()), the
201  * ioctl is told to direct the output to a pipe, which is read by the
202  * alternate thread running THIS function.  This function does the
203  * dedup'ing by:
204  *  1. building a dedup table (the DDT)
205  *  2. doing checksums on each data block and inserting a record in the DDT
206  *  3. looking for matching checksums, and
207  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
208  *      a duplicate block is found.
209  * The output of this function then goes to the output fd requested
210  * by the caller of zfs_send().
211  */
212 static void *
213 cksummer(void *arg)
214 {
215         dedup_arg_t *dda = arg;
216         char *buf = malloc(1<<20);
217         dmu_replay_record_t thedrr;
218         dmu_replay_record_t *drr = &thedrr;
219         struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
220         struct drr_end *drre = &thedrr.drr_u.drr_end;
221         struct drr_object *drro = &thedrr.drr_u.drr_object;
222         struct drr_write *drrw = &thedrr.drr_u.drr_write;
223         struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
224         struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
225         FILE *ofp;
226         int outfd;
227         dmu_replay_record_t wbr_drr = {0};
228         struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
229         dedup_table_t ddt;
230         zio_cksum_t stream_cksum;
231         uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
232         uint64_t numbuckets;
233
234         ddt.max_ddt_size =
235             MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
236             SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
237
238         numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
239
240         /*
241          * numbuckets must be a power of 2.  Increase number to
242          * a power of 2 if necessary.
243          */
244         if (!ISP2(numbuckets))
245                 numbuckets = 1 << high_order_bit(numbuckets);
246
247         ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
248         ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
249             NULL, NULL, NULL, NULL, NULL, 0);
250         ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
251         ddt.numhashbits = high_order_bit(numbuckets) - 1;
252         ddt.ddt_full = B_FALSE;
253
254         /* Initialize the write-by-reference block. */
255         wbr_drr.drr_type = DRR_WRITE_BYREF;
256         wbr_drr.drr_payloadlen = 0;
257
258         outfd = dda->outputfd;
259         ofp = fdopen(dda->inputfd, "r");
260         while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
261
262                 switch (drr->drr_type) {
263                 case DRR_BEGIN:
264                 {
265                         int     fflags;
266                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
267
268                         /* set the DEDUP feature flag for this stream */
269                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
270                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
271                             DMU_BACKUP_FEATURE_DEDUPPROPS);
272                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
273
274                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
275                             &stream_cksum, outfd) == -1)
276                                 goto out;
277                         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
278                             DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
279                                 int sz = drr->drr_payloadlen;
280
281                                 if (sz > 1<<20) {
282                                         free(buf);
283                                         buf = malloc(sz);
284                                 }
285                                 (void) ssread(buf, sz, ofp);
286                                 if (ferror(stdin))
287                                         perror("fread");
288                                 if (cksum_and_write(buf, sz, &stream_cksum,
289                                     outfd) == -1)
290                                         goto out;
291                         }
292                         break;
293                 }
294
295                 case DRR_END:
296                 {
297                         /* use the recalculated checksum */
298                         ZIO_SET_CHECKSUM(&drre->drr_checksum,
299                             stream_cksum.zc_word[0], stream_cksum.zc_word[1],
300                             stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
301                         if ((write(outfd, drr,
302                             sizeof (dmu_replay_record_t))) == -1)
303                                 goto out;
304                         break;
305                 }
306
307                 case DRR_OBJECT:
308                 {
309                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
310                             &stream_cksum, outfd) == -1)
311                                 goto out;
312                         if (drro->drr_bonuslen > 0) {
313                                 (void) ssread(buf,
314                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
315                                     ofp);
316                                 if (cksum_and_write(buf,
317                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
318                                     &stream_cksum, outfd) == -1)
319                                         goto out;
320                         }
321                         break;
322                 }
323
324                 case DRR_SPILL:
325                 {
326                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
327                             &stream_cksum, outfd) == -1)
328                                 goto out;
329                         (void) ssread(buf, drrs->drr_length, ofp);
330                         if (cksum_and_write(buf, drrs->drr_length,
331                             &stream_cksum, outfd) == -1)
332                                 goto out;
333                         break;
334                 }
335
336                 case DRR_FREEOBJECTS:
337                 {
338                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
339                             &stream_cksum, outfd) == -1)
340                                 goto out;
341                         break;
342                 }
343
344                 case DRR_WRITE:
345                 {
346                         dataref_t       dataref;
347
348                         (void) ssread(buf, drrw->drr_length, ofp);
349
350                         /*
351                          * Use the existing checksum if it's dedup-capable,
352                          * else calculate a SHA256 checksum for it.
353                          */
354
355                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
356                             zero_cksum) ||
357                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
358                                 zio_cksum_t tmpsha256;
359
360                                 zio_checksum_SHA256(buf,
361                                     drrw->drr_length, &tmpsha256);
362
363                                 drrw->drr_key.ddk_cksum.zc_word[0] =
364                                     BE_64(tmpsha256.zc_word[0]);
365                                 drrw->drr_key.ddk_cksum.zc_word[1] =
366                                     BE_64(tmpsha256.zc_word[1]);
367                                 drrw->drr_key.ddk_cksum.zc_word[2] =
368                                     BE_64(tmpsha256.zc_word[2]);
369                                 drrw->drr_key.ddk_cksum.zc_word[3] =
370                                     BE_64(tmpsha256.zc_word[3]);
371                                 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
372                                 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
373                         }
374
375                         dataref.ref_guid = drrw->drr_toguid;
376                         dataref.ref_object = drrw->drr_object;
377                         dataref.ref_offset = drrw->drr_offset;
378
379                         if (ddt_update(dda->dedup_hdl, &ddt,
380                             &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
381                             &dataref)) {
382                                 /* block already present in stream */
383                                 wbr_drrr->drr_object = drrw->drr_object;
384                                 wbr_drrr->drr_offset = drrw->drr_offset;
385                                 wbr_drrr->drr_length = drrw->drr_length;
386                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
387                                 wbr_drrr->drr_refguid = dataref.ref_guid;
388                                 wbr_drrr->drr_refobject =
389                                     dataref.ref_object;
390                                 wbr_drrr->drr_refoffset =
391                                     dataref.ref_offset;
392
393                                 wbr_drrr->drr_checksumtype =
394                                     drrw->drr_checksumtype;
395                                 wbr_drrr->drr_checksumflags =
396                                     drrw->drr_checksumtype;
397                                 wbr_drrr->drr_key.ddk_cksum =
398                                     drrw->drr_key.ddk_cksum;
399                                 wbr_drrr->drr_key.ddk_prop =
400                                     drrw->drr_key.ddk_prop;
401
402                                 if (cksum_and_write(&wbr_drr,
403                                     sizeof (dmu_replay_record_t), &stream_cksum,
404                                     outfd) == -1)
405                                         goto out;
406                         } else {
407                                 /* block not previously seen */
408                                 if (cksum_and_write(drr,
409                                     sizeof (dmu_replay_record_t), &stream_cksum,
410                                     outfd) == -1)
411                                         goto out;
412                                 if (cksum_and_write(buf,
413                                     drrw->drr_length,
414                                     &stream_cksum, outfd) == -1)
415                                         goto out;
416                         }
417                         break;
418                 }
419
420                 case DRR_WRITE_EMBEDDED:
421                 {
422                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
423                             &stream_cksum, outfd) == -1)
424                                 goto out;
425                         (void) ssread(buf,
426                             P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
427                         if (cksum_and_write(buf,
428                             P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
429                             &stream_cksum, outfd) == -1)
430                                 goto out;
431                         break;
432                 }
433
434                 case DRR_FREE:
435                 {
436                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
437                             &stream_cksum, outfd) == -1)
438                                 goto out;
439                         break;
440                 }
441
442                 default:
443                         (void) printf("INVALID record type 0x%x\n",
444                             drr->drr_type);
445                         /* should never happen, so assert */
446                         assert(B_FALSE);
447                 }
448         }
449 out:
450         umem_cache_destroy(ddt.ddecache);
451         free(ddt.dedup_hash_array);
452         free(buf);
453         (void) fclose(ofp);
454
455         return (NULL);
456 }
457
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 */

/*
 * One snapshot in the AVL tree built by fsavl_create().  The tree is
 * ordered by fn_guid.
 */
typedef struct fsavl_node {
        avl_node_t fn_node;     /* AVL linkage */
        nvlist_t *fn_nvfs;      /* nvlist of the fs containing the snapshot */
        /* snapshot name; points into the nvpair it came from, not a copy */
        char *fn_snapname;
        uint64_t fn_guid;       /* snapshot guid (the sort key) */
} fsavl_node_t;
467
468 static int
469 fsavl_compare(const void *arg1, const void *arg2)
470 {
471         const fsavl_node_t *fn1 = arg1;
472         const fsavl_node_t *fn2 = arg2;
473
474         if (fn1->fn_guid > fn2->fn_guid)
475                 return (+1);
476         else if (fn1->fn_guid < fn2->fn_guid)
477                 return (-1);
478         else
479                 return (0);
480 }
481
482 /*
483  * Given the GUID of a snapshot, find its containing filesystem and
484  * (optionally) name.
485  */
486 static nvlist_t *
487 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
488 {
489         fsavl_node_t fn_find;
490         fsavl_node_t *fn;
491
492         fn_find.fn_guid = snapguid;
493
494         fn = avl_find(avl, &fn_find, NULL);
495         if (fn) {
496                 if (snapname)
497                         *snapname = fn->fn_snapname;
498                 return (fn->fn_nvfs);
499         }
500         return (NULL);
501 }
502
503 static void
504 fsavl_destroy(avl_tree_t *avl)
505 {
506         fsavl_node_t *fn;
507         void *cookie;
508
509         if (avl == NULL)
510                 return;
511
512         cookie = NULL;
513         while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
514                 free(fn);
515         avl_destroy(avl);
516         free(avl);
517 }
518
519 /*
520  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
521  */
522 static avl_tree_t *
523 fsavl_create(nvlist_t *fss)
524 {
525         avl_tree_t *fsavl;
526         nvpair_t *fselem = NULL;
527
528         if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
529                 return (NULL);
530
531         avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
532             offsetof(fsavl_node_t, fn_node));
533
534         while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
535                 nvlist_t *nvfs, *snaps;
536                 nvpair_t *snapelem = NULL;
537
538                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
539                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
540
541                 while ((snapelem =
542                     nvlist_next_nvpair(snaps, snapelem)) != NULL) {
543                         fsavl_node_t *fn;
544                         uint64_t guid;
545
546                         VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
547                         if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
548                                 fsavl_destroy(fsavl);
549                                 return (NULL);
550                         }
551                         fn->fn_nvfs = nvfs;
552                         fn->fn_snapname = nvpair_name(snapelem);
553                         fn->fn_guid = guid;
554
555                         /*
556                          * Note: if there are multiple snaps with the
557                          * same GUID, we ignore all but one.
558                          */
559                         if (avl_find(fsavl, fn, NULL) == NULL)
560                                 avl_add(fsavl, fn);
561                         else
562                                 free(fn);
563                 }
564         }
565
566         return (fsavl);
567 }
568
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 */

/*
 * State shared by send_iterate_fs() and send_iterate_snap() while the
 * header nvlist is being gathered.
 */
typedef struct send_data {
        /* guid of the fromsnap in the fs currently being visited */
        uint64_t parent_fromsnap_guid;
        /* "snaps" nvlist being built for the current fs */
        nvlist_t *parent_snaps;
        /* result: one entry per visited fs (see format below) */
        nvlist_t *fss;
        /* "snapprops" nvlist being built for the current fs */
        nvlist_t *snapprops;
        /* short snapshot names (compared with the part after the '@') */
        const char *fromsnap;
        const char *tosnap;
        /* descend into child filesystems and include all their snapshots */
        boolean_t recursive;
        /* non-recursive mode only: fromsnap / tosnap already encountered */
        boolean_t seenfrom;
        boolean_t seento;

        /*
         * The header nvlist is of the following format:
         * {
         *   "tosnap" -> string
         *   "fromsnap" -> string (if incremental)
         *   "fss" -> {
         *      id -> {
         *
         *       "name" -> string (full name; for debugging)
         *       "parentfromsnap" -> number (guid of fromsnap in parent)
         *
         *       "props" -> { name -> value (only if set here) }
         *       "snaps" -> { name (lastname) -> number (guid) }
         *       "snapprops" -> { name (lastname) -> { name -> value } }
         *
         *       "origin" -> number (guid) (if clone)
         *       "sent" -> boolean (not on-disk)
         *      }
         *   }
         * }
         *
         */
} send_data_t;
606
607 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
608
609 static int
610 send_iterate_snap(zfs_handle_t *zhp, void *arg)
611 {
612         send_data_t *sd = arg;
613         uint64_t guid = zhp->zfs_dmustats.dds_guid;
614         char *snapname;
615         nvlist_t *nv;
616         boolean_t isfromsnap, istosnap;
617
618         snapname = strrchr(zhp->zfs_name, '@')+1;
619         isfromsnap = (sd->fromsnap != NULL &&
620             strcmp(sd->fromsnap, snapname) == 0);
621         istosnap = (sd->tosnap != NULL && (strcmp(sd->tosnap, snapname) == 0));
622
623         /*
624          * NB: if there is no fromsnap here (it's a newly created fs in
625          * an incremental replication), we will substitute the tosnap.
626          */
627         if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap)) {
628                 sd->parent_fromsnap_guid = guid;
629         }
630
631         if (!sd->recursive) {
632                 if (!sd->seenfrom && isfromsnap) {
633                         sd->seenfrom = B_TRUE;
634                         zfs_close(zhp);
635                         return (0);
636                 }
637
638                 if (sd->seento || !sd->seenfrom) {
639                         zfs_close(zhp);
640                         return (0);
641                 }
642
643                 if (istosnap)
644                         sd->seento = B_TRUE;
645         }
646
647         VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
648
649         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
650         send_iterate_prop(zhp, nv);
651         VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
652         nvlist_free(nv);
653
654         zfs_close(zhp);
655         return (0);
656 }
657
/*
 * Copy the properties of zhp that are worth sending into nv, as
 * name -> value pairs (string or uint64).
 *
 * Skipped are: non-user properties that are unknown or read-only;
 * properties whose source is neither this dataset nor "received"; and
 * quota/reservation-type properties on snapshots.
 */
static void
send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
{
        nvpair_t *elem = NULL;

        while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
                char *propname = nvpair_name(elem);
                zfs_prop_t prop = zfs_name_to_prop(propname);
                nvlist_t *propnv;

                if (!zfs_prop_user(propname)) {
                        /*
                         * Realistically, this should never happen.  However,
                         * we want the ability to add DSL properties without
                         * needing to make incompatible version changes.  We
                         * need to ignore unknown properties to allow older
                         * software to still send datasets containing these
                         * properties, with the unknown properties elided.
                         */
                        if (prop == ZPROP_INVAL)
                                continue;

                        if (zfs_prop_readonly(prop))
                                continue;
                }

                /* Each property is itself an nvlist holding value/source. */
                verify(nvpair_value_nvlist(elem, &propnv) == 0);
                if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
                    prop == ZFS_PROP_REFQUOTA ||
                    prop == ZFS_PROP_REFRESERVATION) {
                        char *source;
                        uint64_t value;
                        /* value is only looked up to verify it exists */
                        verify(nvlist_lookup_uint64(propnv,
                            ZPROP_VALUE, &value) == 0);
                        if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
                                continue;
                        /*
                         * May have no source before SPA_VERSION_RECVD_PROPS,
                         * but is still modifiable.
                         */
                        if (nvlist_lookup_string(propnv,
                            ZPROP_SOURCE, &source) == 0) {
                                if ((strcmp(source, zhp->zfs_name) != 0) &&
                                    (strcmp(source,
                                    ZPROP_SOURCE_VAL_RECVD) != 0))
                                        continue;
                        }
                } else {
                        /*
                         * All other properties must have a source, and it
                         * must be this dataset or "received".
                         */
                        char *source;
                        if (nvlist_lookup_string(propnv,
                            ZPROP_SOURCE, &source) != 0)
                                continue;
                        if ((strcmp(source, zhp->zfs_name) != 0) &&
                            (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
                                continue;
                }

                /* User and string-typed properties are sent as strings. */
                if (zfs_prop_user(propname) ||
                    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
                        char *value;
                        verify(nvlist_lookup_string(propnv,
                            ZPROP_VALUE, &value) == 0);
                        VERIFY(0 == nvlist_add_string(nv, propname, value));
                } else {
                        uint64_t value;
                        verify(nvlist_lookup_uint64(propnv,
                            ZPROP_VALUE, &value) == 0);
                        VERIFY(0 == nvlist_add_uint64(nv, propname, value));
                }
        }
}
729
730 /*
731  * recursively generate nvlists describing datasets.  See comment
732  * for the data structure send_data_t above for description of contents
733  * of the nvlist.
734  */
735 static int
736 send_iterate_fs(zfs_handle_t *zhp, void *arg)
737 {
738         send_data_t *sd = arg;
739         nvlist_t *nvfs, *nv;
740         int rv = 0;
741         uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
742         uint64_t guid = zhp->zfs_dmustats.dds_guid;
743         char guidstring[64];
744
745         VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
746         VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
747         VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
748             sd->parent_fromsnap_guid));
749
750         if (zhp->zfs_dmustats.dds_origin[0]) {
751                 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
752                     zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
753                 if (origin == NULL)
754                         return (-1);
755                 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
756                     origin->zfs_dmustats.dds_guid));
757         }
758
759         /* iterate over props */
760         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
761         send_iterate_prop(zhp, nv);
762         VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
763         nvlist_free(nv);
764
765         /* iterate over snaps, and set sd->parent_fromsnap_guid */
766         sd->parent_fromsnap_guid = 0;
767         VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
768         VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
769         (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
770         VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
771         VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
772         nvlist_free(sd->parent_snaps);
773         nvlist_free(sd->snapprops);
774
775         /* add this fs to nvlist */
776         (void) snprintf(guidstring, sizeof (guidstring),
777             "0x%llx", (longlong_t)guid);
778         VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
779         nvlist_free(nvfs);
780
781         /* iterate over children */
782         if (sd->recursive)
783                 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
784
785         sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
786
787         zfs_close(zhp);
788         return (rv);
789 }
790
791 static int
792 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
793     const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
794 {
795         zfs_handle_t *zhp;
796         send_data_t sd = { 0 };
797         int error;
798
799         zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
800         if (zhp == NULL)
801                 return (EZFS_BADTYPE);
802
803         VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
804         sd.fromsnap = fromsnap;
805         sd.tosnap = tosnap;
806         sd.recursive = recursive;
807
808         if ((error = send_iterate_fs(zhp, &sd)) != 0) {
809                 nvlist_free(sd.fss);
810                 if (avlp != NULL)
811                         *avlp = NULL;
812                 *nvlp = NULL;
813                 return (error);
814         }
815
816         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
817                 nvlist_free(sd.fss);
818                 *nvlp = NULL;
819                 return (EZFS_NOMEM);
820         }
821
822         *nvlp = sd.fss;
823         return (0);
824 }
825
826 /*
827  * Routines specific to "zfs send"
828  */
829 typedef struct send_dump_data {
830         /* these are all just the short snapname (the part after the @) */
831         const char *fromsnap;
832         const char *tosnap;
833         char prevsnap[ZFS_MAXNAMELEN];
834         uint64_t prevsnap_obj;
835         boolean_t seenfrom, seento, replicate, doall, fromorigin;
836         boolean_t verbose, dryrun, parsable, progress, embed_data;
837         int outfd;
838         boolean_t err;
839         nvlist_t *fss;
840         nvlist_t *snapholds;
841         avl_tree_t *fsavl;
842         snapfilter_cb_t *filter_cb;
843         void *filter_cb_arg;
844         nvlist_t *debugnv;
845         char holdtag[ZFS_MAXNAMELEN];
846         int cleanup_fd;
847         uint64_t size;
848 } send_dump_data_t;
849
850 static int
851 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
852     boolean_t fromorigin, uint64_t *sizep)
853 {
854         zfs_cmd_t zc = {"\0"};
855         libzfs_handle_t *hdl = zhp->zfs_hdl;
856
857         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
858         assert(fromsnap_obj == 0 || !fromorigin);
859
860         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
861         zc.zc_obj = fromorigin;
862         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
863         zc.zc_fromobj = fromsnap_obj;
864         zc.zc_guid = 1;  /* estimate flag */
865
866         if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
867                 char errbuf[1024];
868                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
869                     "warning: cannot estimate space for '%s'"), zhp->zfs_name);
870
871                 switch (errno) {
872                 case EXDEV:
873                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
874                             "not an earlier snapshot from the same fs"));
875                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
876
877                 case ENOENT:
878                         if (zfs_dataset_exists(hdl, zc.zc_name,
879                             ZFS_TYPE_SNAPSHOT)) {
880                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
881                                     "incremental source (@%s) does not exist"),
882                                     zc.zc_value);
883                         }
884                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
885
886                 case EDQUOT:
887                 case EFBIG:
888                 case EIO:
889                 case ENOLINK:
890                 case ENOSPC:
891                 case ENOSTR:
892                 case ENXIO:
893                 case EPIPE:
894                 case ERANGE:
895                 case EFAULT:
896                 case EROFS:
897                         zfs_error_aux(hdl, strerror(errno));
898                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
899
900                 default:
901                         return (zfs_standard_error(hdl, errno, errbuf));
902                 }
903         }
904
905         *sizep = zc.zc_objset_type;
906
907         return (0);
908 }
909
910 /*
911  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
912  * NULL) to the file descriptor specified by outfd.
913  */
914 static int
915 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
916     boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
917     nvlist_t *debugnv)
918 {
919         zfs_cmd_t zc = {"\0"};
920         libzfs_handle_t *hdl = zhp->zfs_hdl;
921         nvlist_t *thisdbg;
922
923         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
924         assert(fromsnap_obj == 0 || !fromorigin);
925
926         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
927         zc.zc_cookie = outfd;
928         zc.zc_obj = fromorigin;
929         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
930         zc.zc_fromobj = fromsnap_obj;
931         zc.zc_flags = flags;
932
933         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
934         if (fromsnap && fromsnap[0] != '\0') {
935                 VERIFY(0 == nvlist_add_string(thisdbg,
936                     "fromsnap", fromsnap));
937         }
938
939         if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
940                 char errbuf[1024];
941                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
942                     "warning: cannot send '%s'"), zhp->zfs_name);
943
944                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
945                 if (debugnv) {
946                         VERIFY(0 == nvlist_add_nvlist(debugnv,
947                             zhp->zfs_name, thisdbg));
948                 }
949                 nvlist_free(thisdbg);
950
951                 switch (errno) {
952                 case EXDEV:
953                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
954                             "not an earlier snapshot from the same fs"));
955                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
956
957                 case ENOENT:
958                         if (zfs_dataset_exists(hdl, zc.zc_name,
959                             ZFS_TYPE_SNAPSHOT)) {
960                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
961                                     "incremental source (@%s) does not exist"),
962                                     zc.zc_value);
963                         }
964                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
965
966                 case EDQUOT:
967                 case EFBIG:
968                 case EIO:
969                 case ENOLINK:
970                 case ENOSPC:
971                 case ENOSTR:
972                 case ENXIO:
973                 case EPIPE:
974                 case ERANGE:
975                 case EFAULT:
976                 case EROFS:
977                         zfs_error_aux(hdl, strerror(errno));
978                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
979
980                 default:
981                         return (zfs_standard_error(hdl, errno, errbuf));
982                 }
983         }
984
985         if (debugnv)
986                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
987         nvlist_free(thisdbg);
988
989         return (0);
990 }
991
992 static void
993 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
994 {
995         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
996
997         /*
998          * zfs_send() only sets snapholds for sends that need them,
999          * e.g. replication and doall.
1000          */
1001         if (sdd->snapholds == NULL)
1002                 return;
1003
1004         fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
1005 }
1006
1007 static void *
1008 send_progress_thread(void *arg)
1009 {
1010         progress_arg_t *pa = arg;
1011
1012         zfs_cmd_t zc = {"\0"};
1013         zfs_handle_t *zhp = pa->pa_zhp;
1014         libzfs_handle_t *hdl = zhp->zfs_hdl;
1015         unsigned long long bytes;
1016         char buf[16];
1017
1018         time_t t;
1019         struct tm *tm;
1020
1021         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
1022         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1023
1024         if (!pa->pa_parsable)
1025                 (void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1026
1027         /*
1028          * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1029          */
1030         for (;;) {
1031                 (void) sleep(1);
1032
1033                 zc.zc_cookie = pa->pa_fd;
1034                 if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1035                         return ((void *)-1);
1036
1037                 (void) time(&t);
1038                 tm = localtime(&t);
1039                 bytes = zc.zc_cookie;
1040
1041                 if (pa->pa_parsable) {
1042                         (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1043                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1044                             bytes, zhp->zfs_name);
1045                 } else {
1046                         zfs_nicenum(bytes, buf, sizeof (buf));
1047                         (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1048                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1049                             buf, zhp->zfs_name);
1050                 }
1051         }
1052 }
1053
1054 static int
1055 dump_snapshot(zfs_handle_t *zhp, void *arg)
1056 {
1057         send_dump_data_t *sdd = arg;
1058         progress_arg_t pa = { 0 };
1059         pthread_t tid;
1060         char *thissnap;
1061         int err;
1062         boolean_t isfromsnap, istosnap, fromorigin;
1063         boolean_t exclude = B_FALSE;
1064
1065         err = 0;
1066         thissnap = strchr(zhp->zfs_name, '@') + 1;
1067         isfromsnap = (sdd->fromsnap != NULL &&
1068             strcmp(sdd->fromsnap, thissnap) == 0);
1069
1070         if (!sdd->seenfrom && isfromsnap) {
1071                 gather_holds(zhp, sdd);
1072                 sdd->seenfrom = B_TRUE;
1073                 (void) strcpy(sdd->prevsnap, thissnap);
1074                 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1075                 zfs_close(zhp);
1076                 return (0);
1077         }
1078
1079         if (sdd->seento || !sdd->seenfrom) {
1080                 zfs_close(zhp);
1081                 return (0);
1082         }
1083
1084         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1085         if (istosnap)
1086                 sdd->seento = B_TRUE;
1087
1088         if (!sdd->doall && !isfromsnap && !istosnap) {
1089                 if (sdd->replicate) {
1090                         char *snapname;
1091                         nvlist_t *snapprops;
1092                         /*
1093                          * Filter out all intermediate snapshots except origin
1094                          * snapshots needed to replicate clones.
1095                          */
1096                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1097                             zhp->zfs_dmustats.dds_guid, &snapname);
1098
1099                         VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1100                             "snapprops", &snapprops));
1101                         VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1102                             thissnap, &snapprops));
1103                         exclude = !nvlist_exists(snapprops, "is_clone_origin");
1104                 } else {
1105                         exclude = B_TRUE;
1106                 }
1107         }
1108
1109         /*
1110          * If a filter function exists, call it to determine whether
1111          * this snapshot will be sent.
1112          */
1113         if (exclude || (sdd->filter_cb != NULL &&
1114             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1115                 /*
1116                  * This snapshot is filtered out.  Don't send it, and don't
1117                  * set prevsnap_obj, so it will be as if this snapshot didn't
1118                  * exist, and the next accepted snapshot will be sent as
1119                  * an incremental from the last accepted one, or as the
1120                  * first (and full) snapshot in the case of a replication,
1121                  * non-incremental send.
1122                  */
1123                 zfs_close(zhp);
1124                 return (0);
1125         }
1126
1127         gather_holds(zhp, sdd);
1128         fromorigin = sdd->prevsnap[0] == '\0' &&
1129             (sdd->fromorigin || sdd->replicate);
1130
1131         if (sdd->verbose) {
1132                 uint64_t size;
1133                 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1134                     fromorigin, &size);
1135
1136                 if (sdd->parsable) {
1137                         if (sdd->prevsnap[0] != '\0') {
1138                                 (void) fprintf(stderr, "incremental\t%s\t%s",
1139                                     sdd->prevsnap, zhp->zfs_name);
1140                         } else {
1141                                 (void) fprintf(stderr, "full\t%s",
1142                                     zhp->zfs_name);
1143                         }
1144                 } else {
1145                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1146                             "send from @%s to %s"),
1147                             sdd->prevsnap, zhp->zfs_name);
1148                 }
1149                 if (err == 0) {
1150                         if (sdd->parsable) {
1151                                 (void) fprintf(stderr, "\t%llu\n",
1152                                     (longlong_t)size);
1153                         } else {
1154                                 char buf[16];
1155                                 zfs_nicenum(size, buf, sizeof (buf));
1156                                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1157                                     " estimated size is %s\n"), buf);
1158                         }
1159                         sdd->size += size;
1160                 } else {
1161                         (void) fprintf(stderr, "\n");
1162                 }
1163         }
1164
1165         if (!sdd->dryrun) {
1166                 /*
1167                  * If progress reporting is requested, spawn a new thread to
1168                  * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1169                  */
1170                 if (sdd->progress) {
1171                         pa.pa_zhp = zhp;
1172                         pa.pa_fd = sdd->outfd;
1173                         pa.pa_parsable = sdd->parsable;
1174
1175                         if ((err = pthread_create(&tid, NULL,
1176                             send_progress_thread, &pa))) {
1177                                 zfs_close(zhp);
1178                                 return (err);
1179                         }
1180                 }
1181
1182                 enum lzc_send_flags flags = 0;
1183                 if (sdd->embed_data)
1184                         flags |= LZC_SEND_FLAG_EMBED_DATA;
1185
1186                 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1187                     fromorigin, sdd->outfd, flags, sdd->debugnv);
1188
1189                 if (sdd->progress) {
1190                         (void) pthread_cancel(tid);
1191                         (void) pthread_join(tid, NULL);
1192                 }
1193         }
1194
1195         (void) strcpy(sdd->prevsnap, thissnap);
1196         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1197         zfs_close(zhp);
1198         return (err);
1199 }
1200
1201 static int
1202 dump_filesystem(zfs_handle_t *zhp, void *arg)
1203 {
1204         int rv = 0;
1205         send_dump_data_t *sdd = arg;
1206         boolean_t missingfrom = B_FALSE;
1207         zfs_cmd_t zc = {"\0"};
1208
1209         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1210             zhp->zfs_name, sdd->tosnap);
1211         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1212                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1213                     "WARNING: could not send %s@%s: does not exist\n"),
1214                     zhp->zfs_name, sdd->tosnap);
1215                 sdd->err = B_TRUE;
1216                 return (0);
1217         }
1218
1219         if (sdd->replicate && sdd->fromsnap) {
1220                 /*
1221                  * If this fs does not have fromsnap, and we're doing
1222                  * recursive, we need to send a full stream from the
1223                  * beginning (or an incremental from the origin if this
1224                  * is a clone).  If we're doing non-recursive, then let
1225                  * them get the error.
1226                  */
1227                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1228                     zhp->zfs_name, sdd->fromsnap);
1229                 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1230                     ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1231                         missingfrom = B_TRUE;
1232                 }
1233         }
1234
1235         sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1236         sdd->prevsnap_obj = 0;
1237         if (sdd->fromsnap == NULL || missingfrom)
1238                 sdd->seenfrom = B_TRUE;
1239
1240         rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1241         if (!sdd->seenfrom) {
1242                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1243                     "WARNING: could not send %s@%s:\n"
1244                     "incremental source (%s@%s) does not exist\n"),
1245                     zhp->zfs_name, sdd->tosnap,
1246                     zhp->zfs_name, sdd->fromsnap);
1247                 sdd->err = B_TRUE;
1248         } else if (!sdd->seento) {
1249                 if (sdd->fromsnap) {
1250                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1251                             "WARNING: could not send %s@%s:\n"
1252                             "incremental source (%s@%s) "
1253                             "is not earlier than it\n"),
1254                             zhp->zfs_name, sdd->tosnap,
1255                             zhp->zfs_name, sdd->fromsnap);
1256                 } else {
1257                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1258                             "WARNING: "
1259                             "could not send %s@%s: does not exist\n"),
1260                             zhp->zfs_name, sdd->tosnap);
1261                 }
1262                 sdd->err = B_TRUE;
1263         }
1264
1265         return (rv);
1266 }
1267
1268 static int
1269 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1270 {
1271         send_dump_data_t *sdd = arg;
1272         nvpair_t *fspair;
1273         boolean_t needagain, progress;
1274
1275         if (!sdd->replicate)
1276                 return (dump_filesystem(rzhp, sdd));
1277
1278         /* Mark the clone origin snapshots. */
1279         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1280             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1281                 nvlist_t *nvfs;
1282                 uint64_t origin_guid = 0;
1283
1284                 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1285                 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1286                 if (origin_guid != 0) {
1287                         char *snapname;
1288                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1289                             origin_guid, &snapname);
1290                         if (origin_nv != NULL) {
1291                                 nvlist_t *snapprops;
1292                                 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1293                                     "snapprops", &snapprops));
1294                                 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1295                                     snapname, &snapprops));
1296                                 VERIFY(0 == nvlist_add_boolean(
1297                                     snapprops, "is_clone_origin"));
1298                         }
1299                 }
1300         }
1301 again:
1302         needagain = progress = B_FALSE;
1303         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1304             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1305                 nvlist_t *fslist, *parent_nv;
1306                 char *fsname;
1307                 zfs_handle_t *zhp;
1308                 int err;
1309                 uint64_t origin_guid = 0;
1310                 uint64_t parent_guid = 0;
1311
1312                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1313                 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1314                         continue;
1315
1316                 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1317                 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1318                 (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1319                     &parent_guid);
1320
1321                 if (parent_guid != 0) {
1322                         parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1323                         if (!nvlist_exists(parent_nv, "sent")) {
1324                                 /* parent has not been sent; skip this one */
1325                                 needagain = B_TRUE;
1326                                 continue;
1327                         }
1328                 }
1329
1330                 if (origin_guid != 0) {
1331                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1332                             origin_guid, NULL);
1333                         if (origin_nv != NULL &&
1334                             !nvlist_exists(origin_nv, "sent")) {
1335                                 /*
1336                                  * origin has not been sent yet;
1337                                  * skip this clone.
1338                                  */
1339                                 needagain = B_TRUE;
1340                                 continue;
1341                         }
1342                 }
1343
1344                 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1345                 if (zhp == NULL)
1346                         return (-1);
1347                 err = dump_filesystem(zhp, sdd);
1348                 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1349                 progress = B_TRUE;
1350                 zfs_close(zhp);
1351                 if (err)
1352                         return (err);
1353         }
1354         if (needagain) {
1355                 assert(progress);
1356                 goto again;
1357         }
1358
1359         /* clean out the sent flags in case we reuse this fss */
1360         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1361             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1362                 nvlist_t *fslist;
1363
1364                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1365                 (void) nvlist_remove_all(fslist, "sent");
1366         }
1367
1368         return (0);
1369 }
1370
1371 /*
1372  * Generate a send stream for the dataset identified by the argument zhp.
1373  *
1374  * The content of the send stream is the snapshot identified by
1375  * 'tosnap'.  Incremental streams are requested in two ways:
1376  *     - from the snapshot identified by "fromsnap" (if non-null) or
1377  *     - from the origin of the dataset identified by zhp, which must
1378  *       be a clone.  In this case, "fromsnap" is null and "fromorigin"
1379  *       is TRUE.
1380  *
1381  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1382  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1383  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1384  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1385  * case too. If "props" is set, send properties.
1386  */
1387 int
1388 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1389     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1390     void *cb_arg, nvlist_t **debugnvp)
1391 {
1392         char errbuf[1024];
1393         send_dump_data_t sdd = { 0 };
1394         int err = 0;
1395         nvlist_t *fss = NULL;
1396         avl_tree_t *fsavl = NULL;
1397         static uint64_t holdseq;
1398         int spa_version;
1399         pthread_t tid = 0;
1400         int pipefd[2];
1401         dedup_arg_t dda = { 0 };
1402         int featureflags = 0;
1403
1404         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1405             "cannot send '%s'"), zhp->zfs_name);
1406
1407         if (fromsnap && fromsnap[0] == '\0') {
1408                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1409                     "zero-length incremental source"));
1410                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1411         }
1412
1413         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1414                 uint64_t version;
1415                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1416                 if (version >= ZPL_VERSION_SA) {
1417                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1418                 }
1419         }
1420
1421         if (flags->dedup && !flags->dryrun) {
1422                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1423                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1424                 if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd))) {
1425                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1426                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1427                             errbuf));
1428                 }
1429                 dda.outputfd = outfd;
1430                 dda.inputfd = pipefd[1];
1431                 dda.dedup_hdl = zhp->zfs_hdl;
1432                 if ((err = pthread_create(&tid, NULL, cksummer, &dda))) {
1433                         (void) close(pipefd[0]);
1434                         (void) close(pipefd[1]);
1435                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1436                         return (zfs_error(zhp->zfs_hdl,
1437                             EZFS_THREADCREATEFAILED, errbuf));
1438                 }
1439         }
1440
1441         if (flags->replicate || flags->doall || flags->props) {
1442                 dmu_replay_record_t drr = { 0 };
1443                 char *packbuf = NULL;
1444                 size_t buflen = 0;
1445                 zio_cksum_t zc = { { 0 } };
1446
1447                 if (flags->replicate || flags->props) {
1448                         nvlist_t *hdrnv;
1449
1450                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1451                         if (fromsnap) {
1452                                 VERIFY(0 == nvlist_add_string(hdrnv,
1453                                     "fromsnap", fromsnap));
1454                         }
1455                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1456                         if (!flags->replicate) {
1457                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1458                                     "not_recursive"));
1459                         }
1460
1461                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1462                             fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1463                         if (err)
1464                                 goto err_out;
1465                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1466                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1467                             NV_ENCODE_XDR, 0);
1468                         if (debugnvp)
1469                                 *debugnvp = hdrnv;
1470                         else
1471                                 nvlist_free(hdrnv);
1472                         if (err)
1473                                 goto stderr_out;
1474                 }
1475
1476                 if (!flags->dryrun) {
1477                         /* write first begin record */
1478                         drr.drr_type = DRR_BEGIN;
1479                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1480                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1481                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1482                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1483                             drr_versioninfo, featureflags);
1484                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1485                             sizeof (drr.drr_u.drr_begin.drr_toname),
1486                             "%s@%s", zhp->zfs_name, tosnap);
1487                         drr.drr_payloadlen = buflen;
1488                         err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1489
1490                         /* write header nvlist */
1491                         if (err != -1 && packbuf != NULL) {
1492                                 err = cksum_and_write(packbuf, buflen, &zc,
1493                                     outfd);
1494                         }
1495                         free(packbuf);
1496                         if (err == -1) {
1497                                 err = errno;
1498                                 goto stderr_out;
1499                         }
1500
1501                         /* write end record */
1502                         bzero(&drr, sizeof (drr));
1503                         drr.drr_type = DRR_END;
1504                         drr.drr_u.drr_end.drr_checksum = zc;
1505                         err = write(outfd, &drr, sizeof (drr));
1506                         if (err == -1) {
1507                                 err = errno;
1508                                 goto stderr_out;
1509                         }
1510
1511                         err = 0;
1512                 }
1513         }
1514
1515         /* dump each stream */
1516         sdd.fromsnap = fromsnap;
1517         sdd.tosnap = tosnap;
1518         if (tid != 0)
1519                 sdd.outfd = pipefd[0];
1520         else
1521                 sdd.outfd = outfd;
1522         sdd.replicate = flags->replicate;
1523         sdd.doall = flags->doall;
1524         sdd.fromorigin = flags->fromorigin;
1525         sdd.fss = fss;
1526         sdd.fsavl = fsavl;
1527         sdd.verbose = flags->verbose;
1528         sdd.parsable = flags->parsable;
1529         sdd.progress = flags->progress;
1530         sdd.dryrun = flags->dryrun;
1531         sdd.embed_data = flags->embed_data;
1532         sdd.filter_cb = filter_func;
1533         sdd.filter_cb_arg = cb_arg;
1534         if (debugnvp)
1535                 sdd.debugnv = *debugnvp;
1536
1537         /*
1538          * Some flags require that we place user holds on the datasets that are
1539          * being sent so they don't get destroyed during the send. We can skip
1540          * this step if the pool is imported read-only since the datasets cannot
1541          * be destroyed.
1542          */
1543         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1544             ZPOOL_PROP_READONLY, NULL) &&
1545             zfs_spa_version(zhp, &spa_version) == 0 &&
1546             spa_version >= SPA_VERSION_USERREFS &&
1547             (flags->doall || flags->replicate)) {
1548                 ++holdseq;
1549                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1550                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1551                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR);
1552                 if (sdd.cleanup_fd < 0) {
1553                         err = errno;
1554                         goto stderr_out;
1555                 }
1556                 sdd.snapholds = fnvlist_alloc();
1557         } else {
1558                 sdd.cleanup_fd = -1;
1559                 sdd.snapholds = NULL;
1560         }
1561         if (flags->verbose || sdd.snapholds != NULL) {
1562                 /*
1563                  * Do a verbose no-op dry run to get all the verbose output
1564                  * or to gather snapshot hold's before generating any data,
1565                  * then do a non-verbose real run to generate the streams.
1566                  */
1567                 sdd.dryrun = B_TRUE;
1568                 err = dump_filesystems(zhp, &sdd);
1569
1570                 if (err != 0)
1571                         goto stderr_out;
1572
1573                 if (flags->verbose) {
1574                         if (flags->parsable) {
1575                                 (void) fprintf(stderr, "size\t%llu\n",
1576                                     (longlong_t)sdd.size);
1577                         } else {
1578                                 char buf[16];
1579                                 zfs_nicenum(sdd.size, buf, sizeof (buf));
1580                                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1581                                     "total estimated size is %s\n"), buf);
1582                         }
1583                 }
1584
1585                 /* Ensure no snaps found is treated as an error. */
1586                 if (!sdd.seento) {
1587                         err = ENOENT;
1588                         goto err_out;
1589                 }
1590
1591                 /* Skip the second run if dryrun was requested. */
1592                 if (flags->dryrun)
1593                         goto err_out;
1594
1595                 if (sdd.snapholds != NULL) {
1596                         err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1597                         if (err != 0)
1598                                 goto stderr_out;
1599
1600                         fnvlist_free(sdd.snapholds);
1601                         sdd.snapholds = NULL;
1602                 }
1603
1604                 sdd.dryrun = B_FALSE;
1605                 sdd.verbose = B_FALSE;
1606         }
1607
1608         err = dump_filesystems(zhp, &sdd);
1609         fsavl_destroy(fsavl);
1610         nvlist_free(fss);
1611
1612         /* Ensure no snaps found is treated as an error. */
1613         if (err == 0 && !sdd.seento)
1614                 err = ENOENT;
1615
1616         if (tid != 0) {
1617                 if (err != 0)
1618                         (void) pthread_cancel(tid);
1619                 (void) close(pipefd[0]);
1620                 (void) pthread_join(tid, NULL);
1621         }
1622
1623         if (sdd.cleanup_fd != -1) {
1624                 VERIFY(0 == close(sdd.cleanup_fd));
1625                 sdd.cleanup_fd = -1;
1626         }
1627
1628         if (!flags->dryrun && (flags->replicate || flags->doall ||
1629             flags->props)) {
1630                 /*
1631                  * write final end record.  NB: want to do this even if
1632                  * there was some error, because it might not be totally
1633                  * failed.
1634                  */
1635                 dmu_replay_record_t drr = { 0 };
1636                 drr.drr_type = DRR_END;
1637                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1638                         return (zfs_standard_error(zhp->zfs_hdl,
1639                             errno, errbuf));
1640                 }
1641         }
1642
1643         return (err || sdd.err);
1644
1645 stderr_out:
1646         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1647 err_out:
1648         fsavl_destroy(fsavl);
1649         nvlist_free(fss);
1650         fnvlist_free(sdd.snapholds);
1651
1652         if (sdd.cleanup_fd != -1)
1653                 VERIFY(0 == close(sdd.cleanup_fd));
1654         if (tid != 0) {
1655                 (void) pthread_cancel(tid);
1656                 (void) close(pipefd[0]);
1657                 (void) pthread_join(tid, NULL);
1658         }
1659         return (err);
1660 }
1661
1662 int
1663 zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
1664     enum lzc_send_flags flags)
1665 {
1666         int err;
1667         libzfs_handle_t *hdl = zhp->zfs_hdl;
1668
1669         char errbuf[1024];
1670         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1671             "warning: cannot send '%s'"), zhp->zfs_name);
1672
1673         err = lzc_send(zhp->zfs_name, from, fd, flags);
1674         if (err != 0) {
1675                 switch (errno) {
1676                 case EXDEV:
1677                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1678                             "not an earlier snapshot from the same fs"));
1679                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
1680
1681                 case ENOENT:
1682                 case ESRCH:
1683                         if (lzc_exists(zhp->zfs_name)) {
1684                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1685                                     "incremental source (%s) does not exist"),
1686                                     from);
1687                         }
1688                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
1689
1690                 case EBUSY:
1691                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1692                             "target is busy; if a filesystem, "
1693                             "it must not be mounted"));
1694                         return (zfs_error(hdl, EZFS_BUSY, errbuf));
1695
1696                 case EDQUOT:
1697                 case EFBIG:
1698                 case EIO:
1699                 case ENOLINK:
1700                 case ENOSPC:
1701                 case ENOSTR:
1702                 case ENXIO:
1703                 case EPIPE:
1704                 case ERANGE:
1705                 case EFAULT:
1706                 case EROFS:
1707                         zfs_error_aux(hdl, strerror(errno));
1708                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
1709
1710                 default:
1711                         return (zfs_standard_error(hdl, errno, errbuf));
1712                 }
1713         }
1714         return (err != 0);
1715 }
1716
1717 /*
1718  * Routines specific to "zfs recv"
1719  */
1720
1721 static int
1722 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1723     boolean_t byteswap, zio_cksum_t *zc)
1724 {
1725         char *cp = buf;
1726         int rv;
1727         int len = ilen;
1728
1729         do {
1730                 rv = read(fd, cp, len);
1731                 cp += rv;
1732                 len -= rv;
1733         } while (rv > 0);
1734
1735         if (rv < 0 || len != 0) {
1736                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1737                     "failed to read from stream"));
1738                 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1739                     "cannot receive")));
1740         }
1741
1742         if (zc) {
1743                 if (byteswap)
1744                         fletcher_4_incremental_byteswap(buf, ilen, zc);
1745                 else
1746                         fletcher_4_incremental_native(buf, ilen, zc);
1747         }
1748         return (0);
1749 }
1750
1751 static int
1752 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1753     boolean_t byteswap, zio_cksum_t *zc)
1754 {
1755         char *buf;
1756         int err;
1757
1758         buf = zfs_alloc(hdl, len);
1759         if (buf == NULL)
1760                 return (ENOMEM);
1761
1762         err = recv_read(hdl, fd, buf, len, byteswap, zc);
1763         if (err != 0) {
1764                 free(buf);
1765                 return (err);
1766         }
1767
1768         err = nvlist_unpack(buf, len, nvp, 0);
1769         free(buf);
1770         if (err != 0) {
1771                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1772                     "stream (malformed nvlist)"));
1773                 return (EINVAL);
1774         }
1775         return (0);
1776 }
1777
1778 static int
1779 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1780     int baselen, char *newname, recvflags_t *flags)
1781 {
1782         static int seq;
1783         zfs_cmd_t zc = {"\0"};
1784         int err;
1785         prop_changelist_t *clp;
1786         zfs_handle_t *zhp;
1787
1788         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1789         if (zhp == NULL)
1790                 return (-1);
1791         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1792             flags->force ? MS_FORCE : 0);
1793         zfs_close(zhp);
1794         if (clp == NULL)
1795                 return (-1);
1796         err = changelist_prefix(clp);
1797         if (err)
1798                 return (err);
1799
1800         zc.zc_objset_type = DMU_OST_ZFS;
1801         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1802
1803         if (tryname) {
1804                 (void) strcpy(newname, tryname);
1805
1806                 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1807
1808                 if (flags->verbose) {
1809                         (void) printf("attempting rename %s to %s\n",
1810                             zc.zc_name, zc.zc_value);
1811                 }
1812                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1813                 if (err == 0)
1814                         changelist_rename(clp, name, tryname);
1815         } else {
1816                 err = ENOENT;
1817         }
1818
1819         if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
1820                 seq++;
1821
1822                 (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
1823                     baselen, name, getpid(), seq);
1824                 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1825
1826                 if (flags->verbose) {
1827                         (void) printf("failed - trying rename %s to %s\n",
1828                             zc.zc_name, zc.zc_value);
1829                 }
1830                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1831                 if (err == 0)
1832                         changelist_rename(clp, name, newname);
1833                 if (err && flags->verbose) {
1834                         (void) printf("failed (%u) - "
1835                             "will try again on next pass\n", errno);
1836                 }
1837                 err = EAGAIN;
1838         } else if (flags->verbose) {
1839                 if (err == 0)
1840                         (void) printf("success\n");
1841                 else
1842                         (void) printf("failed (%u)\n", errno);
1843         }
1844
1845         (void) changelist_postfix(clp);
1846         changelist_free(clp);
1847
1848         return (err);
1849 }
1850
1851 static int
1852 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1853     char *newname, recvflags_t *flags)
1854 {
1855         zfs_cmd_t zc = {"\0"};
1856         int err = 0;
1857         prop_changelist_t *clp;
1858         zfs_handle_t *zhp;
1859         boolean_t defer = B_FALSE;
1860         int spa_version;
1861
1862         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1863         if (zhp == NULL)
1864                 return (-1);
1865         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1866             flags->force ? MS_FORCE : 0);
1867         if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1868             zfs_spa_version(zhp, &spa_version) == 0 &&
1869             spa_version >= SPA_VERSION_USERREFS)
1870                 defer = B_TRUE;
1871         zfs_close(zhp);
1872         if (clp == NULL)
1873                 return (-1);
1874         err = changelist_prefix(clp);
1875         if (err)
1876                 return (err);
1877
1878         zc.zc_objset_type = DMU_OST_ZFS;
1879         zc.zc_defer_destroy = defer;
1880         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1881
1882         if (flags->verbose)
1883                 (void) printf("attempting destroy %s\n", zc.zc_name);
1884         err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1885         if (err == 0) {
1886                 if (flags->verbose)
1887                         (void) printf("success\n");
1888                 changelist_remove(clp, zc.zc_name);
1889         }
1890
1891         (void) changelist_postfix(clp);
1892         changelist_free(clp);
1893
1894         /*
1895          * Deferred destroy might destroy the snapshot or only mark it to be
1896          * destroyed later, and it returns success in either case.
1897          */
1898         if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1899             ZFS_TYPE_SNAPSHOT))) {
1900                 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1901         }
1902
1903         return (err);
1904 }
1905
/*
 * State shared between guid_to_name() and its iteration callback.
 */
struct guid_to_name_data {
	uint64_t guid;	/* guid being searched for */
	char *name;	/* out: receives the name of the matching dataset */
	char *skip;	/* dataset name the callback should pass over, or NULL */
};

typedef struct guid_to_name_data guid_to_name_data_t;
1911
1912 static int
1913 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1914 {
1915         guid_to_name_data_t *gtnd = arg;
1916         int err;
1917
1918         if (gtnd->skip != NULL &&
1919             strcmp(zhp->zfs_name, gtnd->skip) == 0) {
1920                 return (0);
1921         }
1922
1923         if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1924                 (void) strcpy(gtnd->name, zhp->zfs_name);
1925                 zfs_close(zhp);
1926                 return (EEXIST);
1927         }
1928
1929         err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1930         zfs_close(zhp);
1931         return (err);
1932 }
1933
1934 /*
1935  * Attempt to find the local dataset associated with this guid.  In the case of
1936  * multiple matches, we attempt to find the "best" match by searching
1937  * progressively larger portions of the hierarchy.  This allows one to send a
1938  * tree of datasets individually and guarantee that we will find the source
1939  * guid within that hierarchy, even if there are multiple matches elsewhere.
1940  */
1941 static int
1942 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1943     char *name)
1944 {
1945         /* exhaustive search all local snapshots */
1946         char pname[ZFS_MAXNAMELEN];
1947         guid_to_name_data_t gtnd;
1948         int err = 0;
1949         zfs_handle_t *zhp;
1950         char *cp;
1951
1952         gtnd.guid = guid;
1953         gtnd.name = name;
1954         gtnd.skip = NULL;
1955
1956         (void) strlcpy(pname, parent, sizeof (pname));
1957
1958         /*
1959          * Search progressively larger portions of the hierarchy.  This will
1960          * select the "most local" version of the origin snapshot in the case
1961          * that there are multiple matching snapshots in the system.
1962          */
1963         while ((cp = strrchr(pname, '/')) != NULL) {
1964
1965                 /* Chop off the last component and open the parent */
1966                 *cp = '\0';
1967                 zhp = make_dataset_handle(hdl, pname);
1968
1969                 if (zhp == NULL)
1970                         continue;
1971
1972                 err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1973                 zfs_close(zhp);
1974                 if (err == EEXIST)
1975                         return (0);
1976
1977                 /*
1978                  * Remember the dataset that we already searched, so we
1979                  * skip it next time through.
1980                  */
1981                 gtnd.skip = pname;
1982         }
1983
1984         return (ENOENT);
1985 }
1986
1987 /*
1988  * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
1989  * guid1 is after guid2.
1990  */
1991 static int
1992 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1993     uint64_t guid1, uint64_t guid2)
1994 {
1995         nvlist_t *nvfs;
1996         char *fsname, *snapname;
1997         char buf[ZFS_MAXNAMELEN];
1998         int rv;
1999         zfs_handle_t *guid1hdl, *guid2hdl;
2000         uint64_t create1, create2;
2001
2002         if (guid2 == 0)
2003                 return (0);
2004         if (guid1 == 0)
2005                 return (1);
2006
2007         nvfs = fsavl_find(avl, guid1, &snapname);
2008         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2009         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2010         guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2011         if (guid1hdl == NULL)
2012                 return (-1);
2013
2014         nvfs = fsavl_find(avl, guid2, &snapname);
2015         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2016         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
2017         guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
2018         if (guid2hdl == NULL) {
2019                 zfs_close(guid1hdl);
2020                 return (-1);
2021         }
2022
2023         create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
2024         create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
2025
2026         if (create1 < create2)
2027                 rv = -1;
2028         else if (create1 > create2)
2029                 rv = +1;
2030         else
2031                 rv = 0;
2032
2033         zfs_close(guid1hdl);
2034         zfs_close(guid2hdl);
2035
2036         return (rv);
2037 }
2038
2039 static int
2040 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
2041     recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2042     nvlist_t *renamed)
2043 {
2044         nvlist_t *local_nv, *deleted = NULL;
2045         avl_tree_t *local_avl;
2046         nvpair_t *fselem, *nextfselem;
2047         char *fromsnap;
2048         char newname[ZFS_MAXNAMELEN];
2049         char guidname[32];
2050         int error;
2051         boolean_t needagain, progress, recursive;
2052         char *s1, *s2;
2053
2054         VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
2055
2056         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2057             ENOENT);
2058
2059         if (flags->dryrun)
2060                 return (0);
2061
2062 again:
2063         needagain = progress = B_FALSE;
2064
2065         VERIFY(0 == nvlist_alloc(&deleted, NV_UNIQUE_NAME, 0));
2066
2067         if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
2068             recursive, &local_nv, &local_avl)) != 0)
2069                 return (error);
2070
2071         /*
2072          * Process deletes and renames
2073          */
2074         for (fselem = nvlist_next_nvpair(local_nv, NULL);
2075             fselem; fselem = nextfselem) {
2076                 nvlist_t *nvfs, *snaps;
2077                 nvlist_t *stream_nvfs = NULL;
2078                 nvpair_t *snapelem, *nextsnapelem;
2079                 uint64_t fromguid = 0;
2080                 uint64_t originguid = 0;
2081                 uint64_t stream_originguid = 0;
2082                 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
2083                 char *fsname, *stream_fsname;
2084
2085                 nextfselem = nvlist_next_nvpair(local_nv, fselem);
2086
2087                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
2088                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
2089                 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
2090                 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
2091                     &parent_fromsnap_guid));
2092                 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
2093
2094                 /*
2095                  * First find the stream's fs, so we can check for
2096                  * a different origin (due to "zfs promote")
2097                  */
2098                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2099                     snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
2100                         uint64_t thisguid;
2101
2102                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2103                         stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
2104
2105                         if (stream_nvfs != NULL)
2106                                 break;
2107                 }
2108
2109                 /* check for promote */
2110                 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
2111                     &stream_originguid);
2112                 if (stream_nvfs && originguid != stream_originguid) {
2113                         switch (created_before(hdl, local_avl,
2114                             stream_originguid, originguid)) {
2115                         case 1: {
2116                                 /* promote it! */
2117                                 zfs_cmd_t zc = {"\0"};
2118                                 nvlist_t *origin_nvfs;
2119                                 char *origin_fsname;
2120
2121                                 if (flags->verbose)
2122                                         (void) printf("promoting %s\n", fsname);
2123
2124                                 origin_nvfs = fsavl_find(local_avl, originguid,
2125                                     NULL);
2126                                 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2127                                     "name", &origin_fsname));
2128                                 (void) strlcpy(zc.zc_value, origin_fsname,
2129                                     sizeof (zc.zc_value));
2130                                 (void) strlcpy(zc.zc_name, fsname,
2131                                     sizeof (zc.zc_name));
2132                                 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2133                                 if (error == 0)
2134                                         progress = B_TRUE;
2135                                 break;
2136                         }
2137                         default:
2138                                 break;
2139                         case -1:
2140                                 fsavl_destroy(local_avl);
2141                                 nvlist_free(local_nv);
2142                                 return (-1);
2143                         }
2144                         /*
2145                          * We had/have the wrong origin, therefore our
2146                          * list of snapshots is wrong.  Need to handle
2147                          * them on the next pass.
2148                          */
2149                         needagain = B_TRUE;
2150                         continue;
2151                 }
2152
2153                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2154                     snapelem; snapelem = nextsnapelem) {
2155                         uint64_t thisguid;
2156                         char *stream_snapname;
2157                         nvlist_t *found, *props;
2158
2159                         nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2160
2161                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2162                         found = fsavl_find(stream_avl, thisguid,
2163                             &stream_snapname);
2164
2165                         /* check for delete */
2166                         if (found == NULL) {
2167                                 char name[ZFS_MAXNAMELEN];
2168
2169                                 if (!flags->force)
2170                                         continue;
2171
2172                                 (void) snprintf(name, sizeof (name), "%s@%s",
2173                                     fsname, nvpair_name(snapelem));
2174
2175                                 error = recv_destroy(hdl, name,
2176                                     strlen(fsname)+1, newname, flags);
2177                                 if (error)
2178                                         needagain = B_TRUE;
2179                                 else
2180                                         progress = B_TRUE;
2181                                 sprintf(guidname, "%lu", thisguid);
2182                                 nvlist_add_boolean(deleted, guidname);
2183                                 continue;
2184                         }
2185
2186                         stream_nvfs = found;
2187
2188                         if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2189                             &props) && 0 == nvlist_lookup_nvlist(props,
2190                             stream_snapname, &props)) {
2191                                 zfs_cmd_t zc = {"\0"};
2192
2193                                 zc.zc_cookie = B_TRUE; /* received */
2194                                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2195                                     "%s@%s", fsname, nvpair_name(snapelem));
2196                                 if (zcmd_write_src_nvlist(hdl, &zc,
2197                                     props) == 0) {
2198                                         (void) zfs_ioctl(hdl,
2199                                             ZFS_IOC_SET_PROP, &zc);
2200                                         zcmd_free_nvlists(&zc);
2201                                 }
2202                         }
2203
2204                         /* check for different snapname */
2205                         if (strcmp(nvpair_name(snapelem),
2206                             stream_snapname) != 0) {
2207                                 char name[ZFS_MAXNAMELEN];
2208                                 char tryname[ZFS_MAXNAMELEN];
2209
2210                                 (void) snprintf(name, sizeof (name), "%s@%s",
2211                                     fsname, nvpair_name(snapelem));
2212                                 (void) snprintf(tryname, sizeof (name), "%s@%s",
2213                                     fsname, stream_snapname);
2214
2215                                 error = recv_rename(hdl, name, tryname,
2216                                     strlen(fsname)+1, newname, flags);
2217                                 if (error)
2218                                         needagain = B_TRUE;
2219                                 else
2220                                         progress = B_TRUE;
2221                         }
2222
2223                         if (strcmp(stream_snapname, fromsnap) == 0)
2224                                 fromguid = thisguid;
2225                 }
2226
2227                 /* check for delete */
2228                 if (stream_nvfs == NULL) {
2229                         if (!flags->force)
2230                                 continue;
2231
2232                         error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2233                             newname, flags);
2234                         if (error)
2235                                 needagain = B_TRUE;
2236                         else
2237                                 progress = B_TRUE;
2238                         sprintf(guidname, "%lu", parent_fromsnap_guid);
2239                         nvlist_add_boolean(deleted, guidname);
2240                         continue;
2241                 }
2242
2243                 if (fromguid == 0) {
2244                         if (flags->verbose) {
2245                                 (void) printf("local fs %s does not have "
2246                                     "fromsnap (%s in stream); must have "
2247                                     "been deleted locally; ignoring\n",
2248                                     fsname, fromsnap);
2249                         }
2250                         continue;
2251                 }
2252
2253                 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2254                     "name", &stream_fsname));
2255                 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2256                     "parentfromsnap", &stream_parent_fromsnap_guid));
2257
2258                 s1 = strrchr(fsname, '/');
2259                 s2 = strrchr(stream_fsname, '/');
2260
2261                 /*
2262                  * Check if we're going to rename based on parent guid change
2263                  * and the current parent guid was also deleted. If it was then
2264                  * rename will fail and is likely unneeded, so avoid this and
2265                  * force an early retry to determine the new
2266                  * parent_fromsnap_guid.
2267                  */
2268                 if (stream_parent_fromsnap_guid != 0 &&
2269                     parent_fromsnap_guid != 0 &&
2270                     stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2271                         sprintf(guidname, "%lu", parent_fromsnap_guid);
2272                         if (nvlist_exists(deleted, guidname)) {
2273                                 progress = B_TRUE;
2274                                 needagain = B_TRUE;
2275                                 goto doagain;
2276                         }
2277                 }
2278
2279                 /*
2280                  * Check for rename. If the exact receive path is specified, it
2281                  * does not count as a rename, but we still need to check the
2282                  * datasets beneath it.
2283                  */
2284                 if ((stream_parent_fromsnap_guid != 0 &&
2285                     parent_fromsnap_guid != 0 &&
2286                     stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2287                     ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2288                     (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2289                         nvlist_t *parent;
2290                         char tryname[ZFS_MAXNAMELEN];
2291
2292                         parent = fsavl_find(local_avl,
2293                             stream_parent_fromsnap_guid, NULL);
2294                         /*
2295                          * NB: parent might not be found if we used the
2296                          * tosnap for stream_parent_fromsnap_guid,
2297                          * because the parent is a newly-created fs;
2298                          * we'll be able to rename it after we recv the
2299                          * new fs.
2300                          */
2301                         if (parent != NULL) {
2302                                 char *pname;
2303
2304                                 VERIFY(0 == nvlist_lookup_string(parent, "name",
2305                                     &pname));
2306                                 (void) snprintf(tryname, sizeof (tryname),
2307                                     "%s%s", pname, strrchr(stream_fsname, '/'));
2308                         } else {
2309                                 tryname[0] = '\0';
2310                                 if (flags->verbose) {
2311                                         (void) printf("local fs %s new parent "
2312                                             "not found\n", fsname);
2313                                 }
2314                         }
2315
2316                         newname[0] = '\0';
2317
2318                         error = recv_rename(hdl, fsname, tryname,
2319                             strlen(tofs)+1, newname, flags);
2320
2321                         if (renamed != NULL && newname[0] != '\0') {
2322                                 VERIFY(0 == nvlist_add_boolean(renamed,
2323                                     newname));
2324                         }
2325
2326                         if (error)
2327                                 needagain = B_TRUE;
2328                         else
2329                                 progress = B_TRUE;
2330                 }
2331         }
2332
2333 doagain:
2334         fsavl_destroy(local_avl);
2335         nvlist_free(local_nv);
2336         nvlist_free(deleted);
2337
2338         if (needagain && progress) {
2339                 /* do another pass to fix up temporary names */
2340                 if (flags->verbose)
2341                         (void) printf("another pass:\n");
2342                 goto again;
2343         }
2344
2345         return (needagain);
2346 }
2347
2348 static int
2349 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2350     recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2351     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2352 {
2353         nvlist_t *stream_nv = NULL;
2354         avl_tree_t *stream_avl = NULL;
2355         char *fromsnap = NULL;
2356         char *cp;
2357         char tofs[ZFS_MAXNAMELEN];
2358         char sendfs[ZFS_MAXNAMELEN];
2359         char errbuf[1024];
2360         dmu_replay_record_t drre;
2361         int error;
2362         boolean_t anyerr = B_FALSE;
2363         boolean_t softerr = B_FALSE;
2364         boolean_t recursive;
2365
2366         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2367             "cannot receive"));
2368
2369         assert(drr->drr_type == DRR_BEGIN);
2370         assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2371         assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2372             DMU_COMPOUNDSTREAM);
2373
2374         /*
2375          * Read in the nvlist from the stream.
2376          */
2377         if (drr->drr_payloadlen != 0) {
2378                 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2379                     &stream_nv, flags->byteswap, zc);
2380                 if (error) {
2381                         error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2382                         goto out;
2383                 }
2384         }
2385
2386         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2387             ENOENT);
2388
2389         if (recursive && strchr(destname, '@')) {
2390                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2391                     "cannot specify snapshot name for multi-snapshot stream"));
2392                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2393                 goto out;
2394         }
2395
2396         /*
2397          * Read in the end record and verify checksum.
2398          */
2399         if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2400             flags->byteswap, NULL)))
2401                 goto out;
2402         if (flags->byteswap) {
2403                 drre.drr_type = BSWAP_32(drre.drr_type);
2404                 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2405                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2406                 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2407                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2408                 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2409                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2410                 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2411                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2412         }
2413         if (drre.drr_type != DRR_END) {
2414                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2415                 goto out;
2416         }
2417         if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2418                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2419                     "incorrect header checksum"));
2420                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2421                 goto out;
2422         }
2423
2424         (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2425
2426         if (drr->drr_payloadlen != 0) {
2427                 nvlist_t *stream_fss;
2428
2429                 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2430                     &stream_fss));
2431                 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2432                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2433                             "couldn't allocate avl tree"));
2434                         error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2435                         goto out;
2436                 }
2437
2438                 if (fromsnap != NULL) {
2439                         nvlist_t *renamed = NULL;
2440                         nvpair_t *pair = NULL;
2441
2442                         (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2443                         if (flags->isprefix) {
2444                                 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2445                                 int i;
2446
2447                                 if (flags->istail) {
2448                                         cp = strrchr(drrb->drr_toname, '/');
2449                                         if (cp == NULL) {
2450                                                 (void) strlcat(tofs, "/",
2451                                                     ZFS_MAXNAMELEN);
2452                                                 i = 0;
2453                                         } else {
2454                                                 i = (cp - drrb->drr_toname);
2455                                         }
2456                                 } else {
2457                                         i = strcspn(drrb->drr_toname, "/@");
2458                                 }
2459                                 /* zfs_receive_one() will create_parents() */
2460                                 (void) strlcat(tofs, &drrb->drr_toname[i],
2461                                     ZFS_MAXNAMELEN);
2462                                 *strchr(tofs, '@') = '\0';
2463                         }
2464
2465                         if (recursive && !flags->dryrun && !flags->nomount) {
2466                                 VERIFY(0 == nvlist_alloc(&renamed,
2467                                     NV_UNIQUE_NAME, 0));
2468                         }
2469
2470                         softerr = recv_incremental_replication(hdl, tofs, flags,
2471                             stream_nv, stream_avl, renamed);
2472
2473                         /* Unmount renamed filesystems before receiving. */
2474                         while ((pair = nvlist_next_nvpair(renamed,
2475                             pair)) != NULL) {
2476                                 zfs_handle_t *zhp;
2477                                 prop_changelist_t *clp = NULL;
2478
2479                                 zhp = zfs_open(hdl, nvpair_name(pair),
2480                                     ZFS_TYPE_FILESYSTEM);
2481                                 if (zhp != NULL) {
2482                                         clp = changelist_gather(zhp,
2483                                             ZFS_PROP_MOUNTPOINT, 0, 0);
2484                                         zfs_close(zhp);
2485                                         if (clp != NULL) {
2486                                                 softerr |=
2487                                                     changelist_prefix(clp);
2488                                                 changelist_free(clp);
2489                                         }
2490                                 }
2491                         }
2492
2493                         nvlist_free(renamed);
2494                 }
2495         }
2496
2497         /*
2498          * Get the fs specified by the first path in the stream (the top level
2499          * specified by 'zfs send') and pass it to each invocation of
2500          * zfs_receive_one().
2501          */
2502         (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2503             ZFS_MAXNAMELEN);
2504         if ((cp = strchr(sendfs, '@')) != NULL)
2505                 *cp = '\0';
2506
2507         /* Finally, receive each contained stream */
2508         do {
2509                 /*
2510                  * we should figure out if it has a recoverable
2511                  * error, in which case do a recv_skip() and drive on.
2512                  * Note, if we fail due to already having this guid,
2513                  * zfs_receive_one() will take care of it (ie,
2514                  * recv_skip() and return 0).
2515                  */
2516                 error = zfs_receive_impl(hdl, destname, flags, fd,
2517                     sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2518                     action_handlep);
2519                 if (error == ENODATA) {
2520                         error = 0;
2521                         break;
2522                 }
2523                 anyerr |= error;
2524         } while (error == 0);
2525
2526         if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2527                 /*
2528                  * Now that we have the fs's they sent us, try the
2529                  * renames again.
2530                  */
2531                 softerr = recv_incremental_replication(hdl, tofs, flags,
2532                     stream_nv, stream_avl, NULL);
2533         }
2534
2535 out:
2536         fsavl_destroy(stream_avl);
2537         if (stream_nv)
2538                 nvlist_free(stream_nv);
2539         if (softerr)
2540                 error = -2;
2541         if (anyerr)
2542                 error = -1;
2543         return (error);
2544 }
2545
2546 static void
2547 trunc_prop_errs(int truncated)
2548 {
2549         ASSERT(truncated != 0);
2550
2551         if (truncated == 1)
2552                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2553                     "1 more property could not be set\n"));
2554         else
2555                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2556                     "%d more properties could not be set\n"), truncated);
2557 }
2558
2559 static int
2560 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2561 {
2562         dmu_replay_record_t *drr;
2563         void *buf = malloc(1<<20);
2564         char errbuf[1024];
2565
2566         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2567             "cannot receive:"));
2568
2569         /* XXX would be great to use lseek if possible... */
2570         drr = buf;
2571
2572         while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2573             byteswap, NULL) == 0) {
2574                 if (byteswap)
2575                         drr->drr_type = BSWAP_32(drr->drr_type);
2576
2577                 switch (drr->drr_type) {
2578                 case DRR_BEGIN:
2579                         /* NB: not to be used on v2 stream packages */
2580                         if (drr->drr_payloadlen != 0) {
2581                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2582                                     "invalid substream header"));
2583                                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2584                         }
2585                         break;
2586
2587                 case DRR_END:
2588                         free(buf);
2589                         return (0);
2590
2591                 case DRR_OBJECT:
2592                         if (byteswap) {
2593                                 drr->drr_u.drr_object.drr_bonuslen =
2594                                     BSWAP_32(drr->drr_u.drr_object.
2595                                     drr_bonuslen);
2596                         }
2597                         (void) recv_read(hdl, fd, buf,
2598                             P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2599                             B_FALSE, NULL);
2600                         break;
2601
2602                 case DRR_WRITE:
2603                         if (byteswap) {
2604                                 drr->drr_u.drr_write.drr_length =
2605                                     BSWAP_64(drr->drr_u.drr_write.drr_length);
2606                         }
2607                         (void) recv_read(hdl, fd, buf,
2608                             drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2609                         break;
2610                 case DRR_SPILL:
2611                         if (byteswap) {
2612                                 drr->drr_u.drr_write.drr_length =
2613                                     BSWAP_64(drr->drr_u.drr_spill.drr_length);
2614                         }
2615                         (void) recv_read(hdl, fd, buf,
2616                             drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2617                         break;
2618                 case DRR_WRITE_EMBEDDED:
2619                         if (byteswap) {
2620                                 drr->drr_u.drr_write_embedded.drr_psize =
2621                                     BSWAP_32(drr->drr_u.drr_write_embedded.
2622                                     drr_psize);
2623                         }
2624                         (void) recv_read(hdl, fd, buf,
2625                             P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
2626                             8), B_FALSE, NULL);
2627                         break;
2628                 case DRR_WRITE_BYREF:
2629                 case DRR_FREEOBJECTS:
2630                 case DRR_FREE:
2631                         break;
2632
2633                 default:
2634                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2635                             "invalid record type"));
2636                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2637                 }
2638         }
2639
2640         free(buf);
2641         return (-1);
2642 }
2643
2644 /*
2645  * Restores a backup of tosnap from the file descriptor specified by infd.
2646  */
2647 static int
2648 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2649     recvflags_t *flags, dmu_replay_record_t *drr,
2650     dmu_replay_record_t *drr_noswap, const char *sendfs,
2651     nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2652     uint64_t *action_handlep)
2653 {
2654         zfs_cmd_t zc = {"\0"};
2655         time_t begin_time;
2656         int ioctl_err, ioctl_errno, err;
2657         char *cp;
2658         struct drr_begin *drrb = &drr->drr_u.drr_begin;
2659         char errbuf[1024];
2660         char prop_errbuf[1024];
2661         const char *chopprefix;
2662         boolean_t newfs = B_FALSE;
2663         boolean_t stream_wantsnewfs;
2664         uint64_t parent_snapguid = 0;
2665         prop_changelist_t *clp = NULL;
2666         nvlist_t *snapprops_nvlist = NULL;
2667         zprop_errflags_t prop_errflags;
2668         boolean_t recursive;
2669
2670         begin_time = time(NULL);
2671
2672         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2673             "cannot receive"));
2674
2675         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2676             ENOENT);
2677
2678         if (stream_avl != NULL) {
2679                 char *snapname;
2680                 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2681                     &snapname);
2682                 nvlist_t *props;
2683                 int ret;
2684
2685                 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
2686                     &parent_snapguid);
2687                 err = nvlist_lookup_nvlist(fs, "props", &props);
2688                 if (err)
2689                         VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2690
2691                 if (flags->canmountoff) {
2692                         VERIFY(0 == nvlist_add_uint64(props,
2693                             zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2694                 }
2695                 ret = zcmd_write_src_nvlist(hdl, &zc, props);
2696                 if (err)
2697                         nvlist_free(props);
2698
2699                 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2700                         VERIFY(0 == nvlist_lookup_nvlist(props,
2701                             snapname, &snapprops_nvlist));
2702                 }
2703
2704                 if (ret != 0)
2705                         return (-1);
2706         }
2707
2708         cp = NULL;
2709
2710         /*
2711          * Determine how much of the snapshot name stored in the stream
2712          * we are going to tack on to the name they specified on the
2713          * command line, and how much we are going to chop off.
2714          *
2715          * If they specified a snapshot, chop the entire name stored in
2716          * the stream.
2717          */
2718         if (flags->istail) {
2719                 /*
2720                  * A filesystem was specified with -e. We want to tack on only
2721                  * the tail of the sent snapshot path.
2722                  */
2723                 if (strchr(tosnap, '@')) {
2724                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2725                             "argument - snapshot not allowed with -e"));
2726                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2727                 }
2728
2729                 chopprefix = strrchr(sendfs, '/');
2730
2731                 if (chopprefix == NULL) {
2732                         /*
2733                          * The tail is the poolname, so we need to
2734                          * prepend a path separator.
2735                          */
2736                         int len = strlen(drrb->drr_toname);
2737                         cp = malloc(len + 2);
2738                         cp[0] = '/';
2739                         (void) strcpy(&cp[1], drrb->drr_toname);
2740                         chopprefix = cp;
2741                 } else {
2742                         chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2743                 }
2744         } else if (flags->isprefix) {
2745                 /*
2746                  * A filesystem was specified with -d. We want to tack on
2747                  * everything but the first element of the sent snapshot path
2748                  * (all but the pool name).
2749                  */
2750                 if (strchr(tosnap, '@')) {
2751                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2752                             "argument - snapshot not allowed with -d"));
2753                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2754                 }
2755
2756                 chopprefix = strchr(drrb->drr_toname, '/');
2757                 if (chopprefix == NULL)
2758                         chopprefix = strchr(drrb->drr_toname, '@');
2759         } else if (strchr(tosnap, '@') == NULL) {
2760                 /*
2761                  * If a filesystem was specified without -d or -e, we want to
2762                  * tack on everything after the fs specified by 'zfs send'.
2763                  */
2764                 chopprefix = drrb->drr_toname + strlen(sendfs);
2765         } else {
2766                 /* A snapshot was specified as an exact path (no -d or -e). */
2767                 if (recursive) {
2768                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2769                             "cannot specify snapshot name for multi-snapshot "
2770                             "stream"));
2771                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2772                 }
2773                 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2774         }
2775
2776         ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2777         ASSERT(chopprefix > drrb->drr_toname);
2778         ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2779         ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2780             chopprefix[0] == '\0');
2781
2782         /*
2783          * Determine name of destination snapshot, store in zc_value.
2784          */
2785         (void) strcpy(zc.zc_value, tosnap);
2786         (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2787         free(cp);
2788         if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2789                 zcmd_free_nvlists(&zc);
2790                 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2791         }
2792
2793         /*
2794          * Determine the name of the origin snapshot, store in zc_string.
2795          */
2796         if (drrb->drr_flags & DRR_FLAG_CLONE) {
2797                 if (guid_to_name(hdl, zc.zc_value,
2798                     drrb->drr_fromguid, zc.zc_string) != 0) {
2799                         zcmd_free_nvlists(&zc);
2800                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2801                             "local origin for clone %s does not exist"),
2802                             zc.zc_value);
2803                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2804                 }
2805                 if (flags->verbose)
2806                         (void) printf("found clone origin %s\n", zc.zc_string);
2807         }
2808
2809         stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
2810             (drrb->drr_flags & DRR_FLAG_CLONE));
2811
2812         if (stream_wantsnewfs) {
2813                 /*
2814                  * if the parent fs does not exist, look for it based on
2815                  * the parent snap GUID
2816                  */
2817                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2818                     "cannot receive new filesystem stream"));
2819
2820                 (void) strcpy(zc.zc_name, zc.zc_value);
2821                 cp = strrchr(zc.zc_name, '/');
2822                 if (cp)
2823                         *cp = '\0';
2824                 if (cp &&
2825                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2826                         char suffix[ZFS_MAXNAMELEN];
2827                         (void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2828                         if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
2829                             zc.zc_value) == 0) {
2830                                 *strchr(zc.zc_value, '@') = '\0';
2831                                 (void) strcat(zc.zc_value, suffix);
2832                         }
2833                 }
2834         } else {
2835                 /*
2836                  * if the fs does not exist, look for it based on the
2837                  * fromsnap GUID
2838                  */
2839                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2840                     "cannot receive incremental stream"));
2841
2842                 (void) strcpy(zc.zc_name, zc.zc_value);
2843                 *strchr(zc.zc_name, '@') = '\0';
2844
2845                 /*
2846                  * If the exact receive path was specified and this is the
2847                  * topmost path in the stream, then if the fs does not exist we
2848                  * should look no further.
2849                  */
2850                 if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
2851                     strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2852                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2853                         char snap[ZFS_MAXNAMELEN];
2854                         (void) strcpy(snap, strchr(zc.zc_value, '@'));
2855                         if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
2856                             zc.zc_value) == 0) {
2857                                 *strchr(zc.zc_value, '@') = '\0';
2858                                 (void) strcat(zc.zc_value, snap);
2859                         }
2860                 }
2861         }
2862
2863         (void) strcpy(zc.zc_name, zc.zc_value);
2864         *strchr(zc.zc_name, '@') = '\0';
2865
2866         if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2867                 zfs_handle_t *zhp;
2868
2869                 /*
2870                  * Destination fs exists.  Therefore this should either
2871                  * be an incremental, or the stream specifies a new fs
2872                  * (full stream or clone) and they want us to blow it
2873                  * away (and have therefore specified -F and removed any
2874                  * snapshots).
2875                  */
2876                 if (stream_wantsnewfs) {
2877                         if (!flags->force) {
2878                                 zcmd_free_nvlists(&zc);
2879                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2880                                     "destination '%s' exists\n"
2881                                     "must specify -F to overwrite it"),
2882                                     zc.zc_name);
2883                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2884                         }
2885                         if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2886                             &zc) == 0) {
2887                                 zcmd_free_nvlists(&zc);
2888                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2889                                     "destination has snapshots (eg. %s)\n"
2890                                     "must destroy them to overwrite it"),
2891                                     zc.zc_name);
2892                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2893                         }
2894                 }
2895
2896                 if ((zhp = zfs_open(hdl, zc.zc_name,
2897                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2898                         zcmd_free_nvlists(&zc);
2899                         return (-1);
2900                 }
2901
2902                 if (stream_wantsnewfs &&
2903                     zhp->zfs_dmustats.dds_origin[0]) {
2904                         zcmd_free_nvlists(&zc);
2905                         zfs_close(zhp);
2906                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2907                             "destination '%s' is a clone\n"
2908                             "must destroy it to overwrite it"),
2909                             zc.zc_name);
2910                         return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2911                 }
2912
2913                 if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2914                     stream_wantsnewfs) {
2915                         /* We can't do online recv in this case */
2916                         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2917                         if (clp == NULL) {
2918                                 zfs_close(zhp);
2919                                 zcmd_free_nvlists(&zc);
2920                                 return (-1);
2921                         }
2922                         if (changelist_prefix(clp) != 0) {
2923                                 changelist_free(clp);
2924                                 zfs_close(zhp);
2925                                 zcmd_free_nvlists(&zc);
2926                                 return (-1);
2927                         }
2928                 }
2929                 zfs_close(zhp);
2930         } else {
2931                 /*
2932                  * Destination filesystem does not exist.  Therefore we better
2933                  * be creating a new filesystem (either from a full backup, or
2934                  * a clone).  It would therefore be invalid if the user
2935                  * specified only the pool name (i.e. if the destination name
2936                  * contained no slash character).
2937                  */
2938                 if (!stream_wantsnewfs ||
2939                     (cp = strrchr(zc.zc_name, '/')) == NULL) {
2940                         zcmd_free_nvlists(&zc);
2941                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2942                             "destination '%s' does not exist"), zc.zc_name);
2943                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2944                 }
2945
2946                 /*
2947                  * Trim off the final dataset component so we perform the
2948                  * recvbackup ioctl to the filesystems's parent.
2949                  */
2950                 *cp = '\0';
2951
2952                 if (flags->isprefix && !flags->istail && !flags->dryrun &&
2953                     create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2954                         zcmd_free_nvlists(&zc);
2955                         return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2956                 }
2957
2958                 newfs = B_TRUE;
2959         }
2960
2961         zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2962         zc.zc_cookie = infd;
2963         zc.zc_guid = flags->force;
2964         if (flags->verbose) {
2965                 (void) printf("%s %s stream of %s into %s\n",
2966                     flags->dryrun ? "would receive" : "receiving",
2967                     drrb->drr_fromguid ? "incremental" : "full",
2968                     drrb->drr_toname, zc.zc_value);
2969                 (void) fflush(stdout);
2970         }
2971
2972         if (flags->dryrun) {
2973                 zcmd_free_nvlists(&zc);
2974                 return (recv_skip(hdl, infd, flags->byteswap));
2975         }
2976
2977         zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2978         zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2979         zc.zc_cleanup_fd = cleanup_fd;
2980         zc.zc_action_handle = *action_handlep;
2981
2982         err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2983         ioctl_errno = errno;
2984         prop_errflags = (zprop_errflags_t)zc.zc_obj;
2985
2986         if (err == 0) {
2987                 nvlist_t *prop_errors;
2988                 VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2989                     zc.zc_nvlist_dst_size, &prop_errors, 0));
2990
2991                 nvpair_t *prop_err = NULL;
2992
2993                 while ((prop_err = nvlist_next_nvpair(prop_errors,
2994                     prop_err)) != NULL) {
2995                         char tbuf[1024];
2996                         zfs_prop_t prop;
2997                         int intval;
2998
2999                         prop = zfs_name_to_prop(nvpair_name(prop_err));
3000                         (void) nvpair_value_int32(prop_err, &intval);
3001                         if (strcmp(nvpair_name(prop_err),
3002                             ZPROP_N_MORE_ERRORS) == 0) {
3003                                 trunc_prop_errs(intval);
3004                                 break;
3005                         } else {
3006                                 (void) snprintf(tbuf, sizeof (tbuf),
3007                                     dgettext(TEXT_DOMAIN,
3008                                     "cannot receive %s property on %s"),
3009                                     nvpair_name(prop_err), zc.zc_name);
3010                                 zfs_setprop_error(hdl, prop, intval, tbuf);
3011                         }
3012                 }
3013                 nvlist_free(prop_errors);
3014         }
3015
3016         zc.zc_nvlist_dst = 0;
3017         zc.zc_nvlist_dst_size = 0;
3018         zcmd_free_nvlists(&zc);
3019
3020         if (err == 0 && snapprops_nvlist) {
3021                 zfs_cmd_t zc2 = {"\0"};
3022
3023                 (void) strcpy(zc2.zc_name, zc.zc_value);
3024                 zc2.zc_cookie = B_TRUE; /* received */
3025                 if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
3026                         (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
3027                         zcmd_free_nvlists(&zc2);
3028                 }
3029         }
3030
3031         if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
3032                 /*
3033                  * It may be that this snapshot already exists,
3034                  * in which case we want to consume & ignore it
3035                  * rather than failing.
3036                  */
3037                 avl_tree_t *local_avl;
3038                 nvlist_t *local_nv, *fs;
3039                 cp = strchr(zc.zc_value, '@');
3040
3041                 /*
3042                  * XXX Do this faster by just iterating over snaps in
3043                  * this fs.  Also if zc_value does not exist, we will
3044                  * get a strange "does not exist" error message.
3045                  */
3046                 *cp = '\0';
3047                 if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
3048                     &local_nv, &local_avl) == 0) {
3049                         *cp = '@';
3050                         fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
3051                         fsavl_destroy(local_avl);
3052                         nvlist_free(local_nv);
3053
3054                         if (fs != NULL) {
3055                                 if (flags->verbose) {
3056                                         (void) printf("snap %s already exists; "
3057                                             "ignoring\n", zc.zc_value);
3058                                 }
3059                                 err = ioctl_err = recv_skip(hdl, infd,
3060                                     flags->byteswap);
3061                         }
3062                 }
3063                 *cp = '@';
3064         }
3065
3066         if (ioctl_err != 0) {
3067                 switch (ioctl_errno) {
3068                 case ENODEV:
3069                         cp = strchr(zc.zc_value, '@');
3070                         *cp = '\0';
3071                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3072                             "most recent snapshot of %s does not\n"
3073                             "match incremental source"), zc.zc_value);
3074                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3075                         *cp = '@';
3076                         break;
3077                 case ETXTBSY:
3078                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3079                             "destination %s has been modified\n"
3080                             "since most recent snapshot"), zc.zc_name);
3081                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3082                         break;
3083                 case EEXIST:
3084                         cp = strchr(zc.zc_value, '@');
3085                         if (newfs) {
3086                                 /* it's the containing fs that exists */
3087                                 *cp = '\0';
3088                         }
3089                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3090                             "destination already exists"));
3091                         (void) zfs_error_fmt(hdl, EZFS_EXISTS,
3092                             dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3093                             zc.zc_value);
3094                         *cp = '@';
3095                         break;
3096                 case EINVAL:
3097                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3098                         break;
3099                 case ECKSUM:
3100                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3101                             "invalid stream (checksum mismatch)"));
3102                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3103                         break;
3104                 case ENOTSUP:
3105                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3106                             "pool must be upgraded to receive this stream."));
3107                         (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
3108                         break;
3109                 case EDQUOT:
3110                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3111                             "destination %s space quota exceeded"), zc.zc_name);
3112                         (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3113                         break;
3114                 default:
3115                         (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3116                 }
3117         }
3118
3119         /*
3120          * Mount the target filesystem (if created).  Also mount any
3121          * children of the target filesystem if we did a replication
3122          * receive (indicated by stream_avl being non-NULL).
3123          */
3124         cp = strchr(zc.zc_value, '@');
3125         if (cp && (ioctl_err == 0 || !newfs)) {
3126                 zfs_handle_t *h;
3127
3128                 *cp = '\0';
3129                 h = zfs_open(hdl, zc.zc_value,
3130                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3131                 if (h != NULL) {
3132                         if (h->zfs_type == ZFS_TYPE_VOLUME) {
3133                                 *cp = '@';
3134                         } else if (newfs || stream_avl) {
3135                                 /*
3136                                  * Track the first/top of hierarchy fs,
3137                                  * for mounting and sharing later.
3138                                  */
3139                                 if (top_zfs && *top_zfs == NULL)
3140                                         *top_zfs = zfs_strdup(hdl, zc.zc_value);
3141                         }
3142                         zfs_close(h);
3143                 }
3144                 *cp = '@';
3145         }
3146
3147         if (clp) {
3148                 err |= changelist_postfix(clp);
3149                 changelist_free(clp);
3150         }
3151
3152         if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3153                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3154                     "failed to clear unreceived properties on %s"),
3155                     zc.zc_name);
3156                 (void) fprintf(stderr, "\n");
3157         }
3158         if (prop_errflags & ZPROP_ERR_NORESTORE) {
3159                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3160                     "failed to restore original properties on %s"),
3161                     zc.zc_name);
3162                 (void) fprintf(stderr, "\n");
3163         }
3164
3165         if (err || ioctl_err)
3166                 return (-1);
3167
3168         *action_handlep = zc.zc_action_handle;
3169
3170         if (flags->verbose) {
3171                 char buf1[64];
3172                 char buf2[64];
3173                 uint64_t bytes = zc.zc_cookie;
3174                 time_t delta = time(NULL) - begin_time;
3175                 if (delta == 0)
3176                         delta = 1;
3177                 zfs_nicenum(bytes, buf1, sizeof (buf1));
3178                 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3179
3180                 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3181                     buf1, delta, buf2);
3182         }
3183
3184         return (0);
3185 }
3186
3187 static int
3188 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3189     int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3190     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
3191 {
3192         int err;
3193         dmu_replay_record_t drr, drr_noswap;
3194         struct drr_begin *drrb = &drr.drr_u.drr_begin;
3195         char errbuf[1024];
3196         zio_cksum_t zcksum = { { 0 } };
3197         uint64_t featureflags;
3198         int hdrtype;
3199
3200         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3201             "cannot receive"));
3202
3203         if (flags->isprefix &&
3204             !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3205                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3206                     "(%s) does not exist"), tosnap);
3207                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3208         }
3209
3210         /* read in the BEGIN record */
3211         if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3212             &zcksum)))
3213                 return (err);
3214
3215         if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3216                 /* It's the double end record at the end of a package */
3217                 return (ENODATA);
3218         }
3219
3220         /* the kernel needs the non-byteswapped begin record */
3221         drr_noswap = drr;
3222
3223         flags->byteswap = B_FALSE;
3224         if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3225                 /*
3226                  * We computed the checksum in the wrong byteorder in
3227                  * recv_read() above; do it again correctly.
3228                  */
3229                 bzero(&zcksum, sizeof (zio_cksum_t));
3230                 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3231                 flags->byteswap = B_TRUE;
3232
3233                 drr.drr_type = BSWAP_32(drr.drr_type);
3234                 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3235                 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3236                 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3237                 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3238                 drrb->drr_type = BSWAP_32(drrb->drr_type);
3239                 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3240                 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3241                 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3242         }
3243
3244         if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3245                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3246                     "stream (bad magic number)"));
3247                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3248         }
3249
3250         featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3251         hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3252
3253         if (!DMU_STREAM_SUPPORTED(featureflags) ||
3254             (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3255                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3256                     "stream has unsupported feature, feature flags = %lx"),
3257                     featureflags);
3258                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3259         }
3260
3261         if (strchr(drrb->drr_toname, '@') == NULL) {
3262                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3263                     "stream (bad snapshot name)"));
3264                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3265         }
3266
3267         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3268                 char nonpackage_sendfs[ZFS_MAXNAMELEN];
3269                 if (sendfs == NULL) {
3270                         /*
3271                          * We were not called from zfs_receive_package(). Get
3272                          * the fs specified by 'zfs send'.
3273                          */
3274                         char *cp;
3275                         (void) strlcpy(nonpackage_sendfs,
3276                             drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
3277                         if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3278                                 *cp = '\0';
3279                         sendfs = nonpackage_sendfs;
3280                 }
3281                 return (zfs_receive_one(hdl, infd, tosnap, flags,
3282                     &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
3283                     top_zfs, cleanup_fd, action_handlep));
3284         } else {
3285                 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3286                     DMU_COMPOUNDSTREAM);
3287                 return (zfs_receive_package(hdl, infd, tosnap, flags,
3288                     &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
3289         }
3290 }
3291
3292 /*
3293  * Restores a backup of tosnap from the file descriptor specified by infd.
3294  * Return 0 on total success, -2 if some things couldn't be
3295  * destroyed/renamed/promoted, -1 if some things couldn't be received.
3296  * (-1 will override -2).
3297  */
3298 int
3299 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3300     int infd, avl_tree_t *stream_avl)
3301 {
3302         char *top_zfs = NULL;
3303         int err;
3304         int cleanup_fd;
3305         uint64_t action_handle = 0;
3306
3307         cleanup_fd = open(ZFS_DEV, O_RDWR);
3308         VERIFY(cleanup_fd >= 0);
3309
3310         err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
3311             stream_avl, &top_zfs, cleanup_fd, &action_handle);
3312
3313         VERIFY(0 == close(cleanup_fd));
3314
3315         if (err == 0 && !flags->nomount && top_zfs) {
3316                 zfs_handle_t *zhp;
3317                 prop_changelist_t *clp;
3318
3319                 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3320                 if (zhp != NULL) {
3321                         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3322                             CL_GATHER_MOUNT_ALWAYS, 0);
3323                         zfs_close(zhp);
3324                         if (clp != NULL) {
3325                                 /* mount and share received datasets */
3326                                 err = changelist_postfix(clp);
3327                                 changelist_free(clp);
3328                         }
3329                 }
3330                 if (zhp == NULL || clp == NULL || err)
3331                         err = -1;
3332         }
3333         if (top_zfs)
3334                 free(top_zfs);
3335
3336         return (err);
3337 }