]> CyberLeo.Net >> Repos - FreeBSD/releng/9.0.git/blob - cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
Copy stable/9 to releng/9.0 as part of the FreeBSD 9.0-RELEASE release
[FreeBSD/releng/9.0.git] / cddl / contrib / opensolaris / lib / libzfs / common / libzfs_sendrecv.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25
26 #include <assert.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <libintl.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <strings.h>
33 #include <unistd.h>
34 #include <stddef.h>
35 #include <fcntl.h>
36 #include <sys/param.h>
37 #include <sys/mount.h>
38 #include <pthread.h>
39 #include <umem.h>
40
41 #include <libzfs.h>
42
43 #include "zfs_namecheck.h"
44 #include "zfs_prop.h"
45 #include "zfs_fletcher.h"
46 #include "libzfs_impl.h"
47 #include <sha2.h>
48 #include <sys/zio_checksum.h>
49 #include <sys/ddt.h>
50
51 /* in libzfs_dataset.c */
52 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/*
 * We need to use something for ENODATA.  Fall back to EIDRM, but only
 * when <errno.h> has not already supplied a real ENODATA, so that a
 * system-provided value is never silently redefined.
 */
#ifndef ENODATA
#define ENODATA EIDRM
#endif
55
56 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
57     int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
58
59 static const zio_cksum_t zero_cksum = { 0 };
60
61 typedef struct dedup_arg {
62         int     inputfd;
63         int     outputfd;
64         libzfs_handle_t  *dedup_hdl;
65 } dedup_arg_t;
66
/*
 * Identifies where a block lives: cksummer() fills these in from the
 * drr_toguid, drr_object and drr_offset fields of a DRR_WRITE record.
 */
struct dataref {
        uint64_t ref_guid;
        uint64_t ref_object;
        uint64_t ref_offset;
};
typedef struct dataref dataref_t;
72
73 typedef struct dedup_entry {
74         struct dedup_entry      *dde_next;
75         zio_cksum_t dde_chksum;
76         uint64_t dde_prop;
77         dataref_t dde_ref;
78 } dedup_entry_t;
79
80 #define MAX_DDT_PHYSMEM_PERCENT         20
81 #define SMALLEST_POSSIBLE_MAX_DDT_MB            128
82
83 typedef struct dedup_table {
84         dedup_entry_t   **dedup_hash_array;
85         umem_cache_t    *ddecache;
86         uint64_t        max_ddt_size;  /* max dedup table size in bytes */
87         uint64_t        cur_ddt_size;  /* current dedup table size in bytes */
88         uint64_t        ddt_count;
89         int             numhashbits;
90         boolean_t       ddt_full;
91 } dedup_table_t;
92
/*
 * Return the 1-based position of the most significant set bit of n,
 * i.e. the number of right shifts needed to reduce n to zero.
 * Returns 0 when n is 0.
 */
static int
high_order_bit(uint64_t n)
{
        int bits = 0;

        while (n != 0) {
                n >>= 1;
                bits++;
        }
        return (bits);
}
102
/*
 * Read exactly one item of len bytes from stream into buf.
 *
 * Returns fread()'s item count: 1 when the whole item was read, 0 when
 * it was not (end of file, error, or a short read).
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
        return (fread(buf, len, 1, stream));
}
113
114 static void
115 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
116     zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
117 {
118         dedup_entry_t   *dde;
119
120         if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
121                 if (ddt->ddt_full == B_FALSE) {
122                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
123                             "Dedup table full.  Deduplication will continue "
124                             "with existing table entries"));
125                         ddt->ddt_full = B_TRUE;
126                 }
127                 return;
128         }
129
130         if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
131             != NULL) {
132                 assert(*ddepp == NULL);
133                 dde->dde_next = NULL;
134                 dde->dde_chksum = *cs;
135                 dde->dde_prop = prop;
136                 dde->dde_ref = *dr;
137                 *ddepp = dde;
138                 ddt->cur_ddt_size += sizeof (dedup_entry_t);
139                 ddt->ddt_count++;
140         }
141 }
142
143 /*
144  * Using the specified dedup table, do a lookup for an entry with
145  * the checksum cs.  If found, return the block's reference info
146  * in *dr. Otherwise, insert a new entry in the dedup table, using
147  * the reference information specified by *dr.
148  *
149  * return value:  true - entry was found
150  *                false - entry was not found
151  */
152 static boolean_t
153 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
154     uint64_t prop, dataref_t *dr)
155 {
156         uint32_t hashcode;
157         dedup_entry_t **ddepp;
158
159         hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
160
161         for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
162             ddepp = &((*ddepp)->dde_next)) {
163                 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
164                     (*ddepp)->dde_prop == prop) {
165                         *dr = (*ddepp)->dde_ref;
166                         return (B_TRUE);
167                 }
168         }
169         ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
170         return (B_FALSE);
171 }
172
173 static int
174 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
175 {
176         fletcher_4_incremental_native(buf, len, zc);
177         return (write(outfd, buf, len));
178 }
179
180 /*
181  * This function is started in a separate thread when the dedup option
182  * has been requested.  The main send thread determines the list of
183  * snapshots to be included in the send stream and makes the ioctl calls
184  * for each one.  But instead of having the ioctl send the output to the
185  * the output fd specified by the caller of zfs_send()), the
186  * ioctl is told to direct the output to a pipe, which is read by the
187  * alternate thread running THIS function.  This function does the
188  * dedup'ing by:
189  *  1. building a dedup table (the DDT)
190  *  2. doing checksums on each data block and inserting a record in the DDT
191  *  3. looking for matching checksums, and
192  *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
193  *      a duplicate block is found.
194  * The output of this function then goes to the output fd requested
195  * by the caller of zfs_send().
196  */
197 static void *
198 cksummer(void *arg)
199 {
200         dedup_arg_t *dda = arg;
201         char *buf = malloc(1<<20);
202         dmu_replay_record_t thedrr;
203         dmu_replay_record_t *drr = &thedrr;
204         struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
205         struct drr_end *drre = &thedrr.drr_u.drr_end;
206         struct drr_object *drro = &thedrr.drr_u.drr_object;
207         struct drr_write *drrw = &thedrr.drr_u.drr_write;
208         struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
209         FILE *ofp;
210         int outfd;
211         dmu_replay_record_t wbr_drr = {0};
212         struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
213         dedup_table_t ddt;
214         zio_cksum_t stream_cksum;
215         uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
216         uint64_t numbuckets;
217
218         ddt.max_ddt_size =
219             MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
220             SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
221
222         numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
223
224         /*
225          * numbuckets must be a power of 2.  Increase number to
226          * a power of 2 if necessary.
227          */
228         if (!ISP2(numbuckets))
229                 numbuckets = 1 << high_order_bit(numbuckets);
230
231         ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
232         ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
233             NULL, NULL, NULL, NULL, NULL, 0);
234         ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
235         ddt.numhashbits = high_order_bit(numbuckets) - 1;
236         ddt.ddt_full = B_FALSE;
237
238         /* Initialize the write-by-reference block. */
239         wbr_drr.drr_type = DRR_WRITE_BYREF;
240         wbr_drr.drr_payloadlen = 0;
241
242         outfd = dda->outputfd;
243         ofp = fdopen(dda->inputfd, "r");
244         while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
245
246                 switch (drr->drr_type) {
247                 case DRR_BEGIN:
248                 {
249                         int     fflags;
250                         ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
251
252                         /* set the DEDUP feature flag for this stream */
253                         fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
254                         fflags |= (DMU_BACKUP_FEATURE_DEDUP |
255                             DMU_BACKUP_FEATURE_DEDUPPROPS);
256                         DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
257
258                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
259                             &stream_cksum, outfd) == -1)
260                                 goto out;
261                         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
262                             DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
263                                 int sz = drr->drr_payloadlen;
264
265                                 if (sz > 1<<20) {
266                                         free(buf);
267                                         buf = malloc(sz);
268                                 }
269                                 (void) ssread(buf, sz, ofp);
270                                 if (ferror(stdin))
271                                         perror("fread");
272                                 if (cksum_and_write(buf, sz, &stream_cksum,
273                                     outfd) == -1)
274                                         goto out;
275                         }
276                         break;
277                 }
278
279                 case DRR_END:
280                 {
281                         /* use the recalculated checksum */
282                         ZIO_SET_CHECKSUM(&drre->drr_checksum,
283                             stream_cksum.zc_word[0], stream_cksum.zc_word[1],
284                             stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
285                         if ((write(outfd, drr,
286                             sizeof (dmu_replay_record_t))) == -1)
287                                 goto out;
288                         break;
289                 }
290
291                 case DRR_OBJECT:
292                 {
293                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
294                             &stream_cksum, outfd) == -1)
295                                 goto out;
296                         if (drro->drr_bonuslen > 0) {
297                                 (void) ssread(buf,
298                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
299                                     ofp);
300                                 if (cksum_and_write(buf,
301                                     P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
302                                     &stream_cksum, outfd) == -1)
303                                         goto out;
304                         }
305                         break;
306                 }
307
308                 case DRR_SPILL:
309                 {
310                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
311                             &stream_cksum, outfd) == -1)
312                                 goto out;
313                         (void) ssread(buf, drrs->drr_length, ofp);
314                         if (cksum_and_write(buf, drrs->drr_length,
315                             &stream_cksum, outfd) == -1)
316                                 goto out;
317                         break;
318                 }
319
320                 case DRR_FREEOBJECTS:
321                 {
322                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
323                             &stream_cksum, outfd) == -1)
324                                 goto out;
325                         break;
326                 }
327
328                 case DRR_WRITE:
329                 {
330                         dataref_t       dataref;
331
332                         (void) ssread(buf, drrw->drr_length, ofp);
333
334                         /*
335                          * Use the existing checksum if it's dedup-capable,
336                          * else calculate a SHA256 checksum for it.
337                          */
338
339                         if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
340                             zero_cksum) ||
341                             !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
342                                 SHA256_CTX      ctx;
343                                 zio_cksum_t     tmpsha256;
344
345                                 SHA256Init(&ctx);
346                                 SHA256Update(&ctx, buf, drrw->drr_length);
347                                 SHA256Final(&tmpsha256, &ctx);
348                                 drrw->drr_key.ddk_cksum.zc_word[0] =
349                                     BE_64(tmpsha256.zc_word[0]);
350                                 drrw->drr_key.ddk_cksum.zc_word[1] =
351                                     BE_64(tmpsha256.zc_word[1]);
352                                 drrw->drr_key.ddk_cksum.zc_word[2] =
353                                     BE_64(tmpsha256.zc_word[2]);
354                                 drrw->drr_key.ddk_cksum.zc_word[3] =
355                                     BE_64(tmpsha256.zc_word[3]);
356                                 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
357                                 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
358                         }
359
360                         dataref.ref_guid = drrw->drr_toguid;
361                         dataref.ref_object = drrw->drr_object;
362                         dataref.ref_offset = drrw->drr_offset;
363
364                         if (ddt_update(dda->dedup_hdl, &ddt,
365                             &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
366                             &dataref)) {
367                                 /* block already present in stream */
368                                 wbr_drrr->drr_object = drrw->drr_object;
369                                 wbr_drrr->drr_offset = drrw->drr_offset;
370                                 wbr_drrr->drr_length = drrw->drr_length;
371                                 wbr_drrr->drr_toguid = drrw->drr_toguid;
372                                 wbr_drrr->drr_refguid = dataref.ref_guid;
373                                 wbr_drrr->drr_refobject =
374                                     dataref.ref_object;
375                                 wbr_drrr->drr_refoffset =
376                                     dataref.ref_offset;
377
378                                 wbr_drrr->drr_checksumtype =
379                                     drrw->drr_checksumtype;
380                                 wbr_drrr->drr_checksumflags =
381                                     drrw->drr_checksumtype;
382                                 wbr_drrr->drr_key.ddk_cksum =
383                                     drrw->drr_key.ddk_cksum;
384                                 wbr_drrr->drr_key.ddk_prop =
385                                     drrw->drr_key.ddk_prop;
386
387                                 if (cksum_and_write(&wbr_drr,
388                                     sizeof (dmu_replay_record_t), &stream_cksum,
389                                     outfd) == -1)
390                                         goto out;
391                         } else {
392                                 /* block not previously seen */
393                                 if (cksum_and_write(drr,
394                                     sizeof (dmu_replay_record_t), &stream_cksum,
395                                     outfd) == -1)
396                                         goto out;
397                                 if (cksum_and_write(buf,
398                                     drrw->drr_length,
399                                     &stream_cksum, outfd) == -1)
400                                         goto out;
401                         }
402                         break;
403                 }
404
405                 case DRR_FREE:
406                 {
407                         if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
408                             &stream_cksum, outfd) == -1)
409                                 goto out;
410                         break;
411                 }
412
413                 default:
414                         (void) printf("INVALID record type 0x%x\n",
415                             drr->drr_type);
416                         /* should never happen, so assert */
417                         assert(B_FALSE);
418                 }
419         }
420 out:
421         umem_cache_destroy(ddt.ddecache);
422         free(ddt.dedup_hash_array);
423         free(buf);
424         (void) fclose(ofp);
425
426         return (NULL);
427 }
428
429 /*
430  * Routines for dealing with the AVL tree of fs-nvlists
431  */
432 typedef struct fsavl_node {
433         avl_node_t fn_node;
434         nvlist_t *fn_nvfs;
435         char *fn_snapname;
436         uint64_t fn_guid;
437 } fsavl_node_t;
438
439 static int
440 fsavl_compare(const void *arg1, const void *arg2)
441 {
442         const fsavl_node_t *fn1 = arg1;
443         const fsavl_node_t *fn2 = arg2;
444
445         if (fn1->fn_guid > fn2->fn_guid)
446                 return (+1);
447         else if (fn1->fn_guid < fn2->fn_guid)
448                 return (-1);
449         else
450                 return (0);
451 }
452
453 /*
454  * Given the GUID of a snapshot, find its containing filesystem and
455  * (optionally) name.
456  */
457 static nvlist_t *
458 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
459 {
460         fsavl_node_t fn_find;
461         fsavl_node_t *fn;
462
463         fn_find.fn_guid = snapguid;
464
465         fn = avl_find(avl, &fn_find, NULL);
466         if (fn) {
467                 if (snapname)
468                         *snapname = fn->fn_snapname;
469                 return (fn->fn_nvfs);
470         }
471         return (NULL);
472 }
473
474 static void
475 fsavl_destroy(avl_tree_t *avl)
476 {
477         fsavl_node_t *fn;
478         void *cookie;
479
480         if (avl == NULL)
481                 return;
482
483         cookie = NULL;
484         while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
485                 free(fn);
486         avl_destroy(avl);
487         free(avl);
488 }
489
490 /*
491  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
492  */
493 static avl_tree_t *
494 fsavl_create(nvlist_t *fss)
495 {
496         avl_tree_t *fsavl;
497         nvpair_t *fselem = NULL;
498
499         if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
500                 return (NULL);
501
502         avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
503             offsetof(fsavl_node_t, fn_node));
504
505         while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
506                 nvlist_t *nvfs, *snaps;
507                 nvpair_t *snapelem = NULL;
508
509                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
510                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
511
512                 while ((snapelem =
513                     nvlist_next_nvpair(snaps, snapelem)) != NULL) {
514                         fsavl_node_t *fn;
515                         uint64_t guid;
516
517                         VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
518                         if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
519                                 fsavl_destroy(fsavl);
520                                 return (NULL);
521                         }
522                         fn->fn_nvfs = nvfs;
523                         fn->fn_snapname = nvpair_name(snapelem);
524                         fn->fn_guid = guid;
525
526                         /*
527                          * Note: if there are multiple snaps with the
528                          * same GUID, we ignore all but one.
529                          */
530                         if (avl_find(fsavl, fn, NULL) == NULL)
531                                 avl_add(fsavl, fn);
532                         else
533                                 free(fn);
534                 }
535         }
536
537         return (fsavl);
538 }
539
540 /*
541  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
542  */
543 typedef struct send_data {
544         uint64_t parent_fromsnap_guid;
545         nvlist_t *parent_snaps;
546         nvlist_t *fss;
547         nvlist_t *snapprops;
548         const char *fromsnap;
549         const char *tosnap;
550         boolean_t recursive;
551
552         /*
553          * The header nvlist is of the following format:
554          * {
555          *   "tosnap" -> string
556          *   "fromsnap" -> string (if incremental)
557          *   "fss" -> {
558          *      id -> {
559          *
560          *       "name" -> string (full name; for debugging)
561          *       "parentfromsnap" -> number (guid of fromsnap in parent)
562          *
563          *       "props" -> { name -> value (only if set here) }
564          *       "snaps" -> { name (lastname) -> number (guid) }
565          *       "snapprops" -> { name (lastname) -> { name -> value } }
566          *
567          *       "origin" -> number (guid) (if clone)
568          *       "sent" -> boolean (not on-disk)
569          *      }
570          *   }
571          * }
572          *
573          */
574 } send_data_t;
575
576 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
577
578 static int
579 send_iterate_snap(zfs_handle_t *zhp, void *arg)
580 {
581         send_data_t *sd = arg;
582         uint64_t guid = zhp->zfs_dmustats.dds_guid;
583         char *snapname;
584         nvlist_t *nv;
585
586         snapname = strrchr(zhp->zfs_name, '@')+1;
587
588         VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
589         /*
590          * NB: if there is no fromsnap here (it's a newly created fs in
591          * an incremental replication), we will substitute the tosnap.
592          */
593         if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
594             (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
595             strcmp(snapname, sd->tosnap) == 0)) {
596                 sd->parent_fromsnap_guid = guid;
597         }
598
599         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
600         send_iterate_prop(zhp, nv);
601         VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
602         nvlist_free(nv);
603
604         zfs_close(zhp);
605         return (0);
606 }
607
608 static void
609 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
610 {
611         nvpair_t *elem = NULL;
612
613         while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
614                 char *propname = nvpair_name(elem);
615                 zfs_prop_t prop = zfs_name_to_prop(propname);
616                 nvlist_t *propnv;
617
618                 if (!zfs_prop_user(propname)) {
619                         /*
620                          * Realistically, this should never happen.  However,
621                          * we want the ability to add DSL properties without
622                          * needing to make incompatible version changes.  We
623                          * need to ignore unknown properties to allow older
624                          * software to still send datasets containing these
625                          * properties, with the unknown properties elided.
626                          */
627                         if (prop == ZPROP_INVAL)
628                                 continue;
629
630                         if (zfs_prop_readonly(prop))
631                                 continue;
632                 }
633
634                 verify(nvpair_value_nvlist(elem, &propnv) == 0);
635                 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
636                     prop == ZFS_PROP_REFQUOTA ||
637                     prop == ZFS_PROP_REFRESERVATION) {
638                         char *source;
639                         uint64_t value;
640                         verify(nvlist_lookup_uint64(propnv,
641                             ZPROP_VALUE, &value) == 0);
642                         if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
643                                 continue;
644                         /*
645                          * May have no source before SPA_VERSION_RECVD_PROPS,
646                          * but is still modifiable.
647                          */
648                         if (nvlist_lookup_string(propnv,
649                             ZPROP_SOURCE, &source) == 0) {
650                                 if ((strcmp(source, zhp->zfs_name) != 0) &&
651                                     (strcmp(source,
652                                     ZPROP_SOURCE_VAL_RECVD) != 0))
653                                         continue;
654                         }
655                 } else {
656                         char *source;
657                         if (nvlist_lookup_string(propnv,
658                             ZPROP_SOURCE, &source) != 0)
659                                 continue;
660                         if ((strcmp(source, zhp->zfs_name) != 0) &&
661                             (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
662                                 continue;
663                 }
664
665                 if (zfs_prop_user(propname) ||
666                     zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
667                         char *value;
668                         verify(nvlist_lookup_string(propnv,
669                             ZPROP_VALUE, &value) == 0);
670                         VERIFY(0 == nvlist_add_string(nv, propname, value));
671                 } else {
672                         uint64_t value;
673                         verify(nvlist_lookup_uint64(propnv,
674                             ZPROP_VALUE, &value) == 0);
675                         VERIFY(0 == nvlist_add_uint64(nv, propname, value));
676                 }
677         }
678 }
679
680 /*
681  * recursively generate nvlists describing datasets.  See comment
682  * for the data structure send_data_t above for description of contents
683  * of the nvlist.
684  */
685 static int
686 send_iterate_fs(zfs_handle_t *zhp, void *arg)
687 {
688         send_data_t *sd = arg;
689         nvlist_t *nvfs, *nv;
690         int rv = 0;
691         uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
692         uint64_t guid = zhp->zfs_dmustats.dds_guid;
693         char guidstring[64];
694
695         VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
696         VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
697         VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
698             sd->parent_fromsnap_guid));
699
700         if (zhp->zfs_dmustats.dds_origin[0]) {
701                 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
702                     zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
703                 if (origin == NULL)
704                         return (-1);
705                 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
706                     origin->zfs_dmustats.dds_guid));
707         }
708
709         /* iterate over props */
710         VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
711         send_iterate_prop(zhp, nv);
712         VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
713         nvlist_free(nv);
714
715         /* iterate over snaps, and set sd->parent_fromsnap_guid */
716         sd->parent_fromsnap_guid = 0;
717         VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
718         VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
719         (void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
720         VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
721         VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
722         nvlist_free(sd->parent_snaps);
723         nvlist_free(sd->snapprops);
724
725         /* add this fs to nvlist */
726         (void) snprintf(guidstring, sizeof (guidstring),
727             "0x%llx", (longlong_t)guid);
728         VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
729         nvlist_free(nvfs);
730
731         /* iterate over children */
732         if (sd->recursive)
733                 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
734
735         sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
736
737         zfs_close(zhp);
738         return (rv);
739 }
740
741 static int
742 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
743     const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
744 {
745         zfs_handle_t *zhp;
746         send_data_t sd = { 0 };
747         int error;
748
749         zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
750         if (zhp == NULL)
751                 return (EZFS_BADTYPE);
752
753         VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
754         sd.fromsnap = fromsnap;
755         sd.tosnap = tosnap;
756         sd.recursive = recursive;
757
758         if ((error = send_iterate_fs(zhp, &sd)) != 0) {
759                 nvlist_free(sd.fss);
760                 if (avlp != NULL)
761                         *avlp = NULL;
762                 *nvlp = NULL;
763                 return (error);
764         }
765
766         if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
767                 nvlist_free(sd.fss);
768                 *nvlp = NULL;
769                 return (EZFS_NOMEM);
770         }
771
772         *nvlp = sd.fss;
773         return (0);
774 }
775
776 /*
777  * Routines for dealing with the sorted snapshot functionality
778  */
779 typedef struct zfs_node {
780         zfs_handle_t    *zn_handle;
781         avl_node_t      zn_avlnode;
782 } zfs_node_t;
783
784 static int
785 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
786 {
787         avl_tree_t *avl = data;
788         zfs_node_t *node;
789         zfs_node_t search;
790
791         search.zn_handle = zhp;
792         node = avl_find(avl, &search, NULL);
793         if (node) {
794                 /*
795                  * If this snapshot was renamed while we were creating the
796                  * AVL tree, it's possible that we already inserted it under
797                  * its old name. Remove the old handle before adding the new
798                  * one.
799                  */
800                 zfs_close(node->zn_handle);
801                 avl_remove(avl, node);
802                 free(node);
803         }
804
805         node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
806         node->zn_handle = zhp;
807         avl_add(avl, node);
808
809         return (0);
810 }
811
812 static int
813 zfs_snapshot_compare(const void *larg, const void *rarg)
814 {
815         zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
816         zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
817         uint64_t lcreate, rcreate;
818
819         /*
820          * Sort them according to creation time.  We use the hidden
821          * CREATETXG property to get an absolute ordering of snapshots.
822          */
823         lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
824         rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
825
826         if (lcreate < rcreate)
827                 return (-1);
828         else if (lcreate > rcreate)
829                 return (+1);
830         else
831                 return (0);
832 }
833
834 int
835 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
836 {
837         int ret = 0;
838         zfs_node_t *node;
839         avl_tree_t avl;
840         void *cookie = NULL;
841
842         avl_create(&avl, zfs_snapshot_compare,
843             sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
844
845         ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
846
847         for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
848                 ret |= callback(node->zn_handle, data);
849
850         while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
851                 free(node);
852
853         avl_destroy(&avl);
854
855         return (ret);
856 }
857
858 /*
859  * Routines specific to "zfs send"
860  */
861 typedef struct send_dump_data {
862         /* these are all just the short snapname (the part after the @) */
863         const char *fromsnap;
864         const char *tosnap;
865         char prevsnap[ZFS_MAXNAMELEN];
866         uint64_t prevsnap_obj;
867         boolean_t seenfrom, seento, replicate, doall, fromorigin;
868         boolean_t verbose;
869         int outfd;
870         boolean_t err;
871         nvlist_t *fss;
872         avl_tree_t *fsavl;
873         snapfilter_cb_t *filter_cb;
874         void *filter_cb_arg;
875         nvlist_t *debugnv;
876         char holdtag[ZFS_MAXNAMELEN];
877         int cleanup_fd;
878 } send_dump_data_t;
879
880 /*
881  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
882  * NULL) to the file descriptor specified by outfd.
883  */
884 static int
885 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
886     boolean_t fromorigin, int outfd, nvlist_t *debugnv)
887 {
888         zfs_cmd_t zc = { 0 };
889         libzfs_handle_t *hdl = zhp->zfs_hdl;
890         nvlist_t *thisdbg;
891
892         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
893         assert(fromsnap_obj == 0 || !fromorigin);
894
895         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
896         zc.zc_cookie = outfd;
897         zc.zc_obj = fromorigin;
898         zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
899         zc.zc_fromobj = fromsnap_obj;
900
901         VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
902         if (fromsnap && fromsnap[0] != '\0') {
903                 VERIFY(0 == nvlist_add_string(thisdbg,
904                     "fromsnap", fromsnap));
905         }
906
907         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
908                 char errbuf[1024];
909                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
910                     "warning: cannot send '%s'"), zhp->zfs_name);
911
912                 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
913                 if (debugnv) {
914                         VERIFY(0 == nvlist_add_nvlist(debugnv,
915                             zhp->zfs_name, thisdbg));
916                 }
917                 nvlist_free(thisdbg);
918
919                 switch (errno) {
920
921                 case EXDEV:
922                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
923                             "not an earlier snapshot from the same fs"));
924                         return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
925
926                 case ENOENT:
927                         if (zfs_dataset_exists(hdl, zc.zc_name,
928                             ZFS_TYPE_SNAPSHOT)) {
929                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
930                                     "incremental source (@%s) does not exist"),
931                                     zc.zc_value);
932                         }
933                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
934
935                 case EDQUOT:
936                 case EFBIG:
937                 case EIO:
938                 case ENOLINK:
939                 case ENOSPC:
940                 case ENXIO:
941                 case EPIPE:
942                 case ERANGE:
943                 case EFAULT:
944                 case EROFS:
945                         zfs_error_aux(hdl, strerror(errno));
946                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
947
948                 default:
949                         return (zfs_standard_error(hdl, errno, errbuf));
950                 }
951         }
952
953         if (debugnv)
954                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
955         nvlist_free(thisdbg);
956
957         return (0);
958 }
959
960 static int
961 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
962 {
963         zfs_handle_t *pzhp;
964         int error = 0;
965         char *thissnap;
966
967         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
968
969         /*
970          * zfs_send() only opens a cleanup_fd for sends that need it,
971          * e.g. replication and doall.
972          */
973         if (sdd->cleanup_fd == -1)
974                 return (0);
975
976         thissnap = strchr(zhp->zfs_name, '@') + 1;
977         *(thissnap - 1) = '\0';
978         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
979         *(thissnap - 1) = '@';
980
981         /*
982          * It's OK if the parent no longer exists.  The send code will
983          * handle that error.
984          */
985         if (pzhp) {
986                 error = zfs_hold(pzhp, thissnap, sdd->holdtag,
987                     B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
988                     zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
989                     zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
990                 zfs_close(pzhp);
991         }
992
993         return (error);
994 }
995
996 static int
997 dump_snapshot(zfs_handle_t *zhp, void *arg)
998 {
999         send_dump_data_t *sdd = arg;
1000         char *thissnap;
1001         int err;
1002         boolean_t isfromsnap, istosnap;
1003         boolean_t exclude = B_FALSE;
1004
1005         thissnap = strchr(zhp->zfs_name, '@') + 1;
1006         isfromsnap = (sdd->fromsnap != NULL &&
1007             strcmp(sdd->fromsnap, thissnap) == 0);
1008
1009         if (!sdd->seenfrom && isfromsnap) {
1010                 err = hold_for_send(zhp, sdd);
1011                 if (err == 0) {
1012                         sdd->seenfrom = B_TRUE;
1013                         (void) strcpy(sdd->prevsnap, thissnap);
1014                         sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1015                             ZFS_PROP_OBJSETID);
1016                 } else if (err == ENOENT) {
1017                         err = 0;
1018                 }
1019                 zfs_close(zhp);
1020                 return (err);
1021         }
1022
1023         if (sdd->seento || !sdd->seenfrom) {
1024                 zfs_close(zhp);
1025                 return (0);
1026         }
1027
1028         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1029         if (istosnap)
1030                 sdd->seento = B_TRUE;
1031
1032         if (!sdd->doall && !isfromsnap && !istosnap) {
1033                 if (sdd->replicate) {
1034                         char *snapname;
1035                         nvlist_t *snapprops;
1036                         /*
1037                          * Filter out all intermediate snapshots except origin
1038                          * snapshots needed to replicate clones.
1039                          */
1040                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1041                             zhp->zfs_dmustats.dds_guid, &snapname);
1042
1043                         VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1044                             "snapprops", &snapprops));
1045                         VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1046                             thissnap, &snapprops));
1047                         exclude = !nvlist_exists(snapprops, "is_clone_origin");
1048                 } else {
1049                         exclude = B_TRUE;
1050                 }
1051         }
1052
1053         /*
1054          * If a filter function exists, call it to determine whether
1055          * this snapshot will be sent.
1056          */
1057         if (exclude || (sdd->filter_cb != NULL &&
1058             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1059                 /*
1060                  * This snapshot is filtered out.  Don't send it, and don't
1061                  * set prevsnap_obj, so it will be as if this snapshot didn't
1062                  * exist, and the next accepted snapshot will be sent as
1063                  * an incremental from the last accepted one, or as the
1064                  * first (and full) snapshot in the case of a replication,
1065                  * non-incremental send.
1066                  */
1067                 zfs_close(zhp);
1068                 return (0);
1069         }
1070
1071         err = hold_for_send(zhp, sdd);
1072         if (err) {
1073                 if (err == ENOENT)
1074                         err = 0;
1075                 zfs_close(zhp);
1076                 return (err);
1077         }
1078
1079         /* send it */
1080         if (sdd->verbose) {
1081                 (void) fprintf(stderr, "sending from @%s to %s\n",
1082                     sdd->prevsnap, zhp->zfs_name);
1083         }
1084
1085         err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1086             sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
1087             sdd->outfd, sdd->debugnv);
1088
1089         (void) strcpy(sdd->prevsnap, thissnap);
1090         sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1091         zfs_close(zhp);
1092         return (err);
1093 }
1094
1095 static int
1096 dump_filesystem(zfs_handle_t *zhp, void *arg)
1097 {
1098         int rv = 0;
1099         send_dump_data_t *sdd = arg;
1100         boolean_t missingfrom = B_FALSE;
1101         zfs_cmd_t zc = { 0 };
1102
1103         (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1104             zhp->zfs_name, sdd->tosnap);
1105         if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1106                 (void) fprintf(stderr, "WARNING: "
1107                     "could not send %s@%s: does not exist\n",
1108                     zhp->zfs_name, sdd->tosnap);
1109                 sdd->err = B_TRUE;
1110                 return (0);
1111         }
1112
1113         if (sdd->replicate && sdd->fromsnap) {
1114                 /*
1115                  * If this fs does not have fromsnap, and we're doing
1116                  * recursive, we need to send a full stream from the
1117                  * beginning (or an incremental from the origin if this
1118                  * is a clone).  If we're doing non-recursive, then let
1119                  * them get the error.
1120                  */
1121                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1122                     zhp->zfs_name, sdd->fromsnap);
1123                 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1124                     ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1125                         missingfrom = B_TRUE;
1126                 }
1127         }
1128
1129         sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1130         sdd->prevsnap_obj = 0;
1131         if (sdd->fromsnap == NULL || missingfrom)
1132                 sdd->seenfrom = B_TRUE;
1133
1134         rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1135         if (!sdd->seenfrom) {
1136                 (void) fprintf(stderr,
1137                     "WARNING: could not send %s@%s:\n"
1138                     "incremental source (%s@%s) does not exist\n",
1139                     zhp->zfs_name, sdd->tosnap,
1140                     zhp->zfs_name, sdd->fromsnap);
1141                 sdd->err = B_TRUE;
1142         } else if (!sdd->seento) {
1143                 if (sdd->fromsnap) {
1144                         (void) fprintf(stderr,
1145                             "WARNING: could not send %s@%s:\n"
1146                             "incremental source (%s@%s) "
1147                             "is not earlier than it\n",
1148                             zhp->zfs_name, sdd->tosnap,
1149                             zhp->zfs_name, sdd->fromsnap);
1150                 } else {
1151                         (void) fprintf(stderr, "WARNING: "
1152                             "could not send %s@%s: does not exist\n",
1153                             zhp->zfs_name, sdd->tosnap);
1154                 }
1155                 sdd->err = B_TRUE;
1156         }
1157
1158         return (rv);
1159 }
1160
1161 static int
1162 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1163 {
1164         send_dump_data_t *sdd = arg;
1165         nvpair_t *fspair;
1166         boolean_t needagain, progress;
1167
1168         if (!sdd->replicate)
1169                 return (dump_filesystem(rzhp, sdd));
1170
1171         /* Mark the clone origin snapshots. */
1172         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1173             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1174                 nvlist_t *nvfs;
1175                 uint64_t origin_guid = 0;
1176
1177                 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1178                 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1179                 if (origin_guid != 0) {
1180                         char *snapname;
1181                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1182                             origin_guid, &snapname);
1183                         if (origin_nv != NULL) {
1184                                 nvlist_t *snapprops;
1185                                 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1186                                     "snapprops", &snapprops));
1187                                 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1188                                     snapname, &snapprops));
1189                                 VERIFY(0 == nvlist_add_boolean(
1190                                     snapprops, "is_clone_origin"));
1191                         }
1192                 }
1193         }
1194 again:
1195         needagain = progress = B_FALSE;
1196         for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1197             fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1198                 nvlist_t *fslist;
1199                 char *fsname;
1200                 zfs_handle_t *zhp;
1201                 int err;
1202                 uint64_t origin_guid = 0;
1203
1204                 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1205                 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1206                         continue;
1207
1208                 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1209                 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1210
1211                 if (origin_guid != 0) {
1212                         nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1213                             origin_guid, NULL);
1214                         if (origin_nv != NULL &&
1215                             nvlist_lookup_boolean(origin_nv,
1216                             "sent") == ENOENT) {
1217                                 /*
1218                                  * origin has not been sent yet;
1219                                  * skip this clone.
1220                                  */
1221                                 needagain = B_TRUE;
1222                                 continue;
1223                         }
1224                 }
1225
1226                 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1227                 if (zhp == NULL)
1228                         return (-1);
1229                 err = dump_filesystem(zhp, sdd);
1230                 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1231                 progress = B_TRUE;
1232                 zfs_close(zhp);
1233                 if (err)
1234                         return (err);
1235         }
1236         if (needagain) {
1237                 assert(progress);
1238                 goto again;
1239         }
1240         return (0);
1241 }
1242
1243 /*
1244  * Generate a send stream for the dataset identified by the argument zhp.
1245  *
1246  * The content of the send stream is the snapshot identified by
1247  * 'tosnap'.  Incremental streams are requested in two ways:
1248  *     - from the snapshot identified by "fromsnap" (if non-null) or
1249  *     - from the origin of the dataset identified by zhp, which must
1250  *       be a clone.  In this case, "fromsnap" is null and "fromorigin"
1251  *       is TRUE.
1252  *
1253  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1254  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1255  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1256  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1257  * case too. If "props" is set, send properties.
1258  */
1259 int
1260 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1261     sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
1262     void *cb_arg, nvlist_t **debugnvp)
1263 {
1264         char errbuf[1024];
1265         send_dump_data_t sdd = { 0 };
1266         int err;
1267         nvlist_t *fss = NULL;
1268         avl_tree_t *fsavl = NULL;
1269         static uint64_t holdseq;
1270         int spa_version;
1271         boolean_t holdsnaps = B_FALSE;
1272         pthread_t tid;
1273         int pipefd[2];
1274         dedup_arg_t dda = { 0 };
1275         int featureflags = 0;
1276
1277         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1278             "cannot send '%s'"), zhp->zfs_name);
1279
1280         if (fromsnap && fromsnap[0] == '\0') {
1281                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1282                     "zero-length incremental source"));
1283                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1284         }
1285
1286         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1287                 uint64_t version;
1288                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1289                 if (version >= ZPL_VERSION_SA) {
1290                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1291                 }
1292         }
1293
1294         if (zfs_spa_version(zhp, &spa_version) == 0 &&
1295             spa_version >= SPA_VERSION_USERREFS &&
1296             (flags.doall || flags.replicate))
1297                 holdsnaps = B_TRUE;
1298
1299         if (flags.dedup) {
1300                 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1301                     DMU_BACKUP_FEATURE_DEDUPPROPS);
1302                 if (err = pipe(pipefd)) {
1303                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1304                         return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1305                             errbuf));
1306                 }
1307                 dda.outputfd = outfd;
1308                 dda.inputfd = pipefd[1];
1309                 dda.dedup_hdl = zhp->zfs_hdl;
1310                 if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1311                         (void) close(pipefd[0]);
1312                         (void) close(pipefd[1]);
1313                         zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1314                         return (zfs_error(zhp->zfs_hdl,
1315                             EZFS_THREADCREATEFAILED, errbuf));
1316                 }
1317         }
1318
1319         if (flags.replicate || flags.doall || flags.props) {
1320                 dmu_replay_record_t drr = { 0 };
1321                 char *packbuf = NULL;
1322                 size_t buflen = 0;
1323                 zio_cksum_t zc = { 0 };
1324
1325                 if (flags.replicate || flags.props) {
1326                         nvlist_t *hdrnv;
1327
1328                         VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1329                         if (fromsnap) {
1330                                 VERIFY(0 == nvlist_add_string(hdrnv,
1331                                     "fromsnap", fromsnap));
1332                         }
1333                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1334                         if (!flags.replicate) {
1335                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1336                                     "not_recursive"));
1337                         }
1338
1339                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1340                             fromsnap, tosnap, flags.replicate, &fss, &fsavl);
1341                         if (err)
1342                                 goto err_out;
1343                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1344                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1345                             NV_ENCODE_XDR, 0);
1346                         if (debugnvp)
1347                                 *debugnvp = hdrnv;
1348                         else
1349                                 nvlist_free(hdrnv);
1350                         if (err) {
1351                                 fsavl_destroy(fsavl);
1352                                 nvlist_free(fss);
1353                                 goto stderr_out;
1354                         }
1355                 }
1356
1357                 /* write first begin record */
1358                 drr.drr_type = DRR_BEGIN;
1359                 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1360                 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.drr_versioninfo,
1361                     DMU_COMPOUNDSTREAM);
1362                 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.drr_versioninfo,
1363                     featureflags);
1364                 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1365                     sizeof (drr.drr_u.drr_begin.drr_toname),
1366                     "%s@%s", zhp->zfs_name, tosnap);
1367                 drr.drr_payloadlen = buflen;
1368                 err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1369
1370                 /* write header nvlist */
1371                 if (err != -1 && packbuf != NULL) {
1372                         err = cksum_and_write(packbuf, buflen, &zc, outfd);
1373                 }
1374                 free(packbuf);
1375                 if (err == -1) {
1376                         fsavl_destroy(fsavl);
1377                         nvlist_free(fss);
1378                         err = errno;
1379                         goto stderr_out;
1380                 }
1381
1382                 /* write end record */
1383                 if (err != -1) {
1384                         bzero(&drr, sizeof (drr));
1385                         drr.drr_type = DRR_END;
1386                         drr.drr_u.drr_end.drr_checksum = zc;
1387                         err = write(outfd, &drr, sizeof (drr));
1388                         if (err == -1) {
1389                                 fsavl_destroy(fsavl);
1390                                 nvlist_free(fss);
1391                                 err = errno;
1392                                 goto stderr_out;
1393                         }
1394                 }
1395         }
1396
1397         /* dump each stream */
1398         sdd.fromsnap = fromsnap;
1399         sdd.tosnap = tosnap;
1400         if (flags.dedup)
1401                 sdd.outfd = pipefd[0];
1402         else
1403                 sdd.outfd = outfd;
1404         sdd.replicate = flags.replicate;
1405         sdd.doall = flags.doall;
1406         sdd.fromorigin = flags.fromorigin;
1407         sdd.fss = fss;
1408         sdd.fsavl = fsavl;
1409         sdd.verbose = flags.verbose;
1410         sdd.filter_cb = filter_func;
1411         sdd.filter_cb_arg = cb_arg;
1412         if (debugnvp)
1413                 sdd.debugnv = *debugnvp;
1414         if (holdsnaps) {
1415                 ++holdseq;
1416                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1417                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1418                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1419                 if (sdd.cleanup_fd < 0) {
1420                         err = errno;
1421                         goto stderr_out;
1422                 }
1423         } else {
1424                 sdd.cleanup_fd = -1;
1425         }
1426         err = dump_filesystems(zhp, &sdd);
1427         fsavl_destroy(fsavl);
1428         nvlist_free(fss);
1429
1430         if (flags.dedup) {
1431                 (void) close(pipefd[0]);
1432                 (void) pthread_join(tid, NULL);
1433         }
1434
1435         if (sdd.cleanup_fd != -1) {
1436                 VERIFY(0 == close(sdd.cleanup_fd));
1437                 sdd.cleanup_fd = -1;
1438         }
1439
1440         if (flags.replicate || flags.doall || flags.props) {
1441                 /*
1442                  * write final end record.  NB: want to do this even if
1443                  * there was some error, because it might not be totally
1444                  * failed.
1445                  */
1446                 dmu_replay_record_t drr = { 0 };
1447                 drr.drr_type = DRR_END;
1448                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1449                         return (zfs_standard_error(zhp->zfs_hdl,
1450                             errno, errbuf));
1451                 }
1452         }
1453
1454         return (err || sdd.err);
1455
1456 stderr_out:
1457         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1458 err_out:
1459         if (sdd.cleanup_fd != -1)
1460                 VERIFY(0 == close(sdd.cleanup_fd));
1461         if (flags.dedup) {
1462                 (void) pthread_cancel(tid);
1463                 (void) pthread_join(tid, NULL);
1464                 (void) close(pipefd[0]);
1465         }
1466         return (err);
1467 }
1468
1469 /*
1470  * Routines specific to "zfs recv"
1471  */
1472
1473 static int
1474 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1475     boolean_t byteswap, zio_cksum_t *zc)
1476 {
1477         char *cp = buf;
1478         int rv;
1479         int len = ilen;
1480
1481         do {
1482                 rv = read(fd, cp, len);
1483                 cp += rv;
1484                 len -= rv;
1485         } while (rv > 0);
1486
1487         if (rv < 0 || len != 0) {
1488                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1489                     "failed to read from stream"));
1490                 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1491                     "cannot receive")));
1492         }
1493
1494         if (zc) {
1495                 if (byteswap)
1496                         fletcher_4_incremental_byteswap(buf, ilen, zc);
1497                 else
1498                         fletcher_4_incremental_native(buf, ilen, zc);
1499         }
1500         return (0);
1501 }
1502
1503 static int
1504 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1505     boolean_t byteswap, zio_cksum_t *zc)
1506 {
1507         char *buf;
1508         int err;
1509
1510         buf = zfs_alloc(hdl, len);
1511         if (buf == NULL)
1512                 return (ENOMEM);
1513
1514         err = recv_read(hdl, fd, buf, len, byteswap, zc);
1515         if (err != 0) {
1516                 free(buf);
1517                 return (err);
1518         }
1519
1520         err = nvlist_unpack(buf, len, nvp, 0);
1521         free(buf);
1522         if (err != 0) {
1523                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1524                     "stream (malformed nvlist)"));
1525                 return (EINVAL);
1526         }
1527         return (0);
1528 }
1529
1530 static int
1531 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1532     int baselen, char *newname, recvflags_t flags)
1533 {
1534         static int seq;
1535         zfs_cmd_t zc = { 0 };
1536         int err;
1537         prop_changelist_t *clp;
1538         zfs_handle_t *zhp;
1539
1540         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1541         if (zhp == NULL)
1542                 return (-1);
1543         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1544             flags.force ? MS_FORCE : 0);
1545         zfs_close(zhp);
1546         if (clp == NULL)
1547                 return (-1);
1548         err = changelist_prefix(clp);
1549         if (err)
1550                 return (err);
1551
1552         zc.zc_objset_type = DMU_OST_ZFS;
1553         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1554
1555         if (tryname) {
1556                 (void) strcpy(newname, tryname);
1557
1558                 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1559
1560                 if (flags.verbose) {
1561                         (void) printf("attempting rename %s to %s\n",
1562                             zc.zc_name, zc.zc_value);
1563                 }
1564                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1565                 if (err == 0)
1566                         changelist_rename(clp, name, tryname);
1567         } else {
1568                 err = ENOENT;
1569         }
1570
1571         if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
1572                 seq++;
1573
1574                 (void) strncpy(newname, name, baselen);
1575                 (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
1576                     "recv-%u-%u", getpid(), seq);
1577                 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1578
1579                 if (flags.verbose) {
1580                         (void) printf("failed - trying rename %s to %s\n",
1581                             zc.zc_name, zc.zc_value);
1582                 }
1583                 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1584                 if (err == 0)
1585                         changelist_rename(clp, name, newname);
1586                 if (err && flags.verbose) {
1587                         (void) printf("failed (%u) - "
1588                             "will try again on next pass\n", errno);
1589                 }
1590                 err = EAGAIN;
1591         } else if (flags.verbose) {
1592                 if (err == 0)
1593                         (void) printf("success\n");
1594                 else
1595                         (void) printf("failed (%u)\n", errno);
1596         }
1597
1598         (void) changelist_postfix(clp);
1599         changelist_free(clp);
1600
1601         return (err);
1602 }
1603
1604 static int
1605 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1606     char *newname, recvflags_t flags)
1607 {
1608         zfs_cmd_t zc = { 0 };
1609         int err = 0;
1610         prop_changelist_t *clp;
1611         zfs_handle_t *zhp;
1612         boolean_t defer = B_FALSE;
1613         int spa_version;
1614
1615         zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1616         if (zhp == NULL)
1617                 return (-1);
1618         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1619             flags.force ? MS_FORCE : 0);
1620         if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1621             zfs_spa_version(zhp, &spa_version) == 0 &&
1622             spa_version >= SPA_VERSION_USERREFS)
1623                 defer = B_TRUE;
1624         zfs_close(zhp);
1625         if (clp == NULL)
1626                 return (-1);
1627         err = changelist_prefix(clp);
1628         if (err)
1629                 return (err);
1630
1631         zc.zc_objset_type = DMU_OST_ZFS;
1632         zc.zc_defer_destroy = defer;
1633         (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1634
1635         if (flags.verbose)
1636                 (void) printf("attempting destroy %s\n", zc.zc_name);
1637         err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1638         if (err == 0) {
1639                 if (flags.verbose)
1640                         (void) printf("success\n");
1641                 changelist_remove(clp, zc.zc_name);
1642         }
1643
1644         (void) changelist_postfix(clp);
1645         changelist_free(clp);
1646
1647         /*
1648          * Deferred destroy might destroy the snapshot or only mark it to be
1649          * destroyed later, and it returns success in either case.
1650          */
1651         if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1652             ZFS_TYPE_SNAPSHOT))) {
1653                 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1654         }
1655
1656         return (err);
1657 }
1658
/*
 * Search state handed to guid_to_name_cb() by guid_to_name().
 */
typedef struct guid_to_name_data {
	uint64_t guid;	/* guid of the dataset being looked for */
	char *name;	/* caller's buffer; receives the matching name */
} guid_to_name_data_t;
1663
1664 static int
1665 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1666 {
1667         guid_to_name_data_t *gtnd = arg;
1668         int err;
1669
1670         if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1671                 (void) strcpy(gtnd->name, zhp->zfs_name);
1672                 zfs_close(zhp);
1673                 return (EEXIST);
1674         }
1675         err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1676         zfs_close(zhp);
1677         return (err);
1678 }
1679
1680 static int
1681 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1682     char *name)
1683 {
1684         /* exhaustive search all local snapshots */
1685         guid_to_name_data_t gtnd;
1686         int err = 0;
1687         zfs_handle_t *zhp;
1688         char *cp;
1689
1690         gtnd.guid = guid;
1691         gtnd.name = name;
1692
1693         if (strchr(parent, '@') == NULL) {
1694                 zhp = make_dataset_handle(hdl, parent);
1695                 if (zhp != NULL) {
1696                         err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1697                         zfs_close(zhp);
1698                         if (err == EEXIST)
1699                                 return (0);
1700                 }
1701         }
1702
1703         cp = strchr(parent, '/');
1704         if (cp)
1705                 *cp = '\0';
1706         zhp = make_dataset_handle(hdl, parent);
1707         if (cp)
1708                 *cp = '/';
1709
1710         if (zhp) {
1711                 err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1712                 zfs_close(zhp);
1713         }
1714
1715         return (err == EEXIST ? 0 : ENOENT);
1716
1717 }
1718
1719 /*
1720  * Return true if dataset guid1 is created before guid2.
1721  */
1722 static int
1723 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1724     uint64_t guid1, uint64_t guid2)
1725 {
1726         nvlist_t *nvfs;
1727         char *fsname, *snapname;
1728         char buf[ZFS_MAXNAMELEN];
1729         int rv;
1730         zfs_node_t zn1, zn2;
1731
1732         if (guid2 == 0)
1733                 return (0);
1734         if (guid1 == 0)
1735                 return (1);
1736
1737         nvfs = fsavl_find(avl, guid1, &snapname);
1738         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1739         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1740         zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1741         if (zn1.zn_handle == NULL)
1742                 return (-1);
1743
1744         nvfs = fsavl_find(avl, guid2, &snapname);
1745         VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1746         (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1747         zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1748         if (zn2.zn_handle == NULL) {
1749                 zfs_close(zn2.zn_handle);
1750                 return (-1);
1751         }
1752
1753         rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
1754
1755         zfs_close(zn1.zn_handle);
1756         zfs_close(zn2.zn_handle);
1757
1758         return (rv);
1759 }
1760
1761 static int
1762 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1763     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
1764     nvlist_t *renamed)
1765 {
1766         nvlist_t *local_nv;
1767         avl_tree_t *local_avl;
1768         nvpair_t *fselem, *nextfselem;
1769         char *fromsnap;
1770         char newname[ZFS_MAXNAMELEN];
1771         int error;
1772         boolean_t needagain, progress, recursive;
1773         char *s1, *s2;
1774
1775         VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1776
1777         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
1778             ENOENT);
1779
1780         if (flags.dryrun)
1781                 return (0);
1782
1783 again:
1784         needagain = progress = B_FALSE;
1785
1786         if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1787             recursive, &local_nv, &local_avl)) != 0)
1788                 return (error);
1789
1790         /*
1791          * Process deletes and renames
1792          */
1793         for (fselem = nvlist_next_nvpair(local_nv, NULL);
1794             fselem; fselem = nextfselem) {
1795                 nvlist_t *nvfs, *snaps;
1796                 nvlist_t *stream_nvfs = NULL;
1797                 nvpair_t *snapelem, *nextsnapelem;
1798                 uint64_t fromguid = 0;
1799                 uint64_t originguid = 0;
1800                 uint64_t stream_originguid = 0;
1801                 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1802                 char *fsname, *stream_fsname;
1803
1804                 nextfselem = nvlist_next_nvpair(local_nv, fselem);
1805
1806                 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1807                 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1808                 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1809                 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1810                     &parent_fromsnap_guid));
1811                 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1812
1813                 /*
1814                  * First find the stream's fs, so we can check for
1815                  * a different origin (due to "zfs promote")
1816                  */
1817                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
1818                     snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1819                         uint64_t thisguid;
1820
1821                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1822                         stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1823
1824                         if (stream_nvfs != NULL)
1825                                 break;
1826                 }
1827
1828                 /* check for promote */
1829                 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
1830                     &stream_originguid);
1831                 if (stream_nvfs && originguid != stream_originguid) {
1832                         switch (created_before(hdl, local_avl,
1833                             stream_originguid, originguid)) {
1834                         case 1: {
1835                                 /* promote it! */
1836                                 zfs_cmd_t zc = { 0 };
1837                                 nvlist_t *origin_nvfs;
1838                                 char *origin_fsname;
1839
1840                                 if (flags.verbose)
1841                                         (void) printf("promoting %s\n", fsname);
1842
1843                                 origin_nvfs = fsavl_find(local_avl, originguid,
1844                                     NULL);
1845                                 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
1846                                     "name", &origin_fsname));
1847                                 (void) strlcpy(zc.zc_value, origin_fsname,
1848                                     sizeof (zc.zc_value));
1849                                 (void) strlcpy(zc.zc_name, fsname,
1850                                     sizeof (zc.zc_name));
1851                                 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
1852                                 if (error == 0)
1853                                         progress = B_TRUE;
1854                                 break;
1855                         }
1856                         default:
1857                                 break;
1858                         case -1:
1859                                 fsavl_destroy(local_avl);
1860                                 nvlist_free(local_nv);
1861                                 return (-1);
1862                         }
1863                         /*
1864                          * We had/have the wrong origin, therefore our
1865                          * list of snapshots is wrong.  Need to handle
1866                          * them on the next pass.
1867                          */
1868                         needagain = B_TRUE;
1869                         continue;
1870                 }
1871
1872                 for (snapelem = nvlist_next_nvpair(snaps, NULL);
1873                     snapelem; snapelem = nextsnapelem) {
1874                         uint64_t thisguid;
1875                         char *stream_snapname;
1876                         nvlist_t *found, *props;
1877
1878                         nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
1879
1880                         VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1881                         found = fsavl_find(stream_avl, thisguid,
1882                             &stream_snapname);
1883
1884                         /* check for delete */
1885                         if (found == NULL) {
1886                                 char name[ZFS_MAXNAMELEN];
1887
1888                                 if (!flags.force)
1889                                         continue;
1890
1891                                 (void) snprintf(name, sizeof (name), "%s@%s",
1892                                     fsname, nvpair_name(snapelem));
1893
1894                                 error = recv_destroy(hdl, name,
1895                                     strlen(fsname)+1, newname, flags);
1896                                 if (error)
1897                                         needagain = B_TRUE;
1898                                 else
1899                                         progress = B_TRUE;
1900                                 continue;
1901                         }
1902
1903                         stream_nvfs = found;
1904
1905                         if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
1906                             &props) && 0 == nvlist_lookup_nvlist(props,
1907                             stream_snapname, &props)) {
1908                                 zfs_cmd_t zc = { 0 };
1909
1910                                 zc.zc_cookie = B_TRUE; /* received */
1911                                 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
1912                                     "%s@%s", fsname, nvpair_name(snapelem));
1913                                 if (zcmd_write_src_nvlist(hdl, &zc,
1914                                     props) == 0) {
1915                                         (void) zfs_ioctl(hdl,
1916                                             ZFS_IOC_SET_PROP, &zc);
1917                                         zcmd_free_nvlists(&zc);
1918                                 }
1919                         }
1920
1921                         /* check for different snapname */
1922                         if (strcmp(nvpair_name(snapelem),
1923                             stream_snapname) != 0) {
1924                                 char name[ZFS_MAXNAMELEN];
1925                                 char tryname[ZFS_MAXNAMELEN];
1926
1927                                 (void) snprintf(name, sizeof (name), "%s@%s",
1928                                     fsname, nvpair_name(snapelem));
1929                                 (void) snprintf(tryname, sizeof (name), "%s@%s",
1930                                     fsname, stream_snapname);
1931
1932                                 error = recv_rename(hdl, name, tryname,
1933                                     strlen(fsname)+1, newname, flags);
1934                                 if (error)
1935                                         needagain = B_TRUE;
1936                                 else
1937                                         progress = B_TRUE;
1938                         }
1939
1940                         if (strcmp(stream_snapname, fromsnap) == 0)
1941                                 fromguid = thisguid;
1942                 }
1943
1944                 /* check for delete */
1945                 if (stream_nvfs == NULL) {
1946                         if (!flags.force)
1947                                 continue;
1948
1949                         error = recv_destroy(hdl, fsname, strlen(tofs)+1,
1950                             newname, flags);
1951                         if (error)
1952                                 needagain = B_TRUE;
1953                         else
1954                                 progress = B_TRUE;
1955                         continue;
1956                 }
1957
1958                 if (fromguid == 0) {
1959                         if (flags.verbose) {
1960                                 (void) printf("local fs %s does not have "
1961                                     "fromsnap (%s in stream); must have "
1962                                     "been deleted locally; ignoring\n",
1963                                     fsname, fromsnap);
1964                         }
1965                         continue;
1966                 }
1967
1968                 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
1969                     "name", &stream_fsname));
1970                 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
1971                     "parentfromsnap", &stream_parent_fromsnap_guid));
1972
1973                 s1 = strrchr(fsname, '/');
1974                 s2 = strrchr(stream_fsname, '/');
1975
1976                 /*
1977                  * Check for rename. If the exact receive path is specified, it
1978                  * does not count as a rename, but we still need to check the
1979                  * datasets beneath it.
1980                  */
1981                 if ((stream_parent_fromsnap_guid != 0 &&
1982                     parent_fromsnap_guid != 0 &&
1983                     stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
1984                     ((flags.isprefix || strcmp(tofs, fsname) != 0) &&
1985                     (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
1986                         nvlist_t *parent;
1987                         char tryname[ZFS_MAXNAMELEN];
1988
1989                         parent = fsavl_find(local_avl,
1990                             stream_parent_fromsnap_guid, NULL);
1991                         /*
1992                          * NB: parent might not be found if we used the
1993                          * tosnap for stream_parent_fromsnap_guid,
1994                          * because the parent is a newly-created fs;
1995                          * we'll be able to rename it after we recv the
1996                          * new fs.
1997                          */
1998                         if (parent != NULL) {
1999                                 char *pname;
2000
2001                                 VERIFY(0 == nvlist_lookup_string(parent, "name",
2002                                     &pname));
2003                                 (void) snprintf(tryname, sizeof (tryname),
2004                                     "%s%s", pname, strrchr(stream_fsname, '/'));
2005                         } else {
2006                                 tryname[0] = '\0';
2007                                 if (flags.verbose) {
2008                                         (void) printf("local fs %s new parent "
2009                                             "not found\n", fsname);
2010                                 }
2011                         }
2012
2013                         newname[0] = '\0';
2014
2015                         error = recv_rename(hdl, fsname, tryname,
2016                             strlen(tofs)+1, newname, flags);
2017
2018                         if (renamed != NULL && newname[0] != '\0') {
2019                                 VERIFY(0 == nvlist_add_boolean(renamed,
2020                                     newname));
2021                         }
2022
2023                         if (error)
2024                                 needagain = B_TRUE;
2025                         else
2026                                 progress = B_TRUE;
2027                 }
2028         }
2029
2030         fsavl_destroy(local_avl);
2031         nvlist_free(local_nv);
2032
2033         if (needagain && progress) {
2034                 /* do another pass to fix up temporary names */
2035                 if (flags.verbose)
2036                         (void) printf("another pass:\n");
2037                 goto again;
2038         }
2039
2040         return (needagain);
2041 }
2042
2043 static int
2044 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2045     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2046     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2047 {
2048         nvlist_t *stream_nv = NULL;
2049         avl_tree_t *stream_avl = NULL;
2050         char *fromsnap = NULL;
2051         char *cp;
2052         char tofs[ZFS_MAXNAMELEN];
2053         char sendfs[ZFS_MAXNAMELEN];
2054         char errbuf[1024];
2055         dmu_replay_record_t drre;
2056         int error;
2057         boolean_t anyerr = B_FALSE;
2058         boolean_t softerr = B_FALSE;
2059         boolean_t recursive;
2060
2061         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2062             "cannot receive"));
2063
2064         assert(drr->drr_type == DRR_BEGIN);
2065         assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2066         assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2067             DMU_COMPOUNDSTREAM);
2068
2069         /*
2070          * Read in the nvlist from the stream.
2071          */
2072         if (drr->drr_payloadlen != 0) {
2073                 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2074                     &stream_nv, flags.byteswap, zc);
2075                 if (error) {
2076                         error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2077                         goto out;
2078                 }
2079         }
2080
2081         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2082             ENOENT);
2083
2084         if (recursive && strchr(destname, '@')) {
2085                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2086                     "cannot specify snapshot name for multi-snapshot stream"));
2087                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2088                 goto out;
2089         }
2090
2091         /*
2092          * Read in the end record and verify checksum.
2093          */
2094         if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2095             flags.byteswap, NULL)))
2096                 goto out;
2097         if (flags.byteswap) {
2098                 drre.drr_type = BSWAP_32(drre.drr_type);
2099                 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2100                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2101                 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2102                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2103                 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2104                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2105                 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2106                     BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2107         }
2108         if (drre.drr_type != DRR_END) {
2109                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2110                 goto out;
2111         }
2112         if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2113                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2114                     "incorrect header checksum"));
2115                 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2116                 goto out;
2117         }
2118
2119         (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2120
2121         if (drr->drr_payloadlen != 0) {
2122                 nvlist_t *stream_fss;
2123
2124                 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2125                     &stream_fss));
2126                 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2127                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2128                             "couldn't allocate avl tree"));
2129                         error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2130                         goto out;
2131                 }
2132
2133                 if (fromsnap != NULL) {
2134                         nvlist_t *renamed = NULL;
2135                         nvpair_t *pair = NULL;
2136
2137                         (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2138                         if (flags.isprefix) {
2139                                 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2140                                 int i;
2141
2142                                 if (flags.istail) {
2143                                         cp = strrchr(drrb->drr_toname, '/');
2144                                         if (cp == NULL) {
2145                                                 (void) strlcat(tofs, "/",
2146                                                     ZFS_MAXNAMELEN);
2147                                                 i = 0;
2148                                         } else {
2149                                                 i = (cp - drrb->drr_toname);
2150                                         }
2151                                 } else {
2152                                         i = strcspn(drrb->drr_toname, "/@");
2153                                 }
2154                                 /* zfs_receive_one() will create_parents() */
2155                                 (void) strlcat(tofs, &drrb->drr_toname[i],
2156                                     ZFS_MAXNAMELEN);
2157                                 *strchr(tofs, '@') = '\0';
2158                         }
2159
2160                         if (recursive && !flags.dryrun && !flags.nomount) {
2161                                 VERIFY(0 == nvlist_alloc(&renamed,
2162                                     NV_UNIQUE_NAME, 0));
2163                         }
2164
2165                         softerr = recv_incremental_replication(hdl, tofs, flags,
2166                             stream_nv, stream_avl, renamed);
2167
2168                         /* Unmount renamed filesystems before receiving. */
2169                         while ((pair = nvlist_next_nvpair(renamed,
2170                             pair)) != NULL) {
2171                                 zfs_handle_t *zhp;
2172                                 prop_changelist_t *clp = NULL;
2173
2174                                 zhp = zfs_open(hdl, nvpair_name(pair),
2175                                     ZFS_TYPE_FILESYSTEM);
2176                                 if (zhp != NULL) {
2177                                         clp = changelist_gather(zhp,
2178                                             ZFS_PROP_MOUNTPOINT, 0, 0);
2179                                         zfs_close(zhp);
2180                                         if (clp != NULL) {
2181                                                 softerr |=
2182                                                     changelist_prefix(clp);
2183                                                 changelist_free(clp);
2184                                         }
2185                                 }
2186                         }
2187
2188                         nvlist_free(renamed);
2189                 }
2190         }
2191
2192         /*
2193          * Get the fs specified by the first path in the stream (the top level
2194          * specified by 'zfs send') and pass it to each invocation of
2195          * zfs_receive_one().
2196          */
2197         (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2198             ZFS_MAXNAMELEN);
2199         if ((cp = strchr(sendfs, '@')) != NULL)
2200                 *cp = '\0';
2201
2202         /* Finally, receive each contained stream */
2203         do {
2204                 /*
2205                  * we should figure out if it has a recoverable
2206                  * error, in which case do a recv_skip() and drive on.
2207                  * Note, if we fail due to already having this guid,
2208                  * zfs_receive_one() will take care of it (ie,
2209                  * recv_skip() and return 0).
2210                  */
2211                 error = zfs_receive_impl(hdl, destname, flags, fd,
2212                     sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2213                     action_handlep);
2214                 if (error == ENODATA) {
2215                         error = 0;
2216                         break;
2217                 }
2218                 anyerr |= error;
2219         } while (error == 0);
2220
2221         if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2222                 /*
2223                  * Now that we have the fs's they sent us, try the
2224                  * renames again.
2225                  */
2226                 softerr = recv_incremental_replication(hdl, tofs, flags,
2227                     stream_nv, stream_avl, NULL);
2228         }
2229
2230 out:
2231         fsavl_destroy(stream_avl);
2232         if (stream_nv)
2233                 nvlist_free(stream_nv);
2234         if (softerr)
2235                 error = -2;
2236         if (anyerr)
2237                 error = -1;
2238         return (error);
2239 }
2240
2241 static void
2242 trunc_prop_errs(int truncated)
2243 {
2244         ASSERT(truncated != 0);
2245
2246         if (truncated == 1)
2247                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2248                     "1 more property could not be set\n"));
2249         else
2250                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2251                     "%d more properties could not be set\n"), truncated);
2252 }
2253
2254 static int
2255 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2256 {
2257         dmu_replay_record_t *drr;
2258         void *buf = malloc(1<<20);
2259         char errbuf[1024];
2260
2261         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2262             "cannot receive:"));
2263
2264         /* XXX would be great to use lseek if possible... */
2265         drr = buf;
2266
2267         while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2268             byteswap, NULL) == 0) {
2269                 if (byteswap)
2270                         drr->drr_type = BSWAP_32(drr->drr_type);
2271
2272                 switch (drr->drr_type) {
2273                 case DRR_BEGIN:
2274                         /* NB: not to be used on v2 stream packages */
2275                         if (drr->drr_payloadlen != 0) {
2276                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2277                                     "invalid substream header"));
2278                                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2279                         }
2280                         break;
2281
2282                 case DRR_END:
2283                         free(buf);
2284                         return (0);
2285
2286                 case DRR_OBJECT:
2287                         if (byteswap) {
2288                                 drr->drr_u.drr_object.drr_bonuslen =
2289                                     BSWAP_32(drr->drr_u.drr_object.
2290                                     drr_bonuslen);
2291                         }
2292                         (void) recv_read(hdl, fd, buf,
2293                             P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2294                             B_FALSE, NULL);
2295                         break;
2296
2297                 case DRR_WRITE:
2298                         if (byteswap) {
2299                                 drr->drr_u.drr_write.drr_length =
2300                                     BSWAP_64(drr->drr_u.drr_write.drr_length);
2301                         }
2302                         (void) recv_read(hdl, fd, buf,
2303                             drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2304                         break;
2305                 case DRR_SPILL:
2306                         if (byteswap) {
2307                                 drr->drr_u.drr_write.drr_length =
2308                                     BSWAP_64(drr->drr_u.drr_spill.drr_length);
2309                         }
2310                         (void) recv_read(hdl, fd, buf,
2311                             drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2312                         break;
2313                 case DRR_WRITE_BYREF:
2314                 case DRR_FREEOBJECTS:
2315                 case DRR_FREE:
2316                         break;
2317
2318                 default:
2319                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2320                             "invalid record type"));
2321                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2322                 }
2323         }
2324
2325         free(buf);
2326         return (-1);
2327 }
2328
2329 /*
2330  * Restores a backup of tosnap from the file descriptor specified by infd.
2331  */
2332 static int
2333 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2334     recvflags_t flags, dmu_replay_record_t *drr,
2335     dmu_replay_record_t *drr_noswap, const char *sendfs,
2336     nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2337     uint64_t *action_handlep)
2338 {
2339         zfs_cmd_t zc = { 0 };
2340         time_t begin_time;
2341         int ioctl_err, ioctl_errno, err;
2342         char *cp;
2343         struct drr_begin *drrb = &drr->drr_u.drr_begin;
2344         char errbuf[1024];
2345         char prop_errbuf[1024];
2346         const char *chopprefix;
2347         boolean_t newfs = B_FALSE;
2348         boolean_t stream_wantsnewfs;
2349         uint64_t parent_snapguid = 0;
2350         prop_changelist_t *clp = NULL;
2351         nvlist_t *snapprops_nvlist = NULL;
2352         zprop_errflags_t prop_errflags;
2353         boolean_t recursive;
2354
2355         begin_time = time(NULL);
2356
2357         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2358             "cannot receive"));
2359
2360         recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2361             ENOENT);
2362
2363         if (stream_avl != NULL) {
2364                 char *snapname;
2365                 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2366                     &snapname);
2367                 nvlist_t *props;
2368                 int ret;
2369
2370                 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
2371                     &parent_snapguid);
2372                 err = nvlist_lookup_nvlist(fs, "props", &props);
2373                 if (err)
2374                         VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2375
2376                 if (flags.canmountoff) {
2377                         VERIFY(0 == nvlist_add_uint64(props,
2378                             zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2379                 }
2380                 ret = zcmd_write_src_nvlist(hdl, &zc, props);
2381                 if (err)
2382                         nvlist_free(props);
2383
2384                 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2385                         VERIFY(0 == nvlist_lookup_nvlist(props,
2386                             snapname, &snapprops_nvlist));
2387                 }
2388
2389                 if (ret != 0)
2390                         return (-1);
2391         }
2392
2393         cp = NULL;
2394
2395         /*
2396          * Determine how much of the snapshot name stored in the stream
2397          * we are going to tack on to the name they specified on the
2398          * command line, and how much we are going to chop off.
2399          *
2400          * If they specified a snapshot, chop the entire name stored in
2401          * the stream.
2402          */
2403         if (flags.istail) {
2404                 /*
2405                  * A filesystem was specified with -e. We want to tack on only
2406                  * the tail of the sent snapshot path.
2407                  */
2408                 if (strchr(tosnap, '@')) {
2409                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2410                             "argument - snapshot not allowed with -e"));
2411                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2412                 }
2413
2414                 chopprefix = strrchr(sendfs, '/');
2415
2416                 if (chopprefix == NULL) {
2417                         /*
2418                          * The tail is the poolname, so we need to
2419                          * prepend a path separator.
2420                          */
2421                         int len = strlen(drrb->drr_toname);
2422                         cp = malloc(len + 2);
2423                         cp[0] = '/';
2424                         (void) strcpy(&cp[1], drrb->drr_toname);
2425                         chopprefix = cp;
2426                 } else {
2427                         chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2428                 }
2429         } else if (flags.isprefix) {
2430                 /*
2431                  * A filesystem was specified with -d. We want to tack on
2432                  * everything but the first element of the sent snapshot path
2433                  * (all but the pool name).
2434                  */
2435                 if (strchr(tosnap, '@')) {
2436                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2437                             "argument - snapshot not allowed with -d"));
2438                         return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2439                 }
2440
2441                 chopprefix = strchr(drrb->drr_toname, '/');
2442                 if (chopprefix == NULL)
2443                         chopprefix = strchr(drrb->drr_toname, '@');
2444         } else if (strchr(tosnap, '@') == NULL) {
2445                 /*
2446                  * If a filesystem was specified without -d or -e, we want to
2447                  * tack on everything after the fs specified by 'zfs send'.
2448                  */
2449                 chopprefix = drrb->drr_toname + strlen(sendfs);
2450         } else {
2451                 /* A snapshot was specified as an exact path (no -d or -e). */
2452                 if (recursive) {
2453                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2454                             "cannot specify snapshot name for multi-snapshot "
2455                             "stream"));
2456                         return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2457                 }
2458                 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2459         }
2460
2461         ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2462         ASSERT(chopprefix > drrb->drr_toname);
2463         ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2464         ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2465             chopprefix[0] == '\0');
2466
2467         /*
2468          * Determine name of destination snapshot, store in zc_value.
2469          */
2470         (void) strcpy(zc.zc_top_ds, tosnap);
2471         (void) strcpy(zc.zc_value, tosnap);
2472         (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2473         free(cp);
2474         if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2475                 zcmd_free_nvlists(&zc);
2476                 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2477         }
2478
2479         /*
2480          * Determine the name of the origin snapshot, store in zc_string.
2481          */
2482         if (drrb->drr_flags & DRR_FLAG_CLONE) {
2483                 if (guid_to_name(hdl, tosnap,
2484                     drrb->drr_fromguid, zc.zc_string) != 0) {
2485                         zcmd_free_nvlists(&zc);
2486                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2487                             "local origin for clone %s does not exist"),
2488                             zc.zc_value);
2489                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2490                 }
2491                 if (flags.verbose)
2492                         (void) printf("found clone origin %s\n", zc.zc_string);
2493         }
2494
2495         stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
2496             (drrb->drr_flags & DRR_FLAG_CLONE));
2497
2498         if (stream_wantsnewfs) {
2499                 /*
2500                  * if the parent fs does not exist, look for it based on
2501                  * the parent snap GUID
2502                  */
2503                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2504                     "cannot receive new filesystem stream"));
2505
2506                 (void) strcpy(zc.zc_name, zc.zc_value);
2507                 cp = strrchr(zc.zc_name, '/');
2508                 if (cp)
2509                         *cp = '\0';
2510                 if (cp &&
2511                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2512                         char suffix[ZFS_MAXNAMELEN];
2513                         (void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2514                         if (guid_to_name(hdl, tosnap, parent_snapguid,
2515                             zc.zc_value) == 0) {
2516                                 *strchr(zc.zc_value, '@') = '\0';
2517                                 (void) strcat(zc.zc_value, suffix);
2518                         }
2519                 }
2520         } else {
2521                 /*
2522                  * if the fs does not exist, look for it based on the
2523                  * fromsnap GUID
2524                  */
2525                 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2526                     "cannot receive incremental stream"));
2527
2528                 (void) strcpy(zc.zc_name, zc.zc_value);
2529                 *strchr(zc.zc_name, '@') = '\0';
2530
2531                 /*
2532                  * If the exact receive path was specified and this is the
2533                  * topmost path in the stream, then if the fs does not exist we
2534                  * should look no further.
2535                  */
2536                 if ((flags.isprefix || (*(chopprefix = drrb->drr_toname +
2537                     strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2538                     !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2539                         char snap[ZFS_MAXNAMELEN];
2540                         (void) strcpy(snap, strchr(zc.zc_value, '@'));
2541                         if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
2542                             zc.zc_value) == 0) {
2543                                 *strchr(zc.zc_value, '@') = '\0';
2544                                 (void) strcat(zc.zc_value, snap);
2545                         }
2546                 }
2547         }
2548
2549         (void) strcpy(zc.zc_name, zc.zc_value);
2550         *strchr(zc.zc_name, '@') = '\0';
2551
2552         if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2553                 zfs_handle_t *zhp;
2554
2555                 /*
2556                  * Destination fs exists.  Therefore this should either
2557                  * be an incremental, or the stream specifies a new fs
2558                  * (full stream or clone) and they want us to blow it
2559                  * away (and have therefore specified -F and removed any
2560                  * snapshots).
2561                  */
2562                 if (stream_wantsnewfs) {
2563                         if (!flags.force) {
2564                                 zcmd_free_nvlists(&zc);
2565                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2566                                     "destination '%s' exists\n"
2567                                     "must specify -F to overwrite it"),
2568                                     zc.zc_name);
2569                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2570                         }
2571                         if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2572                             &zc) == 0) {
2573                                 zcmd_free_nvlists(&zc);
2574                                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2575                                     "destination has snapshots (eg. %s)\n"
2576                                     "must destroy them to overwrite it"),
2577                                     zc.zc_name);
2578                                 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2579                         }
2580                 }
2581
2582                 if ((zhp = zfs_open(hdl, zc.zc_name,
2583                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2584                         zcmd_free_nvlists(&zc);
2585                         return (-1);
2586                 }
2587
2588                 if (stream_wantsnewfs &&
2589                     zhp->zfs_dmustats.dds_origin[0]) {
2590                         zcmd_free_nvlists(&zc);
2591                         zfs_close(zhp);
2592                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2593                             "destination '%s' is a clone\n"
2594                             "must destroy it to overwrite it"),
2595                             zc.zc_name);
2596                         return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2597                 }
2598
2599                 if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2600                     stream_wantsnewfs) {
2601                         /* We can't do online recv in this case */
2602                         clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2603                         if (clp == NULL) {
2604                                 zfs_close(zhp);
2605                                 zcmd_free_nvlists(&zc);
2606                                 return (-1);
2607                         }
2608                         if (changelist_prefix(clp) != 0) {
2609                                 changelist_free(clp);
2610                                 zfs_close(zhp);
2611                                 zcmd_free_nvlists(&zc);
2612                                 return (-1);
2613                         }
2614                 }
2615                 zfs_close(zhp);
2616         } else {
2617                 /*
2618                  * Destination filesystem does not exist.  Therefore we better
2619                  * be creating a new filesystem (either from a full backup, or
2620                  * a clone).  It would therefore be invalid if the user
2621                  * specified only the pool name (i.e. if the destination name
2622                  * contained no slash character).
2623                  */
2624                 if (!stream_wantsnewfs ||
2625                     (cp = strrchr(zc.zc_name, '/')) == NULL) {
2626                         zcmd_free_nvlists(&zc);
2627                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2628                             "destination '%s' does not exist"), zc.zc_name);
2629                         return (zfs_error(hdl, EZFS_NOENT, errbuf));
2630                 }
2631
2632                 /*
2633                  * Trim off the final dataset component so we perform the
2634                  * recvbackup ioctl to the filesystems's parent.
2635                  */
2636                 *cp = '\0';
2637
2638                 if (flags.isprefix && !flags.istail && !flags.dryrun &&
2639                     create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2640                         zcmd_free_nvlists(&zc);
2641                         return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2642                 }
2643
2644                 newfs = B_TRUE;
2645         }
2646
2647         zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2648         zc.zc_cookie = infd;
2649         zc.zc_guid = flags.force;
2650         if (flags.verbose) {
2651                 (void) printf("%s %s stream of %s into %s\n",
2652                     flags.dryrun ? "would receive" : "receiving",
2653                     drrb->drr_fromguid ? "incremental" : "full",
2654                     drrb->drr_toname, zc.zc_value);
2655                 (void) fflush(stdout);
2656         }
2657
2658         if (flags.dryrun) {
2659                 zcmd_free_nvlists(&zc);
2660                 return (recv_skip(hdl, infd, flags.byteswap));
2661         }
2662
2663         zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2664         zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2665         zc.zc_cleanup_fd = cleanup_fd;
2666         zc.zc_action_handle = *action_handlep;
2667
2668         err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2669         ioctl_errno = errno;
2670         prop_errflags = (zprop_errflags_t)zc.zc_obj;
2671
2672         if (err == 0) {
2673                 nvlist_t *prop_errors;
2674                 VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2675                     zc.zc_nvlist_dst_size, &prop_errors, 0));
2676
2677                 nvpair_t *prop_err = NULL;
2678
2679                 while ((prop_err = nvlist_next_nvpair(prop_errors,
2680                     prop_err)) != NULL) {
2681                         char tbuf[1024];
2682                         zfs_prop_t prop;
2683                         int intval;
2684
2685                         prop = zfs_name_to_prop(nvpair_name(prop_err));
2686                         (void) nvpair_value_int32(prop_err, &intval);
2687                         if (strcmp(nvpair_name(prop_err),
2688                             ZPROP_N_MORE_ERRORS) == 0) {
2689                                 trunc_prop_errs(intval);
2690                                 break;
2691                         } else {
2692                                 (void) snprintf(tbuf, sizeof (tbuf),
2693                                     dgettext(TEXT_DOMAIN,
2694                                     "cannot receive %s property on %s"),
2695                                     nvpair_name(prop_err), zc.zc_name);
2696                                 zfs_setprop_error(hdl, prop, intval, tbuf);
2697                         }
2698                 }
2699                 nvlist_free(prop_errors);
2700         }
2701
2702         zc.zc_nvlist_dst = 0;
2703         zc.zc_nvlist_dst_size = 0;
2704         zcmd_free_nvlists(&zc);
2705
2706         if (err == 0 && snapprops_nvlist) {
2707                 zfs_cmd_t zc2 = { 0 };
2708
2709                 (void) strcpy(zc2.zc_name, zc.zc_value);
2710                 zc2.zc_cookie = B_TRUE; /* received */
2711                 if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
2712                         (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
2713                         zcmd_free_nvlists(&zc2);
2714                 }
2715         }
2716
2717         if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
2718                 /*
2719                  * It may be that this snapshot already exists,
2720                  * in which case we want to consume & ignore it
2721                  * rather than failing.
2722                  */
2723                 avl_tree_t *local_avl;
2724                 nvlist_t *local_nv, *fs;
2725                 cp = strchr(zc.zc_value, '@');
2726
2727                 /*
2728                  * XXX Do this faster by just iterating over snaps in
2729                  * this fs.  Also if zc_value does not exist, we will
2730                  * get a strange "does not exist" error message.
2731                  */
2732                 *cp = '\0';
2733                 if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
2734                     &local_nv, &local_avl) == 0) {
2735                         *cp = '@';
2736                         fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
2737                         fsavl_destroy(local_avl);
2738                         nvlist_free(local_nv);
2739
2740                         if (fs != NULL) {
2741                                 if (flags.verbose) {
2742                                         (void) printf("snap %s already exists; "
2743                                             "ignoring\n", zc.zc_value);
2744                                 }
2745                                 err = ioctl_err = recv_skip(hdl, infd,
2746                                     flags.byteswap);
2747                         }
2748                 }
2749                 *cp = '@';
2750         }
2751
2752         if (ioctl_err != 0) {
2753                 switch (ioctl_errno) {
2754                 case ENODEV:
2755                         cp = strchr(zc.zc_value, '@');
2756                         *cp = '\0';
2757                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2758                             "most recent snapshot of %s does not\n"
2759                             "match incremental source"), zc.zc_value);
2760                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2761                         *cp = '@';
2762                         break;
2763                 case ETXTBSY:
2764                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2765                             "destination %s has been modified\n"
2766                             "since most recent snapshot"), zc.zc_name);
2767                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2768                         break;
2769                 case EEXIST:
2770                         cp = strchr(zc.zc_value, '@');
2771                         if (newfs) {
2772                                 /* it's the containing fs that exists */
2773                                 *cp = '\0';
2774                         }
2775                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2776                             "destination already exists"));
2777                         (void) zfs_error_fmt(hdl, EZFS_EXISTS,
2778                             dgettext(TEXT_DOMAIN, "cannot restore to %s"),
2779                             zc.zc_value);
2780                         *cp = '@';
2781                         break;
2782                 case EINVAL:
2783                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2784                         break;
2785                 case ECKSUM:
2786                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2787                             "invalid stream (checksum mismatch)"));
2788                         (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2789                         break;
2790                 case ENOTSUP:
2791                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2792                             "pool must be upgraded to receive this stream."));
2793                         (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
2794                         break;
2795                 case EDQUOT:
2796                         zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2797                             "destination %s space quota exceeded"), zc.zc_name);
2798                         (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2799                         break;
2800                 default:
2801                         (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
2802                 }
2803         }
2804
2805         /*
2806          * Mount the target filesystem (if created).  Also mount any
2807          * children of the target filesystem if we did a replication
2808          * receive (indicated by stream_avl being non-NULL).
2809          */
2810         cp = strchr(zc.zc_value, '@');
2811         if (cp && (ioctl_err == 0 || !newfs)) {
2812                 zfs_handle_t *h;
2813
2814                 *cp = '\0';
2815                 h = zfs_open(hdl, zc.zc_value,
2816                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
2817                 if (h != NULL) {
2818                         if (h->zfs_type == ZFS_TYPE_VOLUME) {
2819                                 *cp = '@';
2820                         } else if (newfs || stream_avl) {
2821                                 /*
2822                                  * Track the first/top of hierarchy fs,
2823                                  * for mounting and sharing later.
2824                                  */
2825                                 if (top_zfs && *top_zfs == NULL)
2826                                         *top_zfs = zfs_strdup(hdl, zc.zc_value);
2827                         }
2828                         zfs_close(h);
2829                 }
2830                 *cp = '@';
2831         }
2832
2833         if (clp) {
2834                 err |= changelist_postfix(clp);
2835                 changelist_free(clp);
2836         }
2837
2838         if (prop_errflags & ZPROP_ERR_NOCLEAR) {
2839                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
2840                     "failed to clear unreceived properties on %s"),
2841                     zc.zc_name);
2842                 (void) fprintf(stderr, "\n");
2843         }
2844         if (prop_errflags & ZPROP_ERR_NORESTORE) {
2845                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
2846                     "failed to restore original properties on %s"),
2847                     zc.zc_name);
2848                 (void) fprintf(stderr, "\n");
2849         }
2850
2851         if (err || ioctl_err)
2852                 return (-1);
2853
2854         *action_handlep = zc.zc_action_handle;
2855
2856         if (flags.verbose) {
2857                 char buf1[64];
2858                 char buf2[64];
2859                 uint64_t bytes = zc.zc_cookie;
2860                 time_t delta = time(NULL) - begin_time;
2861                 if (delta == 0)
2862                         delta = 1;
2863                 zfs_nicenum(bytes, buf1, sizeof (buf1));
2864                 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
2865
2866                 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
2867                     buf1, delta, buf2);
2868         }
2869
2870         return (0);
2871 }
2872
2873 static int
2874 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2875     int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
2876     char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2877 {
2878         int err;
2879         dmu_replay_record_t drr, drr_noswap;
2880         struct drr_begin *drrb = &drr.drr_u.drr_begin;
2881         char errbuf[1024];
2882         zio_cksum_t zcksum = { 0 };
2883         uint64_t featureflags;
2884         int hdrtype;
2885
2886         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2887             "cannot receive"));
2888
2889         if (flags.isprefix &&
2890             !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
2891                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
2892                     "(%s) does not exist"), tosnap);
2893                 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2894         }
2895
2896         /* read in the BEGIN record */
2897         if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
2898             &zcksum)))
2899                 return (err);
2900
2901         if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
2902                 /* It's the double end record at the end of a package */
2903                 return (ENODATA);
2904         }
2905
2906         /* the kernel needs the non-byteswapped begin record */
2907         drr_noswap = drr;
2908
2909         flags.byteswap = B_FALSE;
2910         if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
2911                 /*
2912                  * We computed the checksum in the wrong byteorder in
2913                  * recv_read() above; do it again correctly.
2914                  */
2915                 bzero(&zcksum, sizeof (zio_cksum_t));
2916                 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
2917                 flags.byteswap = B_TRUE;
2918
2919                 drr.drr_type = BSWAP_32(drr.drr_type);
2920                 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
2921                 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
2922                 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
2923                 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
2924                 drrb->drr_type = BSWAP_32(drrb->drr_type);
2925                 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
2926                 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
2927                 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
2928         }
2929
2930         if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
2931                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2932                     "stream (bad magic number)"));
2933                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2934         }
2935
2936         featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
2937         hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
2938
2939         if (!DMU_STREAM_SUPPORTED(featureflags) ||
2940             (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
2941                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2942                     "stream has unsupported feature, feature flags = %lx"),
2943                     featureflags);
2944                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2945         }
2946
2947         if (strchr(drrb->drr_toname, '@') == NULL) {
2948                 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2949                     "stream (bad snapshot name)"));
2950                 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2951         }
2952
2953         if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
2954                 char nonpackage_sendfs[ZFS_MAXNAMELEN];
2955                 if (sendfs == NULL) {
2956                         /*
2957                          * We were not called from zfs_receive_package(). Get
2958                          * the fs specified by 'zfs send'.
2959                          */
2960                         char *cp;
2961                         (void) strlcpy(nonpackage_sendfs,
2962                             drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
2963                         if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
2964                                 *cp = '\0';
2965                         sendfs = nonpackage_sendfs;
2966                 }
2967                 return (zfs_receive_one(hdl, infd, tosnap, flags,
2968                     &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
2969                     top_zfs, cleanup_fd, action_handlep));
2970         } else {
2971                 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
2972                     DMU_COMPOUNDSTREAM);
2973                 return (zfs_receive_package(hdl, infd, tosnap, flags,
2974                     &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
2975         }
2976 }
2977
2978 /*
2979  * Restores a backup of tosnap from the file descriptor specified by infd.
2980  * Return 0 on total success, -2 if some things couldn't be
2981  * destroyed/renamed/promoted, -1 if some things couldn't be received.
2982  * (-1 will override -2).
2983  */
2984 int
2985 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2986     int infd, avl_tree_t *stream_avl)
2987 {
2988         char *top_zfs = NULL;
2989         int err;
2990         int cleanup_fd;
2991         uint64_t action_handle = 0;
2992
2993         cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
2994         VERIFY(cleanup_fd >= 0);
2995
2996         err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
2997             stream_avl, &top_zfs, cleanup_fd, &action_handle);
2998
2999         VERIFY(0 == close(cleanup_fd));
3000
3001         if (err == 0 && !flags.nomount && top_zfs) {
3002                 zfs_handle_t *zhp;
3003                 prop_changelist_t *clp;
3004
3005                 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3006                 if (zhp != NULL) {
3007                         clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3008                             CL_GATHER_MOUNT_ALWAYS, 0);
3009                         zfs_close(zhp);
3010                         if (clp != NULL) {
3011                                 /* mount and share received datasets */
3012                                 err = changelist_postfix(clp);
3013                                 changelist_free(clp);
3014                         }
3015                 }
3016                 if (zhp == NULL || clp == NULL || err)
3017                         err = -1;
3018         }
3019         if (top_zfs)
3020                 free(top_zfs);
3021
3022         return (err);
3023 }