1 /* dump.c --- writing filesystem contents into a portable 'dumpfile' format.
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
24 #include "svn_private_config.h"
25 #include "svn_pools.h"
26 #include "svn_error.h"
30 #include "svn_repos.h"
31 #include "svn_string.h"
32 #include "svn_dirent_uri.h"
35 #include "svn_checksum.h"
36 #include "svn_props.h"
37 #include "svn_sorts.h"
39 #include "private/svn_mergeinfo_private.h"
40 #include "private/svn_fs_private.h"
42 #define ARE_VALID_COPY_ARGS(p,r) ((p) && SVN_IS_VALID_REVNUM(r))
44 /*----------------------------------------------------------------------*/
48 /* Compute the delta between OLDROOT/OLDPATH and NEWROOT/NEWPATH and
49 store it into a new temporary file *TEMPFILE. OLDROOT may be NULL,
50 in which case the delta will be computed against an empty file, as
51 per the svn_fs_get_file_delta_stream docstring. Record the length
52 of the temporary file in *LEN, and rewind the file before
53 returning. */
/* Write the text delta between OLDROOT/OLDPATH and NEWROOT/NEWPATH, encoded
   as svndiff, into a uniquely named temporary file handed back in
   *TEMPFILE.  The file is opened with svn_io_file_del_on_pool_cleanup.
   The current file position after writing is read with an APR_CUR seek,
   and the function returns the result of seeking back with APR_SET.
   NOTE(review): the return-type line, the braces, the declarations of
   `offset' and `whb', and the statements between the two seeks (where
   *LEN is presumably assigned) are not visible in this view -- confirm
   against the complete file. */
55 store_delta(apr_file_t **tempfile, svn_filesize_t *len,
56 svn_fs_root_t *oldroot, const char *oldpath,
57 svn_fs_root_t *newroot, const char *newpath, apr_pool_t *pool)
59 svn_stream_t *temp_stream;
61 svn_txdelta_stream_t *delta_stream;
62 svn_txdelta_window_handler_t wh;
65 /* Create a temporary file and open a stream to it. Note that we need
66 the file handle in order to rewind it. */
67 SVN_ERR(svn_io_open_unique_file3(tempfile, NULL, NULL,
68 svn_io_file_del_on_pool_cleanup,
70 temp_stream = svn_stream_from_aprfile2(*tempfile, TRUE, pool);
72 /* Compute the delta and send it to the temporary file. */
73 SVN_ERR(svn_fs_get_file_delta_stream(&delta_stream, oldroot, oldpath,
74 newroot, newpath, pool));
75 svn_txdelta_to_svndiff3(&wh, &whb, temp_stream, 0,
76 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
77 SVN_ERR(svn_txdelta_send_txstream(delta_stream, wh, whb, pool));
79 /* Get the length of the temporary file and rewind it. */
80 SVN_ERR(svn_io_file_seek(*tempfile, APR_CUR, &offset, pool));
83 return svn_io_file_seek(*tempfile, APR_SET, &offset, pool);
87 /*----------------------------------------------------------------------*/
89 /** An editor which dumps node-data in 'dumpfile format' to a file. **/
91 /* Look, mom! No file batons! */
/* Members of the edit baton: the state shared by all directory batons of
   one dump (or verify) edit.  get_dump_editor allocates the structure
   with apr_pcalloc and fills in most of these members.
   NOTE(review): the struct header and some member declarations (the ones
   the `path', `notify_baton', `fs' and `bufsize' uses elsewhere in this
   file refer to) are not visible in this view -- confirm against the
   complete file. */
95 /* The relpath which implicitly prepends all full paths coming into
96 this editor. This will almost always be "". */
99 /* The stream to dump to. */
100 svn_stream_t *stream;
102 /* Send feedback here, if non-NULL */
103 svn_repos_notify_func_t notify_func;
106 /* The fs revision root, so we can read the contents of paths. */
107 svn_fs_root_t *fs_root;
108 svn_revnum_t current_rev;
110 /* The fs, so we can grab historic information if needed. */
113 /* True if dumped nodes should output deltas instead of full text. */
114 svn_boolean_t use_deltas;
116 /* True if this "dump" is in fact a verify. */
117 svn_boolean_t verify;
119 /* The first revision dumped in this dumpstream. */
120 svn_revnum_t oldest_dumped_rev;
122 /* If not NULL, set to true if any references to revisions older than
123 OLDEST_DUMPED_REV were found in the dumpstream. */
124 svn_boolean_t *found_old_reference;
126 /* If not NULL, set to true if any mergeinfo was dumped which contains
127 revisions older than OLDEST_DUMPED_REV. */
128 svn_boolean_t *found_old_mergeinfo;
130 /* reusable buffer for writing file contents */
131 char buffer[SVN__STREAM_CHUNK_SIZE];
/* Members of the per-directory baton that make_dir_baton fills in for
   every directory the editor opens or adds.
   NOTE(review): the struct header and the declarations of the `added',
   `path' and `pool' members (all used elsewhere in this file) are not
   visible in this view -- confirm against the complete file. */
137 struct edit_baton *edit_baton;
138 struct dir_baton *parent_dir_baton;
140 /* is this directory a new addition to this revision? */
143 /* has this directory been written to the output stream? */
144 svn_boolean_t written_out;
146 /* the repository relpath associated with this directory */
149 /* The comparison repository relpath and revision of this directory.
150 If both of these are valid, use them as a source against which to
151 compare the directory instead of the default comparison source of
152 PATH in the previous revision. */
153 const char *cmp_path;
154 svn_revnum_t cmp_rev;
156 /* hash of paths that need to be deleted, though some -might- be
157 replaced. maps const char * paths to this dir_baton. (they're
158 full paths, because that's what the editor driver gives us. but
159 really, they're all within this directory.) */
160 apr_hash_t *deleted_entries;
162 /* pool to be used for deleting the hash items */
167 /* Make a directory baton to represent the directory whose path
168 (relative to EDIT_BATON's path) is PATH.
170 CMP_PATH/CMP_REV are the path/revision against which this directory
171 should be compared for changes. If either is omitted (NULL for the
172 path, SVN_INVALID_REVNUM for the rev), just compare this directory
173 PATH against itself in the previous revision.
175 PARENT_DIR_BATON is the directory baton of this directory's parent,
176 or NULL if this is the top-level directory of the edit. ADDED
177 indicates if this directory is newly added in this revision.
178 Perform all allocations in POOL. */
/* The new baton is zero-filled by apr_pcalloc, starts with WRITTEN_OUT
   set to FALSE and gets a fresh, empty DELETED_ENTRIES hash.  CMP_PATH
   is stored after passing through svn_relpath_canonicalize.
   NOTE(review): some parameters, the braces, the conditions guarding the
   full_path / cmp_path statements and the tail of the function are not
   visible in this view -- confirm against the complete file. */
179 static struct dir_baton *
180 make_dir_baton(const char *path,
181 const char *cmp_path,
182 svn_revnum_t cmp_rev,
184 void *parent_dir_baton,
188 struct edit_baton *eb = edit_baton;
189 struct dir_baton *pb = parent_dir_baton;
190 struct dir_baton *new_db = apr_pcalloc(pool, sizeof(*new_db));
191 const char *full_path;
193 /* A path relative to nothing? I don't think so. */
194 SVN_ERR_ASSERT_NO_RETURN(!path || pb);
196 /* Construct the full path of this node. */
198 full_path = svn_relpath_join(eb->path, path, pool);
200 full_path = apr_pstrdup(pool, eb->path);
202 /* Remove leading slashes from copyfrom paths. */
204 cmp_path = svn_relpath_canonicalize(cmp_path, pool);
206 new_db->edit_baton = eb;
207 new_db->parent_dir_baton = pb;
208 new_db->path = full_path;
209 new_db->cmp_path = cmp_path;
210 new_db->cmp_rev = cmp_rev;
211 new_db->added = added;
212 new_db->written_out = FALSE;
213 new_db->deleted_entries = apr_hash_make(pool);
220 /* If the mergeinfo in MERGEINFO_STR refers to any revisions older than
221 * OLDEST_DUMPED_REV, issue a warning and set *FOUND_OLD_MERGEINFO to TRUE,
222 * otherwise leave *FOUND_OLD_MERGEINFO unchanged.
223 */
/* Implementation summary: parse MERGEINFO_STR, filter the parsed
   mergeinfo with svn_mergeinfo__filter_mergeinfo_by_ranges using the
   values OLDEST_DUMPED_REV - 1 and 0, and, when the filtered result is
   non-empty, build an svn_repos_notify_warning notification of kind
   svn_repos_notify_warning_found_old_mergeinfo, set *FOUND_OLD_MERGEINFO
   to TRUE if that pointer is non-NULL, and hand the notification to
   NOTIFY_FUNC.  Parse and filter errors are returned through SVN_ERR.
   NOTE(review): the return-type line, the braces, two parameters, and
   parts of the filter call and of the warning message are not visible
   in this view -- confirm against the complete file. */
225 verify_mergeinfo_revisions(svn_boolean_t *found_old_mergeinfo,
226 const char *mergeinfo_str,
227 svn_revnum_t oldest_dumped_rev,
228 svn_repos_notify_func_t notify_func,
232 svn_mergeinfo_t mergeinfo, old_mergeinfo;
234 SVN_ERR(svn_mergeinfo_parse(&mergeinfo, mergeinfo_str, pool));
235 SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges(
236 &old_mergeinfo, mergeinfo,
237 oldest_dumped_rev - 1, 0,
240 if (apr_hash_count(old_mergeinfo))
242 svn_repos_notify_t *notify =
243 svn_repos_notify_create(svn_repos_notify_warning, pool);
245 notify->warning = svn_repos_notify_warning_found_old_mergeinfo;
246 notify->warning_str = apr_psprintf(
248 _("Mergeinfo referencing revision(s) prior "
249 "to the oldest dumped revision (r%ld). "
250 "Loading this dump may result in invalid "
254 if (found_old_mergeinfo)
255 *found_old_mergeinfo = TRUE;
256 notify_func(notify_baton, notify, pool);
263 /* This helper is the main "meat" of the editor -- it does all the
264 work of writing a node record.
266 Write out a node record for PATH of type KIND under EB->FS_ROOT.
267 ACTION describes what is happening to the node (see enum svn_node_action).
268 Write record to writable EB->STREAM, using EB->BUFFER to write in chunks.
270 If the node was itself copied, IS_COPY is TRUE and the
271 path/revision of the copy source are in CMP_PATH/CMP_REV. If
272 IS_COPY is FALSE, yet CMP_PATH/CMP_REV are valid, this node is part
273 of a copied subtree.
274 */
/* Emit one node record for PATH to EB->STREAM.

   Flow visible in this function, in order:
   - When EB->VERIFY or EB->NOTIFY_FUNC is set, PATH is checked with
     svn_fs__path_valid.  A failure is reported to EB->NOTIFY_FUNC as an
     svn_repos_notify_warning_invalid_fspath warning; the error is then
     either returned or cleared (the deciding condition is not visible
     here).
   - The SVN_REPOS_DUMPFILE_NODE_PATH header is written, then
     SVN_REPOS_DUMPFILE_NODE_KIND for files and directories only, then
     SVN_REPOS_DUMPFILE_NODE_ACTION according to ACTION.
   - For the 'replace' case handled as delete + add-with-history, a
     delete action is written and the function calls itself with
     svn_node_action_add to write the second record.
   - For 'change' and for copied adds, svn_fs_props_changed and
     svn_fs_contents_changed decide whether properties and text are
     dumped.  A simple add or replace dumps the properties, and the text
     when KIND is svn_node_file.  A delete dumps neither.
   - For a copied add, a warning of kind
     svn_repos_notify_warning_found_old_reference is sent when CMP_REV
     is older than EB->OLDEST_DUMPED_REV, and the copy-source MD5 and
     SHA1 checksums are written for files.
   - Properties are serialized with svn_hash_write_incremental into a
     stringbuf; text is either streamed from the filesystem or read back
     from the temporary file produced by store_delta.
   - Both visible exits finish by writing "\n\n" to EB->STREAM.

   NOTE(review): the return-type line, the braces, the PATH and POOL
   parameters, several local declarations and a number of `if'/`else'
   lines are not visible in this view, so the exact nesting of the
   branches above must be confirmed against the complete file. */
276 dump_node(struct edit_baton *eb,
278 svn_node_kind_t kind,
279 enum svn_node_action action,
280 svn_boolean_t is_copy,
281 const char *cmp_path,
282 svn_revnum_t cmp_rev,
285 svn_stringbuf_t *propstring;
286 svn_filesize_t content_length = 0;
288 svn_boolean_t must_dump_text = FALSE, must_dump_props = FALSE;
289 const char *compare_path = path;
290 svn_revnum_t compare_rev = eb->current_rev - 1;
291 svn_fs_root_t *compare_root = NULL;
292 apr_file_t *delta_file = NULL;
294 /* Maybe validate the path. */
295 if (eb->verify || eb->notify_func)
297 svn_error_t *err = svn_fs__path_valid(path, pool);
303 char errbuf[512]; /* ### svn_strerror() magic number */
304 svn_repos_notify_t *notify;
305 notify = svn_repos_notify_create(svn_repos_notify_warning, pool);
307 notify->warning = svn_repos_notify_warning_invalid_fspath;
308 notify->warning_str = apr_psprintf(
310 _("E%06d: While validating fspath '%s': %s"),
312 svn_err_best_message(err, errbuf, sizeof(errbuf)));
314 eb->notify_func(eb->notify_baton, notify, pool);
317 /* Return the error in addition to notifying about it. */
319 return svn_error_trace(err);
321 svn_error_clear(err);
325 /* Write out metadata headers for this file node. */
326 SVN_ERR(svn_stream_printf(eb->stream, pool,
327 SVN_REPOS_DUMPFILE_NODE_PATH ": %s\n",
329 if (kind == svn_node_file)
330 SVN_ERR(svn_stream_puts(eb->stream,
331 SVN_REPOS_DUMPFILE_NODE_KIND ": file\n"));
332 else if (kind == svn_node_dir)
333 SVN_ERR(svn_stream_puts(eb->stream,
334 SVN_REPOS_DUMPFILE_NODE_KIND ": dir\n"));
336 /* Remove leading slashes from copyfrom paths. */
338 cmp_path = svn_relpath_canonicalize(cmp_path, pool);
340 /* Validate the comparison path/rev. */
341 if (ARE_VALID_COPY_ARGS(cmp_path, cmp_rev))
343 compare_path = cmp_path;
344 compare_rev = cmp_rev;
347 if (action == svn_node_action_change)
349 SVN_ERR(svn_stream_puts(eb->stream,
350 SVN_REPOS_DUMPFILE_NODE_ACTION ": change\n"));
352 /* either the text or props changed, or possibly both. */
353 SVN_ERR(svn_fs_revision_root(&compare_root,
354 svn_fs_root_fs(eb->fs_root),
357 SVN_ERR(svn_fs_props_changed(&must_dump_props,
358 compare_root, compare_path,
359 eb->fs_root, path, pool));
360 if (kind == svn_node_file)
361 SVN_ERR(svn_fs_contents_changed(&must_dump_text,
362 compare_root, compare_path,
363 eb->fs_root, path, pool));
365 else if (action == svn_node_action_replace)
369 /* a simple delete+add, implied by a single 'replace' action. */
370 SVN_ERR(svn_stream_puts(eb->stream,
371 SVN_REPOS_DUMPFILE_NODE_ACTION
374 /* definitely need to dump all content for a replace. */
375 if (kind == svn_node_file)
376 must_dump_text = TRUE;
377 must_dump_props = TRUE;
381 /* more complex: delete original, then add-with-history. */
383 /* the path & kind headers have already been printed; just
384 add a delete action, and end the current record.*/
385 SVN_ERR(svn_stream_puts(eb->stream,
386 SVN_REPOS_DUMPFILE_NODE_ACTION
389 /* recurse: print an additional add-with-history record. */
390 SVN_ERR(dump_node(eb, path, kind, svn_node_action_add,
391 is_copy, compare_path, compare_rev, pool));
393 /* we can leave this routine quietly now, don't need to dump
394 any content; that was already done in the second record. */
395 must_dump_text = FALSE;
396 must_dump_props = FALSE;
399 else if (action == svn_node_action_delete)
401 SVN_ERR(svn_stream_puts(eb->stream,
402 SVN_REPOS_DUMPFILE_NODE_ACTION ": delete\n"));
404 /* we can leave this routine quietly now, don't need to dump
406 must_dump_text = FALSE;
407 must_dump_props = FALSE;
409 else if (action == svn_node_action_add)
411 SVN_ERR(svn_stream_puts(eb->stream,
412 SVN_REPOS_DUMPFILE_NODE_ACTION ": add\n"));
416 /* Dump all contents for a simple 'add'. */
417 if (kind == svn_node_file)
418 must_dump_text = TRUE;
419 must_dump_props = TRUE;
423 if (!eb->verify && cmp_rev < eb->oldest_dumped_rev
426 svn_repos_notify_t *notify =
427 svn_repos_notify_create(svn_repos_notify_warning, pool);
429 notify->warning = svn_repos_notify_warning_found_old_reference;
430 notify->warning_str = apr_psprintf(
432 _("Referencing data in revision %ld,"
433 " which is older than the oldest"
434 " dumped revision (r%ld). Loading this dump"
435 " into an empty repository"
437 cmp_rev, eb->oldest_dumped_rev);
438 if (eb->found_old_reference)
439 *eb->found_old_reference = TRUE;
440 eb->notify_func(eb->notify_baton, notify, pool);
443 SVN_ERR(svn_stream_printf(eb->stream, pool,
444 SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV
446 SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH
450 SVN_ERR(svn_fs_revision_root(&compare_root,
451 svn_fs_root_fs(eb->fs_root),
454 /* Need to decide if the copied node had any extra textual or
455 property mods as well. */
456 SVN_ERR(svn_fs_props_changed(&must_dump_props,
457 compare_root, compare_path,
458 eb->fs_root, path, pool));
459 if (kind == svn_node_file)
461 svn_checksum_t *checksum;
462 const char *hex_digest;
463 SVN_ERR(svn_fs_contents_changed(&must_dump_text,
464 compare_root, compare_path,
465 eb->fs_root, path, pool));
467 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
468 compare_root, compare_path,
470 hex_digest = svn_checksum_to_cstring(checksum, pool);
472 SVN_ERR(svn_stream_printf(eb->stream, pool,
473 SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_MD5
474 ": %s\n", hex_digest));
476 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
477 compare_root, compare_path,
479 hex_digest = svn_checksum_to_cstring(checksum, pool);
481 SVN_ERR(svn_stream_printf(eb->stream, pool,
482 SVN_REPOS_DUMPFILE_TEXT_COPY_SOURCE_SHA1
483 ": %s\n", hex_digest));
488 if ((! must_dump_text) && (! must_dump_props))
490 /* If we're not supposed to dump text or props, so be it, we can
491 just go home. However, if either one needs to be dumped,
492 then our dumpstream format demands that at a *minimum*, we
493 see a lone "PROPS-END" as a divider between text and props
494 content within the content-block. */
496 return svn_stream_write(eb->stream, "\n\n", &len); /* ### needed? */
499 /*** Start prepping content to dump... ***/
501 /* If we are supposed to dump properties, write out a property
502 length header and generate a stringbuf that contains those
503 property values here. */
506 apr_hash_t *prophash, *oldhash = NULL;
508 svn_stream_t *propstream;
510 SVN_ERR(svn_fs_node_proplist(&prophash, eb->fs_root, path, pool));
512 /* If this is a partial dump, then issue a warning if we dump mergeinfo
513 properties that refer to revisions older than the first revision
515 if (!eb->verify && eb->notify_func && eb->oldest_dumped_rev > 1)
517 svn_string_t *mergeinfo_str = svn_hash_gets(prophash,
521 /* An error in verifying the mergeinfo must not prevent dumping
522 the data. Ignore any such error. */
523 svn_error_clear(verify_mergeinfo_revisions(
524 eb->found_old_mergeinfo,
525 mergeinfo_str->data, eb->oldest_dumped_rev,
526 eb->notify_func, eb->notify_baton,
531 if (eb->use_deltas && compare_root)
533 /* Fetch the old property hash to diff against and output a header
534 saying that our property contents are a delta. */
535 SVN_ERR(svn_fs_node_proplist(&oldhash, compare_root, compare_path,
537 SVN_ERR(svn_stream_puts(eb->stream,
538 SVN_REPOS_DUMPFILE_PROP_DELTA ": true\n"));
541 oldhash = apr_hash_make(pool);
542 propstring = svn_stringbuf_create_ensure(0, pool);
543 propstream = svn_stream_from_stringbuf(propstring, pool);
544 SVN_ERR(svn_hash_write_incremental(prophash, oldhash, propstream,
546 SVN_ERR(svn_stream_close(propstream));
547 proplen = propstring->len;
548 content_length += proplen;
549 SVN_ERR(svn_stream_printf(eb->stream, pool,
550 SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH
551 ": %" APR_SIZE_T_FMT "\n", proplen));
554 /* If we are supposed to dump text, write out a text length header
555 here, and an MD5 checksum (if available). */
556 if (must_dump_text && (kind == svn_node_file))
558 svn_checksum_t *checksum;
559 const char *hex_digest;
560 svn_filesize_t textlen;
564 /* Compute the text delta now and write it into a temporary
565 file, so that we can find its length. Output a header
566 saying our text contents are a delta. */
567 SVN_ERR(store_delta(&delta_file, &textlen, compare_root,
568 compare_path, eb->fs_root, path, pool));
569 SVN_ERR(svn_stream_puts(eb->stream,
570 SVN_REPOS_DUMPFILE_TEXT_DELTA ": true\n"));
574 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
575 compare_root, compare_path,
577 hex_digest = svn_checksum_to_cstring(checksum, pool);
579 SVN_ERR(svn_stream_printf(eb->stream, pool,
580 SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_MD5
581 ": %s\n", hex_digest));
583 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
584 compare_root, compare_path,
586 hex_digest = svn_checksum_to_cstring(checksum, pool);
588 SVN_ERR(svn_stream_printf(eb->stream, pool,
589 SVN_REPOS_DUMPFILE_TEXT_DELTA_BASE_SHA1
590 ": %s\n", hex_digest));
595 /* Just fetch the length of the file. */
596 SVN_ERR(svn_fs_file_length(&textlen, eb->fs_root, path, pool));
599 content_length += textlen;
600 SVN_ERR(svn_stream_printf(eb->stream, pool,
601 SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH
602 ": %" SVN_FILESIZE_T_FMT "\n", textlen));
604 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_md5,
605 eb->fs_root, path, FALSE, pool));
606 hex_digest = svn_checksum_to_cstring(checksum, pool);
608 SVN_ERR(svn_stream_printf(eb->stream, pool,
609 SVN_REPOS_DUMPFILE_TEXT_CONTENT_MD5
610 ": %s\n", hex_digest));
612 SVN_ERR(svn_fs_file_checksum(&checksum, svn_checksum_sha1,
613 eb->fs_root, path, FALSE, pool));
614 hex_digest = svn_checksum_to_cstring(checksum, pool);
616 SVN_ERR(svn_stream_printf(eb->stream, pool,
617 SVN_REPOS_DUMPFILE_TEXT_CONTENT_SHA1
618 ": %s\n", hex_digest));
621 /* 'Content-length:' is the last header before we dump the content,
622 and is the sum of the text and prop contents lengths. We write
623 this only for the benefit of non-Subversion RFC-822 parsers. */
624 SVN_ERR(svn_stream_printf(eb->stream, pool,
625 SVN_REPOS_DUMPFILE_CONTENT_LENGTH
626 ": %" SVN_FILESIZE_T_FMT "\n\n",
629 /* Dump property content if we're supposed to do so. */
632 len = propstring->len;
633 SVN_ERR(svn_stream_write(eb->stream, propstring->data, &len));
636 /* Dump text content */
637 if (must_dump_text && (kind == svn_node_file))
639 svn_stream_t *contents;
643 /* Make sure to close the underlying file when the stream is
645 contents = svn_stream_from_aprfile2(delta_file, FALSE, pool);
648 SVN_ERR(svn_fs_file_contents(&contents, eb->fs_root, path, pool));
650 SVN_ERR(svn_stream_copy3(contents, svn_stream_disown(eb->stream, pool),
655 return svn_stream_write(eb->stream, "\n\n", &len); /* ### needed? */
/* Editor open_root callback (installed by get_dump_editor): store in
   *ROOT_BATON a directory baton made by make_dir_baton with a NULL path,
   no comparison path/revision, no parent baton and ADDED set to FALSE.
   NOTE(review): the return-type line, the remaining parameters, the
   braces and the return statement are not visible in this view. */
660 open_root(void *edit_baton,
661 svn_revnum_t base_revision,
665 *root_baton = make_dir_baton(NULL, NULL, SVN_INVALID_REVNUM,
666 edit_baton, NULL, FALSE, pool);
/* Editor delete_entry callback: no record is written here.  PATH is
   copied into the parent baton's pool and recorded in the parent's
   DELETED_ENTRIES hash.  close_directory later dumps a delete record for
   every path still in that hash, while add_directory and add_file look
   the path up to detect a replacement and then remove it.
   NOTE(review): the return-type line, the PARENT_BATON and pool
   parameters, the braces and the return statement are not visible in
   this view. */
672 delete_entry(const char *path,
673 svn_revnum_t revision,
677 struct dir_baton *pb = parent_baton;
678 const char *mypath = apr_pstrdup(pb->pool, path);
680 /* remember this path needs to be deleted. */
681 svn_hash_sets(pb->deleted_entries, mypath, pb);
/* Editor add_directory callback: create a directory baton with ADDED set
   to TRUE and dump a node record for PATH.  The record's action is
   svn_node_action_replace when PATH is present in the parent's
   DELETED_ENTRIES hash, otherwise svn_node_action_add.  The copy source
   is passed on only when COPYFROM_PATH/COPYFROM_REV are valid copy
   arguments.  The path is then removed from the parent's DELETED_ENTRIES
   hash, the new baton is marked as written out and returned in
   *CHILD_BATON.
   NOTE(review): the return-type line, some parameters and declarations,
   the braces, and any condition guarding the svn_hash_sets removal are
   not visible in this view. */
688 add_directory(const char *path,
690 const char *copyfrom_path,
691 svn_revnum_t copyfrom_rev,
695 struct dir_baton *pb = parent_baton;
696 struct edit_baton *eb = pb->edit_baton;
698 svn_boolean_t is_copy = FALSE;
699 struct dir_baton *new_db
700 = make_dir_baton(path, copyfrom_path, copyfrom_rev, eb, pb, TRUE, pool);
702 /* This might be a replacement -- is the path already deleted? */
703 val = svn_hash_gets(pb->deleted_entries, path);
705 /* Detect an add-with-history. */
706 is_copy = ARE_VALID_COPY_ARGS(copyfrom_path, copyfrom_rev);
709 SVN_ERR(dump_node(eb, path,
711 val ? svn_node_action_replace : svn_node_action_add,
713 is_copy ? copyfrom_path : NULL,
714 is_copy ? copyfrom_rev : SVN_INVALID_REVNUM,
718 /* Delete the path, it's now been dumped. */
719 svn_hash_sets(pb->deleted_entries, path, NULL);
721 new_db->written_out = TRUE;
723 *child_baton = new_db;
/* Editor open_directory callback: create a directory baton for PATH with
   ADDED set to FALSE and return it in *CHILD_BATON.  Nothing is dumped
   here.  When the parent baton carries a valid comparison path and
   revision, the child's comparison path is the parent's comparison path
   joined with the basename of PATH, at the same revision.
   NOTE(review): the return-type line, some parameters, the braces and
   the return statement are not visible in this view. */
729 open_directory(const char *path,
731 svn_revnum_t base_revision,
735 struct dir_baton *pb = parent_baton;
736 struct edit_baton *eb = pb->edit_baton;
737 struct dir_baton *new_db;
738 const char *cmp_path = NULL;
739 svn_revnum_t cmp_rev = SVN_INVALID_REVNUM;
741 /* If the parent directory has explicit comparison path and rev,
742 record the same for this one. */
743 if (ARE_VALID_COPY_ARGS(pb->cmp_path, pb->cmp_rev))
745 cmp_path = svn_relpath_join(pb->cmp_path,
746 svn_relpath_basename(path, pool), pool);
747 cmp_rev = pb->cmp_rev;
750 new_db = make_dir_baton(path, cmp_path, cmp_rev, eb, pb, FALSE, pool);
751 *child_baton = new_db;
/* Editor close_directory callback (used unless get_dump_editor is given
   a custom one): dump a delete record, with node kind svn_node_unknown,
   for every path still recorded in this directory's DELETED_ENTRIES
   hash.  The entries are visited in the order produced by svn_sort__hash
   with svn_sort_compare_items_lexically.  A subpool of POOL is cleared
   at the start of each iteration and destroyed after the loop.
   NOTE(review): the return-type line, the pool parameter, the
   declaration of `i', the braces and the return statement are not
   visible in this view. */
757 close_directory(void *dir_baton,
760 struct dir_baton *db = dir_baton;
761 struct edit_baton *eb = db->edit_baton;
762 apr_pool_t *subpool = svn_pool_create(pool);
764 apr_array_header_t *sorted_entries;
766 /* Sort entries lexically instead of as paths. Even though the entries
767 * are full paths they're all in the same directory (see comment in struct
768 * dir_baton definition). So we really want to sort by basename, in which
769 * case the lexical sort function is more efficient. */
770 sorted_entries = svn_sort__hash(db->deleted_entries,
771 svn_sort_compare_items_lexically, pool);
772 for (i = 0; i < sorted_entries->nelts; i++)
774 const char *path = APR_ARRAY_IDX(sorted_entries, i,
775 svn_sort__item_t).key;
777 svn_pool_clear(subpool);
779 /* By sending 'svn_node_unknown', the Node-kind: header simply won't
780 be written out. No big deal at all, really. The loader
782 SVN_ERR(dump_node(eb, path,
783 svn_node_unknown, svn_node_action_delete,
784 FALSE, NULL, SVN_INVALID_REVNUM, subpool));
787 svn_pool_destroy(subpool);
/* Editor add_file callback: dump a node record for PATH.  The record's
   action is svn_node_action_replace when PATH is present in the parent's
   DELETED_ENTRIES hash, otherwise svn_node_action_add.  The copy source
   is passed on only when COPYFROM_PATH/COPYFROM_REV are valid copy
   arguments.  The path is then removed from the parent's DELETED_ENTRIES
   hash.  No file baton is used: *FILE_BATON is set to NULL.
   NOTE(review): the return-type line, some parameters and declarations,
   the braces, any condition guarding the svn_hash_sets removal and the
   return statement are not visible in this view. */
793 add_file(const char *path,
795 const char *copyfrom_path,
796 svn_revnum_t copyfrom_rev,
800 struct dir_baton *pb = parent_baton;
801 struct edit_baton *eb = pb->edit_baton;
803 svn_boolean_t is_copy = FALSE;
805 /* This might be a replacement -- is the path already deleted? */
806 val = svn_hash_gets(pb->deleted_entries, path);
808 /* Detect add-with-history. */
809 is_copy = ARE_VALID_COPY_ARGS(copyfrom_path, copyfrom_rev);
812 SVN_ERR(dump_node(eb, path,
814 val ? svn_node_action_replace : svn_node_action_add,
816 is_copy ? copyfrom_path : NULL,
817 is_copy ? copyfrom_rev : SVN_INVALID_REVNUM,
821 /* delete the path, it's now been dumped. */
822 svn_hash_sets(pb->deleted_entries, path, NULL);
824 *file_baton = NULL; /* muhahahaha */
/* Editor open_file callback: dump a 'change' record for the file PATH.
   When the parent baton carries a valid comparison path and revision,
   the file is compared against the parent's comparison path joined with
   the basename of PATH, at the same revision.  Otherwise no comparison
   source is passed to dump_node.  No file baton is used: *FILE_BATON is
   set to NULL.
   NOTE(review): the return-type line, some parameters, the braces and
   the return statement are not visible in this view. */
830 open_file(const char *path,
832 svn_revnum_t ancestor_revision,
836 struct dir_baton *pb = parent_baton;
837 struct edit_baton *eb = pb->edit_baton;
838 const char *cmp_path = NULL;
839 svn_revnum_t cmp_rev = SVN_INVALID_REVNUM;
841 /* If the parent directory has explicit comparison path and rev,
842 record the same for this one. */
843 if (ARE_VALID_COPY_ARGS(pb->cmp_path, pb->cmp_rev))
845 cmp_path = svn_relpath_join(pb->cmp_path,
846 svn_relpath_basename(path, pool), pool);
847 cmp_rev = pb->cmp_rev;
850 SVN_ERR(dump_node(eb, path,
851 svn_node_file, svn_node_action_change,
852 FALSE, cmp_path, cmp_rev, pool));
854 *file_baton = NULL; /* muhahahaha again */
/* Editor change_dir_prop callback: the first time it is called for a
   directory that has not been written out yet, dump a 'change' record
   for that directory (using the baton's comparison path/revision) and
   mark the baton as written out.  The VALUE argument is not used in the
   lines visible here.
   NOTE(review): the return-type line, some parameters, the braces and
   the return statement are not visible in this view. */
860 change_dir_prop(void *parent_baton,
862 const svn_string_t *value,
865 struct dir_baton *db = parent_baton;
866 struct edit_baton *eb = db->edit_baton;
868 /* This function is what distinguishes between a directory that is
869 opened to merely get somewhere, vs. one that is opened because it
870 *actually* changed by itself. */
871 if (! db->written_out)
873 SVN_ERR(dump_node(eb, db->path,
874 svn_node_dir, svn_node_action_change,
875 FALSE, db->cmp_path, db->cmp_rev, pool));
876 db->written_out = TRUE;
/* Shim callback (installed as fetch_props_func by get_dump_editor): set
   *PROPS to the property list of PATH in BASE_REVISION, allocated in
   RESULT_POOL.  An invalid BASE_REVISION is replaced by the revision
   before the one being dumped (EB->CURRENT_REV - 1).  When the lookup
   fails with SVN_ERR_FS_NOT_FOUND the error is cleared and *PROPS is set
   to an empty hash; any other error is returned.
   NOTE(review): the return-type line, the BATON and PATH parameters, the
   declaration of `err', the braces and the lines between the empty-hash
   assignment and the final return are not visible in this view. */
882 fetch_props_func(apr_hash_t **props,
885 svn_revnum_t base_revision,
886 apr_pool_t *result_pool,
887 apr_pool_t *scratch_pool)
889 struct edit_baton *eb = baton;
891 svn_fs_root_t *fs_root;
893 if (!SVN_IS_VALID_REVNUM(base_revision))
894 base_revision = eb->current_rev - 1;
896 SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool));
898 err = svn_fs_node_proplist(props, fs_root, path, result_pool);
899 if (err && err->apr_err == SVN_ERR_FS_NOT_FOUND)
901 svn_error_clear(err);
902 *props = apr_hash_make(result_pool);
906 return svn_error_trace(err);
/* Shim callback (installed as fetch_kind_func by get_dump_editor): set
   *KIND to the result of svn_fs_check_path for PATH in BASE_REVISION.
   An invalid BASE_REVISION is replaced by the revision before the one
   being dumped (EB->CURRENT_REV - 1).
   NOTE(review): the return-type line, the BATON and PATH parameters, the
   braces and the return statement are not visible in this view. */
912 fetch_kind_func(svn_node_kind_t *kind,
915 svn_revnum_t base_revision,
916 apr_pool_t *scratch_pool)
918 struct edit_baton *eb = baton;
919 svn_fs_root_t *fs_root;
921 if (!SVN_IS_VALID_REVNUM(base_revision))
922 base_revision = eb->current_rev - 1;
924 SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool));
926 SVN_ERR(svn_fs_check_path(kind, fs_root, path, scratch_pool));
/* Shim callback (installed as fetch_base_func by get_dump_editor): copy
   the contents of PATH in BASE_REVISION into a uniquely named temporary
   file and store a RESULT_POOL copy of that file's name in *FILENAME.
   An invalid BASE_REVISION is replaced by the revision before the one
   being dumped (EB->CURRENT_REV - 1).  The temporary file is opened with
   svn_io_file_del_on_pool_cleanup against SCRATCH_POOL.  A
   SVN_ERR_FS_NOT_FOUND error from svn_fs_file_contents is cleared; what
   happens next in that case is not visible here.
   NOTE(review): the return-type line, the BATON and PATH parameters, the
   declaration of `err', the braces, the lines following
   svn_error_clear and the final return are not visible in this view. */
932 fetch_base_func(const char **filename,
935 svn_revnum_t base_revision,
936 apr_pool_t *result_pool,
937 apr_pool_t *scratch_pool)
939 struct edit_baton *eb = baton;
940 svn_stream_t *contents;
941 svn_stream_t *file_stream;
942 const char *tmp_filename;
944 svn_fs_root_t *fs_root;
946 if (!SVN_IS_VALID_REVNUM(base_revision))
947 base_revision = eb->current_rev - 1;
949 SVN_ERR(svn_fs_revision_root(&fs_root, eb->fs, base_revision, scratch_pool));
951 err = svn_fs_file_contents(&contents, fs_root, path, scratch_pool);
952 if (err && err->apr_err == SVN_ERR_FS_NOT_FOUND)
954 svn_error_clear(err);
959 return svn_error_trace(err);
960 SVN_ERR(svn_stream_open_unique(&file_stream, &tmp_filename, NULL,
961 svn_io_file_del_on_pool_cleanup,
962 scratch_pool, scratch_pool));
963 SVN_ERR(svn_stream_copy3(contents, file_stream, NULL, NULL, scratch_pool));
965 *filename = apr_pstrdup(result_pool, tmp_filename);
/* Build the editor that dumps (or verifies) revision TO_REV.

   An edit baton is allocated zero-filled in POOL and initialized from
   the arguments: its FS_ROOT is the revision root of TO_REV, its PATH is
   a copy of ROOT_PATH, and its BUFSIZE is the size of its own buffer.
   The editor starts from svn_delta_default_editor and gets this file's
   open_root, delete_entry, add_directory, open_directory,
   change_dir_prop, add_file and open_file callbacks.
   CUSTOM_CLOSE_DIRECTORY, when non-NULL, is installed in place of
   close_directory.  The editor is finally passed through
   svn_editor__insert_shims with fetch_kind_func, fetch_props_func and
   fetch_base_func as shim callbacks and the edit baton as their baton.

   NOTE(review): the return-type line, several parameters (among them the
   edit-baton output, FS, TO_REV, the notify baton and POOL), the braces
   and a few assignments are not visible in this view -- confirm against
   the complete file. */
972 get_dump_editor(const svn_delta_editor_t **editor,
976 const char *root_path,
977 svn_stream_t *stream,
978 svn_boolean_t *found_old_reference,
979 svn_boolean_t *found_old_mergeinfo,
980 svn_error_t *(*custom_close_directory)(void *dir_baton,
981 apr_pool_t *scratch_pool),
982 svn_repos_notify_func_t notify_func,
984 svn_revnum_t oldest_dumped_rev,
985 svn_boolean_t use_deltas,
986 svn_boolean_t verify,
989 /* Allocate an edit baton to be stored in every directory baton.
990 Set it up for the directory baton we create here, which is the
992 struct edit_baton *eb = apr_pcalloc(pool, sizeof(*eb));
993 svn_delta_editor_t *dump_editor = svn_delta_default_editor(pool);
994 svn_delta_shim_callbacks_t *shim_callbacks =
995 svn_delta_shim_callbacks_default(pool);
997 /* Set up the edit baton. */
999 eb->notify_func = notify_func;
1000 eb->notify_baton = notify_baton;
1001 eb->oldest_dumped_rev = oldest_dumped_rev;
1002 eb->bufsize = sizeof(eb->buffer);
1003 eb->path = apr_pstrdup(pool, root_path);
1004 SVN_ERR(svn_fs_revision_root(&(eb->fs_root), fs, to_rev, pool));
1006 eb->current_rev = to_rev;
1007 eb->use_deltas = use_deltas;
1008 eb->verify = verify;
1009 eb->found_old_reference = found_old_reference;
1010 eb->found_old_mergeinfo = found_old_mergeinfo;
1012 /* Set up the editor. */
1013 dump_editor->open_root = open_root;
1014 dump_editor->delete_entry = delete_entry;
1015 dump_editor->add_directory = add_directory;
1016 dump_editor->open_directory = open_directory;
1017 if (custom_close_directory)
1018 dump_editor->close_directory = custom_close_directory;
1020 dump_editor->close_directory = close_directory;
1021 dump_editor->change_dir_prop = change_dir_prop;
1022 dump_editor->add_file = add_file;
1023 dump_editor->open_file = open_file;
1026 *editor = dump_editor;
1028 shim_callbacks->fetch_kind_func = fetch_kind_func;
1029 shim_callbacks->fetch_props_func = fetch_props_func;
1030 shim_callbacks->fetch_base_func = fetch_base_func;
1031 shim_callbacks->fetch_baton = eb;
1033 SVN_ERR(svn_editor__insert_shims(editor, edit_baton, *editor, *edit_baton,
1034 NULL, NULL, shim_callbacks, pool, pool));
1036 return SVN_NO_ERROR;
1039 /*----------------------------------------------------------------------*/
1041 /** The main dumping routine, svn_repos_dump_fs. **/
1044 /* Helper for svn_repos_dump_fs.
1046 Write a revision record of REV in FS to writable STREAM, using POOL.
1047 */
/* Implementation summary: read the revision properties of REV and, in
   the lines that follow the SVN_PROP_REVISION_DATE lookup, convert the
   date value with svn_time_from_cstring and svn_time_to_cstring and
   store the result back in the hash.  The properties are serialized with
   svn_hash_write2 using the terminator "PROPS-END".  The function then
   writes the SVN_REPOS_DUMPFILE_REVISION_NUMBER header, the
   SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH header and the
   SVN_REPOS_DUMPFILE_CONTENT_LENGTH header (both lengths are the size of
   the serialized properties), then the serialized properties, and
   returns the result of a final write of "\n".
   NOTE(review): some parameters and declarations, the braces, the
   condition guarding the date conversion and a few other lines are not
   visible in this view -- confirm against the complete file. */
1048 static svn_error_t *
1049 write_revision_record(svn_stream_t *stream,
1056 svn_stringbuf_t *encoded_prophash;
1057 apr_time_t timetemp;
1058 svn_string_t *datevalue;
1059 svn_stream_t *propstream;
1061 /* Read the revision props even if we aren't going to dump
1062 them for verification purposes */
1063 SVN_ERR(svn_fs_revision_proplist(&props, fs, rev, pool));
1065 /* Run revision date properties through the time conversion to
1066 canonicalize them. */
1067 /* ### Remove this when it is no longer needed for sure. */
1068 datevalue = svn_hash_gets(props, SVN_PROP_REVISION_DATE);
1071 SVN_ERR(svn_time_from_cstring(&timetemp, datevalue->data, pool));
1072 datevalue = svn_string_create(svn_time_to_cstring(timetemp, pool),
1074 svn_hash_sets(props, SVN_PROP_REVISION_DATE, datevalue);
1077 encoded_prophash = svn_stringbuf_create_ensure(0, pool);
1078 propstream = svn_stream_from_stringbuf(encoded_prophash, pool);
1079 SVN_ERR(svn_hash_write2(props, propstream, "PROPS-END", pool));
1080 SVN_ERR(svn_stream_close(propstream));
1082 /* ### someday write a revision-content-checksum */
1084 SVN_ERR(svn_stream_printf(stream, pool,
1085 SVN_REPOS_DUMPFILE_REVISION_NUMBER
1087 SVN_ERR(svn_stream_printf(stream, pool,
1088 SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH
1089 ": %" APR_SIZE_T_FMT "\n",
1090 encoded_prophash->len));
1092 /* Write out a regular Content-length header for the benefit of
1093 non-Subversion RFC-822 parsers. */
1094 SVN_ERR(svn_stream_printf(stream, pool,
1095 SVN_REPOS_DUMPFILE_CONTENT_LENGTH
1096 ": %" APR_SIZE_T_FMT "\n\n",
1097 encoded_prophash->len));
1099 len = encoded_prophash->len;
1100 SVN_ERR(svn_stream_write(stream, encoded_prophash->data, &len));
1103 return svn_stream_write(stream, "\n", &len);
1108 /* The main dumper.

   Dump revisions START_REV through END_REV of the filesystem of REPOS
   to STREAM in dumpfile format.

   After the youngest revision has been fetched, the range is
   validated: an SVN_ERR_REPOS_BAD_ARGS error is returned if START_REV
   is greater than END_REV, or if END_REV is greater than the youngest
   revision.  An INCREMENTAL dump that starts at revision 0 is treated
   as a non-incremental one.

   The output begins with the magic header / format version line and
   the repository UUID.  Then, for each revision in the range, a
   revision record is written and a dump editor is driven: by
   svn_repos_dir_delta2() for the first revision of a non-incremental
   dump, and by svn_repos_replay2() otherwise.  Revision 0 is special:
   only its revision record is written.  USE_DELTAS is handed on to
   the dump editor, except that it is forced off for the first
   revision of a non-incremental dump.

   NOTIFY_FUNC is called with a svn_repos_notify_dump_rev_end
   notification per revision, with a svn_repos_notify_dump_end
   notification after the loop, and with a final generic warning for
   each of the two "old reference" flags the dump editor may have set.
   CANCEL_FUNC is called with CANCEL_BATON near the top of every
   iteration.  (Any NULL guards around these calls are not visible
   here.)

   A subpool of POOL is cleared on every iteration and destroyed
   before SVN_NO_ERROR is returned.

   NOTE(review): NOTIFY_BATON, CANCEL_BATON and POOL are used below,
   but their declarations, like a number of other short lines, are not
   visible in this listing; this comment describes only what is
   visible. */
1110 svn_repos_dump_fs3(svn_repos_t *repos,
1111 svn_stream_t *stream,
1112 svn_revnum_t start_rev,
1113 svn_revnum_t end_rev,
1114 svn_boolean_t incremental,
1115 svn_boolean_t use_deltas,
1116 svn_repos_notify_func_t notify_func,
1118 svn_cancel_func_t cancel_func,
1122 const svn_delta_editor_t *dump_editor;
1123 void *dump_edit_baton = NULL;
1125 svn_fs_t *fs = svn_repos_fs(repos);
1126 apr_pool_t *subpool = svn_pool_create(pool);
1127 svn_revnum_t youngest;
1130 svn_boolean_t found_old_reference = FALSE;
1131 svn_boolean_t found_old_mergeinfo = FALSE;
1132 svn_repos_notify_t *notify;
1134 /* Determine the current youngest revision of the filesystem. */
1135 SVN_ERR(svn_fs_youngest_rev(&youngest, fs, pool));
1137 /* Use default vals if necessary. */
1138 if (! SVN_IS_VALID_REVNUM(start_rev))
1140 if (! SVN_IS_VALID_REVNUM(end_rev))
1143 stream = svn_stream_empty(pool);
1145 /* Validate the revisions. */
1146 if (start_rev > end_rev)
1147 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
1148 _("Start revision %ld"
1149 " is greater than end revision %ld"),
1150 start_rev, end_rev);
1151 if (end_rev > youngest)
1152 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
1153 _("End revision %ld is invalid "
1154 "(youngest revision is %ld)"),
1156 if ((start_rev == 0) && incremental)
1157 incremental = FALSE; /* revision 0 looks the same regardless of
1158 whether or not this is an incremental
1159 dump, so just simplify things. */
1161 /* Write out the UUID. */
1162 SVN_ERR(svn_fs_get_uuid(fs, &uuid, pool));
1164 /* If we're not using deltas, use the previous version, for
1165 compatibility with svn 1.0.x. */
1166 version = SVN_REPOS_DUMPFILE_FORMAT_VERSION;
1170 /* Write out "general" metadata for the dumpfile. In this case, a
1171 magic header followed by a dumpfile format version. */
1172 SVN_ERR(svn_stream_printf(stream, pool,
1173 SVN_REPOS_DUMPFILE_MAGIC_HEADER ": %d\n\n",
1175 SVN_ERR(svn_stream_printf(stream, pool, SVN_REPOS_DUMPFILE_UUID
1178 /* Create a notify object that we can reuse in the loop. */
1180 notify = svn_repos_notify_create(svn_repos_notify_dump_rev_end,
1183 /* Main loop: we're going to dump revision i. */
1184 for (i = start_rev; i <= end_rev; i++)
1186 svn_revnum_t from_rev, to_rev;
1187 svn_fs_root_t *to_root;
1188 svn_boolean_t use_deltas_for_rev;
1190 svn_pool_clear(subpool);
1192 /* Check for cancellation. */
1194 SVN_ERR(cancel_func(cancel_baton));
1196 /* Special-case the initial revision dump: it needs to contain
1197 *all* nodes, because it's the foundation of all future
1198 revisions in the dumpfile. */
1199 if ((i == start_rev) && (! incremental))
1201 /* Special-special-case a dump of revision 0. */
1204 /* Just write out the one revision 0 record and move on.
1205 The parser might want to use its properties. */
1206 SVN_ERR(write_revision_record(stream, fs, 0, subpool));
1211 /* Compare START_REV to revision 0, so that everything
1212 appears to be added. */
1218 /* In the normal case, we want to compare consecutive revs. */
1223 /* Write the revision record. */
1224 SVN_ERR(write_revision_record(stream, fs, to_rev, subpool));
1226 /* Fetch the editor which dumps nodes to a file. Regardless of
1227 what we've been told, don't use deltas for the first rev of a
1228 non-incremental dump. */
1229 use_deltas_for_rev = use_deltas && (incremental || i != start_rev);
1230 SVN_ERR(get_dump_editor(&dump_editor, &dump_edit_baton, fs, to_rev,
1231 "", stream, &found_old_reference,
1232 &found_old_mergeinfo, NULL,
1233 notify_func, notify_baton,
1234 start_rev, use_deltas_for_rev, FALSE, subpool));
1236 /* Drive the editor in one way or another. */
1237 SVN_ERR(svn_fs_revision_root(&to_root, fs, to_rev, subpool));
1239 /* If this is the first revision of a non-incremental dump,
1240 we're in for a full tree dump. Otherwise, we want to simply
1241 replay the revision. */
1242 if ((i == start_rev) && (! incremental))
1244 svn_fs_root_t *from_root;
1245 SVN_ERR(svn_fs_revision_root(&from_root, fs, from_rev, subpool));
1246 SVN_ERR(svn_repos_dir_delta2(from_root, "", "",
1248 dump_editor, dump_edit_baton,
1251 FALSE, /* don't send text-deltas */
1253 FALSE, /* don't send entry props */
1254 FALSE, /* don't ignore ancestry */
1259 SVN_ERR(svn_repos_replay2(to_root, "", SVN_INVALID_REVNUM, FALSE,
1260 dump_editor, dump_edit_baton,
1261 NULL, NULL, subpool));
1263 /* While our editor close_edit implementation is a no-op, we still
1264 do this for completeness. */
1265 SVN_ERR(dump_editor->close_edit(dump_edit_baton, subpool));
1271 notify->revision = to_rev;
1272 notify_func(notify_baton, notify, subpool);
1278 /* Did we issue any warnings about references to revisions older than
1279 the oldest dumped revision? If so, then issue a final generic
1280 warning, since the inline warnings already issued might easily be
1283 notify = svn_repos_notify_create(svn_repos_notify_dump_end, subpool);
1284 notify_func(notify_baton, notify, subpool);
1286 if (found_old_reference)
1288 notify = svn_repos_notify_create(svn_repos_notify_warning, subpool);
1290 notify->warning = svn_repos_notify_warning_found_old_reference;
1291 notify->warning_str = _("The range of revisions dumped "
1292 "contained references to "
1293 "copy sources outside that "
1295 notify_func(notify_baton, notify, subpool);
1298 /* Ditto if we issued any warnings about old revisions referenced
1299 in dumped mergeinfo. */
1300 if (found_old_mergeinfo)
1302 notify = svn_repos_notify_create(svn_repos_notify_warning, subpool);
1304 notify->warning = svn_repos_notify_warning_found_old_mergeinfo;
1305 notify->warning_str = _("The range of revisions dumped "
1306 "contained mergeinfo "
1307 "which reference revisions outside "
1309 notify_func(notify_baton, notify, subpool);
1313 svn_pool_destroy(subpool);
1315 return SVN_NO_ERROR;
1319 /*----------------------------------------------------------------------*/
1321 /* verify, based on dump */
1324 /* Creating a new revision that changes /A/B/E/bravo means creating new
1325 directory listings for /, /A, /A/B, and /A/B/E in the new revision, with
1326 each entry not changed in the new revision a link back to the entry in a
1327 previous revision. svn_repos_replay()ing a revision does not verify that
1328 those links are correct.
1330 For paths actually changed in the revision we verify, we get directory
1331 contents or file length twice: once in the dump editor, and once here.
1332 We could create a new verify baton, store in it the changed paths, and
1333 skip those here, but that means building an entire wrapper editor and
1334 managing two levels of batons. The impact from checking these entries
1335 twice should be minimal, while the code to avoid it is not.

   verify_directory_entry() is the per-entry callback that performs this
   check when handed to svn_iter_apr_hash(): BATON is the struct
   dir_baton of the directory whose entries are being walked, KEY is the
   entry name (it is joined onto that directory's path) and VAL is the
   entry's svn_fs_dirent_t.  A directory entry is checked by listing its
   entries and a file entry by reading its length, both through the edit
   baton's FS root; for an unexpected node kind an
   SVN_ERR_NODE_UNEXPECTED_KIND error is built.  Temporary data is
   allocated in POOL.
1338 static svn_error_t *
1339 verify_directory_entry(void *baton, const void *key, apr_ssize_t klen,
1340 void *val, apr_pool_t *pool)
1342 struct dir_baton *db = baton;
1343 svn_fs_dirent_t *dirent = (svn_fs_dirent_t *)val;
1344 char *path = svn_relpath_join(db->path, (const char *)key, pool);
1345 apr_hash_t *dirents;
1348 /* Since we can't access the directory entries directly by their ID,
1349 we need to navigate from the FS_ROOT to them (relatively expensive
1350 because we may start at a newer rev than the last change to node). */
1351 switch (dirent->kind) {
1353 /* Getting this directory's contents is enough to ensure that our
1354 link to it is correct. */
1355 SVN_ERR(svn_fs_dir_entries(&dirents, db->edit_baton->fs_root, path, pool));
1358 /* Getting this file's size is enough to ensure that our link to it
1360 SVN_ERR(svn_fs_file_length(&len, db->edit_baton->fs_root, path, pool));
1363 return svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
1364 _("Unexpected node kind %d for '%s'"),
1365 dirent->kind, path);
1368 return SVN_NO_ERROR;
/* Directory-close handler that svn_repos_verify_fs2() passes to
   get_dump_editor() in place of the plain close_directory().

   DIR_BATON is a struct dir_baton.  The directory's entries are
   fetched through the edit baton's FS root, verify_directory_entry()
   is run on each of them via svn_iter_apr_hash(), and finally the
   call is forwarded to close_directory() with the same DIR_BATON and
   POOL.  Any error from the entry walk is returned before
   close_directory() is reached. */
1371 static svn_error_t *
1372 verify_close_directory(void *dir_baton,
1375 struct dir_baton *db = dir_baton;
1376 apr_hash_t *dirents;
1377 SVN_ERR(svn_fs_dir_entries(&dirents, db->edit_baton->fs_root,
1379 SVN_ERR(svn_iter_apr_hash(NULL, dirents, verify_directory_entry,
1381 return close_directory(dir_baton, pool);
1384 /* Baton type used for forwarding notifications from FS API to REPOS API.
   An instance is filled in by svn_repos_verify_fs2() and handed to
   svn_fs_verify() together with verify_fs2_notify_func(). */
1385 struct verify_fs2_notify_func_baton_t
1387 /* REPOS-level notification function to call (must not be NULL) */
1388 svn_repos_notify_func_t notify_func;
1390 /* baton to pass to NOTIFY_FUNC on each call */
1393 /* notification object to send; only its revision field is filled in
   per call, so its type is fixed by whoever sets up this baton */
1394 svn_repos_notify_t *notify;
1397 /* Forward the notification to BATON.

   BATON is a struct verify_fs2_notify_func_baton_t.  REVISION is
   stored in the notification object kept in that baton, and the
   baton's NOTIFY_FUNC is then called with the baton's NOTIFY_BATON,
   that notification object and POOL.  The same notification object is
   reused (and overwritten) on every call. */
1399 verify_fs2_notify_func(svn_revnum_t revision,
1403 struct verify_fs2_notify_func_baton_t *notify_baton = baton;
1405 notify_baton->notify->revision = revision;
1406 notify_baton->notify_func(notify_baton->notify_baton,
1407 notify_baton->notify, pool);
/* Verify revisions START_REV through END_REV of the filesystem of REPOS.

   After the youngest revision has been fetched, the range is
   validated: an SVN_ERR_REPOS_BAD_ARGS error is returned if START_REV
   is greater than END_REV, or if END_REV is greater than the youngest
   revision.

   svn_fs_verify() runs first, on the filesystem's path and config.
   Its progress callbacks are forwarded to NOTIFY_FUNC through
   verify_fs2_notify_func() as svn_repos_notify_verify_rev_structure
   notifications.

   Then each revision in the range is checked in turn:
   svn_fs_verify_root() is run on its revision root, the revision is
   replayed through a dump editor that writes to an empty stream and
   uses verify_close_directory() as its close_directory handler
   (wrapped in a cancellation editor driven by CANCEL_FUNC /
   CANCEL_BATON), and its revision properties are read.  NOTIFY_FUNC
   is called with a svn_repos_notify_verify_rev_end notification per
   revision and a svn_repos_notify_verify_end notification after the
   loop.

   An iteration subpool of POOL is cleared for every revision and
   destroyed before SVN_NO_ERROR is returned.

   NOTE(review): NOTIFY_BATON, CANCEL_BATON and POOL are used below,
   but their declarations, like a number of other short lines (for
   instance any NULL checks on NOTIFY_FUNC), are not visible in this
   listing; this comment describes only what is visible. */
1411 svn_repos_verify_fs2(svn_repos_t *repos,
1412 svn_revnum_t start_rev,
1413 svn_revnum_t end_rev,
1414 svn_repos_notify_func_t notify_func,
1416 svn_cancel_func_t cancel_func,
1420 svn_fs_t *fs = svn_repos_fs(repos);
1421 svn_revnum_t youngest;
1423 apr_pool_t *iterpool = svn_pool_create(pool);
1424 svn_repos_notify_t *notify;
1425 svn_fs_progress_notify_func_t verify_notify = NULL;
1426 struct verify_fs2_notify_func_baton_t *verify_notify_baton = NULL;
1428 /* Determine the current youngest revision of the filesystem. */
1429 SVN_ERR(svn_fs_youngest_rev(&youngest, fs, pool));
1431 /* Use default vals if necessary. */
1432 if (! SVN_IS_VALID_REVNUM(start_rev))
1434 if (! SVN_IS_VALID_REVNUM(end_rev))
1437 /* Validate the revisions. */
1438 if (start_rev > end_rev)
1439 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
1440 _("Start revision %ld"
1441 " is greater than end revision %ld"),
1442 start_rev, end_rev);
1443 if (end_rev > youngest)
1444 return svn_error_createf(SVN_ERR_REPOS_BAD_ARGS, NULL,
1445 _("End revision %ld is invalid "
1446 "(youngest revision is %ld)"),
1449 /* Create a notify object that we can reuse within the loop and a
1450 forwarding structure for notifications from inside svn_fs_verify(). */
1453 notify = svn_repos_notify_create(svn_repos_notify_verify_rev_end,
1456 verify_notify = verify_fs2_notify_func;
1457 verify_notify_baton = apr_palloc(pool, sizeof(*verify_notify_baton));
1458 verify_notify_baton->notify_func = notify_func;
1459 verify_notify_baton->notify_baton = notify_baton;
1460 verify_notify_baton->notify
1461 = svn_repos_notify_create(svn_repos_notify_verify_rev_structure, pool);
1464 /* Verify global metadata and backend-specific data first. */
1465 SVN_ERR(svn_fs_verify(svn_fs_path(fs, pool), svn_fs_config(fs, pool),
1467 verify_notify, verify_notify_baton,
1468 cancel_func, cancel_baton, pool));
1470 for (rev = start_rev; rev <= end_rev; rev++)
1472 const svn_delta_editor_t *dump_editor;
1473 void *dump_edit_baton;
1474 const svn_delta_editor_t *cancel_editor;
1475 void *cancel_edit_baton;
1476 svn_fs_root_t *to_root;
1479 svn_pool_clear(iterpool);
1481 /* Get cancellable dump editor, but with our close_directory handler. */
1482 SVN_ERR(get_dump_editor(&dump_editor, &dump_edit_baton,
1484 svn_stream_empty(iterpool),
1486 verify_close_directory,
1487 notify_func, notify_baton,
1489 FALSE, TRUE, /* use_deltas, verify */
1491 SVN_ERR(svn_delta_get_cancellation_editor(cancel_func, cancel_baton,
1492 dump_editor, dump_edit_baton,
1497 SVN_ERR(svn_fs_revision_root(&to_root, fs, rev, iterpool));
1498 SVN_ERR(svn_fs_verify_root(to_root, iterpool));
1500 SVN_ERR(svn_repos_replay2(to_root, "", SVN_INVALID_REVNUM, FALSE,
1501 cancel_editor, cancel_edit_baton,
1502 NULL, NULL, iterpool));
1503 /* While our editor close_edit implementation is a no-op, we still
1504 do this for completeness. */
1505 SVN_ERR(cancel_editor->close_edit(cancel_edit_baton, iterpool));
/* NOTE(review): the fetched props are not used in the visible code;
   presumably this read only checks that the revision properties are
   readable -- confirm. */
1507 SVN_ERR(svn_fs_revision_proplist(&props, fs, rev, iterpool));
1511 notify->revision = rev;
1512 notify_func(notify_baton, notify, iterpool);
1519 notify = svn_repos_notify_create(svn_repos_notify_verify_end, iterpool);
1520 notify_func(notify_baton, notify, iterpool);
1523 /* Release the iteration pool.  (Backend-specific verification was
   already done by the svn_fs_verify() call above.) */
1524 svn_pool_destroy(iterpool);
1526 return SVN_NO_ERROR;