1 /* load.c --- parsing a 'dumpfile'-formatted stream.
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 */
27 #include "svn_pools.h"
28 #include "svn_error.h"
29 #include "svn_repos.h"
30 #include "svn_string.h"
32 #include "svn_private_config.h"
33 #include "svn_ctype.h"
35 #include "private/svn_dep_compat.h"
37 /*----------------------------------------------------------------------*/
39 /** The parser and related helper funcs **/
45 return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
46 _("Premature end of content data in dumpstream"));
50 stream_malformed(void)
52 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
53 _("Dumpstream data appears to be malformed"));
56 /* Allocate a new hash *HEADERS in POOL, and read a series of
57 RFC822-style headers from STREAM. Duplicate each header's name and
58 value into POOL and store in hash as a const char * ==> const char *.
60 The headers are assumed to be terminated by a single blank line,
61 which will be permanently sucked from the stream and tossed.
63 If the caller has already read in the first header line, it should
64 be passed in as FIRST_HEADER. If not, pass NULL instead.
67 read_header_block(svn_stream_t *stream,
68 svn_stringbuf_t *first_header,
72 *headers = apr_hash_make(pool);
76 svn_stringbuf_t *header_str;
77 const char *name, *value;
81 if (first_header != NULL)
83 header_str = first_header;
84 first_header = NULL; /* so we never visit this block again. */
89 /* Read the next line into a stringbuf. */
90 SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool));
92 if (svn_stringbuf_isempty(header_str))
93 break; /* end of header block */
95 return stream_ran_dry();
97 /* Find the next colon in the stringbuf. */
98 while (header_str->data[i] != ':')
100 if (header_str->data[i] == '\0')
101 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
102 _("Dump stream contains a malformed "
103 "header (with no ':') at '%.20s'"),
107 /* Create a 'name' string and point to it. */
108 header_str->data[i] = '\0';
109 name = header_str->data;
111 /* Skip over the NULL byte and the space following it. */
113 if (i > header_str->len)
114 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
115 _("Dump stream contains a malformed "
116 "header (with no value) at '%.20s'"),
119 /* Point to the 'value' string. */
120 value = header_str->data + i;
122 /* Store name/value in hash. */
123 svn_hash_sets(*headers, name, value);
130 /* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM.
131 Also read a newline from STREAM and increase *ACTUAL_LEN by the total
132 number of bytes read from STREAM. */
134 read_key_or_val(char **pbuf,
135 svn_filesize_t *actual_length,
136 svn_stream_t *stream,
140 char *buf = apr_pcalloc(pool, len + 1);
145 SVN_ERR(svn_stream_read_full(stream, buf, &numread));
146 *actual_length += numread;
148 return svn_error_trace(stream_ran_dry());
151 /* Suck up extra newline after key data */
153 SVN_ERR(svn_stream_read_full(stream, &c, &numread));
154 *actual_length += numread;
156 return svn_error_trace(stream_ran_dry());
158 return svn_error_trace(stream_malformed());
165 /* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an
166 encoded Subversion properties hash, and making multiple calls to
167 PARSE_FNS->set_*_property on RECORD_BATON (depending on the value
170 Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM.
171 If an error is returned, the value of *ACTUAL_LENGTH is undefined.
173 Use POOL for all allocations. */
175 parse_property_block(svn_stream_t *stream,
176 svn_filesize_t content_length,
177 const svn_repos_parse_fns3_t *parse_fns,
180 svn_boolean_t is_node,
181 svn_filesize_t *actual_length,
184 svn_stringbuf_t *strbuf;
185 apr_pool_t *proppool = svn_pool_create(pool);
188 while (content_length != *actual_length)
190 char *buf; /* a pointer into the stringbuf's data */
193 svn_pool_clear(proppool);
195 /* Read a key length line. (Actually, it might be PROPS_END). */
196 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
200 /* We could just use stream_ran_dry() or stream_malformed(),
201 but better to give a non-generic property block error. */
202 return svn_error_create
203 (SVN_ERR_STREAM_MALFORMED_DATA, NULL,
204 _("Incomplete or unterminated property block"));
207 *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */
210 if (! strcmp(buf, "PROPS-END"))
211 break; /* no more properties. */
213 else if ((buf[0] == 'K') && (buf[1] == ' '))
218 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
219 SVN_ERR(read_key_or_val(&keybuf, actual_length,
220 stream, (apr_size_t)len, proppool));
222 /* Read a val length line */
223 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
225 return stream_ran_dry();
227 *actual_length += (strbuf->len + 1); /* +1 because we read \n too */
230 if ((buf[0] == 'V') && (buf[1] == ' '))
232 svn_string_t propstring;
236 SVN_ERR(svn_cstring_atoi64(&val, buf + 2));
237 propstring.len = (apr_size_t)val;
238 SVN_ERR(read_key_or_val(&valbuf, actual_length,
239 stream, propstring.len, proppool));
240 propstring.data = valbuf;
242 /* Now, send the property pair to the vtable! */
245 SVN_ERR(parse_fns->set_node_property(record_baton,
251 SVN_ERR(parse_fns->set_revision_property(record_baton,
257 return stream_malformed(); /* didn't find expected 'V' line */
259 else if ((buf[0] == 'D') && (buf[1] == ' '))
264 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
265 SVN_ERR(read_key_or_val(&keybuf, actual_length,
266 stream, (apr_size_t)len, proppool));
268 /* We don't expect these in revision properties, and if we see
269 one when we don't have a delete_node_property callback,
270 then we're seeing a v3 feature in a v2 dump. */
271 if (!is_node || !parse_fns->delete_node_property)
272 return stream_malformed();
274 SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf));
277 return stream_malformed(); /* didn't find expected 'K' line */
281 svn_pool_destroy(proppool);
286 /* Read CONTENT_LENGTH bytes from STREAM. If IS_DELTA is true, use
287 PARSE_FNS->apply_textdelta to push a text delta, otherwise use
288 PARSE_FNS->set_fulltext to push those bytes as replace fulltext for
289 a node. Use BUFFER/BUFLEN to push the fulltext in "chunks".
291 Use POOL for all allocations. */
293 parse_text_block(svn_stream_t *stream,
294 svn_filesize_t content_length,
295 svn_boolean_t is_delta,
296 const svn_repos_parse_fns3_t *parse_fns,
302 svn_stream_t *text_stream = NULL;
303 apr_size_t num_to_read, rlen, wlen;
307 svn_txdelta_window_handler_t wh;
310 SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton));
312 text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool);
316 /* Get a stream to which we can push the data. */
317 SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton));
320 /* Regardless of whether or not we have a sink for our data, we
322 while (content_length)
324 if (content_length >= (svn_filesize_t)buflen)
327 rlen = (apr_size_t) content_length;
330 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
331 content_length -= rlen;
332 if (rlen != num_to_read)
333 return stream_ran_dry();
337 /* write however many bytes you read. */
339 SVN_ERR(svn_stream_write(text_stream, buffer, &wlen));
342 /* Uh oh, didn't write as many bytes as we read. */
343 return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL,
344 _("Unexpected EOF writing contents"));
349 /* If we opened a stream, we must close it. */
351 SVN_ERR(svn_stream_close(text_stream));
358 /* Parse VERSIONSTRING from STREAM and verify that we support the dumpfile
359 format version number, setting *VERSION appropriately. */
361 parse_format_version(int *version,
362 svn_stream_t *stream,
363 apr_pool_t *scratch_pool)
365 static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1;
366 svn_stringbuf_t *linebuf;
370 /* No svn_stream_readline() here, because malformed streams may not have
371 the EOL at all, and currently svn_stream_readline() keeps loading the
372 whole thing into memory until it encounters an EOL or the stream ends.
373 This is particularly troublesome, because users may incorrectly attempt
374 to load arbitrary large files instread of proper dump files.
376 As a workaround, parse the first line with a length limit. While this
377 is not a complete solution, doing so handles the common case described
378 above. For a complete solution, svn_stream_readline() may need to grow
379 a `limit` argument that would allow us to safely use it everywhere within
382 linebuf = svn_stringbuf_create_empty(scratch_pool);
389 SVN_ERR(svn_stream_read_full(stream, &c, &len));
391 return stream_ran_dry();
396 if (linebuf->len + 1 > 80)
397 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
398 _("Malformed dumpfile header '%s'"),
401 svn_stringbuf_appendbyte(linebuf, c);
404 p = strchr(linebuf->data, ':');
407 || p != (linebuf->data + magic_len)
408 || strncmp(linebuf->data,
409 SVN_REPOS_DUMPFILE_MAGIC_HEADER,
411 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
412 _("Malformed dumpfile header '%s'"),
415 SVN_ERR(svn_cstring_atoi(&value, p + 1));
417 if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION)
418 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
419 _("Unsupported dumpfile version: %d"),
426 /*----------------------------------------------------------------------*/
428 /** Dummy callback implementations for functions not provided by the user **/
431 dummy_handler_magic_header_record(int version,
439 dummy_handler_uuid_record(const char *uuid,
447 dummy_handler_new_revision_record(void **revision_baton,
452 *revision_baton = NULL;
457 dummy_handler_new_node_record(void **node_baton,
459 void *revision_baton,
467 dummy_handler_set_revision_property(void *revision_baton,
469 const svn_string_t *value)
475 dummy_handler_set_node_property(void *node_baton,
477 const svn_string_t *value)
483 dummy_handler_delete_node_property(void *node_baton,
490 dummy_handler_remove_node_props(void *node_baton)
496 dummy_handler_set_fulltext(svn_stream_t **stream,
503 dummy_handler_apply_textdelta(svn_txdelta_window_handler_t *handler,
504 void **handler_baton,
507 /* Only called by parse_text_block() and that tests for NULL handlers. */
509 *handler_baton = NULL;
514 dummy_handler_close_node(void *node_baton)
520 dummy_handler_close_revision(void *revision_baton)
/* Helper macro to copy the function pointer SOURCE->NAME to DEST->NAME.
 * If the source pointer is NULL, pick the corresponding dummy handler
 * (dummy_handler_NAME) instead.
 *
 * DEST and SOURCE are pointers to vtable structs; both are parenthesized
 * so that arbitrary pointer expressions can be passed. */
#define SET_VTABLE_ENTRY(dest, source, name) \
  (dest)->name = (source)->name ? (source)->name : dummy_handler_##name
531 /* Return a copy of PROVIDED with all NULL callbacks replaced by a dummy
532 * handler. Allocate the result in RESULT_POOL. */
533 static const svn_repos_parse_fns3_t *
534 complete_vtable(const svn_repos_parse_fns3_t *provided,
535 apr_pool_t *result_pool)
537 svn_repos_parse_fns3_t *completed = apr_pcalloc(result_pool,
540 SET_VTABLE_ENTRY(completed, provided, magic_header_record);
541 SET_VTABLE_ENTRY(completed, provided, uuid_record);
542 SET_VTABLE_ENTRY(completed, provided, new_revision_record);
543 SET_VTABLE_ENTRY(completed, provided, new_node_record);
544 SET_VTABLE_ENTRY(completed, provided, set_revision_property);
545 SET_VTABLE_ENTRY(completed, provided, set_node_property);
546 SET_VTABLE_ENTRY(completed, provided, delete_node_property);
547 SET_VTABLE_ENTRY(completed, provided, remove_node_props);
548 SET_VTABLE_ENTRY(completed, provided, set_fulltext);
549 SET_VTABLE_ENTRY(completed, provided, apply_textdelta);
550 SET_VTABLE_ENTRY(completed, provided, close_node);
551 SET_VTABLE_ENTRY(completed, provided, close_revision);
556 /*----------------------------------------------------------------------*/
558 /** The public routines **/
/* Parse a dump stream read from STREAM and report its contents through
   the callbacks in PARSE_FNS.

   NULL callbacks in PARSE_FNS are first replaced by no-op dummies
   (complete_vtable).  The format version line is parsed and passed to
   magic_header_record.  After that the stream is processed one record at
   a time: the header block is read into a hash, and the record is
   dispatched on the header it contains -- a revision number (closes any
   open revision, then new_revision_record), a node path
   (new_node_record), a UUID (uuid_record) or a magic header (updates the
   version); any other record is an SVN_ERR_STREAM_MALFORMED_DATA error.
   The record's property and text content blocks are then parsed, leftover
   content bytes are skipped, and a node record is closed with close_node.
   When the stream ends, the revision still open is closed.

   PARSE_BATON is handed to the magic_header_record, uuid_record and
   new_revision_record callbacks.  If DELTAS_ARE_TEXT is true, text content
   is never treated as a delta.  CANCEL_FUNC is called with CANCEL_BATON
   once per loop iteration (NOTE(review): the NULL guard for CANCEL_FUNC is
   not visible in this excerpt -- confirm).  POOL is used for the read
   buffer, the completed vtable and three scratch subpools. */
561 svn_repos_parse_dumpstream3(svn_stream_t *stream,
562 const svn_repos_parse_fns3_t *parse_fns,
564 svn_boolean_t deltas_are_text,
565 svn_cancel_func_t cancel_func,
570 svn_stringbuf_t *linebuf;
571 void *rev_baton = NULL;
572 char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
573 apr_size_t buflen = SVN__STREAM_CHUNK_SIZE;
/* Scratch subpools: LINEPOOL is cleared at the start of every loop
   iteration, REVPOOL when an open revision is closed for a new one, and
   NODEPOOL after each node record is closed. */
574 apr_pool_t *linepool = svn_pool_create(pool);
575 apr_pool_t *revpool = svn_pool_create(pool);
576 apr_pool_t *nodepool = svn_pool_create(pool);
579 /* Make sure we can blindly invoke callbacks. */
580 parse_fns = complete_vtable(parse_fns, pool);
582 /* Start parsing process. */
583 /* The first two lines of the stream are the dumpfile-format version
584 number, and a blank line. To preserve backward compatibility,
585 don't assume the existence of newer parser-vtable functions. */
/* NOTE(review): complete_vtable() above already substitutes a dummy for a
   NULL magic_header_record, so the NULL test below looks redundant --
   confirm before removing. */
586 SVN_ERR(parse_format_version(&version, stream, linepool));
587 if (parse_fns->magic_header_record != NULL)
588 SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool));
590 /* A dumpfile "record" is defined to be a header-block of
591 rfc822-style headers, possibly followed by a content-block.
593 - A header-block is always terminated by a single blank line (\n\n)
595 - We know whether the record has a content-block by looking for
596 a 'Content-length:' header. The content-block will always be
597 of a specific length, plus an extra newline.
599 Once a record is fully sucked from the stream, an indeterminate
600 number of blank lines (or lines that begin with whitespace) may
601 follow before the next record (or the end of the stream.)
608 svn_boolean_t found_node = FALSE;
609 svn_boolean_t old_v1_with_cl = FALSE;
610 const char *content_length;
614 svn_filesize_t actual_prop_length;
616 /* Clear our per-line pool. */
617 svn_pool_clear(linepool);
619 /* Check for cancellation. */
621 SVN_ERR(cancel_func(cancel_baton));
623 /* Keep reading blank lines until we discover a new record, or until
624 the stream runs out. */
625 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
629 if (svn_stringbuf_isempty(linebuf))
630 break; /* end of stream, go home. */
632 return stream_ran_dry();
635 if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0])))
636 continue; /* empty line ... loop */
638 /*** Found the beginning of a new record. ***/
640 /* The last line we read better be a header of some sort.
641 Read the whole header-block into a hash. */
642 SVN_ERR(read_header_block(stream, linebuf, &headers, linepool));
644 /*** Handle the various header blocks. ***/
646 /* Is this a revision record? */
647 if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER))
649 /* If we already have a rev_baton open, we need to close it
650 and clear the per-revision subpool. */
651 if (rev_baton != NULL)
653 SVN_ERR(parse_fns->close_revision(rev_baton));
654 svn_pool_clear(revpool);
657 SVN_ERR(parse_fns->new_revision_record(&rev_baton,
658 headers, parse_baton,
661 /* Or is this, perhaps, a node record? */
662 else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH))
664 SVN_ERR(parse_fns->new_node_record(&node_baton,
670 /* Or is this the repos UUID? */
671 else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID)))
673 SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool));
675 /* Or perhaps a dumpfile format? */
676 /* ### TODO: use parse_format_version */
677 else if ((value = svn_hash_gets(headers,
678 SVN_REPOS_DUMPFILE_MAGIC_HEADER)))
680 /* ### someday, switch modes of operation here. */
681 SVN_ERR(svn_cstring_atoi(&version, value));
683 /* Or is this bogosity?! */
686 /* What the heck is this record?!? */
687 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
688 _("Unrecognized record type in stream"));
691 /* Need 3 values below to determine v1 dump type
693 Old (pre 0.14?) v1 dumps don't have Prop-content-length
694 and Text-content-length fields, but always have a properties
695 block in a block with Content-Length > 0 */
697 content_length = svn_hash_gets(headers,
698 SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
699 prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
700 text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
702 version == 1 && content_length && ! prop_cl && ! text_cl;
704 /* Is there a props content-block to parse? */
705 if (prop_cl || old_v1_with_cl)
707 const char *delta = svn_hash_gets(headers,
708 SVN_REPOS_DUMPFILE_PROP_DELTA);
709 svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0);
711 /* First, remove all node properties, unless this is a delta
713 if (found_node && !is_delta)
714 SVN_ERR(parse_fns->remove_node_props(node_baton));
716 SVN_ERR(parse_property_block
718 svn__atoui64(prop_cl ? prop_cl : content_length),
720 found_node ? node_baton : rev_baton,
724 found_node ? nodepool : revpool));
727 /* Is there a text content-block to parse? */
730 const char *delta = svn_hash_gets(headers,
731 SVN_REPOS_DUMPFILE_TEXT_DELTA);
/* With DELTAS_ARE_TEXT set, is_delta stays FALSE whatever the Text-delta
   header says, so the content is passed on as plain fulltext. */
732 svn_boolean_t is_delta = FALSE;
733 if (! deltas_are_text)
734 is_delta = (delta && strcmp(delta, "true") == 0);
736 SVN_ERR(parse_text_block(stream,
737 svn__atoui64(text_cl),
740 found_node ? node_baton : rev_baton,
743 found_node ? nodepool : revpool));
745 else if (old_v1_with_cl)
747 /* An old-v1 block with a Content-length might have a text block.
748 If the property block did not consume all the bytes of the
749 Content-length, then it clearly does have a text block.
750 If not, then we must deduce whether we have an *empty* text
751 block or an *absent* text block. The rules are:
752 - "Node-kind: file" blocks have an empty (i.e. present, but
753 zero-length) text block, since they represent a file
754 modification. Note that file-copied-text-unmodified blocks
755 have no Content-length - even if they should have contained
756 a modified property block, the pre-0.14 dumper forgets to
757 dump the modified properties.
758 - If it is not a file node, then it is a revision or directory,
759 and so has an absent text block.
761 const char *node_kind;
762 svn_filesize_t cl_value = svn__atoui64(content_length)
763 - actual_prop_length;
766 ((node_kind = svn_hash_gets(headers,
767 SVN_REPOS_DUMPFILE_NODE_KIND))
768 && strcmp(node_kind, "file") == 0)
770 SVN_ERR(parse_text_block(stream,
774 found_node ? node_baton : rev_baton,
777 found_node ? nodepool : revpool));
780 /* if we have a content-length header, did we read all of it?
781 in case of an old v1, we *always* read all of it, because
782 text-content-length == content-length - prop-content-length
784 if (content_length && ! old_v1_with_cl)
786 apr_size_t rlen, num_to_read;
787 svn_filesize_t remaining =
788 svn__atoui64(content_length) -
789 (prop_cl ? svn__atoui64(prop_cl) : 0) -
790 (text_cl ? svn__atoui64(text_cl) : 0);
794 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
795 _("Sum of subblock sizes larger than "
796 "total block content length"));
798 /* Consume remaining bytes in this content block */
799 while (remaining > 0)
801 if (remaining >= (svn_filesize_t)buflen)
804 rlen = (apr_size_t) remaining;
807 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
809 if (rlen != num_to_read)
810 return stream_ran_dry();
814 /* If we just finished processing a node record, we need to
815 close the node record and clear the per-node subpool. */
818 SVN_ERR(parse_fns->close_node(node_baton));
819 svn_pool_clear(nodepool);
822 /*** End of processing for one record. ***/
824 } /* end of stream */
826 /* Close out whatever revision we're in. */
827 if (rev_baton != NULL)
828 SVN_ERR(parse_fns->close_revision(rev_baton));
/* Release the three scratch subpools created at the top. */
830 svn_pool_destroy(linepool);
831 svn_pool_destroy(revpool);
832 svn_pool_destroy(nodepool);