1 /* load.c --- parsing a 'dumpfile'-formatted stream.
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
27 #include "svn_pools.h"
28 #include "svn_error.h"
29 #include "svn_repos.h"
30 #include "svn_string.h"
32 #include "svn_private_config.h"
33 #include "svn_ctype.h"
35 #include "private/svn_dep_compat.h"
37 /*----------------------------------------------------------------------*/
39 /** The parser and related helper funcs **/
45 return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
46 _("Premature end of content data in dumpstream"));
50 stream_malformed(void)
52 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
53 _("Dumpstream data appears to be malformed"));
56 /* Allocate a new hash *HEADERS in POOL, and read a series of
57 RFC822-style headers from STREAM. Duplicate each header's name and
58 value into POOL and store in hash as a const char * ==> const char *.
60 The headers are assumed to be terminated by a single blank line,
61 which will be permanently sucked from the stream and tossed.
63 If the caller has already read in the first header line, it should
64 be passed in as FIRST_HEADER. If not, pass NULL instead.
67 read_header_block(svn_stream_t *stream,
68 svn_stringbuf_t *first_header,
72 *headers = apr_hash_make(pool);
76 svn_stringbuf_t *header_str;
77 const char *name, *value;
81 if (first_header != NULL)
83 header_str = first_header;
84 first_header = NULL; /* so we never visit this block again. */
89 /* Read the next line into a stringbuf. */
90 SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool));
92 if (svn_stringbuf_isempty(header_str))
93 break; /* end of header block */
95 return stream_ran_dry();
97 /* Find the next colon in the stringbuf. */
98 while (header_str->data[i] != ':')
100 if (header_str->data[i] == '\0')
101 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
102 _("Dump stream contains a malformed "
103 "header (with no ':') at '%.20s'"),
107 /* Create a 'name' string and point to it. */
108 header_str->data[i] = '\0';
109 name = header_str->data;
111 /* Skip over the NUL byte and the space following it. */
113 if (i > header_str->len)
114 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
115 _("Dump stream contains a malformed "
116 "header (with no value) at '%.20s'"),
119 /* Point to the 'value' string. */
120 value = header_str->data + i;
122 /* Store name/value in hash. */
123 svn_hash_sets(*headers, name, value);
130 /* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM.
131 Also read a newline from STREAM and increase *ACTUAL_LENGTH by the total
132 number of bytes read from STREAM. */
134 read_key_or_val(char **pbuf,
135 svn_filesize_t *actual_length,
136 svn_stream_t *stream,
140 char *buf = apr_pcalloc(pool, len + 1);
145 SVN_ERR(svn_stream_read_full(stream, buf, &numread));
146 *actual_length += numread;
148 return svn_error_trace(stream_ran_dry());
151 /* Suck up extra newline after key or value data */
153 SVN_ERR(svn_stream_read_full(stream, &c, &numread));
154 *actual_length += numread;
156 return svn_error_trace(stream_ran_dry());
158 return svn_error_trace(stream_malformed());
165 /* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an
166 encoded Subversion properties hash, and making multiple calls to
167 PARSE_FNS->set_*_property on RECORD_BATON (depending on the value of IS_NODE).
170 Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM.
171 If an error is returned, the value of *ACTUAL_LENGTH is undefined.
173 Use POOL for all allocations. */
175 parse_property_block(svn_stream_t *stream,
176 svn_filesize_t content_length,
177 const svn_repos_parse_fns3_t *parse_fns,
180 svn_boolean_t is_node,
181 svn_filesize_t *actual_length,
184 svn_stringbuf_t *strbuf;
185 apr_pool_t *proppool = svn_pool_create(pool);
188 while (content_length != *actual_length)
190 char *buf; /* a pointer into the stringbuf's data */
193 svn_pool_clear(proppool);
195 /* Read a key length line. (Actually, it might be PROPS-END). */
196 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
200 /* We could just use stream_ran_dry() or stream_malformed(),
201 but better to give a non-generic property block error. */
202 return svn_error_create
203 (SVN_ERR_STREAM_MALFORMED_DATA, NULL,
204 _("Incomplete or unterminated property block"));
207 *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */
210 if (! strcmp(buf, "PROPS-END"))
211 break; /* no more properties. */
213 else if ((buf[0] == 'K') && (buf[1] == ' '))
218 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
219 SVN_ERR(read_key_or_val(&keybuf, actual_length,
220 stream, (apr_size_t)len, proppool));
222 /* Read a val length line */
223 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
225 return stream_ran_dry();
227 *actual_length += (strbuf->len + 1); /* +1 because we read \n too */
230 if ((buf[0] == 'V') && (buf[1] == ' '))
232 svn_string_t propstring;
236 SVN_ERR(svn_cstring_atoi64(&val, buf + 2));
237 propstring.len = (apr_size_t)val;
238 SVN_ERR(read_key_or_val(&valbuf, actual_length,
239 stream, propstring.len, proppool));
240 propstring.data = valbuf;
242 /* Now, send the property pair to the vtable! */
245 SVN_ERR(parse_fns->set_node_property(record_baton,
251 SVN_ERR(parse_fns->set_revision_property(record_baton,
257 return stream_malformed(); /* didn't find expected 'V' line */
259 else if ((buf[0] == 'D') && (buf[1] == ' '))
264 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
265 SVN_ERR(read_key_or_val(&keybuf, actual_length,
266 stream, (apr_size_t)len, proppool));
268 /* We don't expect these in revision properties, and if we see
269 one when we don't have a delete_node_property callback,
270 then we're seeing a v3 feature in a v2 dump. */
271 if (!is_node || !parse_fns->delete_node_property)
272 return stream_malformed();
274 SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf));
277 return stream_malformed(); /* didn't find expected 'K' line */
281 svn_pool_destroy(proppool);
286 /* Read CONTENT_LENGTH bytes from STREAM. If IS_DELTA is true, use
287 PARSE_FNS->apply_textdelta to push a text delta, otherwise use
288 PARSE_FNS->set_fulltext to push those bytes as the replacement fulltext for
289 a node. Use BUFFER/BUFLEN to push the fulltext in "chunks".
291 Use POOL for all allocations. */
293 parse_text_block(svn_stream_t *stream,
294 svn_filesize_t content_length,
295 svn_boolean_t is_delta,
296 const svn_repos_parse_fns3_t *parse_fns,
302 svn_stream_t *text_stream = NULL;
303 apr_size_t num_to_read, rlen, wlen;
307 svn_txdelta_window_handler_t wh;
310 SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton));
312 text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool);
316 /* Get a stream to which we can push the data. */
317 SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton));
320 /* Regardless of whether or not we have a sink for our data, we need to read it. */
322 while (content_length)
324 if (content_length >= (svn_filesize_t)buflen)
327 rlen = (apr_size_t) content_length;
330 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
331 content_length -= rlen;
332 if (rlen != num_to_read)
333 return stream_ran_dry();
337 /* write however many bytes you read. */
339 SVN_ERR(svn_stream_write(text_stream, buffer, &wlen));
342 /* Uh oh, didn't write as many bytes as we read. */
343 return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL,
344 _("Unexpected EOF writing contents"));
349 /* If we opened a stream, we must close it. */
351 SVN_ERR(svn_stream_close(text_stream));
358 /* Parse VERSIONSTRING and verify that we support the dumpfile format
359 version number, setting *VERSION appropriately. */
361 parse_format_version(int *version,
362 const char *versionstring)
364 static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1;
365 const char *p = strchr(versionstring, ':');
369 || p != (versionstring + magic_len)
370 || strncmp(versionstring,
371 SVN_REPOS_DUMPFILE_MAGIC_HEADER,
373 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
374 _("Malformed dumpfile header '%s'"),
377 SVN_ERR(svn_cstring_atoi(&value, p + 1));
379 if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION)
380 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
381 _("Unsupported dumpfile version: %d"),
388 /*----------------------------------------------------------------------*/
390 /** Dummy callback implementations for functions not provided by the user **/
393 dummy_handler_magic_header_record(int version,
401 dummy_handler_uuid_record(const char *uuid,
409 dummy_handler_new_revision_record(void **revision_baton,
414 *revision_baton = NULL;
419 dummy_handler_new_node_record(void **node_baton,
421 void *revision_baton,
429 dummy_handler_set_revision_property(void *revision_baton,
431 const svn_string_t *value)
437 dummy_handler_set_node_property(void *node_baton,
439 const svn_string_t *value)
445 dummy_handler_delete_node_property(void *node_baton,
452 dummy_handler_remove_node_props(void *node_baton)
458 dummy_handler_set_fulltext(svn_stream_t **stream,
465 dummy_handler_apply_textdelta(svn_txdelta_window_handler_t *handler,
466 void **handler_baton,
469 /* Only called by parse_text_block() and that tests for NULL handlers. */
471 *handler_baton = NULL;
476 dummy_handler_close_node(void *node_baton)
482 dummy_handler_close_revision(void *revision_baton)
/* Helper macro to copy the function pointer SOURCE->NAME to DEST->NAME.
 * If the source pointer is NULL, pick the corresponding dummy handler
 * dummy_handler_NAME instead.
 *
 * DEST and SOURCE are expressions yielding pointers to structures that
 * have a member called NAME.  SOURCE is evaluated twice, so it must not
 * have side effects.  The expansion is a single parenthesized assignment
 * expression; terminate the invocation with a semicolon.
 */
#define SET_VTABLE_ENTRY(dest, source, name) \
  ((dest)->name = (source)->name ? (source)->name : dummy_handler_##name)
493 /* Return a copy of PROVIDED with all NULL callbacks replaced by a dummy
494 * handler. Allocate the result in RESULT_POOL. */
495 static const svn_repos_parse_fns3_t *
496 complete_vtable(const svn_repos_parse_fns3_t *provided,
497 apr_pool_t *result_pool)
499 svn_repos_parse_fns3_t *completed = apr_pcalloc(result_pool,
502 SET_VTABLE_ENTRY(completed, provided, magic_header_record);
503 SET_VTABLE_ENTRY(completed, provided, uuid_record);
504 SET_VTABLE_ENTRY(completed, provided, new_revision_record);
505 SET_VTABLE_ENTRY(completed, provided, new_node_record);
506 SET_VTABLE_ENTRY(completed, provided, set_revision_property);
507 SET_VTABLE_ENTRY(completed, provided, set_node_property);
508 SET_VTABLE_ENTRY(completed, provided, delete_node_property);
509 SET_VTABLE_ENTRY(completed, provided, remove_node_props);
510 SET_VTABLE_ENTRY(completed, provided, set_fulltext);
511 SET_VTABLE_ENTRY(completed, provided, apply_textdelta);
512 SET_VTABLE_ENTRY(completed, provided, close_node);
513 SET_VTABLE_ENTRY(completed, provided, close_revision);
518 /*----------------------------------------------------------------------*/
520 /** The public routines **/
523 svn_repos_parse_dumpstream3(svn_stream_t *stream,
524 const svn_repos_parse_fns3_t *parse_fns,
526 svn_boolean_t deltas_are_text,
527 svn_cancel_func_t cancel_func,
532 svn_stringbuf_t *linebuf;
533 void *rev_baton = NULL;
534 char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
535 apr_size_t buflen = SVN__STREAM_CHUNK_SIZE;
536 apr_pool_t *linepool = svn_pool_create(pool);
537 apr_pool_t *revpool = svn_pool_create(pool);
538 apr_pool_t *nodepool = svn_pool_create(pool);
541 /* Make sure we can blindly invoke callbacks. */
542 parse_fns = complete_vtable(parse_fns, pool);
544 /* Start parsing process. */
545 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
547 return stream_ran_dry();
549 /* The first two lines of the stream are the dumpfile-format version
550 number, and a blank line. To preserve backward compatibility,
551 don't assume the existence of newer parser-vtable functions. */
552 SVN_ERR(parse_format_version(&version, linebuf->data));
553 if (parse_fns->magic_header_record != NULL)
554 SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool));
556 /* A dumpfile "record" is defined to be a header-block of
557 rfc822-style headers, possibly followed by a content-block.
559 - A header-block is always terminated by a single blank line (\n\n)
561 - We know whether the record has a content-block by looking for
562 a 'Content-length:' header. The content-block will always be
563 of a specific length, plus an extra newline.
565 Once a record is fully sucked from the stream, an indeterminate
566 number of blank lines (or lines that begin with whitespace) may
567 follow before the next record (or the end of the stream.)
574 svn_boolean_t found_node = FALSE;
575 svn_boolean_t old_v1_with_cl = FALSE;
576 const char *content_length;
580 svn_filesize_t actual_prop_length;
582 /* Clear our per-line pool. */
583 svn_pool_clear(linepool);
585 /* Check for cancellation. */
587 SVN_ERR(cancel_func(cancel_baton));
589 /* Keep reading blank lines until we discover a new record, or until
590 the stream runs out. */
591 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
595 if (svn_stringbuf_isempty(linebuf))
596 break; /* end of stream, go home. */
598 return stream_ran_dry();
601 if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0])))
602 continue; /* empty line ... loop */
604 /*** Found the beginning of a new record. ***/
606 /* The last line we read better be a header of some sort.
607 Read the whole header-block into a hash. */
608 SVN_ERR(read_header_block(stream, linebuf, &headers, linepool));
610 /*** Handle the various header blocks. ***/
612 /* Is this a revision record? */
613 if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER))
615 /* If we already have a rev_baton open, we need to close it
616 and clear the per-revision subpool. */
617 if (rev_baton != NULL)
619 SVN_ERR(parse_fns->close_revision(rev_baton));
620 svn_pool_clear(revpool);
623 SVN_ERR(parse_fns->new_revision_record(&rev_baton,
624 headers, parse_baton,
627 /* Or is this, perhaps, a node record? */
628 else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH))
630 SVN_ERR(parse_fns->new_node_record(&node_baton,
636 /* Or is this the repos UUID? */
637 else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID)))
639 SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool));
641 /* Or perhaps a dumpfile format? */
642 /* ### TODO: use parse_format_version */
643 else if ((value = svn_hash_gets(headers,
644 SVN_REPOS_DUMPFILE_MAGIC_HEADER)))
646 /* ### someday, switch modes of operation here. */
647 SVN_ERR(svn_cstring_atoi(&version, value));
649 /* Or is this bogosity?! */
652 /* What the heck is this record?!? */
653 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
654 _("Unrecognized record type in stream"));
657 /* Need 3 values below to determine v1 dump type
659 Old (pre 0.14?) v1 dumps don't have Prop-content-length
660 and Text-content-length fields, but always have a properties
661 block in a block with Content-Length > 0 */
663 content_length = svn_hash_gets(headers,
664 SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
665 prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
666 text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
668 version == 1 && content_length && ! prop_cl && ! text_cl;
670 /* Is there a props content-block to parse? */
671 if (prop_cl || old_v1_with_cl)
673 const char *delta = svn_hash_gets(headers,
674 SVN_REPOS_DUMPFILE_PROP_DELTA);
675 svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0);
677 /* First, remove all node properties, unless this is a delta property block. */
679 if (found_node && !is_delta)
680 SVN_ERR(parse_fns->remove_node_props(node_baton));
682 SVN_ERR(parse_property_block
684 svn__atoui64(prop_cl ? prop_cl : content_length),
686 found_node ? node_baton : rev_baton,
690 found_node ? nodepool : revpool));
693 /* Is there a text content-block to parse? */
696 const char *delta = svn_hash_gets(headers,
697 SVN_REPOS_DUMPFILE_TEXT_DELTA);
698 svn_boolean_t is_delta = FALSE;
699 if (! deltas_are_text)
700 is_delta = (delta && strcmp(delta, "true") == 0);
702 SVN_ERR(parse_text_block(stream,
703 svn__atoui64(text_cl),
706 found_node ? node_baton : rev_baton,
709 found_node ? nodepool : revpool));
711 else if (old_v1_with_cl)
713 /* An old-v1 block with a Content-length might have a text block.
714 If the property block did not consume all the bytes of the
715 Content-length, then it clearly does have a text block.
716 If not, then we must deduce whether we have an *empty* text
717 block or an *absent* text block. The rules are:
718 - "Node-kind: file" blocks have an empty (i.e. present, but
719 zero-length) text block, since they represent a file
720 modification. Note that file-copied-text-unmodified blocks
721 have no Content-length - even if they should have contained
722 a modified property block, the pre-0.14 dumper forgets to
723 dump the modified properties.
724 - If it is not a file node, then it is a revision or directory,
725 and so has an absent text block.
727 const char *node_kind;
728 svn_filesize_t cl_value = svn__atoui64(content_length)
729 - actual_prop_length;
732 ((node_kind = svn_hash_gets(headers,
733 SVN_REPOS_DUMPFILE_NODE_KIND))
734 && strcmp(node_kind, "file") == 0)
736 SVN_ERR(parse_text_block(stream,
740 found_node ? node_baton : rev_baton,
743 found_node ? nodepool : revpool));
746 /* if we have a content-length header, did we read all of it?
747 in case of an old v1, we *always* read all of it, because
748 text-content-length == content-length - prop-content-length
750 if (content_length && ! old_v1_with_cl)
752 apr_size_t rlen, num_to_read;
753 svn_filesize_t remaining =
754 svn__atoui64(content_length) -
755 (prop_cl ? svn__atoui64(prop_cl) : 0) -
756 (text_cl ? svn__atoui64(text_cl) : 0);
760 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
761 _("Sum of subblock sizes larger than "
762 "total block content length"));
764 /* Consume remaining bytes in this content block */
765 while (remaining > 0)
767 if (remaining >= (svn_filesize_t)buflen)
770 rlen = (apr_size_t) remaining;
773 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
775 if (rlen != num_to_read)
776 return stream_ran_dry();
780 /* If we just finished processing a node record, we need to
781 close the node record and clear the per-node subpool. */
784 SVN_ERR(parse_fns->close_node(node_baton));
785 svn_pool_clear(nodepool);
788 /*** End of processing for one record. ***/
790 } /* end of stream */
792 /* Close out whatever revision we're in. */
793 if (rev_baton != NULL)
794 SVN_ERR(parse_fns->close_revision(rev_baton));
796 svn_pool_destroy(linepool);
797 svn_pool_destroy(revpool);
798 svn_pool_destroy(nodepool);