1 /* load.c --- parsing a 'dumpfile'-formatted stream.
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
24 #include "svn_private_config.h"
26 #include "svn_pools.h"
27 #include "svn_error.h"
29 #include "svn_repos.h"
30 #include "svn_string.h"
32 #include "svn_props.h"
34 #include "svn_private_config.h"
35 #include "svn_mergeinfo.h"
36 #include "svn_checksum.h"
37 #include "svn_subst.h"
38 #include "svn_ctype.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_mergeinfo_private.h"
45 /*----------------------------------------------------------------------*/
47 /** The parser and related helper funcs **/
/* Body of the helper that builds the error for a stream that ended too
   early: it returns a new SVN_ERR_INCOMPLETE_DATA error with a fixed
   message wrapped in _().  The enclosing signature is not visible at
   this point; presumably this is stream_ran_dry(), which the readers
   further down call -- TODO confirm. */
53 return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
54 _("Premature end of content data in dumpstream"));
/* Return a new SVN_ERR_STREAM_MALFORMED_DATA error carrying a fixed,
   generic message.  Takes no arguments.  The parsers in this file use
   it wherever they have no more specific description of the problem. */
58 stream_malformed(void)
60 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
61 _("Dumpstream data appears to be malformed"));
64 /* Allocate a new hash *HEADERS in POOL, and read a series of
65 RFC822-style headers from STREAM. Duplicate each header's name and
66 value into POOL and store in hash as a const char * ==> const char *.
68 The headers are assumed to be terminated by a single blank line,
69 which will be permanently sucked from the stream and tossed.
71 If the caller has already read in the first header line, it should
72 be passed in as FIRST_HEADER. If not, pass NULL instead.
75 read_header_block(svn_stream_t *stream,
76 svn_stringbuf_t *first_header,
80 *headers = apr_hash_make(pool);
84 svn_stringbuf_t *header_str;
85 const char *name, *value;
89 if (first_header != NULL)
91 header_str = first_header;
92 first_header = NULL; /* so we never visit this block again. */
/* Every header line other than FIRST_HEADER is read from STREAM, split
   on a newline, into a stringbuf allocated in POOL. */
97 /* Read the next line into a stringbuf. */
98 SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool));
/* An empty line is the terminator of the header block.  The other exit
   below reports a stream that ran dry; its guard is not visible here,
   presumably it tests eof -- TODO confirm. */
100 if (svn_stringbuf_isempty(header_str))
101 break; /* end of header block */
103 return stream_ran_dry();
/* Scan forward from index i for the colon that separates name from
   value.  Reaching the NUL terminator first means the line has no
   colon, and the line is rejected with at most its first 20 bytes
   quoted in the message. */
105 /* Find the next colon in the stringbuf. */
106 while (header_str->data[i] != ':')
108 if (header_str->data[i] == '\0')
109 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
110 _("Dump stream contains a malformed "
111 "header (with no ':') at '%.20s'"),
/* The line is split in place: the colon is overwritten with NUL so that
   the start of the buffer becomes the name string.
   NOTE(review): no copy is made here.  Both name and value point into
   header_str, while the header comment speaks of duplicating into POOL.
   When FIRST_HEADER is used the strings live in the buffer supplied by
   the caller -- confirm that buffer outlives the hash. */
115 /* Create a 'name' string and point to it. */
116 header_str->data[i] = '\0';
117 name = header_str->data;
/* The statement that advances i is not visible here; per the comment
   below it presumably moves i past the NUL and one following byte.
   A line too short to leave room for a value is rejected.
   NOTE(review): nothing visible checks that the skipped byte really is
   a space -- confirm that is intended. */
119 /* Skip over the NULL byte and the space following it. */
121 if (i > header_str->len)
122 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
123 _("Dump stream contains a malformed "
124 "header (with no value) at '%.20s'"),
127 /* Point to the 'value' string. */
128 value = header_str->data + i;
130 /* Store name/value in hash. */
131 svn_hash_sets(*headers, name, value);
138 /* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM.
139 Also read a newline from STREAM and increase *ACTUAL_LENGTH by the total
140 number of bytes read from STREAM. */
/* ACTUAL_LENGTH is a running counter: this function only ever adds to
   it.  Errors are reported through stream_ran_dry() and
   stream_malformed(), wrapped in svn_error_trace(); the conditions that
   guard those returns are not visible here. */
142 read_key_or_val(char **pbuf,
143 svn_filesize_t *actual_length,
144 svn_stream_t *stream,
/* One byte more than LEN is allocated.  apr_pcalloc zero-fills, so the
   extra byte presumably serves as the NUL terminator of the result.
   NOTE(review): len + 1 wraps to 0 if LEN is the largest apr_size_t
   value, and the K and D callers accept lengths up to APR_SIZE_MAX --
   confirm this cannot under-allocate. */
148 char *buf = apr_pcalloc(pool, len + 1);
/* Read the payload.  numread is passed by address, so it carries the
   byte count back from the read (presumably it was preset to LEN on a
   line not visible here).  The bytes read are added to the running
   total before the failure return below. */
153 SVN_ERR(svn_stream_read(stream, buf, &numread));
154 *actual_length += numread;
156 return svn_error_trace(stream_ran_dry());
/* Then consume the single byte that follows the payload into c.  Two
   failures are distinguished below: the stream ending (ran dry) and,
   presumably, the byte not being a newline (malformed). */
159 /* Suck up extra newline after key data */
161 SVN_ERR(svn_stream_read(stream, &c, &numread));
162 *actual_length += numread;
164 return svn_error_trace(stream_ran_dry());
166 return svn_error_trace(stream_malformed());
173 /* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an
174 encoded Subversion properties hash, and making multiple calls to
175 PARSE_FNS->set_*_property on RECORD_BATON (depending on the value
of IS_NODE).
178 Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM.
179 If an error is returned, the value of *ACTUAL_LENGTH is undefined.
181 Use POOL for all allocations. */
/* Entries handled below, one per loop pass (NL is a newline):
     K <len> NL <key> NL  V <len> NL <value> NL   set a property
     D <len> NL <key> NL                          delete a node property
     PROPS-END NL                                 end of the block
   D entries are accepted only when IS_NODE is true and the
   delete_node_property callback is present. */
183 parse_property_block(svn_stream_t *stream,
184 svn_filesize_t content_length,
185 const svn_repos_parse_fns3_t *parse_fns,
188 svn_boolean_t is_node,
189 svn_filesize_t *actual_length,
192 svn_stringbuf_t *strbuf;
/* Scratch subpool of POOL for the lines, keys and values of one
   property at a time. */
193 apr_pool_t *proppool = svn_pool_create(pool);
/* Loop until the bytes counted in *ACTUAL_LENGTH equal CONTENT_LENGTH,
   or until PROPS-END breaks out.
   NOTE(review): the test is != rather than <, so if the entries overrun
   CONTENT_LENGTH the loop only ends at PROPS-END or on an error. */
196 while (content_length != *actual_length)
198 char *buf; /* a pointer into the stringbuf's data */
/* Everything read for the previous property is released here, so key
   and value buffers are valid for a single pass only. */
201 svn_pool_clear(proppool);
203 /* Read a key length line. (Actually, it might be PROPS-END). */
204 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
208 /* We could just use stream_ran_dry() or stream_malformed(),
209 but better to give a non-generic property block error. */
210 return svn_error_create
211 (SVN_ERR_STREAM_MALFORMED_DATA, NULL,
212 _("Incomplete or unterminated property block"));
215 *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */
/* buf presumably points at the data of strbuf at this point; the
   assignment is not visible here. */
218 if (! strcmp(buf, "PROPS-END"))
219 break; /* no more properties. */
221 else if ((buf[0] == 'K') && (buf[1] == ' '))
/* Key length: parsed in base 10 as an unsigned value and range checked
   against 0 .. APR_SIZE_MAX before being narrowed to apr_size_t. */
226 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
227 SVN_ERR(read_key_or_val(&keybuf, actual_length,
228 stream, (apr_size_t)len, proppool));
230 /* Read a val length line */
231 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
233 return stream_ran_dry();
235 *actual_length += (strbuf->len + 1); /* +1 because we read \n too */
238 if ((buf[0] == 'V') && (buf[1] == ' '))
240 svn_string_t propstring;
/* NOTE(review): unlike the K and D lengths, the value length is parsed
   with svn_cstring_atoi64 and cast to apr_size_t with no range check
   visible here.  A negative number in the stream would become a very
   large size -- confirm, and consider the bounded unsigned parse used
   for keys. */
244 SVN_ERR(svn_cstring_atoi64(&val, buf + 2));
245 propstring.len = (apr_size_t)val;
246 SVN_ERR(read_key_or_val(&valbuf, actual_length,
247 stream, propstring.len, proppool));
248 propstring.data = valbuf;
250 /* Now, send the property pair to the vtable! */
253 SVN_ERR(parse_fns->set_node_property(record_baton,
259 SVN_ERR(parse_fns->set_revision_property(record_baton,
265 return stream_malformed(); /* didn't find expected 'V' line */
267 else if ((buf[0] == 'D') && (buf[1] == ' '))
/* Deleted-property entry: a key with no value line.  The key length is
   parsed with the same bounds as for K entries. */
272 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
273 SVN_ERR(read_key_or_val(&keybuf, actual_length,
274 stream, (apr_size_t)len, proppool));
276 /* We don't expect these in revision properties, and if we see
277 one when we don't have a delete_node_property callback,
278 then we're seeing a v3 feature in a v2 dump. */
279 if (!is_node || !parse_fns->delete_node_property)
280 return stream_malformed();
282 SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf));
285 return stream_malformed(); /* didn't find expected 'K' line */
/* NOTE(review): every early return above skips this destroy.  proppool
   is a subpool of POOL, so presumably it is released together with
   POOL -- confirm that callers clear or destroy POOL on error. */
289 svn_pool_destroy(proppool);
294 /* Read CONTENT_LENGTH bytes from STREAM, and use
295 PARSE_FNS->set_fulltext to push those bytes as replace fulltext for
296 a node. Use BUFFER/BUFLEN to push the fulltext in "chunks".
298 Use POOL for all allocations. */
/* There is a second path that the comment above does not mention: the
   bytes can instead be fed to PARSE_FNS->apply_textdelta through an
   svndiff parser.  Presumably IS_DELTA selects that path; the branch
   itself is not visible here -- TODO confirm. */
300 parse_text_block(svn_stream_t *stream,
301 svn_filesize_t content_length,
302 svn_boolean_t is_delta,
303 const svn_repos_parse_fns3_t *parse_fns,
/* text_stream is the sink for the content.  It starts as NULL, and the
   closing comment at the end speaks of a stream that may not have been
   opened, so the callbacks can presumably leave it NULL -- confirm the
   writes below are guarded accordingly. */
309 svn_stream_t *text_stream = NULL;
310 apr_size_t num_to_read, rlen, wlen;
314 svn_txdelta_window_handler_t wh;
/* Delta path: obtain a window handler and its baton from the callback,
   then wrap them in a stream that parses svndiff data written to it. */
317 SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton));
319 text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool);
323 /* Get a stream to which we can push the data. */
324 SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton));
327 /* If there are no contents to read, just write an empty buffer
328 through our callback. */
329 if (content_length == 0)
333 SVN_ERR(svn_stream_write(text_stream, "", &wlen));
/* The loop that follows drains CONTENT_LENGTH bytes from STREAM into
   BUFFER in pieces of at most BUFLEN.  A read that returns fewer bytes
   than requested is reported through stream_ran_dry(). */
336 /* Regardless of whether or not we have a sink for our data, we
338 while (content_length)
340 if (content_length >= (svn_filesize_t)buflen)
343 rlen = (apr_size_t) content_length;
346 SVN_ERR(svn_stream_read(stream, buffer, &rlen));
347 content_length -= rlen;
348 if (rlen != num_to_read)
349 return stream_ran_dry();
353 /* write however many bytes you read. */
355 SVN_ERR(svn_stream_write(text_stream, buffer, &wlen));
358 /* Uh oh, didn't write as many bytes as we read. */
359 return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL,
360 _("Unexpected EOF writing contents"));
365 /* If we opened a stream, we must close it. */
367 SVN_ERR(svn_stream_close(text_stream));
374 /* Parse VERSIONSTRING and verify that we support the dumpfile format
375 version number, setting *VERSION appropriately. */
/* VERSIONSTRING is expected to be the magic header name, a colon, and
   then the version number.  Failures are returned as
   SVN_ERR_STREAM_MALFORMED_DATA errors, or as whatever error the
   integer parse produces. */
377 parse_format_version(int *version,
378 const char *versionstring)
/* Size of the magic header macro minus one: its length without the NUL
   terminator, assuming the macro expands to a string literal. */
380 static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1;
/* p is the first colon in the line, or NULL when there is none. */
381 const char *p = strchr(versionstring, ':');
/* The colon must sit exactly magic_len bytes into the line and the text
   before it must match the magic header; otherwise the whole line is
   quoted in the error. */
385 || p != (versionstring + magic_len)
386 || strncmp(versionstring,
387 SVN_REPOS_DUMPFILE_MAGIC_HEADER,
389 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
390 _("Malformed dumpfile header '%s'"),
/* Everything after the colon is parsed as an int. */
393 SVN_ERR(svn_cstring_atoi(&value, p + 1));
/* NOTE(review): only an upper bound is checked here.  No lower bound on
   value is visible -- confirm how zero or negative versions are meant
   to be handled. */
395 if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION)
396 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
397 _("Unsupported dumpfile version: %d"),
406 /*----------------------------------------------------------------------*/
408 /** The public routines **/
/* Top-level driver: read a dump stream from STREAM record by record and
   report each piece to the callbacks in PARSE_FNS.  The first line must
   be the format version header.  After that, each record is a header
   block, classified as a revision, node, UUID or format record, plus an
   optional content block holding properties and/or text.  If
   DELTAS_ARE_TEXT is true, text is always handed on as if it were
   fulltext.  CANCEL_FUNC is polled once per record. */
411 svn_repos_parse_dumpstream3(svn_stream_t *stream,
412 const svn_repos_parse_fns3_t *parse_fns,
414 svn_boolean_t deltas_are_text,
415 svn_cancel_func_t cancel_func,
420 svn_stringbuf_t *linebuf;
421 void *rev_baton = NULL;
/* One scratch buffer of SVN__STREAM_CHUNK_SIZE bytes is shared by all
   chunked reads below.  Three subpools of POOL hold data that lives for
   one record (linepool), one revision (revpool) and one node
   (nodepool). */
422 char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
423 apr_size_t buflen = SVN__STREAM_CHUNK_SIZE;
424 apr_pool_t *linepool = svn_pool_create(pool);
425 apr_pool_t *revpool = svn_pool_create(pool);
426 apr_pool_t *nodepool = svn_pool_create(pool);
/* Read the very first line.  The guard on the early return is not
   visible here; presumably an immediate end of stream is reported as a
   stream that ran dry. */
429 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
431 return stream_ran_dry();
433 /* The first two lines of the stream are the dumpfile-format version
434 number, and a blank line. To preserve backward compatibility,
435 don't assume the existence of newer parser-vtable functions. */
436 SVN_ERR(parse_format_version(&version, linebuf->data));
437 if (parse_fns->magic_header_record != NULL)
438 SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool));
440 /* A dumpfile "record" is defined to be a header-block of
441 rfc822-style headers, possibly followed by a content-block.
443 - A header-block is always terminated by a single blank line (\n\n)
445 - We know whether the record has a content-block by looking for
446 a 'Content-length:' header. The content-block will always be
447 of a specific length, plus an extra newline.
449 Once a record is fully sucked from the stream, an indeterminate
450 number of blank lines (or lines that begin with whitespace) may
451 follow before the next record (or the end of the stream.)
458 svn_boolean_t found_node = FALSE;
459 svn_boolean_t old_v1_with_cl = FALSE;
460 const char *content_length;
464 svn_filesize_t actual_prop_length;
466 /* Clear our per-line pool. */
467 svn_pool_clear(linepool);
/* The guard on this call is not visible here; presumably a NULL
   cancel_func is skipped -- TODO confirm. */
469 /* Check for cancellation. */
471 SVN_ERR(cancel_func(cancel_baton));
473 /* Keep reading blank lines until we discover a new record, or until
474 the stream runs out. */
475 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
/* End of input: an empty last line ends parsing normally, while the
   other return reports a stream that ran dry.  The eof test that
   presumably guards both is not visible here. */
479 if (svn_stringbuf_isempty(linebuf))
480 break; /* end of stream, go home. */
482 return stream_ran_dry();
485 if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0])))
486 continue; /* empty line ... loop */
488 /*** Found the beginning of a new record. ***/
490 /* The last line we read better be a header of some sort.
491 Read the whole header-block into a hash. */
/* The line buffer and the header hash both come from linepool, which is
   cleared at the top of every pass.  Header strings are therefore valid
   for the current record only. */
492 SVN_ERR(read_header_block(stream, linebuf, &headers, linepool));
494 /*** Handle the various header blocks. ***/
496 /* Is this a revision record? */
497 if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER))
499 /* If we already have a rev_baton open, we need to close it
500 and clear the per-revision subpool. */
501 if (rev_baton != NULL)
503 SVN_ERR(parse_fns->close_revision(rev_baton));
504 svn_pool_clear(revpool);
507 SVN_ERR(parse_fns->new_revision_record(&rev_baton,
508 headers, parse_baton,
511 /* Or is this, perhaps, a node record? */
512 else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH))
514 SVN_ERR(parse_fns->new_node_record(&node_baton,
520 /* Or is this the repos UUID? */
521 else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID)))
523 SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool));
525 /* Or perhaps a dumpfile format? */
526 /* ### TODO: use parse_format_version */
527 else if ((value = svn_hash_gets(headers,
528 SVN_REPOS_DUMPFILE_MAGIC_HEADER)))
530 /* ### someday, switch modes of operation here. */
/* NOTE(review): a format header met in mid-stream overwrites version
   with no upper-bound check, unlike parse_format_version.  The new
   value feeds the old-v1 detection further down. */
531 SVN_ERR(svn_cstring_atoi(&version, value));
533 /* Or is this bogosity?! */
536 /* What the heck is this record?!? */
537 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
538 _("Unrecognized record type in stream"));
541 /* Need 3 values below to determine v1 dump type
543 Old (pre 0.14?) v1 dumps don't have Prop-content-length
544 and Text-content-length fields, but always have a properties
545 block in a block with Content-Length > 0 */
547 content_length = svn_hash_gets(headers,
548 SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
549 prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
550 text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
/* True only for a version 1 record that has a Content-length header but
   neither of the two sub-length headers.  The left-hand side of this
   assignment is not visible here; presumably it is old_v1_with_cl. */
552 version == 1 && content_length && ! prop_cl && ! text_cl;
/* NOTE(review): the length headers are converted with svn__atoui64 in
   the code below.  How that helper treats non-numeric text is not
   visible here -- confirm malformed lengths are rejected. */
554 /* Is there a props content-block to parse? */
555 if (prop_cl || old_v1_with_cl)
557 const char *delta = svn_hash_gets(headers,
558 SVN_REPOS_DUMPFILE_PROP_DELTA);
559 svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0);
561 /* First, remove all node properties, unless this is a delta
563 if (found_node && !is_delta)
564 SVN_ERR(parse_fns->remove_node_props(node_baton));
566 SVN_ERR(parse_property_block
568 svn__atoui64(prop_cl ? prop_cl : content_length),
570 found_node ? node_baton : rev_baton,
574 found_node ? nodepool : revpool));
577 /* Is there a text content-block to parse? */
580 const char *delta = svn_hash_gets(headers,
581 SVN_REPOS_DUMPFILE_TEXT_DELTA);
/* When DELTAS_ARE_TEXT is set, is_delta stays FALSE whatever the
   Text-delta header says. */
582 svn_boolean_t is_delta = FALSE;
583 if (! deltas_are_text)
584 is_delta = (delta && strcmp(delta, "true") == 0);
586 SVN_ERR(parse_text_block(stream,
587 svn__atoui64(text_cl),
590 found_node ? node_baton : rev_baton,
593 found_node ? nodepool : revpool));
/* NOTE(review): in the old-v1 branch below, the text size is the
   Content-length minus the byte count reported by the property parser.
   Confirm that this difference cannot become negative when the property
   block is longer than the Content-length. */
595 else if (old_v1_with_cl)
597 /* An old-v1 block with a Content-length might have a text block.
598 If the property block did not consume all the bytes of the
599 Content-length, then it clearly does have a text block.
600 If not, then we must deduce whether we have an *empty* text
601 block or an *absent* text block. The rules are:
602 - "Node-kind: file" blocks have an empty (i.e. present, but
603 zero-length) text block, since they represent a file
604 modification. Note that file-copied-text-unmodified blocks
605 have no Content-length - even if they should have contained
606 a modified property block, the pre-0.14 dumper forgets to
607 dump the modified properties.
608 - If it is not a file node, then it is a revision or directory,
609 and so has an absent text block.
611 const char *node_kind;
612 svn_filesize_t cl_value = svn__atoui64(content_length)
613 - actual_prop_length;
616 ((node_kind = svn_hash_gets(headers,
617 SVN_REPOS_DUMPFILE_NODE_KIND))
618 && strcmp(node_kind, "file") == 0)
620 SVN_ERR(parse_text_block(stream,
624 found_node ? node_baton : rev_baton,
627 found_node ? nodepool : revpool));
630 /* if we have a content-length header, did we read all of it?
631 in case of an old v1, we *always* read all of it, because
632 text-content-length == content-length - prop-content-length
634 if (content_length && ! old_v1_with_cl)
636 apr_size_t rlen, num_to_read;
637 svn_filesize_t remaining =
638 svn__atoui64(content_length) -
639 (prop_cl ? svn__atoui64(prop_cl) : 0) -
640 (text_cl ? svn__atoui64(text_cl) : 0);
644 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
645 _("Sum of subblock sizes larger than "
646 "total block content length"));
648 /* Consume remaining bytes in this content block */
/* Whatever part of the content block neither sub-block claimed is read
   into the scratch buffer in pieces of at most buflen and discarded.
   A short read is reported as a stream that ran dry. */
649 while (remaining > 0)
651 if (remaining >= (svn_filesize_t)buflen)
654 rlen = (apr_size_t) remaining;
657 SVN_ERR(svn_stream_read(stream, buffer, &rlen));
659 if (rlen != num_to_read)
660 return stream_ran_dry();
664 /* If we just finished processing a node record, we need to
665 close the node record and clear the per-node subpool. */
/* The guard is not visible here; presumably this runs only when
   found_node is set. */
668 SVN_ERR(parse_fns->close_node(node_baton));
669 svn_pool_clear(nodepool);
672 /*** End of processing for one record. ***/
674 } /* end of stream */
676 /* Close out whatever revision we're in. */
677 if (rev_baton != NULL)
678 SVN_ERR(parse_fns->close_revision(rev_baton));
/* NOTE(review): the early returns above skip these three destroys.  The
   pools are children of POOL, so presumably they are released together
   with it -- confirm callers clear or destroy POOL on error. */
680 svn_pool_destroy(linepool);
681 svn_pool_destroy(revpool);
682 svn_pool_destroy(nodepool);