1 /* load.c --- parsing a 'dumpfile'-formatted stream.
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
27 #include "svn_pools.h"
28 #include "svn_error.h"
29 #include "svn_repos.h"
30 #include "svn_string.h"
32 #include "svn_private_config.h"
33 #include "svn_ctype.h"
35 #include "private/svn_dep_compat.h"
37 /*----------------------------------------------------------------------*/
39 /** The parser and related helper funcs **/
/* Build and return an SVN_ERR_INCOMPLETE_DATA error saying the dumpstream
   ended before all of its content data was read.
   NOTE(review): the enclosing function's header is not visible in this
   view; presumably this is the body of stream_ran_dry() -- confirm. */
45 return svn_error_create(SVN_ERR_INCOMPLETE_DATA, NULL,
46 _("Premature end of content data in dumpstream"));
/* Build and return a generic SVN_ERR_STREAM_MALFORMED_DATA error for a
   dumpstream whose contents do not parse.  Takes no arguments. */
50 stream_malformed(void)
52 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
53 _("Dumpstream data appears to be malformed"));
56 /* Allocate a new hash *HEADERS in POOL, and read a series of
57 RFC822-style headers from STREAM. Duplicate each header's name and
58 value into POOL and store in hash as a const char * ==> const char *.
60 The headers are assumed to be terminated by a single blank line,
61 which will be permanently sucked from the stream and tossed.
63 If the caller has already read in the first header line, it should
64 be passed in as FIRST_HEADER. If not, pass NULL instead.
67 read_header_block(svn_stream_t *stream,
68 svn_stringbuf_t *first_header,
72 *headers = apr_hash_make(pool);
76 svn_stringbuf_t *header_str;
77 const char *name, *value;
81 if (first_header != NULL)
83 header_str = first_header;
84 first_header = NULL; /* so we never visit this block again. */
89 /* Read the next line into a stringbuf. */
90 SVN_ERR(svn_stream_readline(stream, &header_str, "\n", &eof, pool));
/* An empty line is the blank separator that ends the header block. */
92 if (svn_stringbuf_isempty(header_str))
93 break; /* end of header block */
/* NOTE(review): the test guarding this return is not visible in this
   view; presumably it fires when EOF was reached on a non-empty line,
   i.e. before the terminating blank line -- confirm. */
95 return stream_ran_dry();
97 /* Find the next colon in the stringbuf. */
98 while (header_str->data[i] != ':')
/* Reaching the terminating NUL first means the line has no ':'. */
100 if (header_str->data[i] == '\0')
101 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
102 _("Dump stream contains a malformed "
103 "header (with no ':') at '%.20s'"),
107 /* Create a 'name' string and point to it. */
/* Overwriting the ':' with NUL splits the line in place, so NAME is the
   leading part of HEADER_STR's own buffer. */
108 header_str->data[i] = '\0';
109 name = header_str->data;
111 /* Skip over the NUL byte and the space following it. */
/* NOTE(review): the statement that advances I is not visible here.  The
   test below only rejects I beyond the end of the string, so I equal to
   the string length (an empty value) is accepted. */
113 if (i > header_str->len)
114 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
115 _("Dump stream contains a malformed "
116 "header (with no value) at '%.20s'"),
119 /* Point to the 'value' string. */
120 value = header_str->data + i;
122 /* Store name/value in hash. */
/* NAME and VALUE are pointers into HEADER_STR->data; no separate copy is
   made in the visible code, so presumably the hash entries are only
   valid for as long as that buffer is -- confirm against the function's
   header comment, which speaks of duplicating them into POOL. */
123 svn_hash_sets(*headers, name, value);
130 /* Set *PBUF to a string of length LEN, allocated in POOL, read from STREAM.
131 Also read a newline from STREAM and increase *ACTUAL_LENGTH by the total
132 number of bytes read from STREAM. */
/* Two reads are issued: one for the LEN data bytes into a fresh buffer and
   one for the single byte that follows them.  *ACTUAL_LENGTH is advanced
   by the count each read reports before any of the error returns that
   follow that read.  Failures are reported via stream_ran_dry() or
   stream_malformed(). */
134 read_key_or_val(char **pbuf,
135 svn_filesize_t *actual_length,
136 svn_stream_t *stream,
/* Allocate LEN + 1 bytes from POOL.
   NOTE(review): LEN + 1 wraps to 0 when LEN is APR_SIZE_MAX, and the 'K'
   and 'D' callers accept lengths up to APR_SIZE_MAX -- confirm that such
   a length cannot reach this allocation. */
140 char *buf = apr_pcalloc(pool, len + 1);
145 SVN_ERR(svn_stream_read_full(stream, buf, &numread));
146 *actual_length += numread;
/* NOTE(review): guard not visible here; presumably taken when fewer than
   LEN bytes were read. */
148 return svn_error_trace(stream_ran_dry());
151 /* Suck up extra newline after key data */
153 SVN_ERR(svn_stream_read_full(stream, &c, &numread));
154 *actual_length += numread;
/* NOTE(review): guards not visible here; presumably a short read yields
   the "ran dry" error and a byte other than a newline yields the
   "malformed" error. */
156 return svn_error_trace(stream_ran_dry());
158 return svn_error_trace(stream_malformed());
165 /* Read CONTENT_LENGTH bytes from STREAM, parsing the bytes as an
166 encoded Subversion properties hash, and making multiple calls to
167 PARSE_FNS->set_*_property on RECORD_BATON (depending on the value
of IS_NODE).

The block is a sequence of entries, each introduced by a length line:
"K <len>" announces a key and must be followed by a "V <len>" value
line; "D <len>" announces the name of a property to delete and is
only accepted for node records whose PARSE_FNS provides
delete_node_property.  The line "PROPS-END" ends the block.  Any
other line is reported as malformed.

170 Set *ACTUAL_LENGTH to the number of bytes consumed from STREAM.
171 If an error is returned, the value of *ACTUAL_LENGTH is undefined.
173 Use POOL for all allocations. */
175 parse_property_block(svn_stream_t *stream,
176 svn_filesize_t content_length,
177 const svn_repos_parse_fns3_t *parse_fns,
180 svn_boolean_t is_node,
181 svn_filesize_t *actual_length,
184 svn_stringbuf_t *strbuf;
/* Scratch subpool for one property entry: cleared at the top of every
   loop iteration and destroyed after the loop. */
185 apr_pool_t *proppool = svn_pool_create(pool);
/* Keep going until the consumed byte count matches CONTENT_LENGTH (or
   PROPS-END is seen).
   NOTE(review): the test is '!=', so a count that overshoots
   CONTENT_LENGTH does not end the loop by itself. */
188 while (content_length != *actual_length)
190 char *buf; /* a pointer into the stringbuf's data */
193 svn_pool_clear(proppool);
195 /* Read a key length line. (Actually, it might be PROPS-END). */
196 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
200 /* We could just use stream_ran_dry() or stream_malformed(),
201 but better to give a non-generic property block error. */
202 return svn_error_create
203 (SVN_ERR_STREAM_MALFORMED_DATA, NULL,
204 _("Incomplete or unterminated property block"));
207 *actual_length += (strbuf->len + 1); /* +1 because we read a \n too. */
210 if (! strcmp(buf, "PROPS-END"))
211 break; /* no more properties. */
213 else if ((buf[0] == 'K') && (buf[1] == ' '))
/* The key length follows "K " and is parsed as an unsigned decimal
   bounded to the range 0..APR_SIZE_MAX. */
218 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
219 SVN_ERR(read_key_or_val(&keybuf, actual_length,
220 stream, (apr_size_t)len, proppool));
222 /* Read a val length line */
223 SVN_ERR(svn_stream_readline(stream, &strbuf, "\n", &eof, proppool));
225 return stream_ran_dry();
227 *actual_length += (strbuf->len + 1); /* +1 because we read \n too */
230 if ((buf[0] == 'V') && (buf[1] == ' '))
232 svn_string_t propstring;
/* NOTE(review): unlike the 'K' and 'D' lengths, the value length is
   parsed with svn_cstring_atoi64() and then cast to apr_size_t with no
   visible range check -- confirm that negative or oversized lengths
   are rejected before they are used as a read size. */
236 SVN_ERR(svn_cstring_atoi64(&val, buf + 2));
237 propstring.len = (apr_size_t)val;
238 SVN_ERR(read_key_or_val(&valbuf, actual_length,
239 stream, propstring.len, proppool));
240 propstring.data = valbuf;
242 /* Now, send the property pair to the vtable! */
/* NOTE(review): the branch selecting between the two callbacks below is
   not visible here; presumably it tests IS_NODE. */
245 SVN_ERR(parse_fns->set_node_property(record_baton,
251 SVN_ERR(parse_fns->set_revision_property(record_baton,
257 return stream_malformed(); /* didn't find expected 'V' line */
259 else if ((buf[0] == 'D') && (buf[1] == ' '))
/* The name is read from the stream before the entry is validated below,
   so *ACTUAL_LENGTH already includes it when the error is returned. */
264 SVN_ERR(svn_cstring_strtoui64(&len, buf + 2, 0, APR_SIZE_MAX, 10));
265 SVN_ERR(read_key_or_val(&keybuf, actual_length,
266 stream, (apr_size_t)len, proppool));
268 /* We don't expect these in revision properties, and if we see
269 one when we don't have a delete_node_property callback,
270 then we're seeing a v3 feature in a v2 dump. */
271 if (!is_node || !parse_fns->delete_node_property)
272 return stream_malformed();
274 SVN_ERR(parse_fns->delete_node_property(record_baton, keybuf));
277 return stream_malformed(); /* didn't find expected 'K' line */
281 svn_pool_destroy(proppool);
286 /* Read CONTENT_LENGTH bytes from STREAM. If IS_DELTA is true, use
287 PARSE_FNS->apply_textdelta to push a text delta, otherwise use
288 PARSE_FNS->set_fulltext to push those bytes as the replacement fulltext for
289 a node. Use BUFFER/BUFLEN to push the fulltext in "chunks".

The content is read from STREAM chunk by chunk and each chunk is
written to the stream obtained from the callback.  A read that
returns fewer bytes than requested yields the stream_ran_dry()
error; a short write yields SVN_ERR_STREAM_UNEXPECTED_EOF.  The
stream obtained from the callback is closed before returning.

291 Use POOL for all allocations. */
293 parse_text_block(svn_stream_t *stream,
294 svn_filesize_t content_length,
295 svn_boolean_t is_delta,
296 const svn_repos_parse_fns3_t *parse_fns,
/* Destination for the content; stays NULL until one of the callbacks
   below supplies a stream. */
302 svn_stream_t *text_stream = NULL;
303 apr_size_t num_to_read, rlen, wlen;
307 svn_txdelta_window_handler_t wh;
/* Delta case: get a window handler from the callback and turn it into a
   writable stream with svn_txdelta_parse_svndiff(). */
310 SVN_ERR(parse_fns->apply_textdelta(&wh, &whb, record_baton));
312 text_stream = svn_txdelta_parse_svndiff(wh, whb, TRUE, pool);
316 /* Get a stream to which we can push the data. */
317 SVN_ERR(parse_fns->set_fulltext(&text_stream, record_baton));
320 /* Regardless of whether or not we have a sink for our data, we
322 while (content_length)
324 if (content_length >= (svn_filesize_t)buflen)
327 rlen = (apr_size_t) content_length;
330 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
331 content_length -= rlen;
332 if (rlen != num_to_read)
333 return stream_ran_dry();
337 /* write however many bytes you read. */
339 SVN_ERR(svn_stream_write(text_stream, buffer, &wlen));
342 /* Uh oh, didn't write as many bytes as we read. */
343 return svn_error_create(SVN_ERR_STREAM_UNEXPECTED_EOF, NULL,
344 _("Unexpected EOF writing contents"));
349 /* If we opened a stream, we must close it. */
351 SVN_ERR(svn_stream_close(text_stream));
358 /* Parse VERSIONSTRING and verify that we support the dumpfile format
359 version number, setting *VERSION appropriately.

VERSIONSTRING must start with SVN_REPOS_DUMPFILE_MAGIC_HEADER, with
its first ':' directly after that text and the version number after
the ':'.  A string of any other shape yields
SVN_ERR_STREAM_MALFORMED_DATA, and so does a version number greater
than SVN_REPOS_DUMPFILE_FORMAT_VERSION. */
361 parse_format_version(int *version,
362 const char *versionstring)
/* Length of the magic header text without its terminating NUL
   (presumably the macro expands to a string literal). */
364 static const int magic_len = sizeof(SVN_REPOS_DUMPFILE_MAGIC_HEADER) - 1;
/* P is the first ':' in the string; it must sit exactly MAGIC_LEN bytes
   in for the header to be accepted. */
365 const char *p = strchr(versionstring, ':');
369 || p != (versionstring + magic_len)
370 || strncmp(versionstring,
371 SVN_REPOS_DUMPFILE_MAGIC_HEADER,
373 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
374 _("Malformed dumpfile header '%s'"),
/* The number is parsed starting immediately after the ':'. */
377 SVN_ERR(svn_cstring_atoi(&value, p + 1));
/* NOTE(review): only an upper bound is checked; no lower bound on the
   version is visible here. */
379 if (value > SVN_REPOS_DUMPFILE_FORMAT_VERSION)
380 return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
381 _("Unsupported dumpfile version: %d"),
390 /*----------------------------------------------------------------------*/
392 /** The public routines **/
/* Parse the dumpfile-formatted STREAM and report what is found through
   the callbacks in PARSE_FNS.

   Outline:
   - The first line is handed to parse_format_version(); the resulting
     version is reported through PARSE_FNS->magic_header_record when
     that callback is non-NULL.
   - Each record's header block is read into a hash and classified as a
     revision, node, UUID or format record.  Anything else is an
     SVN_ERR_STREAM_MALFORMED_DATA error.
   - Property and text content blocks are handed to
     parse_property_block() and parse_text_block().  Content bytes left
     over after those blocks are read and discarded.
   - When the stream ends, the revision that is still open (if any) is
     closed and the scratch subpools are destroyed.

   When DELTAS_ARE_TEXT is true, the Text-delta header is not consulted
   and text blocks are handled as fulltext.  CANCEL_FUNC is invoked with
   CANCEL_BATON inside the record loop (its guard is not visible in this
   view). */
395 svn_repos_parse_dumpstream3(svn_stream_t *stream,
396 const svn_repos_parse_fns3_t *parse_fns,
398 svn_boolean_t deltas_are_text,
399 svn_cancel_func_t cancel_func,
404 svn_stringbuf_t *linebuf;
405 void *rev_baton = NULL;
/* One chunk-sized scratch buffer, reused below whenever content bytes
   are read from STREAM. */
406 char *buffer = apr_palloc(pool, SVN__STREAM_CHUNK_SIZE);
407 apr_size_t buflen = SVN__STREAM_CHUNK_SIZE;
/* Three scratch subpools with different lifetimes: LINEPOOL is cleared
   at the top of every loop iteration, REVPOOL when a new revision
   record replaces an open one, and NODEPOOL after each node record is
   closed. */
408 apr_pool_t *linepool = svn_pool_create(pool);
409 apr_pool_t *revpool = svn_pool_create(pool);
410 apr_pool_t *nodepool = svn_pool_create(pool);
413 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
415 return stream_ran_dry();
417 /* The first two lines of the stream are the dumpfile-format version
418 number, and a blank line. To preserve backward compatibility,
419 don't assume the existence of newer parser-vtable functions. */
420 SVN_ERR(parse_format_version(&version, linebuf->data));
421 if (parse_fns->magic_header_record != NULL)
422 SVN_ERR(parse_fns->magic_header_record(version, parse_baton, pool));
424 /* A dumpfile "record" is defined to be a header-block of
425 rfc822-style headers, possibly followed by a content-block.
427 - A header-block is always terminated by a single blank line (\n\n)
429 - We know whether the record has a content-block by looking for
430 a 'Content-length:' header. The content-block will always be
431 of a specific length, plus an extra newline.
433 Once a record is fully sucked from the stream, an indeterminate
434 number of blank lines (or lines that begin with whitespace) may
435 follow before the next record (or the end of the stream.)
442 svn_boolean_t found_node = FALSE;
443 svn_boolean_t old_v1_with_cl = FALSE;
444 const char *content_length;
448 svn_filesize_t actual_prop_length;
450 /* Clear our per-line pool. */
451 svn_pool_clear(linepool);
453 /* Check for cancellation. */
455 SVN_ERR(cancel_func(cancel_baton));
457 /* Keep reading blank lines until we discover a new record, or until
458 the stream runs out. */
459 SVN_ERR(svn_stream_readline(stream, &linebuf, "\n", &eof, linepool));
463 if (svn_stringbuf_isempty(linebuf))
464 break; /* end of stream, go home. */
466 return stream_ran_dry();
469 if ((linebuf->len == 0) || (svn_ctype_isspace(linebuf->data[0])))
470 continue; /* empty line ... loop */
472 /*** Found the beginning of a new record. ***/
474 /* The last line we read better be a header of some sort.
475 Read the whole header-block into a hash. */
/* HEADERS is built in LINEPOOL, so it and the strings fetched from it
   below are only usable until the next iteration clears that pool. */
476 SVN_ERR(read_header_block(stream, linebuf, &headers, linepool));
478 /*** Handle the various header blocks. ***/
480 /* Is this a revision record? */
481 if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER))
483 /* If we already have a rev_baton open, we need to close it
484 and clear the per-revision subpool. */
485 if (rev_baton != NULL)
487 SVN_ERR(parse_fns->close_revision(rev_baton));
488 svn_pool_clear(revpool);
491 SVN_ERR(parse_fns->new_revision_record(&rev_baton,
492 headers, parse_baton,
495 /* Or is this, perhaps, a node record? */
496 else if (svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH))
498 SVN_ERR(parse_fns->new_node_record(&node_baton,
504 /* Or is this the repos UUID? */
505 else if ((value = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_UUID)))
/* NOTE(review): this callback (like magic_header_record above) is given
   the caller's POOL rather than one of the scratch subpools -- confirm
   that is intended. */
507 SVN_ERR(parse_fns->uuid_record(value, parse_baton, pool));
509 /* Or perhaps a dumpfile format? */
510 /* ### TODO: use parse_format_version */
511 else if ((value = svn_hash_gets(headers,
512 SVN_REPOS_DUMPFILE_MAGIC_HEADER)))
514 /* ### someday, switch modes of operation here. */
/* A format record met mid-stream overwrites VERSION, which the old-v1
   detection further down reads. */
515 SVN_ERR(svn_cstring_atoi(&version, value));
517 /* Or is this bogosity?! */
520 /* What the heck is this record?!? */
521 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
522 _("Unrecognized record type in stream"));
525 /* Need 3 values below to determine v1 dump type
527 Old (pre 0.14?) v1 dumps don't have Prop-content-length
528 and Text-content-length fields, but always have a properties
529 block in a block with Content-Length > 0 */
531 content_length = svn_hash_gets(headers,
532 SVN_REPOS_DUMPFILE_CONTENT_LENGTH);
533 prop_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH);
534 text_cl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
536 version == 1 && content_length && ! prop_cl && ! text_cl;
538 /* Is there a props content-block to parse? */
/* NOTE(review): the length headers used from here on are converted with
   svn__atoui64(), and no validation of malformed values is visible in
   this view -- confirm how bad header values are handled. */
539 if (prop_cl || old_v1_with_cl)
541 const char *delta = svn_hash_gets(headers,
542 SVN_REPOS_DUMPFILE_PROP_DELTA);
543 svn_boolean_t is_delta = (delta && strcmp(delta, "true") == 0);
545 /* First, remove all node properties, unless this is a delta
547 if (found_node && !is_delta)
548 SVN_ERR(parse_fns->remove_node_props(node_baton));
550 SVN_ERR(parse_property_block
552 svn__atoui64(prop_cl ? prop_cl : content_length),
554 found_node ? node_baton : rev_baton,
558 found_node ? nodepool : revpool));
561 /* Is there a text content-block to parse? */
564 const char *delta = svn_hash_gets(headers,
565 SVN_REPOS_DUMPFILE_TEXT_DELTA);
/* IS_DELTA stays FALSE when DELTAS_ARE_TEXT is set, whatever the
   Text-delta header says. */
566 svn_boolean_t is_delta = FALSE;
567 if (! deltas_are_text)
568 is_delta = (delta && strcmp(delta, "true") == 0);
570 SVN_ERR(parse_text_block(stream,
571 svn__atoui64(text_cl),
574 found_node ? node_baton : rev_baton,
577 found_node ? nodepool : revpool));
579 else if (old_v1_with_cl)
581 /* An old-v1 block with a Content-length might have a text block.
582 If the property block did not consume all the bytes of the
583 Content-length, then it clearly does have a text block.
584 If not, then we must deduce whether we have an *empty* text
585 block or an *absent* text block. The rules are:
586 - "Node-kind: file" blocks have an empty (i.e. present, but
587 zero-length) text block, since they represent a file
588 modification. Note that file-copied-text-unmodified blocks
589 have no Content-length - even if they should have contained
590 a modified property block, the pre-0.14 dumper forgets to
591 dump the modified properties.
592 - If it is not a file node, then it is a revision or directory,
593 and so has an absent text block.
595 const char *node_kind;
596 svn_filesize_t cl_value = svn__atoui64(content_length)
597 - actual_prop_length;
600 ((node_kind = svn_hash_gets(headers,
601 SVN_REPOS_DUMPFILE_NODE_KIND))
602 && strcmp(node_kind, "file") == 0)
604 SVN_ERR(parse_text_block(stream,
608 found_node ? node_baton : rev_baton,
611 found_node ? nodepool : revpool));
614 /* if we have a content-length header, did we read all of it?
615 in case of an old v1, we *always* read all of it, because
616 text-content-length == content-length - prop-content-length
618 if (content_length && ! old_v1_with_cl)
620 apr_size_t rlen, num_to_read;
621 svn_filesize_t remaining =
622 svn__atoui64(content_length) -
623 (prop_cl ? svn__atoui64(prop_cl) : 0) -
624 (text_cl ? svn__atoui64(text_cl) : 0);
628 return svn_error_create(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
629 _("Sum of subblock sizes larger than "
630 "total block content length"));
632 /* Consume remaining bytes in this content block */
633 while (remaining > 0)
635 if (remaining >= (svn_filesize_t)buflen)
638 rlen = (apr_size_t) remaining;
/* The bytes read here are discarded; BUFFER only serves as scratch
   space for the read. */
641 SVN_ERR(svn_stream_read_full(stream, buffer, &rlen));
643 if (rlen != num_to_read)
644 return stream_ran_dry();
648 /* If we just finished processing a node record, we need to
649 close the node record and clear the per-node subpool. */
652 SVN_ERR(parse_fns->close_node(node_baton));
653 svn_pool_clear(nodepool);
656 /*** End of processing for one record. ***/
658 } /* end of stream */
660 /* Close out whatever revision we're in. */
661 if (rev_baton != NULL)
662 SVN_ERR(parse_fns->close_revision(rev_baton));
/* The scratch subpools are destroyed explicitly only on this path.  They
   were created from POOL, so on the earlier error returns they are
   presumably released together with POOL. */
664 svn_pool_destroy(linepool);
665 svn_pool_destroy(revpool);
666 svn_pool_destroy(nodepool);