1 /* reps-strings.c : interpreting representations with respect to strings
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
26 #include "svn_pools.h"
31 #include "reps-strings.h"
33 #include "bdb/reps-table.h"
34 #include "bdb/strings-table.h"
36 #include "../libsvn_fs/fs-loader.h"
38 #include "svn_private_config.h"
41 /*** Helper Functions ***/
44 /* Return non-zero iff REP is mutable under transaction TXN_ID. */
45 static svn_boolean_t rep_is_mutable(representation_t *rep,
48 if ((! rep->txn_id) || (strcmp(rep->txn_id, txn_id) != 0))
53 /* Helper macro that evaluates to an error message indicating that
54 the representation referred to by X has an unknown node kind. */
55 #define UNKNOWN_NODE_KIND(x) \
57 (SVN_ERR_FS_CORRUPT, NULL, \
58 _("Unknown node kind for representation '%s'"), x)
60 /* Return a `fulltext' representation, allocated in POOL, which
61 * references the string STR_KEY.
63 * If TXN_ID is non-NULL and non-empty, make the representation mutable
66 * If STR_KEY is non-null, copy it into an allocation from POOL.
68 * If MD5_CHECKSUM is non-null, use it as the MD5 checksum for the new
69 * rep; else initialize the rep with an all-zero (i.e., always
70 * successful) MD5 checksum.
72 * If SHA1_CHECKSUM is non-null, use it as the SHA1 checksum for the new
73 * rep; else initialize the rep with an all-zero (i.e., always
74 * successful) SHA1 checksum.
76 static representation_t *
77 make_fulltext_rep(const char *str_key,
79 svn_checksum_t *md5_checksum,
80 svn_checksum_t *sha1_checksum,
84 representation_t *rep = apr_pcalloc(pool, sizeof(*rep));
85 if (txn_id && *txn_id)
86 rep->txn_id = apr_pstrdup(pool, txn_id);
87 rep->kind = rep_kind_fulltext;
88 rep->md5_checksum = svn_checksum_dup(md5_checksum, pool);
89 rep->sha1_checksum = svn_checksum_dup(sha1_checksum, pool);
90 rep->contents.fulltext.string_key
91 = str_key ? apr_pstrdup(pool, str_key) : NULL;
96 /* Set *KEYS to an array of string keys gleaned from `delta'
97 representation REP. Allocate *KEYS in POOL.

   Each chunk's string key is copied into POOL with apr_pstrdup()
   before it is pushed onto *KEYS. If REP is not of kind
   rep_kind_delta, an SVN_ERR_FS_GENERAL error is returned instead. */
99 delta_string_keys(apr_array_header_t **keys,
100 const representation_t *rep,
105 apr_array_header_t *chunks;
107 if (rep->kind != rep_kind_delta)
108 return svn_error_create
109 (SVN_ERR_FS_GENERAL, NULL,
110 _("Representation is not of type 'delta'"));
112 /* Set up a convenience variable. */
113 chunks = rep->contents.delta.chunks;
115 /* Initialize *KEYS to an empty array sized for one key per chunk. */
116 *keys = apr_array_make(pool, chunks->nelts, sizeof(key));
120 /* Now, push the string keys for each window into *KEYS */
121 for (i = 0; i < chunks->nelts; i++)
123 rep_delta_chunk_t *chunk = APR_ARRAY_IDX(chunks, i, rep_delta_chunk_t *);
125 key = apr_pstrdup(pool, chunk->string_key);
126 APR_ARRAY_PUSH(*keys, const char *) = key;
133 /* Delete the strings associated with array KEYS in FS as part of TRAIL.

   Each key is deleted with svn_fs_bdb__string_delete(), using a subpool
   of POOL that is cleared at the top of every iteration and destroyed
   once the loop has finished. */
135 delete_strings(const apr_array_header_t *keys,
142 apr_pool_t *subpool = svn_pool_create(pool);
144 for (i = 0; i < keys->nelts; i++)
146 svn_pool_clear(subpool);
147 str_key = APR_ARRAY_IDX(keys, i, const char *);
148 SVN_ERR(svn_fs_bdb__string_delete(fs, str_key, trail, subpool));
150 svn_pool_destroy(subpool);
156 /*** Reading the contents from a representation. ***/
/* Baton handed to compose_handler(): it carries the delta window
   composed so far, together with the state needed while further
   windows are combined into it. */
158 struct compose_handler_baton
160 /* The combined window, and the pool it's allocated from. */
161 svn_txdelta_window_t *window;
162 apr_pool_t *window_pool;
164 /* If the incoming window was self-compressed, and the combined WINDOW
165 exists from previous iterations, SOURCE_BUF will point to the
166 expanded self-compressed window. */
169 /* The trail for this operation. WINDOW_POOL will be a child of
170 TRAIL->pool. No allocations will be made from TRAIL->pool itself. */
173 /* TRUE when no more windows have to be read/combined. */
176 /* TRUE if we've just started reading a new window. We need this
177 because the svndiff handler will push a NULL window at the end of
178 the stream, and we have to ignore that; but we must also know
179 when it's appropriate to push a NULL window at the combiner. */
184 /* Handle one window. If BATON is empty, copy the WINDOW into it;
185 otherwise, combine WINDOW with the one in BATON, unless WINDOW
186 is self-compressed (i.e., does not copy from the source view),
187 in which case expand.

   BATON is a struct compose_handler_baton whose trail (and trail pool)
   must be set; this is asserted on entry. Whenever a window is stored
   or composed into the baton, BATON->done is set if the resulting
   window has a zero-length source view or no source-copy ops. */
190 compose_handler(svn_txdelta_window_t *window, void *baton)
192 struct compose_handler_baton *cb = baton;
193 SVN_ERR_ASSERT(!cb->done || window == NULL);
194 SVN_ERR_ASSERT(cb->trail && cb->trail->pool);
196 if (!cb->init && !window)
199 /* We should never get here if we've already expanded a
200 self-compressed window. */
201 SVN_ERR_ASSERT(!cb->source_buf);
205 if (window && (window->sview_len == 0 || window->src_ops == 0))
207 /* This is a self-compressed window. Don't combine it with
208 the others, because the combiner may go quadratic. Instead,
209 expand it here and signal that the combination has
211 apr_size_t source_len = window->tview_len;
212 SVN_ERR_ASSERT(cb->window->sview_len == source_len);
213 cb->source_buf = apr_palloc(cb->window_pool, source_len);
214 svn_txdelta_apply_instructions(window, NULL,
215 cb->source_buf, &source_len);
220 /* Combine the incoming window with whatever's in the baton. */
221 apr_pool_t *composite_pool = svn_pool_create(cb->trail->pool);
222 svn_txdelta_window_t *composite;
224 composite = svn_txdelta_compose_windows(window, cb->window,
226 svn_pool_destroy(cb->window_pool);
227 cb->window = composite;
228 cb->window_pool = composite_pool;
229 cb->done = (composite->sview_len == 0 || composite->src_ops == 0);
234 /* Copy the (first) window into the baton. */
235 apr_pool_t *window_pool = svn_pool_create(cb->trail->pool);
236 SVN_ERR_ASSERT(cb->window_pool == NULL);
237 cb->window = svn_txdelta_window_dup(window, window_pool);
238 cb->window_pool = window_pool;
239 cb->done = (window->sview_len == 0 || window->src_ops == 0);
250 /* Read one delta window from REP[CUR_CHUNK] and push it at the
251 composition handler.

   The chunk's svndiff data is read from its string in pieces of at most
   sizeof(diffdata) bytes and fed through an svndiff parser stream whose
   window handler is compose_handler() with CB as baton. The stream is
   primed with a header carrying the version recorded in REP's first
   chunk. */
254 get_one_window(struct compose_handler_baton *cb,
256 representation_t *rep,
259 svn_stream_t *wstream;
260 char diffdata[4096]; /* hunk of svndiff data */
261 svn_filesize_t off; /* offset into svndiff data */
262 apr_size_t amt; /* how much svndiff data to/was read */
265 apr_array_header_t *chunks = rep->contents.delta.chunks;
266 rep_delta_chunk_t *this_chunk, *first_chunk;
/* CUR_CHUNK lies past REP's last chunk: push a NULL window at the
   handler instead of parsing anything. */
269 if (chunks->nelts <= cur_chunk)
270 return compose_handler(NULL, cb);
272 /* Set up a window handling stream for the svndiff data. */
273 wstream = svn_txdelta_parse_svndiff(compose_handler, cb, TRUE,
276 /* First things first: send the "SVN"{version} header through the
277 stream. ### For now, we will just use the version specified
278 in the first chunk, and then verify that no chunks have a
279 different version number than the one used. In the future,
280 we might simply convert chunks that use a different version
281 of the diff format -- or, heck, a different format
282 altogether -- to the format/version of the first chunk. */
283 first_chunk = APR_ARRAY_IDX(chunks, 0, rep_delta_chunk_t*);
287 diffdata[3] = (char) (first_chunk->version);
289 SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
290 /* FIXME: The stream write handler is borked; assert (amt == 4); */
292 /* Get this string key which holds this window's data.
293 ### todo: make sure this is an `svndiff' DIFF skel here. */
294 this_chunk = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
295 str_key = this_chunk->string_key;
297 /* Run through the svndiff data, at least as far as necessary. */
301 amt = sizeof(diffdata);
302 SVN_ERR(svn_fs_bdb__string_read(fs, str_key, diffdata,
303 off, &amt, cb->trail,
306 SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
309 SVN_ERR(svn_stream_close(wstream));
/* Once the stream is closed, the handler must have left a window (and
   the pool it lives in) in the baton. */
311 SVN_ERR_ASSERT(!cb->init);
312 SVN_ERR_ASSERT(cb->window != NULL);
313 SVN_ERR_ASSERT(cb->window_pool != NULL);
/* Overview of rep_undeltify_range(): windows from successive entries of
   DELTAS are pushed through get_one_window() until the composed window
   needs no further source (cb.done) or DELTAS is exhausted. The source
   text is then taken from the buffer expanded out of a self-compressed
   window, or read from FULLTEXT's string at the window's source view
   offset, or left NULL. A short read of the source is reported as
   SVN_ERR_FS_CORRUPT. The composed window is applied to produce target
   bytes, the part after OFFSET is copied into BUF, and the window pool
   is destroyed. The enclosing do/while repeats while fewer than *LEN
   bytes have been produced. */
318 /* Undeltify a range of data. DELTAS is the set of delta windows to
319 combine, FULLTEXT is the source text, CUR_CHUNK is the index of the
320 delta chunk we're starting from. OFFSET is the relative offset of
321 the requested data within the chunk; BUF and LEN are what we're
325 rep_undeltify_range(svn_fs_t *fs,
326 const apr_array_header_t *deltas,
327 representation_t *fulltext,
335 apr_size_t len_read = 0;
339 struct compose_handler_baton cb = { 0 };
340 char *source_buf, *target_buf;
341 apr_size_t target_len;
346 for (cur_rep = 0; !cb.done && cur_rep < deltas->nelts; ++cur_rep)
348 representation_t *const rep =
349 APR_ARRAY_IDX(deltas, cur_rep, representation_t*);
350 SVN_ERR(get_one_window(&cb, fs, rep, cur_chunk));
354 /* That's it, no more source data is available. */
357 /* The source view length should not be 0 if there are source
358 copy ops in the window. */
359 SVN_ERR_ASSERT(cb.window->sview_len > 0 || cb.window->src_ops == 0);
361 /* cb.window is the combined delta window. Read the source text
365 /* The combiner already created the source text from a
366 self-compressed window. */
367 source_buf = cb.source_buf;
369 else if (fulltext && cb.window->sview_len > 0 && cb.window->src_ops > 0)
371 apr_size_t source_len = cb.window->sview_len;
372 source_buf = apr_palloc(cb.window_pool, source_len);
373 SVN_ERR(svn_fs_bdb__string_read
374 (fs, fulltext->contents.fulltext.string_key,
375 source_buf, cb.window->sview_offset, &source_len,
377 if (source_len != cb.window->sview_len)
378 return svn_error_create
379 (SVN_ERR_FS_CORRUPT, NULL,
380 _("Svndiff source length inconsistency"));
384 source_buf = NULL; /* Won't read anything from here. */
389 target_len = *len - len_read + offset;
390 target_buf = apr_palloc(cb.window_pool, target_len);
394 target_len = *len - len_read;
398 svn_txdelta_apply_instructions(cb.window, source_buf,
399 target_buf, &target_len);
402 SVN_ERR_ASSERT(target_len > offset);
403 target_len -= offset;
404 memcpy(buf, target_buf + offset, target_len);
405 offset = 0; /* Read from the beginning of the next chunk. */
407 /* Don't need this window any more. */
408 svn_pool_destroy(cb.window_pool);
410 len_read += target_len;
414 while (len_read < *len);
422 /* Calculate the index of the chunk in REP that contains REP_OFFSET,
423 and find the relative CHUNK_OFFSET within the chunk.
424 Return -1 if offset is beyond the end of the represented data.
425 ### The basic assumption is that all delta windows are the same size
426 and aligned at the same offset, so this number is the same in all
427 dependent deltas. Oh, and the chunks in REP must be ordered.

   REP must have at least one chunk (asserted). The chunk chosen is the
   first one whose offset + size exceeds REP_OFFSET. It is also asserted
   that this chunk starts at or before REP_OFFSET, and that the relative
   offset is below SVN_MAX_OBJECT_SIZE before it is narrowed to
   apr_size_t. */
430 get_chunk_offset(representation_t *rep,
431 svn_filesize_t rep_offset,
432 apr_size_t *chunk_offset)
434 const apr_array_header_t *chunks = rep->contents.delta.chunks;
436 assert(chunks->nelts);
438 /* ### Yes, this is a linear search. I'll change this to bisection
439 the very second we notice it's slowing us down. */
440 for (cur_chunk = 0; cur_chunk < chunks->nelts; ++cur_chunk)
442 const rep_delta_chunk_t *const this_chunk
443 = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
445 if ((this_chunk->offset + this_chunk->size) > rep_offset)
447 assert(this_chunk->offset <= rep_offset);
448 assert(rep_offset - this_chunk->offset < SVN_MAX_OBJECT_SIZE);
449 *chunk_offset = (apr_size_t) (rep_offset - this_chunk->offset);
457 /* Copy into BUF *LEN bytes starting at OFFSET from the string
458 represented via REP_KEY in FS, as part of TRAIL.
459 The number of bytes actually copied is stored in *LEN.

   A fulltext rep is read straight from its string. For a delta rep,
   get_chunk_offset() locates the chunk holding OFFSET, then the chain
   of reps named by that chunk's rep_key is collected -- stopping at the
   first rep that is not a delta, or that is a delta with too few chunks
   -- and handed to rep_undeltify_range(). An SVN_ERR_FS_CORRUPT
   failure from that call is wrapped in an error naming the first and
   last rep keys of the chain. Any other rep kind yields the
   UNKNOWN_NODE_KIND error. */
461 rep_read_range(svn_fs_t *fs,
463 svn_filesize_t offset,
469 representation_t *rep;
470 apr_size_t chunk_offset;
472 /* Read in our REP. */
473 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
474 if (rep->kind == rep_kind_fulltext)
476 SVN_ERR(svn_fs_bdb__string_read(fs, rep->contents.fulltext.string_key,
477 buf, offset, len, trail, pool));
479 else if (rep->kind == rep_kind_delta)
481 const int cur_chunk = get_chunk_offset(rep, offset, &chunk_offset);
487 /* Preserve for potential use in error message. */
488 const char *first_rep_key = rep_key;
489 /* Make a list of all the rep's we need to undeltify this range.
490 We'll have to read them within this trail anyway, so we might
491 as well do it once and up front. */
492 apr_array_header_t *reps = apr_array_make(pool, 30, sizeof(rep));
495 const rep_delta_chunk_t *const first_chunk
496 = APR_ARRAY_IDX(rep->contents.delta.chunks,
497 0, rep_delta_chunk_t*);
498 const rep_delta_chunk_t *const chunk
499 = APR_ARRAY_IDX(rep->contents.delta.chunks,
500 cur_chunk, rep_delta_chunk_t*);
502 /* Verify that this chunk is of the same version as the first. */
503 if (first_chunk->version != chunk->version)
504 return svn_error_createf
505 (SVN_ERR_FS_CORRUPT, NULL,
506 _("Diff version inconsistencies in representation '%s'"),
/* Record the current rep, then step to the rep this chunk names. */
509 rep_key = chunk->rep_key;
510 APR_ARRAY_PUSH(reps, representation_t *) = rep;
511 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key,
514 while (rep->kind == rep_kind_delta
515 && rep->contents.delta.chunks->nelts > cur_chunk);
517 /* Right. We've either just read the fulltext rep, or a rep that's
518 too short, in which case we'll undeltify without source data.*/
519 if (rep->kind != rep_kind_delta && rep->kind != rep_kind_fulltext)
520 return UNKNOWN_NODE_KIND(rep_key);
522 if (rep->kind == rep_kind_delta)
523 rep = NULL; /* Don't use source data */
525 err = rep_undeltify_range(fs, reps, rep, cur_chunk, buf,
526 chunk_offset, len, trail, pool);
529 if (err->apr_err == SVN_ERR_FS_CORRUPT)
530 return svn_error_createf
531 (SVN_ERR_FS_CORRUPT, err,
532 _("Corruption detected whilst reading delta chain from "
533 "representation '%s' to '%s'"), first_rep_key, rep_key);
535 return svn_error_trace(err);
539 else /* unknown kind */
540 return UNKNOWN_NODE_KIND(rep_key);
/* Set *NEW_REP_KEY to the key of a representation that is mutable under
   TXN_ID.

   If REP_KEY is non-NULL and non-empty and names a rep that
   rep_is_mutable() accepts for TXN_ID, *NEW_REP_KEY is REP_KEY itself.
   Otherwise a new string is started by appending zero bytes via
   svn_fs_bdb__string_append(), wrapped in a fulltext rep carrying the
   empty-data MD5 and SHA1 checksums, and written as a new rep whose key
   is returned. */
547 svn_fs_base__get_mutable_rep(const char **new_rep_key,
554 representation_t *rep = NULL;
555 const char *new_str = NULL;
557 /* We were passed an existing REP_KEY, so examine it. If it is
558 mutable already, then just return REP_KEY as the mutable result
560 if (rep_key && (rep_key[0] != '\0'))
562 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
563 if (rep_is_mutable(rep, txn_id))
565 *new_rep_key = rep_key;
570 /* Either we weren't provided a base key to examine, or the base key
571 we were provided was not mutable. So, let's make a new
572 representation and return its key to the caller. */
573 SVN_ERR(svn_fs_bdb__string_append(fs, &new_str, 0, NULL, trail, pool));
574 rep = make_fulltext_rep(new_str, txn_id,
575 svn_checksum_empty_checksum(svn_checksum_md5,
577 svn_checksum_empty_checksum(svn_checksum_sha1,
580 return svn_fs_bdb__write_new_rep(new_rep_key, fs, rep, trail, pool);
/* Delete the rep REP_KEY and its backing strings when it is mutable
   under TXN_ID.

   For a fulltext rep the single string is deleted; for a delta rep
   every string key gathered by delta_string_keys() is deleted; any
   other kind yields the UNKNOWN_NODE_KIND error. The rep itself is then
   removed with svn_fs_bdb__delete_rep().
   NOTE(review): a rep that is not mutable under TXN_ID is presumably
   left untouched, as the function name suggests -- confirm against the
   early-exit branch. */
585 svn_fs_base__delete_rep_if_mutable(svn_fs_t *fs,
591 representation_t *rep;
593 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
594 if (! rep_is_mutable(rep, txn_id))
597 if (rep->kind == rep_kind_fulltext)
599 SVN_ERR(svn_fs_bdb__string_delete(fs,
600 rep->contents.fulltext.string_key,
603 else if (rep->kind == rep_kind_delta)
605 apr_array_header_t *keys;
606 SVN_ERR(delta_string_keys(&keys, rep, pool));
607 SVN_ERR(delete_strings(keys, fs, trail, pool));
609 else /* unknown kind */
610 return UNKNOWN_NODE_KIND(rep_key);
612 return svn_fs_bdb__delete_rep(fs, rep_key, trail, pool);
617 /*** Reading and writing data via representations. ***/
/* State for reading a rep's contents through a stream: built by
   rep_read_get_baton() and consumed by rep_read_contents() and
   txn_body_read_rep(). */
621 struct rep_read_baton
623 /* The FS from which we're reading. */
626 /* The representation skel whose contents we want to read. If this
627 is NULL, the rep has never had any contents, so all reads fetch 0
630 Formerly, we cached the entire rep skel here, not just the key.
631 That way we didn't have to fetch the rep from the db every time
632 we want to read a little bit more of the file. Unfortunately,
633 this has a problem: if, say, a file's representation changes
634 while we're reading (changes from fulltext to delta, for
635 example), we'll never know it. So for correctness, we now
636 refetch the representation skel every time we want to read
640 /* How many bytes have been read already. */
641 svn_filesize_t offset;
643 /* If present, the read will be done as part of this trail, and the
644 trail's pool will be used. Otherwise, see `pool' below. */
647 /* MD5 checksum context. Initialized when the baton is created, updated as
648 we read data, and finalized once the last byte has been read (see
   txn_body_read_rep()). */
649 svn_checksum_ctx_t *md5_checksum_ctx;
651 /* Final resting place of the checksum created by md5_checksum_ctx. */
652 svn_checksum_t *md5_checksum;
654 /* SHA1 checksum context. Initialized when the baton is created, updated as
655 we read data, and finalized once the last byte has been read (see
   txn_body_read_rep()). */
656 svn_checksum_ctx_t *sha1_checksum_ctx;
658 /* Final resting place of the checksum created by sha1_checksum_ctx. */
659 svn_checksum_t *sha1_checksum;
661 /* The length of the rep's contents (as fulltext, that is,
662 independent of how the rep actually stores the data.) This is
663 retrieved when the baton is created, and used to determine when
664 we have read the last byte, at which point we compare checksums.
666 Getting this at baton creation time makes interleaved reads and
667 writes on the same rep in the same trail impossible. But we're
668 not doing that, and probably no one ever should. And anyway if
669 they do, they should see problems immediately. */
672 /* Set to FALSE when the baton is created, TRUE when the checksum_ctx
674 svn_boolean_t checksum_finalized;
676 /* Used for temporary allocations. This pool is cleared at the
677 start of each invocation of the relevant stream read function --
678 see rep_read_contents(). */
679 apr_pool_t *scratch_pool;
/* Build a rep_read_baton for REP_KEY, allocated (zero-filled) in POOL.

   The baton gets fresh MD5 and SHA1 checksum contexts, the content size
   reported by svn_fs_base__rep_contents_size(), a scratch subpool of
   POOL, and REP_KEY stored by pointer (not copied). TRAIL is recorded
   in the baton only when USE_TRAIL_FOR_READS is true; otherwise the
   baton's trail is NULL. */
685 rep_read_get_baton(struct rep_read_baton **rb_p,
688 svn_boolean_t use_trail_for_reads,
692 struct rep_read_baton *b;
694 b = apr_pcalloc(pool, sizeof(*b));
695 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
696 b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
699 SVN_ERR(svn_fs_base__rep_contents_size(&(b->size), fs, rep_key,
704 b->checksum_finalized = FALSE;
706 b->trail = use_trail_for_reads ? trail : NULL;
707 b->scratch_pool = svn_pool_create(pool);
708 b->rep_key = rep_key;
718 /*** Retrieving data. ***/
/* Set *SIZE_P to the length of the contents represented by REP_KEY.

   For a fulltext rep this is the size of its string; for a delta rep it
   is the offset plus size of the last chunk (the rep must have at least
   one chunk). Any other rep kind yields the UNKNOWN_NODE_KIND error. */
721 svn_fs_base__rep_contents_size(svn_filesize_t *size_p,
727 representation_t *rep;
729 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
731 if (rep->kind == rep_kind_fulltext)
733 /* Get the size by asking Berkeley for the string's length. */
734 SVN_ERR(svn_fs_bdb__string_size(size_p, fs,
735 rep->contents.fulltext.string_key,
738 else if (rep->kind == rep_kind_delta)
740 /* Get the size by finding the last window pkg in the delta and
741 adding its offset to its size. This way, we won't even be
742 messed up by overlapping windows, as long as the window pkgs
743 are still ordered. */
744 apr_array_header_t *chunks = rep->contents.delta.chunks;
745 rep_delta_chunk_t *last_chunk;
747 SVN_ERR_ASSERT(chunks->nelts);
749 last_chunk = APR_ARRAY_IDX(chunks, chunks->nelts - 1,
750 rep_delta_chunk_t *);
751 *size_p = last_chunk->offset + last_chunk->size;
753 else /* unknown kind */
754 return UNKNOWN_NODE_KIND(rep_key);
/* Read the rep REP_KEY and hand back, through *MD5_CHECKSUM and
   *SHA1_CHECKSUM, duplicates of the checksums stored on it. The
   duplicates are allocated in POOL. */
761 svn_fs_base__rep_contents_checksums(svn_checksum_t **md5_checksum,
762 svn_checksum_t **sha1_checksum,
768 representation_t *rep;
770 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
772 *md5_checksum = svn_checksum_dup(rep->md5_checksum, pool);
774 *sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool);
/* Read the complete contents of REP_KEY into STR, with the data buffer
   allocated from POOL.

   Contents larger than SVN_MAX_OBJECT_SIZE are rejected with
   SVN_ERR_FS_GENERAL, and a failed read is reported as
   SVN_ERR_FS_CORRUPT. After reading, a checksum of the data is computed
   and compared with the one stored on the rep; a mismatch is also
   reported as SVN_ERR_FS_CORRUPT. */
781 svn_fs_base__rep_contents(svn_string_t *str,
787 svn_filesize_t contents_size;
791 SVN_ERR(svn_fs_base__rep_contents_size(&contents_size, fs, rep_key,
794 /* What if the contents are larger than we can handle? */
795 if (contents_size > SVN_MAX_OBJECT_SIZE)
796 return svn_error_createf
797 (SVN_ERR_FS_GENERAL, NULL,
798 _("Rep contents are too large: "
799 "got %s, limit is %s"),
800 apr_psprintf(pool, "%" SVN_FILESIZE_T_FMT, contents_size),
801 apr_psprintf(pool, "%" APR_SIZE_T_FMT, SVN_MAX_OBJECT_SIZE));
803 str->len = (apr_size_t) contents_size;
805 data = apr_palloc(pool, str->len);
808 SVN_ERR(rep_read_range(fs, rep_key, 0, data, &len, trail, pool));
812 return svn_error_createf
813 (SVN_ERR_FS_CORRUPT, NULL,
814 _("Failure reading representation '%s'"), rep_key);
816 /* Just the standard paranoia: verify what was read against the rep's
   stored checksum, preferring SHA1 and falling back to MD5.
   NOTE(review): rep_checksum is dereferenced below without a NULL
   check -- confirm a rep can never reach here with neither checksum
   set. */
818 representation_t *rep;
819 svn_checksum_t *checksum, *rep_checksum;
821 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
822 rep_checksum = rep->sha1_checksum ? rep->sha1_checksum : rep->md5_checksum;
823 SVN_ERR(svn_checksum(&checksum, rep_checksum->kind, str->data, str->len,
826 if (! svn_checksum_match(checksum, rep_checksum))
827 return svn_error_create(SVN_ERR_FS_CORRUPT,
828 svn_checksum_mismatch_err(rep_checksum, checksum, pool,
829 _("Checksum mismatch on representation '%s'"),
840 struct rep_read_baton *rb; /* The data source. */
841 char *buf; /* Where to put what we read. */
842 apr_size_t *len; /* How much to read / was read. */
846 /* BATON is of type `read_rep_args':
848 Read into BATON->buf the *(BATON->len) bytes starting at
849 BATON->rb->offset from the data represented at BATON->rb->rep_key
850 in BATON->rb->fs, as part of TRAIL.
852 Afterwards, *(BATON->len) is the number of bytes actually read, and
853 BATON->rb->offset is incremented by that amount.
   Until the checksums have been finalized, each read also feeds the
   running MD5 and SHA1 contexts. Once BATON->rb->offset reaches
   BATON->rb->size they are finalized and compared with the checksums
   stored on the rep, and a mismatch is reported as SVN_ERR_FS_CORRUPT.
   NOTE(review): the MD5 mismatch is raised via svn_error_create() but
   the SHA1 mismatch via svn_error_createf() -- confirm the difference
   is intended.
855 If BATON->rb->rep_key is null, this is assumed to mean the file's
856 contents have no representation, i.e., the file has no contents.
857 In that case, if BATON->rb->offset > 0, return the error
858 SVN_ERR_FS_REP_CHANGED, else just set *(BATON->len) to
861 txn_body_read_rep(void *baton, trail_t *trail)
863 struct read_rep_args *args = baton;
865 if (args->rb->rep_key)
867 SVN_ERR(rep_read_range(args->rb->fs,
873 args->rb->scratch_pool));
875 args->rb->offset += *(args->len);
877 /* We calculate the checksum just once, the moment we see the
878 * last byte of data. But we can't assume there was a short
879 * read. The caller may have known the length of the data and
880 * requested exactly that amount, so there would never be a
881 * short read. (That's why the read baton has to know the
882 * length of the data in advance.)
884 * On the other hand, some callers invoke the stream reader in a
885 * loop whose termination condition is that the read returned
886 * zero bytes of data -- which usually results in the read
887 * function being called one more time *after* the call that got
888 * a short read (indicating end-of-stream).
890 * The conditions below ensure that we compare checksums even
891 * when there is no short read associated with the last byte of
892 * data, while also ensuring that it's harmless to repeatedly
893 * read 0 bytes from the stream.
895 if (! args->rb->checksum_finalized)
897 SVN_ERR(svn_checksum_update(args->rb->md5_checksum_ctx, args->buf,
899 SVN_ERR(svn_checksum_update(args->rb->sha1_checksum_ctx, args->buf,
902 if (args->rb->offset == args->rb->size)
904 representation_t *rep;
906 SVN_ERR(svn_checksum_final(&args->rb->md5_checksum,
907 args->rb->md5_checksum_ctx,
909 SVN_ERR(svn_checksum_final(&args->rb->sha1_checksum,
910 args->rb->sha1_checksum_ctx,
912 args->rb->checksum_finalized = TRUE;
914 SVN_ERR(svn_fs_bdb__read_rep(&rep, args->rb->fs,
916 trail, trail->pool));
918 if (rep->md5_checksum
919 && (! svn_checksum_match(rep->md5_checksum,
920 args->rb->md5_checksum)))
921 return svn_error_create(SVN_ERR_FS_CORRUPT,
922 svn_checksum_mismatch_err(rep->md5_checksum,
923 args->rb->md5_checksum, trail->pool,
924 _("MD5 checksum mismatch on representation '%s'"),
928 if (rep->sha1_checksum
929 && (! svn_checksum_match(rep->sha1_checksum,
930 args->rb->sha1_checksum)))
931 return svn_error_createf(SVN_ERR_FS_CORRUPT,
932 svn_checksum_mismatch_err(rep->sha1_checksum,
933 args->rb->sha1_checksum, trail->pool,
934 _("SHA1 checksum mismatch on representation '%s'"),
940 else if (args->rb->offset > 0)
944 (SVN_ERR_FS_REP_CHANGED, NULL,
945 _("Null rep, but offset past zero already"));
/* Stream read function for a rep_read_baton (BATON): read up to *LEN
   bytes into BUF.

   The baton's scratch pool is cleared first. The read itself is done by
   txn_body_read_rep(), called directly with the baton's trail or run
   through svn_fs_base__retry_txn(). */
955 rep_read_contents(void *baton, char *buf, apr_size_t *len)
957 struct rep_read_baton *rb = baton;
958 struct read_rep_args args;
960 /* Clear the scratch pool of the results of previous invocations. */
961 svn_pool_clear(rb->scratch_pool);
967 /* If we got a trail, use it; else make one. */
969 SVN_ERR(txn_body_read_rep(&args, rb->trail));
972 /* In the case of reading from the db, any returned data should
973 live in our pre-allocated buffer, so the whole operation can
974 happen within a single malloc/free cycle. This prevents us
975 from creating millions of unnecessary trail subpools when
976 reading a big file. */
977 SVN_ERR(svn_fs_base__retry_txn(rb->fs,
/* State for writing a rep's contents through a stream: built by
   rep_write_get_baton() and used by rep_write_contents() and
   rep_write_close_contents(). */
990 struct rep_write_baton
992 /* The FS in which we're writing. */
995 /* The representation skel whose contents we want to write. */
998 /* The transaction id under which this write action will take
1002 /* If present, do the write as part of this trail, and use trail's
1003 pool. Otherwise, see `pool' below. */
1006 /* SHA1 and MD5 checksums. Initialized when the baton is created,
1007 updated as we write data, and finalized and stored when the
1008 stream is closed. FINALIZED records that the contexts have already
   been finalized, so that this happens only once. */
1009 svn_checksum_ctx_t *md5_checksum_ctx;
1010 svn_checksum_t *md5_checksum;
1011 svn_checksum_ctx_t *sha1_checksum_ctx;
1012 svn_checksum_t *sha1_checksum;
1013 svn_boolean_t finalized;
1015 /* Used for temporary allocations, iff `trail' (above) is null. */
/* Return a rep_write_baton allocated (zero-filled) in POOL, with fresh
   MD5 and SHA1 checksum contexts also created in POOL. REP_KEY is
   stored in the baton by pointer; it is not copied. */
1021 static struct rep_write_baton *
1022 rep_write_get_baton(svn_fs_t *fs,
1023 const char *rep_key,
1028 struct rep_write_baton *b;
1030 b = apr_pcalloc(pool, sizeof(*b));
1031 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1032 b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
1036 b->rep_key = rep_key;
1043 /* Write LEN bytes from BUF into the end of the string represented via
1044 REP_KEY in FS, as part of TRAIL. If the representation is not
1045 mutable, return the error SVN_ERR_FS_REP_NOT_MUTABLE.

   Only fulltext reps are appended to. A mutable delta rep is reported
   as SVN_ERR_FS_CORRUPT, and any other kind yields the
   UNKNOWN_NODE_KIND error. */
1046 static svn_error_t *
1047 rep_write(svn_fs_t *fs,
1048 const char *rep_key,
1055 representation_t *rep;
1057 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1059 if (! rep_is_mutable(rep, txn_id))
1060 return svn_error_createf
1061 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1062 _("Rep '%s' is not mutable"), rep_key);
1064 if (rep->kind == rep_kind_fulltext)
1066 SVN_ERR(svn_fs_bdb__string_append
1067 (fs, &(rep->contents.fulltext.string_key), len, buf,
1070 else if (rep->kind == rep_kind_delta)
1072 /* There should never be a case when we have a mutable
1073 non-fulltext rep. The only code that creates mutable reps is
1074 in this file, and it creates them fulltext. */
1075 return svn_error_createf
1076 (SVN_ERR_FS_CORRUPT, NULL,
1077 _("Rep '%s' both mutable and non-fulltext"), rep_key);
1079 else /* unknown kind */
1080 return UNKNOWN_NODE_KIND(rep_key);
1082 return SVN_NO_ERROR;
/* Argument bundle handed to txn_body_write_rep() as its baton. */
1086 struct write_rep_args
1088 struct rep_write_baton *wb; /* Destination. */
1089 const char *buf; /* Data. */
1090 apr_size_t len; /* How much to write. */
1094 /* BATON is of type `write_rep_args':
1095 Append onto BATON->wb->rep_key's contents BATON->len bytes of
1096 data from BATON->buf, in BATON->wb->fs, as part of TRAIL.
   After the append, the MD5 and SHA1 checksum contexts held in
   BATON->wb are updated with the same bytes.
1098 If the representation is not mutable, return the error
1099 SVN_ERR_FS_REP_NOT_MUTABLE. */
1100 static svn_error_t *
1101 txn_body_write_rep(void *baton, trail_t *trail)
1103 struct write_rep_args *args = baton;
1105 SVN_ERR(rep_write(args->wb->fs,
1112 SVN_ERR(svn_checksum_update(args->wb->md5_checksum_ctx,
1113 args->buf, args->len));
1114 SVN_ERR(svn_checksum_update(args->wb->sha1_checksum_ctx,
1115 args->buf, args->len));
1116 return SVN_NO_ERROR;
/* Stream write function for a rep_write_baton (BATON); installed by
   svn_fs_base__rep_contents_write_stream().

   The write is performed by txn_body_write_rep(), called directly with
   the baton's trail or run through svn_fs_base__retry_txn(). */
1120 static svn_error_t *
1121 rep_write_contents(void *baton,
1125 struct rep_write_baton *wb = baton;
1126 struct write_rep_args args;
1128 /* We toss LEN's indirectness because if not all the bytes are
1129 written, it's an error, so we wouldn't be reporting anything back
1130 through *LEN anyway. */
1135 /* If we got a trail, use it; else make one. */
1137 SVN_ERR(txn_body_write_rep(&args, wb->trail));
1140 /* In the case of simply writing the rep to the db, we're
1141 *certain* that there's no data coming back to us that needs
1142 to be preserved... so the whole operation can happen within a
1143 single malloc/free cycle. This prevents us from creating
1144 millions of unnecessary trail subpools when writing a big
1146 SVN_ERR(svn_fs_base__retry_txn(wb->fs,
1153 return SVN_NO_ERROR;
1157 /* Helper for rep_write_close_contents(); see that doc string for
1158 more. BATON is of type `struct rep_write_baton'.

   Re-reads the rep named by BATON->rep_key, replaces its MD5 and SHA1
   checksums with duplicates (allocated in TRAIL->pool) of the ones held
   in BATON, and writes the rep back under the same key. */
1159 static svn_error_t *
1160 txn_body_write_close_rep(void *baton, trail_t *trail)
1162 struct rep_write_baton *wb = baton;
1163 representation_t *rep;
1165 SVN_ERR(svn_fs_bdb__read_rep(&rep, wb->fs, wb->rep_key,
1166 trail, trail->pool));
1167 rep->md5_checksum = svn_checksum_dup(wb->md5_checksum, trail->pool);
1168 rep->sha1_checksum = svn_checksum_dup(wb->sha1_checksum, trail->pool);
1169 return svn_fs_bdb__write_rep(wb->fs, wb->rep_key, rep,
1170 trail, trail->pool);
1174 /* BATON is of type `struct rep_write_baton'.
 *
 * While BATON->finalized is still FALSE, both the MD5 and the SHA1
 * contexts are finalized and the flag is set, so this happens once.
 * The stored rep is then updated by txn_body_write_close_rep(), called
 * directly with BATON->trail or run through svn_fs_base__retry_txn().
 *
1176 * Finalize BATON->md5_checksum_ctx and store the resulting digest under
1179 static svn_error_t *
1180 rep_write_close_contents(void *baton)
1182 struct rep_write_baton *wb = baton;
1184 /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated
1185 digestification, then we wouldn't need a stream close function at
1186 all -- instead, we could update the stored checksum each time a
1187 write occurred, which would have the added advantage of making
1188 interleaving reads and writes work. Currently, they'd fail with
1189 a checksum mismatch, it just happens that our code never tries to
1192 if (! wb->finalized)
1194 SVN_ERR(svn_checksum_final(&wb->md5_checksum, wb->md5_checksum_ctx,
1196 SVN_ERR(svn_checksum_final(&wb->sha1_checksum, wb->sha1_checksum_ctx,
1198 wb->finalized = TRUE;
1201 /* If we got a trail, use it; else make one. */
1203 return txn_body_write_close_rep(wb, wb->trail);
1205 /* We need to keep our trail pool around this time so the
1206 checksums we've calculated survive. */
1207 return svn_fs_base__retry_txn(wb->fs, txn_body_write_close_rep,
1208 wb, FALSE, wb->pool);
1212 /** Public read and write stream constructors. **/
/* Set *RS_P to a stream, created in POOL, for reading the contents of
   REP_KEY. The stream's baton is a rep_read_baton obtained from
   rep_read_get_baton(). NULL is passed for the first read handler
   argument of svn_stream_set_read2(), so only full reads are
   supported. */
1215 svn_fs_base__rep_contents_read_stream(svn_stream_t **rs_p,
1217 const char *rep_key,
1218 svn_boolean_t use_trail_for_reads,
1222 struct rep_read_baton *rb;
1224 SVN_ERR(rep_read_get_baton(&rb, fs, rep_key, use_trail_for_reads,
1226 *rs_p = svn_stream_create(rb, pool);
1227 svn_stream_set_read2(*rs_p, NULL /* only full read support */,
1230 return SVN_NO_ERROR;
1234 /* Clear the contents of REP_KEY, so that it represents the empty
1235 string, as part of TRAIL. TXN_ID is the id of the Subversion
1236 transaction under which this occurs. If REP_KEY is not mutable,
1237 return the error SVN_ERR_FS_REP_NOT_MUTABLE.

   The rep is asserted to be a fulltext rep. When it has a non-empty
   string key, the string is cleared, both stored checksums are reset
   to NULL, and the rep is written back. */
1238 static svn_error_t *
1239 rep_contents_clear(svn_fs_t *fs,
1240 const char *rep_key,
1245 representation_t *rep;
1246 const char *str_key;
1248 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1250 /* Make sure it's mutable. */
1251 if (! rep_is_mutable(rep, txn_id))
1252 return svn_error_createf
1253 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1254 _("Rep '%s' is not mutable"), rep_key);
1256 SVN_ERR_ASSERT(rep->kind == rep_kind_fulltext);
1258 /* If rep has no string, just return success. Else, clear the
1259 underlying string. */
1260 str_key = rep->contents.fulltext.string_key;
1261 if (str_key && *str_key)
1263 SVN_ERR(svn_fs_bdb__string_clear(fs, str_key, trail, pool));
1264 rep->md5_checksum = NULL;
1265 rep->sha1_checksum = NULL;
1266 SVN_ERR(svn_fs_bdb__write_rep(fs, rep_key, rep, trail, pool));
1268 return SVN_NO_ERROR;
/* Create a writable stream for representation REP_KEY and hand it back
   in *WS_P.

   The rep's current contents are first emptied with
   rep_contents_clear(), which also yields SVN_ERR_FS_REP_NOT_MUTABLE
   when the rep is not mutable under TXN_ID.  A write baton is then
   built by rep_write_get_baton(); TRAIL is handed to it only when
   USE_TRAIL_FOR_WRITES is true, otherwise NULL is passed.  The
   resulting stream writes through rep_write_contents() and is closed by
   rep_write_close_contents().  Returns SVN_NO_ERROR on success.

   NOTE(review): the return type and the fs/txn_id/trail/pool parameter
   lines are missing from this listing (numbering skips 1272, 1274,
   1276, 1278-1280) -- confirm them against the complete file. */
1273 svn_fs_base__rep_contents_write_stream(svn_stream_t **ws_p,
1275 const char *rep_key,
1277 svn_boolean_t use_trail_for_writes,
1281 struct rep_write_baton *wb;
1283 /* Clear the current rep contents (free mutability check!). */
1284 SVN_ERR(rep_contents_clear(fs, rep_key, txn_id, trail, pool));
1286 /* Now, generate the write baton and stream. */
1287 wb = rep_write_get_baton(fs, rep_key, txn_id,
1288 use_trail_for_writes ? trail : NULL, pool);
1289 *ws_p = svn_stream_create(wb, pool);
/* Install the write and close callbacks on the new stream. */
1290 svn_stream_set_write(*ws_p, rep_write_contents);
1291 svn_stream_set_close(*ws_p, rep_write_close_contents);
1293 return SVN_NO_ERROR;
1298 /*** Deltified storage. ***/
1300 /* Baton for the svn_write_fn_t write_svndiff_strings(). */
/* Members referenced elsewhere in this file: fs, key, size,
   header_read, version and trail.
   NOTE(review): most member declarations, the braces and two comment
   terminators are missing from this listing (numbering skips 1302,
   1304-1305, 1308-1310, 1313-1314, 1322-1324, 1326 onwards); only
   `header_read' is visibly declared.  Restore the missing lines from
   the complete file before relying on this definition. */
1301 struct write_svndiff_strings_baton
1303 /* The fs where lives the string we're writing. */
/* NOTE(review): the append helper actually called by
   write_svndiff_strings() is svn_fs_bdb__string_append(). */
1306 /* The key of the string we're writing to. Typically this is
1307 initialized to NULL, so svn_fs_base__string_append() can fill in a
1311 /* The amount of txdelta data written to the current
1312 string-in-progress. */
/* NOTE(review): write_svndiff_strings() compares header_read against 4
   and advances it while *stripping* the svndiff header, so this counts
   header bytes consumed so far rather than bytes written to the
   strings table. */
1315 /* The amount of svndiff header information we've written thus far
1316 to the strings table. */
1317 apr_size_t header_read;
1319 /* The version number of the svndiff data written. ### You'd better
1320 not count on this being populated after the first chunk is sent
1321 through the interface, since it lives at the 4th byte of the
1325 /* The trail we're writing in. */
1331 /* Function of type `svn_write_fn_t', for writing to a collection of
1332 strings; BATON is `struct write_svndiff_strings_baton *'.
1334 On the first call, BATON->key is null. A new string key in
1335 BATON->fs is chosen and stored in BATON->key; each call appends
1336 *LEN bytes from DATA onto the string. *LEN is never changed; if
1337 the write fails to write all *LEN bytes, an error is returned.
1338 BATON->size is used to track the total amount of data written via
1339 this handler, and must be reset by the caller to 0 when appropriate. */
/* Additional notes:
   - BATON->header_read tracks how many of the first 4 bytes have been
     consumed.  While it is below 4, the remaining header byte count is
     computed into NHEADER and added to header_read; once header_read
     equals 4, BATON->version is read from the byte just before BUF.
   - Data is appended with svn_fs_bdb__string_append() using
     BATON->trail and BATON->trail->pool.  If BATON->key is still NULL
     afterwards, SVN_ERR_FS_GENERAL is returned.
   NOTE(review): the statements that adjust *LEN and BUF (numbering
   skips 1355-1356 and 1377-1378, 1380-1381) are not visible in this
   listing.  NHEADER is computed without reference to *LEN, so confirm
   that a first write shorter than the remaining header cannot
   underflow *LEN or step BUF past DATA's end. */
1340 static svn_error_t *
1341 write_svndiff_strings(void *baton, const char *data, apr_size_t *len)
1343 struct write_svndiff_strings_baton *wb = baton;
1344 const char *buf = data;
1345 apr_size_t nheader = 0;
1347 /* If we haven't stripped all the header information from this
1348 stream yet, keep stripping. If someone sends a first window
1349 through here that's shorter than 4 bytes long, this will probably
1350 cause a nuclear reactor meltdown somewhere in the American
1352 if (wb->header_read < 4)
1354 nheader = 4 - wb->header_read;
1357 wb->header_read += nheader;
1359 /* If we have *now* read the full 4-byte header, check that
1360 least byte for the version number of the svndiff format. */
1361 if (wb->header_read == 4)
1362 wb->version = *(buf - 1);
1365 /* Append to the current string we're writing (or create a new one
1366 if WB->key is NULL). */
1367 SVN_ERR(svn_fs_bdb__string_append(wb->fs, &(wb->key), *len,
1368 buf, wb->trail, wb->trail->pool));
1370 /* Make sure we (still) have a key. */
1371 if (wb->key == NULL)
1372 return svn_error_create(SVN_ERR_FS_GENERAL, NULL,
1373 _("Failed to get new string key"));
1375 /* Restore *LEN to the value it *would* have been were it not for
1376 header stripping. */
1379 /* Increment our running total of bytes written to this string. */
1382 return SVN_NO_ERROR;
/* Bookkeeping record for one window written during deltification.
   svn_fs_base__rep_deltify() allocates one of these per window, fills
   it from the write baton and the txdelta window, and later turns each
   record into a delta chunk of the new representation.
   NOTE(review): the braces and the closing typedef name line are
   missing from this listing (numbering skips 1387, 1392 onwards). */
1386 typedef struct window_write_t
1388 const char *key; /* string key for this window */
1389 apr_size_t svndiff_len; /* amount of svndiff data written to the string */
1390 svn_filesize_t text_off; /* offset of fulltext represented by this window */
1391 apr_size_t text_len; /* amount of fulltext data represented by this window */
/* Re-store representation TARGET in FS as a delta against
   representation SOURCE, as part of TRAIL.

   Outline, as far as this listing shows:
     1. Refuse to deltify a rep against itself (SVN_ERR_FS_CORRUPT).
     2. Open read streams on SOURCE and TARGET, compute a text delta
        between them with svn_txdelta2(), and encode it as svndiff
        through a stream whose write callback is
        write_svndiff_strings().  The svndiff version argument is 1
        when the filesystem format is at least
        SVN_FS_BASE__MIN_SVNDIFF1_FORMAT; the second call passes 0
        (presumably the `else' branch -- that line is not visible).
     3. For each window, reset the baton's key and size, clear the
        window subpool, fetch and dispatch the window, then record its
        string key, svndiff length, text offset and text length.
     4. Read the old TARGET rep.  If it is a fulltext and the svndiff
        total is not smaller than the old string, delete the strings
        just written and return SVN_NO_ERROR without changing TARGET.
        If it is a delta, collect its string keys.  Any other kind
        yields the "unknown node kind" error.
     5. Write a new delta rep (txn_id NULL, checksums copied from the
        old rep) whose chunks reference the new strings and SOURCE,
        then delete the original strings.

   NOTE(review): this listing omits many short lines (return type,
   the target/source/trail/pool parameters, several local
   declarations, braces, loop keywords, some `if' lines and one comment
   terminator).  Confirm control flow against the complete file. */
1397 svn_fs_base__rep_deltify(svn_fs_t *fs,
1403 base_fs_data_t *bfd = fs->fsap_data;
1404 svn_stream_t *source_stream; /* stream to read the source */
1405 svn_stream_t *target_stream; /* stream to read the target */
1406 svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows */
1408 /* window-y things, and an array to track them */
1410 apr_array_header_t *windows;
1412 /* stream to write new (deltified) target data and its baton */
1413 svn_stream_t *new_target_stream;
1414 struct write_svndiff_strings_baton new_target_baton;
1416 /* window handler/baton for writing to above stream */
1417 svn_txdelta_window_handler_t new_target_handler;
1418 void *new_target_handler_baton;
1420 /* yes, we do windows */
1421 svn_txdelta_window_t *window;
1423 /* The current offset into the fulltext that our window is about to
1424 write. This doubles, after all windows are written, as the
1425 total size of the svndiff data for the deltification process. */
/* NOTE(review): tview_off only ever accumulates window->tview_len,
   i.e. fulltext bytes; the svndiff total is tracked separately in
   diffsize below. */
1426 svn_filesize_t tview_off = 0;
1428 /* The total amount of diff data written while deltifying. */
1429 svn_filesize_t diffsize = 0;
1431 /* TARGET's original string keys */
1432 apr_array_header_t *orig_str_keys;
1434 /* The checksums for the representation's fulltext contents. */
1435 svn_checksum_t *rep_md5_checksum;
1436 svn_checksum_t *rep_sha1_checksum;
1439 const unsigned char *digest;
1441 /* pool for holding the windows */
1444 /* Paranoia: never allow a rep to be deltified against itself,
1445 because then there would be no fulltext reachable in the delta
1446 chain, and badness would ensue. */
1447 if (strcmp(target, source) == 0)
1448 return svn_error_createf
1449 (SVN_ERR_FS_CORRUPT, NULL,
1450 _("Attempt to deltify '%s' against itself"),
1453 /* Set up a handler for the svndiff data, which will write each
1454 window to its own string in the `strings' table. */
1455 new_target_baton.fs = fs;
1456 new_target_baton.trail = trail;
/* NOTE(review): header_read is declared as an apr_size_t byte counter
   in the baton; FALSE is used here as its zero value. */
1457 new_target_baton.header_read = FALSE;
1458 new_target_stream = svn_stream_create(&new_target_baton, pool);
1459 svn_stream_set_write(new_target_stream, write_svndiff_strings);
1461 /* Get streams to our source and target text data. */
1462 SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream, fs, source,
1463 TRUE, trail, pool));
1464 SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream, fs, target,
1465 TRUE, trail, pool));
1467 /* Setup a stream to convert the textdelta data into svndiff windows. */
1468 svn_txdelta2(&txdelta_stream, source_stream, target_stream, TRUE, pool);
1470 if (bfd->format >= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT)
1471 svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1472 new_target_stream, 1,
1473 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1475 svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1476 new_target_stream, 0,
1477 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1479 /* subpool for the windows */
1480 wpool = svn_pool_create(pool);
1482 /* Now, loop, manufacturing and dispatching windows of svndiff data. */
1483 windows = apr_array_make(pool, 1, sizeof(ww));
1486 /* Reset some baton variables. */
1487 new_target_baton.size = 0;
1488 new_target_baton.key = NULL;
1490 /* Free the window. */
1491 svn_pool_clear(wpool);
1493 /* Fetch the next window of txdelta data. */
1494 SVN_ERR(svn_txdelta_next_window(&window, txdelta_stream, wpool));
1496 /* Send off this package to be written as svndiff data. */
1497 SVN_ERR(new_target_handler(window, new_target_handler_baton));
1500 /* Add a new window description to our array. */
1501 ww = apr_pcalloc(pool, sizeof(*ww));
1502 ww->key = new_target_baton.key;
1503 ww->svndiff_len = new_target_baton.size;
1504 ww->text_off = tview_off;
1505 ww->text_len = window->tview_len;
1506 APR_ARRAY_PUSH(windows, window_write_t *) = ww;
1508 /* Update our recordkeeping variables. */
1509 tview_off += window->tview_len;
1510 diffsize += ww->svndiff_len;
1515 svn_pool_destroy(wpool);
1517 /* Having processed all the windows, we can query the MD5 digest
1519 digest = svn_txdelta_md5_digest(txdelta_stream);
1521 return svn_error_createf
1522 (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, NULL,
1523 _("Failed to calculate MD5 digest for '%s'"),
1526 /* Construct a list of the strings used by the old representation so
1527 that we can delete them later. While we are here, if the old
1528 representation was a fulltext, check to make sure the delta we're
1529 replacing it with is actually smaller. (Don't perform this check
1530 if we're replacing a delta; in that case, we're going for a time
1531 optimization, not a space optimization.) */
1533 representation_t *old_rep;
1534 const char *str_key;
1536 SVN_ERR(svn_fs_bdb__read_rep(&old_rep, fs, target, trail, pool));
1537 if (old_rep->kind == rep_kind_fulltext)
1539 svn_filesize_t old_size = 0;
1541 str_key = old_rep->contents.fulltext.string_key;
1542 SVN_ERR(svn_fs_bdb__string_size(&old_size, fs, str_key,
1544 orig_str_keys = apr_array_make(pool, 1, sizeof(str_key));
1545 APR_ARRAY_PUSH(orig_str_keys, const char *) = str_key;
1547 /* If the new data is NOT a space optimization, destroy the
1548 string(s) we created, and get outta here. */
1549 if (diffsize >= old_size)
1552 for (i = 0; i < windows->nelts; i++)
1554 ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1555 SVN_ERR(svn_fs_bdb__string_delete(fs, ww->key, trail, pool));
1557 return SVN_NO_ERROR;
1560 else if (old_rep->kind == rep_kind_delta)
1561 SVN_ERR(delta_string_keys(&orig_str_keys, old_rep, pool));
1562 else /* unknown kind */
1563 return UNKNOWN_NODE_KIND(target);
1565 /* Save the checksums, since the new rep needs them. */
/* NOTE(review): these copies are duplicated once more when they are
   assigned to new_rep below; one of the two svn_checksum_dup() rounds
   looks redundant. */
1566 rep_md5_checksum = svn_checksum_dup(old_rep->md5_checksum, pool);
1567 rep_sha1_checksum = svn_checksum_dup(old_rep->sha1_checksum, pool);
1570 /* Hook the new strings we wrote into the rest of the filesystem by
1571 building a new representation to replace our old one. */
1573 representation_t new_rep;
1574 rep_delta_chunk_t *chunk;
1575 apr_array_header_t *chunks;
1578 new_rep.kind = rep_kind_delta;
1579 new_rep.txn_id = NULL;
1581 /* Migrate the old rep's checksums to the new rep. */
1582 new_rep.md5_checksum = svn_checksum_dup(rep_md5_checksum, pool);
1583 new_rep.sha1_checksum = svn_checksum_dup(rep_sha1_checksum, pool);
1585 chunks = apr_array_make(pool, windows->nelts, sizeof(chunk));
1587 /* Loop through the windows we wrote, creating and adding new
1588 chunks to the representation. */
1589 for (i = 0; i < windows->nelts; i++)
1591 ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1593 /* Allocate a chunk and its window */
1594 chunk = apr_palloc(pool, sizeof(*chunk));
1595 chunk->offset = ww->text_off;
1597 /* Populate the window */
1598 chunk->version = new_target_baton.version;
1599 chunk->string_key = ww->key;
1600 chunk->size = ww->text_len;
1601 chunk->rep_key = source;
1603 /* Add this chunk to the array. */
1604 APR_ARRAY_PUSH(chunks, rep_delta_chunk_t *) = chunk;
1607 /* Put the chunks array into the representation. */
1608 new_rep.contents.delta.chunks = chunks;
1610 /* Write out the new representation. */
1611 SVN_ERR(svn_fs_bdb__write_rep(fs, target, &new_rep, trail, pool));
1613 /* Delete the original pre-deltified strings. */
1614 SVN_ERR(delete_strings(orig_str_keys, fs, trail, pool));
1617 return SVN_NO_ERROR;