/* revprops.c --- everything needed to handle revprops in FSX
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */

#include <assert.h>
#include <apr_md5.h>

#include "svn_pools.h"
#include "svn_hash.h"
#include "svn_dirent_uri.h"

#include "fs_x.h"
#include "revprops.h"
#include "util.h"
#include "transaction.h"

#include "private/svn_subr_private.h"
#include "private/svn_string_private.h"
#include "../libsvn_fs/fs-loader.h"

#include "svn_private_config.h"

/* Give writing processes 10 seconds to replace an existing revprop
   file with a new one. After that time, we assume that the writing
   process got aborted and that we have re-read revprops. */
#define REVPROP_CHANGE_TIMEOUT (10 * 1000000)

/* In case of an inconsistent read, close the generation file, yield,
   re-open and re-read.  This is the number of times we try this before
   giving up. */
#define GENERATION_READ_RETRY_COUNT 100

/* Maximum size of the generation number file contents (including NUL).
*/ #define CHECKSUMMED_NUMBER_BUFFER_LEN \ (SVN_INT64_BUFFER_SIZE + 3 + APR_MD5_DIGESTSIZE * 2) svn_error_t * svn_fs_x__upgrade_pack_revprops(svn_fs_t *fs, svn_fs_upgrade_notify_t notify_func, void *notify_baton, svn_cancel_func_t cancel_func, void *cancel_baton, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; const char *revprops_shard_path; const char *revprops_pack_file_dir; apr_int64_t shard; apr_int64_t first_unpacked_shard = ffd->min_unpacked_rev / ffd->max_files_per_dir; apr_pool_t *iterpool = svn_pool_create(scratch_pool); const char *revsprops_dir = svn_dirent_join(fs->path, PATH_REVPROPS_DIR, scratch_pool); int compression_level = ffd->compress_packed_revprops ? SVN_DELTA_COMPRESSION_LEVEL_DEFAULT : SVN_DELTA_COMPRESSION_LEVEL_NONE; /* first, pack all revprops shards to match the packed revision shards */ for (shard = 0; shard < first_unpacked_shard; ++shard) { svn_pool_clear(iterpool); revprops_pack_file_dir = svn_dirent_join(revsprops_dir, apr_psprintf(iterpool, "%" APR_INT64_T_FMT PATH_EXT_PACKED_SHARD, shard), iterpool); revprops_shard_path = svn_dirent_join(revsprops_dir, apr_psprintf(iterpool, "%" APR_INT64_T_FMT, shard), iterpool); SVN_ERR(svn_fs_x__pack_revprops_shard(revprops_pack_file_dir, revprops_shard_path, shard, ffd->max_files_per_dir, (int)(0.9 * ffd->revprop_pack_size), compression_level, cancel_func, cancel_baton, iterpool)); if (notify_func) SVN_ERR(notify_func(notify_baton, shard, svn_fs_upgrade_pack_revprops, iterpool)); } svn_pool_destroy(iterpool); return SVN_NO_ERROR; } svn_error_t * svn_fs_x__upgrade_cleanup_pack_revprops(svn_fs_t *fs, svn_fs_upgrade_notify_t notify_func, void *notify_baton, svn_cancel_func_t cancel_func, void *cancel_baton, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; const char *revprops_shard_path; apr_int64_t shard; apr_int64_t first_unpacked_shard = ffd->min_unpacked_rev / ffd->max_files_per_dir; apr_pool_t *iterpool = svn_pool_create(scratch_pool); const char 
*revsprops_dir = svn_dirent_join(fs->path, PATH_REVPROPS_DIR, scratch_pool); /* delete the non-packed revprops shards afterwards */ for (shard = 0; shard < first_unpacked_shard; ++shard) { svn_pool_clear(iterpool); revprops_shard_path = svn_dirent_join(revsprops_dir, apr_psprintf(iterpool, "%" APR_INT64_T_FMT, shard), iterpool); SVN_ERR(svn_fs_x__delete_revprops_shard(revprops_shard_path, shard, ffd->max_files_per_dir, cancel_func, cancel_baton, iterpool)); if (notify_func) SVN_ERR(notify_func(notify_baton, shard, svn_fs_upgrade_cleanup_revprops, iterpool)); } svn_pool_destroy(iterpool); return SVN_NO_ERROR; } /* Revprop caching management. * * Mechanism: * ---------- * * Revprop caching needs to be activated and will be deactivated for the * respective FS instance if the necessary infrastructure could not be * initialized. As long as no revprops are being read or changed, revprop * caching imposes no overhead. * * When activated, we cache revprops using (revision, generation) pairs * as keys with the generation being incremented upon every revprop change. * Since the cache is process-local, the generation needs to be tracked * for at least as long as the process lives but may be reset afterwards. * * We track the revprop generation in a persistent, unbuffered file that * we may keep open for the lifetime of the svn_fs_t. It is the OS' * responsibility to provide us with the latest contents upon read. To * detect incomplete updates due to non-atomic reads, we put a MD5 checksum * next to the actual generation number and verify that it matches. * * Since we cannot guarantee that the OS will provide us with up-to-date * data buffers for open files, we re-open and re-read the file before * modifying it. This will prevent lost updates. * * A race condition exists between switching to the modified revprop data * and bumping the generation number. In particular, the process may crash * just after switching to the new revprop data and before bumping the * generation. 
To be able to detect this scenario, we bump the generation * twice per revprop change: once immediately before (creating an odd number) * and once after the atomic switch (even generation). * * A writer holding the write lock can immediately assume a crashed writer * in case of an odd generation or they would not have been able to acquire * the lock. A reader detecting an odd generation will use that number and * be forced to re-read any revprop data - usually getting the new revprops * already. If the generation file modification timestamp is too old, the * reader will assume a crashed writer, acquire the write lock and bump * the generation if it is still odd. So, for about REVPROP_CHANGE_TIMEOUT * after the crash, reader caches may be stale. */ /* If the revprop generation file in FS is open, close it. This is a no-op * if the file is not open. */ static svn_error_t * close_revprop_generation_file(svn_fs_t *fs, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; if (ffd->revprop_generation_file) { SVN_ERR(svn_io_file_close(ffd->revprop_generation_file, scratch_pool)); ffd->revprop_generation_file = NULL; } return SVN_NO_ERROR; } /* Make sure the revprop_generation member in FS is set. If READ_ONLY is * set, open the file w/o write permission if the file is not open yet. * The file is kept open if it has sufficient rights (or more) but will be * closed and re-opened if it provided insufficient access rights. * * Call only for repos that support revprop caching. */ static svn_error_t * open_revprop_generation_file(svn_fs_t *fs, svn_boolean_t read_only, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; apr_int32_t flags = read_only ? APR_READ : (APR_READ | APR_WRITE); /* Close the current file handle if it has insufficient rights. 
*/ if ( ffd->revprop_generation_file && (apr_file_flags_get(ffd->revprop_generation_file) & flags) != flags) SVN_ERR(close_revprop_generation_file(fs, scratch_pool)); /* If not open already, open with sufficient rights. */ if (ffd->revprop_generation_file == NULL) { const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool); SVN_ERR(svn_io_file_open(&ffd->revprop_generation_file, path, flags, APR_OS_DEFAULT, fs->pool)); } return SVN_NO_ERROR; } /* Return the textual representation of NUMBER and its checksum in *BUFFER. */ static svn_error_t * checkedsummed_number(svn_stringbuf_t **buffer, apr_int64_t number, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { svn_checksum_t *checksum; const char *digest; char str[SVN_INT64_BUFFER_SIZE]; apr_size_t len = svn__i64toa(str, number); str[len] = 0; SVN_ERR(svn_checksum(&checksum, svn_checksum_md5, str, len, scratch_pool)); digest = svn_checksum_to_cstring_display(checksum, scratch_pool); *buffer = svn_stringbuf_createf(result_pool, "%s %s\n", digest, str); return SVN_NO_ERROR; } /* Extract the generation number from the text BUFFER of LEN bytes and * verify it against the checksum in the same BUFFER. If they match, return * the generation in *NUMBER. Otherwise, return an error. * BUFFER does not need to be NUL-terminated. */ static svn_error_t * verify_extract_number(apr_int64_t *number, const char *buffer, apr_size_t len, apr_pool_t *scratch_pool) { const char *digest_end = strchr(buffer, ' '); /* Does the buffer even contain checksum _and_ number? */ if (digest_end != NULL) { svn_checksum_t *expected; svn_checksum_t *actual; SVN_ERR(svn_checksum_parse_hex(&expected, svn_checksum_md5, buffer, scratch_pool)); SVN_ERR(svn_checksum(&actual, svn_checksum_md5, digest_end + 1, (buffer + len) - (digest_end + 1), scratch_pool)); if (svn_checksum_match(expected, actual)) return svn_error_trace(svn_cstring_atoi64(number, digest_end + 1)); } /* Incomplete buffer or not a match. 
*/ return svn_error_create(SVN_ERR_FS_INVALID_GENERATION, NULL, _("Invalid generation number data.")); } /* Read revprop generation as stored on disk for repository FS. The result is * returned in *CURRENT. Call only for repos that support revprop caching. */ static svn_error_t * read_revprop_generation_file(apr_int64_t *current, svn_fs_t *fs, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; apr_pool_t *iterpool = svn_pool_create(scratch_pool); char buf[CHECKSUMMED_NUMBER_BUFFER_LEN]; apr_size_t len; apr_off_t offset = 0; int i; svn_error_t *err = SVN_NO_ERROR; /* Retry in case of incomplete file buffer updates. */ for (i = 0; i < GENERATION_READ_RETRY_COUNT; ++i) { svn_error_clear(err); svn_pool_clear(iterpool); /* If we can't even access the data, things are very wrong. * Don't retry in that case. */ SVN_ERR(open_revprop_generation_file(fs, TRUE, iterpool)); SVN_ERR(svn_io_file_seek(ffd->revprop_generation_file, APR_SET, &offset, iterpool)); len = sizeof(buf); SVN_ERR(svn_io_read_length_line(ffd->revprop_generation_file, buf, &len, iterpool)); /* Some data has been read. It will most likely be complete and * consistent. Extract and verify anyway. */ err = verify_extract_number(current, buf, len, iterpool); if (!err) break; /* Got unlucky and data was invalid. Retry. */ SVN_ERR(close_revprop_generation_file(fs, iterpool)); #if APR_HAS_THREADS apr_thread_yield(); #else apr_sleep(0); #endif } svn_pool_destroy(iterpool); /* If we had to give up, propagate the error. */ return svn_error_trace(err); } /* Write the CURRENT revprop generation to disk for repository FS. * Call only for repos that support revprop caching. 
*/ static svn_error_t * write_revprop_generation_file(svn_fs_t *fs, apr_int64_t current, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; svn_stringbuf_t *buffer; apr_off_t offset = 0; SVN_ERR(checkedsummed_number(&buffer, current, scratch_pool, scratch_pool)); SVN_ERR(open_revprop_generation_file(fs, FALSE, scratch_pool)); SVN_ERR(svn_io_file_seek(ffd->revprop_generation_file, APR_SET, &offset, scratch_pool)); SVN_ERR(svn_io_file_write_full(ffd->revprop_generation_file, buffer->data, buffer->len, NULL, scratch_pool)); SVN_ERR(svn_io_file_flush_to_disk(ffd->revprop_generation_file, scratch_pool)); return SVN_NO_ERROR; } svn_error_t * svn_fs_x__reset_revprop_generation_file(svn_fs_t *fs, apr_pool_t *scratch_pool) { const char *path = svn_fs_x__path_revprop_generation(fs, scratch_pool); svn_stringbuf_t *buffer; /* Unconditionally close the revprop generation file. * Don't care about FS formats. This ensures consistent internal state. */ SVN_ERR(close_revprop_generation_file(fs, scratch_pool)); /* Unconditionally remove any old revprop generation file. * Don't care about FS formats. This ensures consistent on-disk state * for old format repositories. */ SVN_ERR(svn_io_remove_file2(path, TRUE, scratch_pool)); /* Write the initial revprop generation file contents, if supported by * the current format. This ensures consistent on-disk state for new * format repositories. */ SVN_ERR(checkedsummed_number(&buffer, 0, scratch_pool, scratch_pool)); SVN_ERR(svn_io_write_atomic(path, buffer->data, buffer->len, NULL, scratch_pool)); /* ffd->revprop_generation_file will be re-opened on demand. */ return SVN_NO_ERROR; } /* Create an error object with the given MESSAGE and pass it to the WARNING member of FS. Clears UNDERLYING_ERR. 
*/ static void log_revprop_cache_init_warning(svn_fs_t *fs, svn_error_t *underlying_err, const char *message, apr_pool_t *scratch_pool) { svn_error_t *err = svn_error_createf( SVN_ERR_FS_REVPROP_CACHE_INIT_FAILURE, underlying_err, message, svn_dirent_local_style(fs->path, scratch_pool)); if (fs->warning) (fs->warning)(fs->warning_baton, err); svn_error_clear(err); } /* Test whether revprop cache and necessary infrastructure are available in FS. */ static svn_boolean_t has_revprop_cache(svn_fs_t *fs, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; svn_error_t *error; /* is the cache (still) enabled? */ if (ffd->revprop_cache == NULL) return FALSE; /* try initialize our file-backed infrastructure */ error = open_revprop_generation_file(fs, TRUE, scratch_pool); if (error) { /* failure -> disable revprop cache for good */ ffd->revprop_cache = NULL; log_revprop_cache_init_warning(fs, error, "Revprop caching for '%s' disabled " "because infrastructure for revprop " "caching failed to initialize.", scratch_pool); return FALSE; } return TRUE; } /* Baton structure for revprop_generation_fixup. */ typedef struct revprop_generation_fixup_t { /* revprop generation to read */ apr_int64_t *generation; /* file system context */ svn_fs_t *fs; } revprop_generation_upgrade_t; /* If the revprop generation has an odd value, it means the original writer of the revprop got killed. We don't know whether that process as able to change the revprop data but we assume that it was. Therefore, we increase the generation in that case to basically invalidate everyone's cache content. Execute this only while holding the write lock to the repo in baton->FFD. */ static svn_error_t * revprop_generation_fixup(void *void_baton, apr_pool_t *scratch_pool) { revprop_generation_upgrade_t *baton = void_baton; svn_fs_x__data_t *ffd = baton->fs->fsap_data; assert(ffd->has_write_lock); /* Make sure we don't operate on stale OS buffers. 
*/ SVN_ERR(close_revprop_generation_file(baton->fs, scratch_pool)); /* Maybe, either the original revprop writer or some other reader has already corrected / bumped the revprop generation. Thus, we need to read it again. However, we will now be the only ones changing the file contents due to us holding the write lock. */ SVN_ERR(read_revprop_generation_file(baton->generation, baton->fs, scratch_pool)); /* Cause everyone to re-read revprops upon their next access, if the last revprop write did not complete properly. */ if (*baton->generation % 2) { ++*baton->generation; SVN_ERR(write_revprop_generation_file(baton->fs, *baton->generation, scratch_pool)); } return SVN_NO_ERROR; } /* Read the current revprop generation and return it in *GENERATION. Also, detect aborted / crashed writers and recover from that. Use the access object in FS to set the shared mem values. */ static svn_error_t * read_revprop_generation(apr_int64_t *generation, svn_fs_t *fs, apr_pool_t *scratch_pool) { apr_int64_t current = 0; svn_fs_x__data_t *ffd = fs->fsap_data; /* read the current revprop generation number */ SVN_ERR(read_revprop_generation_file(¤t, fs, scratch_pool)); /* is an unfinished revprop write under the way? */ if (current % 2) { svn_boolean_t timeout = FALSE; /* Has the writer process been aborted? * Either by timeout or by us being the writer now. */ if (!ffd->has_write_lock) { apr_time_t mtime; SVN_ERR(svn_io_file_affected_time(&mtime, svn_fs_x__path_revprop_generation(fs, scratch_pool), scratch_pool)); timeout = apr_time_now() > mtime + REVPROP_CHANGE_TIMEOUT; } if (ffd->has_write_lock || timeout) { revprop_generation_upgrade_t baton; baton.generation = ¤t; baton.fs = fs; /* Ensure that the original writer process no longer exists by * acquiring the write lock to this repository. Then, fix up * the revprop generation. 
*/ if (ffd->has_write_lock) SVN_ERR(revprop_generation_fixup(&baton, scratch_pool)); else SVN_ERR(svn_fs_x__with_write_lock(fs, revprop_generation_fixup, &baton, scratch_pool)); } } /* return the value we just got */ *generation = current; return SVN_NO_ERROR; } /* Set the revprop generation in FS to the next odd number to indicate that there is a revprop write process under way. Return that value in *GENERATION. If the change times out, readers shall recover from that state & re-read revprops. This is a no-op for repo formats that don't support revprop caching. */ static svn_error_t * begin_revprop_change(apr_int64_t *generation, svn_fs_t *fs, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; SVN_ERR_ASSERT(ffd->has_write_lock); /* Close and re-open to make sure we read the latest data. */ SVN_ERR(close_revprop_generation_file(fs, scratch_pool)); SVN_ERR(open_revprop_generation_file(fs, FALSE, scratch_pool)); /* Set the revprop generation to an odd value to indicate * that a write is in progress. */ SVN_ERR(read_revprop_generation(generation, fs, scratch_pool)); ++*generation; SVN_ERR(write_revprop_generation_file(fs, *generation, scratch_pool)); return SVN_NO_ERROR; } /* Set the revprop generation in FS to the next even generation after the odd value in GENERATION to indicate that a) readers shall re-read revprops, and b) the write process has been completed (no recovery required). This is a no-op for repo formats that don't support revprop caching. */ static svn_error_t * end_revprop_change(svn_fs_t *fs, apr_int64_t generation, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; SVN_ERR_ASSERT(ffd->has_write_lock); SVN_ERR_ASSERT(generation % 2); /* Set the revprop generation to an even value to indicate * that a write has been completed. Since we held the write * lock, nobody else could have updated the file contents. 
*/ SVN_ERR(write_revprop_generation_file(fs, generation + 1, scratch_pool)); return SVN_NO_ERROR; } /* Container for all data required to access the packed revprop file * for a given REVISION. This structure will be filled incrementally * by read_pack_revprops() its sub-routines. */ typedef struct packed_revprops_t { /* revision number to read (not necessarily the first in the pack) */ svn_revnum_t revision; /* current revprop generation. Used when populating the revprop cache */ apr_int64_t generation; /* the actual revision properties */ apr_hash_t *properties; /* their size when serialized to a single string * (as found in PACKED_REVPROPS) */ apr_size_t serialized_size; /* name of the pack file (without folder path) */ const char *filename; /* packed shard folder path */ const char *folder; /* sum of values in SIZES */ apr_size_t total_size; /* first revision in the pack (>= MANIFEST_START) */ svn_revnum_t start_revision; /* size of the revprops in PACKED_REVPROPS */ apr_array_header_t *sizes; /* offset of the revprops in PACKED_REVPROPS */ apr_array_header_t *offsets; /* concatenation of the serialized representation of all revprops * in the pack, i.e. the pack content without header and compression */ svn_stringbuf_t *packed_revprops; /* First revision covered by MANIFEST. * Will equal the shard start revision or 1, for the 1st shard. */ svn_revnum_t manifest_start; /* content of the manifest. * Maps long(rev - MANIFEST_START) to const char* pack file name */ apr_array_header_t *manifest; } packed_revprops_t; /* Parse the serialized revprops in CONTENT and return them in *PROPERTIES. * Also, put them into the revprop cache, if activated, for future use. * Three more parameters are being used to update the revprop cache: FS is * our file system, the revprops belong to REVISION and the global revprop * GENERATION is used as well. * * The returned hash will be allocated in RESULT_POOL, SCRATCH_POOL is * being used for temporary allocations. 
*/ static svn_error_t * parse_revprop(apr_hash_t **properties, svn_fs_t *fs, svn_revnum_t revision, apr_int64_t generation, svn_string_t *content, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { svn_stream_t *stream = svn_stream_from_string(content, scratch_pool); *properties = apr_hash_make(result_pool); SVN_ERR(svn_hash_read2(*properties, stream, SVN_HASH_TERMINATOR, result_pool)); if (has_revprop_cache(fs, scratch_pool)) { svn_fs_x__data_t *ffd = fs->fsap_data; svn_fs_x__pair_cache_key_t key = { 0 }; key.revision = revision; key.second = generation; SVN_ERR(svn_cache__set(ffd->revprop_cache, &key, *properties, scratch_pool)); } return SVN_NO_ERROR; } /* Read the non-packed revprops for revision REV in FS, put them into the * revprop cache if activated and return them in *PROPERTIES. GENERATION * is the current revprop generation. * * If the data could not be read due to an otherwise recoverable error, * leave *PROPERTIES unchanged. No error will be returned in that case. * * Allocate *PROPERTIES in RESULT_POOL and temporaries in SCRATCH_POOL. */ static svn_error_t * read_non_packed_revprop(apr_hash_t **properties, svn_fs_t *fs, svn_revnum_t rev, apr_int64_t generation, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { svn_stringbuf_t *content = NULL; apr_pool_t *iterpool = svn_pool_create(scratch_pool); svn_boolean_t missing = FALSE; int i; for (i = 0; i < SVN_FS_X__RECOVERABLE_RETRY_COUNT && !missing && !content; ++i) { svn_pool_clear(iterpool); SVN_ERR(svn_fs_x__try_stringbuf_from_file(&content, &missing, svn_fs_x__path_revprops(fs, rev, iterpool), i + 1 < SVN_FS_X__RECOVERABLE_RETRY_COUNT, iterpool)); } if (content) SVN_ERR(parse_revprop(properties, fs, rev, generation, svn_stringbuf__morph_into_string(content), result_pool, iterpool)); svn_pool_clear(iterpool); return SVN_NO_ERROR; } /* Return the minimum length of any packed revprop file name in REVPROPS. 
 */
static apr_size_t
get_min_filename_len(packed_revprops_t *revprops)
{
  char number_buffer[SVN_INT64_BUFFER_SIZE];

  /* The revprop filenames have the format REV.COUNT - with REV being
   * at least the first rev in the shard and COUNT having at least one
   * digit.  Thus, the minimum is 2 + #decimal places in the start rev.
   */
  return svn__i64toa(number_buffer, revprops->manifest_start) + 2;
}

/* Given FS and REVPROPS->REVISION, fill the FILENAME, FOLDER and MANIFEST
 * members.  MANIFEST_START is set as well.  Use RESULT_POOL for allocating
 * results and SCRATCH_POOL for temporaries.
 *
 * Return SVN_ERR_FS_CORRUPT if the manifest is too short, does not end
 * with a newline or does not contain exactly one line per revision.
 */
static svn_error_t *
get_revprop_packname(svn_fs_t *fs,
                     packed_revprops_t *revprops,
                     apr_pool_t *result_pool,
                     apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  svn_stringbuf_t *content = NULL;
  const char *manifest_file_path;
  int idx, rev_count;
  char *buffer, *buffer_end;
  const char **filenames, **filenames_end;
  apr_size_t min_filename_len;

  /* Determine the dimensions. Rev 0 is excluded from the first shard. */
  rev_count = ffd->max_files_per_dir;
  revprops->manifest_start
    = revprops->revision - (revprops->revision % rev_count);
  if (revprops->manifest_start == 0)
    {
      ++revprops->manifest_start;
      --rev_count;
    }

  revprops->manifest = apr_array_make(result_pool, rev_count,
                                      sizeof(const char*));

  /* No line in the file can be less than this number of chars long. */
  min_filename_len = get_min_filename_len(revprops);

  /* Read the content of the manifest file */
  revprops->folder
    = svn_fs_x__path_revprops_pack_shard(fs, revprops->revision,
                                         result_pool);
  manifest_file_path
    = svn_dirent_join(revprops->folder, PATH_MANIFEST, result_pool);

  /* CONTENT is allocated in RESULT_POOL because the MANIFEST entries
   * point directly into its buffer (see the loop below). */
  SVN_ERR(svn_fs_x__read_content(&content, manifest_file_path, result_pool));

  /* The CONTENT must have a certain minimal size and there must be no
   * unterminated line at the end of the file.  Both guarantees also
   * simplify the parser loop below.
   */
  if (   content->len < rev_count * (min_filename_len + 1)
      || content->data[content->len - 1] != '\n')
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Packed revprop manifest for r%ld not "
                               "properly terminated"), revprops->revision);

  /* Chop (parse) the manifest CONTENT into filenames, one per line.
   * We only have to replace all newlines with NUL and add all line
   * starts to REVPROPS->MANIFEST.
   *
   * There must be exactly REV_COUNT lines and that is the number of
   * lines we parse from BUFFER to FILENAMES.  Set the end pointer for
   * the source BUFFER such that BUFFER+MIN_FILENAME_LEN is still valid.
   * BUFFER_END is always valid due to CONTENT->LEN > MIN_FILENAME_LEN.
   *
   * Please note that this loop is performance critical for e.g. 'svn log'.
   * It is run 1000x per revprop access, i.e. per revision and about
   * 50 million times per sec (and CPU core).
   */
  for (filenames = (const char **)revprops->manifest->elts,
       filenames_end = filenames + rev_count,
       buffer = content->data,
       buffer_end = buffer + content->len - min_filename_len;
       (filenames < filenames_end) && (buffer < buffer_end);
       ++filenames)
    {
      /* BUFFER always points to the start of the next line / filename. */
      *filenames = buffer;

      /* Find the next EOL.  This is guaranteed to stay within the CONTENT
       * buffer because we left enough room after BUFFER_END and we know
       * we will always see a newline as the last non-NUL char. */
      buffer += min_filename_len;
      while (*buffer != '\n')
        ++buffer;

      /* Found EOL.  Turn it into the filename terminator and move BUFFER
       * to the start of the next line or CONTENT buffer end. */
      *buffer = '\0';
      ++buffer;
    }

  /* We must have reached the end of both buffers. */
  if (buffer < content->data + content->len)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Packed revprop manifest for r%ld "
                               "has too many entries"), revprops->revision);

  if (filenames < filenames_end)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Packed revprop manifest for r%ld "
                               "has too few entries"), revprops->revision);

  /* The target array has now exactly one entry per revision.
   * The elements were written directly through ELTS above, so the
   * element count has to be set by hand. */
  revprops->manifest->nelts = rev_count;

  /* Now get the file name */
  idx = (int)(revprops->revision - revprops->manifest_start);
  revprops->filename = APR_ARRAY_IDX(revprops->manifest, idx, const char*);

  return SVN_NO_ERROR;
}

/* Return TRUE, if revision R1 and R2 refer to the same shard in FS,
 * i.e. if both yield the same quotient when divided by the shard size.
 */
static svn_boolean_t
same_shard(svn_fs_t *fs,
           svn_revnum_t r1,
           svn_revnum_t r2)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  return (r1 / ffd->max_files_per_dir) == (r2 / ffd->max_files_per_dir);
}

/* Given FS and the full packed file content in REVPROPS->PACKED_REVPROPS,
 * fill the START_REVISION member, and make PACKED_REVPROPS point to the
 * first serialized revprop.  If READ_ALL is set, initialize the SIZES
 * and OFFSETS members as well.
 *
 * Parse the revprops for REVPROPS->REVISION and set the PROPERTIES as
 * well as the SERIALIZED_SIZE member.  If revprop caching has been
 * enabled, parse all revprops in the pack and cache them.
*/ static svn_error_t * parse_packed_revprops(svn_fs_t *fs, packed_revprops_t *revprops, svn_boolean_t read_all, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { svn_stream_t *stream; apr_int64_t first_rev, count, i; apr_off_t offset; const char *header_end; apr_pool_t *iterpool = svn_pool_create(scratch_pool); svn_boolean_t cache_all = has_revprop_cache(fs, scratch_pool); /* decompress (even if the data is only "stored", there is still a * length header to remove) */ svn_stringbuf_t *compressed = revprops->packed_revprops; svn_stringbuf_t *uncompressed = svn_stringbuf_create_empty(result_pool); SVN_ERR(svn__decompress(compressed, uncompressed, APR_SIZE_MAX)); /* read first revision number and number of revisions in the pack */ stream = svn_stream_from_stringbuf(uncompressed, scratch_pool); SVN_ERR(svn_fs_x__read_number_from_stream(&first_rev, NULL, stream, iterpool)); SVN_ERR(svn_fs_x__read_number_from_stream(&count, NULL, stream, iterpool)); /* Check revision range for validity. */ if ( !same_shard(fs, revprops->revision, first_rev) || !same_shard(fs, revprops->revision, first_rev + count - 1) || count < 1) return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, _("Revprop pack for revision r%ld" " contains revprops for r%ld .. r%ld"), revprops->revision, (svn_revnum_t)first_rev, (svn_revnum_t)(first_rev + count -1)); /* Since start & end are in the same shard, it is enough to just test * the FIRST_REV for being actually packed. That will also cover the * special case of rev 0 never being packed. */ if (!svn_fs_x__is_packed_revprop(fs, first_rev)) return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL, _("Revprop pack for revision r%ld" " starts at non-packed revisions r%ld"), revprops->revision, (svn_revnum_t)first_rev); /* make PACKED_REVPROPS point to the first char after the header. * This is where the serialized revprops are. 
*/ header_end = strstr(uncompressed->data, "\n\n"); if (header_end == NULL) return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, _("Header end not found")); offset = header_end - uncompressed->data + 2; revprops->packed_revprops = svn_stringbuf_create_empty(result_pool); revprops->packed_revprops->data = uncompressed->data + offset; revprops->packed_revprops->len = (apr_size_t)(uncompressed->len - offset); revprops->packed_revprops->blocksize = (apr_size_t)(uncompressed->blocksize - offset); /* STREAM still points to the first entry in the sizes list. */ revprops->start_revision = (svn_revnum_t)first_rev; if (read_all) { /* Init / construct REVPROPS members. */ revprops->sizes = apr_array_make(result_pool, (int)count, sizeof(offset)); revprops->offsets = apr_array_make(result_pool, (int)count, sizeof(offset)); } /* Now parse, revision by revision, the size and content of each * revisions' revprops. */ for (i = 0, offset = 0, revprops->total_size = 0; i < count; ++i) { apr_int64_t size; svn_string_t serialized; svn_revnum_t revision = (svn_revnum_t)(first_rev + i); svn_pool_clear(iterpool); /* read & check the serialized size */ SVN_ERR(svn_fs_x__read_number_from_stream(&size, NULL, stream, iterpool)); if (size + offset > (apr_int64_t)revprops->packed_revprops->len) return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, _("Packed revprop size exceeds pack file size")); /* Parse this revprops list, if necessary */ serialized.data = revprops->packed_revprops->data + offset; serialized.len = (apr_size_t)size; if (revision == revprops->revision) { /* Parse (and possibly cache) the one revprop list we care about. */ SVN_ERR(parse_revprop(&revprops->properties, fs, revision, revprops->generation, &serialized, result_pool, iterpool)); revprops->serialized_size = serialized.len; /* If we only wanted the revprops for REVISION then we are done. */ if (!read_all && !cache_all) break; } else if (cache_all) { /* Parse and cache all other revprop lists. 
*/
          apr_hash_t *properties;
          SVN_ERR(parse_revprop(&properties, fs, revision,
                                revprops->generation, &serialized,
                                iterpool, iterpool));
        }

      if (read_all)
        {
          /* fill REVPROPS data structures */
          APR_ARRAY_PUSH(revprops->sizes, apr_off_t) = serialized.len;
          APR_ARRAY_PUSH(revprops->offsets, apr_off_t) = offset;
        }
      revprops->total_size += serialized.len;

      offset += serialized.len;
    }

  return SVN_NO_ERROR;
}

/* In filesystem FS, read the packed revprops for revision REV into
 * *REVPROPS.  Use GENERATION to populate the revprop cache, if enabled.
 * If you want to modify revprop contents / update REVPROPS, READ_ALL
 * must be set.  Otherwise, only the properties of REV are being provided.
 *
 * Errors:
 *  - SVN_ERR_FS_NO_SUCH_REVISION if REV is still not reported as packed
 *    after refreshing the min-unpacked-rev info,
 *  - SVN_ERR_FS_PACKED_REVPROP_READ_FAILURE if no pack file content could
 *    be obtained within SVN_FS_X__RECOVERABLE_RETRY_COUNT attempts,
 *  - SVN_ERR_FS_CORRUPT (wrapping the parser's error) if the pack file
 *    content could not be parsed.
 *
 * Allocate *REVPROPS in RESULT_POOL and temporaries in SCRATCH_POOL.
 */
static svn_error_t *
read_pack_revprop(packed_revprops_t **revprops,
                  svn_fs_t *fs,
                  svn_revnum_t rev,
                  apr_int64_t generation,
                  svn_boolean_t read_all,
                  apr_pool_t *result_pool,
                  apr_pool_t *scratch_pool)
{
  apr_pool_t *iterpool = svn_pool_create(scratch_pool);
  svn_boolean_t missing = FALSE;
  svn_error_t *err;
  packed_revprops_t *result;
  int i;

  /* someone insisted that REV is packed. Double-check if necessary */
  if (!svn_fs_x__is_packed_revprop(fs, rev))
    SVN_ERR(svn_fs_x__update_min_unpacked_rev(fs, iterpool));

  /* still not packed after the refresh -> the caller was wrong */
  if (!svn_fs_x__is_packed_revprop(fs, rev))
    return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL,
                             _("No such packed revision %ld"),
                             rev);

  /* initialize the result data structure (zero-filled, so that
   * RESULT->PACKED_REVPROPS starts out as NULL for the loop below) */
  result = apr_pcalloc(result_pool, sizeof(*result));
  result->revision = rev;
  result->generation = generation;

  /* try to read the packed revprops. This may require retries if we have
   * concurrent writers.  The loop ends as soon as file content has been
   * stored in RESULT->PACKED_REVPROPS or the retry budget is used up. */
  for (i = 0;
       i < SVN_FS_X__RECOVERABLE_RETRY_COUNT && !result->packed_revprops;
       ++i)
    {
      const char *file_path;
      svn_pool_clear(iterpool);

      /* there might have been concurrent writes.
       * Re-read the manifest and the pack file. */
      SVN_ERR(get_revprop_packname(fs, result, result_pool, iterpool));
      file_path = svn_dirent_join(result->folder,
                                  result->filename,
                                  iterpool);

      /* The flag passed here is TRUE on every attempt except the final
       * one.  NOTE(review): confirm that this polarity matches what
       * svn_fs_x__try_stringbuf_from_file expects for that parameter. */
      SVN_ERR(svn_fs_x__try_stringbuf_from_file(&result->packed_revprops,
                                &missing,
                                file_path,
                                i + 1 < SVN_FS_X__RECOVERABLE_RETRY_COUNT,
                                result_pool));

      /* If we could not find the file, there was a write.
       * So, we should refresh our revprop generation info as well such
       * that others may find data we will put into the cache.  They would
       * consider it outdated, otherwise. */
      if (missing && has_revprop_cache(fs, iterpool))
        SVN_ERR(read_revprop_generation(&result->generation, fs, iterpool));
    }

  /* the file content should be available now */
  if (!result->packed_revprops)
    return svn_error_createf(SVN_ERR_FS_PACKED_REVPROP_READ_FAILURE, NULL,
                  _("Failed to read revprop pack file for r%ld"), rev);

  /* parse it. RESULT will be complete afterwards.
   * ITERPOOL is released before ERR is inspected, so both outcomes
   * clean it up. */
  err = parse_packed_revprops(fs, result, read_all, result_pool, iterpool);
  svn_pool_destroy(iterpool);
  if (err)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
                  _("Revprop pack file for r%ld is corrupt"), rev);

  *revprops = result;

  return SVN_NO_ERROR;
}

/* Read the revprops for revision REV in FS and return them in *PROPLIST_P.
 *
 * Unless BYPASS_CACHE is set, the revprop cache (if FS has one) is
 * consulted first.  Otherwise, or on a cache miss, the non-packed revprop
 * file is tried if REV is not reported as packed, and then the packed
 * revprops.  Return SVN_ERR_FS_NO_SUCH_REVISION if no property list could
 * be obtained that way.
 *
 * Allocate *PROPLIST_P in RESULT_POOL and use SCRATCH_POOL for temporary
 * allocations.
 */
svn_error_t *
svn_fs_x__get_revision_proplist(apr_hash_t **proplist_p,
                                svn_fs_t *fs,
                                svn_revnum_t rev,
                                svn_boolean_t bypass_cache,
                                apr_pool_t *result_pool,
                                apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;

  /* stays 0 unless the cache lookup below reads the actual generation */
  apr_int64_t generation = 0;

  /* not found, yet */
  *proplist_p = NULL;

  /* should they be available at all? */
  SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));

  /* Try cache lookup first. */
  if (!bypass_cache && has_revprop_cache(fs, scratch_pool))
    {
      svn_boolean_t is_cached;
      svn_fs_x__pair_cache_key_t key = { 0 };

      SVN_ERR(read_revprop_generation(&generation, fs, scratch_pool));

      /* cache entries are keyed by (revision, revprop generation) */
      key.revision = rev;
      key.second = generation;
      SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
                             ffd->revprop_cache, &key, result_pool));
      if (is_cached)
        return SVN_NO_ERROR;
    }

  /* if REV had not been packed when we began, try reading it from the
   * non-packed shard.  If that fails, we will fall through to packed
   * shard reads. */
  if (!svn_fs_x__is_packed_revprop(fs, rev))
    {
      svn_error_t *err = read_non_packed_revprop(proplist_p, fs, rev,
                                                 generation, result_pool,
                                                 scratch_pool);
      if (err)
        {
          /* only "file not found" is tolerated here; anything else is
           * reported to the caller */
          if (!APR_STATUS_IS_ENOENT(err->apr_err))
            return svn_error_trace(err);

          svn_error_clear(err);
          *proplist_p = NULL; /* in case read_non_packed_revprop changed it */
        }
    }

  /* if revprop packing is available and we have not read the revprops, yet,
   * try reading them from a packed shard.  If that fails, REV is most
   * likely invalid (or its revprops highly contested). */
  if (!*proplist_p)
    {
      packed_revprops_t *revprops;

      /* READ_ALL is FALSE: only the properties of REV are needed */
      SVN_ERR(read_pack_revprop(&revprops, fs, rev, generation, FALSE,
                                result_pool, scratch_pool));
      *proplist_p = revprops->properties;
    }

  /* The revprops should have been there. Did we get them? */
  if (!*proplist_p)
    return svn_error_createf(SVN_ERR_FS_NO_SUCH_REVISION, NULL,
                             _("Could not read revprops for revision %ld"),
                             rev);

  return SVN_NO_ERROR;
}

/* Serialize the revision property list PROPLIST of revision REV in
 * filesystem FS to a non-packed file.  Return the name of that temporary
 * file in *TMP_PATH and the file path that it must be moved to in
 * *FINAL_PATH.
 *
 * Allocate *FINAL_PATH and *TMP_PATH in RESULT_POOL.  Use SCRATCH_POOL
 * for temporary allocations.
*/ static svn_error_t * write_non_packed_revprop(const char **final_path, const char **tmp_path, svn_fs_t *fs, svn_revnum_t rev, apr_hash_t *proplist, apr_pool_t *result_pool, apr_pool_t *scratch_pool) { svn_stream_t *stream; *final_path = svn_fs_x__path_revprops(fs, rev, result_pool); /* ### do we have a directory sitting around already? we really shouldn't ### have to get the dirname here. */ SVN_ERR(svn_stream_open_unique(&stream, tmp_path, svn_dirent_dirname(*final_path, scratch_pool), svn_io_file_del_none, result_pool, scratch_pool)); SVN_ERR(svn_hash_write2(proplist, stream, SVN_HASH_TERMINATOR, scratch_pool)); SVN_ERR(svn_stream_close(stream)); return SVN_NO_ERROR; } /* After writing the new revprop file(s), call this function to move the * file at TMP_PATH to FINAL_PATH and give it the permissions from * PERMS_REFERENCE. * * If indicated in BUMP_GENERATION, increase FS' revprop generation. * Finally, delete all the temporary files given in FILES_TO_DELETE. * The latter may be NULL. * * Use SCRATCH_POOL for temporary allocations. */ static svn_error_t * switch_to_new_revprop(svn_fs_t *fs, const char *final_path, const char *tmp_path, const char *perms_reference, apr_array_header_t *files_to_delete, svn_boolean_t bump_generation, apr_pool_t *scratch_pool) { apr_int64_t generation; /* Now, we may actually be replacing revprops. Make sure that all other threads and processes will know about this. */ if (bump_generation) SVN_ERR(begin_revprop_change(&generation, fs, scratch_pool)); SVN_ERR(svn_fs_x__move_into_place(tmp_path, final_path, perms_reference, scratch_pool)); /* Indicate that the update (if relevant) has been completed. */ if (bump_generation) SVN_ERR(end_revprop_change(fs, generation, scratch_pool)); /* Clean up temporary files, if necessary. 
*/ if (files_to_delete) { apr_pool_t *iterpool = svn_pool_create(scratch_pool); int i; for (i = 0; i < files_to_delete->nelts; ++i) { const char *path = APR_ARRAY_IDX(files_to_delete, i, const char*); svn_pool_clear(iterpool); SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool)); } svn_pool_destroy(iterpool); } return SVN_NO_ERROR; } /* Write a pack file header to STREAM that starts at revision START_REVISION * and contains the indexes [START,END) of SIZES. */ static svn_error_t * serialize_revprops_header(svn_stream_t *stream, svn_revnum_t start_revision, apr_array_header_t *sizes, int start, int end, apr_pool_t *scratch_pool) { apr_pool_t *iterpool = svn_pool_create(scratch_pool); int i; SVN_ERR_ASSERT(start < end); /* start revision and entry count */ SVN_ERR(svn_stream_printf(stream, scratch_pool, "%ld\n", start_revision)); SVN_ERR(svn_stream_printf(stream, scratch_pool, "%d\n", end - start)); /* the sizes array */ for (i = start; i < end; ++i) { /* Non-standard pool usage. * * We only allocate a few bytes each iteration -- even with a * million iterations we would still be in good shape memory-wise. */ apr_off_t size = APR_ARRAY_IDX(sizes, i, apr_off_t); SVN_ERR(svn_stream_printf(stream, iterpool, "%" APR_OFF_T_FMT "\n", size)); } /* the double newline char indicates the end of the header */ SVN_ERR(svn_stream_printf(stream, iterpool, "\n")); svn_pool_destroy(iterpool); return SVN_NO_ERROR; } /* Writes the a pack file to FILE_STREAM. It copies the serialized data * from REVPROPS for the indexes [START,END) except for index CHANGED_INDEX. * * The data for the latter is taken from NEW_SERIALIZED. Note, that * CHANGED_INDEX may be outside the [START,END) range, i.e. no new data is * taken in that case but only a subset of the old data will be copied. * * NEW_TOTAL_SIZE is a hint for pre-allocating buffers of appropriate size. * SCRATCH_POOL is used for temporary allocations. 
*/
static svn_error_t *
repack_revprops(svn_fs_t *fs,
                packed_revprops_t *revprops,
                int start,
                int end,
                int changed_index,
                svn_stringbuf_t *new_serialized,
                apr_off_t new_total_size,
                svn_stream_t *file_stream,
                apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  int level = ffd->compress_packed_revprops
            ? SVN_DELTA_COMPRESSION_LEVEL_DEFAULT
            : SVN_DELTA_COMPRESSION_LEVEL_NONE;

  /* PLAIN collects header + revprop data, PACKED receives what finally
   * goes to disk. */
  svn_stringbuf_t *plain
    = svn_stringbuf_create_ensure((apr_size_t)new_total_size, scratch_pool);
  svn_stringbuf_t *packed = svn_stringbuf_create_empty(scratch_pool);
  svn_stream_t *plain_stream = svn_stream_from_stringbuf(plain,
                                                         scratch_pool);
  int idx;

  /* header first ... */
  SVN_ERR(serialize_revprops_header(plain_stream,
                                    revprops->start_revision + start,
                                    revprops->sizes, start, end,
                                    scratch_pool));

  /* ... followed by the serialized revprops, one revision at a time */
  for (idx = start; idx < end; ++idx)
    {
      apr_size_t old_len;
      apr_size_t old_pos;

      if (idx == changed_index)
        {
          /* this is the modified revision: take the new data */
          SVN_ERR(svn_stream_write(plain_stream,
                                   new_serialized->data,
                                   &new_serialized->len));
          continue;
        }

      /* unmodified revision: copy the bytes from the old pack content */
      old_len = (apr_size_t)APR_ARRAY_IDX(revprops->sizes, idx, apr_off_t);
      old_pos = (apr_size_t)APR_ARRAY_IDX(revprops->offsets, idx,
                                          apr_off_t);
      SVN_ERR(svn_stream_write(plain_stream,
                               revprops->packed_revprops->data + old_pos,
                               &old_len));
    }

  /* make sure everything has arrived in PLAIN */
  SVN_ERR(svn_stream_close(plain_stream));

  /* compress (or merely store) the data */
  SVN_ERR(svn__compress(plain, packed, level));

  /* hand the result to the target stream and close that stream */
  SVN_ERR(svn_stream_write(file_stream, packed->data, &packed->len));

  return svn_error_trace(svn_stream_close(file_stream));
}

/* Allocate a new pack file name for revisions
 *     [REVPROPS->START_REVISION + START, REVPROPS->START_REVISION + END - 1]
 * of REVPROPS->MANIFEST.  Add the name of old file to FILES_TO_DELETE,
 * auto-create that array if necessary.  Return an open file stream to
 * the new file in *STREAM allocated in RESULT_POOL.
Allocate the paths
 * in *FILES_TO_DELETE from the same pool that contains the array itself.
 *
 * Use SCRATCH_POOL for temporary allocations.
 */
static svn_error_t *
repack_stream_open(svn_stream_t **stream,
                   svn_fs_t *fs,
                   packed_revprops_t *revprops,
                   int start,
                   int end,
                   apr_array_header_t **files_to_delete,
                   apr_pool_t *result_pool,
                   apr_pool_t *scratch_pool)
{
  /* START / END count from REVPROPS->START_REVISION whereas the manifest
   * array is addressed relative to REVPROPS->MANIFEST_START. */
  int manifest_shift
    = (int)(revprops->start_revision - revprops->manifest_start);
  const char *current_name = APR_ARRAY_IDX(revprops->manifest,
                                           start + manifest_shift,
                                           const char *);
  const char *dot;
  const char *new_name;
  const char *new_path;
  apr_pool_t *list_pool;
  apr_int64_t tag;
  apr_file_t *new_file;
  int idx;

  /* the file currently in use becomes obsolete: schedule it for removal */
  if (*files_to_delete == NULL)
    *files_to_delete = apr_array_make(result_pool, 3, sizeof(const char *));

  list_pool = (*files_to_delete)->pool;
  APR_ARRAY_PUSH(*files_to_delete, const char *)
    = svn_dirent_join(revprops->folder, current_name, list_pool);

  /* the tag is the counter behind the dot; the new file gets the next one */
  dot = strchr(current_name, '.');
  if (dot == NULL)
    return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
                             _("Packed file '%s' misses a tag"),
                             current_name);

  SVN_ERR(svn_cstring_atoi64(&tag, dot + 1));
  new_name = svn_string_createf(list_pool,
                                "%ld.%" APR_INT64_T_FMT,
                                revprops->start_revision + start,
                                tag + 1)->data;

  /* let all manifest entries of the covered range refer to the new file */
  for (idx = start; idx < end; ++idx)
    APR_ARRAY_IDX(revprops->manifest, idx + manifest_shift, const char *)
      = new_name;

  /* finally, open the new file and wrap it into a stream */
  new_path = svn_dirent_join(revprops->folder, new_name, scratch_pool);
  SVN_ERR(svn_io_file_open(&new_file, new_path,
                           APR_WRITE | APR_CREATE, APR_OS_DEFAULT,
                           result_pool));
  *stream = svn_stream_from_aprfile2(new_file, FALSE, result_pool);

  return SVN_NO_ERROR;
}

/* For revision REV in filesystem FS, set the revision properties to
 * PROPLIST.  Return a new file in *TMP_PATH that the caller shall move
 * to *FINAL_PATH to make the change visible.
Files to be deleted will
 * be listed in *FILES_TO_DELETE which may remain unchanged / unallocated.
 *
 * Allocate output values in RESULT_POOL and temporaries from SCRATCH_POOL.
 */
/* Implementation notes for write_packed_revprop:
 *
 * - If the estimated new size stays below FFD->REVPROP_PACK_SIZE or the
 *   pack file holds a single revision only, the whole pack file is
 *   rewritten into a temporary file; *FINAL_PATH is the pack file itself.
 *
 * - Otherwise the pack file is split into up to three new pack files
 *   (left part, the changed revision alone, right part) which are written
 *   under new names right away.  In that case *TMP_PATH / *FINAL_PATH
 *   refer to the rewritten manifest and *FILES_TO_DELETE receives the
 *   name of the replaced pack file.
 */
static svn_error_t *
write_packed_revprop(const char **final_path,
                     const char **tmp_path,
                     apr_array_header_t **files_to_delete,
                     svn_fs_t *fs,
                     svn_revnum_t rev,
                     apr_hash_t *proplist,
                     apr_pool_t *result_pool,
                     apr_pool_t *scratch_pool)
{
  svn_fs_x__data_t *ffd = fs->fsap_data;
  packed_revprops_t *revprops;
  apr_int64_t generation = 0;
  svn_stream_t *stream;
  svn_stringbuf_t *serialized;
  apr_off_t new_total_size;
  int changed_index;

  /* read the current revprop generation. This value will not change
   * while we hold the global write lock to this FS. */
  if (has_revprop_cache(fs, scratch_pool))
    SVN_ERR(read_revprop_generation(&generation, fs, scratch_pool));

  /* read contents of the current pack file (READ_ALL is TRUE because we
   * need the sizes and offsets of all revisions for repacking) */
  SVN_ERR(read_pack_revprop(&revprops, fs, rev, generation, TRUE,
                            scratch_pool, scratch_pool));

  /* serialize the new revprops */
  serialized = svn_stringbuf_create_empty(scratch_pool);
  stream = svn_stream_from_stringbuf(serialized, scratch_pool);
  SVN_ERR(svn_hash_write2(proplist, stream, SVN_HASH_TERMINATOR,
                          scratch_pool));
  SVN_ERR(svn_stream_close(stream));

  /* calculate the size of the new data: the old total, with the old
   * serialized size of REV replaced by the new one, plus an allowance of
   * SVN_INT64_BUFFER_SIZE for each entry and two more (presumably for the
   * numbers in the header -- TODO confirm) */
  changed_index = (int)(rev - revprops->start_revision);
  new_total_size = revprops->total_size - revprops->serialized_size
                 + serialized->len
                 + (revprops->offsets->nelts + 2) * SVN_INT64_BUFFER_SIZE;

  /* record the new size right away; both the split calculation below and
   * the header written by repack_revprops read it from REVPROPS->SIZES */
  APR_ARRAY_IDX(revprops->sizes, changed_index, apr_off_t) = serialized->len;

  /* can we put the new data into the same pack as the before? */
  if (   new_total_size < ffd->revprop_pack_size
      || revprops->sizes->nelts == 1)
    {
      /* simply replace the old pack file with new content as we do it
       * in the non-packed case */

      *final_path = svn_dirent_join(revprops->folder, revprops->filename,
                                    result_pool);
      SVN_ERR(svn_stream_open_unique(&stream, tmp_path, revprops->folder,
                                     svn_io_file_del_none, result_pool,
                                     scratch_pool));
      SVN_ERR(repack_revprops(fs, revprops, 0, revprops->sizes->nelts,
                              changed_index, serialized, new_total_size,
                              stream, scratch_pool));
    }
  else
    {
      /* split the pack file into two of roughly equal size */
      int right_count, left_count, i;

      /* LEFT and RIGHT are the next indexes to be claimed by the left and
       * right part, respectively */
      int left = 0;
      int right = revprops->sizes->nelts - 1;
      apr_off_t left_size = 2 * SVN_INT64_BUFFER_SIZE;
      apr_off_t right_size = 2 * SVN_INT64_BUFFER_SIZE;

      /* let left and right side grow such that their size difference
       * is minimal after each step. */
      while (left <= right)
        if (  left_size + APR_ARRAY_IDX(revprops->sizes, left, apr_off_t)
            < right_size + APR_ARRAY_IDX(revprops->sizes, right, apr_off_t))
          {
            left_size += APR_ARRAY_IDX(revprops->sizes, left, apr_off_t)
                      + SVN_INT64_BUFFER_SIZE;
            ++left;
          }
        else
          {
            right_size += APR_ARRAY_IDX(revprops->sizes, right, apr_off_t)
                        + SVN_INT64_BUFFER_SIZE;
            --right;
          }

       /* since the items need much less than SVN_INT64_BUFFER_SIZE
        * bytes to represent their length, the split may not be optimal */
      left_count = left;
      right_count = revprops->sizes->nelts - left;

      /* if new_size is large, one side may exceed the pack size limit.
       * In that case, split before and after the modified revprop.*/
      if (   left_size > ffd->revprop_pack_size
          || right_size > ffd->revprop_pack_size)
        {
          /* now LEFT_COUNT + RIGHT_COUNT is one less than the number of
           * entries, which makes the "middle" file below get written */
          left_count = changed_index;
          right_count = revprops->sizes->nelts - left_count - 1;
        }

      /* Allocate this here such that we can call the repack functions with
       * the scratch pool alone. */
      if (*files_to_delete == NULL)
        *files_to_delete = apr_array_make(result_pool, 3,
                                          sizeof(const char*));

      /* write the new, split files */

      /* left part: indexes [0, LEFT_COUNT) */
      if (left_count)
        {
          SVN_ERR(repack_stream_open(&stream, fs, revprops, 0,
                                     left_count, files_to_delete,
                                     scratch_pool, scratch_pool));
          SVN_ERR(repack_revprops(fs, revprops, 0, left_count,
                                  changed_index, serialized, new_total_size,
                                  stream, scratch_pool));
        }

      /* middle part: the changed revision gets a pack file of its own if
       * the left and right parts do not cover all entries */
      if (left_count + right_count < revprops->sizes->nelts)
        {
          SVN_ERR(repack_stream_open(&stream, fs, revprops, changed_index,
                                     changed_index + 1, files_to_delete,
                                     scratch_pool, scratch_pool));
          SVN_ERR(repack_revprops(fs, revprops, changed_index,
                                  changed_index + 1,
                                  changed_index, serialized, new_total_size,
                                  stream, scratch_pool));
        }

      /* right part: the last RIGHT_COUNT indexes */
      if (right_count)
        {
          SVN_ERR(repack_stream_open(&stream, fs, revprops,
                                     revprops->sizes->nelts - right_count,
                                     revprops->sizes->nelts,
                                     files_to_delete, scratch_pool,
                                     scratch_pool));
          SVN_ERR(repack_revprops(fs, revprops,
                                  revprops->sizes->nelts - right_count,
                                  revprops->sizes->nelts, changed_index,
                                  serialized, new_total_size, stream,
                                  scratch_pool));
        }

      /* write the new manifest: one line per manifest entry, using the
       * file names that repack_stream_open has put into the array */
      *final_path = svn_dirent_join(revprops->folder, PATH_MANIFEST,
                                    result_pool);
      SVN_ERR(svn_stream_open_unique(&stream, tmp_path, revprops->folder,
                                     svn_io_file_del_none, result_pool,
                                     scratch_pool));

      for (i = 0; i < revprops->manifest->nelts; ++i)
        {
          const char *filename = APR_ARRAY_IDX(revprops->manifest, i,
                                               const char*);
          SVN_ERR(svn_stream_printf(stream, scratch_pool, "%s\n", filename));
        }

      SVN_ERR(svn_stream_close(stream));
    }

  return SVN_NO_ERROR;
}

/* Set the revision property list of revision REV in filesystem FS to
   PROPLIST.  Use SCRATCH_POOL for temporary allocations.
*/
svn_error_t *
svn_fs_x__set_revision_proplist(svn_fs_t *fs,
                                svn_revnum_t rev,
                                apr_hash_t *proplist,
                                apr_pool_t *scratch_pool)
{
  const char *final_path;
  const char *tmp_path;
  apr_array_header_t *obsolete_files = NULL;
  svn_boolean_t must_bump;

  SVN_ERR(svn_fs_x__ensure_revision_exists(rev, fs, scratch_pool));

  /* Whether REV is packed will not change while we hold the global FS
   * write lock, so asking once is sufficient.
   *
   * The revprop generation needs to be bumped only if revprops already
   * exist for this revision; the fact that they did not yet exist is
   * never cached. */
  if (svn_fs_x__is_packed_revprop(fs, rev))
    {
      must_bump = TRUE;

      /* serialize the new revprop data into the pack file(s) */
      SVN_ERR(write_packed_revprop(&final_path, &tmp_path,
                                   &obsolete_files, fs, rev, proplist,
                                   scratch_pool, scratch_pool));
    }
  else
    {
      svn_node_kind_t kind;

      /* is there a revprop file for REV already? */
      SVN_ERR(svn_io_check_path(svn_fs_x__path_revprops(fs, rev,
                                                        scratch_pool),
                                &kind, scratch_pool));
      must_bump = (kind != svn_node_none);

      /* serialize the new revprop data into a non-packed file */
      SVN_ERR(write_non_packed_revprop(&final_path, &tmp_path,
                                       fs, rev, proplist,
                                       scratch_pool, scratch_pool));
    }

  /* Now, switch to the new revprop data.
   *
   * The rev file of this revision serves as the perms reference, because
   * when setting revprops for the first time, the revprop file won't
   * exist and therefore can't serve as its own reference.  (Whereas the
   * rev file should already exist at this point.) */
  return svn_error_trace(
           switch_to_new_revprop(fs, final_path, tmp_path,
                                 svn_fs_x__path_rev_absolute(fs, rev,
                                                             scratch_pool),
                                 obsolete_files, must_bump,
                                 scratch_pool));
}

/* Return TRUE, if for REVISION in FS, we can find the revprop pack file.
 * Use SCRATCH_POOL for temporary allocations.
 * Set *MISSING, if the reason is a missing manifest or pack file.
*/ svn_boolean_t svn_fs_x__packed_revprop_available(svn_boolean_t *missing, svn_fs_t *fs, svn_revnum_t revision, apr_pool_t *scratch_pool) { svn_fs_x__data_t *ffd = fs->fsap_data; svn_stringbuf_t *content = NULL; /* try to read the manifest file */ const char *folder = svn_fs_x__path_revprops_pack_shard(fs, revision, scratch_pool); const char *manifest_path = svn_dirent_join(folder, PATH_MANIFEST, scratch_pool); svn_error_t *err = svn_fs_x__try_stringbuf_from_file(&content, missing, manifest_path, FALSE, scratch_pool); /* if the manifest cannot be read, consider the pack files inaccessible * even if the file itself exists. */ if (err) { svn_error_clear(err); return FALSE; } if (*missing) return FALSE; /* parse manifest content until we find the entry for REVISION. * Revision 0 is never packed. */ revision = revision < ffd->max_files_per_dir ? revision - 1 : revision % ffd->max_files_per_dir; while (content->data) { char *next = strchr(content->data, '\n'); if (next) { *next = 0; ++next; } if (revision-- == 0) { /* the respective pack file must exist (and be a file) */ svn_node_kind_t kind; err = svn_io_check_path(svn_dirent_join(folder, content->data, scratch_pool), &kind, scratch_pool); if (err) { svn_error_clear(err); return FALSE; } *missing = kind == svn_node_none; return kind == svn_node_file; } content->data = next; } return FALSE; } /****** Packing FSX shards *********/ svn_error_t * svn_fs_x__copy_revprops(const char *pack_file_dir, const char *pack_filename, const char *shard_path, svn_revnum_t start_rev, svn_revnum_t end_rev, apr_array_header_t *sizes, apr_size_t total_size, int compression_level, svn_cancel_func_t cancel_func, void *cancel_baton, apr_pool_t *scratch_pool) { svn_stream_t *pack_stream; apr_file_t *pack_file; svn_revnum_t rev; apr_pool_t *iterpool = svn_pool_create(scratch_pool); svn_stream_t *stream; /* create empty data buffer and a write stream on top of it */ svn_stringbuf_t *uncompressed = svn_stringbuf_create_ensure(total_size, 
scratch_pool); svn_stringbuf_t *compressed = svn_stringbuf_create_empty(scratch_pool); pack_stream = svn_stream_from_stringbuf(uncompressed, scratch_pool); /* write the pack file header */ SVN_ERR(serialize_revprops_header(pack_stream, start_rev, sizes, 0, sizes->nelts, iterpool)); /* Some useful paths. */ SVN_ERR(svn_io_file_open(&pack_file, svn_dirent_join(pack_file_dir, pack_filename, scratch_pool), APR_WRITE | APR_CREATE, APR_OS_DEFAULT, scratch_pool)); /* Iterate over the revisions in this shard, squashing them together. */ for (rev = start_rev; rev <= end_rev; rev++) { const char *path; svn_pool_clear(iterpool); /* Construct the file name. */ path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev), iterpool); /* Copy all the bits from the non-packed revprop file to the end of * the pack file. */ SVN_ERR(svn_stream_open_readonly(&stream, path, iterpool, iterpool)); SVN_ERR(svn_stream_copy3(stream, pack_stream, cancel_func, cancel_baton, iterpool)); } /* flush stream buffers to content buffer */ SVN_ERR(svn_stream_close(pack_stream)); /* compress the content (or just store it for COMPRESSION_LEVEL 0) */ SVN_ERR(svn__compress(uncompressed, compressed, compression_level)); /* write the pack file content to disk */ stream = svn_stream_from_aprfile2(pack_file, FALSE, scratch_pool); SVN_ERR(svn_stream_write(stream, compressed->data, &compressed->len)); SVN_ERR(svn_stream_close(stream)); svn_pool_destroy(iterpool); return SVN_NO_ERROR; } svn_error_t * svn_fs_x__pack_revprops_shard(const char *pack_file_dir, const char *shard_path, apr_int64_t shard, int max_files_per_dir, apr_off_t max_pack_size, int compression_level, svn_cancel_func_t cancel_func, void *cancel_baton, apr_pool_t *scratch_pool) { const char *manifest_file_path, *pack_filename = NULL; svn_stream_t *manifest_stream; svn_revnum_t start_rev, end_rev, rev; apr_off_t total_size; apr_pool_t *iterpool = svn_pool_create(scratch_pool); apr_array_header_t *sizes; /* Some useful paths. 
*/ manifest_file_path = svn_dirent_join(pack_file_dir, PATH_MANIFEST, scratch_pool); /* Remove any existing pack file for this shard, since it is incomplete. */ SVN_ERR(svn_io_remove_dir2(pack_file_dir, TRUE, cancel_func, cancel_baton, scratch_pool)); /* Create the new directory and manifest file stream. */ SVN_ERR(svn_io_dir_make(pack_file_dir, APR_OS_DEFAULT, scratch_pool)); SVN_ERR(svn_stream_open_writable(&manifest_stream, manifest_file_path, scratch_pool, scratch_pool)); /* revisions to handle. Special case: revision 0 */ start_rev = (svn_revnum_t) (shard * max_files_per_dir); end_rev = (svn_revnum_t) ((shard + 1) * (max_files_per_dir) - 1); if (start_rev == 0) ++start_rev; /* Special special case: if max_files_per_dir is 1, then at this point start_rev == 1 and end_rev == 0 (!). Fortunately, everything just works. */ /* initialize the revprop size info */ sizes = apr_array_make(scratch_pool, max_files_per_dir, sizeof(apr_off_t)); total_size = 2 * SVN_INT64_BUFFER_SIZE; /* Iterate over the revisions in this shard, determine their size and * squashing them together into pack files. */ for (rev = start_rev; rev <= end_rev; rev++) { apr_finfo_t finfo; const char *path; svn_pool_clear(iterpool); /* Get the size of the file. */ path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%ld", rev), iterpool); SVN_ERR(svn_io_stat(&finfo, path, APR_FINFO_SIZE, iterpool)); /* if we already have started a pack file and this revprop cannot be * appended to it, write the previous pack file. */ if (sizes->nelts != 0 && total_size + SVN_INT64_BUFFER_SIZE + finfo.size > max_pack_size) { SVN_ERR(svn_fs_x__copy_revprops(pack_file_dir, pack_filename, shard_path, start_rev, rev-1, sizes, (apr_size_t)total_size, compression_level, cancel_func, cancel_baton, iterpool)); /* next pack file starts empty again */ apr_array_clear(sizes); total_size = 2 * SVN_INT64_BUFFER_SIZE; start_rev = rev; } /* Update the manifest. 
Allocate a file name for the current pack * file if it is a new one */ if (sizes->nelts == 0) pack_filename = apr_psprintf(scratch_pool, "%ld.0", rev); SVN_ERR(svn_stream_printf(manifest_stream, iterpool, "%s\n", pack_filename)); /* add to list of files to put into the current pack file */ APR_ARRAY_PUSH(sizes, apr_off_t) = finfo.size; total_size += SVN_INT64_BUFFER_SIZE + finfo.size; } /* write the last pack file */ if (sizes->nelts != 0) SVN_ERR(svn_fs_x__copy_revprops(pack_file_dir, pack_filename, shard_path, start_rev, rev-1, sizes, (apr_size_t)total_size, compression_level, cancel_func, cancel_baton, iterpool)); /* flush the manifest file and update permissions */ SVN_ERR(svn_stream_close(manifest_stream)); SVN_ERR(svn_io_copy_perms(shard_path, pack_file_dir, iterpool)); svn_pool_destroy(iterpool); return SVN_NO_ERROR; } svn_error_t * svn_fs_x__delete_revprops_shard(const char *shard_path, apr_int64_t shard, int max_files_per_dir, svn_cancel_func_t cancel_func, void *cancel_baton, apr_pool_t *scratch_pool) { if (shard == 0) { apr_pool_t *iterpool = svn_pool_create(scratch_pool); int i; /* delete all files except the one for revision 0 */ for (i = 1; i < max_files_per_dir; ++i) { const char *path; svn_pool_clear(iterpool); path = svn_dirent_join(shard_path, apr_psprintf(iterpool, "%d", i), iterpool); if (cancel_func) SVN_ERR((*cancel_func)(cancel_baton)); SVN_ERR(svn_io_remove_file2(path, TRUE, iterpool)); } svn_pool_destroy(iterpool); } else SVN_ERR(svn_io_remove_dir2(shard_path, TRUE, cancel_func, cancel_baton, scratch_pool)); return SVN_NO_ERROR; }