1 /* packed_data.h : Interface to the packed binary stream data structure
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 #ifndef SVN_PACKED_DATA_H
24 #define SVN_PACKED_DATA_H
26 #include "svn_string.h"
31 #endif /* __cplusplus */
33 /* This API provides Yet Another Serialization Framework.
35 * It is geared towards efficiently encoding collections of structured
36 * binary data (e.g. an array of noderev objects). The basic idea is to
37 * transform them into hierarchies of streams with each stream usually
38 * corresponding to a single attribute in the original data structures.
39 * The user is free to model the structure <-> streams mapping as she
42 * With all data inside the same (sub-)stream carrying similar attribute
43 * values, the whole stream lends itself to data compression. Strings /
44 * plain byte sequences will be stored as is. Numbers use a 7b/8b encoding
45 * scheme to eliminate leading zeros. Because values are often dependent
46 * (increasing offsets, roughly similar revision numbers, etc.), streams
47 * can be configured as storing (hopefully shorter) deltas instead of the
50 * Two stream types are provided: integer and byte streams. While the
51 * first stores 64-bit integers only and can be configured to assume
52 * signed and / or deltifiable data, the second will store arbitrary
53 * byte sequences including their length. At the root level, you may
54 * create an arbitrary number of integer and byte streams. Any stream
55 * may have an arbitrary number of sub-streams of the same kind. You
56 * should create the full stream hierarchy before writing any data to it.
58 * As a convenience, when an integer stream has sub-streams, you may write
59 * to the parent stream instead of all sub-streams individually and the
60 * values will be passed down automatically in a round-robin fashion.
61 * Reading from the parent stream is similarly supported.
63 * When all data has been added to the stream, it can be written to an
64 * ordinary svn_stream_t. First, we write a description of the stream
65 * structure (types, sub-streams, sizes and configurations) followed by
66 * zlib compressed stream content. For each top-level stream, all sub-
67 * stream data will be concatenated and then compressed as a single block.
68 * To maximize the effect of this, make sure all data in that stream
69 * hierarchy has a similar value distribution.
71 * Reading data starts with an svn_stream_t and automatically recreates
72 * the stream hierarchies. You only need to extract data from it in the
73 * same order as you wrote it.
75 * Although not enforced programmatically, you may either only write to a
76 * stream hierarchy or only read from it but you cannot do both on the
77 * same data structure.
82 /* We pack / unpack integers en bloc to minimize calling and setup overhead.
83 * This is the number of integers we put into a buffer before writing
84 * them to / after reading them from the 7b/8b stream. Under 64 bits, this
85 * value creates a 128 byte data structure (14 + 2 integers, 8 bytes each).
87 #define SVN__PACKED_DATA_BUFFER_SIZE 14
92 /* Opaque type for the root object.
94 typedef struct svn_packed__data_root_t svn_packed__data_root_t;
96 /* Opaque type for byte streams.
98 typedef struct svn_packed__byte_stream_t svn_packed__byte_stream_t;
100 /* Semi-opaque type for integer streams. We expose the unpacked buffer
101 * to allow for replacing svn_packed__add_uint and friends by macros.
103 typedef struct svn_packed__int_stream_t
105 /* pointer to the remainder of the data structure */
108 /* number of value entries in BUFFER */
109 apr_size_t buffer_used;
111 /* unpacked integers (either yet to be packed or pre-fetched from the
112 * packed buffers). Only the first BUFFER_USED entries are valid. */
113 apr_uint64_t buffer[SVN__PACKED_DATA_BUFFER_SIZE];
114 } svn_packed__int_stream_t;
119 /* Return a new serialization root object, allocated in POOL.
121 svn_packed__data_root_t *
122 svn_packed__data_create_root(apr_pool_t *pool);
124 /* Create and return a new top-level integer stream in ROOT. If signed,
125 * i.e. possibly negative, numbers will be put into that stream, SIGNED_INTS
126 * should be TRUE as a more efficient encoding will be used in that case. Set
127 * DIFF to TRUE if you expect the difference between consecutive numbers
128 * to be much smaller (~100 times) than the actual numbers.
130 svn_packed__int_stream_t *
131 svn_packed__create_int_stream(svn_packed__data_root_t *root,
133 svn_boolean_t signed_ints);
135 /* Create and return a sub-stream to the existing integer stream PARENT.
136 * If signed, i.e. possibly negative, numbers will be put into that stream,
137 * SIGNED_INTS should be TRUE; a more efficient encoding is used in that case.
138 * Set DIFF to TRUE if you expect the difference between consecutive numbers
139 * to be much smaller (~100 times) than the actual numbers.
141 svn_packed__int_stream_t *
142 svn_packed__create_int_substream(svn_packed__int_stream_t *parent,
144 svn_boolean_t signed_ints);
146 /* Create and return a new top-level byte sequence stream in ROOT.
148 svn_packed__byte_stream_t *
149 svn_packed__create_bytes_stream(svn_packed__data_root_t *root);
151 /* Write the unsigned integer VALUE to STREAM.
154 svn_packed__add_uint(svn_packed__int_stream_t *stream,
157 /* Write the signed integer VALUE to STREAM.
160 svn_packed__add_int(svn_packed__int_stream_t *stream,
163 /* Write the sequence starting at DATA containing LEN bytes to STREAM.
166 svn_packed__add_bytes(svn_packed__byte_stream_t *stream,
170 /* Write all contents of ROOT (including all sub-streams) to STREAM.
171 * Use SCRATCH_POOL for temporary allocations.
174 svn_packed__data_write(svn_stream_t *stream,
175 svn_packed__data_root_t *root,
176 apr_pool_t *scratch_pool);
181 /* Return the first integer stream in ROOT. Returns NULL in case there
184 svn_packed__int_stream_t *
185 svn_packed__first_int_stream(svn_packed__data_root_t *root);
187 /* Return the first byte sequence stream in ROOT. Returns NULL in case
190 svn_packed__byte_stream_t *
191 svn_packed__first_byte_stream(svn_packed__data_root_t *root);
193 /* Return the next (sibling) integer stream to STREAM. Returns NULL in
194 * case there isn't any.
196 svn_packed__int_stream_t *
197 svn_packed__next_int_stream(svn_packed__int_stream_t *stream);
199 /* Return the next (sibling) byte sequence stream to STREAM. Returns NULL
200 * in case there isn't any.
202 svn_packed__byte_stream_t *
203 svn_packed__next_byte_stream(svn_packed__byte_stream_t *stream);
205 /* Return the first sub-stream of STREAM. Returns NULL in case there
208 svn_packed__int_stream_t *
209 svn_packed__first_int_substream(svn_packed__int_stream_t *stream);
211 /* Return the number of integers left to read from STREAM.
214 svn_packed__int_count(svn_packed__int_stream_t *stream);
216 /* Return the number of bytes left to read from STREAM.
219 svn_packed__byte_count(svn_packed__byte_stream_t *stream);
221 /* Return the next number from STREAM as unsigned integer. Returns 0 when
222 * reading beyond the end of the stream.
225 svn_packed__get_uint(svn_packed__int_stream_t *stream);
227 /* Return the next number from STREAM as signed integer. Returns 0 when
228 * reading beyond the end of the stream.
231 svn_packed__get_int(svn_packed__int_stream_t *stream);
233 /* Return the next byte sequence from STREAM and set *LEN to the length
234 * of that sequence. Sets *LEN to 0 when reading beyond the end of the
238 svn_packed__get_bytes(svn_packed__byte_stream_t *stream,
241 /* Allocate a new packed data root in RESULT_POOL, read its structure and
242 * stream contents from STREAM and return it in *ROOT_P. Use SCRATCH_POOL
243 * for temporary allocations.
246 svn_packed__data_read(svn_packed__data_root_t **root_p,
247 svn_stream_t *stream,
248 apr_pool_t *result_pool,
249 apr_pool_t *scratch_pool);
253 #endif /* __cplusplus */
255 #endif /* SVN_PACKED_DATA_H */