contrib/xz/src/liblzma/common/block_buffer_encoder.c

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       block_buffer_encoder.c
   4 /// \brief      Single-call .xz Block encoder
   5 //
   6 //  Author:     Lasse Collin
   7 //
   8 //  This file has been put into the public domain.
   9 //  You can do whatever you want with this file.
  10 //
  11 ///////////////////////////////////////////////////////////////////////////////
  12
  13 #include "block_buffer_encoder.h"
  14 #include "block_encoder.h"
  15 #include "filter_encoder.h"
  16 #include "lzma2_encoder.h"
  17 #include "check.h"
  18
  19
  20 /// Estimate the maximum size of the Block Header and Check fields for
  21 /// a Block that uses LZMA2 uncompressed chunks. We could use
  22 /// lzma_block_header_size() but this is simpler.
  23 ///
  24 /// Block Header Size + Block Flags + Compressed Size
  25 /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check
  26 /// and round up to the next multiple of four to take Header Padding
  27 /// into account.
  28 #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \
  29                 + LZMA_CHECK_SIZE_MAX + 3) & ~3)
  30
  31
  32 static uint64_t
  33 lzma2_bound(uint64_t uncompressed_size)
  34 {
  35         // Prevent integer overflow in overhead calculation.
  36         if (uncompressed_size > COMPRESSED_SIZE_MAX)
  37                 return 0;
  38
  39         // Calculate the exact overhead of the LZMA2 headers: Round
  40         // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX,
  41         // multiply by the size of per-chunk header, and add one byte for
  42         // the end marker.
  43         const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
  44                                 / LZMA2_CHUNK_MAX)
  45                         * LZMA2_HEADER_UNCOMPRESSED + 1;
  46
  47         // Catch the possible integer overflow.
  48         if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size)
  49                 return 0;
  50
  51         return uncompressed_size + overhead;
  52 }
  53
  54
  55 extern uint64_t
  56 lzma_block_buffer_bound64(uint64_t uncompressed_size)
  57 {
  58         // If the data doesn't compress, we always use uncompressed
  59         // LZMA2 chunks.
  60         uint64_t lzma2_size = lzma2_bound(uncompressed_size);
  61         if (lzma2_size == 0)
  62                 return 0;
  63
  64         // Take Block Padding into account.
  65         lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
  66
  67         // No risk of integer overflow because lzma2_bound() already takes
  68         // into account the size of the headers in the Block.
  69         return HEADERS_BOUND + lzma2_size;
  70 }
  71
  72
  73 extern LZMA_API(size_t)
  74 lzma_block_buffer_bound(size_t uncompressed_size)
  75 {
  76         uint64_t ret = lzma_block_buffer_bound64(uncompressed_size);
  77
  78 #if SIZE_MAX < UINT64_MAX
  79         // Catch the possible integer overflow on 32-bit systems.
  80         if (ret > SIZE_MAX)
  81                 return 0;
  82 #endif
  83
  84         return ret;
  85 }
  86
  87
  88 static lzma_ret
  89 block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
  90                 uint8_t *out, size_t *out_pos, size_t out_size)
  91 {
  92         // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
  93         // all, but LZMA2 always requires a dictionary, so use the minimum
  94         // value to minimize memory usage of the decoder.
  95         lzma_options_lzma lzma2 = {
  96                 .dict_size = LZMA_DICT_SIZE_MIN,
  97         };
  98
  99         lzma_filter filters[2];
 100         filters[0].id = LZMA_FILTER_LZMA2;
 101         filters[0].options = &lzma2;
 102         filters[1].id = LZMA_VLI_UNKNOWN;
 103
 104         // Set the above filter options to *block temporarily so that we can
 105         // encode the Block Header.
 106         lzma_filter *filters_orig = block->filters;
 107         block->filters = filters;
 108
 109         if (lzma_block_header_size(block) != LZMA_OK) {
 110                 block->filters = filters_orig;
 111                 return LZMA_PROG_ERROR;
 112         }
 113
 114         // Check that there's enough output space. The caller has already
 115         // set block->compressed_size to what lzma2_bound() has returned,
 116         // so we can reuse that value. We know that compressed_size is a
 117         // known valid VLI and header_size is a small value so their sum
 118         // will never overflow.
 119         assert(block->compressed_size == lzma2_bound(in_size));
 120         if (out_size - *out_pos
 121                         < block->header_size + block->compressed_size) {
 122                 block->filters = filters_orig;
 123                 return LZMA_BUF_ERROR;
 124         }
 125
 126         if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) {
 127                 block->filters = filters_orig;
 128                 return LZMA_PROG_ERROR;
 129         }
 130
 131         block->filters = filters_orig;
 132         *out_pos += block->header_size;
 133
 134         // Encode the data using LZMA2 uncompressed chunks.
 135         size_t in_pos = 0;
 136         uint8_t control = 0x01; // Dictionary reset
 137
 138         while (in_pos < in_size) {
 139                 // Control byte: Indicate uncompressed chunk, of which
 140                 // the first resets the dictionary.
 141                 out[(*out_pos)++] = control;
 142                 control = 0x02; // No dictionary reset
 143
 144                 // Size of the uncompressed chunk
 145                 const size_t copy_size
 146                                 = my_min(in_size - in_pos, LZMA2_CHUNK_MAX);
 147                 out[(*out_pos)++] = (copy_size - 1) >> 8;
 148                 out[(*out_pos)++] = (copy_size - 1) & 0xFF;
 149
 150                 // The actual data
 151                 assert(*out_pos + copy_size <= out_size);
 152                 memcpy(out + *out_pos, in + in_pos, copy_size);
 153
 154                 in_pos += copy_size;
 155                 *out_pos += copy_size;
 156         }
 157
 158         // End marker
 159         out[(*out_pos)++] = 0x00;
 160         assert(*out_pos <= out_size);
 161
 162         return LZMA_OK;
 163 }
 164
 165
 166 static lzma_ret
 167 block_encode_normal(lzma_block *block, const lzma_allocator *allocator,
 168                 const uint8_t *in, size_t in_size,
 169                 uint8_t *out, size_t *out_pos, size_t out_size)
 170 {
 171         // Find out the size of the Block Header.
 172         return_if_error(lzma_block_header_size(block));
 173
 174         // Reserve space for the Block Header and skip it for now.
 175         if (out_size - *out_pos <= block->header_size)
 176                 return LZMA_BUF_ERROR;
 177
 178         const size_t out_start = *out_pos;
 179         *out_pos += block->header_size;
 180
 181         // Limit out_size so that we stop encoding if the output would grow
 182         // bigger than what uncompressed Block would be.
 183         if (out_size - *out_pos > block->compressed_size)
 184                 out_size = *out_pos + block->compressed_size;
 185
 186         // TODO: In many common cases this could be optimized to use
 187         // significantly less memory.
 188         lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT;
 189         lzma_ret ret = lzma_raw_encoder_init(
 190                         &raw_encoder, allocator, block->filters);
 191
 192         if (ret == LZMA_OK) {
 193                 size_t in_pos = 0;
 194                 ret = raw_encoder.code(raw_encoder.coder, allocator,
 195                                 in, &in_pos, in_size, out, out_pos, out_size,
 196                                 LZMA_FINISH);
 197         }
 198
 199         // NOTE: This needs to be run even if lzma_raw_encoder_init() failed.
 200         lzma_next_end(&raw_encoder, allocator);
 201
 202         if (ret == LZMA_STREAM_END) {
 203                 // Compression was successful. Write the Block Header.
 204                 block->compressed_size
 205                                 = *out_pos - (out_start + block->header_size);
 206                 ret = lzma_block_header_encode(block, out + out_start);
 207                 if (ret != LZMA_OK)
 208                         ret = LZMA_PROG_ERROR;
 209
 210         } else if (ret == LZMA_OK) {
 211                 // Output buffer became full.
 212                 ret = LZMA_BUF_ERROR;
 213         }
 214
 215         // Reset *out_pos if something went wrong.
 216         if (ret != LZMA_OK)
 217                 *out_pos = out_start;
 218
 219         return ret;
 220 }
 221
 222
 223 static lzma_ret
 224 block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
 225                 const uint8_t *in, size_t in_size,
 226                 uint8_t *out, size_t *out_pos, size_t out_size,
 227                 bool try_to_compress)
 228 {
 229         // Validate the arguments.
 230         if (block == NULL || (in == NULL && in_size != 0) || out == NULL
 231                         || out_pos == NULL || *out_pos > out_size)
 232                 return LZMA_PROG_ERROR;
 233
 234         // The contents of the structure may depend on the version so
 235         // check the version before validating the contents of *block.
 236         if (block->version > 1)
 237                 return LZMA_OPTIONS_ERROR;
 238
 239         if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
 240                         || (try_to_compress && block->filters == NULL))
 241                 return LZMA_PROG_ERROR;
 242
 243         if (!lzma_check_is_supported(block->check))
 244                 return LZMA_UNSUPPORTED_CHECK;
 245
 246         // Size of a Block has to be a multiple of four, so limit the size
 247         // here already. This way we don't need to check it again when adding
 248         // Block Padding.
 249         out_size -= (out_size - *out_pos) & 3;
 250
 251         // Get the size of the Check field.
 252         const size_t check_size = lzma_check_size(block->check);
 253         assert(check_size != UINT32_MAX);
 254
 255         // Reserve space for the Check field.
 256         if (out_size - *out_pos <= check_size)
 257                 return LZMA_BUF_ERROR;
 258
 259         out_size -= check_size;
 260
 261         // Initialize block->uncompressed_size and calculate the worst-case
 262         // value for block->compressed_size.
 263         block->uncompressed_size = in_size;
 264         block->compressed_size = lzma2_bound(in_size);
 265         if (block->compressed_size == 0)
 266                 return LZMA_DATA_ERROR;
 267
 268         // Do the actual compression.
 269         lzma_ret ret = LZMA_BUF_ERROR;
 270         if (try_to_compress)
 271                 ret = block_encode_normal(block, allocator,
 272                                 in, in_size, out, out_pos, out_size);
 273
 274         if (ret != LZMA_OK) {
 275                 // If the error was something else than output buffer
 276                 // becoming full, return the error now.
 277                 if (ret != LZMA_BUF_ERROR)
 278                         return ret;
 279
 280                 // The data was uncompressible (at least with the options
 281                 // given to us) or the output buffer was too small. Use the
 282                 // uncompressed chunks of LZMA2 to wrap the data into a valid
 283                 // Block. If we haven't been given enough output space, even
 284                 // this may fail.
 285                 return_if_error(block_encode_uncompressed(block, in, in_size,
 286                                 out, out_pos, out_size));
 287         }
 288
 289         assert(*out_pos <= out_size);
 290
 291         // Block Padding. No buffer overflow here, because we already adjusted
 292         // out_size so that (out_size - out_start) is a multiple of four.
 293         // Thus, if the buffer is full, the loop body can never run.
 294         for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) {
 295                 assert(*out_pos < out_size);
 296                 out[(*out_pos)++] = 0x00;
 297         }
 298
 299         // If there's no Check field, we are done now.
 300         if (check_size > 0) {
 301                 // Calculate the integrity check. We reserved space for
 302                 // the Check field earlier so we don't need to check for
 303                 // available output space here.
 304                 lzma_check_state check;
 305                 lzma_check_init(&check, block->check);
 306                 lzma_check_update(&check, block->check, in, in_size);
 307                 lzma_check_finish(&check, block->check);
 308
 309                 memcpy(block->raw_check, check.buffer.u8, check_size);
 310                 memcpy(out + *out_pos, check.buffer.u8, check_size);
 311                 *out_pos += check_size;
 312         }
 313
 314         return LZMA_OK;
 315 }
 316
 317
 318 extern LZMA_API(lzma_ret)
 319 lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
 320                 const uint8_t *in, size_t in_size,
 321                 uint8_t *out, size_t *out_pos, size_t out_size)
 322 {
 323         return block_buffer_encode(block, allocator,
 324                         in, in_size, out, out_pos, out_size, true);
 325 }
 326
 327
 328 extern LZMA_API(lzma_ret)
 329 lzma_block_uncomp_encode(lzma_block *block,
 330                 const uint8_t *in, size_t in_size,
 331                 uint8_t *out, size_t *out_pos, size_t out_size)
 332 {
 333         // It won't allocate any memory from heap so no need
 334         // for lzma_allocator.
 335         return block_buffer_encode(block, NULL,
 336                         in, in_size, out, out_pos, out_size, false);
 337 }