1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file subblock_decoder.c
4 /// \brief Decoder of the Subblock filter
6 // Author: Lasse Collin
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
11 ///////////////////////////////////////////////////////////////////////////////
13 #include "subblock_decoder.h"
14 #include "subblock_decoder_helper.h"
15 #include "filter_decoder.h"
18 /// Maximum number of consecutive Subblocks with Subblock Type Padding
19 #define PADDING_MAX 31
26 // These require that there is at least one input
38 SEQ_SIZE_3, // This must be right before SEQ_DATA.
40 // These don't require any input to be available.
46 /// Number of bytes left in the current Subblock Data field.
49 /// Number of consecutive Subblocks with Subblock Type Padding
52 /// True when .next.code() has returned LZMA_STREAM_END.
55 /// True when the Subblock decoder has detected End of Payload Marker.
56 /// This may become true before next_finished becomes true.
59 /// True if Subfilters are allowed.
60 bool allow_subfilters;
62 /// Indicates if at least one byte of decoded output has been
63 /// produced after enabling Subfilter.
64 bool got_output_with_subfilter;
66 /// Possible subfilter
67 lzma_next_coder subfilter;
69 /// Filter Flags decoder is needed to parse the ID and Properties
71 lzma_next_coder filter_flags_decoder;
73 /// The filter_flags_decoder stores its results here.
74 lzma_filter filter_flags;
76 /// Options for the Subblock decoder helper. This is used to tell
77 /// the helper when it should return LZMA_STREAM_END to the subfilter.
78 lzma_options_subblock_helper helper;
81 /// How many times buffer should be repeated
84 /// Size of the buffer
87 /// Position in the buffer
90 /// Buffer to hold the data to be repeated
91 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
94 /// Temporary buffer needed when the Subblock filter is not the last
95 /// filter in the chain. The output of the next filter is first
96 /// decoded into buffer[], which is then used as input for the actual
101 uint8_t buffer[LZMA_BUFFER_SIZE];
106 /// Values of valid Subblock Flags
117 /// Calls the subfilter and updates coder->uncompressed_size.
119 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
120 const uint8_t *in, size_t *in_pos,
121 size_t in_size, uint8_t *restrict out,
122 size_t *restrict out_pos, size_t out_size, lzma_action action)
124 assert(coder->subfilter.code != NULL);
126 // Call the subfilter.
127 const lzma_ret ret = coder->subfilter.code(
128 coder->subfilter.coder, allocator,
129 in, in_pos, in_size, out, out_pos, out_size, action);
136 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
137 const uint8_t *in, size_t *in_pos,
138 size_t in_size, uint8_t *restrict out,
139 size_t *restrict out_pos, size_t out_size, lzma_action action)
141 while (*out_pos < out_size && (*in_pos < in_size
142 || coder->sequence >= SEQ_DATA))
143 switch (coder->sequence) {
145 // Do the correct action depending on the Subblock Type.
146 switch (in[*in_pos] >> 4) {
148 // Only check that reserved bits are zero.
149 if (++coder->padding > PADDING_MAX
150 || in[*in_pos] & 0x0F)
151 return LZMA_DATA_ERROR;
156 // There must be no Padding before EOPM.
157 if (coder->padding != 0)
158 return LZMA_DATA_ERROR;
160 // Check that reserved bits are zero.
161 if (in[*in_pos] & 0x0F)
162 return LZMA_DATA_ERROR;
164 // There must be no Subfilter enabled.
165 if (coder->subfilter.code != NULL)
166 return LZMA_DATA_ERROR;
169 return LZMA_STREAM_END;
172 // First four bits of the Subblock Data size.
173 coder->size = in[*in_pos] & 0x0F;
175 coder->got_output_with_subfilter = true;
176 coder->sequence = SEQ_SIZE_1;
180 // First four bits of the Repeat Count. We use
181 // coder->size as a temporary place for it.
182 coder->size = in[*in_pos] & 0x0F;
184 coder->got_output_with_subfilter = true;
185 coder->sequence = SEQ_REPEAT_COUNT_1;
188 case FLAG_SET_SUBFILTER: {
189 if (coder->padding != 0 || (in[*in_pos] & 0x0F)
190 || coder->subfilter.code != NULL
191 || !coder->allow_subfilters)
192 return LZMA_DATA_ERROR;
194 assert(coder->filter_flags.options == NULL);
196 // return_if_error(lzma_filter_flags_decoder_init(
197 // &coder->filter_flags_decoder,
198 // allocator, &coder->filter_flags));
200 coder->got_output_with_subfilter = false;
203 coder->sequence = SEQ_FILTER_FLAGS;
207 case FLAG_END_SUBFILTER: {
208 if (coder->padding != 0 || (in[*in_pos] & 0x0F)
209 || coder->subfilter.code == NULL
210 || !coder->got_output_with_subfilter)
211 return LZMA_DATA_ERROR;
213 // Tell the helper filter to indicate End of Input
215 coder->helper.end_was_reached = true;
218 const lzma_ret ret = subfilter_decode(coder, allocator,
219 NULL, &dummy, 0, out, out_pos,out_size,
222 // If we didn't reach the end of the subfilter's output
223 // yet, return to the application. On the next call we
224 // will get to this same switch-case again, because we
225 // haven't updated *in_pos yet.
226 if (ret != LZMA_STREAM_END)
229 // Free Subfilter's memory. This is a bit debatable,
230 // since we could avoid some malloc()/free() calls
231 // if the same Subfilter gets used soon again. But
232 // if Subfilter isn't used again, we could leave
233 // a memory-hogging filter dangling until someone
234 // frees Subblock filter itself.
235 lzma_next_end(&coder->subfilter, allocator);
237 // Free memory used for subfilter options. This is
238 // safe, because we don't support any Subfilter that
239 // would allow pointers in the options structure.
240 lzma_free(coder->filter_flags.options, allocator);
241 coder->filter_flags.options = NULL;
249 return LZMA_DATA_ERROR;
255 case SEQ_FILTER_FLAGS: {
256 const lzma_ret ret = coder->filter_flags_decoder.code(
257 coder->filter_flags_decoder.coder, allocator,
258 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
259 if (ret != LZMA_STREAM_END)
260 return ret == LZMA_OPTIONS_ERROR
261 ? LZMA_DATA_ERROR : ret;
263 // Don't free the filter_flags_decoder. It doesn't take much
264 // memory and we may need it again.
266 // Initialize the Subfilter. Subblock and Copy filters are
268 if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
269 return LZMA_DATA_ERROR;
271 coder->helper.end_was_reached = false;
273 lzma_filter filters[3] = {
275 .id = coder->filter_flags.id,
276 .options = coder->filter_flags.options,
278 .id = LZMA_FILTER_SUBBLOCK_HELPER,
279 .options = &coder->helper,
281 .id = LZMA_VLI_UNKNOWN,
286 // Optimization: We know that LZMA uses End of Payload Marker
287 // (not End of Input), so we can omit the helper filter.
288 if (filters[0].id == LZMA_FILTER_LZMA1)
289 filters[1].id = LZMA_VLI_UNKNOWN;
291 return_if_error(lzma_raw_decoder_init(
292 &coder->subfilter, allocator, filters));
294 coder->sequence = SEQ_FLAGS;
299 // We are in the beginning of a Subblock. The next Subblock
300 // whose type is not Padding, must indicate end of Subfilter.
301 if (in[*in_pos] == (FLAG_PADDING << 4)) {
306 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
307 return LZMA_DATA_ERROR;
309 coder->sequence = SEQ_FLAGS;
312 case SEQ_REPEAT_COUNT_1:
314 // We use the same code to parse
315 // - the Size (28 bits) in Subblocks of type Data; and
316 // - the Repeat count (28 bits) in Subblocks of type
318 coder->size |= (size_t)(in[*in_pos]) << 4;
323 case SEQ_REPEAT_COUNT_2:
325 coder->size |= (size_t)(in[*in_pos]) << 12;
330 case SEQ_REPEAT_COUNT_3:
332 coder->size |= (size_t)(in[*in_pos]) << 20;
335 // The real value is the stored value plus one.
338 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
339 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
343 case SEQ_REPEAT_SIZE:
344 // Move the Repeat Count to the correct variable and parse
345 // the Size of the Data to be repeated.
346 coder->repeat.count = coder->size;
347 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
348 coder->repeat.pos = 0;
350 // The size of the Data field must be bigger than the number
351 // of Padding bytes before this Subblock.
352 if (coder->repeat.size <= coder->padding)
353 return LZMA_DATA_ERROR;
357 coder->sequence = SEQ_REPEAT_READ_DATA;
360 case SEQ_REPEAT_READ_DATA: {
361 // Fill coder->repeat.buffer[].
362 const size_t in_avail = in_size - *in_pos;
363 const size_t out_avail
364 = coder->repeat.size - coder->repeat.pos;
365 const size_t copy_size = MIN(in_avail, out_avail);
367 memcpy(coder->repeat.buffer + coder->repeat.pos,
368 in + *in_pos, copy_size);
369 *in_pos += copy_size;
370 coder->repeat.pos += copy_size;
372 if (coder->repeat.pos == coder->repeat.size) {
373 coder->repeat.pos = 0;
375 if (coder->repeat.size == 1
376 && coder->subfilter.code == NULL)
377 coder->sequence = SEQ_REPEAT_FAST;
379 coder->sequence = SEQ_REPEAT_NORMAL;
386 // The size of the Data field must be bigger than the number
387 // of Padding bytes before this Subblock.
388 assert(coder->size > 0);
389 if (coder->size <= coder->padding)
390 return LZMA_DATA_ERROR;
394 // Limit the amount of input to match the available
395 // Subblock Data size.
397 if (in_size - *in_pos > coder->size)
398 in_limit = *in_pos + coder->size;
402 if (coder->subfilter.code == NULL) {
403 const size_t copy_size = lzma_bufcpy(
404 in, in_pos, in_limit,
405 out, out_pos, out_size);
407 coder->size -= copy_size;
409 const size_t in_start = *in_pos;
410 const lzma_ret ret = subfilter_decode(
412 in, in_pos, in_limit,
413 out, out_pos, out_size,
416 // Update the number of unprocessed bytes left in
417 // this Subblock. This assert() is true because
418 // in_limit prevents *in_pos getting too big.
419 assert(*in_pos - in_start <= coder->size);
420 coder->size -= *in_pos - in_start;
422 if (ret == LZMA_STREAM_END) {
423 // End of Subfilter can occur only at
424 // a Subblock boundary.
425 if (coder->size != 0)
426 return LZMA_DATA_ERROR;
428 // We need a Subblock with Unset
429 // Subfilter before more data.
430 coder->sequence = SEQ_FILTER_END;
438 // If we couldn't process the whole Subblock Data yet, return.
442 coder->sequence = SEQ_FLAGS;
446 case SEQ_REPEAT_FAST: {
447 // Optimization for cases when there is only one byte to
448 // repeat and no Subfilter.
449 const size_t out_avail = out_size - *out_pos;
450 const size_t copy_size = MIN(coder->repeat.count, out_avail);
452 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
454 *out_pos += copy_size;
455 coder->repeat.count -= copy_size;
457 if (coder->repeat.count != 0)
460 coder->sequence = SEQ_FLAGS;
464 case SEQ_REPEAT_NORMAL:
466 // Cycle the repeat buffer if needed.
467 if (coder->repeat.pos == coder->repeat.size) {
468 if (--coder->repeat.count == 0) {
469 coder->sequence = SEQ_FLAGS;
473 coder->repeat.pos = 0;
476 if (coder->subfilter.code == NULL) {
477 lzma_bufcpy(coder->repeat.buffer,
480 out, out_pos, out_size);
482 const lzma_ret ret = subfilter_decode(
484 coder->repeat.buffer,
487 out, out_pos, out_size,
490 if (ret == LZMA_STREAM_END) {
491 // End of Subfilter can occur only at
492 // a Subblock boundary.
493 if (coder->repeat.pos
494 != coder->repeat.size
497 return LZMA_DATA_ERROR;
499 // We need a Subblock with Unset
500 // Subfilter before more data.
501 coder->sequence = SEQ_FILTER_END;
504 } else if (ret != LZMA_OK) {
508 } while (*out_pos < out_size);
513 return LZMA_PROG_ERROR;
521 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
522 const uint8_t *restrict in, size_t *restrict in_pos,
523 size_t in_size, uint8_t *restrict out,
524 size_t *restrict out_pos, size_t out_size, lzma_action action)
526 if (coder->next.code == NULL)
527 return decode_buffer(coder, allocator, in, in_pos, in_size,
528 out, out_pos, out_size, action);
530 while (*out_pos < out_size) {
531 if (!coder->next_finished
532 && coder->temp.pos == coder->temp.size) {
534 coder->temp.size = 0;
536 const lzma_ret ret = coder->next.code(
538 allocator, in, in_pos, in_size,
539 coder->temp.buffer, &coder->temp.size,
540 LZMA_BUFFER_SIZE, action);
542 if (ret == LZMA_STREAM_END)
543 coder->next_finished = true;
544 else if (coder->temp.size == 0 || ret != LZMA_OK)
548 if (coder->this_finished) {
549 if (coder->temp.pos != coder->temp.size)
550 return LZMA_DATA_ERROR;
552 if (coder->next_finished)
553 return LZMA_STREAM_END;
558 const lzma_ret ret = decode_buffer(coder, allocator,
559 coder->temp.buffer, &coder->temp.pos,
561 out, out_pos, out_size, action);
563 if (ret == LZMA_STREAM_END)
564 // The next coder in the chain hasn't finished
565 // yet. If the input data is valid, there
566 // must be no more output coming, but the
567 // next coder may still need a litle more
568 // input to detect End of Payload Marker.
569 coder->this_finished = true;
570 else if (ret != LZMA_OK)
572 else if (coder->next_finished && *out_pos < out_size)
573 return LZMA_DATA_ERROR;
581 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
583 lzma_next_end(&coder->next, allocator);
584 lzma_next_end(&coder->subfilter, allocator);
585 lzma_next_end(&coder->filter_flags_decoder, allocator);
586 lzma_free(coder->filter_flags.options, allocator);
587 lzma_free(coder, allocator);
593 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
594 const lzma_filter_info *filters)
596 if (next->coder == NULL) {
597 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
598 if (next->coder == NULL)
599 return LZMA_MEM_ERROR;
601 next->code = &subblock_decode;
602 next->end = &subblock_decoder_end;
604 next->coder->next = LZMA_NEXT_CODER_INIT;
605 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
606 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
609 lzma_next_end(&next->coder->subfilter, allocator);
610 lzma_free(next->coder->filter_flags.options, allocator);
613 next->coder->filter_flags.options = NULL;
615 next->coder->sequence = SEQ_FLAGS;
616 next->coder->padding = 0;
617 next->coder->next_finished = false;
618 next->coder->this_finished = false;
619 next->coder->temp.pos = 0;
620 next->coder->temp.size = 0;
622 if (filters[0].options != NULL)
623 next->coder->allow_subfilters = ((lzma_options_subblock *)(
624 filters[0].options))->allow_subfilters;
626 next->coder->allow_subfilters = false;
628 return lzma_next_filter_init(
629 &next->coder->next, allocator, filters + 1);