]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - contrib/xz/src/liblzma/subblock/subblock_decoder.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / contrib / xz / src / liblzma / subblock / subblock_decoder.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       subblock_decoder.c
4 /// \brief      Decoder of the Subblock filter
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "subblock_decoder.h"
14 #include "subblock_decoder_helper.h"
15 #include "filter_decoder.h"
16
17
18 /// Maximum number of consecutive Subblocks with Subblock Type Padding
19 #define PADDING_MAX 31
20
21
22 struct lzma_coder_s {
23         lzma_next_coder next;
24
25         enum {
26                 // These require that there is at least one input
27                 // byte available.
28                 SEQ_FLAGS,
29                 SEQ_FILTER_FLAGS,
30                 SEQ_FILTER_END,
31                 SEQ_REPEAT_COUNT_1,
32                 SEQ_REPEAT_COUNT_2,
33                 SEQ_REPEAT_COUNT_3,
34                 SEQ_REPEAT_SIZE,
35                 SEQ_REPEAT_READ_DATA,
36                 SEQ_SIZE_1,
37                 SEQ_SIZE_2,
38                 SEQ_SIZE_3, // This must be right before SEQ_DATA.
39
40                 // These don't require any input to be available.
41                 SEQ_DATA,
42                 SEQ_REPEAT_FAST,
43                 SEQ_REPEAT_NORMAL,
44         } sequence;
45
46         /// Number of bytes left in the current Subblock Data field.
47         size_t size;
48
49         /// Number of consecutive Subblocks with Subblock Type Padding
50         uint32_t padding;
51
52         /// True when .next.code() has returned LZMA_STREAM_END.
53         bool next_finished;
54
55         /// True when the Subblock decoder has detected End of Payload Marker.
56         /// This may become true before next_finished becomes true.
57         bool this_finished;
58
59         /// True if Subfilters are allowed.
60         bool allow_subfilters;
61
62         /// Indicates if at least one byte of decoded output has been
63         /// produced after enabling Subfilter.
64         bool got_output_with_subfilter;
65
66         /// Possible subfilter
67         lzma_next_coder subfilter;
68
69         /// Filter Flags decoder is needed to parse the ID and Properties
70         /// of the subfilter.
71         lzma_next_coder filter_flags_decoder;
72
73         /// The filter_flags_decoder stores its results here.
74         lzma_filter filter_flags;
75
76         /// Options for the Subblock decoder helper. This is used to tell
77         /// the helper when it should return LZMA_STREAM_END to the subfilter.
78         lzma_options_subblock_helper helper;
79
80         struct {
81                 /// How many times buffer should be repeated
82                 size_t count;
83
84                 /// Size of the buffer
85                 size_t size;
86
87                 /// Position in the buffer
88                 size_t pos;
89
90                 /// Buffer to hold the data to be repeated
91                 uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
92         } repeat;
93
94         /// Temporary buffer needed when the Subblock filter is not the last
95         /// filter in the chain. The output of the next filter is first
96         /// decoded into buffer[], which is then used as input for the actual
97         /// Subblock decoder.
98         struct {
99                 size_t pos;
100                 size_t size;
101                 uint8_t buffer[LZMA_BUFFER_SIZE];
102         } temp;
103 };
104
105
106 /// Values of valid Subblock Flags
107 enum {
108         FLAG_PADDING,
109         FLAG_EOPM,
110         FLAG_DATA,
111         FLAG_REPEAT,
112         FLAG_SET_SUBFILTER,
113         FLAG_END_SUBFILTER,
114 };
115
116
117 /// Calls the subfilter and updates coder->uncompressed_size.
118 static lzma_ret
119 subfilter_decode(lzma_coder *coder, lzma_allocator *allocator,
120                 const uint8_t *in, size_t *in_pos,
121                 size_t in_size, uint8_t *restrict out,
122                 size_t *restrict out_pos, size_t out_size, lzma_action action)
123 {
124         assert(coder->subfilter.code != NULL);
125
126         // Call the subfilter.
127         const lzma_ret ret = coder->subfilter.code(
128                         coder->subfilter.coder, allocator,
129                         in, in_pos, in_size, out, out_pos, out_size, action);
130
131         return ret;
132 }
133
134
135 static lzma_ret
136 decode_buffer(lzma_coder *coder, lzma_allocator *allocator,
137                 const uint8_t *in, size_t *in_pos,
138                 size_t in_size, uint8_t *restrict out,
139                 size_t *restrict out_pos, size_t out_size, lzma_action action)
140 {
141         while (*out_pos < out_size && (*in_pos < in_size
142                         || coder->sequence >= SEQ_DATA))
143         switch (coder->sequence) {
144         case SEQ_FLAGS: {
145                 // Do the correct action depending on the Subblock Type.
146                 switch (in[*in_pos] >> 4) {
147                 case FLAG_PADDING:
148                         // Only check that reserved bits are zero.
149                         if (++coder->padding > PADDING_MAX
150                                         || in[*in_pos] & 0x0F)
151                                 return LZMA_DATA_ERROR;
152                         ++*in_pos;
153                         break;
154
155                 case FLAG_EOPM:
156                         // There must be no Padding before EOPM.
157                         if (coder->padding != 0)
158                                 return LZMA_DATA_ERROR;
159
160                         // Check that reserved bits are zero.
161                         if (in[*in_pos] & 0x0F)
162                                 return LZMA_DATA_ERROR;
163
164                         // There must be no Subfilter enabled.
165                         if (coder->subfilter.code != NULL)
166                                 return LZMA_DATA_ERROR;
167
168                         ++*in_pos;
169                         return LZMA_STREAM_END;
170
171                 case FLAG_DATA:
172                         // First four bits of the Subblock Data size.
173                         coder->size = in[*in_pos] & 0x0F;
174                         ++*in_pos;
175                         coder->got_output_with_subfilter = true;
176                         coder->sequence = SEQ_SIZE_1;
177                         break;
178
179                 case FLAG_REPEAT:
180                         // First four bits of the Repeat Count. We use
181                         // coder->size as a temporary place for it.
182                         coder->size = in[*in_pos] & 0x0F;
183                         ++*in_pos;
184                         coder->got_output_with_subfilter = true;
185                         coder->sequence = SEQ_REPEAT_COUNT_1;
186                         break;
187
188                 case FLAG_SET_SUBFILTER: {
189                         if (coder->padding != 0 || (in[*in_pos] & 0x0F)
190                                         || coder->subfilter.code != NULL
191                                         || !coder->allow_subfilters)
192                                 return LZMA_DATA_ERROR;
193
194                         assert(coder->filter_flags.options == NULL);
195                         abort();
196 //                      return_if_error(lzma_filter_flags_decoder_init(
197 //                                      &coder->filter_flags_decoder,
198 //                                      allocator, &coder->filter_flags));
199
200                         coder->got_output_with_subfilter = false;
201
202                         ++*in_pos;
203                         coder->sequence = SEQ_FILTER_FLAGS;
204                         break;
205                 }
206
207                 case FLAG_END_SUBFILTER: {
208                         if (coder->padding != 0 || (in[*in_pos] & 0x0F)
209                                         || coder->subfilter.code == NULL
210                                         || !coder->got_output_with_subfilter)
211                                 return LZMA_DATA_ERROR;
212
213                         // Tell the helper filter to indicate End of Input
214                         // to our subfilter.
215                         coder->helper.end_was_reached = true;
216
217                         size_t dummy = 0;
218                         const lzma_ret ret = subfilter_decode(coder, allocator,
219                                         NULL, &dummy, 0, out, out_pos,out_size,
220                                         action);
221
222                         // If we didn't reach the end of the subfilter's output
223                         // yet, return to the application. On the next call we
224                         // will get to this same switch-case again, because we
225                         // haven't updated *in_pos yet.
226                         if (ret != LZMA_STREAM_END)
227                                 return ret;
228
229                         // Free Subfilter's memory. This is a bit debatable,
230                         // since we could avoid some malloc()/free() calls
231                         // if the same Subfilter gets used soon again. But
232                         // if Subfilter isn't used again, we could leave
233                         // a memory-hogging filter dangling until someone
234                         // frees Subblock filter itself.
235                         lzma_next_end(&coder->subfilter, allocator);
236
237                         // Free memory used for subfilter options. This is
238                         // safe, because we don't support any Subfilter that
239                         // would allow pointers in the options structure.
240                         lzma_free(coder->filter_flags.options, allocator);
241                         coder->filter_flags.options = NULL;
242
243                         ++*in_pos;
244
245                         break;
246                 }
247
248                 default:
249                         return LZMA_DATA_ERROR;
250                 }
251
252                 break;
253         }
254
255         case SEQ_FILTER_FLAGS: {
256                 const lzma_ret ret = coder->filter_flags_decoder.code(
257                                 coder->filter_flags_decoder.coder, allocator,
258                                 in, in_pos, in_size, NULL, NULL, 0, LZMA_RUN);
259                 if (ret != LZMA_STREAM_END)
260                         return ret == LZMA_OPTIONS_ERROR
261                                         ? LZMA_DATA_ERROR : ret;
262
263                 // Don't free the filter_flags_decoder. It doesn't take much
264                 // memory and we may need it again.
265
266                 // Initialize the Subfilter. Subblock and Copy filters are
267                 // not allowed.
268                 if (coder->filter_flags.id == LZMA_FILTER_SUBBLOCK)
269                         return LZMA_DATA_ERROR;
270
271                 coder->helper.end_was_reached = false;
272
273                 lzma_filter filters[3] = {
274                         {
275                                 .id = coder->filter_flags.id,
276                                 .options = coder->filter_flags.options,
277                         }, {
278                                 .id = LZMA_FILTER_SUBBLOCK_HELPER,
279                                 .options = &coder->helper,
280                         }, {
281                                 .id = LZMA_VLI_UNKNOWN,
282                                 .options = NULL,
283                         }
284                 };
285
286                 // Optimization: We know that LZMA uses End of Payload Marker
287                 // (not End of Input), so we can omit the helper filter.
288                 if (filters[0].id == LZMA_FILTER_LZMA1)
289                         filters[1].id = LZMA_VLI_UNKNOWN;
290
291                 return_if_error(lzma_raw_decoder_init(
292                                 &coder->subfilter, allocator, filters));
293
294                 coder->sequence = SEQ_FLAGS;
295                 break;
296         }
297
298         case SEQ_FILTER_END:
299                 // We are in the beginning of a Subblock. The next Subblock
300                 // whose type is not Padding, must indicate end of Subfilter.
301                 if (in[*in_pos] == (FLAG_PADDING << 4)) {
302                         ++*in_pos;
303                         break;
304                 }
305
306                 if (in[*in_pos] != (FLAG_END_SUBFILTER << 4))
307                         return LZMA_DATA_ERROR;
308
309                 coder->sequence = SEQ_FLAGS;
310                 break;
311
312         case SEQ_REPEAT_COUNT_1:
313         case SEQ_SIZE_1:
314                 // We use the same code to parse
315                 //  - the Size (28 bits) in Subblocks of type Data; and
316                 //  - the Repeat count (28 bits) in Subblocks of type
317                 //    Repeating Data.
318                 coder->size |= (size_t)(in[*in_pos]) << 4;
319                 ++*in_pos;
320                 ++coder->sequence;
321                 break;
322
323         case SEQ_REPEAT_COUNT_2:
324         case SEQ_SIZE_2:
325                 coder->size |= (size_t)(in[*in_pos]) << 12;
326                 ++*in_pos;
327                 ++coder->sequence;
328                 break;
329
330         case SEQ_REPEAT_COUNT_3:
331         case SEQ_SIZE_3:
332                 coder->size |= (size_t)(in[*in_pos]) << 20;
333                 ++*in_pos;
334
335                 // The real value is the stored value plus one.
336                 ++coder->size;
337
338                 // This moves to SEQ_REPEAT_SIZE or SEQ_DATA. That's why
339                 // SEQ_DATA must be right after SEQ_SIZE_3 in coder->sequence.
340                 ++coder->sequence;
341                 break;
342
343         case SEQ_REPEAT_SIZE:
344                 // Move the Repeat Count to the correct variable and parse
345                 // the Size of the Data to be repeated.
346                 coder->repeat.count = coder->size;
347                 coder->repeat.size = (size_t)(in[*in_pos]) + 1;
348                 coder->repeat.pos = 0;
349
350                 // The size of the Data field must be bigger than the number
351                 // of Padding bytes before this Subblock.
352                 if (coder->repeat.size <= coder->padding)
353                         return LZMA_DATA_ERROR;
354
355                 ++*in_pos;
356                 coder->padding = 0;
357                 coder->sequence = SEQ_REPEAT_READ_DATA;
358                 break;
359
360         case SEQ_REPEAT_READ_DATA: {
361                 // Fill coder->repeat.buffer[].
362                 const size_t in_avail = in_size - *in_pos;
363                 const size_t out_avail
364                                 = coder->repeat.size - coder->repeat.pos;
365                 const size_t copy_size = MIN(in_avail, out_avail);
366
367                 memcpy(coder->repeat.buffer + coder->repeat.pos,
368                                 in + *in_pos, copy_size);
369                 *in_pos += copy_size;
370                 coder->repeat.pos += copy_size;
371
372                 if (coder->repeat.pos == coder->repeat.size) {
373                         coder->repeat.pos = 0;
374
375                         if (coder->repeat.size == 1
376                                         && coder->subfilter.code == NULL)
377                                 coder->sequence = SEQ_REPEAT_FAST;
378                         else
379                                 coder->sequence = SEQ_REPEAT_NORMAL;
380                 }
381
382                 break;
383         }
384
385         case SEQ_DATA: {
386                 // The size of the Data field must be bigger than the number
387                 // of Padding bytes before this Subblock.
388                 assert(coder->size > 0);
389                 if (coder->size <= coder->padding)
390                         return LZMA_DATA_ERROR;
391
392                 coder->padding = 0;
393
394                 // Limit the amount of input to match the available
395                 // Subblock Data size.
396                 size_t in_limit;
397                 if (in_size - *in_pos > coder->size)
398                         in_limit = *in_pos + coder->size;
399                 else
400                         in_limit = in_size;
401
402                 if (coder->subfilter.code == NULL) {
403                         const size_t copy_size = lzma_bufcpy(
404                                         in, in_pos, in_limit,
405                                         out, out_pos, out_size);
406
407                         coder->size -= copy_size;
408                 } else {
409                         const size_t in_start = *in_pos;
410                         const lzma_ret ret = subfilter_decode(
411                                         coder, allocator,
412                                         in, in_pos, in_limit,
413                                         out, out_pos, out_size,
414                                         action);
415
416                         // Update the number of unprocessed bytes left in
417                         // this Subblock. This assert() is true because
418                         // in_limit prevents *in_pos getting too big.
419                         assert(*in_pos - in_start <= coder->size);
420                         coder->size -= *in_pos - in_start;
421
422                         if (ret == LZMA_STREAM_END) {
423                                 // End of Subfilter can occur only at
424                                 // a Subblock boundary.
425                                 if (coder->size != 0)
426                                         return LZMA_DATA_ERROR;
427
428                                 // We need a Subblock with Unset
429                                 // Subfilter before more data.
430                                 coder->sequence = SEQ_FILTER_END;
431                                 break;
432                         }
433
434                         if (ret != LZMA_OK)
435                                 return ret;
436                 }
437
438                 // If we couldn't process the whole Subblock Data yet, return.
439                 if (coder->size > 0)
440                         return LZMA_OK;
441
442                 coder->sequence = SEQ_FLAGS;
443                 break;
444         }
445
446         case SEQ_REPEAT_FAST: {
447                 // Optimization for cases when there is only one byte to
448                 // repeat and no Subfilter.
449                 const size_t out_avail = out_size - *out_pos;
450                 const size_t copy_size = MIN(coder->repeat.count, out_avail);
451
452                 memset(out + *out_pos, coder->repeat.buffer[0], copy_size);
453
454                 *out_pos += copy_size;
455                 coder->repeat.count -= copy_size;
456
457                 if (coder->repeat.count != 0)
458                         return LZMA_OK;
459
460                 coder->sequence = SEQ_FLAGS;
461                 break;
462         }
463
464         case SEQ_REPEAT_NORMAL:
465                 do {
466                         // Cycle the repeat buffer if needed.
467                         if (coder->repeat.pos == coder->repeat.size) {
468                                 if (--coder->repeat.count == 0) {
469                                         coder->sequence = SEQ_FLAGS;
470                                         break;
471                                 }
472
473                                 coder->repeat.pos = 0;
474                         }
475
476                         if (coder->subfilter.code == NULL) {
477                                 lzma_bufcpy(coder->repeat.buffer,
478                                                 &coder->repeat.pos,
479                                                 coder->repeat.size,
480                                                 out, out_pos, out_size);
481                         } else {
482                                 const lzma_ret ret = subfilter_decode(
483                                                 coder, allocator,
484                                                 coder->repeat.buffer,
485                                                 &coder->repeat.pos,
486                                                 coder->repeat.size,
487                                                 out, out_pos, out_size,
488                                                 action);
489
490                                 if (ret == LZMA_STREAM_END) {
491                                         // End of Subfilter can occur only at
492                                         // a Subblock boundary.
493                                         if (coder->repeat.pos
494                                                         != coder->repeat.size
495                                                         || --coder->repeat
496                                                                 .count != 0)
497                                                 return LZMA_DATA_ERROR;
498
499                                         // We need a Subblock with Unset
500                                         // Subfilter before more data.
501                                         coder->sequence = SEQ_FILTER_END;
502                                         break;
503
504                                 } else if (ret != LZMA_OK) {
505                                         return ret;
506                                 }
507                         }
508                 } while (*out_pos < out_size);
509
510                 break;
511
512         default:
513                 return LZMA_PROG_ERROR;
514         }
515
516         return LZMA_OK;
517 }
518
519
520 static lzma_ret
521 subblock_decode(lzma_coder *coder, lzma_allocator *allocator,
522                 const uint8_t *restrict in, size_t *restrict in_pos,
523                 size_t in_size, uint8_t *restrict out,
524                 size_t *restrict out_pos, size_t out_size, lzma_action action)
525 {
526         if (coder->next.code == NULL)
527                 return decode_buffer(coder, allocator, in, in_pos, in_size,
528                                 out, out_pos, out_size, action);
529
530         while (*out_pos < out_size) {
531                 if (!coder->next_finished
532                                 && coder->temp.pos == coder->temp.size) {
533                         coder->temp.pos = 0;
534                         coder->temp.size = 0;
535
536                         const lzma_ret ret = coder->next.code(
537                                         coder->next.coder,
538                                         allocator, in, in_pos, in_size,
539                                         coder->temp.buffer, &coder->temp.size,
540                                         LZMA_BUFFER_SIZE, action);
541
542                         if (ret == LZMA_STREAM_END)
543                                 coder->next_finished = true;
544                         else if (coder->temp.size == 0 || ret != LZMA_OK)
545                                 return ret;
546                 }
547
548                 if (coder->this_finished) {
549                         if (coder->temp.pos != coder->temp.size)
550                                 return LZMA_DATA_ERROR;
551
552                         if (coder->next_finished)
553                                 return LZMA_STREAM_END;
554
555                         return LZMA_OK;
556                 }
557
558                 const lzma_ret ret = decode_buffer(coder, allocator,
559                                 coder->temp.buffer, &coder->temp.pos,
560                                 coder->temp.size,
561                                 out, out_pos, out_size, action);
562
563                 if (ret == LZMA_STREAM_END)
564                         // The next coder in the chain hasn't finished
565                         // yet. If the input data is valid, there
566                         // must be no more output coming, but the
567                         // next coder may still need a litle more
568                         // input to detect End of Payload Marker.
569                         coder->this_finished = true;
570                 else if (ret != LZMA_OK)
571                         return ret;
572                 else if (coder->next_finished && *out_pos < out_size)
573                         return LZMA_DATA_ERROR;
574         }
575
576         return LZMA_OK;
577 }
578
579
580 static void
581 subblock_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
582 {
583         lzma_next_end(&coder->next, allocator);
584         lzma_next_end(&coder->subfilter, allocator);
585         lzma_next_end(&coder->filter_flags_decoder, allocator);
586         lzma_free(coder->filter_flags.options, allocator);
587         lzma_free(coder, allocator);
588         return;
589 }
590
591
592 extern lzma_ret
593 lzma_subblock_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
594                 const lzma_filter_info *filters)
595 {
596         if (next->coder == NULL) {
597                 next->coder = lzma_alloc(sizeof(lzma_coder), allocator);
598                 if (next->coder == NULL)
599                         return LZMA_MEM_ERROR;
600
601                 next->code = &subblock_decode;
602                 next->end = &subblock_decoder_end;
603
604                 next->coder->next = LZMA_NEXT_CODER_INIT;
605                 next->coder->subfilter = LZMA_NEXT_CODER_INIT;
606                 next->coder->filter_flags_decoder = LZMA_NEXT_CODER_INIT;
607
608         } else {
609                 lzma_next_end(&next->coder->subfilter, allocator);
610                 lzma_free(next->coder->filter_flags.options, allocator);
611         }
612
613         next->coder->filter_flags.options = NULL;
614
615         next->coder->sequence = SEQ_FLAGS;
616         next->coder->padding = 0;
617         next->coder->next_finished = false;
618         next->coder->this_finished = false;
619         next->coder->temp.pos = 0;
620         next->coder->temp.size = 0;
621
622         if (filters[0].options != NULL)
623                 next->coder->allow_subfilters = ((lzma_options_subblock *)(
624                                 filters[0].options))->allow_subfilters;
625         else
626                 next->coder->allow_subfilters = false;
627
628         return lzma_next_filter_init(
629                         &next->coder->next, allocator, filters + 1);
630 }