2 /*-------------------------------------------------------------*/
3 /*--- Decompression machinery ---*/
4 /*--- decompress.c ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.8 of 13 July 2019
12 Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
22 #include "bzlib_private.h"
25 /*---------------------------------------------------*/
27 void makeMaps_d ( DState* s )
31 for (i = 0; i < 256; i++)
33 s->seqToUnseq[s->nInUse] = i;
39 /*---------------------------------------------------*/
41 { retVal = rrr; goto save_state_and_return; };
43 #define GET_BITS(lll,vvv,nnn) \
44 case lll: s->state = lll; \
46 if (s->bsLive >= nnn) { \
49 (s->bsLive-nnn)) & ((1 << nnn)-1); \
54 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
56 = (s->bsBuff << 8) | \
58 (*((UChar*)(s->strm->next_in)))); \
61 s->strm->avail_in--; \
62 s->strm->total_in_lo32++; \
63 if (s->strm->total_in_lo32 == 0) \
64 s->strm->total_in_hi32++; \
67 #define GET_UCHAR(lll,uuu) \
70 #define GET_BIT(lll,uuu) \
73 /*---------------------------------------------------*/
74 #define GET_MTF_VAL(label1,label2,lval) \
76 if (groupPos == 0) { \
78 if (groupNo >= nSelectors) \
79 RETURN(BZ_DATA_ERROR); \
80 groupPos = BZ_G_SIZE; \
81 gSel = s->selector[groupNo]; \
82 gMinlen = s->minLens[gSel]; \
83 gLimit = &(s->limit[gSel][0]); \
84 gPerm = &(s->perm[gSel][0]); \
85 gBase = &(s->base[gSel][0]); \
89 GET_BITS(label1, zvec, zn); \
91 if (zn > 20 /* the longest code */) \
92 RETURN(BZ_DATA_ERROR); \
93 if (zvec <= gLimit[zn]) break; \
95 GET_BIT(label2, zj); \
96 zvec = (zvec << 1) | zj; \
98 if (zvec - gBase[zn] < 0 \
99 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
100 RETURN(BZ_DATA_ERROR); \
101 lval = gPerm[zvec - gBase[zn]]; \
105 /*---------------------------------------------------*/
106 Int32 BZ2_decompress ( DState* s )
110 Int32 minLen, maxLen;
111 bz_stream* strm = s->strm;
113 /* stuff that needs to be saved/restored */
139 if (s->state == BZ_X_MAGIC_1) {
140 /*initialise the save area*/
144 s->save_alphaSize = 0;
146 s->save_nSelectors = 0;
149 s->save_groupPos = 0;
151 s->save_nblockMAX = 0;
162 s->save_gLimit = NULL;
163 s->save_gBase = NULL;
164 s->save_gPerm = NULL;
167 /*restore from the save area*/
171 alphaSize = s->save_alphaSize;
172 nGroups = s->save_nGroups;
173 nSelectors = s->save_nSelectors;
175 groupNo = s->save_groupNo;
176 groupPos = s->save_groupPos;
177 nextSym = s->save_nextSym;
178 nblockMAX = s->save_nblockMAX;
179 nblock = s->save_nblock;
188 gMinlen = s->save_gMinlen;
189 gLimit = s->save_gLimit;
190 gBase = s->save_gBase;
191 gPerm = s->save_gPerm;
197 GET_UCHAR(BZ_X_MAGIC_1, uc);
198 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
200 GET_UCHAR(BZ_X_MAGIC_2, uc);
201 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
203 GET_UCHAR(BZ_X_MAGIC_3, uc)
204 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
206 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
207 if (s->blockSize100k < (BZ_HDR_0 + 1) ||
208 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
209 s->blockSize100k -= BZ_HDR_0;
211 if (s->smallDecompress) {
212 s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
214 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
216 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
218 s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
219 if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
222 GET_UCHAR(BZ_X_BLKHDR_1, uc);
224 if (uc == 0x17) goto endhdr_2;
225 if (uc != 0x31) RETURN(BZ_DATA_ERROR);
226 GET_UCHAR(BZ_X_BLKHDR_2, uc);
227 if (uc != 0x41) RETURN(BZ_DATA_ERROR);
228 GET_UCHAR(BZ_X_BLKHDR_3, uc);
229 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
230 GET_UCHAR(BZ_X_BLKHDR_4, uc);
231 if (uc != 0x26) RETURN(BZ_DATA_ERROR);
232 GET_UCHAR(BZ_X_BLKHDR_5, uc);
233 if (uc != 0x53) RETURN(BZ_DATA_ERROR);
234 GET_UCHAR(BZ_X_BLKHDR_6, uc);
235 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
238 if (s->verbosity >= 2)
239 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
241 s->storedBlockCRC = 0;
242 GET_UCHAR(BZ_X_BCRC_1, uc);
243 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
244 GET_UCHAR(BZ_X_BCRC_2, uc);
245 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
246 GET_UCHAR(BZ_X_BCRC_3, uc);
247 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
248 GET_UCHAR(BZ_X_BCRC_4, uc);
249 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
251 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
254 GET_UCHAR(BZ_X_ORIGPTR_1, uc);
255 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
256 GET_UCHAR(BZ_X_ORIGPTR_2, uc);
257 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
258 GET_UCHAR(BZ_X_ORIGPTR_3, uc);
259 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
262 RETURN(BZ_DATA_ERROR);
263 if (s->origPtr > 10 + 100000*s->blockSize100k)
264 RETURN(BZ_DATA_ERROR);
266 /*--- Receive the mapping table ---*/
267 for (i = 0; i < 16; i++) {
268 GET_BIT(BZ_X_MAPPING_1, uc);
270 s->inUse16[i] = True; else
271 s->inUse16[i] = False;
274 for (i = 0; i < 256; i++) s->inUse[i] = False;
276 for (i = 0; i < 16; i++)
278 for (j = 0; j < 16; j++) {
279 GET_BIT(BZ_X_MAPPING_2, uc);
280 if (uc == 1) s->inUse[i * 16 + j] = True;
283 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
284 alphaSize = s->nInUse+2;
286 /*--- Now the selectors ---*/
287 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
288 if (nGroups < 2 || nGroups > BZ_N_GROUPS) RETURN(BZ_DATA_ERROR);
289 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
290 if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
291 for (i = 0; i < nSelectors; i++) {
294 GET_BIT(BZ_X_SELECTOR_3, uc);
297 if (j >= nGroups) RETURN(BZ_DATA_ERROR);
299 /* Having more than BZ_MAX_SELECTORS doesn't make much sense
300 since they will never be used, but some implementations might
301 "round up" the number of selectors, so just ignore those. */
302 if (i < BZ_MAX_SELECTORS)
303 s->selectorMtf[i] = j;
305 if (nSelectors > BZ_MAX_SELECTORS)
306 nSelectors = BZ_MAX_SELECTORS;
308 /*--- Undo the MTF values for the selectors. ---*/
310 UChar pos[BZ_N_GROUPS], tmp, v;
311 for (v = 0; v < nGroups; v++) pos[v] = v;
313 for (i = 0; i < nSelectors; i++) {
314 v = s->selectorMtf[i];
316 while (v > 0) { pos[v] = pos[v-1]; v--; }
318 s->selector[i] = tmp;
322 /*--- Now the coding tables ---*/
323 for (t = 0; t < nGroups; t++) {
324 GET_BITS(BZ_X_CODING_1, curr, 5);
325 for (i = 0; i < alphaSize; i++) {
327 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
328 GET_BIT(BZ_X_CODING_2, uc);
330 GET_BIT(BZ_X_CODING_3, uc);
331 if (uc == 0) curr++; else curr--;
337 /*--- Create the Huffman decoding tables ---*/
338 for (t = 0; t < nGroups; t++) {
341 for (i = 0; i < alphaSize; i++) {
342 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
343 if (s->len[t][i] < minLen) minLen = s->len[t][i];
345 BZ2_hbCreateDecodeTables (
350 minLen, maxLen, alphaSize
352 s->minLens[t] = minLen;
355 /*--- Now the MTF values ---*/
358 nblockMAX = 100000 * s->blockSize100k;
362 for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
368 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
369 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
370 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
373 s->mtfbase[ii] = kk + 1;
376 /*-- end MTF init --*/
379 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
383 if (nextSym == EOB) break;
385 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
390 /* Check that N doesn't get too big, so that es doesn't
391 go negative. The maximum value that can be
392 RUNA/RUNB encoded is equal to the block size (post
393 the initial RLE), viz, 900k, so bounding N at 2
394 million should guard against overflow without
395 rejecting any legitimate inputs. */
396 if (N >= 2*1024*1024) RETURN(BZ_DATA_ERROR);
397 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
398 if (nextSym == BZ_RUNB) es = es + (1+1) * N;
400 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
402 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
405 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
406 s->unzftab[uc] += es;
408 if (s->smallDecompress)
410 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
411 s->ll16[nblock] = (UInt16)uc;
417 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
418 s->tt[nblock] = (UInt32)uc;
427 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
429 /*-- uc = MTF ( nextSym-1 ) --*/
431 Int32 ii, jj, kk, pp, lno, off;
433 nn = (UInt32)(nextSym - 1);
435 if (nn < MTFL_SIZE) {
436 /* avoid general-case expense */
441 s->mtfa[(z) ] = s->mtfa[(z)-1];
442 s->mtfa[(z)-1] = s->mtfa[(z)-2];
443 s->mtfa[(z)-2] = s->mtfa[(z)-3];
444 s->mtfa[(z)-3] = s->mtfa[(z)-4];
448 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
453 lno = nn / MTFL_SIZE;
454 off = nn % MTFL_SIZE;
455 pp = s->mtfbase[lno] + off;
457 while (pp > s->mtfbase[lno]) {
458 s->mtfa[pp] = s->mtfa[pp-1]; pp--;
463 s->mtfa[s->mtfbase[lno]]
464 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
468 s->mtfa[s->mtfbase[0]] = uc;
469 if (s->mtfbase[0] == 0) {
471 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
472 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
473 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
476 s->mtfbase[ii] = kk + 1;
481 /*-- end uc = MTF ( nextSym-1 ) --*/
483 s->unzftab[s->seqToUnseq[uc]]++;
484 if (s->smallDecompress)
485 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
486 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
489 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
494 /* Now we know what nblock is, we can do a better sanity
497 if (s->origPtr < 0 || s->origPtr >= nblock)
498 RETURN(BZ_DATA_ERROR);
500 /*-- Set up cftab to facilitate generation of T^(-1) --*/
501 /* Check: unzftab entries in range. */
502 for (i = 0; i <= 255; i++) {
503 if (s->unzftab[i] < 0 || s->unzftab[i] > nblock)
504 RETURN(BZ_DATA_ERROR);
506 /* Actually generate cftab. */
508 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
509 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
510 /* Check: cftab entries in range. */
511 for (i = 0; i <= 256; i++) {
512 if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
513 /* s->cftab[i] can legitimately be == nblock */
514 RETURN(BZ_DATA_ERROR);
517 /* Check: cftab entries non-descending. */
518 for (i = 1; i <= 256; i++) {
519 if (s->cftab[i-1] > s->cftab[i]) {
520 RETURN(BZ_DATA_ERROR);
524 s->state_out_len = 0;
526 BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
527 s->state = BZ_X_OUTPUT;
528 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
530 if (s->smallDecompress) {
532 /*-- Make a copy of cftab, used in generation of T --*/
533 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
535 /*-- compute the T vector --*/
536 for (i = 0; i < nblock; i++) {
537 uc = (UChar)(s->ll16[i]);
538 SET_LL(i, s->cftabCopy[uc]);
542 /*-- Compute T^(-1) by pointer reversal on T --*/
546 Int32 tmp = GET_LL(j);
551 while (i != s->origPtr);
553 s->tPos = s->origPtr;
555 if (s->blockRandomised) {
557 BZ_GET_SMALL(s->k0); s->nblock_used++;
558 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
560 BZ_GET_SMALL(s->k0); s->nblock_used++;
565 /*-- compute the T^(-1) vector --*/
566 for (i = 0; i < nblock; i++) {
567 uc = (UChar)(s->tt[i] & 0xff);
568 s->tt[s->cftab[uc]] |= (i << 8);
572 s->tPos = s->tt[s->origPtr] >> 8;
574 if (s->blockRandomised) {
576 BZ_GET_FAST(s->k0); s->nblock_used++;
577 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
579 BZ_GET_FAST(s->k0); s->nblock_used++;
590 GET_UCHAR(BZ_X_ENDHDR_2, uc);
591 if (uc != 0x72) RETURN(BZ_DATA_ERROR);
592 GET_UCHAR(BZ_X_ENDHDR_3, uc);
593 if (uc != 0x45) RETURN(BZ_DATA_ERROR);
594 GET_UCHAR(BZ_X_ENDHDR_4, uc);
595 if (uc != 0x38) RETURN(BZ_DATA_ERROR);
596 GET_UCHAR(BZ_X_ENDHDR_5, uc);
597 if (uc != 0x50) RETURN(BZ_DATA_ERROR);
598 GET_UCHAR(BZ_X_ENDHDR_6, uc);
599 if (uc != 0x90) RETURN(BZ_DATA_ERROR);
601 s->storedCombinedCRC = 0;
602 GET_UCHAR(BZ_X_CCRC_1, uc);
603 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
604 GET_UCHAR(BZ_X_CCRC_2, uc);
605 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
606 GET_UCHAR(BZ_X_CCRC_3, uc);
607 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
608 GET_UCHAR(BZ_X_CCRC_4, uc);
609 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
611 s->state = BZ_X_IDLE;
612 RETURN(BZ_STREAM_END);
614 default: AssertH ( False, 4001 );
617 AssertH ( False, 4002 );
619 save_state_and_return:
624 s->save_alphaSize = alphaSize;
625 s->save_nGroups = nGroups;
626 s->save_nSelectors = nSelectors;
628 s->save_groupNo = groupNo;
629 s->save_groupPos = groupPos;
630 s->save_nextSym = nextSym;
631 s->save_nblockMAX = nblockMAX;
632 s->save_nblock = nblock;
641 s->save_gMinlen = gMinlen;
642 s->save_gLimit = gLimit;
643 s->save_gBase = gBase;
644 s->save_gPerm = gPerm;
650 /*-------------------------------------------------------------*/
651 /*--- end decompress.c ---*/
652 /*-------------------------------------------------------------*/