2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
12 /******************************************
14 ******************************************/
15 #include <stddef.h> /* size_t, ptrdiff_t */
17 #include "error_private.h"
20 /******************************************
22 ******************************************/
23 /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
24 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
26 /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
27 #define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
/* Declares a stack/static Huff0 DTable; slot [0] is initialized with maxTableLog,
 * which the reader (HUF_readDTable) treats as the table's maximum allowed log size. */
28 #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
29 unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
32 /******************************************
34 ******************************************/
/* X-macro list of all FSE error codes; expanded below to build the enum.
 * FSE_ERROR_maxCode must remain last : it is used as the error-range sentinel
 * by FSE_isError(). Functions report errors as (size_t)-enumValue. */
35 #define FSE_LIST_ERRORS(ITEM) \
36 ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
37 ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
38 ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
39 ITEM(FSE_ERROR_corruptionDetected) \
40 ITEM(FSE_ERROR_maxCode)
42 #define FSE_GENERATE_ENUM(ENUM) ENUM,
43 typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */
46 /******************************************
47 * FSE symbol compression API
48 ******************************************/
50 This API consists of small unitary functions, which highly benefit from being inlined.
51 You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
52 Visual seems to do it automatically.
53 For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
54 If none of these solutions is applicable, include "fse.c" directly.
57 typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */
58 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
72 const void* stateTable;
80 unsigned bitsConsumed;
88 const void* table; /* precise table may vary, depending on U16 */
/* Status returned by FSE_reloadDStream(). Values are ordered so callers can
 * compare with < / > (e.g. "status < FSE_DStream_completed" means more data may come). */
91 typedef enum { FSE_DStream_unfinished = 0,
92 FSE_DStream_endOfBuffer = 1,
93 FSE_DStream_completed = 2,
94 FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
95 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
98 /****************************************************************
100 ****************************************************************/
102 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
103 * Increasing memory usage improves compression ratio
104 * Reduced memory usage can improve speed, due to cache effect
105 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
106 #define FSE_MAX_MEMORY_USAGE 14
107 #define FSE_DEFAULT_MEMORY_USAGE 13
109 /* FSE_MAX_SYMBOL_VALUE :
110 * Maximum symbol value authorized.
111 * Required for proper stack allocation */
112 #define FSE_MAX_SYMBOL_VALUE 255
115 /****************************************************************
116 * template functions type & suffix
117 ****************************************************************/
118 #define FSE_FUNCTION_TYPE BYTE
119 #define FSE_FUNCTION_EXTENSION
122 /****************************************************************
124 ****************************************************************/
127 unsigned short newState;
128 unsigned char symbol;
129 unsigned char nbBits;
130 } FSE_decode_t; /* size == U32 */
134 /****************************************************************
136 ****************************************************************/
137 #ifdef _MSC_VER /* Visual Studio */
138 # define FORCE_INLINE static __forceinline
139 # include <intrin.h> /* For Visual 2005 */
140 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
141 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
143 # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
144 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
146 # define FORCE_INLINE static inline __attribute__((always_inline))
148 # define FORCE_INLINE static inline
151 # define FORCE_INLINE static
152 # endif /* __STDC_VERSION__ */
156 /****************************************************************
158 ****************************************************************/
159 #include <stdlib.h> /* malloc, free, qsort */
160 #include <string.h> /* memcpy, memset */
161 #include <stdio.h> /* printf (debug) */
164 #ifndef MEM_ACCESS_MODULE
165 #define MEM_ACCESS_MODULE
166 /****************************************************************
168 *****************************************************************/
169 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
171 typedef uint8_t BYTE;
172 typedef uint16_t U16;
174 typedef uint32_t U32;
176 typedef uint64_t U64;
179 typedef unsigned char BYTE;
180 typedef unsigned short U16;
181 typedef signed short S16;
182 typedef unsigned int U32;
183 typedef signed int S32;
184 typedef unsigned long long U64;
185 typedef signed long long S64;
188 #endif /* MEM_ACCESS_MODULE */
190 /****************************************************************
192 *****************************************************************/
193 /* FSE_FORCE_MEMORY_ACCESS
194 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
195 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
196 * The below switch allow to select different access method for improved performance.
197 * Method 0 (default) : use `memcpy()`. Safe and portable.
198 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
199 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
200 * Method 2 : direct access. This method is portable but violate C standard.
201 * It can generate buggy code on targets generating assembly depending on alignment.
202 * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
203 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
204 * Prefer these methods in priority order (0 > 1 > 2)
206 #ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
207 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
208 # define FSE_FORCE_MEMORY_ACCESS 2
209 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
210 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
211 # define FSE_FORCE_MEMORY_ACCESS 1
/* FSE_32bits() : non-zero when pointers are 4 bytes wide (32-bit target).
 * Evaluated at compile time in practice; used to pick 32/64-bit code paths. */
216 static unsigned FSE_32bits(void)
218 return sizeof(void*)==4;
/* FSE_isLittleEndian() : runtime endianness probe through a union initialized to 1;
 * compilers typically fold this to a constant. (return statement elided in this view) */
221 static unsigned FSE_isLittleEndian(void)
223 const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
/* Unaligned memory reads — three implementations selected by FSE_FORCE_MEMORY_ACCESS :
 *   2 : direct pointer cast  — fastest on some targets but violates strict aliasing
 *       and may fault on alignment-strict CPUs;
 *   1 : __attribute__((packed)) union — gcc/icc extension, safe where supported;
 *   default : memcpy() — portable; modern compilers lower it to a single load. */
227 #if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
229 static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
230 static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
231 static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
233 #elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
235 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
236 /* currently only defined for gcc and icc */
237 typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
239 static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
240 static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
241 static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
/* Method 0 (default) : safe, portable memcpy-based access. */
245 static U16 FSE_read16(const void* memPtr)
247 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
250 static U32 FSE_read32(const void* memPtr)
252 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
255 static U64 FSE_read64(const void* memPtr)
257 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
260 #endif /* FSE_FORCE_MEMORY_ACCESS */
/* FSE_readLE16() : read a 16-bit little-endian value;
 * direct load on LE hosts, byte-assembled on BE hosts. */
262 static U16 FSE_readLE16(const void* memPtr)
264 if (FSE_isLittleEndian())
265 return FSE_read16(memPtr);
268 const BYTE* p = (const BYTE*)memPtr;
269 return (U16)(p[0] + (p[1]<<8));
/* FSE_readLE32() : read a 32-bit little-endian value, endian-portable. */
273 static U32 FSE_readLE32(const void* memPtr)
275 if (FSE_isLittleEndian())
276 return FSE_read32(memPtr);
279 const BYTE* p = (const BYTE*)memPtr;
280 return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
/* FSE_readLE64() : read a 64-bit little-endian value, endian-portable. */
285 static U64 FSE_readLE64(const void* memPtr)
287 if (FSE_isLittleEndian())
288 return FSE_read64(memPtr);
291 const BYTE* p = (const BYTE*)memPtr;
292 return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
293 + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
/* FSE_readLEST() : read one little-endian size_t-sized word —
 * 32-bit read on 32-bit hosts, 64-bit read otherwise. (selection test elided in this view) */
297 static size_t FSE_readLEST(const void* memPtr)
300 return (size_t)FSE_readLE32(memPtr);
302 return (size_t)FSE_readLE64(memPtr);
307 /****************************************************************
309 *****************************************************************/
310 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
311 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
312 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
313 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
314 #define FSE_MIN_TABLELOG 5
316 #define FSE_TABLELOG_ABSOLUTE_MAX 15
317 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
318 #error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
322 /****************************************************************
324 ****************************************************************/
325 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
328 /****************************************************************
330 ****************************************************************/
335 } FSE_symbolCompressionTransform; /* total 8 bytes */
337 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
339 /****************************************************************
341 ****************************************************************/
/* FSE_highbit32() : index of the highest set bit of val (val must be non-zero).
 * Uses a hardware bit-scan where available, falling back to a De Bruijn
 * multiply-and-lookup in portable C. */
342 FORCE_INLINE unsigned FSE_highbit32 (U32 val)
344 # if defined(_MSC_VER) /* Visual */
346 _BitScanReverse ( &r, val );
348 # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
349 return 31 - __builtin_clz (val);
350 # else /* Software version */
351 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
359 r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
365 /****************************************************************
367 ****************************************************************/
369 designed to be included
370 for type-specific functions (template emulation in C)
371 Objective is to write these functions only once, for improved maintenance
375 #ifndef FSE_FUNCTION_EXTENSION
376 # error "FSE_FUNCTION_EXTENSION must be defined"
378 #ifndef FSE_FUNCTION_TYPE
379 # error "FSE_FUNCTION_TYPE must be defined"
383 #define FSE_CAT(X,Y) X##Y
384 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
385 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
389 static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
391 #define FSE_DECODE_TYPE FSE_decode_t
397 } FSE_DTableHeader; /* sizeof U32 */
/* FSE_buildDTable() :
 * Build an FSE decoding table from a normalized symbol-frequency histogram.
 * @dt                : destination table, DTable header followed by tableSize cells
 * @normalizedCounter : per-symbol normalized counts; -1 marks a "low probability" symbol
 * @maxSymbolValue    : highest symbol value present (<= FSE_MAX_SYMBOL_VALUE)
 * @tableLog          : log2 of table size (<= FSE_MAX_TABLELOG)
 * @return : 0-equivalent success or (size_t)-FSE_ERROR_* on bad parameters/counts.
 * (Some original lines — declarations and braces — are elided in this view.) */
399 static size_t FSE_buildDTable
400 (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
403 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
404 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1; /* because dt is unsigned, 32-bits aligned on 32-bits */
405 const U32 tableSize = 1 << tableLog;
406 const U32 tableMask = tableSize-1;
407 const U32 step = FSE_tableStep(tableSize);
408 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
410 U32 highThreshold = tableSize-1;
411 const S16 largeLimit= (S16)(1 << (tableLog-1));
/* Parameter sanity checks */
416 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
417 if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
419 /* Init, lay down lowprob symbols */
420 DTableH[0].tableLog = (U16)tableLog;
421 for (s=0; s<=maxSymbolValue; s++)
/* -1 counts are placed at the top of the table (highThreshold downward) */
423 if (normalizedCounter[s]==-1)
425 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
/* any symbol with count >= half the table disables fastMode below */
430 if (normalizedCounter[s] >= largeLimit) noLarge=0;
431 symbolNext[s] = normalizedCounter[s];
/* Spread symbols across the table with the coprime step */
436 for (s=0; s<=maxSymbolValue; s++)
439 for (i=0; i<normalizedCounter[s]; i++)
441 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
442 position = (position + step) & tableMask;
443 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
447 if (position!=0) return (size_t)-FSE_ERROR_GENERIC; /* position must reach all cells once, otherwise normalizedCounter is incorrect */
449 /* Build Decoding table */
452 for (i=0; i<tableSize; i++)
454 FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
455 U16 nextState = symbolNext[symbol]++;
/* nbBits = how many fresh bits the decoder reads when leaving this cell */
456 tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
457 tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
/* fastMode is only legal when no symbol dominates (no count >= largeLimit) */
461 DTableH->fastMode = (U16)noLarge;
466 /******************************************
468 ******************************************/
469 #ifndef FSE_COMMONDEFS_ONLY
471 static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
473 static short FSE_abs(short a)
479 /****************************************************************
480 * Header bitstream management
481 ****************************************************************/
/* FSE_readNCount() :
 * Decode the FSE header (normalized count table) from a compressed block.
 * Fills normalizedCounter[0..charnum-1], sets *maxSVPtr to the last symbol read
 * and *tableLogPtr to the decoded table log.
 * @return : number of header bytes consumed, or an FSE error code.
 * The header is a bitstream : tableLog first, then variable-width counts with a
 * shrinking threshold as "remaining" probability mass is consumed; runs of
 * zero-count symbols are escape-coded (0xFFFF / 2-bit repeat flags).
 * (Some original lines — declarations, braces, pointer advances — are elided in this view.) */
482 static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
483 const void* headerBuffer, size_t hbSize)
485 const BYTE* const istart = (const BYTE*) headerBuffer;
486 const BYTE* const iend = istart + hbSize;
487 const BYTE* ip = istart;
493 unsigned charnum = 0;
496 if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
497 bitStream = FSE_readLE32(ip);
498 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
499 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
502 *tableLogPtr = nbBits;
/* remaining starts at tableSize+1; decoding ends when it falls to 1 */
503 remaining = (1<<nbBits)+1;
504 threshold = 1<<nbBits;
507 while ((remaining>1) && (charnum<=*maxSVPtr))
511 unsigned n0 = charnum;
/* long run of zero-count symbols : each 0xFFFF extends the run by 24 */
512 while ((bitStream & 0xFFFF) == 0xFFFF)
518 bitStream = FSE_readLE32(ip) >> bitCount;
/* short run extension : 2-bit repeat flags, 3 means "continue" */
526 while ((bitStream & 3) == 3)
534 if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
535 while (charnum < n0) normalizedCounter[charnum++] = 0;
/* refill bitStream when it is safe to read 4 bytes behind iend */
536 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
540 bitStream = FSE_readLE32(ip) >> bitCount;
/* counts use nbBits-1 or nbBits bits depending on value range (prefix coding) */
546 const short max = (short)((2*threshold-1)-remaining);
549 if ((bitStream & (threshold-1)) < (U32)max)
551 count = (short)(bitStream & (threshold-1));
552 bitCount += nbBits-1;
556 count = (short)(bitStream & (2*threshold-1));
557 if (count >= threshold) count -= max;
561 count--; /* extra accuracy */
/* count may be -1 (low-probability marker), hence FSE_abs */
562 remaining -= FSE_abs(count);
563 normalizedCounter[charnum++] = count;
/* shrink field width as remaining mass decreases */
565 while (remaining < threshold)
572 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
579 bitCount -= (int)(8 * (iend - 4 - ip));
582 bitStream = FSE_readLE32(ip) >> (bitCount & 31);
586 if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
587 *maxSVPtr = charnum-1;
589 ip += (bitCount+7)>>3;
590 if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
595 /*********************************************************
596 * Decompression (Byte symbols)
597 *********************************************************/
/* FSE_buildDTable_rle() :
 * Build a degenerate 1-cell decoding table that always emits symbolValue —
 * used for RLE blocks (a single repeated byte). tableLog==0 so no bits are consumed.
 * (Some lines elided in this view.) */
598 static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
601 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
602 FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
604 DTableH->tableLog = 0;
605 DTableH->fastMode = 0;
608 cell->symbol = symbolValue;
/* FSE_buildDTable_raw() :
 * Build an identity decoding table for uncompressed data : every symbol s maps
 * to itself and costs exactly nbBits. fastMode is safe here since the
 * distribution is uniform. (Some lines elided in this view.) */
615 static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
618 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
619 FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1; /* because dt is unsigned */
620 const unsigned tableSize = 1 << nbBits;
621 const unsigned tableMask = tableSize - 1;
622 const unsigned maxSymbolValue = tableMask;
626 if (nbBits < 1) return (size_t)-FSE_ERROR_GENERIC; /* min size */
628 /* Build Decoding Table */
629 DTableH->tableLog = (U16)nbBits;
630 DTableH->fastMode = 1;
631 for (s=0; s<=maxSymbolValue; s++)
633 dinfo[s].newState = 0;
634 dinfo[s].symbol = (BYTE)s;
635 dinfo[s].nbBits = (BYTE)nbBits;
643 * Initialize a FSE_DStream_t.
644 * srcBuffer must point at the beginning of an FSE block.
645 * The function result is the size of the FSE_block (== srcSize).
646 * If srcSize is too small, the function will return an errorCode;
/* The stream is read BACKWARD from the end of the buffer; the last byte carries
 * a mandatory "stop bit" (highest set bit) marking the exact bit length.
 * (Some original lines — braces, case labels — are elided in this view.) */
648 static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
650 if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong;
/* Normal case : buffer holds at least one full size_t word */
652 if (srcSize >= sizeof(size_t))
655 bitD->start = (const char*)srcBuffer;
656 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t);
657 bitD->bitContainer = FSE_readLEST(bitD->ptr);
658 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
659 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
660 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
/* Small-buffer case : assemble the container byte by byte (switch fallthrough) */
665 bitD->start = (const char*)srcBuffer;
666 bitD->ptr = bitD->start;
667 bitD->bitContainer = *(const BYTE*)(bitD->start);
670 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
672 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
674 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
676 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
678 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
680 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
684 contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
685 if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */
686 bitD->bitsConsumed = 8 - FSE_highbit32(contain32);
/* account for the missing bytes of the partially filled container */
687 bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8;
695 * Provides next n bits from the bitContainer.
696 * bitContainer is not modified (bits are still present for next read/look)
697 * On 32-bits, maxNbBits==25
698 * On 64-bits, maxNbBits==57
699 * return : value extracted.
/* FSE_lookBits() : peek at the next nbBits without consuming them.
 * The ">> 1 … >> (bitMask-nbBits)" split keeps the shift amount < word size
 * even when nbBits==0, avoiding undefined behavior. */
701 static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits)
703 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
704 return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
/* FSE_lookBitsFast() : cheaper peek; the single shift would be UB for nbBits==0,
 * hence the "only if nbBits >= 1" contract. */
707 static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
709 const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1;
710 return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
/* FSE_skipBits() : mark nbBits as consumed; the container itself is refreshed
 * lazily by FSE_reloadDStream(). */
713 static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits)
715 bitD->bitsConsumed += nbBits;
720 * Read next n bits from the bitContainer.
721 * On 32-bits, don't read more than maxNbBits==25
722 * On 64-bits, don't read more than maxNbBits==57
723 * Use the fast variant *only* if n >= 1.
724 * return : value extracted.
/* FSE_readBits() : look + consume in one call (safe for nbBits==0). */
726 static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits)
728 size_t value = FSE_lookBits(bitD, nbBits);
729 FSE_skipBits(bitD, nbBits);
/* FSE_readBitsFast() : as FSE_readBits but with the nbBits >= 1 precondition. */
733 static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */
735 size_t value = FSE_lookBitsFast(bitD, nbBits);
736 FSE_skipBits(bitD, nbBits);
/* FSE_reloadDStream() :
 * Refill bitContainer by moving ptr backward by the number of whole consumed bytes.
 * @return : FSE_DStream_unfinished when fully refilled, _endOfBuffer when the
 * refill was clamped at the buffer start, _completed when exactly exhausted,
 * _tooFar when the stream over-consumed (corruption).
 * (Some braces/else lines elided in this view.) */
740 static unsigned FSE_reloadDStream(FSE_DStream_t* bitD)
742 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
743 return FSE_DStream_tooFar;
/* Common case : room to step back a full word */
745 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
747 bitD->ptr -= bitD->bitsConsumed >> 3;
748 bitD->bitsConsumed &= 7;
749 bitD->bitContainer = FSE_readLEST(bitD->ptr);
750 return FSE_DStream_unfinished;
752 if (bitD->ptr == bitD->start)
754 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer;
755 return FSE_DStream_completed;
/* Partial refill : clamp the step so ptr never goes below start */
758 U32 nbBytes = bitD->bitsConsumed >> 3;
759 U32 result = FSE_DStream_unfinished;
760 if (bitD->ptr - nbBytes < bitD->start)
762 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
763 result = FSE_DStream_endOfBuffer;
765 bitD->ptr -= nbBytes;
766 bitD->bitsConsumed -= nbBytes*8;
767 bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
/* FSE_initDState() : read the initial decoder state (tableLog bits) from the
 * bitstream and point DStatePtr->table past the DTable header. */
773 static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt)
775 const void* ptr = dt;
776 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
777 DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog);
778 FSE_reloadDStream(bitD);
779 DStatePtr->table = dt + 1;
/* FSE_decodeSymbol() : emit the current state's symbol, then advance the state
 * using nbBits fresh bits (safe variant : handles nbBits==0). */
782 static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
784 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
785 const U32 nbBits = DInfo.nbBits;
786 BYTE symbol = DInfo.symbol;
787 size_t lowBits = FSE_readBits(bitD, nbBits);
789 DStatePtr->state = DInfo.newState + lowBits;
/* FSE_decodeSymbolFast() : same as FSE_decodeSymbol but uses the fast bit read,
 * which requires nbBits >= 1 — only valid when the DTable was built with fastMode. */
793 static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD)
795 const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
796 const U32 nbBits = DInfo.nbBits;
797 BYTE symbol = DInfo.symbol;
798 size_t lowBits = FSE_readBitsFast(bitD, nbBits);
800 DStatePtr->state = DInfo.newState + lowBits;
805 Tells if bitD has reached end of bitStream or not */
/* True only when the read pointer is back at the buffer start AND every bit of
 * the container has been consumed. */
807 static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD)
809 return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8));
/* FSE_endOfDState() : a decoder state of 0 marks the final symbol of its stream. */
812 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
814 return DStatePtr->state == 0;
/* FSE_decompress_usingDTable_generic() :
 * Core FSE decode loop : two interleaved decoder states share one backward
 * bitstream, emitting 4 symbols per iteration, then a tail loop finishes the
 * remainder. `fast` is a compile-time constant (function is force-inlined and
 * called with literal 0/1) selecting the fast symbol decoder.
 * @return : number of bytes written to dst, or an FSE error code.
 * (Some original lines — declarations, braces — are elided in this view.) */
818 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
819 void* dst, size_t maxDstSize,
820 const void* cSrc, size_t cSrcSize,
821 const FSE_DTable* dt, const unsigned fast)
823 BYTE* const ostart = (BYTE*) dst;
825 BYTE* const omax = op + maxDstSize;
826 BYTE* const olimit = omax-3;
834 errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */
835 if (FSE_isError(errorCode)) return errorCode;
837 FSE_initDState(&state1, &bitD, dt);
838 FSE_initDState(&state2, &bitD, dt);
840 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
842 /* 4 symbols per loop */
843 for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
845 op[0] = FSE_GETSYMBOL(&state1);
/* on 32-bit hosts the container can't hold 2 symbols' worth of bits : reload */
847 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
848 FSE_reloadDStream(&bitD);
850 op[1] = FSE_GETSYMBOL(&state2);
852 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
853 { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
855 op[2] = FSE_GETSYMBOL(&state1);
857 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
858 FSE_reloadDStream(&bitD);
860 op[3] = FSE_GETSYMBOL(&state2);
864 /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
/* tail : alternate states one symbol at a time until both streams are drained */
867 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
870 *op++ = FSE_GETSYMBOL(&state1);
872 if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
875 *op++ = FSE_GETSYMBOL(&state2);
879 if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
882 if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
884 return (size_t)-FSE_ERROR_corruptionDetected;
/* FSE_decompress_usingDTable() :
 * Dispatch to the force-inlined generic decoder with fast=0/1 as a literal,
 * so each instantiation is specialized at compile time. */
888 static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
889 const void* cSrc, size_t cSrcSize,
890 const FSE_DTable* dt)
892 FSE_DTableHeader DTableH;
893 memcpy(&DTableH, dt, sizeof(DTableH)); /* memcpy() into local variable, to avoid strict aliasing warning */
895 /* select fast mode (static) */
896 if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
897 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
/* FSE_decompress() :
 * One-shot decompression : read the normalized-count header, build a DTable on
 * the stack, then decode the payload. @return : decompressed size or an FSE
 * error code. (Some lines — declarations, ip advance — elided in this view.) */
901 static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
903 const BYTE* const istart = (const BYTE*)cSrc;
904 const BYTE* ip = istart;
905 short counting[FSE_MAX_SYMBOL_VALUE+1];
906 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
908 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
911 if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
913 /* normal FSE decoding mode */
914 errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
915 if (FSE_isError(errorCode)) return errorCode;
916 if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; /* too small input size */
/* skip the header bytes consumed by FSE_readNCount */
918 cSrcSize -= errorCode;
920 errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
921 if (FSE_isError(errorCode)) return errorCode;
923 /* always return, even if it is an error code */
924 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
929 /* *******************************************************
930 * Huff0 : Huffman block compression
931 *********************************************************/
932 #define HUF_MAX_SYMBOL_VALUE 255
933 #define HUF_DEFAULT_TABLELOG 12 /* used by default, when not specified */
934 #define HUF_MAX_TABLELOG 12 /* max possible tableLog; for allocation purpose; can be modified */
935 #define HUF_ABSOLUTEMAX_TABLELOG 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
936 #if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
937 # error "HUF_MAX_TABLELOG is too large !"
940 typedef struct HUF_CElt_s {
945 typedef struct nodeElt_s {
953 /* *******************************************************
954 * Huff0 : Huffman block decompression
955 *********************************************************/
/* HUF_readDTable() :
 * Read a Huff0 weight header and build the Huffman decoding table.
 * DTable[0] carries maxTableLog on entry and the actual maxBits on exit.
 * Header encodings : iSize>=242 = RLE weights, 128<=iSize<242 = raw 4-bit
 * weights, otherwise weights are themselves FSE-compressed. The last symbol's
 * weight is implied (weights must sum to a power of 2).
 * @return : header bytes consumed, or an FSE error code.
 * (Some original lines — declarations, braces, table-fill statements — are elided in this view.) */
961 static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
963 BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
964 U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
967 const BYTE* ip = (const BYTE*) src;
972 void* ptr = DTable+1;
973 HUF_DElt* const dt = (HUF_DElt*)ptr;
975 if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
978 FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */
979 //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */
980 if (iSize >= 128) /* special header */
982 if (iSize >= (242)) /* RLE */
984 static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
985 oSize = l[iSize-242];
986 memset(huffWeight, 1, sizeof(huffWeight));
989 else /* Incompressible */
/* raw mode : two 4-bit weights packed per input byte */
992 iSize = ((oSize+1)/2);
993 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
995 for (n=0; n<oSize; n+=2)
997 huffWeight[n] = ip[n/2] >> 4;
998 huffWeight[n+1] = ip[n/2] & 15;
1002 else /* header compressed with FSE (normal case) */
1004 if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
1005 oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize); /* max 255 values decoded, last one is implied */
1006 if (FSE_isError(oSize)) return oSize;
1009 /* collect weight stats */
1010 memset(rankVal, 0, sizeof(rankVal));
1012 for (n=0; n<oSize; n++)
1014 if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
1015 rankVal[huffWeight[n]]++;
1016 weightTotal += (1 << huffWeight[n]) >> 1;
1018 if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
1020 /* get last non-null symbol weight (implied, total must be 2^n) */
1021 maxBits = FSE_highbit32(weightTotal) + 1;
1022 if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge; /* DTable is too small */
1023 DTable[0] = (U16)maxBits;
1025 U32 total = 1 << maxBits;
1026 U32 rest = total - weightTotal;
1027 U32 verif = 1 << FSE_highbit32(rest);
1028 U32 lastWeight = FSE_highbit32(rest) + 1;
1029 if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected; /* last value must be a clean power of 2 */
1030 huffWeight[oSize] = (BYTE)lastWeight;
1031 rankVal[lastWeight]++;
1034 /* check tree construction validity */
1035 if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected; /* by construction : at least 2 elts of rank 1, must be even */
/* turn per-weight counts into starting offsets (prefix sums scaled by 2^(w-1)) */
1039 for (n=1; n<=maxBits; n++)
1041 U32 current = nextRankStart;
1042 nextRankStart += (rankVal[n] << (n-1));
1043 rankVal[n] = current;
/* fill DTable : each symbol occupies 2^(w-1) consecutive cells */
1047 for (n=0; n<=oSize; n++)
1049 const U32 w = huffWeight[n];
1050 const U32 length = (1 << w) >> 1;
1053 D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
1054 for (i = rankVal[w]; i < rankVal[w] + length; i++)
1056 rankVal[w] += length;
/* HUF_decodeSymbol() : table-lookup Huffman decode — peek dtLog bits, emit the
 * mapped byte, consume only the symbol's actual code length. */
1063 static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
1065 const size_t val = FSE_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
1066 const BYTE c = dt[val].byte;
1067 FSE_skipBits(Dstream, dt[val].nbBits);
/* Decode a Huffman-compressed payload split into 4 interleaved bitstreams.
 * Layout of cSrc : 6-byte jump table (3 little-endian U16 lengths), then the
 * 4 streams back to back; the 4th stream's length is derived from cSrcSize.
 * The main loop decodes 16 symbols per iteration (4 per stream), then a tail
 * loop drains stream 1 symbol-by-symbol up to omax.
 * NOTE(review): several interior lines are elided in this chunk. */
static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
    void* dst, size_t maxDstSize,
    const void* cSrc, size_t cSrcSize,
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const omax = op + maxDstSize;
    BYTE* const olimit = omax-15;                 /* last position allowing a full 16-symbol round */
    const void* ptr = DTable;
    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;   /* dt starts right after the header cell */
    const U32 dtLog = DTable[0];
    /* read the 3 explicit stream lengths; length4 is the remainder */
    const U16* jumpTable = (const U16*)cSrc;
    const size_t length1 = FSE_readLE16(jumpTable);
    const size_t length2 = FSE_readLE16(jumpTable+1);
    const size_t length3 = FSE_readLE16(jumpTable+2);
    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   /* coherency checked below */
    const char* const start1 = (const char*)(cSrc) + 6;
    const char* const start2 = start1 + length1;
    const char* const start3 = start2 + length2;
    const char* const start4 = start3 + length3;
    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
    /* reject declared lengths that overflow the compressed buffer */
    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
    errorCode = FSE_initDStream(&bitD1, start1, length1);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD2, start2, length2);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD3, start3, length3);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD4, start4, length4);
    if (FSE_isError(errorCode)) return errorCode;
    reloadStatus=FSE_reloadDStream(&bitD2);
    /* 16 symbols per loop */
    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
/* plain decode, no reload */
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
/* decode + reload only when a 32-bit container could run dry (large tablelog) */
#define HUF_DECODE_SYMBOL_1(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
/* decode + unconditional reload on 32-bit hosts */
#define HUF_DECODE_SYMBOL_2(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
        HUF_DECODE_SYMBOL_1( 0, bitD1);
        HUF_DECODE_SYMBOL_1( 1, bitD2);
        HUF_DECODE_SYMBOL_1( 2, bitD3);
        HUF_DECODE_SYMBOL_1( 3, bitD4);
        HUF_DECODE_SYMBOL_2( 4, bitD1);
        HUF_DECODE_SYMBOL_2( 5, bitD2);
        HUF_DECODE_SYMBOL_2( 6, bitD3);
        HUF_DECODE_SYMBOL_2( 7, bitD4);
        HUF_DECODE_SYMBOL_1( 8, bitD1);
        HUF_DECODE_SYMBOL_1( 9, bitD2);
        HUF_DECODE_SYMBOL_1(10, bitD3);
        HUF_DECODE_SYMBOL_1(11, bitD4);
        HUF_DECODE_SYMBOL_0(12, bitD1);
        HUF_DECODE_SYMBOL_0(13, bitD2);
        HUF_DECODE_SYMBOL_0(14, bitD3);
        HUF_DECODE_SYMBOL_0(15, bitD4);
    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
        return (size_t)-FSE_ERROR_corruptionDetected;
    /* tail : finish with stream 1 only, one symbol at a time */
    // bitTail = bitD1;   // *much* slower : -20% !??!
    FSE_DStream_t bitTail;
    bitTail.ptr = bitD1.ptr;
    bitTail.bitsConsumed = bitD1.bitsConsumed;
    bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
    bitTail.start = start1;
    for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
        HUF_DECODE_SYMBOL_0(0, bitTail);
    /* success only if the tail stream is fully and exactly consumed */
    if (FSE_endOfDStream(&bitTail))
    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
    return (size_t)-FSE_ERROR_corruptionDetected;
/* One-shot Huffman decode : parse & build the decoding table from the head
 * of cSrc, then decompress the remaining payload with the 4-stream decoder.
 * On success HUF_readDTable returns the number of header bytes consumed. */
static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
    const BYTE* ip = (const BYTE*) cSrc;
    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
    if (FSE_isError(errorCode)) return errorCode;
    /* header must leave at least some payload behind */
    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
    cSrcSize -= errorCode;
    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
1188 #endif /* FSE_COMMONDEFS_ONLY */
1191 zstd - standard compression library
1192 Copyright (C) 2014-2015, Yann Collet.
1194 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1196 Redistribution and use in source and binary forms, with or without
1197 modification, are permitted provided that the following conditions are
1199 * Redistributions of source code must retain the above copyright
1200 notice, this list of conditions and the following disclaimer.
1201 * Redistributions in binary form must reproduce the above
1202 copyright notice, this list of conditions and the following disclaimer
1203 in the documentation and/or other materials provided with the
1205 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1206 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1207 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1208 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1209 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1210 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1211 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1212 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1213 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1214 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1215 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1217 You can contact the author at :
1218 - zstd source repository : https://github.com/Cyan4973/zstd
- zstd public forum : https://groups.google.com/forum/#!forum/lz4c
/****************************************************************
*  Tuning parameters
*****************************************************************/
/* ZSTD_MEMORY_USAGE :
 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
 * Increasing memory usage improves compression ratio
 * Reduced memory usage can improve speed, due to cache effect */
#define ZSTD_MEMORY_USAGE 17

/**************************************
   CPU Feature Detection
**************************************/
/*
 * Automated efficient unaligned memory access detection
 * Based on known hardware architectures
 * This list will be updated thanks to feedbacks
 */
#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
    || defined(__ARM_FEATURE_UNALIGNED) \
    || defined(__i386__) || defined(__x86_64__) \
    || defined(_M_IX86) || defined(_M_X64) \
    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
    || (defined(_M_ARM) && (_M_ARM >= 7))
#  define ZSTD_UNALIGNED_ACCESS 1
/* NOTE(review): the #else of this conditional is elided in this chunk */
#  define ZSTD_UNALIGNED_ACCESS 0
1252 /********************************************************
1254 *********************************************************/
1255 #include <stdlib.h> /* calloc */
1256 #include <string.h> /* memcpy, memmove */
1257 #include <stdio.h> /* debug : printf */
1260 /********************************************************
1261 * Compiler specifics
1262 *********************************************************/
1264 # include <immintrin.h> /* AVX2 intrinsics */
1267 #ifdef _MSC_VER /* Visual Studio */
1268 # include <intrin.h> /* For Visual 2005 */
1269 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1270 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
#ifndef MEM_ACCESS_MODULE
#define MEM_ACCESS_MODULE
/********************************************************
*  Basic Types : fixed-width integer aliases used file-wide
*********************************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
/* NOTE(review): the pre-C99 #else line is elided in this chunk */
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef signed short S16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif /* MEM_ACCESS_MODULE */
/********************************************************
*  Constants : frame/block format parameters
*********************************************************/
static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */

#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
#define HASH_TABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASH_TABLESIZE - 1)

#define KNUTH 2654435761   /* Knuth multiplicative hash constant */

/* unit suffixes : `128 KB` expands to 128*(1<<10) */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define BLOCKSIZE (128 KB)   /* define, for static allocation */
#define WORKPLACESIZE (BLOCKSIZE*3)

/* maximum symbol values for the three sequence fields */
#define MaxML ((1<<MLbits )-1)
#define MaxLL ((1<<LLbits )-1)
#define MaxOff ((1<<Offbits)-1)
#define LitFSELog 11
#define MAX(a,b) ((a)<(b)?(b):(a))
#define MaxSeq MAX(MaxLL, MaxML)

#define LITERAL_NOENTROPY 63
#define COMMAND_NOENTROPY 7   /* to remove */

static const size_t ZSTD_blockHeaderSize = 3;
static const size_t ZSTD_frameHeaderSize = 4;
1343 /********************************************************
1345 *********************************************************/
/* Report whether the build targets a 32-bit pointer model (1) or not (0). */
static unsigned ZSTD_32bits(void)
{
    return sizeof(void*) == 4;
}
/* Run-time endianness probe : stores U32(1) into a union and inspects the
 * first byte. NOTE(review): the return line is elided in this chunk. */
static unsigned ZSTD_isLittleEndian(void)
    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
1354 static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
1356 static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
/* Copy exactly 4 bytes from src to dst (regions must not overlap). */
static void ZSTD_copy4(void* dst, const void* src)
{
    memcpy(dst, src, 4);
}
/* Copy exactly 8 bytes from src to dst (regions must not overlap). */
static void ZSTD_copy8(void* dst, const void* src)
{
    memcpy(dst, src, 8);
}
/* Copy 8 bytes then advance both pointers.
 * do { } while (0) makes the macro expand to a single statement, so it is
 * safe in unbraced if/else bodies (the original brace-block form was not);
 * behavior at existing call sites (`while (...) COPY8(op, ip);`) is unchanged. */
#define COPY8(d,s)    do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
/* Bulk copy in 8-byte strides; may write up to 7 bytes PAST dst+length,
 * so callers must guarantee slack space at the end of the output buffer. */
static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;
    while (op < oend) COPY8(op, ip);
/* Read a 16-bit little-endian value : direct load on LE hosts, byte-wise
 * reassembly otherwise. NOTE(review): the else branch's braces are elided. */
static U16 ZSTD_readLE16(const void* memPtr)
    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
    const BYTE* p = (const BYTE*)memPtr;
    return (U16)((U16)p[0] + ((U16)p[1]<<8));
/* Read a 32-bit little-endian value : direct load on LE hosts, byte-wise
 * reassembly otherwise. NOTE(review): the else branch's braces are elided. */
static U32 ZSTD_readLE32(const void* memPtr)
    if (ZSTD_isLittleEndian())
        return ZSTD_read32(memPtr);
    const BYTE* p = (const BYTE*)memPtr;
    return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
/* Read a 32-bit big-endian value (used for the frame magic number),
 * endianness-independent byte reassembly. */
static U32 ZSTD_readBE32(const void* memPtr)
    const BYTE* p = (const BYTE*)memPtr;
    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
/**************************************
*  Local structures
***************************************/
typedef struct ZSTD_Cctx_s ZSTD_Cctx;

/* block header type : 2-bit tag stored in the block header's top bits */
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;

    blockType_t blockType;
} blockProperties_t;   /* NOTE(review): the struct opener is elided in this chunk */

/* seqStore_t fields (opener elided) : pointers into the sequence work area */
    BYTE* litLengthStart;
    BYTE* matchLengthStart;

typedef struct ZSTD_Cctx_s
    seqStore_t seqStore;
    __m256i hashTable[HASH_TABLESIZE>>3];   /* AVX2 variant — surrounding #ifdef is elided here */
    U32 hashTable[HASH_TABLESIZE];          /* scalar variant */
    BYTE buffer[WORKPLACESIZE];
1448 /**************************************
1450 **************************************/
1451 /* published entry point */
/* Public entry point : tells whether a returned size_t is an error code. */
unsigned ZSTDv01_isError(size_t code)
{
    return ERR_isError(code);
}
/**************************************
*  Version identification
**************************************/
#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
#define ZSTD_VERSION_MINOR 1 /* for new (non-breaking) interface capabilities */
#define ZSTD_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
/* single comparable integer : major*10000 + minor*100 + release */
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
1463 /**************************************************************
1464 * Decompression code
1465 **************************************************************/
/* Parse a 3-byte block header : fills bpPtr with the block type and (for RLE)
 * the regenerated size; returns the compressed size to read next.
 * bt_end returns 0, bt_rle returns 1 (one repeated byte follows).
 * NOTE(review): the final `return cSize;` line is elided in this chunk. */
static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
    const BYTE* const in = (const BYTE* const)src;
    if (srcSize < 3) return ERROR(srcSize_wrong);
    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);   /* 21-bit size, big-endian-ish packing */
    bpPtr->blockType = (blockType_t)(headerFlags >> 6);
    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
    if (bpPtr->blockType == bt_end) return 0;
    if (bpPtr->blockType == bt_rle) return 1;
/* Copy a raw (uncompressed) block verbatim into dst.
 * NOTE(review): the `return srcSize;` line is elided in this chunk. */
static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
    if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall);
    memcpy(dst, src, srcSize);
/* Decode a Huffman-compressed literals sub-block.
 * The regenerated literals are written flush against the END of the dst
 * buffer (op = oend - litSize) so sequence execution can consume them later.
 * NOTE(review): interior lines and the final return are elided in this chunk. */
static size_t ZSTD_decompressLiterals(void* ctx,
                                      void* dst, size_t maxDstSize,
                                      const void* src, size_t srcSize)
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + maxDstSize;
    const BYTE* ip = (const BYTE*)src;
    /* check : minimum 2, for litSize, +1, for content */
    if (srcSize <= 3) return ERROR(corruption_detected);
    litSize = ip[1] + (ip[0]<<8);
    /* top 3 bits of litSize come from the block header, 3 bytes BEFORE ip */
    litSize += ((ip[-3] >> 3) & 7) << 16;   /* NOTE(review): reads before `src` — relies on caller layout; historical quirk of v0.1 */
    op = oend - litSize;
    if (litSize > maxDstSize) return ERROR(dstSize_tooSmall);
    errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
    if (FSE_isError(errorCode)) return ERROR(GENERIC);
/* Decode the literals sub-block at the head of a compressed block.
 * Dispatches on its block type (raw / rle / compressed), placing literals at
 * the end of dst; outputs *litStart/*litSize for the sequence-execution phase.
 * Returns the number of input bytes consumed.
 * NOTE(review): case labels, braces and return lines are elided in this chunk. */
static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
                                          void* dst, size_t maxDstSize,
                                          const BYTE** litStart, size_t* litSize,
                                          const void* src, size_t srcSize)
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* ip = istart;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    blockProperties_t litbp;
    size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp);
    if (ZSTDv01_isError(litcSize)) return litcSize;
    if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
    ip += ZSTD_blockHeaderSize;
    switch(litbp.blockType)
    /* raw literals : used in place */
    *litSize = litcSize;
    /* RLE literals : one byte repeated origSize times, written at end of dst */
    size_t rleSize = litbp.origSize;
    if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall);
    if (!srcSize) return ERROR(srcSize_wrong);
    memset(oend - rleSize, *ip, rleSize);
    *litStart = oend - rleSize;
    /* compressed literals : Huffman decode */
    size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
    if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize;
    *litStart = oend - decodedLitSize;
    *litSize = decodedLitSize;
    /* unknown type */
    return ERROR(GENERIC);
/* Parse the sequences section header : sequence count, "dumps" side-channel
 * location/length, and the three FSE decoding tables (literal lengths,
 * offsets, match lengths), each either RLE, raw (predefined), or an explicit
 * normalized count read with FSE_readNCount.
 * Returns the number of header bytes consumed.
 * NOTE(review): several branch/case lines are elided in this chunk. */
static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
                                       FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
                                       const void* src, size_t srcSize)
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* ip = istart;
    const BYTE* const iend = istart + srcSize;
    U32 LLtype, Offtype, MLtype;
    U32 LLlog, Offlog, MLlog;
    if (srcSize < 5) return ERROR(srcSize_wrong);
    /* first 2 bytes : number of sequences */
    *nbSeq = ZSTD_readLE16(ip); ip+=2;
    /* next byte packs the three 2-bit table-type tags */
    Offtype = (*ip >> 4) & 3;
    MLtype = (*ip >> 2) & 3;
    /* long form : 16-bit dumps length */
    dumpsLength = ip[2];
    dumpsLength += ip[1] << 8;
    /* short form : 9-bit dumps length */
    dumpsLength = ip[1];
    dumpsLength += (ip[0] & 1) << 8;
    *dumpsLengthPtr = dumpsLength;
    if (ip > iend-3) return ERROR(srcSize_wrong);   /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
    S16 norm[MaxML+1];   /* assumption : MaxML >= MaxLL and MaxOff */
    /* literal-length table */
    FSE_buildDTable_rle(DTableLL, *ip++); break;
    FSE_buildDTable_raw(DTableLL, LLbits); break;
    headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
    if (FSE_isError(headerSize)) return ERROR(GENERIC);
    if (LLlog > LLFSELog) return ERROR(corruption_detected);
    FSE_buildDTable(DTableLL, norm, max, LLlog);
    /* offset table */
    if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
    FSE_buildDTable_rle(DTableOffb, *ip++); break;
    FSE_buildDTable_raw(DTableOffb, Offbits); break;
    headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
    if (FSE_isError(headerSize)) return ERROR(GENERIC);
    if (Offlog > OffFSELog) return ERROR(corruption_detected);
    FSE_buildDTable(DTableOffb, norm, max, Offlog);
    /* match-length table */
    if (ip > iend-2) return ERROR(srcSize_wrong);   /* min : "raw", hence no header, but at least xxLog bits */
    FSE_buildDTable_rle(DTableML, *ip++); break;
    FSE_buildDTable_raw(DTableML, MLbits); break;
    headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
    if (FSE_isError(headerSize)) return ERROR(GENERIC);
    if (MLlog > MLFSELog) return ERROR(corruption_detected);
    FSE_buildDTable(DTableML, norm, max, MLlog);
    /* Shared decoding state for the sequences section : one bitstream feeding
     * three interleaved FSE states, plus the "dumps" side channel used when a
     * length exceeds its table's max symbol.
     * NOTE(review): struct opener/closer are elided in this chunk. */
    FSE_DStream_t DStream;
    FSE_DState_t stateLL;     /* literal lengths */
    FSE_DState_t stateOffb;   /* offset codes */
    FSE_DState_t stateML;     /* match lengths */
    const BYTE* dumpsEnd;
/* Decode one (litLength, offset, matchLength) sequence into *seq.
 * Lengths equal to their table maximum are extended from the "dumps" side
 * channel; offset code 0 means "repeat previous offset".
 * NOTE(review): several interior lines are elided in this chunk. */
static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    const BYTE* dumps = seqState->dumps;
    const BYTE* const de = seqState->dumpsEnd;
    /* Literal length */
    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
    /* repeat-offset bookkeeping : a zero litLength selects the saved offset */
    prevOffset = litLength ? seq->offset : seqState->prevOffset;
    seqState->prevOffset = seq->offset;
    if (litLength == MaxLL)
        /* escape : extra byte(s) from dumps; 255 means a 3-byte length follows */
        U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) litLength += add;
        litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;   /* no pb : dumps is always followed by seq tables > 1 byte */
    /* Offset */
    U32 offsetCode, nbBits;
    offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
    if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
    nbBits = offsetCode - 1;
    if (offsetCode==0) nbBits = 0;   /* cmove */
    /* mask keeps the shift in range even when nbBits==0 (avoids UB) */
    offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
    if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
    if (offsetCode==0) offset = prevOffset;   /* repeat offset */
    /* Match length */
    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
    if (matchLength == MaxML)
        U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) matchLength += add;
        matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;   /* no pb : dumps is always followed by seq tables > 1 byte */
    matchLength += MINMATCH;
    /* publish results */
    seq->litLength = litLength;
    seq->offset = offset;
    seq->matchLength = matchLength;
    seqState->dumps = dumps;
/* Execute one sequence : copy its literals to op, then copy the match from
 * `op - offset`. Uses 8-byte wildcopy in the fast path; small offsets (<8)
 * take a staggered copy via dec32table/dec64table, and overlapping tails are
 * saved/restored to survive the over-copy. Returns bytes written.
 * NOTE(review): several interior lines are elided in this chunk. */
static size_t ZSTD_execSequence(BYTE* op,
    const BYTE** litPtr, const BYTE* const litLimit,
    BYTE* const base, BYTE* const oend)
    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
    const BYTE* const ostart = op;
    const size_t litLength = sequence.litLength;
    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
    const BYTE* const litEnd = *litPtr + litLength;
    /* bounds checks before any write */
    if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
    if (litEnd > litLimit) return ERROR(corruption_detected);
    if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall);    /* overwrite literal segment */
    /* copy literals : memmove near boundaries (overlap risk), wildcopy otherwise */
    if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
        memmove(op, *litPtr, litLength);   /* overwrite risk */
    ZSTD_wildcopy(op, *litPtr, litLength);
    *litPtr = litEnd;   /* update for next sequence */
    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
    if (oend-op < 8) return ERROR(dstSize_tooSmall);
    /* copy Match */
    const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
    const BYTE* match = op - sequence.offset;   /* possible underflow at op - offset ? */
    if (match < base) return ERROR(corruption_detected);
    if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
    /* save beginning of literal sequence, in case of write overlap */
    if ((endMatch + qutt) > oend) qutt = oend-endMatch;
    memcpy(saved, endMatch, qutt);
    /* offset < 8 : staggered copy so 8-byte strides stay valid */
    if (sequence.offset < 8)
        const int dec64 = dec64table[sequence.offset];
        match += dec32table[sequence.offset];
        ZSTD_copy4(op+4, match);
    } else { ZSTD_copy8(op, match); }
    op += 8; match += 8;
    if (endMatch > oend-(16-MINMATCH))
        /* near end of buffer : finish byte-by-byte to avoid over-write */
        ZSTD_wildcopy(op, match, (oend-8) - op);
        match += (oend-8) - op;
        while (op<endMatch) *op++ = *match++;
    ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
    /* restore, in case of overlap */
    if (overlapRisk) memcpy(endMatch, saved, qutt);
    return endMatch-ostart;
/* Decompression context : the three FSE decoding tables are statically sized
 * from their respective table logs.
 * NOTE(review): remaining fields and closer are elided in this chunk. */
typedef struct ZSTDv01_Dctx_s
    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
    void* previousDstEnd;
/* Decode and execute all sequences of a block : build the three FSE tables,
 * run the sequence loop, then flush the trailing literal segment.
 * NOTE(review): several interior lines are elided in this chunk. */
static size_t ZSTD_decompressSequences(
                     void* dst, size_t maxDstSize,
                     const void* seqStart, size_t seqSize,
                     const BYTE* litStart, size_t litSize)
    dctx_t* dctx = (dctx_t*)ctx;
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    size_t errorCode, dumpsLength;
    const BYTE* litPtr = litStart;
    const BYTE* const litEnd = litStart + litSize;
    U32* DTableLL = dctx->LLTable;
    U32* DTableML = dctx->MLTable;
    U32* DTableOffb = dctx->OffTable;
    BYTE* const base = (BYTE*) (dctx->base);
    /* Build Decoding Tables */
    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
                                         DTableLL, DTableML, DTableOffb,
    if (ZSTDv01_isError(errorCode)) return errorCode;
    /* Regen sequences */
    seqState_t seqState;
    memset(&sequence, 0, sizeof(sequence));
    seqState.dumps = dumps;
    seqState.dumpsEnd = dumps + dumpsLength;
    seqState.prevOffset = 1;   /* default repeat-offset */
    errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
    if (FSE_isError(errorCode)) return ERROR(corruption_detected);
    FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
    FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
    FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
    /* main loop : one sequence per iteration while the bitstream is live */
    for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
        ZSTD_decodeSequence(&sequence, &seqState);
        oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
        if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
    /* check if reached exact end */
    if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
    if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
    /* last literal segment */
    size_t lastLLSize = litEnd - litPtr;
    if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
    if (op != litPtr) memmove(op, litPtr, lastLLSize);
/* Decode one compressed block : first the literals sub-block, then the
 * sequences section that follows it. srcSize is already validated by caller. */
static size_t ZSTD_decompressBlock(
                   void* dst, size_t maxDstSize,
                   const void* src, size_t srcSize)
    /* blockType == blockCompressed, srcSize is trusted */
    const BYTE* ip = (const BYTE*)src;
    const BYTE* litPtr = NULL;
    /* Decode literals sub-block */
    errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
    if (ZSTDv01_isError(errorCode)) return errorCode;
    srcSize -= errorCode;   /* remaining bytes are the sequences section */
    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
/* One-shot frame decompression : validate the magic number, then decode
 * blocks in a loop until the bt_end marker; returns total decompressed size.
 * NOTE(review): loop braces and the final return are elided in this chunk. */
size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
    const BYTE* ip = (const BYTE*)src;
    const BYTE* iend = ip + srcSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    size_t remainingSize = srcSize;
    blockProperties_t blockProperties;
    /* Frame Header */
    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
    magicNumber = ZSTD_readBE32(src);
    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
    /* Loop on each block */
    size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties);
    if (ZSTDv01_isError(blockSize)) return blockSize;
    ip += ZSTD_blockHeaderSize;
    remainingSize -= ZSTD_blockHeaderSize;
    if (blockSize > remainingSize) return ERROR(srcSize_wrong);
    switch(blockProperties.blockType)
    errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
    errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
    /* bt_rle */
    return ERROR(GENERIC);   /* not yet supported */
    /* bt_end : frame must be fully consumed */
    if (remainingSize) return ERROR(srcSize_wrong);
    return ERROR(GENERIC);
    if (blockSize == 0) break;   /* bt_end */
    if (ZSTDv01_isError(errorCode)) return errorCode;
    remainingSize -= blockSize;
/* Convenience one-shot API : decompress with a stack-allocated context.
 * NOTE(review): the local context declaration is elided in this chunk. */
size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
/* Walk the block headers of a frame without decompressing, to report the
 * frame's total compressed size (header + all blocks, up to bt_end).
 * NOTE(review): loop braces are elided in this chunk. */
size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
    blockProperties_t blockProperties;
    /* Frame Header */
    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
    magicNumber = ZSTD_readBE32(src);
    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
    /* Loop on each block */
    size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
    if (ZSTDv01_isError(blockSize)) return blockSize;
    ip += ZSTD_blockHeaderSize;
    remainingSize -= ZSTD_blockHeaderSize;
    if (blockSize > remainingSize) return ERROR(srcSize_wrong);
    if (blockSize == 0) break;   /* bt_end */
    remainingSize -= blockSize;
    return ip - (const BYTE*)src;
2034 /*******************************
2035 * Streaming Decompression API
2036 *******************************/
/* Reset a streaming context : next expected input is the frame header.
 * NOTE(review): remaining field resets and return are elided in this chunk. */
size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx)
    dctx->expected = ZSTD_frameHeaderSize;
    dctx->previousDstEnd = NULL;
/* Allocate and initialize a streaming decompression context; NULL on OOM.
 * NOTE(review): the `return dctx;` line is elided in this chunk. */
ZSTDv01_Dctx* ZSTDv01_createDCtx(void)
    ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx));
    if (dctx==NULL) return NULL;
    ZSTDv01_resetDCtx(dctx);
/* Release a context created by ZSTDv01_createDCtx.
 * NOTE(review): the body (presumably free(dctx)) is elided in this chunk. */
size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx)
/* Streaming API : how many input bytes the next ZSTDv01_decompressContinue
 * call expects to receive. */
size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
    return ((dctx_t*)dctx)->expected;
2066 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
2068 dctx_t* ctx = (dctx_t*)dctx;
2071 if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
2072 if (dst != ctx->previousDstEnd) /* not contiguous */
2075 /* Decompress : frame header */
2076 if (ctx->phase == 0)
2078 /* Check frame magic header */
2079 U32 magicNumber = ZSTD_readBE32(src);
2080 if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2082 ctx->expected = ZSTD_blockHeaderSize;
2086 /* Decompress : block header */
2087 if (ctx->phase == 1)
2089 blockProperties_t bp;
2090 size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
2091 if (ZSTDv01_isError(blockSize)) return blockSize;
2092 if (bp.blockType == bt_end)
2099 ctx->expected = blockSize;
2100 ctx->bType = bp.blockType;
2107 /* Decompress : block content */
2113 rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
2116 rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
2119 return ERROR(GENERIC); /* not yet handled */
2121 case bt_end : /* should never happen (filtered at phase 1) */
2125 return ERROR(GENERIC);
2128 ctx->expected = ZSTD_blockHeaderSize;
2129 ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);