2 * BSD 3-Clause Clear License
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
7 * 1. Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
14 * 3. Neither the name of the copyright holder nor the names of its
15 * contributors may be used to endorse or promote products derived from this
16 * software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
32 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. All rights reserved.
33 * Copyright (c) 2019-2020, Michael Niewöhner. All rights reserved.
37 #define XXH_NAMESPACE ZSTD_
38 #define XXH_PRIVATE_API
39 #define XXH_INLINE_ALL
40 #define ZSTD_LEGACY_SUPPORT 0
41 #define ZSTD_LIB_DICTBUILDER 0
42 #define ZSTD_LIB_DEPRECATED 0
45 /**** start inlining common/debug.c ****/
46 /* ******************************************************************
49 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
51 * You can contact the author at :
52 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
54 * This source code is licensed under both the BSD-style license (found in the
55 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
56 * in the COPYING file in the root directory of this source tree).
57 * You may select, at your option, one of the above-listed licenses.
58 ****************************************************************** */
62 * This module only hosts one global variable
63 * which can be used to dynamically influence the verbosity of traces,
64 * such as DEBUGLOG and RAWLOG
67 /**** start inlining debug.h ****/
68 /* ******************************************************************
71 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
73 * You can contact the author at :
74 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
76 * This source code is licensed under both the BSD-style license (found in the
77 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
78 * in the COPYING file in the root directory of this source tree).
79 * You may select, at your option, one of the above-listed licenses.
80 ****************************************************************** */
84 * The purpose of this header is to enable debug functions.
85 * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
86 * and DEBUG_STATIC_ASSERT() for compile-time.
88 * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
90 * Level 1 enables assert() only.
91 * Starting level 2, traces can be generated and pushed to stderr.
92 * The higher the level, the more verbose the traces.
94 * It's possible to dynamically adjust level using variable g_debug_level,
95 * which is only declared if DEBUGLEVEL>=2,
96 * and is a global variable, not multi-thread protected (use with care)
99 #ifndef DEBUG_H_12987983217
100 #define DEBUG_H_12987983217
102 #if defined (__cplusplus)
107 /* static assert is triggered at compile time, leaving no runtime artefact.
108 * static assert only works with compile-time constants.
109 * Also, this variant can only be used inside a function. */
110 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
113 /* DEBUGLEVEL is expected to be defined externally,
114 * typically through compiler command line.
115 * Value must be a number. */
117 # define DEBUGLEVEL 0
121 /* DEBUGFILE can be defined externally,
122 * typically through compiler command line.
123 * note : currently useless.
124 * Value must be stderr or stdout */
126 # define DEBUGFILE stderr
130 /* recommended values for DEBUGLEVEL :
131 * 0 : release mode, no debug, all run-time checks disabled
132 * 1 : enables assert() only, no display
133 * 2 : reserved, for currently active debug path
134 * 3 : events once per object lifetime (CCtx, CDict, etc.)
135 * 4 : events once per frame
136 * 5 : events once per block
137 * 6 : events once per sequence (verbose)
138 * 7+: events at every position (*very* verbose)
140 * It's generally inconvenient to output traces > 5.
141 * In which case, it's possible to selectively trigger high verbosity levels
142 * by modifying g_debug_level.
148 # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
149 # define assert(condition) ((void)0) /* disable assert (default) */
155 extern int g_debuglevel; /* the variable is only declared,
156 it actually lives in debug.c,
157 and is shared by the whole process.
158 It's not thread-safe.
159 It's useful when enabling very verbose levels
160 on selective conditions (such as position in src) */
162 # define RAWLOG(l, ...) { \
163 if (l<=g_debuglevel) { \
164 fprintf(stderr, __VA_ARGS__); \
166 # define DEBUGLOG(l, ...) { \
167 if (l<=g_debuglevel) { \
168 fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
169 fprintf(stderr, " \n"); \
172 # define RAWLOG(l, ...) {} /* disabled */
173 # define DEBUGLOG(l, ...) {} /* disabled */
177 #if defined (__cplusplus)
181 #endif /* DEBUG_H_12987983217 */
182 /**** ended inlining debug.h ****/
184 int g_debuglevel = DEBUGLEVEL;
185 /**** ended inlining common/debug.c ****/
186 /**** start inlining common/entropy_common.c ****/
187 /* ******************************************************************
188 * Common functions of New Generation Entropy library
189 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
191 * You can contact the author at :
192 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
193 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
195 * This source code is licensed under both the BSD-style license (found in the
196 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
197 * in the COPYING file in the root directory of this source tree).
198 * You may select, at your option, one of the above-listed licenses.
199 ****************************************************************** */
201 /* *************************************
203 ***************************************/
204 /**** start inlining mem.h ****/
206 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
207 * All rights reserved.
209 * This source code is licensed under both the BSD-style license (found in the
210 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
211 * in the COPYING file in the root directory of this source tree).
212 * You may select, at your option, one of the above-listed licenses.
218 #if defined (__cplusplus)
222 /*-****************************************
224 ******************************************/
225 #include <stddef.h> /* size_t, ptrdiff_t */
226 #include <string.h> /* memcpy */
229 /*-****************************************
231 ******************************************/
232 #if defined(_MSC_VER) /* Visual Studio */
233 # include <stdlib.h> /* _byteswap_ulong */
234 # include <intrin.h> /* _byteswap_* */
236 #if defined(__GNUC__)
237 # define MEM_STATIC static __inline __attribute__((unused))
238 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
239 # define MEM_STATIC static inline
240 #elif defined(_MSC_VER)
241 # define MEM_STATIC static __inline
243 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
246 #ifndef __has_builtin
247 # define __has_builtin(x) 0 /* compat. with non-clang compilers */
250 /* code only tested on 32 and 64 bits systems */
251 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
252 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
254 /* detects whether we are being compiled under msan */
255 #if defined (__has_feature)
256 # if __has_feature(memory_sanitizer)
257 # define MEMORY_SANITIZER 1
261 #if defined (MEMORY_SANITIZER)
262 /* Not all platforms that support msan provide sanitizers/msan_interface.h.
263 * We therefore declare the functions we need ourselves, rather than trying to
264 * include the header file... */
266 #include <stdint.h> /* intptr_t */
268 /* Make memory region fully initialized (without changing its contents). */
269 void __msan_unpoison(const volatile void *a, size_t size);
271 /* Make memory region fully uninitialized (without changing its contents).
272 This is a legacy interface that does not update origin information. Use
273 __msan_allocated_memory() instead. */
274 void __msan_poison(const volatile void *a, size_t size);
276 /* Returns the offset of the first (at least partially) poisoned byte in the
277 memory range, or -1 if the whole range is good. */
278 intptr_t __msan_test_shadow(const volatile void *x, size_t size);
281 /* detects whether we are being compiled under asan */
282 #if defined (__has_feature)
283 # if __has_feature(address_sanitizer)
284 # define ADDRESS_SANITIZER 1
286 #elif defined(__SANITIZE_ADDRESS__)
287 # define ADDRESS_SANITIZER 1
290 #if defined (ADDRESS_SANITIZER)
291 /* Not all platforms that support asan provide sanitizers/asan_interface.h.
292 * We therefore declare the functions we need ourselves, rather than trying to
293 * include the header file... */
296 * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
298 * This memory must be previously allocated by your program. Instrumented
299 * code is forbidden from accessing addresses in this region until it is
300 * unpoisoned. This function is not guaranteed to poison the entire region -
301 * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
302 * alignment restrictions.
304 * \note This function is not thread-safe because no two threads can poison or
305 * unpoison memory in the same memory region simultaneously.
307 * \param addr Start of memory region.
308 * \param size Size of memory region. */
309 void __asan_poison_memory_region(void const volatile *addr, size_t size);
312 * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
314 * This memory must be previously allocated by your program. Accessing
315 * addresses in this region is allowed until this region is poisoned again.
316 * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
317 * to ASan alignment restrictions.
319 * \note This function is not thread-safe because no two threads can
320 * poison or unpoison memory in the same memory region simultaneously.
322 * \param addr Start of memory region.
323 * \param size Size of memory region. */
324 void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
328 /*-**************************************************************
330 *****************************************************************/
331 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
333 typedef uint8_t BYTE;
334 typedef uint16_t U16;
336 typedef uint32_t U32;
338 typedef uint64_t U64;
343 # error "this implementation requires char to be exactly 8-bit type"
345 typedef unsigned char BYTE;
346 #if USHRT_MAX != 65535
347 # error "this implementation requires short to be exactly 16-bit type"
349 typedef unsigned short U16;
350 typedef signed short S16;
351 #if UINT_MAX != 4294967295
352 # error "this implementation requires int to be exactly 32-bit type"
354 typedef unsigned int U32;
355 typedef signed int S32;
356 /* note : there are no limits defined for long long type in C90.
357 * limits exist in C99, however, in such case, <stdint.h> is preferred */
358 typedef unsigned long long U64;
359 typedef signed long long S64;
363 /*-**************************************************************
365 *****************************************************************/
366 /* MEM_FORCE_MEMORY_ACCESS :
367 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
368 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
369 * The below switch allow to select different access method for improved performance.
370 * Method 0 (default) : use `memcpy()`. Safe and portable.
371 * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
372 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
373 * Method 2 : direct access. This method is portable but violate C standard.
374 * It can generate buggy code on targets depending on alignment.
375 * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
376 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
377 * Prefer these methods in priority order (0 > 1 > 2)
379 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
380 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
381 # define MEM_FORCE_MEMORY_ACCESS 2
382 # elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
383 # define MEM_FORCE_MEMORY_ACCESS 1
387 MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
388 MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
390 MEM_STATIC unsigned MEM_isLittleEndian(void)
392 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
396 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
398 /* violates C standard, by lying on structure alignment.
399 Only use if no other choice to achieve best performance on target platform */
400 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
401 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
402 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
403 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
405 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
406 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
407 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
409 #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
411 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
412 /* currently only defined for gcc and icc */
413 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
414 __pragma( pack(push, 1) )
415 typedef struct { U16 v; } unalign16;
416 typedef struct { U32 v; } unalign32;
417 typedef struct { U64 v; } unalign64;
418 typedef struct { size_t v; } unalignArch;
419 __pragma( pack(pop) )
421 typedef struct { U16 v; } __attribute__((packed)) unalign16;
422 typedef struct { U32 v; } __attribute__((packed)) unalign32;
423 typedef struct { U64 v; } __attribute__((packed)) unalign64;
424 typedef struct { size_t v; } __attribute__((packed)) unalignArch;
427 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
428 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
429 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
430 MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
432 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
433 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
434 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
438 /* default method, safe and standard.
439 can sometimes prove slower */
441 MEM_STATIC U16 MEM_read16(const void* memPtr)
443 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
446 MEM_STATIC U32 MEM_read32(const void* memPtr)
448 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
451 MEM_STATIC U64 MEM_read64(const void* memPtr)
453 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
456 MEM_STATIC size_t MEM_readST(const void* memPtr)
458 size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
461 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
463 memcpy(memPtr, &value, sizeof(value));
466 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
468 memcpy(memPtr, &value, sizeof(value));
471 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
473 memcpy(memPtr, &value, sizeof(value));
476 #endif /* MEM_FORCE_MEMORY_ACCESS */
478 MEM_STATIC U32 MEM_swap32(U32 in)
480 #if defined(_MSC_VER) /* Visual Studio */
481 return _byteswap_ulong(in);
482 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
483 || (defined(__clang__) && __has_builtin(__builtin_bswap32))
484 return __builtin_bswap32(in);
486 return ((in << 24) & 0xff000000 ) |
487 ((in << 8) & 0x00ff0000 ) |
488 ((in >> 8) & 0x0000ff00 ) |
489 ((in >> 24) & 0x000000ff );
493 MEM_STATIC U64 MEM_swap64(U64 in)
495 #if defined(_MSC_VER) /* Visual Studio */
496 return _byteswap_uint64(in);
497 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
498 || (defined(__clang__) && __has_builtin(__builtin_bswap64))
499 return __builtin_bswap64(in);
501 return ((in << 56) & 0xff00000000000000ULL) |
502 ((in << 40) & 0x00ff000000000000ULL) |
503 ((in << 24) & 0x0000ff0000000000ULL) |
504 ((in << 8) & 0x000000ff00000000ULL) |
505 ((in >> 8) & 0x00000000ff000000ULL) |
506 ((in >> 24) & 0x0000000000ff0000ULL) |
507 ((in >> 40) & 0x000000000000ff00ULL) |
508 ((in >> 56) & 0x00000000000000ffULL);
512 MEM_STATIC size_t MEM_swapST(size_t in)
515 return (size_t)MEM_swap32((U32)in);
517 return (size_t)MEM_swap64((U64)in);
520 /*=== Little endian r/w ===*/
522 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
524 if (MEM_isLittleEndian())
525 return MEM_read16(memPtr);
527 const BYTE* p = (const BYTE*)memPtr;
528 return (U16)(p[0] + (p[1]<<8));
532 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
534 if (MEM_isLittleEndian()) {
535 MEM_write16(memPtr, val);
537 BYTE* p = (BYTE*)memPtr;
539 p[1] = (BYTE)(val>>8);
543 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
545 return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
548 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
550 MEM_writeLE16(memPtr, (U16)val);
551 ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
554 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
556 if (MEM_isLittleEndian())
557 return MEM_read32(memPtr);
559 return MEM_swap32(MEM_read32(memPtr));
562 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
564 if (MEM_isLittleEndian())
565 MEM_write32(memPtr, val32);
567 MEM_write32(memPtr, MEM_swap32(val32));
570 MEM_STATIC U64 MEM_readLE64(const void* memPtr)
572 if (MEM_isLittleEndian())
573 return MEM_read64(memPtr);
575 return MEM_swap64(MEM_read64(memPtr));
578 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
580 if (MEM_isLittleEndian())
581 MEM_write64(memPtr, val64);
583 MEM_write64(memPtr, MEM_swap64(val64));
586 MEM_STATIC size_t MEM_readLEST(const void* memPtr)
589 return (size_t)MEM_readLE32(memPtr);
591 return (size_t)MEM_readLE64(memPtr);
594 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
597 MEM_writeLE32(memPtr, (U32)val);
599 MEM_writeLE64(memPtr, (U64)val);
602 /*=== Big endian r/w ===*/
604 MEM_STATIC U32 MEM_readBE32(const void* memPtr)
606 if (MEM_isLittleEndian())
607 return MEM_swap32(MEM_read32(memPtr));
609 return MEM_read32(memPtr);
612 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
614 if (MEM_isLittleEndian())
615 MEM_write32(memPtr, MEM_swap32(val32));
617 MEM_write32(memPtr, val32);
620 MEM_STATIC U64 MEM_readBE64(const void* memPtr)
622 if (MEM_isLittleEndian())
623 return MEM_swap64(MEM_read64(memPtr));
625 return MEM_read64(memPtr);
628 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
630 if (MEM_isLittleEndian())
631 MEM_write64(memPtr, MEM_swap64(val64));
633 MEM_write64(memPtr, val64);
636 MEM_STATIC size_t MEM_readBEST(const void* memPtr)
639 return (size_t)MEM_readBE32(memPtr);
641 return (size_t)MEM_readBE64(memPtr);
644 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
647 MEM_writeBE32(memPtr, (U32)val);
649 MEM_writeBE64(memPtr, (U64)val);
653 #if defined (__cplusplus)
657 #endif /* MEM_H_MODULE */
658 /**** ended inlining mem.h ****/
659 /**** start inlining error_private.h ****/
661 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
662 * All rights reserved.
664 * This source code is licensed under both the BSD-style license (found in the
665 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
666 * in the COPYING file in the root directory of this source tree).
667 * You may select, at your option, one of the above-listed licenses.
670 /* Note : this module is expected to remain private, do not expose it */
672 #ifndef ERROR_H_MODULE
673 #define ERROR_H_MODULE
675 #if defined (__cplusplus)
680 /* ****************************************
682 ******************************************/
683 #include <stddef.h> /* size_t */
684 /**** start inlining zstd_errors.h ****/
686 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
687 * All rights reserved.
689 * This source code is licensed under both the BSD-style license (found in the
690 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
691 * in the COPYING file in the root directory of this source tree).
692 * You may select, at your option, one of the above-listed licenses.
695 #ifndef ZSTD_ERRORS_H_398273423
696 #define ZSTD_ERRORS_H_398273423
698 #if defined (__cplusplus)
702 /*===== dependency =====*/
703 #include <stddef.h> /* size_t */
706 /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
707 #ifndef ZSTDERRORLIB_VISIBILITY
708 # if defined(__GNUC__) && (__GNUC__ >= 4)
709 # define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
711 # define ZSTDERRORLIB_VISIBILITY
714 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
715 # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
716 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
717 # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
719 # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
722 /*-*********************************************
724 *-*********************************************
725 * Error codes _values_ are pinned down since v1.3.1 only.
726 * Therefore, don't rely on values if you may link to any version < v1.3.1.
728 * Only values < 100 are considered stable.
730 * note 1 : this API shall be used with static linking only.
731 * dynamic linking is not yet officially supported.
732 * note 2 : Prefer relying on the enum than on its value whenever possible
733 * This is the only supported way to use the error list < v1.3.1
734 * note 3 : ZSTD_isError() is always correct, whatever the library version.
735 **********************************************/
737 ZSTD_error_no_error = 0,
738 ZSTD_error_GENERIC = 1,
739 ZSTD_error_prefix_unknown = 10,
740 ZSTD_error_version_unsupported = 12,
741 ZSTD_error_frameParameter_unsupported = 14,
742 ZSTD_error_frameParameter_windowTooLarge = 16,
743 ZSTD_error_corruption_detected = 20,
744 ZSTD_error_checksum_wrong = 22,
745 ZSTD_error_dictionary_corrupted = 30,
746 ZSTD_error_dictionary_wrong = 32,
747 ZSTD_error_dictionaryCreation_failed = 34,
748 ZSTD_error_parameter_unsupported = 40,
749 ZSTD_error_parameter_outOfBound = 42,
750 ZSTD_error_tableLog_tooLarge = 44,
751 ZSTD_error_maxSymbolValue_tooLarge = 46,
752 ZSTD_error_maxSymbolValue_tooSmall = 48,
753 ZSTD_error_stage_wrong = 60,
754 ZSTD_error_init_missing = 62,
755 ZSTD_error_memory_allocation = 64,
756 ZSTD_error_workSpace_tooSmall= 66,
757 ZSTD_error_dstSize_tooSmall = 70,
758 ZSTD_error_srcSize_wrong = 72,
759 ZSTD_error_dstBuffer_null = 74,
760 /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
761 ZSTD_error_frameIndex_tooLarge = 100,
762 ZSTD_error_seekableIO = 102,
763 ZSTD_error_dstBuffer_wrong = 104,
764 ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
767 /*! ZSTD_getErrorCode() :
768 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
769 which can be used to compare with enum list published above */
770 ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
771 ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
774 #if defined (__cplusplus)
778 #endif /* ZSTD_ERRORS_H_398273423 */
779 /**** ended inlining zstd_errors.h ****/
782 /* ****************************************
784 ******************************************/
785 #if defined(__GNUC__)
786 # define ERR_STATIC static __attribute__((unused))
787 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
788 # define ERR_STATIC static inline
789 #elif defined(_MSC_VER)
790 # define ERR_STATIC static __inline
792 # define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
796 /*-****************************************
797 * Customization (error_public.h)
798 ******************************************/
799 typedef ZSTD_ErrorCode ERR_enum;
800 #define PREFIX(name) ZSTD_error_##name
803 /*-****************************************
804 * Error codes handling
805 ******************************************/
806 #undef ERROR /* already defined on Visual Studio */
807 #define ERROR(name) ZSTD_ERROR(name)
808 #define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
810 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
812 ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
814 /* check and forward error code */
815 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
816 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
819 /*-****************************************
821 ******************************************/
823 const char* ERR_getErrorString(ERR_enum code); /* error_private.c */
825 ERR_STATIC const char* ERR_getErrorName(size_t code)
827 return ERR_getErrorString(ERR_getErrorCode(code));
830 #if defined (__cplusplus)
834 #endif /* ERROR_H_MODULE */
835 /**** ended inlining error_private.h ****/
836 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
837 /**** start inlining fse.h ****/
838 /* ******************************************************************
839 * FSE : Finite State Entropy codec
840 * Public Prototypes declaration
841 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
843 * You can contact the author at :
844 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
846 * This source code is licensed under both the BSD-style license (found in the
847 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
848 * in the COPYING file in the root directory of this source tree).
849 * You may select, at your option, one of the above-listed licenses.
850 ****************************************************************** */
852 #if defined (__cplusplus)
860 /*-*****************************************
862 ******************************************/
863 #include <stddef.h> /* size_t, ptrdiff_t */
866 /*-*****************************************
867 * FSE_PUBLIC_API : control library symbols visibility
868 ******************************************/
869 #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
870 # define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
871 #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
872 # define FSE_PUBLIC_API __declspec(dllexport)
873 #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
874 # define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
876 # define FSE_PUBLIC_API
879 /*------ Version ------*/
880 #define FSE_VERSION_MAJOR 0
881 #define FSE_VERSION_MINOR 9
882 #define FSE_VERSION_RELEASE 0
884 #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
885 #define FSE_QUOTE(str) #str
886 #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
887 #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
889 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
890 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
893 /*-****************************************
894 * FSE simple functions
895 ******************************************/
897 Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
898 'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
899 @return : size of compressed data (<= dstCapacity).
900 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
901 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
902 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
904 FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
905 const void* src, size_t srcSize);
907 /*! FSE_decompress():
908 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
909 into already allocated destination buffer 'dst', of size 'dstCapacity'.
910 @return : size of regenerated data (<= maxDstSize),
911 or an error code, which can be tested using FSE_isError() .
913 ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
914 Why ? : making this distinction requires a header.
915 Header management is intentionally delegated to the user layer, which can better manage special cases.
917 FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
918 const void* cSrc, size_t cSrcSize);
921 /*-*****************************************
923 ******************************************/
924 FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
926 /* Error Management */
927 FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
928 FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
931 /*-*****************************************
932 * FSE advanced functions
933 ******************************************/
/*! FSE_compress2() :
    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
    Both parameters can be defined as '0' to mean : use default value
    @return : size of compressed data
    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
                     if FSE_isError(return), it's an error code.
*/
942 FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
945 /*-*****************************************
947 ******************************************/
949 FSE_compress() does the following:
950 1. count symbol occurrence from source[] into table count[] (see hist.h)
951 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
952 3. save normalized counters to memory buffer using writeNCount()
953 4. build encoding table 'CTable' from normalized counters
954 5. encode the data stream using encoding table 'CTable'
956 FSE_decompress() does the following:
957 1. read normalized counters with readNCount()
958 2. build decoding table 'DTable' from normalized counters
959 3. decode the data stream using decoding table 'DTable'
961 The following API allows targeting specific sub-functions for advanced tasks.
962 For example, it's possible to compress several blocks using the same 'CTable',
963 or to save and provide normalized distribution using external method.
966 /* *** COMPRESSION *** */
968 /*! FSE_optimalTableLog():
969 dynamically downsize 'tableLog' when conditions are met.
970 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
971 @return : recommended tableLog (necessarily <= 'maxTableLog') */
972 FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
974 /*! FSE_normalizeCount():
975 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
976 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
978 or an errorCode, which can be tested using FSE_isError() */
979 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
980 const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
982 /*! FSE_NCountWriteBound():
983 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
984 Typically useful for allocation purpose. */
985 FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
987 /*! FSE_writeNCount():
988 Compactly save 'normalizedCounter' into 'buffer'.
989 @return : size of the compressed table,
990 or an errorCode, which can be tested using FSE_isError(). */
991 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
992 const short* normalizedCounter,
993 unsigned maxSymbolValue, unsigned tableLog);
995 /*! Constructor and Destructor of FSE_CTable.
996 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
997 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
998 FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
999 FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
1001 /*! FSE_buildCTable():
1002 Builds `ct`, which must be already allocated, using FSE_createCTable().
1003 @return : 0, or an errorCode, which can be tested using FSE_isError() */
1004 FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
1006 /*! FSE_compress_usingCTable():
1007 Compress `src` using `ct` into `dst` which must be already allocated.
1008 @return : size of compressed data (<= `dstCapacity`),
1009 or 0 if compressed data could not fit into `dst`,
1010 or an errorCode, which can be tested using FSE_isError() */
1011 FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
1016 The first step is to count all symbols. FSE_count() does this job very fast.
1017 Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
1018 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
1019 maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
1020 FSE_count() will return the number of occurrence of the most frequent symbol.
1021 This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
1022 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
1024 The next step is to normalize the frequencies.
1025 FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
1026 It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
1027 You can use 'tableLog'==0 to mean "use default tableLog value".
1028 If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
1029 which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
1031 The result of FSE_normalizeCount() will be saved into a table,
1032 called 'normalizedCounter', which is a table of signed short.
1033 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
1034 The return value is tableLog if everything proceeded as expected.
1035 It is 0 if there is a single symbol within distribution.
1036 If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
1038 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
1039 'buffer' must be already allocated.
1040 For guaranteed success, buffer size must be at least FSE_headerBound().
1041 The result of the function is the number of bytes written into 'buffer'.
1042 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
1044 'normalizedCounter' can then be used to create the compression table 'CTable'.
1045 The space required by 'CTable' must be already allocated, using FSE_createCTable().
1046 You can then use FSE_buildCTable() to fill 'CTable'.
1047 If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
1049 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
1050 Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
1051 The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
1052 If it returns '0', compressed data could not fit into 'dst'.
1053 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
1057 /* *** DECOMPRESSION *** */
1059 /*! FSE_readNCount():
1060 Read compactly saved 'normalizedCounter' from 'rBuffer'.
1061 @return : size read from 'rBuffer',
1062 or an errorCode, which can be tested using FSE_isError().
1063 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
1064 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
1065 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
1066 const void* rBuffer, size_t rBuffSize);
1068 /*! Constructor and Destructor of FSE_DTable.
1069 Note that its size depends on 'tableLog' */
1070 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
1071 FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
1072 FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
1074 /*! FSE_buildDTable():
1075 Builds 'dt', which must be already allocated, using FSE_createDTable().
1076 return : 0, or an errorCode, which can be tested using FSE_isError() */
1077 FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
1079 /*! FSE_decompress_usingDTable():
1080 Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
1081 into `dst` which must be already allocated.
1082 @return : size of regenerated data (necessarily <= `dstCapacity`),
1083 or an errorCode, which can be tested using FSE_isError() */
1084 FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
1089 (Note : these functions only decompress FSE-compressed blocks.
1090 If block is uncompressed, use memcpy() instead
1091 If block is a single repeated byte, use memset() instead )
1093 The first step is to obtain the normalized frequencies of symbols.
1094 This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
1095 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
1096 In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
1097 or size the table to handle worst case situations (typically 256).
1098 FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
1099 The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
1100 Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
1101 If there is an error, the function will return an error code, which can be tested using FSE_isError().
1103 The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
1104 This is performed by the function FSE_buildDTable().
1105 The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
1106 If there is an error, the function will return an error code, which can be tested using FSE_isError().
1108 `FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
1109 `cSrcSize` must be strictly correct, otherwise decompression will fail.
1110 FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
1111 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
1116 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
1117 #define FSE_H_FSE_STATIC_LINKING_ONLY
1119 /* *** Dependency *** */
1120 /**** start inlining bitstream.h ****/
1121 /* ******************************************************************
1123 * Part of FSE library
1124 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
1126 * You can contact the author at :
1127 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
1129 * This source code is licensed under both the BSD-style license (found in the
1130 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1131 * in the COPYING file in the root directory of this source tree).
1132 * You may select, at your option, one of the above-listed licenses.
1133 ****************************************************************** */
1134 #ifndef BITSTREAM_H_MODULE
1135 #define BITSTREAM_H_MODULE
1137 #if defined (__cplusplus)
1142 * This API consists of small unitary functions, which must be inlined for best performance.
1143 * Since link-time-optimization is not available for all compilers,
1144 * these functions are defined into a .h to be included.
1147 /*-****************************************
1149 ******************************************/
1150 /**** skipping file: mem.h ****/
1151 /**** start inlining compiler.h ****/
1153 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
1154 * All rights reserved.
1156 * This source code is licensed under both the BSD-style license (found in the
1157 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1158 * in the COPYING file in the root directory of this source tree).
1159 * You may select, at your option, one of the above-listed licenses.
1162 #ifndef ZSTD_COMPILER_H
1163 #define ZSTD_COMPILER_H
1165 /*-*******************************************************
1166 * Compiler specifics
1167 *********************************************************/
1168 /* force inlining */
1170 #if !defined(ZSTD_NO_INLINE)
1171 #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
1172 # define INLINE_KEYWORD inline
1174 # define INLINE_KEYWORD
1177 #if defined(__GNUC__) || defined(__ICCARM__)
1178 # define FORCE_INLINE_ATTR __attribute__((always_inline))
1179 #elif defined(_MSC_VER)
1180 # define FORCE_INLINE_ATTR __forceinline
1182 # define FORCE_INLINE_ATTR
1187 #define INLINE_KEYWORD
1188 #define FORCE_INLINE_ATTR
1193 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
1194 * parameters. They must be inlined for the compiler to eliminate the constant
1197 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
1199 * HINT_INLINE is used to help the compiler generate better code. It is *not*
1200 * used for "templates", so it can be tweaked based on the compilers
1203 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
1204 * always_inline attribute.
1206 * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
1209 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
1210 # define HINT_INLINE static INLINE_KEYWORD
1212 # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
1215 /* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
1216 #if defined(__GNUC__)
1217 # define UNUSED_ATTR __attribute__((unused))
1219 # define UNUSED_ATTR
1222 /* force no inlining */
1224 # define FORCE_NOINLINE static __declspec(noinline)
1226 # if defined(__GNUC__) || defined(__ICCARM__)
1227 # define FORCE_NOINLINE static __attribute__((__noinline__))
1229 # define FORCE_NOINLINE static
1233 /* target attribute */
1234 #ifndef __has_attribute
1235 #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
1237 #if defined(__GNUC__) || defined(__ICCARM__)
1238 # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
1240 # define TARGET_ATTRIBUTE(target)
1243 /* Enable runtime BMI2 dispatch based on the CPU.
1244 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
1246 #ifndef DYNAMIC_BMI2
1247 #if ((defined(__clang__) && __has_attribute(__target__)) \
1248 || (defined(__GNUC__) \
1249 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
1250 && (defined(__x86_64__) || defined(_M_X86)) \
1251 && !defined(__BMI2__)
1252 # define DYNAMIC_BMI2 1
1254 # define DYNAMIC_BMI2 0
1259 * can be disabled, by declaring NO_PREFETCH build macro */
1260 #if defined(NO_PREFETCH)
1261 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
1262 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
1264 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
1265 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
1266 # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
1267 # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
1268 # elif defined(__aarch64__)
1269 # define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
1270 # define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
1271 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
1272 # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
1273 # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
1275 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
1276 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
1278 #endif /* NO_PREFETCH */
1280 #define CACHELINE_SIZE 64
1282 #define PREFETCH_AREA(p, s) { \
1283 const char* const _ptr = (const char*)(p); \
1284 size_t const _size = (size_t)(s); \
1286 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
1287 PREFETCH_L2(_ptr + _pos); \
1292 * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
1293 #if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
1294 # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
1295 # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
1297 # define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
1300 # define DONT_VECTORIZE
1303 /* Tell the compiler that a branch is likely or unlikely.
1304 * Only use these macros if it causes the compiler to generate better code.
1305 * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
1306 * and clang, please do.
1308 #if defined(__GNUC__)
1309 #define LIKELY(x) (__builtin_expect((x), 1))
1310 #define UNLIKELY(x) (__builtin_expect((x), 0))
1312 #define LIKELY(x) (x)
1313 #define UNLIKELY(x) (x)
1316 /* disable warnings */
1317 #ifdef _MSC_VER /* Visual Studio */
1318 # include <intrin.h> /* For Visual 2005 */
1319 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
1320 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
1321 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
1322 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
1323 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
1326 #endif /* ZSTD_COMPILER_H */
1327 /**** ended inlining compiler.h ****/
1328 /**** skipping file: debug.h ****/
1329 /**** skipping file: error_private.h ****/
1332 /*=========================================
1334 =========================================*/
1335 #if defined(__BMI__) && defined(__GNUC__)
1336 # include <immintrin.h> /* support for bextr (experimental) */
1337 #elif defined(__ICCARM__)
1338 # include <intrinsics.h>
1341 #define STREAM_ACCUMULATOR_MIN_32 25
1342 #define STREAM_ACCUMULATOR_MIN_64 57
1343 #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
1346 /*-******************************************
1347 * bitStream encoding API (write forward)
1348 ********************************************/
1349 /* bitStream can mix input from multiple sources.
1350 * A critical property of these streams is that they encode and decode in **reverse** direction.
1351 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
1354 size_t bitContainer;
1361 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
1362 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
1363 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
1364 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
1366 /* Start with initCStream, providing the size of buffer to write into.
1367 * bitStream will never write outside of this buffer.
1368 * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
1370 * bits are first added to a local register.
1371 * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
1372 * Writing data into memory is an explicit operation, performed by the flushBits function.
1373 * Hence keep track how many bits are potentially stored into local register to avoid register overflow.
1374 * After a flushBits, a maximum of 7 bits might still be stored into local register.
1376 * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
1378 * Last operation is to close the bitStream.
1379 * The function returns the final size of CStream in bytes.
1380 * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
1384 /*-********************************************
1385 * bitStream decoding API (read backward)
1386 **********************************************/
1388 size_t bitContainer;
1389 unsigned bitsConsumed;
1392 const char* limitPtr;
1395 typedef enum { BIT_DStream_unfinished = 0,
1396 BIT_DStream_endOfBuffer = 1,
1397 BIT_DStream_completed = 2,
1398 BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
1399 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
1401 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
1402 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
1403 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
1404 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
1407 /* Start by invoking BIT_initDStream().
1408 * A chunk of the bitStream is then stored into a local register.
1409 * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
1410 * You can then retrieve bitFields stored into the local register, **in reverse order**.
1411 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
1412 * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
1413 * Otherwise, it can be less than that, so proceed accordingly.
1414 * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
1418 /*-****************************************
1420 ******************************************/
1421 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
1422 /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
1424 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
1425 /* unsafe version; does not check buffer overflow */
1427 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
1428 /* faster, but works only if nbBits >= 1 */
1432 /*-**************************************************************
1433 * Internal functions
1434 ****************************************************************/
1435 MEM_STATIC unsigned BIT_highbit32 (U32 val)
1439 # if defined(_MSC_VER) /* Visual */
1441 return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
1442 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
1443 return __builtin_clz (val) ^ 31;
1444 # elif defined(__ICCARM__) /* IAR Intrinsic */
1445 return 31 - __CLZ(val);
1446 # else /* Software version */
1447 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
1448 11, 14, 16, 18, 22, 25, 3, 30,
1449 8, 12, 20, 28, 15, 17, 24, 7,
1450 19, 27, 23, 6, 26, 5, 4, 31 };
1457 return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
1462 /*===== Local Constants =====*/
1463 static const unsigned BIT_mask[] = {
1464 0, 1, 3, 7, 0xF, 0x1F,
1465 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
1466 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
1467 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
1468 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
1469 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
1470 #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
1472 /*-**************************************************************
1473 * bitStream encoding
1474 ****************************************************************/
1475 /*! BIT_initCStream() :
1476 * `dstCapacity` must be > sizeof(size_t)
1477 * @return : 0 if success,
1478 * otherwise an error code (can be tested using ERR_isError()) */
1479 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
1480 void* startPtr, size_t dstCapacity)
1482 bitC->bitContainer = 0;
1484 bitC->startPtr = (char*)startPtr;
1485 bitC->ptr = bitC->startPtr;
1486 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
1487 if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
1492 * can add up to 31 bits into `bitC`.
1493 * Note : does not check for register overflow ! */
1494 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
1495 size_t value, unsigned nbBits)
1497 MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
1498 assert(nbBits < BIT_MASK_SIZE);
1499 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
1500 bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
1501 bitC->bitPos += nbBits;
1504 /*! BIT_addBitsFast() :
1505 * works only if `value` is _clean_,
1506 * meaning all high bits above nbBits are 0 */
1507 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
1508 size_t value, unsigned nbBits)
1510 assert((value>>nbBits) == 0);
1511 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
1512 bitC->bitContainer |= value << bitC->bitPos;
1513 bitC->bitPos += nbBits;
1516 /*! BIT_flushBitsFast() :
1517 * assumption : bitContainer has not overflowed
1518 * unsafe version; does not check buffer overflow */
1519 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
1521 size_t const nbBytes = bitC->bitPos >> 3;
1522 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
1523 assert(bitC->ptr <= bitC->endPtr);
1524 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
1525 bitC->ptr += nbBytes;
1527 bitC->bitContainer >>= nbBytes*8;
1530 /*! BIT_flushBits() :
1531 * assumption : bitContainer has not overflowed
1532 * safe version; check for buffer overflow, and prevents it.
1533 * note : does not signal buffer overflow.
1534 * overflow will be revealed later on using BIT_closeCStream() */
1535 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
1537 size_t const nbBytes = bitC->bitPos >> 3;
1538 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
1539 assert(bitC->ptr <= bitC->endPtr);
1540 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
1541 bitC->ptr += nbBytes;
1542 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
1544 bitC->bitContainer >>= nbBytes*8;
1547 /*! BIT_closeCStream() :
1548 * @return : size of CStream, in bytes,
1549 * or 0 if it could not fit into dstBuffer */
1550 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
1552 BIT_addBitsFast(bitC, 1, 1); /* endMark */
1553 BIT_flushBits(bitC);
1554 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
1555 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
1559 /*-********************************************************
1560 * bitStream decoding
1561 **********************************************************/
1562 /*! BIT_initDStream() :
1563 * Initialize a BIT_DStream_t.
1564 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
1565 * `srcSize` must be the *exact* size of the bitStream, in bytes.
1566 * @return : size of stream (== srcSize), or an errorCode if a problem is detected
1568 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
1570 if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
1572 bitD->start = (const char*)srcBuffer;
1573 bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
1575 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
1576 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
1577 bitD->bitContainer = MEM_readLEST(bitD->ptr);
1578 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
1579 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
1580 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
1582 bitD->ptr = bitD->start;
1583 bitD->bitContainer = *(const BYTE*)(bitD->start);
1586 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
1589 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
1592 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
1595 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
1598 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
1601 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
1606 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
1607 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
1608 if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
1610 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
1616 MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
1618 return bitContainer >> start;
1621 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
1623 U32 const regMask = sizeof(bitContainer)*8 - 1;
1624 /* if start > regMask, bitstream is corrupted, and result is undefined */
1625 assert(nbBits < BIT_MASK_SIZE);
1626 return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
1629 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
1631 assert(nbBits < BIT_MASK_SIZE);
1632 return bitContainer & BIT_mask[nbBits];
1635 /*! BIT_lookBits() :
1636 * Provides next n bits from local register.
1637 * local register is not modified.
1638 * On 32-bits, maxNbBits==24.
1639 * On 64-bits, maxNbBits==56.
1640 * @return : value extracted */
1641 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
1643 /* arbitrate between double-shift and shift+mask */
1645 /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
1646 * bitstream is likely corrupted, and result is undefined */
1647 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
1649 /* this code path is slower on my os-x laptop */
1650 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
1651 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
1655 /*! BIT_lookBitsFast() :
1656 * unsafe version; only works if nbBits >= 1 */
1657 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
1659 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
1660 assert(nbBits >= 1);
1661 return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
1664 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
1666 bitD->bitsConsumed += nbBits;
1669 /*! BIT_readBits() :
1670 * Read (consume) next n bits from local register and update.
1671 * Pay attention to not read more than nbBits contained into local register.
1672 * @return : extracted value. */
1673 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
1675 size_t const value = BIT_lookBits(bitD, nbBits);
1676 BIT_skipBits(bitD, nbBits);
1680 /*! BIT_readBitsFast() :
1681 * unsafe version; only works only if nbBits >= 1 */
1682 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
1684 size_t const value = BIT_lookBitsFast(bitD, nbBits);
1685 assert(nbBits >= 1);
1686 BIT_skipBits(bitD, nbBits);
1690 /*! BIT_reloadDStreamFast() :
1691 * Similar to BIT_reloadDStream(), but with two differences:
1692 * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
1693 * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
1694 * point you must use BIT_reloadDStream() to reload.
1696 MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
1698 if (UNLIKELY(bitD->ptr < bitD->limitPtr))
1699 return BIT_DStream_overflow;
1700 assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
1701 bitD->ptr -= bitD->bitsConsumed >> 3;
1702 bitD->bitsConsumed &= 7;
1703 bitD->bitContainer = MEM_readLEST(bitD->ptr);
1704 return BIT_DStream_unfinished;
1707 /*! BIT_reloadDStream() :
1708 * Refill `bitD` from buffer previously set in BIT_initDStream() .
1709 * This function is safe, it guarantees it will not read beyond src buffer.
1710 * @return : status of `BIT_DStream_t` internal register.
1711 * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
1712 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
1714 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
1715 return BIT_DStream_overflow;
1717 if (bitD->ptr >= bitD->limitPtr) {
1718 return BIT_reloadDStreamFast(bitD);
1720 if (bitD->ptr == bitD->start) {
1721 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
1722 return BIT_DStream_completed;
1724 /* start < ptr < limitPtr */
1725 { U32 nbBytes = bitD->bitsConsumed >> 3;
1726 BIT_DStream_status result = BIT_DStream_unfinished;
1727 if (bitD->ptr - nbBytes < bitD->start) {
1728 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
1729 result = BIT_DStream_endOfBuffer;
1731 bitD->ptr -= nbBytes;
1732 bitD->bitsConsumed -= nbBytes*8;
1733 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
1738 /*! BIT_endOfDStream() :
1739 * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
1741 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
1743 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
1746 #if defined (__cplusplus)
1750 #endif /* BITSTREAM_H_MODULE */
1751 /**** ended inlining bitstream.h ****/
1754 /* *****************************************
1756 *******************************************/
1757 /* FSE buffer bounds */
1758 #define FSE_NCOUNTBOUND 512
1759 #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
1760 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
1762 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
1763 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
1764 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
1766 /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
1767 #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
1768 #define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
1771 /* *****************************************
1773 ***************************************** */
1775 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
1776 /**< same as FSE_optimalTableLog(), which uses `minus==2` */
1778 /* FSE_compress_wksp() :
1779 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
1780 * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
1782 #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
1783 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
1785 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
1786 /**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
1788 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
1789 /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
1791 /* FSE_buildCTable_wksp() :
1792 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
1793 * `wkspSize` must be >= `(1<<tableLog)`.
1795 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
1797 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
1798 /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
1800 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
1801 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
1803 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
1804 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
1807 FSE_repeat_none, /**< Cannot use the previous table */
1808 FSE_repeat_check, /**< Can use the previous table but it must be checked */
1809 FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */
1812 /* *****************************************
1813 * FSE symbol compression API
1814 *******************************************/
1816 This API consists of small unitary functions, which highly benefit from being inlined.
1817 Hence their body are included in next section.
1821 const void* stateTable;
1822 const void* symbolTT;
1826 static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
1828 static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
1830 static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
1833 These functions are inner components of FSE_compress_usingCTable().
1834 They allow the creation of custom streams, mixing multiple tables and bit sources.
1836 A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
1837 So the first symbol you will encode is the last you will decode, like a LIFO stack.
1839 You will need a few variables to track your CStream. They are :
1841 FSE_CTable ct; // Provided by FSE_buildCTable()
1842 BIT_CStream_t bitStream; // bitStream tracking structure
1843 FSE_CState_t state; // State tracking structure (can have several)
1846 The first thing to do is to init bitStream and state.
1847 size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
1848 FSE_initCState(&state, ct);
1850 Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
1851 You can then encode your input data, byte after byte.
1852 FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
1853 Remember decoding will be done in reverse direction.
1854 FSE_encodeByte(&bitStream, &state, symbol);
1856 At any time, you can also add any bit sequence.
1857 Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
1858 BIT_addBits(&bitStream, bitField, nbBits);
1860 The above methods don't commit data to memory, they just store it into local register, for speed.
1861 Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
1862 Writing data to memory is a manual operation, performed by the flushBits function.
1863 BIT_flushBits(&bitStream);
1865 Your last FSE encoding operation shall be to flush your last state value(s).
1866 FSE_flushState(&bitStream, &state);
1868 Finally, you must close the bitStream.
1869 The function returns the size of CStream in bytes.
1870 If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
1871 If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
1872 size_t size = BIT_closeCStream(&bitStream);
1876 /* *****************************************
1877 * FSE symbol decompression API
1878 *******************************************/
1881 const void* table; /* precise table may vary, depending on U16 */
1885 static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
1887 static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
1889 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
1892 Let's now decompose FSE_decompress_usingDTable() into its unitary components.
1893 You will decode FSE-encoded symbols from the bitStream,
1894 and also any other bitFields you put in, **in reverse order**.
1896 You will need a few variables to track your bitStream. They are :
1898 BIT_DStream_t DStream; // Stream context
1899 FSE_DState_t DState; // State context. Multiple ones are possible
1900 FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
1902 The first thing to do is to init the bitStream.
1903 errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
1905 You should then retrieve your initial state(s)
1906 (in reverse flushing order if you have several ones) :
1907 errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
1909 You can then decode your data, symbol after symbol.
1910 For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
1911 Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
1912 unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
1914 You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
1915 Note : maximum allowed nbBits is 25, for 32-bits compatibility
1916 size_t bitField = BIT_readBits(&DStream, nbBits);
1918 All above operations only read from local register (which size depends on size_t).
1919 Refueling the register from memory is manually performed by the reload method.
1920 endSignal = BIT_reloadDStream(&DStream);
1922 BIT_reloadDStream() result tells if there is still some more data to read from DStream.
1923 BIT_DStream_unfinished : there is still some data left into the DStream.
1924 BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
1925 BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
1926 BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
1928 When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
1929 to properly detect the exact end of stream.
1930 After each decoded symbol, check if DStream is fully consumed using this simple test :
1931 BIT_reloadDStream(&DStream) >= BIT_DStream_completed
1933 When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
1934 Checking if DStream has reached its end is performed by :
1935 BIT_endOfDStream(&DStream);
1936 Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
1937 FSE_endOfDState(&DState);
1941 /* *****************************************
1943 *******************************************/
1944 static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
1945 /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
1948 /* *****************************************
1949 * Implementation of inlined functions
1950 *******************************************/
1954 } FSE_symbolCompressionTransform; /* total 8 bytes */
1956 MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
1958 const void* ptr = ct;
1959 const U16* u16ptr = (const U16*) ptr;
1960 const U32 tableLog = MEM_read16(ptr);
1961 statePtr->value = (ptrdiff_t)1<<tableLog;
1962 statePtr->stateTable = u16ptr+2;
1963 statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
1964 statePtr->stateLog = tableLog;
1968 /*! FSE_initCState2() :
1969 * Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
1970 * uses the smallest state value possible, saving the cost of this symbol */
1971 MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
1973 FSE_initCState(statePtr, ct);
1974 { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
1975 const U16* stateTable = (const U16*)(statePtr->stateTable);
1976 U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
1977 statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
1978 statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
1982 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
1984 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
1985 const U16* const stateTable = (const U16*)(statePtr->stateTable);
1986 U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
1987 BIT_addBits(bitC, statePtr->value, nbBitsOut);
1988 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
1991 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
1993 BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
1994 BIT_flushBits(bitC);
1998 /* FSE_getMaxNbBits() :
1999 * Approximate maximum cost of a symbol, in bits.
2000 * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
2001 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
2002 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
2003 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
2005 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
2006 return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
2010 * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
2011 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
2012 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
2013 MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
2015 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
2016 U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
2017 U32 const threshold = (minNbBits+1) << 16;
2018 assert(tableLog < 16);
2019 assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
2020 { U32 const tableSize = 1 << tableLog;
2021 U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
2022 U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
2023 U32 const bitMultiplier = 1 << accuracyLog;
2024 assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
2025 assert(normalizedDeltaFromThreshold <= bitMultiplier);
2026 return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
2031 /* ====== Decompression ====== */
2036 } FSE_DTableHeader; /* sizeof U32 */
2040 unsigned short newState;
2041 unsigned char symbol;
2042 unsigned char nbBits;
2043 } FSE_decode_t; /* size == U32 */
2045 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
2047 const void* ptr = dt;
2048 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
2049 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
2050 BIT_reloadDStream(bitD);
2051 DStatePtr->table = dt + 1;
2054 MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
2056 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2057 return DInfo.symbol;
2060 MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
2062 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2063 U32 const nbBits = DInfo.nbBits;
2064 size_t const lowBits = BIT_readBits(bitD, nbBits);
2065 DStatePtr->state = DInfo.newState + lowBits;
2068 MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
2070 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2071 U32 const nbBits = DInfo.nbBits;
2072 BYTE const symbol = DInfo.symbol;
2073 size_t const lowBits = BIT_readBits(bitD, nbBits);
2075 DStatePtr->state = DInfo.newState + lowBits;
2079 /*! FSE_decodeSymbolFast() :
2080 unsafe, only works if no symbol has a probability > 50% */
2081 MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
2083 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
2084 U32 const nbBits = DInfo.nbBits;
2085 BYTE const symbol = DInfo.symbol;
2086 size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
2088 DStatePtr->state = DInfo.newState + lowBits;
2092 MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
2094 return DStatePtr->state == 0;
2099 #ifndef FSE_COMMONDEFS_ONLY
2101 /* **************************************************************
2103 ****************************************************************/
2105 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
2106 * Increasing memory usage improves compression ratio
2107 * Reduced memory usage can improve speed, due to cache effect
2108 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
2109 #ifndef FSE_MAX_MEMORY_USAGE
2110 # define FSE_MAX_MEMORY_USAGE 14
2112 #ifndef FSE_DEFAULT_MEMORY_USAGE
2113 # define FSE_DEFAULT_MEMORY_USAGE 13
2116 /*!FSE_MAX_SYMBOL_VALUE :
2117 * Maximum symbol value authorized.
2118 * Required for proper stack allocation */
2119 #ifndef FSE_MAX_SYMBOL_VALUE
2120 # define FSE_MAX_SYMBOL_VALUE 255
2123 /* **************************************************************
2124 * template functions type & suffix
2125 ****************************************************************/
2126 #define FSE_FUNCTION_TYPE BYTE
2127 #define FSE_FUNCTION_EXTENSION
2128 #define FSE_DECODE_TYPE FSE_decode_t
2131 #endif /* !FSE_COMMONDEFS_ONLY */
2134 /* ***************************************************************
2136 *****************************************************************/
2137 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
2138 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
2139 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
2140 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
2141 #define FSE_MIN_TABLELOG 5
2143 #define FSE_TABLELOG_ABSOLUTE_MAX 15
2144 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
2145 # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
2148 #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
2151 #endif /* FSE_STATIC_LINKING_ONLY */
2154 #if defined (__cplusplus)
2157 /**** ended inlining fse.h ****/
2158 #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
2159 /**** start inlining huf.h ****/
2160 /* ******************************************************************
2161 * huff0 huffman codec,
2162 * part of Finite State Entropy library
2163 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
2165 * You can contact the author at :
2166 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2168 * This source code is licensed under both the BSD-style license (found in the
2169 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
2170 * in the COPYING file in the root directory of this source tree).
2171 * You may select, at your option, one of the above-listed licenses.
2172 ****************************************************************** */
2174 #if defined (__cplusplus)
2178 #ifndef HUF_H_298734234
2179 #define HUF_H_298734234
2181 /* *** Dependencies *** */
2182 #include <stddef.h> /* size_t */
2185 /* *** library symbols visibility *** */
2186 /* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
2187 * HUF symbols remain "private" (internal symbols for library only).
2188 * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
2189 #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
2190 # define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
2191 #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
2192 # define HUF_PUBLIC_API __declspec(dllexport)
2193 #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
2194 # define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
2196 # define HUF_PUBLIC_API
2200 /* ========================== */
2201 /* *** simple functions *** */
2202 /* ========================== */
2204 /** HUF_compress() :
2205 * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
2206 * 'dst' buffer must be already allocated.
2207 * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
2208 * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
2209 * @return : size of compressed data (<= `dstCapacity`).
2210 * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
2211 * if HUF_isError(return), compression failed (more details using HUF_getErrorName())
2213 HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
2214 const void* src, size_t srcSize);
2216 /** HUF_decompress() :
2217 * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
2218 * into already allocated buffer 'dst', of minimum size 'dstSize'.
2219 * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
2220 * Note : in contrast with FSE, HUF_decompress can regenerate
2221 * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
2222 * because it knows size to regenerate (originalSize).
2223 * @return : size of regenerated data (== originalSize),
2224 * or an error code, which can be tested using HUF_isError()
2226 HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
2227 const void* cSrc, size_t cSrcSize);
2230 /* *** Tool functions *** */
2231 #define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
2232 HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
2234 /* Error Management */
2235 HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
2236 HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
2239 /* *** Advanced function *** */
2241 /** HUF_compress2() :
2242 * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
2243 * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
2244 * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
2245 HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
2246 const void* src, size_t srcSize,
2247 unsigned maxSymbolValue, unsigned tableLog);
2249 /** HUF_compress4X_wksp() :
2250 * Same as HUF_compress2(), but uses externally allocated `workSpace`.
2251 * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
2252 #define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
2253 #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
2254 HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
2255 const void* src, size_t srcSize,
2256 unsigned maxSymbolValue, unsigned tableLog,
2257 void* workSpace, size_t wkspSize);
2259 #endif /* HUF_H_298734234 */
2261 /* ******************************************************************
2263 * The following section contains advanced and experimental definitions
2264 * which shall never be used in the context of a dynamic library,
2265 * because they are not guaranteed to remain stable in the future.
2266 * Only consider them in association with static linking.
2267 * *****************************************************************/
2268 #if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
2269 #define HUF_H_HUF_STATIC_LINKING_ONLY
2271 /* *** Dependencies *** */
2272 /**** skipping file: mem.h ****/
2275 /* *** Constants *** */
2276 #define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
2277 #define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
2278 #define HUF_SYMBOLVALUE_MAX 255
2280 #define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
2281 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
2282 # error "HUF_TABLELOG_MAX is too large !"
2286 /* ****************************************
2288 ******************************************/
2289 /* HUF buffer bounds */
2290 #define HUF_CTABLEBOUND 129
2291 #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */
2292 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
2294 /* static allocation of HUF's Compression Table */
2295 #define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
2296 #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
2297 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
2298 U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
2299 void* name##hv = &(name##hb); \
2300 HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
2302 /* static allocation of HUF's DTable */
2303 typedef U32 HUF_DTable;
2304 #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
2305 #define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
2306 HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
2307 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
2308 HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
2311 /* ****************************************
2312 * Advanced decompression functions
2313 ******************************************/
2314 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
2315 #ifndef HUF_FORCE_DECOMPRESS_X1
2316 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
2319 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
2320 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
2321 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
2322 size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
2323 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
2324 #ifndef HUF_FORCE_DECOMPRESS_X1
2325 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
2326 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
2330 /* ****************************************
2332 * ****************************************/
2334 /*! HUF_compress() does the following:
2335 * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
2336 * 2. (optional) refine tableLog using HUF_optimalTableLog()
2337 * 3. build Huffman table from count using HUF_buildCTable()
2338 * 4. save Huffman table to memory buffer using HUF_writeCTable()
2339 * 5. encode the data stream using HUF_compress4X_usingCTable()
2341 * The following API allows targeting specific sub-functions for advanced tasks.
2342 * For example, it's possible to compress several blocks using the same 'CTable',
2343 * or to save and regenerate 'CTable' using external methods.
2345 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
2346 typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
2347 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
2348 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
2349 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
2350 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
2351 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
2354 HUF_repeat_none, /**< Cannot use the previous table */
2355 HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
2356 HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
2358 /** HUF_compress4X_repeat() :
2359 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
2360 * If it uses hufTable it does not modify hufTable or repeat.
2361 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
2362 * If preferRepeat then the old table will always be used if valid. */
2363 size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
2364 const void* src, size_t srcSize,
2365 unsigned maxSymbolValue, unsigned tableLog,
2366 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
2367 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
2369 /** HUF_buildCTable_wksp() :
2370 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
2371 * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
2373 #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
2374 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
2375 size_t HUF_buildCTable_wksp (HUF_CElt* tree,
2376 const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
2377 void* workSpace, size_t wkspSize);
2379 /*! HUF_readStats() :
2380 * Read compact Huffman tree, saved by HUF_writeCTable().
2381 * `huffWeight` is destination buffer.
2382 * @return : size read from `src` , or an error Code .
2383 * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
2384 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
2385 U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
2386 const void* src, size_t srcSize);
2388 /** HUF_readCTable() :
2389 * Loading a CTable saved with HUF_writeCTable() */
2390 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
2392 /** HUF_getNbBits() :
2393 * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
2394 * Note 1 : is not inlined, as HUF_CElt definition is private
2395 * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
2396 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
2399 * HUF_decompress() does the following:
2400 * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
2401 * 2. build Huffman table from save, using HUF_readDTableX?()
2402 * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
2405 /** HUF_selectDecoder() :
2406 * Tells which decoder is likely to decode faster,
2407 * based on a set of pre-computed metrics.
2408 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
2409 * Assumption : 0 < dstSize <= 128 KB */
2410 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
2413 * The minimum workspace size for the `workSpace` used in
2414 * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
2416 * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
2417 * HUF_TABLELOG_MAX=12 to ~1850 bytes when HUF_TABLELOG_MAX=15.
2418 * Buffer overflow errors may potentially occur if code modifications result in
2419 * a required workspace size greater than that specified in the following
2422 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
2423 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
2425 #ifndef HUF_FORCE_DECOMPRESS_X2
2426 size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
2427 size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
2429 #ifndef HUF_FORCE_DECOMPRESS_X1
2430 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
2431 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
2434 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
2435 #ifndef HUF_FORCE_DECOMPRESS_X2
2436 size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
2438 #ifndef HUF_FORCE_DECOMPRESS_X1
2439 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
2443 /* ====================== */
2444 /* single stream variants */
2445 /* ====================== */
2447 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
2448 size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
2449 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
2450 /** HUF_compress1X_repeat() :
2451 * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
2452 * If it uses hufTable it does not modify hufTable or repeat.
2453 * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
2454 * If preferRepeat then the old table will always be used if valid. */
2455 size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
2456 const void* src, size_t srcSize,
2457 unsigned maxSymbolValue, unsigned tableLog,
2458 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
2459 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
2461 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
2462 #ifndef HUF_FORCE_DECOMPRESS_X1
2463 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
2466 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
2467 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
2468 #ifndef HUF_FORCE_DECOMPRESS_X2
2469 size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
2470 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
2472 #ifndef HUF_FORCE_DECOMPRESS_X1
2473 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
2474 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
2477 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
2478 #ifndef HUF_FORCE_DECOMPRESS_X2
2479 size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
2481 #ifndef HUF_FORCE_DECOMPRESS_X1
2482 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
2486 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
2488 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
2489 #ifndef HUF_FORCE_DECOMPRESS_X2
2490 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
2492 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
2493 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
2495 #endif /* HUF_STATIC_LINKING_ONLY */
2497 #if defined (__cplusplus)
2500 /**** ended inlining huf.h ****/
2504 unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
2507 /*=== Error Management ===*/
/* Tell whether a size_t returned by an FSE function is an error code.
 * Thin alias over the shared ERR_isError() helper. */
unsigned FSE_isError(size_t code)
{
    return ERR_isError(code);
}
/* Provide a human-readable description for an FSE error code.
 * Thin alias over the shared ERR_getErrorName() helper. */
const char* FSE_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
/* Tell whether a size_t returned by a HUF function is an error code.
 * Thin alias over the shared ERR_isError() helper. */
unsigned HUF_isError(size_t code)
{
    return ERR_isError(code);
}
/* Provide a human-readable description for a HUF error code.
 * Thin alias over the shared ERR_getErrorName() helper. */
const char* HUF_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
2515 /*-**************************************************************
2516 * FSE NCount encoding-decoding
2517 ****************************************************************/
/*! FSE_readNCount() :
 *  Decode the compact "NCount" header (normalized symbol counts) written by
 *  FSE_writeNCount(), filling `normalizedCounter[0 .. *maxSVPtr]`.
 *  On success, *tableLogPtr receives the decoded tableLog and *maxSVPtr is
 *  lowered to the last symbol actually present.
 *  @return : presumably the number of bytes consumed from headerBuffer (the
 *            final return statement is elided from this extract), or an error
 *            code testable with FSE_isError().
 *  NOTE(review): several original lines are elided from this extract (local
 *  declarations such as `bitStream`/`bitCount`/`remaining`/`threshold`/
 *  `count`/`buffer`, plus some closing braces); comments below describe only
 *  the visible statements. */
2518 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
2519 const void* headerBuffer, size_t hbSize)
2521 const BYTE* const istart = (const BYTE*) headerBuffer;
2522 const BYTE* const iend = istart + hbSize;
2523 const BYTE* ip = istart;
2529 unsigned charnum = 0;
2533 /* This function only works when hbSize >= 4 */
/* Small-input path : copy the input into a zero-padded local buffer and
 * recurse, then verify the reported size still fits the real input. */
2535 memset(buffer, 0, sizeof(buffer));
2536 memcpy(buffer, headerBuffer, hbSize);
2537 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
2538 buffer, sizeof(buffer));
2539 if (FSE_isError(countSize)) return countSize;
2540 if (countSize > hbSize) return ERROR(corruption_detected);
2543 assert(hbSize >= 4);
/* Main path : at least 4 readable input bytes guaranteed from here on. */
2546 memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
2547 bitStream = MEM_readLE32(ip);
2548 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
2549 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
2552 *tableLogPtr = nbBits;
/* `remaining` tracks the probability mass left to distribute (total 1<<nbBits,
 * +1 so the loop condition `remaining>1` terminates exactly on exhaustion). */
2553 remaining = (1<<nbBits)+1;
2554 threshold = 1<<nbBits;
2557 while ((remaining>1) & (charnum<=*maxSVPtr)) {
2559 unsigned n0 = charnum;
/* Zero-run encoding : repeat markers extend the run of zero-count symbols
 * (the per-0xFFFF-marker increment is on a line elided from this extract;
 * the trailing 2-bit fields visibly add `bitStream & 3` each). */
2560 while ((bitStream & 0xFFFF) == 0xFFFF) {
2564 bitStream = MEM_readLE32(ip) >> bitCount;
2569 while ((bitStream & 3) == 3) {
2574 n0 += bitStream & 3;
2576 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
2577 while (charnum < n0) normalizedCounter[charnum++] = 0;
2578 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
2579 assert((bitCount >> 3) <= 3); /* For first condition to work */
2582 bitStream = MEM_readLE32(ip) >> bitCount;
/* Variable-width count field : values below `max` take one bit fewer. */
2586 { int const max = (2*threshold-1) - remaining;
2589 if ((bitStream & (threshold-1)) < (U32)max) {
2590 count = bitStream & (threshold-1);
2591 bitCount += nbBits-1;
2593 count = bitStream & (2*threshold-1);
2594 if (count >= threshold) count -= max;
2598 count--; /* extra accuracy */
2599 remaining -= count < 0 ? -count : count; /* -1 means +1 */
2600 normalizedCounter[charnum++] = (short)count;
/* Halve the field width while the remaining mass fits in fewer bits. */
2602 while (remaining < threshold) {
/* Refill the 32-bit window, clamping reads at the end of the input. */
2607 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
2611 bitCount -= (int)(8 * (iend - 4 - ip));
2614 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
2615 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
2616 if (remaining != 1) return ERROR(corruption_detected); /* mass must be fully consumed */
2617 if (bitCount > 32) return ERROR(corruption_detected);
2618 *maxSVPtr = charnum-1;
2620 ip += (bitCount+7)>>3; /* round consumed bits up to whole bytes */
2625 /*! HUF_readStats() :
2626 Read compact Huffman tree, saved by HUF_writeCTable().
2627 `huffWeight` is destination buffer.
2628 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
2629 @return : size read from `src` , or an error Code .
2630 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
*/
/* NOTE(review): some original lines are elided from this extract (e.g. the
 * declarations of `iSize`, `oSize`, `weightTotal`, and several closing
 * braces); comments below describe only the visible statements. */
2632 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
2633 U32* nbSymbolsPtr, U32* tableLogPtr,
2634 const void* src, size_t srcSize)
2637 const BYTE* ip = (const BYTE*) src;
2641 if (!srcSize) return ERROR(srcSize_wrong);
2643 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
/* Header byte >= 128 : weights are stored raw, two 4-bit weights per byte. */
2645 if (iSize >= 128) { /* special header */
2646 oSize = iSize - 127;
2647 iSize = ((oSize+1)/2);
2648 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
2649 if (oSize >= hwSize) return ERROR(corruption_detected);
2652 for (n=0; n<oSize; n+=2) {
2653 huffWeight[n] = ip[n/2] >> 4;
2654 huffWeight[n+1] = ip[n/2] & 15;
2656 else { /* header compressed with FSE (normal case) */
2657 FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
2658 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
2659 oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
2660 if (FSE_isError(oSize)) return oSize;
2663 /* collect weight stats */
2664 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
2666 { U32 n; for (n=0; n<oSize; n++) {
2667 if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
2668 rankStats[huffWeight[n]]++;
2669 weightTotal += (1 << huffWeight[n]) >> 1; /* weight w contributes 2^(w-1); weight 0 contributes nothing */
2671 if (weightTotal == 0) return ERROR(corruption_detected);
2673 /* get last non-null symbol weight (implied, total must be 2^n) */
2674 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
2675 if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
2676 *tableLogPtr = tableLog;
2677 /* determine last weight */
2678 { U32 const total = 1 << tableLog;
2679 U32 const rest = total - weightTotal; /* mass the implied last symbol must supply */
2680 U32 const verif = 1 << BIT_highbit32(rest);
2681 U32 const lastWeight = BIT_highbit32(rest) + 1;
2682 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
2683 huffWeight[oSize] = (BYTE)lastWeight;
2684 rankStats[lastWeight]++;
2687 /* check tree construction validity */
2688 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
2691 *nbSymbolsPtr = (U32)(oSize+1); /* decoded weights + the one implied symbol */
2694 /**** ended inlining common/entropy_common.c ****/
2695 /**** start inlining common/error_private.c ****/
2697 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2698 * All rights reserved.
2700 * This source code is licensed under both the BSD-style license (found in the
2701 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
2702 * in the COPYING file in the root directory of this source tree).
2703 * You may select, at your option, one of the above-listed licenses.
2706 /* The purpose of this file is to have a single list of error strings embedded in binary */
2708 /**** skipping file: error_private.h ****/
/* ERR_getErrorString() :
 * Map an ERR_enum error code to a static, human-readable description.
 * When ZSTD_STRIP_ERROR_STRINGS is defined, every code maps to a single
 * placeholder string, shrinking binary size.
 * NOTE(review): the `switch(code)` statement, the `#else`, and a few brace
 * lines are elided from this extract; the case labels below belong to that
 * switch. */
2710 const char* ERR_getErrorString(ERR_enum code)
2712 #ifdef ZSTD_STRIP_ERROR_STRINGS
2714 return "Error strings stripped";
2716 static const char* const notErrorCode = "Unspecified error code";
2719 case PREFIX(no_error): return "No error detected";
2720 case PREFIX(GENERIC): return "Error (generic)";
2721 case PREFIX(prefix_unknown): return "Unknown frame descriptor";
2722 case PREFIX(version_unsupported): return "Version not supported";
2723 case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
2724 case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
2725 case PREFIX(corruption_detected): return "Corrupted block detected";
2726 case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
2727 case PREFIX(parameter_unsupported): return "Unsupported parameter";
2728 case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
2729 case PREFIX(init_missing): return "Context should be init first";
2730 case PREFIX(memory_allocation): return "Allocation error : not enough memory";
2731 case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
2732 case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
2733 case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
2734 case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
2735 case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
2736 case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
2737 case PREFIX(dictionary_wrong): return "Dictionary mismatch";
2738 case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
2739 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
2740 case PREFIX(srcSize_wrong): return "Src size is incorrect";
2741 case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
2742 /* following error codes are not stable and may be removed or changed in a future version */
2743 case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
2744 case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
2745 case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
2746 case PREFIX(maxCode):
2747 default: return notErrorCode; /* unknown or out-of-range code */
2751 /**** ended inlining common/error_private.c ****/
2752 /**** start inlining common/fse_decompress.c ****/
2753 /* ******************************************************************
2754 * FSE : Finite State Entropy decoder
2755 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
2757 * You can contact the author at :
2758 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
2759 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
2761 * This source code is licensed under both the BSD-style license (found in the
2762 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
2763 * in the COPYING file in the root directory of this source tree).
2764 * You may select, at your option, one of the above-listed licenses.
2765 ****************************************************************** */
2768 /* **************************************************************
2770 ****************************************************************/
2771 #include <stdlib.h> /* malloc, free, qsort */
2772 #include <string.h> /* memcpy, memset */
2773 /**** skipping file: bitstream.h ****/
2774 /**** skipping file: compiler.h ****/
2775 #define FSE_STATIC_LINKING_ONLY
2776 /**** skipping file: fse.h ****/
2777 /**** skipping file: error_private.h ****/
2780 /* **************************************************************
2782 ****************************************************************/
2783 #define FSE_isError ERR_isError
2784 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
2787 /* **************************************************************
2789 ****************************************************************/
2791 designed to be included
2792 for type-specific functions (template emulation in C)
2793 Objective is to write these functions only once, for improved maintenance
2797 #ifndef FSE_FUNCTION_EXTENSION
2798 # error "FSE_FUNCTION_EXTENSION must be defined"
2800 #ifndef FSE_FUNCTION_TYPE
2801 # error "FSE_FUNCTION_TYPE must be defined"
2804 /* Function names */
2805 #define FSE_CAT(X,Y) X##Y
2806 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
2807 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
2810 /* Function templates */
/* FSE_createDTable() :
 * Heap-allocate an FSE decoding table large enough for `tableLog`.
 * tableLog is silently clamped to FSE_TABLELOG_ABSOLUTE_MAX.
 * Caller owns the result (presumably released via FSE_freeDTable()). */
2811 FSE_DTable* FSE_createDTable (unsigned tableLog)
2813 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
2814 return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); /* NOTE(review): malloc result is returned unchecked; callers must test for NULL */
/* FSE_freeDTable() :
 * Release a table allocated by FSE_createDTable().
 * NOTE(review): the function body is elided from this extract; only the
 * signature line is visible here. */
2817 void FSE_freeDTable (FSE_DTable* dt)
/* FSE_buildDTable() :
 * Build an FSE decoding table inside `dt` from a table of normalized counts.
 * @normalizedCounter : counts for symbols 0..maxSymbolValue (sum = 1<<tableLog;
 *                      -1 marks a "low probability" symbol)
 * @return : 0 on success (presumably — the final return is elided from this
 *           extract), or an error code testable with FSE_isError().
 * NOTE(review): some original lines (loop-variable declarations, closing
 * braces) are elided from this extract. */
2822 size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
2824 void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
2825 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
2826 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
2828 U32 const maxSV1 = maxSymbolValue + 1;
2829 U32 const tableSize = 1 << tableLog;
2830 U32 highThreshold = tableSize-1; /* low-probability symbols are placed from the top down */
2833 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
2834 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
2836 /* Init, lay down lowprob symbols */
2837 { FSE_DTableHeader DTableH;
2838 DTableH.tableLog = (U16)tableLog;
2839 DTableH.fastMode = 1;
2840 { S16 const largeLimit= (S16)(1 << (tableLog-1));
2842 for (s=0; s<maxSV1; s++) {
2843 if (normalizedCounter[s]==-1) {
2844 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
/* a symbol holding at least half the table disables the fast decode path */
2847 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
2848 symbolNext[s] = normalizedCounter[s];
2850 memcpy(dt, &DTableH, sizeof(DTableH));
2853 /* Spread symbols */
2854 { U32 const tableMask = tableSize-1;
2855 U32 const step = FSE_TABLESTEP(tableSize);
2856 U32 s, position = 0;
2857 for (s=0; s<maxSV1; s++) {
2859 for (i=0; i<normalizedCounter[s]; i++) {
2860 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
2861 position = (position + step) & tableMask;
2862 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
2864 if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
2867 /* Build Decoding table */
2869 for (u=0; u<tableSize; u++) {
2870 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
2871 U32 const nextState = symbolNext[symbol]++;
2872 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
2873 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
2880 #ifndef FSE_COMMONDEFS_ONLY
2882 /*-*******************************************************
2883 * Decompression (Byte symbols)
2884 *********************************************************/
/* FSE_buildDTable_rle() :
 * Build a degenerate (tableLog==0) decoding table whose single cell always
 * yields `symbolValue` — used for RLE-style blocks.
 * NOTE(review): the declaration of `ptr`, the remaining cell-field
 * assignments, and the return statement are elided from this extract. */
2885 size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
2888 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
2889 void* dPtr = dt + 1; /* payload cells start right after the header word */
2890 FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
2892 DTableH->tableLog = 0;
2893 DTableH->fastMode = 0;
2896 cell->symbol = symbolValue;
/* FSE_buildDTable_raw() :
 * Build a decoding table for uncompressed data : every symbol value 0..2^nbBits-1
 * is equiprobable and decoded with exactly `nbBits` bits.
 * NOTE(review): the declaration of `ptr`/`s` and the return statement are
 * elided from this extract. */
2903 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
2906 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
2907 void* dPtr = dt + 1; /* payload cells start right after the header word */
2908 FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
2909 const unsigned tableSize = 1 << nbBits;
2910 const unsigned tableMask = tableSize - 1;
2911 const unsigned maxSV1 = tableMask+1;
2915 if (nbBits < 1) return ERROR(GENERIC); /* min size */
2917 /* Build Decoding Table */
2918 DTableH->tableLog = (U16)nbBits;
2919 DTableH->fastMode = 1;
2920 for (s=0; s<maxSV1; s++) {
2921 dinfo[s].newState = 0; /* stateless : every cell reads nbBits and restarts */
2922 dinfo[s].symbol = (BYTE)s;
2923 dinfo[s].nbBits = (BYTE)nbBits;
/* FSE_decompress_usingDTable_generic() :
 * Core FSE decoder : regenerates up to `maxDstSize` bytes into `dst` from the
 * bitstream `cSrc`, interleaving two decode states for instruction-level
 * parallelism. `fast` selects the unchecked symbol decoder; since this
 * template is force-inlined, `fast` is a compile-time constant and the
 * branch in FSE_GETSYMBOL folds away.
 * NOTE(review): some original lines (the `op` and `bitD` declarations, the
 * tail-loop braces, and the final return) are elided from this extract. */
2929 FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
2930 void* dst, size_t maxDstSize,
2931 const void* cSrc, size_t cSrcSize,
2932 const FSE_DTable* dt, const unsigned fast)
2934 BYTE* const ostart = (BYTE*) dst;
2936 BYTE* const omax = op + maxDstSize;
2937 BYTE* const olimit = omax-3; /* main loop writes 4 bytes per iteration */
2940 FSE_DState_t state1;
2941 FSE_DState_t state2;
2944 CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
2946 FSE_initDState(&state1, &bitD, dt);
2947 FSE_initDState(&state2, &bitD, dt);
2949 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
2951 /* 4 symbols per loop */
2952 for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
2953 op[0] = FSE_GETSYMBOL(&state1);
2955 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
2956 BIT_reloadDStream(&bitD);
2958 op[1] = FSE_GETSYMBOL(&state2);
2960 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
2961 { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
2963 op[2] = FSE_GETSYMBOL(&state1);
2965 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
2966 BIT_reloadDStream(&bitD);
2968 op[3] = FSE_GETSYMBOL(&state2);
/* tail : drain both states one symbol at a time until the stream ends */
2972 /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
2974 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
2975 *op++ = FSE_GETSYMBOL(&state1);
2976 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
2977 *op++ = FSE_GETSYMBOL(&state2);
2981 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
2982 *op++ = FSE_GETSYMBOL(&state2);
2983 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
2984 *op++ = FSE_GETSYMBOL(&state1);
/* FSE_decompress_usingDTable() :
 * Decompress `cSrc` into `dst` using a prebuilt decoding table.
 * Reads the table's fastMode flag and dispatches to the appropriate
 * compile-time specialization of the generic decoder, so `fast` is a
 * constant inside the inlined body. */
2992 size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
2993 const void* cSrc, size_t cSrcSize,
2994 const FSE_DTable* dt)
2996 const void* ptr = dt; /* header lives in the first table word */
2997 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
2998 const U32 fastMode = DTableH->fastMode;
3000 /* select fast mode (static) */
3001 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
3002 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
/* FSE_decompress_wksp() :
 * Single-call FSE decompression using a caller-provided table workspace :
 * reads the NCount header from `cSrc`, builds a DTable into `workSpace`
 * (which must be able to hold a table of log size `maxLog`), then decodes the
 * remaining payload into `dst`.
 * @return : number of regenerated bytes, or an error code testable with
 *           FSE_isError().
 * NOTE(review): the line advancing `ip` past the NCount header appears to be
 * elided from this extract, along with the `tableLog` declaration. */
3006 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
3008 const BYTE* const istart = (const BYTE*)cSrc;
3009 const BYTE* ip = istart;
3010 short counting[FSE_MAX_SYMBOL_VALUE+1];
3012 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
3014 /* normal FSE decoding mode */
3015 size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
3016 if (FSE_isError(NCountLength)) return NCountLength;
3017 /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
3018 if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
3020 cSrcSize -= NCountLength; /* remaining bytes are the compressed payload */
3022 CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
3024 return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
/* A DTable sized for the largest supported tableLog, for stack allocation. */
3028 typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
/* FSE_decompress() :
 * Convenience wrapper : decompress with a maximum-size decoding table
 * allocated on the stack, so no workspace management is required. */
3030 size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
3032 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
3033 return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
3038 #endif /* FSE_COMMONDEFS_ONLY */
3039 /**** ended inlining common/fse_decompress.c ****/
3040 /**** start inlining common/pool.c ****/
3042 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3043 * All rights reserved.
3045 * This source code is licensed under both the BSD-style license (found in the
3046 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
3047 * in the COPYING file in the root directory of this source tree).
3048 * You may select, at your option, one of the above-listed licenses.
3052 /* ====== Dependencies ======= */
3053 #include <stddef.h> /* size_t */
3054 /**** skipping file: debug.h ****/
3055 /**** start inlining zstd_internal.h ****/
3057 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3058 * All rights reserved.
3060 * This source code is licensed under both the BSD-style license (found in the
3061 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
3062 * in the COPYING file in the root directory of this source tree).
3063 * You may select, at your option, one of the above-listed licenses.
3066 #ifndef ZSTD_CCOMMON_H_MODULE
3067 #define ZSTD_CCOMMON_H_MODULE
3069 /* this module contains definitions which must be identical
3070 * across compression, decompression and dictBuilder.
3071 * It also contains a few functions useful to at least 2 of them
3072 * and which benefit from being inlined */
3074 /*-*************************************
3076 ***************************************/
3077 #if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
3078 #include <arm_neon.h>
3080 /**** skipping file: compiler.h ****/
3081 /**** skipping file: mem.h ****/
3082 /**** skipping file: debug.h ****/
3083 /**** skipping file: error_private.h ****/
3084 #define ZSTD_STATIC_LINKING_ONLY
3085 /**** start inlining ../zstd.h ****/
3087 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3088 * All rights reserved.
3090 * This source code is licensed under both the BSD-style license (found in the
3091 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
3092 * in the COPYING file in the root directory of this source tree).
3093 * You may select, at your option, one of the above-listed licenses.
3095 #if defined (__cplusplus)
3099 #ifndef ZSTD_H_235446
3100 #define ZSTD_H_235446
3102 /* ====== Dependency ======*/
3103 #include <limits.h> /* INT_MAX */
3104 #include <stddef.h> /* size_t */
3107 /* ===== ZSTDLIB_API : control library symbols visibility ===== */
3108 #ifndef ZSTDLIB_VISIBILITY
3109 # if defined(__GNUC__) && (__GNUC__ >= 4)
3110 # define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default")))
3112 # define ZSTDLIB_VISIBILITY
3115 #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
3116 # define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY
3117 #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
3118 # define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
3120 # define ZSTDLIB_API ZSTDLIB_VISIBILITY
3124 /*******************************************************************************
3127 zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
3128 real-time compression scenarios at zlib-level and better compression ratios.
3129 The zstd compression library provides in-memory compression and decompression
3132 The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
3133 which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
3134 caution, as they require more memory. The library also offers negative
3135 compression levels, which extend the range of speed vs. ratio preferences.
3136 The lower the level, the faster the speed (at the cost of compression).
3138 Compression can be done in:
3139 - a single step (described as Simple API)
3140 - a single step, reusing a context (described as Explicit context)
3141 - unbounded multiple steps (described as Streaming compression)
3143 The compression ratio achievable on small data can be highly improved using
3144 a dictionary. Dictionary compression can be performed in:
3145 - a single step (described as Simple dictionary API)
3146 - a single step, reusing a dictionary (described as Bulk-processing
3149 Advanced experimental functions can be accessed using
3150 `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
3152 Advanced experimental APIs should never be used with a dynamically-linked
3153 library. They are not "stable"; their definitions or signatures may change in
3154 the future. Only static linking is allowed.
3155 *******************************************************************************/
3157 /*------ Version ------*/
3158 #define ZSTD_VERSION_MAJOR 1
3159 #define ZSTD_VERSION_MINOR 4
3160 #define ZSTD_VERSION_RELEASE 5
3162 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
3163 ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
3165 #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
3166 #define ZSTD_QUOTE(str) #str
3167 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
3168 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
3169 ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
3171 /* *************************************
3173 ***************************************/
3174 #ifndef ZSTD_CLEVEL_DEFAULT
3175 # define ZSTD_CLEVEL_DEFAULT 3
3178 /* *************************************
3180 ***************************************/
3182 /* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
3183 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
3184 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */
3185 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
3186 #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0
3188 #define ZSTD_BLOCKSIZELOG_MAX 17
3189 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX)
3193 /***************************************
3195 ***************************************/
3196 /*! ZSTD_compress() :
3197 * Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
3198 * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
3199 * @return : compressed size written into `dst` (<= `dstCapacity),
3200 * or an error code if it fails (which can be tested using ZSTD_isError()). */
3201 ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
3202 const void* src, size_t srcSize,
3203 int compressionLevel);
3205 /*! ZSTD_decompress() :
3206 * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
3207 * `dstCapacity` is an upper bound of originalSize to regenerate.
3208 * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
3209 * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
3210 * or an errorCode if it fails (which can be tested using ZSTD_isError()). */
3211 ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
3212 const void* src, size_t compressedSize);
3214 /*! ZSTD_getFrameContentSize() : requires v1.3.0+
3215 * `src` should point to the start of a ZSTD encoded frame.
3216 * `srcSize` must be at least as large as the frame header.
3217 * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
3218 * @return : - decompressed size of `src` frame content, if known
3219 * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
3220 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
3221 * note 1 : a 0 return value means the frame is valid but "empty".
3222 * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
3223 * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
3224 * In which case, it's necessary to use streaming mode to decompress data.
3225 * Optionally, application can rely on some implicit limit,
3226 * as ZSTD_decompress() only needs an upper bound of decompressed size.
3227 * (For example, data could be necessarily cut into blocks <= 16 KB).
3228 * note 3 : decompressed size is always present when compression is completed using single-pass functions,
3229 * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
3230 * note 4 : decompressed size can be very large (64-bits value),
3231 * potentially larger than what local system can handle as a single memory segment.
3232 * In which case, it's necessary to use streaming mode to decompress data.
3233 * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
3234 * Always ensure return value fits within application's authorized limits.
3235 * Each application can set its own limits.
3236 * note 6 : This function replaces ZSTD_getDecompressedSize() */
3237 #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
3238 #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
3239 ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
3241 /*! ZSTD_getDecompressedSize() :
3242 * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
3243 * Both functions work the same way, but ZSTD_getDecompressedSize() blends
3244 * "empty", "unknown" and "error" results to the same return value (0),
3245 * while ZSTD_getFrameContentSize() gives them separate return values.
3246 * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
3247 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
3249 /*! ZSTD_findFrameCompressedSize() :
3250 * `src` should point to the start of a ZSTD frame or skippable frame.
3251 * `srcSize` must be >= first frame size
3252 * @return : the compressed size of the first frame starting at `src`,
3253 * suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
3254 * or an error code if input is invalid */
3255 ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
3258 /*====== Helper functions ======*/
3259 #define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
3260 ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
3261 ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
3262 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
3263 ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */
3264 ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
3267 /***************************************
3269 ***************************************/
3270 /*= Compression context
3271 * When compressing many times,
3272 * it is recommended to allocate a context just once,
3273 * and re-use it for each successive compression operation.
3274 * This will make workload friendlier for system's memory.
3275 * Note : re-using context is just a speed / resource optimization.
3276 * It doesn't change the compression ratio, which remains identical.
3277 * Note 2 : In multi-threaded environments,
3278 * use one different context per thread for parallel execution.
3280 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
3281 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
3282 ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
3284 /*! ZSTD_compressCCtx() :
3285 * Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
3286 * Important : in order to behave similarly to `ZSTD_compress()`,
3287 * this function compresses at requested compression level,
3288 * __ignoring any other parameter__ .
3289 * If any advanced parameter was set using the advanced API,
3290 * they will all be reset. Only `compressionLevel` remains.
3292 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
3293 void* dst, size_t dstCapacity,
3294 const void* src, size_t srcSize,
3295 int compressionLevel);
3297 /*= Decompression context
3298 * When decompressing many times,
3299 * it is recommended to allocate a context only once,
3300 * and re-use it for each successive decompression operation.
3301 * This will make workload friendlier for system's memory.
3302 * Use one context per thread for parallel execution. */
3303 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
3304 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
3305 ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
3307 /*! ZSTD_decompressDCtx() :
3308 * Same as ZSTD_decompress(),
3309 * requires an allocated ZSTD_DCtx.
3310 * Compatible with sticky parameters.
3312 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
3313 void* dst, size_t dstCapacity,
3314 const void* src, size_t srcSize);
3317 /***************************************
3318 * Advanced compression API
3319 ***************************************/
3322 * Parameters are pushed one by one into an existing context,
3323 * using ZSTD_CCtx_set*() functions.
3324 * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
3325 * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
3326 * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
3328 * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
3330 * This API supersedes all other "advanced" API entry points in the experimental section.
3331 * In the future, we expect to remove from experimental API entry points which are redundant with this API.
3335 /* Compression strategies, listed from fastest to strongest */
3336 typedef enum { ZSTD_fast=1,
3345 /* note : new strategies _might_ be added in the future.
3346 Only the order (from fast to strong) is guaranteed */
3352 /* compression parameters
3353 * Note: When compressing with a ZSTD_CDict these parameters are superseded
3354 * by the parameters used to construct the ZSTD_CDict.
3355 * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
3356 ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
3357 * Note that exact compression parameters are dynamically determined,
3358 * depending on both compression level and srcSize (when known).
3359 * Default level is ZSTD_CLEVEL_DEFAULT==3.
3360 * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
3361 * Note 1 : it's possible to pass a negative compression level.
3362 * Note 2 : setting a level does not automatically set all other compression parameters
3363 * to default. Setting this will however eventually dynamically impact the compression
3364 * parameters which have not been manually set. The manually set
3365 * ones will 'stick'. */
3366 /* Advanced compression parameters :
3367 * It's possible to pin down compression parameters to some specific values.
3368 * In which case, these values are no longer dynamically selected by the compressor */
3369 ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2.
3370 * This will set a memory budget for streaming decompression,
3371 * with larger values requiring more memory
3372 * and typically compressing more.
3373 * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
3374 * Special: value 0 means "use default windowLog".
3375 * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
3376 * requires explicitly allowing such size at streaming decompression stage. */
3377 ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2.
3378 * Resulting memory usage is (1 << (hashLog+2)).
3379 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
3380 * Larger tables improve compression ratio of strategies <= dFast,
3381 * and improve speed of strategies > dFast.
3382 * Special: value 0 means "use default hashLog". */
3383 ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2.
3384 * Resulting memory usage is (1 << (chainLog+2)).
3385 * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
3386 * Larger tables result in better and slower compression.
3387 * This parameter is useless for "fast" strategy.
3388 * It's still useful when using "dfast" strategy,
3389 * in which case it defines a secondary probe table.
3390 * Special: value 0 means "use default chainLog". */
3391 ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2.
3392 * More attempts result in better and slower compression.
3393 * This parameter is useless for "fast" and "dFast" strategies.
3394 * Special: value 0 means "use default searchLog". */
3395 ZSTD_c_minMatch=105, /* Minimum size of searched matches.
3396 * Note that Zstandard can still find matches of smaller size,
3397 * it just tweaks its search algorithm to look for this size and larger.
3398 * Larger values increase compression and decompression speed, but decrease ratio.
3399 * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
3400 * Note that currently, for all strategies < btopt, effective minimum is 4.
3401 * and, for all strategies > fast, effective maximum is 6.
3402 * Special: value 0 means "use default minMatchLength". */
3403 ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
3404 * For strategies btopt, btultra & btultra2:
3405 * Length of Match considered "good enough" to stop search.
3406 * Larger values make compression stronger, and slower.
3407 * For strategy fast:
3408 * Distance between match sampling.
3409 * Larger values make compression faster, and weaker.
3410 * Special: value 0 means "use default targetLength". */
3411 ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition.
3412 * The higher the value of selected strategy, the more complex it is,
3413 * resulting in stronger and slower compression.
3414 * Special: value 0 means "use default strategy". */
3416 /* LDM mode parameters */
3417 ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
3418 * This parameter is designed to improve compression ratio
3419 * for large inputs, by finding large matches at long distance.
3420 * It increases memory usage and window size.
3421 * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
3422 * except when expressly set to a different value. */
3423 ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2.
3424 * Larger values increase memory usage and compression ratio,
3425 * but decrease compression speed.
3426 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
3427 * default: windowlog - 7.
3428 * Special: value 0 means "automatically determine hashlog". */
3429 ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher.
3430 * Larger/too small values usually decrease compression ratio.
3431 * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
3432 * Special: value 0 means "use default value" (default: 64). */
3433 ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
3434 * Larger values improve collision resolution but decrease compression speed.
3435 * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
3436 * Special: value 0 means "use default value" (default: 3). */
3437 ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
3438 * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
3439 * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
3440 * Larger values improve compression speed.
3441 * Deviating far from default value will likely result in a compression ratio decrease.
3442 * Special: value 0 means "automatically determine hashRateLog". */
3444 /* frame parameters */
3445 ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
3446 * Content size must be known at the beginning of compression.
3447 * This is automatically the case when using ZSTD_compress2(),
3448 * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
3449 ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
3450 ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */
3452 /* multi-threading parameters */
3453 /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
3454 * They return an error otherwise. */
3455 ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
3456 * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() :
3457 * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
3458 * while compression work is performed in parallel, within worker threads.
3459 * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
3460 * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
3461 * More workers improve speed, but also increase memory usage.
3462 * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
3463 ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
3464 * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
3465 * 0 means default, which is dynamically determined based on compression parameters.
3466 * Job size must be a minimum of overlap size, or 1 MB, whichever is larger.
3467 * The minimum size is automatically and transparently enforced. */
3468 ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
3469 * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
3470 * It helps preserve compression ratio, while each job is compressed in parallel.
3471 * This value is enforced only when nbWorkers >= 1.
3472 * Larger values increase compression ratio, but decrease speed.
3473 * Possible values range from 0 to 9 :
3474 * - 0 means "default" : value will be determined by the library, depending on strategy
3475 * - 1 means "no overlap"
3476 * - 9 means "full overlap", using a full window size.
3477 * Each intermediate rank increases/decreases load size by a factor 2 :
3478 * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default
3479 * default value varies between 6 and 9, depending on strategy */
3481 /* note : additional experimental parameters are also available
3482 * within the experimental section of the API.
3483 * At the time of this writing, they include :
3486 * ZSTD_c_forceMaxWindow
3487 * ZSTD_c_forceAttachDict
3488 * ZSTD_c_literalCompressionMode
3489 * ZSTD_c_targetCBlockSize
3490 * ZSTD_c_srcSizeHint
3491 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
3492 * note : never ever use experimentalParam? names directly;
3493 * also, the enums values themselves are unstable and can still change.
3495 ZSTD_c_experimentalParam1=500,
3496 ZSTD_c_experimentalParam2=10,
3497 ZSTD_c_experimentalParam3=1000,
3498 ZSTD_c_experimentalParam4=1001,
3499 ZSTD_c_experimentalParam5=1002,
3500 ZSTD_c_experimentalParam6=1003,
3501 ZSTD_c_experimentalParam7=1004
3510 /*! ZSTD_cParam_getBounds() :
3511 * All parameters must belong to an interval with lower and upper bounds,
3512 * otherwise they will either trigger an error or be automatically clamped.
3513 * @return : a structure, ZSTD_bounds, which contains
3514 * - an error status field, which must be tested using ZSTD_isError()
3515 * - lower and upper bounds, both inclusive
3517 ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
3519 /*! ZSTD_CCtx_setParameter() :
3520 * Set one compression parameter, selected by enum ZSTD_cParameter.
3521 * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
3522 * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
3523 * Setting a parameter is generally only possible during frame initialization (before starting compression).
3524 * Exception : when using multi-threading mode (nbWorkers >= 1),
3525 * the following parameters can be updated _during_ compression (within same frame):
3526 * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
3527 * new parameters will be active for next job only (after a flush()).
3528 * @return : an error code (which can be tested using ZSTD_isError()).
3530 ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
3532 /*! ZSTD_CCtx_setPledgedSrcSize() :
3533 * Total input data size to be compressed as a single frame.
3534 * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag.
3535 * This value will also be controlled at end of frame, and trigger an error if not respected.
3536 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
3537 * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
3538 * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
3539 * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
3540 * Note 2 : pledgedSrcSize is only valid once, for the next frame.
3541 * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
3542 * Note 3 : Whenever all input data is provided and consumed in a single round,
3543 * for example with ZSTD_compress2(),
3544 * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
3545 * this value is automatically overridden by srcSize instead.
3547 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
3550 ZSTD_reset_session_only = 1,
3551 ZSTD_reset_parameters = 2,
3552 ZSTD_reset_session_and_parameters = 3
3553 } ZSTD_ResetDirective;
3555 /*! ZSTD_CCtx_reset() :
3556 * There are 2 different things that can be reset, independently or jointly :
3557 * - The session : will stop compressing current frame, and make CCtx ready to start a new one.
3558 * Useful after an error, or to interrupt any ongoing compression.
3559 * Any internal data not yet flushed is cancelled.
3560 * Compression parameters and dictionary remain unchanged.
3561 * They will be used to compress next frame.
3562 * Resetting session never fails.
3563 * - The parameters : changes all parameters back to "default".
3564 * This removes any reference to any dictionary too.
3565 * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
3566 * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
3567 * - Both : similar to resetting the session, followed by resetting parameters.
3569 ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
3571 /*! ZSTD_compress2() :
3572 * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
3573 * ZSTD_compress2() always starts a new frame.
3574 * Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
3575 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
3576 * - The function is always blocking, returns when compression is completed.
3577 * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
3578 * @return : compressed size written into `dst` (<= `dstCapacity`),
3579 * or an error code if it fails (which can be tested using ZSTD_isError()).
3581 ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
3582 void* dst, size_t dstCapacity,
3583 const void* src, size_t srcSize);
3586 /***************************************
3587 * Advanced decompression API
3588 ***************************************/
3590 /* The advanced API pushes parameters one by one into an existing DCtx context.
3591 * Parameters are sticky, and remain valid for all following frames
3592 * using the same DCtx context.
3593 * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
3594 * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
3595 * Therefore, no new decompression function is necessary.
3600 ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
3601 * the streaming API will refuse to allocate memory buffer
3602 * in order to protect the host from unreasonable memory requirements.
3603 * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
3604 * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
3605 * Special: value 0 means "use default maximum windowLog". */
3607 /* note : additional experimental parameters are also available
3608 * within the experimental section of the API.
3609 * At the time of this writing, they include :
3611 * ZSTD_d_stableOutBuffer
3612 * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
3613 * note : never ever use experimentalParam? names directly
3615 ZSTD_d_experimentalParam1=1000,
3616 ZSTD_d_experimentalParam2=1001
3620 /*! ZSTD_dParam_getBounds() :
3621 * All parameters must belong to an interval with lower and upper bounds,
3622 * otherwise they will either trigger an error or be automatically clamped.
3623 * @return : a structure, ZSTD_bounds, which contains
3624 * - an error status field, which must be tested using ZSTD_isError()
3625 * - both lower and upper bounds, inclusive
3627 ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
3629 /*! ZSTD_DCtx_setParameter() :
3630 * Set one decompression parameter, selected by enum ZSTD_dParameter.
3631 * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
3632 * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
3633 * Setting a parameter is only possible during frame initialization (before starting decompression).
3634 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
3636 ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
3638 /*! ZSTD_DCtx_reset() :
3639 * Return a DCtx to clean state.
3640 * Session and parameters can be reset jointly or separately.
3641 * Parameters can only be reset when no active frame is being decompressed.
3642 * @return : 0, or an error code, which can be tested with ZSTD_isError()
3644 ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
3647 /****************************
3649 ****************************/
3651 typedef struct ZSTD_inBuffer_s {
3652 const void* src; /**< start of input buffer */
3653 size_t size; /**< size of input buffer */
3654 size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
3657 typedef struct ZSTD_outBuffer_s {
3658 void* dst; /**< start of output buffer */
3659 size_t size; /**< size of output buffer */
3660 size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
3665 /*-***********************************************************************
3666 * Streaming compression - HowTo
3668 * A ZSTD_CStream object is required to track streaming operation.
3669 * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
3670 * ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
3671 * It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
3673 * For parallel execution, use one separate ZSTD_CStream per thread.
3675 * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
3677 * Parameters are sticky : when starting a new compression on the same context,
3678 * it will re-use the same sticky parameters as previous compression session.
3679 * When in doubt, it's recommended to fully initialize the context before usage.
3680 * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
3681 * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
3682 * set more specific parameters, the pledged source size, or load a dictionary.
3684 * Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
3685 * consume input stream. The function will automatically update both `pos`
3686 * fields within `input` and `output`.
3687 * Note that the function may not consume the entire input, for example, because
3688 * the output buffer is already full, in which case `input.pos < input.size`.
3689 * The caller must check if input has been entirely consumed.
3690 * If not, the caller must make some room to receive more compressed data,
3691 * and then present again remaining input data.
3692 * note: ZSTD_e_continue is guaranteed to make some forward progress when called,
3693 * but doesn't guarantee maximal forward progress. This is especially relevant
3694 * when compressing with multiple threads. The call won't block if it can
3695 * consume some input, but if it can't it will wait for some, but not all,
3696 * output to be flushed.
3697 * @return : provides a minimum amount of data remaining to be flushed from internal buffers
3698 * or an error code, which can be tested using ZSTD_isError().
3700 * At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
3701 * using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
3702 * Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
3703 * In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
3704 * You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
3706 * note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
3707 * block until the flush is complete or the output buffer is full.
3708 * @return : 0 if internal buffers are entirely flushed,
3709 * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
3710 * or an error code, which can be tested using ZSTD_isError().
3712 * Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
3713 * It will perform a flush and write frame epilogue.
3714 * The epilogue is required for decoders to consider a frame completed.
3715 * flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
3716 * You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
3717 * start a new frame.
3718 * note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
3719 * block until the flush is complete or the output buffer is full.
3720 * @return : 0 if frame fully completed and fully flushed,
3721 * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
3722 * or an error code, which can be tested using ZSTD_isError().
3724 * *******************************************************************/
3726 typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
3727 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
3728 /*===== ZSTD_CStream management functions =====*/
3729 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
3730 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
3732 /*===== Streaming compression functions =====*/
3734 ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
3735 ZSTD_e_flush=1, /* flush any data provided so far,
3736 * it creates (at least) one new block, that can be decoded immediately on reception;
3737 * frame will continue: any future data can still reference previously compressed data, improving compression.
3738 * note : multithreaded compression will block to flush as much output as possible. */
3739 ZSTD_e_end=2 /* flush any remaining data _and_ close current frame.
3740 * note that frame is only closed after compressed data is fully flushed (return value == 0).
3741 * After that point, any additional data starts a new frame.
3742 * note : each frame is independent (does not reference any content from previous frame).
3743 * note : multithreaded compression will block to flush as much output as possible. */
3744 } ZSTD_EndDirective;
3746 /*! ZSTD_compressStream2() :
3747 * Behaves about the same as ZSTD_compressStream, with additional control on end directive.
3748 * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
3749 * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
3750 * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
3751 * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
3752 * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
3753 * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available,
3754 * and then immediately returns, just indicating that there is some data remaining to be flushed.
3755 * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1 byte.
3756 * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
3757 * - @return provides a minimum amount of data remaining to be flushed from internal buffers
3758 * or an error code, which can be tested using ZSTD_isError().
3759 * if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
3760 * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
3761 * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
3762 * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
3763 * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
3764 * Before starting a new compression job, or changing compression parameters,
3765 * it is required to fully flush internal buffers.
3767 ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
3768 ZSTD_outBuffer* output,
3769 ZSTD_inBuffer* input,
3770 ZSTD_EndDirective endOp);
3773 /* These buffer sizes are softly recommended.
3774 * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
3775 * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
3776 * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
3778 * However, note that these recommendations are from the perspective of a C caller program.
3779 * If the streaming interface is invoked from some other language,
3780 * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
3781 * a major performance rule is to reduce crossing such interface to an absolute minimum.
3782 * It's not rare that performance ends up being spent more on the interface, rather than on compression itself.
3783 * In which cases, prefer using large buffers, as large as practical,
3784 * for both input and output, to reduce the nb of roundtrips.
3786 ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
3787 ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
3790 /* *****************************************************************************
3791 * This following is a legacy streaming API.
3792 * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
3793 * It is redundant, but remains fully supported.
3794 * Advanced parameters and dictionary compression can only be used through the
3796 ******************************************************************************/
3801 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
3802 * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
3803 * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
3805 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
3807 * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
3808 * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
3809 * the next read size (if non-zero and not an error). ZSTD_compressStream2()
3810 * returns the minimum nb of bytes left to flush (if non-zero and not an error).
3812 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
3813 /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
3814 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
3815 /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
3816 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
3819 /*-***************************************************************************
3820 * Streaming decompression - HowTo
3822 * A ZSTD_DStream object is required to track streaming operations.
3823 * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
3824 * ZSTD_DStream objects can be re-used multiple times.
3826 * Use ZSTD_initDStream() to start a new decompression operation.
3827 * @return : recommended first input size
3828 * Alternatively, use advanced API to set specific properties.
3830 * Use ZSTD_decompressStream() repetitively to consume your input.
3831 * The function will update both `pos` fields.
3832 * If `input.pos < input.size`, some input has not been consumed.
3833 * It's up to the caller to present again remaining data.
3834 * The function tries to flush all data decoded immediately, respecting output buffer size.
3835 * If `output.pos < output.size`, decoder has flushed everything it could.
3836 *  But if `output.pos == output.size`, there might be some data left within internal buffers.
3837 * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
3838 * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
3839 * @return : 0 when a frame is completely decoded and fully flushed,
3840 * or an error code, which can be tested using ZSTD_isError(),
3841 * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
3842 * the return value is a suggested next input size (just a hint for better latency)
3843 * that will never request more than the remaining frame size.
3844 * *******************************************************************************/
3846 typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
3847 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
3848 /*===== ZSTD_DStream management functions =====*/
3849 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
3850 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
3852 /*===== Streaming decompression functions =====*/
3854 /* This function is redundant with the advanced API and equivalent to:
3856 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
3857 * ZSTD_DCtx_refDDict(zds, NULL);
3859 ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
3861 ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
3863 ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */
3864 ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
3867 /**************************
3868 * Simple dictionary API
3869 ***************************/
3870 /*! ZSTD_compress_usingDict() :
3871 * Compression at an explicit compression level using a Dictionary.
3872 * A dictionary can be any arbitrary data segment (also called a prefix),
3873 * or a buffer with specified information (see dictBuilder/zdict.h).
3874 * Note : This function loads the dictionary, resulting in significant startup delay.
3875 * It's intended for a dictionary used only once.
3876 * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
3877 ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
3878 void* dst, size_t dstCapacity,
3879 const void* src, size_t srcSize,
3880 const void* dict,size_t dictSize,
3881 int compressionLevel);
3883 /*! ZSTD_decompress_usingDict() :
3884 * Decompression using a known Dictionary.
3885 * Dictionary must be identical to the one used during compression.
3886 * Note : This function loads the dictionary, resulting in significant startup delay.
3887 * It's intended for a dictionary used only once.
3888 * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
3889 ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
3890 void* dst, size_t dstCapacity,
3891 const void* src, size_t srcSize,
3892 const void* dict,size_t dictSize);
3895 /***********************************
3896 * Bulk processing dictionary API
3897 **********************************/
3898 typedef struct ZSTD_CDict_s ZSTD_CDict;
3900 /*! ZSTD_createCDict() :
3901 * When compressing multiple messages or blocks using the same dictionary,
3902 * it's recommended to digest the dictionary only once, since it's a costly operation.
3903 * ZSTD_createCDict() will create a state from digesting a dictionary.
3904 * The resulting state can be used for future compression operations with very limited startup cost.
3905 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
3906 * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
3907 * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
3908 * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
3909 * in which case the only thing that it transports is the @compressionLevel.
3910 * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
3911 * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
3912 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
3913 int compressionLevel);
3915 /*! ZSTD_freeCDict() :
3916 * Function frees memory allocated by ZSTD_createCDict(). */
3917 ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
3919 /*! ZSTD_compress_usingCDict() :
3920 * Compression using a digested Dictionary.
3921 * Recommended when same dictionary is used multiple times.
3922 * Note : compression level is _decided at dictionary creation time_,
3923 * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
3924 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
3925 void* dst, size_t dstCapacity,
3926 const void* src, size_t srcSize,
3927 const ZSTD_CDict* cdict);
3930 typedef struct ZSTD_DDict_s ZSTD_DDict;
3932 /*! ZSTD_createDDict() :
3933 * Create a digested dictionary, ready to start decompression operation without startup delay.
3934 * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
3935 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
3937 /*! ZSTD_freeDDict() :
3938 * Function frees memory allocated with ZSTD_createDDict() */
3939 ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
3941 /*! ZSTD_decompress_usingDDict() :
3942 * Decompression using a digested Dictionary.
3943 * Recommended when same dictionary is used multiple times. */
3944 ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
3945 void* dst, size_t dstCapacity,
3946 const void* src, size_t srcSize,
3947 const ZSTD_DDict* ddict);
3950 /********************************
3951 * Dictionary helper functions
3952 *******************************/
3954 /*! ZSTD_getDictID_fromDict() :
3955 * Provides the dictID stored within dictionary.
3956 * if @return == 0, the dictionary is not conformant with Zstandard specification.
3957 * It can still be loaded, but as a content-only dictionary. */
3958 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
3960 /*! ZSTD_getDictID_fromDDict() :
3961 * Provides the dictID of the dictionary loaded into `ddict`.
3962 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
3963 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
3964 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
3966 /*! ZSTD_getDictID_fromFrame() :
3967 *  Provides the dictID required to decompress the frame stored within `src`.
3968 * If @return == 0, the dictID could not be decoded.
3969 *  This could be for one of the following reasons :
3970 * - The frame does not require a dictionary to be decoded (most common case).
3971 * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
3972 * Note : this use case also happens when using a non-conformant dictionary.
3973 * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
3974 * - This is not a Zstandard frame.
3975 * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
3976 ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
3979 /*******************************************************************************
3980 * Advanced dictionary and prefix API
3982 * This API allows dictionaries to be used with ZSTD_compress2(),
3983 * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
3984 * only reset when the context is reset with ZSTD_reset_parameters or
3985 * ZSTD_reset_session_and_parameters. Prefixes are single-use.
3986 ******************************************************************************/
3989 /*! ZSTD_CCtx_loadDictionary() :
3990 * Create an internal CDict from `dict` buffer.
3991 * Decompression will have to use same dictionary.
3992 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
3993 * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
3994 * meaning "return to no-dictionary mode".
3995 * Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
3996 * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
3997 * Note 2 : Loading a dictionary involves building tables.
3998 * It's also a CPU consuming operation, with non-negligible impact on latency.
3999 * Tables are dependent on compression parameters, and for this reason,
4000 * compression parameters can no longer be changed after loading a dictionary.
4001 * Note 3 :`dict` content will be copied internally.
4002 * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
4003 * In such a case, dictionary buffer must outlive its users.
4004 * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
4005 * to precisely select how dictionary content must be interpreted. */
4006 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
4008 /*! ZSTD_CCtx_refCDict() :
4009 * Reference a prepared dictionary, to be used for all next compressed frames.
4010 * Note that compression parameters are enforced from within CDict,
4011 * and supersede any compression parameter previously set within CCtx.
4012 *  The parameters ignored are labeled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
4013 * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
4014 * The dictionary will remain valid for future compressed frames using same CCtx.
4015 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4016 * Special : Referencing a NULL CDict means "return to no-dictionary mode".
4017 * Note 1 : Currently, only one dictionary can be managed.
4018 * Referencing a new dictionary effectively "discards" any previous one.
4019 * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
4020 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
4022 /*! ZSTD_CCtx_refPrefix() :
4023 * Reference a prefix (single-usage dictionary) for next compressed frame.
4024 * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
4025 * Decompression will need same prefix to properly regenerate data.
4026 * Compressing with a prefix is similar in outcome as performing a diff and compressing it,
4027 * but performs much faster, especially during decompression (compression speed is tunable with compression level).
4028 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4029 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
4030 * Note 1 : Prefix buffer is referenced. It **must** outlive compression.
4031 * Its content must remain unmodified during compression.
4032 * Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
4033 * ensure that the window size is large enough to contain the entire source.
4034 * See ZSTD_c_windowLog.
4035 * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
4036 * It's a CPU consuming operation, with non-negligible impact on latency.
4037 * If there is a need to use the same prefix multiple times, consider loadDictionary instead.
4038 * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
4039 * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
4040 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
4041 const void* prefix, size_t prefixSize);
4043 /*! ZSTD_DCtx_loadDictionary() :
4044 * Create an internal DDict from dict buffer,
4045 * to be used to decompress next frames.
4046 * The dictionary remains valid for all future frames, until explicitly invalidated.
4047 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4048 * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
4049 * meaning "return to no-dictionary mode".
4050 * Note 1 : Loading a dictionary involves building tables,
4051 * which has a non-negligible impact on CPU usage and latency.
4052 *           It's recommended to "load once, use many times", to amortize the cost.
4053 * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
4054 * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
4055 * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
4056 * how dictionary content is loaded and interpreted.
4058 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
4060 /*! ZSTD_DCtx_refDDict() :
4061 * Reference a prepared dictionary, to be used to decompress next frames.
4062 * The dictionary remains active for decompression of future frames using same DCtx.
4063 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4064 * Note 1 : Currently, only one dictionary can be managed.
4065 * Referencing a new dictionary effectively "discards" any previous one.
4066 * Special: referencing a NULL DDict means "return to no-dictionary mode".
4067 * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
4069 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
4071 /*! ZSTD_DCtx_refPrefix() :
4072 * Reference a prefix (single-usage dictionary) to decompress next frame.
4073 * This is the reverse operation of ZSTD_CCtx_refPrefix(),
4074 * and must use the same prefix as the one used during compression.
4075 * Prefix is **only used once**. Reference is discarded at end of frame.
4076 * End of frame is reached when ZSTD_decompressStream() returns 0.
4077 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4078 * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
4079 * Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
4080 * Prefix buffer must remain unmodified up to the end of frame,
4081 * reached when ZSTD_decompressStream() returns 0.
4082 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
4083 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
4084 * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
4085 * A full dictionary is more costly, as it requires building tables.
4087 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
4088 const void* prefix, size_t prefixSize);
4090 /* === Memory management === */
4092 /*! ZSTD_sizeof_*() :
4093 * These functions give the _current_ memory usage of selected object.
4094 * Note that object memory usage can evolve (increase or decrease) over time. */
4095 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
4096 ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
4097 ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
4098 ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
4099 ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
4100 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
4102 #endif /* ZSTD_H_235446 */
4105 /* **************************************************************************************
4106 * ADVANCED AND EXPERIMENTAL FUNCTIONS
4107 ****************************************************************************************
4108 * The definitions in the following section are considered experimental.
4109 * They are provided for advanced scenarios.
4110 * They should never be used with a dynamic library, as prototypes may change in the future.
4111 * Use them only in association with static linking.
4112 * ***************************************************************************************/
4114 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
4115 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
4117 /****************************************************************************************
4118 * experimental API (static linking only)
4119 ****************************************************************************************
4120 * The following symbols and constants
4121 * are not planned to join "stable API" status in the near future.
4122 * They can still change in future versions.
4123 * Some of them are planned to remain in the static_only section indefinitely.
4124 * Some of them might be removed in the future (especially when redundant with existing stable functions)
4125 * ***************************************************************************************/
4127 #define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
4128 #define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
4129 #define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
4130 #define ZSTD_SKIPPABLEHEADERSIZE 8
4132 /* compression parameter bounds */
4133 #define ZSTD_WINDOWLOG_MAX_32 30
4134 #define ZSTD_WINDOWLOG_MAX_64 31
4135 #define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
4136 #define ZSTD_WINDOWLOG_MIN 10
4137 #define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
4138 #define ZSTD_HASHLOG_MIN 6
4139 #define ZSTD_CHAINLOG_MAX_32 29
4140 #define ZSTD_CHAINLOG_MAX_64 30
4141 #define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
4142 #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
4143 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
4144 #define ZSTD_SEARCHLOG_MIN 1
4145 #define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
4146 #define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
4147 #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
4148 #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
4149 #define ZSTD_STRATEGY_MIN ZSTD_fast
4150 #define ZSTD_STRATEGY_MAX ZSTD_btultra2
4153 #define ZSTD_OVERLAPLOG_MIN 0
4154 #define ZSTD_OVERLAPLOG_MAX 9
4156 #define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame
4157 * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
4158 * to preserve host's memory from unreasonable requirements.
4159 * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
4160 * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
4163 /* LDM parameter bounds */
4164 #define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
4165 #define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
4166 #define ZSTD_LDM_MINMATCH_MIN 4
4167 #define ZSTD_LDM_MINMATCH_MAX 4096
4168 #define ZSTD_LDM_BUCKETSIZELOG_MIN 1
4169 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
4170 #define ZSTD_LDM_HASHRATELOG_MIN 0
4171 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
4173 /* Advanced parameter bounds */
4174 #define ZSTD_TARGETCBLOCKSIZE_MIN 64
4175 #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
4176 #define ZSTD_SRCSIZEHINT_MIN 0
4177 #define ZSTD_SRCSIZEHINT_MAX INT_MAX
4180 #define ZSTD_HASHLOG3_MAX 17
4183 /* --- Advanced types --- */
4185 typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
4188 unsigned int matchPos; /* Match pos in dst */
4189 /* If seqDef.offset > 3, then this is seqDef.offset - 3
4190 * If seqDef.offset < 3, then this is the corresponding repeat offset
4191 * But if seqDef.offset < 3 and litLength == 0, this is the
4192 * repeat offset before the corresponding repeat offset
4193 * And if seqDef.offset == 3 and litLength == 0, this is the
4194 * most recent repeat offset - 1
4196 unsigned int offset;
4197 unsigned int litLength; /* Literal length */
4198 unsigned int matchLength; /* Match length */
4199 /* 0 when seq not rep and seqDef.offset otherwise
4200 * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
4206 unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
4207 unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
4208 unsigned hashLog; /**< dispatch table : larger == faster, more memory */
4209 unsigned searchLog; /**< nb of searches : larger == more compression, slower */
4210 unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
4211 unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
4212 ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
4213 } ZSTD_compressionParameters;
4216 int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
4217 int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
4218 int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
4219 } ZSTD_frameParameters;
4222 ZSTD_compressionParameters cParams;
4223 ZSTD_frameParameters fParams;
4227 ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
4228 ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
4229 ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
4230 } ZSTD_dictContentType_e;
4233 ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
4234 ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
4235 } ZSTD_dictLoadMethod_e;
4238 ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
4239 ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
4240 * Useful to save 4 bytes per generated frame.
4241 * Decoder cannot recognise automatically this format, requiring this instruction. */
4245 /* Note: this enum and the behavior it controls are effectively internal
4246 * implementation details of the compressor. They are expected to continue
4247 * to evolve and should be considered only in the context of extremely
4248 * advanced performance tuning.
4250 * Zstd currently supports the use of a CDict in three ways:
4252 * - The contents of the CDict can be copied into the working context. This
4253 * means that the compression can search both the dictionary and input
4254 * while operating on a single set of internal tables. This makes
4255 * the compression faster per-byte of input. However, the initial copy of
4256 * the CDict's tables incurs a fixed cost at the beginning of the
4257 * compression. For small compressions (< 8 KB), that copy can dominate
4258 * the cost of the compression.
4260 * - The CDict's tables can be used in-place. In this model, compression is
4261 * slower per input byte, because the compressor has to search two sets of
4262 * tables. However, this model incurs no start-up cost (as long as the
4263 * working context's tables can be reused). For small inputs, this can be
4264 * faster than copying the CDict's tables.
4266 * - The CDict's tables are not used at all, and instead we use the working
4267 * context alone to reload the dictionary and use params based on the source
4268 * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
4269 * This method is effective when the dictionary sizes are very small relative
4270 * to the input size, and the input size is fairly large to begin with.
4272 * Zstd has a simple internal heuristic that selects which strategy to use
4273 * at the beginning of a compression. However, if experimentation shows that
4274 * Zstd is making poor choices, it is possible to override that choice with
4277 ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
4278 ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
4279 ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
4280 ZSTD_dictForceLoad = 3 /* Always reload the dictionary */
4281 } ZSTD_dictAttachPref_e;
4284 ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level.
4285 * Negative compression levels will be uncompressed, and positive compression
4286 * levels will be compressed. */
4287 ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be
4288 * emitted if Huffman compression is not profitable. */
4289 ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */
4290 } ZSTD_literalCompressionMode_e;
4293 /***************************************
4294 * Frame size functions
4295 ***************************************/
4297 /*! ZSTD_findDecompressedSize() :
4298 * `src` should point to the start of a series of ZSTD encoded and/or skippable frames
4299 * `srcSize` must be the _exact_ size of this series
4300 * (i.e. there should be a frame boundary at `src + srcSize`)
4301 * @return : - decompressed size of all data in all successive frames
4302 * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
4303 * - if an error occurred: ZSTD_CONTENTSIZE_ERROR
4305 * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
4306 * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
4307 * In which case, it's necessary to use streaming mode to decompress data.
4308 * note 2 : decompressed size is always present when compression is done with ZSTD_compress()
4309 * note 3 : decompressed size can be very large (64-bits value),
4310 * potentially larger than what local system can handle as a single memory segment.
4311 * In which case, it's necessary to use streaming mode to decompress data.
4312 * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
4313 * Always ensure result fits within application's authorized limits.
4314 * Each application can set its own limits.
4315 * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
4316 * read each contained frame header. This is fast as most of the data is skipped,
4317 * however it does mean that all frame data must be present and valid. */
4318 ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
4320 /*! ZSTD_decompressBound() :
4321 * `src` should point to the start of a series of ZSTD encoded and/or skippable frames
4322 * `srcSize` must be the _exact_ size of this series
4323 * (i.e. there should be a frame boundary at `src + srcSize`)
4324 * @return : - upper-bound for the decompressed size of all data in all successive frames
4325 *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
4327 * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame.
4328 * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
4329 * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
4330 * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
4331 * upper-bound = # blocks * min(128 KB, Window_Size)
4333 ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
4335 /*! ZSTD_frameHeaderSize() :
4336 * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
4337 * @return : size of the Frame Header,
4338 * or an error code (if srcSize is too small) */
4339 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
4341 /*! ZSTD_getSequences() :
4342 * Extract sequences from the sequence store
4343 * zc can be used to insert custom compression params.
4344 * This function invokes ZSTD_compress2
4345 * @return : number of sequences extracted
4347 ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
4348 size_t outSeqsSize, const void* src, size_t srcSize);
4351 /***************************************
4353 ***************************************/
4355 /*! ZSTD_estimate*() :
4356 * These functions make it possible to estimate memory usage
4357 * of a future {D,C}Ctx, before its creation.
4359 * ZSTD_estimateCCtxSize() will provide a memory budget large enough
4360 * for any compression level up to selected one.
4361 * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
4362 * does not include space for a window buffer.
4363 * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
4364 * The estimate will assume the input may be arbitrarily large,
4365 * which is the worst case.
4367 * When srcSize can be bound by a known and rather "small" value,
4368 * this fact can be used to provide a tighter estimation
4369 * because the CCtx compression context will need less memory.
4370 * This tighter estimation can be provided by more advanced functions
4371 * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
4372 * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
4373 * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
4375 * Note 2 : only single-threaded compression is supported.
4376 * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
4378 ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
4379 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
4380 ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
4381 ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
4383 /*! ZSTD_estimateCStreamSize() :
4384 * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
4385 * It will also consider src size to be arbitrarily "large", which is worst case.
4386 * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
4387 * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
4388 * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
4389 * Note : CStream size estimation is only correct for single-threaded compression.
4390 * ZSTD_DStream memory budget depends on window Size.
4391 * This information can be passed manually, using ZSTD_estimateDStreamSize,
4392 * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
4393 * Note : if streaming is initialized with function ZSTD_init?Stream_usingDict(),
4394 * an internal ?Dict will be created, whose additional size is not estimated here.
4395 * In this case, get total size by adding ZSTD_estimate?DictSize */
4396 ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
4397 ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
4398 ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
4399 ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
4400 ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
4402 /*! ZSTD_estimate?DictSize() :
4403 * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
4404 * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
4405 * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
4407 ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
4408 ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
4409 ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
4411 /*! ZSTD_initStatic*() :
4412 * Initialize an object using a pre-allocated fixed-size buffer.
4413 * workspace: The memory area to emplace the object into.
4414 * Provided pointer *must be 8-bytes aligned*.
4415 * Buffer must outlive object.
4416 * workspaceSize: Use ZSTD_estimate*Size() to determine
4417 * how large workspace must be to support target scenario.
4418 * @return : pointer to object (same address as workspace, just different type),
4419 * or NULL if error (size too small, incorrect alignment, etc.)
4420 * Note : zstd will never resize nor malloc() when using a static buffer.
4421 * If the object requires more memory than available,
4422 * zstd will just error out (typically ZSTD_error_memory_allocation).
4423 * Note 2 : there is no corresponding "free" function.
4424 * Since workspace is allocated externally, it must be freed externally too.
4425 * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
4426 * into its associated cParams.
4427 * Limitation 1 : currently not compatible with internal dictionary creation, triggered by
4428 * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
4429 * Limitation 2 : static cctx currently not compatible with multi-threading.
4430 * Limitation 3 : static dctx is incompatible with legacy support.
4432 ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
4433 ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */
4435 ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
4436 ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */
4438 ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
4439 void* workspace, size_t workspaceSize,
4440 const void* dict, size_t dictSize,
4441 ZSTD_dictLoadMethod_e dictLoadMethod,
4442 ZSTD_dictContentType_e dictContentType,
4443 ZSTD_compressionParameters cParams);
4445 ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
4446 void* workspace, size_t workspaceSize,
4447 const void* dict, size_t dictSize,
4448 ZSTD_dictLoadMethod_e dictLoadMethod,
4449 ZSTD_dictContentType_e dictContentType);
4452 /*! Custom memory allocation :
4453 * These prototypes make it possible to pass your own allocation/free functions.
4454 * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
4455 * All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
4457 typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); /* must return a pointer to `size` bytes, or NULL on failure */
4458 typedef void (*ZSTD_freeFunction) (void* opaque, void* address); /* frees a pointer previously returned by the paired customAlloc */
4459 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; /* `opaque` is forwarded unchanged as first argument of both callbacks */
4460 static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
4462 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
4463 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
4464 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
4465 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
4467 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
4468 ZSTD_dictLoadMethod_e dictLoadMethod,
4469 ZSTD_dictContentType_e dictContentType,
4470 ZSTD_compressionParameters cParams,
4471 ZSTD_customMem customMem);
4473 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
4474 ZSTD_dictLoadMethod_e dictLoadMethod,
4475 ZSTD_dictContentType_e dictContentType,
4476 ZSTD_customMem customMem);
4480 /***************************************
4481 * Advanced compression functions
4482 ***************************************/
4484 /*! ZSTD_createCDict_byReference() :
4485 * Create a digested dictionary for compression
4486 * Dictionary content is just referenced, not duplicated.
4487 * As a consequence, `dictBuffer` **must** outlive CDict,
4488 * and its content must remain unmodified throughout the lifetime of CDict.
4489 * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
4490 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
4492 /*! ZSTD_getCParams() :
4493 * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
4494 * `estimatedSrcSize` value is optional, select 0 if not known */
4495 ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
4497 /*! ZSTD_getParams() :
4498 * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
4499 * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
4500 ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
4502 /*! ZSTD_checkCParams() :
4503 * Ensure param values remain within authorized range.
4504 * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
4505 ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
4507 /*! ZSTD_adjustCParams() :
4508 * optimize params for a given `srcSize` and `dictSize`.
4509 * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
4510 * `dictSize` must be `0` when there is no dictionary.
4511 * cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
4512 * This function never fails (wide contract) */
4513 ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
4515 /*! ZSTD_compress_advanced() :
4516 * Note : this function is now DEPRECATED.
4517 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
4518 * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
4519 ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
4520 void* dst, size_t dstCapacity,
4521 const void* src, size_t srcSize,
4522 const void* dict,size_t dictSize,
4523 ZSTD_parameters params);
4525 /*! ZSTD_compress_usingCDict_advanced() :
4526 * Note : this function is now REDUNDANT.
4527 * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
4528 * This prototype will be marked as deprecated and generate compilation warning in some future version */
4529 ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
4530 void* dst, size_t dstCapacity,
4531 const void* src, size_t srcSize,
4532 const ZSTD_CDict* cdict,
4533 ZSTD_frameParameters fParams);
4536 /*! ZSTD_CCtx_loadDictionary_byReference() :
4537 * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
4538 * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
4539 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
4541 /*! ZSTD_CCtx_loadDictionary_advanced() :
4542 * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
4543 * how to load the dictionary (by copy ? by reference ?)
4544 * and how to interpret it (automatic ? force raw mode ? full mode only ?) */
4545 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
4547 /*! ZSTD_CCtx_refPrefix_advanced() :
4548 * Same as ZSTD_CCtx_refPrefix(), but gives finer control over
4549 * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
4550 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
4552 /* === experimental parameters === */
4553 /* these parameters can be used with ZSTD_setParameter()
4554 * they are not guaranteed to remain supported in the future */
4556 /* Enables rsyncable mode,
4557 * which makes compressed files more rsync friendly
4558 * by adding periodic synchronization points to the compressed data.
4559 * The target average block size is ZSTD_c_jobSize / 2.
4560 * It's possible to modify the job size to increase or decrease
4561 * the granularity of the synchronization point.
4562 * Once the jobSize is smaller than the window size,
4563 * it will result in compression ratio degradation.
4564 * NOTE 1: rsyncable mode only works when multithreading is enabled.
4565 * NOTE 2: rsyncable performs poorly in combination with long range mode,
4566 * since it will decrease the effectiveness of synchronization points,
4567 * though mileage may vary.
4568 * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
4569 * If the selected compression level is already running significantly slower,
4570 * the overall speed won't be significantly impacted.
4572 #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
4574 /* Select a compression format.
4575 * The value must be of type ZSTD_format_e.
4576 * See ZSTD_format_e enum definition for details */
4577 #define ZSTD_c_format ZSTD_c_experimentalParam2
4579 /* Force back-reference distances to remain < windowSize,
4580 * even when referencing into Dictionary content (default:0) */
4581 #define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
4583 /* Controls whether the contents of a CDict
4584 * are used in place, or copied into the working context.
4585 * Accepts values from the ZSTD_dictAttachPref_e enum.
4586 * See the comments on that enum for an explanation of the feature. */
4587 #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
4589 /* Controls how the literals are compressed (default is auto).
4590 * The value must be of type ZSTD_literalCompressionMode_e.
4591 * See ZSTD_literalCompressionMode_e enum definition for details.
4593 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
4595 /* Tries to fit compressed block size to be around targetCBlockSize.
4596 * No target when targetCBlockSize == 0.
4597 * There is no guarantee on compressed block size (default:0) */
4598 #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
4600 /* User's best guess of source size.
4601 * Hint is not valid when srcSizeHint == 0.
4602 * There is no guarantee that hint is close to actual source size,
4603 * but compression ratio may regress significantly if guess considerably underestimates */
4604 #define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
4606 /*! ZSTD_CCtx_getParameter() :
4607 * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
4608 * and store it into int* value.
4609 * @return : 0, or an error code (which can be tested with ZSTD_isError()).
4611 ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
4614 /*! ZSTD_CCtx_params :
4616 * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
4617 * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
4618 * an existing ZSTD_CCtx_params structure.
4619 * This is similar to
4620 * ZSTD_CCtx_setParameter().
4621 * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
4623 * These parameters will be applied to
4624 * all subsequent frames.
4625 * - ZSTD_compressStream2() : Do compression using the CCtx.
4626 * - ZSTD_freeCCtxParams() : Free the memory.
4628 * This can be used with ZSTD_estimateCCtxSize_usingCCtxParams()
4629 * for static allocation of CCtx for single-threaded compression.
4631 ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
4632 ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
4634 /*! ZSTD_CCtxParams_reset() :
4635 * Reset params to default values.
4637 ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
4639 /*! ZSTD_CCtxParams_init() :
4640 * Initializes the compression parameters of cctxParams according to
4641 * compression level. All other parameters are reset to their default values.
4643 ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
4645 /*! ZSTD_CCtxParams_init_advanced() :
4646 * Initializes the compression and frame parameters of cctxParams according to
4647 * params. All other parameters are reset to their default values.
4649 ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
4651 /*! ZSTD_CCtxParams_setParameter() :
4652 * Similar to ZSTD_CCtx_setParameter.
4653 * Set one compression parameter, selected by enum ZSTD_cParameter.
4654 * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams().
4655 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4657 ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
4659 /*! ZSTD_CCtxParams_getParameter() :
4660 * Similar to ZSTD_CCtx_getParameter.
4661 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
4662 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
4664 ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
4666 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
4667 * Apply a set of ZSTD_CCtx_params to the compression context.
4668 * This can be done even after compression is started,
4669 * if nbWorkers==0, this will have no impact until a new compression is started.
4670 * if nbWorkers>=1, new parameters will be picked up at next job,
4671 * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
4673 ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
4674 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
4676 /*! ZSTD_compressStream2_simpleArgs() :
4677 * Same as ZSTD_compressStream2(),
4678 * but using only integral types as arguments.
4679 * This variant might be helpful for binders from dynamic languages
4680 * which have troubles handling structures containing memory pointers.
4682 ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
4684 void* dst, size_t dstCapacity, size_t* dstPos,
4685 const void* src, size_t srcSize, size_t* srcPos,
4686 ZSTD_EndDirective endOp);
4689 /***************************************
4690 * Advanced decompression functions
4691 ***************************************/
4693 /*! ZSTD_isFrame() :
4694 * Tells if the content of `buffer` starts with a valid Frame Identifier.
4695 * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
4696 * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
4697 * Note 3 : Skippable Frame Identifiers are considered valid. */
4698 ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
4700 /*! ZSTD_createDDict_byReference() :
4701 * Create a digested dictionary, ready to start decompression operation without startup delay.
4702 * Dictionary content is referenced, and therefore stays in dictBuffer.
4703 * It is important that dictBuffer outlives DDict,
4704 * it must remain read accessible throughout the lifetime of DDict */
4705 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
4707 /*! ZSTD_DCtx_loadDictionary_byReference() :
4708 * Same as ZSTD_DCtx_loadDictionary(),
4709 * but references `dict` content instead of copying it into `dctx`.
4710 * This saves memory if `dict` remains around.
4711 * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
4712 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
4714 /*! ZSTD_DCtx_loadDictionary_advanced() :
4715 * Same as ZSTD_DCtx_loadDictionary(),
4716 * but gives direct control over
4717 * how to load the dictionary (by copy ? by reference ?)
4718 * and how to interpret it (automatic ? force raw mode ? full mode only ?). */
4719 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
4721 /*! ZSTD_DCtx_refPrefix_advanced() :
4722 * Same as ZSTD_DCtx_refPrefix(), but gives finer control over
4723 * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
4724 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
4726 /*! ZSTD_DCtx_setMaxWindowSize() :
4727 * Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
4728 * This protects a decoder context from reserving too much memory for itself (potential attack scenario).
4729 * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
4730 * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
4731 * @return : 0, or an error code (which can be tested using ZSTD_isError()).
4733 ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
4736 * experimental parameter,
4737 * allowing selection between ZSTD_format_e input compression formats
4739 #define ZSTD_d_format ZSTD_d_experimentalParam1
4740 /* ZSTD_d_stableOutBuffer
4741 * Experimental parameter.
4742 * Default is 0 == disabled. Set to 1 to enable.
4744 * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
4745 * between calls, except for the modifications that zstd makes to pos (the
4746 * caller must not modify pos). This is checked by the decompressor, and
4747 * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
4748 * MUST be large enough to fit the entire decompressed frame. This will be
4749 * checked when the frame content size is known. The data in the ZSTD_outBuffer
4750 * in the range [dst, dst + pos) MUST not be modified during decompression
4751 * or you will get data corruption.
4753 * When this flag is enabled zstd won't allocate an output buffer, because
4754 * it can write directly to the ZSTD_outBuffer, but it will still allocate
4755 * an input buffer large enough to fit any compressed block. This will also
4756 * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
4757 * If you need to avoid the input buffer allocation use the buffer-less
4760 * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
4761 * this flag is ALWAYS memory safe, and will never access out-of-bounds
4762 * memory. However, decompression WILL fail if you violate the preconditions.
4764 * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
4765 * not be modified during decompression or you will get data corruption. This
4766 * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
4767 * matches. Normally zstd maintains its own buffer for this purpose, but passing
4768 * this flag tells zstd to use the user provided buffer.
4770 #define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
4772 /*! ZSTD_DCtx_setFormat() :
4773 * Instruct the decoder context about what kind of data to decode next.
4774 * This instruction is mandatory to decode data without a fully-formed header,
4775 * such as ZSTD_f_zstd1_magicless for example.
4776 * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
4777 ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
4779 /*! ZSTD_decompressStream_simpleArgs() :
4780 * Same as ZSTD_decompressStream(),
4781 * but using only integral types as arguments.
4782 * This can be helpful for binders from dynamic languages
4783 * which have troubles handling structures containing memory pointers.
4785 ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
4787 void* dst, size_t dstCapacity, size_t* dstPos,
4788 const void* src, size_t srcSize, size_t* srcPos);
4791 /********************************************************************
4792 * Advanced streaming functions
4793 * Warning : most of these functions are now redundant with the Advanced API.
4794 * Once Advanced API reaches "stable" status,
4795 * redundant functions will be deprecated, and then at some point removed.
4796 ********************************************************************/
4798 /*===== Advanced Streaming compression functions =====*/
4799 /**! ZSTD_initCStream_srcSize() :
4800 * This function is deprecated, and equivalent to:
4801 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4802 * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
4803 * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
4804 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
4806 * pledgedSrcSize must be correct. If it is not known at init time, use
4807 * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
4808 * "0" also disables frame content size field. It may be enabled in the future.
4809 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4812 ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
4813 int compressionLevel,
4814 unsigned long long pledgedSrcSize);
4816 /**! ZSTD_initCStream_usingDict() :
4817 * This function is deprecated, and is equivalent to:
4818 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4819 * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
4820 * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
4822 * Creates an internal CDict (incompatible with static CCtx), except if
4823 * dict == NULL or dictSize < 8, in which case no dict is used.
4824 * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
4825 * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
4826 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4829 ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
4830 const void* dict, size_t dictSize,
4831 int compressionLevel);
4833 /**! ZSTD_initCStream_advanced() :
4834 * This function is deprecated, and is approximately equivalent to:
4835 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4836 * // Pseudocode: Set each zstd parameter and leave the rest as-is.
4837 * for ((param, value) : params) {
4838 * ZSTD_CCtx_setParameter(zcs, param, value);
4840 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
4841 * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
4843 * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
4844 * pledgedSrcSize must be correct.
4845 * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
4846 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4849 ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
4850 const void* dict, size_t dictSize,
4851 ZSTD_parameters params,
4852 unsigned long long pledgedSrcSize);
4854 /**! ZSTD_initCStream_usingCDict() :
4855 * This function is deprecated, and equivalent to:
4856 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4857 * ZSTD_CCtx_refCDict(zcs, cdict);
4859 * note : cdict will just be referenced, and must outlive compression session
4860 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4862 ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
4864 /**! ZSTD_initCStream_usingCDict_advanced() :
4865 * This function is DEPRECATED, and is approximately equivalent to:
4866 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4867 * // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
4868 * for ((fParam, value) : fParams) {
4869 * ZSTD_CCtx_setParameter(zcs, fParam, value);
4871 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
4872 * ZSTD_CCtx_refCDict(zcs, cdict);
4874 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
4875 * pledgedSrcSize must be correct. If srcSize is not known at init time, use
4876 * value ZSTD_CONTENTSIZE_UNKNOWN.
4877 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4880 ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
4881 const ZSTD_CDict* cdict,
4882 ZSTD_frameParameters fParams,
4883 unsigned long long pledgedSrcSize);
4885 /*! ZSTD_resetCStream() :
4886 * This function is deprecated, and is equivalent to:
4887 * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
4888 * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
4890 * start a new frame, using same parameters from previous frame.
4891 * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
4892 * Note that zcs must be init at least once before using ZSTD_resetCStream().
4893 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
4894 * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
4895 * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
4896 * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
4897 * @return : 0, or an error code (which can be tested using ZSTD_isError())
4898 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4900 ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
4904 unsigned long long ingested; /* nb input bytes read and buffered */
4905 unsigned long long consumed; /* nb input bytes actually compressed */
4906 unsigned long long produced; /* nb of compressed bytes generated and buffered */
4907 unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
4908 unsigned currentJobID; /* MT only : latest started job nb */
4909 unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */
4910 } ZSTD_frameProgression;
4912 /* ZSTD_getFrameProgression() :
4913 * tells how much data has been ingested (read from input)
4914 * consumed (input actually compressed) and produced (output) for current frame.
4915 * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
4916 * Aggregates progression inside active worker threads.
4918 ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
4920 /*! ZSTD_toFlushNow() :
4921 * Tell how many bytes are ready to be flushed immediately.
4922 * Useful for multithreading scenarios (nbWorkers >= 1).
4923 * Probe the oldest active job, defined as oldest job not yet entirely flushed,
4924 * and check its output buffer.
4925 * @return : amount of data stored in oldest job and ready to be flushed immediately.
4926 * if @return == 0, it means either :
4927 * + there is no active job (could be checked with ZSTD_frameProgression()), or
4928 * + oldest job is still actively compressing data,
4929 * but everything it has produced has also been flushed so far,
4930 * therefore flush speed is limited by production speed of oldest job
4931 * irrespective of the speed of concurrent (and newer) jobs.
4933 ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
4936 /*===== Advanced Streaming decompression functions =====*/
4938 * This function is deprecated, and is equivalent to:
4940 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
4941 * ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
4943 * note: no dictionary will be used if dict == NULL or dictSize < 8
4944 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4946 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
4949 * This function is deprecated, and is equivalent to:
4951 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
4952 * ZSTD_DCtx_refDDict(zds, ddict);
4954 * note : ddict is referenced, it must outlive decompression session
4955 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4957 ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
4960 * This function is deprecated, and is equivalent to:
4962 * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
4964 * re-use decompression parameters from previous init; saves dictionary loading
4965 * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
4967 ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
4970 /*********************************************************************
4971 * Buffer-less and synchronous inner streaming functions
4973 * This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
4974 * But it's also a complex one, with several restrictions, documented below.
4975 * Prefer normal streaming API for an easier experience.
4976 ********************************************************************* */
4979 Buffer-less streaming compression (synchronous mode)
4981 A ZSTD_CCtx object is required to track streaming operations.
4982 Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
4983 ZSTD_CCtx object can be re-used multiple times within successive compression operations.
4985 Start by initializing a context.
4986 Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
4987 or ZSTD_compressBegin_advanced(), for finer parameter control.
4988 It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
4990 Then, consume your input using ZSTD_compressContinue().
4991 There are some important considerations to keep in mind when using this advanced function :
4992 - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
4993 - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
4994 - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
4995 Worst case evaluation is provided by ZSTD_compressBound().
4996 ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
4997 - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
4998 It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
4999 - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
5000 In which case, it will "discard" the relevant memory section from its history.
5002 Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
5003 It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
5004 Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
5006 `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
5009 /*===== Buffer-less streaming compression functions =====*/
5010 ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
5011 ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
5012 ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
5013 ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
5014 ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
5015 ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
5017 ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
5018 ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
5022 Buffer-less streaming decompression (synchronous mode)
5024 A ZSTD_DCtx object is required to track streaming operations.
5025 Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
5026 A ZSTD_DCtx object can be re-used multiple times.
5028 First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
5029 Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
5030 Data fragment must be large enough to ensure successful decoding.
5031 `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
5032 @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
5033 >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
5034 errorCode, which can be tested using ZSTD_isError().
5036 It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
5037 such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
5038 Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
5039 As a consequence, check that values remain within valid application range.
5040 For example, do not allocate memory blindly, check that `windowSize` is within expectation.
5041 Each application can set its own limits, depending on local restrictions.
5042 For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
5044 ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
5045 ZSTD_decompressContinue() is very sensitive to contiguity,
5046 if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
5047 or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
5048 There are multiple ways to guarantee this condition.
5050 The most memory efficient way is to use a round buffer of sufficient size.
5051 Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
5052 which can @return an error code if required value is too large for current system (in 32-bits mode).
5053 In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
5054 up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
5055 which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
5056 At which point, decoding can resume from the beginning of the buffer.
5057 Note that already decoded data stored in the buffer should be flushed before being overwritten.
5059 There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
5061 Finally, if you control the compression process, you can also ignore all buffer size rules,
5062 as long as the encoder and decoder progress in "lock-step",
5063 aka use exactly the same buffer sizes, break contiguity at the same place, etc.
5065 Once buffers are setup, start decompression, with ZSTD_decompressBegin().
5066 If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
5068 Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
5069 ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
5070 ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
5072 @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
5073 It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
5074 It can also be an error code, which can be tested with ZSTD_isError().
5076 A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
5077 Context can then be reset to start a new decompression.
5079 Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
5080 This information is not required to properly decode a frame.
5082 == Special case : skippable frames ==
5084 Skippable frames allow integration of user-defined data into a flow of concatenated frames.
5085 Skippable frames will be ignored (skipped) by decompressor.
5086 The format of skippable frames is as follows :
5087 a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
5088 b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
5089 c) Frame Content - any content (User Data) of length equal to Frame Size
5090 For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
5091 For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
5094 /*===== Buffer-less streaming decompression functions =====*/
5095 typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
5097 unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
5098 unsigned long long windowSize; /* can be very large, up to <= frameContentSize */
5099 unsigned blockSizeMax;
5100 ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
5101 unsigned headerSize;
5103 unsigned checksumFlag;
5106 /*! ZSTD_getFrameHeader() :
5107 * decode Frame Header, or requires larger `srcSize`.
5108 * @return : 0, `zfhPtr` is correctly filled,
5109 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
5110 * or an error code, which can be tested using ZSTD_isError() */
5111 ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
5112 /*! ZSTD_getFrameHeader_advanced() :
5113 * same as ZSTD_getFrameHeader(),
5114 * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
5115 ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
5116 ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
5118 ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
5119 ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
5120 ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
5122 ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
5123 ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
5126 ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
5127 typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
5128 ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
5133 /* ============================ */
5134 /** Block level API */
5135 /* ============================ */
5138 Block functions produce and decode raw zstd blocks, without frame metadata.
5139 Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
5140 But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
5142 A few rules to respect :
5143 - Compressing and decompressing require a context structure
5144 + Use ZSTD_createCCtx() and ZSTD_createDCtx()
5145 - It is necessary to init context before starting
5146 + compression : any ZSTD_compressBegin*() variant, including with dictionary
5147 + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
5148 + copyCCtx() and copyDCtx() can be used too
5149 - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
5150 + If input is larger than a block size, it's necessary to split input data into multiple blocks
5151 + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
5152 Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
5153 - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
5154 ===> In which case, nothing is produced into `dst` !
5155 + User __must__ test for such outcome and deal directly with uncompressed data
5156 + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
5157 Doing so would mess up with statistics history, leading to potential data corruption.
5158 + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
5159 + In case of multiple successive blocks, should some of them be uncompressed,
5160 decoder must be informed of their existence in order to follow proper history.
5161 Use ZSTD_insertBlock() for such a case.
5164 /*===== Raw zstd block functions =====*/
5165 ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
5166 ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
5167 ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
5168 ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
5171 #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
5173 #if defined (__cplusplus)
5176 /**** ended inlining ../zstd.h ****/
5177 #define FSE_STATIC_LINKING_ONLY
5178 /**** skipping file: fse.h ****/
5179 #define HUF_STATIC_LINKING_ONLY
5180 /**** skipping file: huf.h ****/
5181 #ifndef XXH_STATIC_LINKING_ONLY
5182 # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
5184 /**** start inlining xxhash.h ****/
5186 * xxHash - Extremely Fast Hash algorithm
5188 * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
5190 * You can contact the author at :
5191 * - xxHash source repository : https://github.com/Cyan4973/xxHash
5193 * This source code is licensed under both the BSD-style license (found in the
5194 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
5195 * in the COPYING file in the root directory of this source tree).
5196 * You may select, at your option, one of the above-listed licenses.
5199 /* Notice extracted from xxHash homepage :
5201 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
5202 It also successfully passes all tests from the SMHasher suite.
5204 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
5206 Name Speed Q.Score Author
5208 CrapWow 3.2 GB/s 2 Andrew
5209 MurmurHash 3a 2.7 GB/s 10 Austin Appleby
5210 SpookyHash 2.0 GB/s 10 Bob Jenkins
5211 SBox 1.4 GB/s 9 Bret Mulvey
5212 Lookup3 1.2 GB/s 9 Bob Jenkins
5213 SuperFastHash 1.2 GB/s 1 Paul Hsieh
5214 CityHash64 1.05 GB/s 10 Pike & Alakuijala
5215 FNV 0.55 GB/s 5 Fowler, Noll, Vo
5217 MD5-32 0.33 GB/s 10 Ronald L. Rivest
5218 SHA1-32 0.28 GB/s 10
5220 Q.Score is a measure of quality of the hash function.
5221 It depends on successfully passing SMHasher test set.
5222 10 is a perfect score.
5224 A 64-bits version, named XXH64, is available since r35.
5225 It offers much better speed, but for 64-bits applications only.
5226 Name Speed on 64 bits Speed on 32 bits
5227 XXH64 13.8 GB/s 1.9 GB/s
5228 XXH32 6.8 GB/s 6.0 GB/s
5231 #if defined (__cplusplus)
5235 #ifndef XXHASH_H_5627135585666179
5236 #define XXHASH_H_5627135585666179 1
5239 /* ****************************
5241 ******************************/
5242 #include <stddef.h> /* size_t */
5243 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
5246 /* ****************************
5248 ******************************/
5250 * This is useful if you want to include xxhash functions in `static` mode
5251 * in order to inline them, and remove their symbol from the public list.
5253 * #define XXH_PRIVATE_API
5254 * #include "xxhash.h"
5255 * `xxhash.c` is automatically included.
5256 * It's not useful to compile and link it as a separate module anymore.
5258 #ifdef XXH_PRIVATE_API
5259 # ifndef XXH_STATIC_LINKING_ONLY
5260 # define XXH_STATIC_LINKING_ONLY
5262 # if defined(__GNUC__)
5263 # define XXH_PUBLIC_API static __inline __attribute__((unused))
5264 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
5265 # define XXH_PUBLIC_API static inline
5266 # elif defined(_MSC_VER)
5267 # define XXH_PUBLIC_API static __inline
5269 # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */
5272 # define XXH_PUBLIC_API /* do nothing */
5273 #endif /* XXH_PRIVATE_API */
5275 /*!XXH_NAMESPACE, aka Namespace Emulation :
5277 If you want to include _and expose_ xxHash functions from within your own library,
5278 but also want to avoid symbol collisions with another library which also includes xxHash,
5280 you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
5281 with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values).
5283 Note that no change is required within the calling program as long as it includes `xxhash.h` :
5284 regular symbol name will be automatically translated by this header.
5286 #ifdef XXH_NAMESPACE
5287 # define XXH_CAT(A,B) A##B
5288 # define XXH_NAME2(A,B) XXH_CAT(A,B)
5289 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
5290 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
5291 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
5292 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
5293 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
5294 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
5295 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
5296 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
5297 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
5298 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
5299 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
5300 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
5301 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
5302 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
5303 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
5304 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
5305 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
5306 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
5307 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
5311 /* *************************************
5313 ***************************************/
5314 #define XXH_VERSION_MAJOR 0
5315 #define XXH_VERSION_MINOR 6
5316 #define XXH_VERSION_RELEASE 2
5317 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
5318 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
5321 /* ****************************
5322 * Simple Hash Functions
5323 ******************************/
5324 typedef unsigned int XXH32_hash_t;
5325 typedef unsigned long long XXH64_hash_t;
5327 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
5328 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
5332 Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
5333 The memory between input & input+length must be valid (allocated and read-accessible).
5334 "seed" can be used to alter the result predictably.
5335 Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
5337 Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
5338 "seed" can be used to alter the result predictably.
5339 This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
5343 /* ****************************
5344 * Streaming Hash Functions
5345 ******************************/
5346 typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
5347 typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
5349 /*! State allocation, compatible with dynamic libraries */
5351 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
5352 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
5354 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
5355 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
5358 /* hash streaming */
5360 XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
5361 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
5362 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
5364 XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
5365 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
5366 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
5369 These functions generate the xxHash of an input provided in multiple segments.
5370 Note that, for small input, they are slower than single-call functions, due to state management.
5371 For small input, prefer `XXH32()` and `XXH64()` .
5373 XXH state must first be allocated, using XXH*_createState() .
5375 Start a new hash by initializing state with a seed, using XXH*_reset().
5377 Then, feed the hash state by calling XXH*_update() as many times as necessary.
5378 Obviously, input must be allocated and read accessible.
5379 The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
5381 Finally, a hash value can be produced anytime, by using XXH*_digest().
5382 This function returns the nn-bits hash as an int or long long.
5384 It's still possible to continue inserting input into the hash state after a digest,
5385 and generate some new hashes later on, by calling again XXH*_digest().
5387 When done, free XXH state space if it was allocated dynamically.
5391 /* **************************
5393 ****************************/
5394 #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */
5395 # define restrict /* disable restrict */
5398 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
5399 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
5402 /* **************************
5403 * Canonical representation
5404 ****************************/
5405 /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
5406 * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
5407 * These functions allow transformation of hash result into and from its canonical format.
5408 * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
/* Canonical (big-endian) byte representations of the 32- and 64-bit hash
 * values, used by XXH*_canonicalFromHash() / XXH*_hashFromCanonical()
 * (declared just below) so hashes stored to file/memory compare equal
 * across platforms of different endianness. */
5410 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
5411 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
5413 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
5414 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
5416 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
5417 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
5419 #endif /* XXHASH_H_5627135585666179 */
5423 /* ================================================================================================
5424 This section contains definitions which are not guaranteed to remain stable.
5425 They may change in future versions, becoming incompatible with a different version of the library.
5426 They shall only be used with static linking.
5427 Never use these definitions in association with dynamic linking !
5428 =================================================================================================== */
5429 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
5430 #define XXH_STATIC_H_3543687687345
5432 /* These definitions are only meant to allow allocation of XXH state
5433 statically, on stack, or in a struct for example.
5434 Do not use members directly. */
5436 struct XXH32_state_s {
5437 unsigned total_len_32;
5443 unsigned mem32[4]; /* buffer defined as U32 for alignment */
5445 unsigned reserved; /* never read nor write, will be removed in a future version */
5446 }; /* typedef'd to XXH32_state_t */
5448 struct XXH64_state_s {
5449 unsigned long long total_len;
5450 unsigned long long v1;
5451 unsigned long long v2;
5452 unsigned long long v3;
5453 unsigned long long v4;
5454 unsigned long long mem64[4]; /* buffer defined as U64 for alignment */
5456 unsigned reserved[2]; /* never read nor write, will be removed in a future version */
5457 }; /* typedef'd to XXH64_state_t */
5460 # ifdef XXH_PRIVATE_API
5461 /**** start inlining xxhash.c ****/
5463 * xxHash - Fast Hash algorithm
5464 * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
5466 * You can contact the author at :
5467 * - xxHash homepage: http://www.xxhash.com
5468 * - xxHash source repository : https://github.com/Cyan4973/xxHash
5470 * This source code is licensed under both the BSD-style license (found in the
5471 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
5472 * in the COPYING file in the root directory of this source tree).
5473 * You may select, at your option, one of the above-listed licenses.
5477 /* *************************************
5479 ***************************************/
5480 /*!XXH_FORCE_MEMORY_ACCESS :
5481 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
5482 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
5483 * The below switch allow to select different access method for improved performance.
5484 * Method 0 (default) : use `memcpy()`. Safe and portable.
5485 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
5486 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
5487 * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
5488 * It can generate buggy code on targets which do not support unaligned memory accesses.
5489 * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
5490 * See http://stackoverflow.com/a/32095106/646947 for details.
5491 * Prefer these methods in priority order (0 > 1 > 2)
5493 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
5494 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
5495 # define XXH_FORCE_MEMORY_ACCESS 2
5496 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
5497 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
5499 # define XXH_FORCE_MEMORY_ACCESS 1
5503 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
5504 * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
5505 * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
5506 * By default, this option is disabled. To enable it, uncomment below define :
5508 /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
5510 /*!XXH_FORCE_NATIVE_FORMAT :
5511 * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
5512 * Results are therefore identical for little-endian and big-endian CPU.
5513 * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
5514 * Should endian-independence be of no importance for your application, you may set the #define below to 1,
5515 * to improve speed for Big-endian CPU.
5516 * This option has no impact on little-endian CPU.
5518 #ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
5519 # define XXH_FORCE_NATIVE_FORMAT 0
5522 /*!XXH_FORCE_ALIGN_CHECK :
5523 * This is a minor performance trick, only useful with lots of very small keys.
5524 * It means : check for aligned/unaligned input.
5525 * The check costs one initial branch per hash; set to 0 when the input data
5526 * is guaranteed to be aligned.
5528 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
5529 # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
5530 # define XXH_FORCE_ALIGN_CHECK 0
5532 # define XXH_FORCE_ALIGN_CHECK 1
5537 /* *************************************
5538 * Includes & Memory related functions
5539 ***************************************/
5540 /* Modify the local functions below should you wish to use some other memory routines */
5541 /* for malloc(), free() */
5543 #include <stddef.h> /* size_t */
/* Thin wrappers around the standard allocator and memcpy.
 * As noted above (line 5540), a port to custom memory routines only needs
 * to modify these three functions; the rest of xxhash calls them exclusively. */
5544 static void* XXH_malloc(size_t s) { return malloc(s); }
5545 static void XXH_free (void* p) { free(p); }
5548 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
5550 #ifndef XXH_STATIC_LINKING_ONLY
5551 # define XXH_STATIC_LINKING_ONLY
5553 /**** skipping file: xxhash.h ****/
5556 /* *************************************
5557 * Compiler Specific Options
5558 ***************************************/
5559 #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
5560 # define INLINE_KEYWORD inline
5562 # define INLINE_KEYWORD
5565 #if defined(__GNUC__) || defined(__ICCARM__)
5566 # define FORCE_INLINE_ATTR __attribute__((always_inline))
5567 #elif defined(_MSC_VER)
5568 # define FORCE_INLINE_ATTR __forceinline
5570 # define FORCE_INLINE_ATTR
5573 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
5577 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
5581 /* *************************************
5583 ***************************************/
5586 # if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
5587 # include <stdint.h>
5588 typedef uint8_t BYTE;
5589 typedef uint16_t U16;
5590 typedef uint32_t U32;
5591 typedef int32_t S32;
5592 typedef uint64_t U64;
5594 typedef unsigned char BYTE;
5595 typedef unsigned short U16;
5596 typedef unsigned int U32;
5597 typedef signed int S32;
5598 typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
5603 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
5605 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
5606 static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
5607 static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
5609 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
5611 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
5612 /* currently only defined for gcc and icc */
5613 typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
5615 static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
5616 static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
5620 /* portable and safe solution. Generally efficient.
5621 * see : http://stackoverflow.com/a/32095106/646947
5624 static U32 XXH_read32(const void* memPtr)
5627 memcpy(&val, memPtr, sizeof(val));
5631 static U64 XXH_read64(const void* memPtr)
5634 memcpy(&val, memPtr, sizeof(val));
5638 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
5641 /* ****************************************
5642 * Compiler-specific Functions and Macros
5643 ******************************************/
#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

/* Rotate-left helpers. MSVC and IAR expose intrinsics; elsewhere the
 * classic shift/or expression is used, which compilers recognize and
 * lower to a single rotate instruction.
 * Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
#if defined(_MSC_VER)
#  define XXH_rotl32(x,r) _rotl(x,r)
#  define XXH_rotl64(x,r) _rotl64(x,r)
#else
#if defined(__ICCARM__)
#  include <intrinsics.h>
#  define XXH_rotl32(x,r) __ROR(x,(32 - r))
#else
#  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
#endif
#  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
#endif
5660 #if defined(_MSC_VER) /* Visual Studio */
5661 # define XXH_swap32 _byteswap_ulong
5662 # define XXH_swap64 _byteswap_uint64
5663 #elif GCC_VERSION >= 403
5664 # define XXH_swap32 __builtin_bswap32
5665 # define XXH_swap64 __builtin_bswap64
5667 static U32 XXH_swap32 (U32 x)
5669 return ((x << 24) & 0xff000000 ) |
5670 ((x << 8) & 0x00ff0000 ) |
5671 ((x >> 8) & 0x0000ff00 ) |
5672 ((x >> 24) & 0x000000ff );
5674 static U64 XXH_swap64 (U64 x)
5676 return ((x << 56) & 0xff00000000000000ULL) |
5677 ((x << 40) & 0x00ff000000000000ULL) |
5678 ((x << 24) & 0x0000ff0000000000ULL) |
5679 ((x << 8) & 0x000000ff00000000ULL) |
5680 ((x >> 8) & 0x00000000ff000000ULL) |
5681 ((x >> 24) & 0x0000000000ff0000ULL) |
5682 ((x >> 40) & 0x000000000000ff00ULL) |
5683 ((x >> 56) & 0x00000000000000ffULL);
5688 /* *************************************
5689 * Architecture Macros
5690 ***************************************/
typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;

/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line.
 * Default : runtime detection by reading the low byte of an int set to 1
 * (evaluates to 1 on little-endian, 0 on big-endian machines). */
#ifndef XXH_CPU_LITTLE_ENDIAN
static const int g_one = 1;
#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
#endif
5700 /* ***************************
5702 *****************************/
5703 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
5705 FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
5707 if (align==XXH_unaligned)
5708 return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
5710 return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
5713 FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
5715 return XXH_readLE32_align(ptr, endian, XXH_unaligned);
5718 static U32 XXH_readBE32(const void* ptr)
5720 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
5723 FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
5725 if (align==XXH_unaligned)
5726 return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
5728 return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
5731 FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
5733 return XXH_readLE64_align(ptr, endian, XXH_unaligned);
5736 static U64 XXH_readBE64(const void* ptr)
5738 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
5742 /* *************************************
5744 ***************************************/
5745 #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
5748 /* *************************************
5750 ***************************************/
5751 static const U32 PRIME32_1 = 2654435761U;
5752 static const U32 PRIME32_2 = 2246822519U;
5753 static const U32 PRIME32_3 = 3266489917U;
5754 static const U32 PRIME32_4 = 668265263U;
5755 static const U32 PRIME32_5 = 374761393U;
5757 static const U64 PRIME64_1 = 11400714785074694791ULL;
5758 static const U64 PRIME64_2 = 14029467366897019727ULL;
5759 static const U64 PRIME64_3 = 1609587929392839161ULL;
5760 static const U64 PRIME64_4 = 9650029242287828579ULL;
5761 static const U64 PRIME64_5 = 2870177450012600261ULL;
5763 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
5766 /* **************************
5768 ****************************/
5769 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
5771 memcpy(dstState, srcState, sizeof(*dstState));
5774 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
5776 memcpy(dstState, srcState, sizeof(*dstState));
5780 /* ***************************
5781 * Simple Hash Functions
5782 *****************************/
5784 static U32 XXH32_round(U32 seed, U32 input)
5786 seed += input * PRIME32_2;
5787 seed = XXH_rotl32(seed, 13);
5792 FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
5794 const BYTE* p = (const BYTE*)input;
5795 const BYTE* bEnd = p + len;
5797 #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
5799 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
5802 bEnd=p=(const BYTE*)(size_t)16;
5807 const BYTE* const limit = bEnd - 16;
5808 U32 v1 = seed + PRIME32_1 + PRIME32_2;
5809 U32 v2 = seed + PRIME32_2;
5811 U32 v4 = seed - PRIME32_1;
5814 v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
5815 v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
5816 v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
5817 v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
5820 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
5822 h32 = seed + PRIME32_5;
5828 h32 += XXH_get32bits(p) * PRIME32_3;
5829 h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
5834 h32 += (*p) * PRIME32_5;
5835 h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
5849 XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
5852 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
5853 XXH32_CREATESTATE_STATIC(state);
5854 XXH32_reset(state, seed);
5855 XXH32_update(state, input, len);
5856 return XXH32_digest(state);
5858 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
5860 if (XXH_FORCE_ALIGN_CHECK) {
5861 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
5862 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
5863 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
5865 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
5868 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
5869 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
5871 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
5876 static U64 XXH64_round(U64 acc, U64 input)
5878 acc += input * PRIME64_2;
5879 acc = XXH_rotl64(acc, 31);
5884 static U64 XXH64_mergeRound(U64 acc, U64 val)
5886 val = XXH64_round(0, val);
5888 acc = acc * PRIME64_1 + PRIME64_4;
5892 FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
5894 const BYTE* p = (const BYTE*)input;
5895 const BYTE* const bEnd = p + len;
5897 #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
5899 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
5902 bEnd=p=(const BYTE*)(size_t)32;
5907 const BYTE* const limit = bEnd - 32;
5908 U64 v1 = seed + PRIME64_1 + PRIME64_2;
5909 U64 v2 = seed + PRIME64_2;
5911 U64 v4 = seed - PRIME64_1;
5914 v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
5915 v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
5916 v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
5917 v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
5920 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
5921 h64 = XXH64_mergeRound(h64, v1);
5922 h64 = XXH64_mergeRound(h64, v2);
5923 h64 = XXH64_mergeRound(h64, v3);
5924 h64 = XXH64_mergeRound(h64, v4);
5927 h64 = seed + PRIME64_5;
5933 U64 const k1 = XXH64_round(0, XXH_get64bits(p));
5935 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
5940 h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
5941 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
5946 h64 ^= (*p) * PRIME64_5;
5947 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
5961 XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
5964 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
5965 XXH64_CREATESTATE_STATIC(state);
5966 XXH64_reset(state, seed);
5967 XXH64_update(state, input, len);
5968 return XXH64_digest(state);
5970 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
5972 if (XXH_FORCE_ALIGN_CHECK) {
5973 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
5974 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
5975 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
5977 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
5980 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
5981 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
5983 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
5988 /* **************************************************
5989 * Advanced Hash Functions
5990 ****************************************************/
5992 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
5994 return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
5996 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
6002 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
6004 return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
6006 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
6015 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
6017 XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
6018 memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
6019 state.v1 = seed + PRIME32_1 + PRIME32_2;
6020 state.v2 = seed + PRIME32_2;
6021 state.v3 = seed + 0;
6022 state.v4 = seed - PRIME32_1;
6023 memcpy(statePtr, &state, sizeof(state));
6028 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
6030 XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
6031 memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
6032 state.v1 = seed + PRIME64_1 + PRIME64_2;
6033 state.v2 = seed + PRIME64_2;
6034 state.v3 = seed + 0;
6035 state.v4 = seed - PRIME64_1;
6036 memcpy(statePtr, &state, sizeof(state));
6041 FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
6043 const BYTE* p = (const BYTE*)input;
6044 const BYTE* const bEnd = p + len;
6046 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
6047 if (input==NULL) return XXH_ERROR;
6050 state->total_len_32 += (unsigned)len;
6051 state->large_len |= (len>=16) | (state->total_len_32>=16);
6053 if (state->memsize + len < 16) { /* fill in tmp buffer */
6054 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
6055 state->memsize += (unsigned)len;
6059 if (state->memsize) { /* some data left from previous update */
6060 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
6061 { const U32* p32 = state->mem32;
6062 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
6063 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
6064 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
6065 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
6067 p += 16-state->memsize;
6072 const BYTE* const limit = bEnd - 16;
6079 v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
6080 v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
6081 v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
6082 v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
6092 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
6093 state->memsize = (unsigned)(bEnd-p);
6099 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
6101 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
6103 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
6104 return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
6106 return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
6111 FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
6113 const BYTE * p = (const BYTE*)state->mem32;
6114 const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
6117 if (state->large_len) {
6118 h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
6120 h32 = state->v3 /* == seed */ + PRIME32_5;
6123 h32 += state->total_len_32;
6126 h32 += XXH_readLE32(p, endian) * PRIME32_3;
6127 h32 = XXH_rotl32(h32, 17) * PRIME32_4;
6132 h32 += (*p) * PRIME32_5;
6133 h32 = XXH_rotl32(h32, 11) * PRIME32_1;
6147 XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
6149 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
6151 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
6152 return XXH32_digest_endian(state_in, XXH_littleEndian);
6154 return XXH32_digest_endian(state_in, XXH_bigEndian);
6159 /* **** XXH64 **** */
6161 FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
6163 const BYTE* p = (const BYTE*)input;
6164 const BYTE* const bEnd = p + len;
6166 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
6167 if (input==NULL) return XXH_ERROR;
6170 state->total_len += len;
6172 if (state->memsize + len < 32) { /* fill in tmp buffer */
6173 if (input != NULL) {
6174 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
6176 state->memsize += (U32)len;
6180 if (state->memsize) { /* tmp buffer is full */
6181 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
6182 state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
6183 state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
6184 state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
6185 state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
6186 p += 32-state->memsize;
6191 const BYTE* const limit = bEnd - 32;
6198 v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
6199 v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
6200 v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
6201 v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
6211 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
6212 state->memsize = (unsigned)(bEnd-p);
6218 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
6220 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
6222 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
6223 return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
6225 return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
6230 FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
6232 const BYTE * p = (const BYTE*)state->mem64;
6233 const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
6236 if (state->total_len >= 32) {
6237 U64 const v1 = state->v1;
6238 U64 const v2 = state->v2;
6239 U64 const v3 = state->v3;
6240 U64 const v4 = state->v4;
6242 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
6243 h64 = XXH64_mergeRound(h64, v1);
6244 h64 = XXH64_mergeRound(h64, v2);
6245 h64 = XXH64_mergeRound(h64, v3);
6246 h64 = XXH64_mergeRound(h64, v4);
6248 h64 = state->v3 + PRIME64_5;
6251 h64 += (U64) state->total_len;
6254 U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
6256 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
6261 h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
6262 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
6267 h64 ^= (*p) * PRIME64_5;
6268 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
6282 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
6284 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
6286 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
6287 return XXH64_digest_endian(state_in, XXH_littleEndian);
6289 return XXH64_digest_endian(state_in, XXH_bigEndian);
6293 /* **************************
6294 * Canonical representation
6295 ****************************/
6297 /*! Default XXH result types are basic unsigned 32 and 64 bits.
6298 * The canonical representation follows human-readable write convention, aka big-endian (large digits first).
6299 * These functions allow transformation of hash result into and from its canonical format.
6300 * This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
6303 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
6305 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
6306 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
6307 memcpy(dst, &hash, sizeof(*dst));
6310 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
6312 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
6313 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
6314 memcpy(dst, &hash, sizeof(*dst));
6317 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
6319 return XXH_readBE32(src);
6322 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
6324 return XXH_readBE64(src);
6326 /**** ended inlining xxhash.c ****/
6329 #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
6332 #if defined (__cplusplus)
6335 /**** ended inlining xxhash.h ****/
6337 #if defined (__cplusplus)
/* ---- static assert (debug) --- */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
/* error-checking aliases : all modules route isError checks through the
 * shared ERR_isError helper, which enables inlining. */
#define ZSTD_isError ERR_isError   /* for inlining */
#define FSE_isError  ERR_isError
#define HUF_isError  ERR_isError
6348 /*-*************************************
6350 ***************************************/
/* Classic min/max macros.
 * Warning : each argument is evaluated twice — do not pass expressions
 * with side effects. */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
6357 * Ignore: this is an internal helper.
6359 * This is a helper function to help force C99-correctness during compilation.
6360 * Under strict compilation modes, variadic macro arguments can't be empty.
6361 * However, variadic function arguments can be. Using a function therefore lets
6362 * us statically check that at least one (string) argument was passed,
6363 * independent of the compilation flags.
6365 static INLINE_KEYWORD UNUSED_ATTR
6366 void _force_has_format_string(const char *format, ...) {
6371 * Ignore: this is an internal helper.
6373 * We want to force this function invocation to be syntactically correct, but
6374 * we don't want to force runtime evaluation of its arguments.
6376 #define _FORCE_HAS_FORMAT_STRING(...) \
6378 _force_has_format_string(__VA_ARGS__); \
/*
 * Return the specified error if the condition evaluates to true.
 *
 * In debug modes, prints additional information.
 * In order to do that (particularly, printing the conditional that failed),
 * this can't just wrap RETURN_ERROR().
 */
#define RETURN_ERROR_IF(cond, err, ...) \
  if (cond) { \
    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
    RAWLOG(3, ": " __VA_ARGS__); \
    RAWLOG(3, "\n"); \
    return ERROR(err); \
  }

/*
 * Unconditionally return the specified error.
 *
 * In debug modes, prints additional information.
 */
#define RETURN_ERROR(err, ...) \
  do { \
    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
    RAWLOG(3, ": " __VA_ARGS__); \
    RAWLOG(3, "\n"); \
    return ERROR(err); \
  } while(0);

/*
 * If the provided expression evaluates to an error code, returns that error code.
 *
 * In debug modes, prints additional information.
 */
#define FORWARD_IF_ERROR(err, ...) \
  do { \
    size_t const err_code = (err); \
    if (ERR_isError(err_code)) { \
      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
      RAWLOG(3, ": " __VA_ARGS__); \
      RAWLOG(3, "\n"); \
      return err_code; \
    } \
  } while(0);
6432 /*-*************************************
6434 ***************************************/
6435 #define ZSTD_OPT_NUM (1<<12)
6437 #define ZSTD_REP_NUM 3 /* number of repcodes */
6438 #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
6439 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
6441 #define KB *(1 <<10)
6442 #define MB *(1 <<20)
6443 #define GB *(1U<<30)
6452 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
6453 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
6454 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
6456 #define ZSTD_FRAMEIDSIZE 4 /* magic number size */
6458 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
6459 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
6460 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
6462 #define ZSTD_FRAMECHECKSUMSIZE 4
6464 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
6465 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
6468 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
6470 #define LONGNBSEQ 0x7F00
6475 #define MaxLit ((1<<Litbits) - 1)
6478 #define DefaultMaxOff 28
6480 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
6484 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
6486 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
6487 0, 0, 0, 0, 0, 0, 0, 0,
6488 1, 1, 1, 1, 2, 2, 3, 3,
6489 4, 6, 7, 8, 9,10,11,12,
6491 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
6492 2, 2, 2, 2, 2, 1, 1, 1,
6493 2, 2, 2, 2, 2, 2, 2, 2,
6494 2, 3, 2, 1, 1, 1, 1, 1,
6496 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
6497 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
6499 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
6500 0, 0, 0, 0, 0, 0, 0, 0,
6501 0, 0, 0, 0, 0, 0, 0, 0,
6502 0, 0, 0, 0, 0, 0, 0, 0,
6503 1, 1, 1, 1, 2, 2, 3, 3,
6504 4, 4, 5, 7, 8, 9,10,11,
6506 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
6507 2, 1, 1, 1, 1, 1, 1, 1,
6508 1, 1, 1, 1, 1, 1, 1, 1,
6509 1, 1, 1, 1, 1, 1, 1, 1,
6510 1, 1, 1, 1, 1, 1, 1, 1,
6511 1, 1, 1, 1, 1, 1,-1,-1,
6513 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
6514 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
6516 static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
6517 2, 1, 1, 1, 1, 1, 1, 1,
6518 1, 1, 1, 1, 1, 1, 1, 1,
6520 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
6521 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
6524 /*-*******************************************
6525 * Shared functions to include for inlining
6526 *********************************************/
/* Fixed-size 8-byte copy : NEON store/load on ARM, memcpy elsewhere
 * (compilers lower the fixed-size memcpy to a single load/store pair). */
static void ZSTD_copy8(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
    vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
    memcpy(dst, src, 8);
#endif
}

#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }

/* Fixed-size 16-byte copy, same strategy as ZSTD_copy8. */
static void ZSTD_copy16(void* dst, const void* src) {
#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON)
    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#else
    memcpy(dst, src, 16);
#endif
}

#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16

/* Overlap contract accepted by ZSTD_wildcopy(). */
typedef enum {
    ZSTD_no_overlap,
    ZSTD_overlap_src_before_dst
    /*  ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
6554 /*! ZSTD_wildcopy() :
6555 * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
6556 * @param ovtype controls the overlap detection
6557 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
6558 * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
6559 * The src buffer must be before the dst buffer.
6561 MEM_STATIC FORCE_INLINE_ATTR
6562 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
6564 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
6565 const BYTE* ip = (const BYTE*)src;
6566 BYTE* op = (BYTE*)dst;
6567 BYTE* const oend = op + length;
6569 assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
6571 if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
6572 /* Handle short offset copies. */
6575 } while (op < oend);
6577 assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
6578 /* Separate out the first COPY16() call because the copy length is
6579 * almost certain to be short, so the branches have different
6580 * probabilities. Since it is almost certain to be short, only do
6581 * one COPY16() in the first call. Then, do two calls per loop since
6582 * at that point it is more likely to have a high trip count.
6591 if (op >= oend) return;
6601 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
6603 size_t const length = MIN(dstCapacity, srcSize);
6605 memcpy(dst, src, length);
/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3

/* when workspace is continuously too large
 * during at least this number of times,
 * context's memory usage is considered wasteful,
 * because it's sized to handle a worst case scenario which rarely happens.
 * In which case, resize it down to free some memory */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
6621 /*-*******************************************
6622 * Private declarations
6623 *********************************************/
6624 typedef struct seqDef_s {
6631 seqDef* sequencesStart;
6640 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
6647 } ZSTD_sequenceLength;
6650 * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
6651 * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
6653 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
6655 ZSTD_sequenceLength seqLen;
6656 seqLen.litLength = seq->litLength;
6657 seqLen.matchLength = seq->matchLength + MINMATCH;
6658 if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
6659 if (seqStore->longLengthID == 1) {
6660 seqLen.litLength += 0xFFFF;
6662 if (seqStore->longLengthID == 2) {
6663 seqLen.matchLength += 0xFFFF;
6670 * Contains the compressed frame size and an upper-bound for the decompressed frame size.
6671 * Note: before using `compressedSize`, check for errors using ZSTD_isError().
6672 * similarly, before using `decompressedBound`, check for errors using:
6673 * `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
6676 size_t compressedSize;
6677 unsigned long long decompressedBound;
6678 } ZSTD_frameSizeInfo; /* decompress & legacy */
6680 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
6681 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
6683 /* custom memory allocation functions */
6684 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
6685 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
6686 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
6689 MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
6693 # if defined(_MSC_VER) /* Visual */
6695 return _BitScanReverse(&r, val) ? (unsigned)r : 0;
6696 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
6697 return __builtin_clz (val) ^ 31;
6698 # elif defined(__ICCARM__) /* IAR Intrinsic */
6699 return 31 - __CLZ(val);
6700 # else /* Software version */
6701 static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
6708 return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
6714 /* ZSTD_invalidateRepCodes() :
6715 * ensures next compression will not use repcodes from previous block.
6716 * Note : only works with regular variant;
6717 * do not use with extDict variant ! */
6718 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
6722 blockType_e blockType;
6725 } blockProperties_t; /* declared here for decompress and fullbench */
6727 /*! ZSTD_getcBlockSize() :
6728 * Provides the size of compressed block from block header `src` */
6729 /* Used by: decompress, fullbench (does not get its definition from here) */
6730 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
6731 blockProperties_t* bpPtr);
6733 /*! ZSTD_decodeSeqHeaders() :
6734 * decode sequence header from src */
6735 /* Used by: decompress, fullbench (does not get its definition from here) */
6736 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
6737 const void* src, size_t srcSize);
6740 #if defined (__cplusplus)
6744 #endif /* ZSTD_CCOMMON_H_MODULE */
6745 /**** ended inlining zstd_internal.h ****/
6746 /**** start inlining pool.h ****/
6748 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
6749 * All rights reserved.
6751 * This source code is licensed under both the BSD-style license (found in the
6752 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
6753 * in the COPYING file in the root directory of this source tree).
6754 * You may select, at your option, one of the above-listed licenses.
6760 #if defined (__cplusplus)
6765 #include <stddef.h> /* size_t */
6766 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
6767 /**** skipping file: ../zstd.h ****/
6769 typedef struct POOL_ctx_s POOL_ctx;
6772 * Create a thread pool with at most `numThreads` threads.
6773 * `numThreads` must be at least 1.
6774 * The maximum number of queued jobs before blocking is `queueSize`.
6775 * @return : POOL_ctx pointer on success, else NULL.
6777 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
6779 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
6780 ZSTD_customMem customMem);
6783 * Free a thread pool returned by POOL_create().
6785 void POOL_free(POOL_ctx* ctx);
6788 * Expands or shrinks pool's number of threads.
6789 * This is more efficient than releasing + creating a new context,
6790 * since it tries to preserve and re-use existing threads.
6791 * `numThreads` must be at least 1.
6792 * @return : 0 when resize was successful,
6793 * !0 (typically 1) if there is an error.
6794 * note : only numThreads can be resized, queueSize remains unchanged.
6796 int POOL_resize(POOL_ctx* ctx, size_t numThreads);
6799 * @return threadpool memory usage
6800 * note : compatible with NULL (returns 0 in this case)
6802 size_t POOL_sizeof(POOL_ctx* ctx);
6805 * The function type that can be added to a thread pool.
6807 typedef void (*POOL_function)(void*);
6810 * Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
6811 * Possibly blocks until there is room in the queue.
6812 * Note : The function may be executed asynchronously,
6813 * therefore, `opaque` must live until function has been completed.
6815 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
6819 * Add the job `function(opaque)` to thread pool _if_ a worker is available.
6820 * Returns immediately even if not (does not block).
6821 * @return : 1 if successful, 0 if not.
6823 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
6826 #if defined (__cplusplus)
6831 /**** ended inlining pool.h ****/
6833 /* ====== Compiler specifics ====== */
6834 #if defined(_MSC_VER)
6835 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
6839 #ifdef ZSTD_MULTITHREAD
6841 /**** start inlining threading.h ****/
6843 * Copyright (c) 2016 Tino Reichardt
6844 * All rights reserved.
6846 * You can contact the author at:
6847 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
6849 * This source code is licensed under both the BSD-style license (found in the
6850 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
6851 * in the COPYING file in the root directory of this source tree).
6852 * You may select, at your option, one of the above-listed licenses.
6855 #ifndef THREADING_H_938743
6856 #define THREADING_H_938743
6858 /**** skipping file: debug.h ****/
6860 #if defined (__cplusplus)
6864 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
6867 * Windows minimalist Pthread Wrapper, based on :
6868 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
6873 #define WINVER 0x0600
6876 # undef _WIN32_WINNT
6878 #define _WIN32_WINNT 0x0600
6880 #ifndef WIN32_LEAN_AND_MEAN
6881 # define WIN32_LEAN_AND_MEAN
6884 #undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
6885 #include <windows.h>
6887 #define ERROR(name) ZSTD_ERROR(name)
6891 #define ZSTD_pthread_mutex_t CRITICAL_SECTION
6892 #define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0)
6893 #define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
6894 #define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
6895 #define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))
6897 /* condition variable */
6898 #define ZSTD_pthread_cond_t CONDITION_VARIABLE
6899 #define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0)
6900 #define ZSTD_pthread_cond_destroy(a) ((void)(a))
6901 #define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
6902 #define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
6903 #define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
6905 /* ZSTD_pthread_create() and ZSTD_pthread_join() */
6908 void* (*start_routine)(void*);
6912 int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
6913 void* (*start_routine) (void*), void* arg);
6915 int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
6918 * add here more wrappers as required
6922 #elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */
6923 /* === POSIX Systems === */
6924 # include <pthread.h>
6928 #define ZSTD_pthread_mutex_t pthread_mutex_t
6929 #define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
6930 #define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
6931 #define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a))
6932 #define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a))
6934 #define ZSTD_pthread_cond_t pthread_cond_t
6935 #define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b))
6936 #define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a))
6937 #define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b))
6938 #define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a))
6939 #define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a))
6941 #define ZSTD_pthread_t pthread_t
6942 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
6943 #define ZSTD_pthread_join(a, b) pthread_join((a),(b))
6945 #else /* DEBUGLEVEL >= 1 */
6947 /* Debug implementation of threading.
6948 * In this implementation we use pointers for mutexes and condition variables.
6949 * This way, if we forget to init/destroy them the program will crash or ASAN
6950 * will report leaks.
6953 #define ZSTD_pthread_mutex_t pthread_mutex_t*
6954 int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
6955 int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
6956 #define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a))
6957 #define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a))
6959 #define ZSTD_pthread_cond_t pthread_cond_t*
6960 int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
6961 int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
6962 #define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b))
6963 #define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a))
6964 #define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a))
6966 #define ZSTD_pthread_t pthread_t
6967 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
6968 #define ZSTD_pthread_join(a, b) pthread_join((a),(b))
6972 #else /* ZSTD_MULTITHREAD not defined */
6973 /* No multithreading support */
6975 typedef int ZSTD_pthread_mutex_t;
6976 #define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0)
6977 #define ZSTD_pthread_mutex_destroy(a) ((void)(a))
6978 #define ZSTD_pthread_mutex_lock(a) ((void)(a))
6979 #define ZSTD_pthread_mutex_unlock(a) ((void)(a))
6981 typedef int ZSTD_pthread_cond_t;
6982 #define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0)
6983 #define ZSTD_pthread_cond_destroy(a) ((void)(a))
6984 #define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b))
6985 #define ZSTD_pthread_cond_signal(a) ((void)(a))
6986 #define ZSTD_pthread_cond_broadcast(a) ((void)(a))
6988 /* do not use ZSTD_pthread_t */
6990 #endif /* ZSTD_MULTITHREAD */
6992 #if defined (__cplusplus)
6996 #endif /* THREADING_H_938743 */
6997 /**** ended inlining threading.h ****/
6999 /* A job is a function and an opaque argument */
7000 typedef struct POOL_job_s {
7001 POOL_function function;
/* NOTE(review): this extraction is missing several lines of these two
 * declarations: the `void* opaque` field and closing of POOL_job_s, the
 * `struct POOL_ctx_s {` opener, and the POOL_ctx fields for the queue
 * storage (queue pointer, queueHead/queueTail/queueSize), threadLimit,
 * queueEmpty and shutdown flags -- confirm against the full source. */
7006 ZSTD_customMem customMem;
7007 /* Keep track of the threads */
7008 ZSTD_pthread_t* threads;
7009 size_t threadCapacity;
7012 /* The queue is a circular buffer */
7018 /* The number of threads working on jobs */
7019 size_t numThreadsBusy;
7020 /* Indicates if the queue is empty */
7023 /* The mutex protects the queue */
7024 ZSTD_pthread_mutex_t queueMutex;
7025 /* Condition variable for pushers to wait on when the queue is full */
7026 ZSTD_pthread_cond_t queuePushCond;
7027 /* Condition variables for poppers to wait on when the queue is empty */
7028 ZSTD_pthread_cond_t queuePopCond;
7029 /* Indicates if the queue is shutting down */
7034 * Work thread for the thread pool.
7035 * Waits for jobs and executes them.
7036 * @returns : NULL on failure else non-null.
7038 static void* POOL_thread(void* opaque) {
7039 POOL_ctx* const ctx = (POOL_ctx*)opaque;
7040 if (!ctx) { return NULL; }
/* NOTE(review): the enclosing `for (;;)` loop line, several closing braces,
 * and the `return opaque` on the shutdown path are missing from this
 * extraction -- the statements below are one iteration of that loop. */
7042 /* Lock the mutex and wait for a non-empty queue or until shutdown */
7043 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
/* sleep while there is nothing to pop, or while the thread limit
 * (see POOL_resize) forbids taking another job */
7045 while ( ctx->queueEmpty
7046 || (ctx->numThreadsBusy >= ctx->threadLimit) ) {
7047 if (ctx->shutdown) {
7048 /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit),
7049 * a few threads will be shutdown while !queueEmpty,
7050 * but enough threads will remain active to finish the queue */
7051 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
7054 ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
7056 /* Pop a job off the queue */
7057 { POOL_job const job = ctx->queue[ctx->queueHead];
7058 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
7059 ctx->numThreadsBusy++;
7060 ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
7061 /* Unlock the mutex, signal a pusher, and run the job */
7062 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
7063 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
/* job runs outside the lock so other workers can proceed concurrently */
7065 job.function(job.opaque);
7067 /* If the intended queue size was 0, signal after finishing job */
7068 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
7069 ctx->numThreadsBusy--;
7070 if (ctx->queueSize == 1) {
7071 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
7073 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
7076 assert(0); /* Unreachable */
/* Convenience constructor: delegates to POOL_create_advanced() using the
 * default allocator ZSTD_defaultCMem.
 * NOTE(review): the function's closing brace is missing from this extraction. */
7079 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
7080 return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
/* Creates a thread pool of `numThreads` workers sharing a circular job queue
 * of `queueSize` slots, allocated through `customMem`.
 * @return : the new pool, or NULL on invalid parameter / allocation failure.
 * NOTE(review): this extraction is missing the local declarations
 * (`POOL_ctx* ctx;`, `int error = 0;`, `size_t i;`), the cleanup path taken
 * when ZSTD_pthread_create fails mid-loop, the final `return ctx;`, and
 * several braces -- confirm against the full source. */
7083 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
7084 ZSTD_customMem customMem) {
7086 /* Check parameters */
7087 if (!numThreads) { return NULL; }
7088 /* Allocate the context and zero initialize */
7089 ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
7090 if (!ctx) { return NULL; }
7091 /* Initialize the job queue.
7092 * It needs one extra space since one space is wasted to differentiate
7093 * empty and full queues.
7095 ctx->queueSize = queueSize + 1;
7096 ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
7099 ctx->numThreadsBusy = 0;
7100 ctx->queueEmpty = 1;
/* mutex/cond init results are OR-ed so a single check catches any failure */
7103 error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
7104 error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
7105 error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
7106 if (error) { POOL_free(ctx); return NULL; }
7109 /* Allocate space for the thread handles */
7110 ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
/* threadCapacity stays 0 until threads actually start, so POOL_free only
 * joins threads that were successfully created */
7111 ctx->threadCapacity = 0;
7112 ctx->customMem = customMem;
7113 /* Check for errors */
7114 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
7115 /* Initialize the threads */
7117 for (i = 0; i < numThreads; ++i) {
7118 if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
7119 ctx->threadCapacity = i;
7123 ctx->threadCapacity = numThreads;
7124 ctx->threadLimit = numThreads;
7130 Shutdown the queue, wake any sleeping threads, and join all of the threads.
/* NOTE(review): the line that sets the shutdown flag under the mutex
 * (between the lock and unlock below) appears to be missing from this
 * extraction, as are the `size_t i;` declaration and closing braces. */
7132 static void POOL_join(POOL_ctx* ctx) {
7133 /* Shut down the queue */
7134 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
7136 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
7137 /* Wake up sleeping threads */
7138 ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
7139 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
7140 /* Join all of the threads */
7142 for (i = 0; i < ctx->threadCapacity; ++i) {
7143 ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
/* Destroys a pool created by POOL_create*(): destroys sync primitives and
 * releases the queue, the thread handles, and the context itself through the
 * pool's own customMem. NULL-tolerant.
 * NOTE(review): the POOL_join(ctx) call that must precede destruction (so no
 * worker still touches the mutex/conds) appears to be missing from this
 * extraction -- confirm against the full source. */
7147 void POOL_free(POOL_ctx *ctx) {
7148 if (!ctx) { return; }
7150 ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
7151 ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
7152 ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
7153 ZSTD_free(ctx->queue, ctx->customMem);
7154 ZSTD_free(ctx->threads, ctx->customMem);
7155 ZSTD_free(ctx, ctx->customMem);
/* Reports total memory retained by the pool: context + queue + thread
 * handles. Returns 0 for NULL.
 * NOTE(review): the `return sizeof(*ctx)` head of the expression below is
 * missing from this extraction. */
7160 size_t POOL_sizeof(POOL_ctx *ctx) {
7161 if (ctx==NULL) return 0; /* supports sizeof NULL */
7163 + ctx->queueSize * sizeof(POOL_job)
7164 + ctx->threadCapacity * sizeof(ZSTD_pthread_t);
7168 /* @return : 0 on success, 1 on error */
/* Grows or shrinks the pool's active thread count; must be called with
 * queueMutex held (see POOL_resize). Shrinking only lowers threadLimit
 * (existing threads are kept for reuse); growing reallocates the handle
 * array and spawns the missing threads.
 * NOTE(review): the `size_t threadId;` declaration, the `return 0;`/`return 1;`
 * statements and several closing braces are missing from this extraction. */
7169 static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
7171 if (numThreads <= ctx->threadCapacity) {
7172 if (!numThreads) return 1;
7173 ctx->threadLimit = numThreads;
7176 /* numThreads > threadCapacity */
7177 { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
7178 if (!threadPool) return 1;
7179 /* replace existing thread pool */
7180 memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
7181 ZSTD_free(ctx->threads, ctx->customMem);
7182 ctx->threads = threadPool;
7183 /* Initialize additional threads */
7185 for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) {
7186 if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) {
/* partial success: remember how many threads really exist so join/free stay correct */
7187 ctx->threadCapacity = threadId;
7191 /* successfully expanded */
7192 ctx->threadCapacity = numThreads;
7193 ctx->threadLimit = numThreads;
7197 /* @return : 0 on success, 1 on error */
/* Thread-safe wrapper around POOL_resize_internal(): takes the queue mutex,
 * resizes, then broadcasts queuePopCond so sleeping workers re-evaluate the
 * new threadLimit.
 * NOTE(review): the `int result;` declaration and `return result;` are
 * missing from this extraction. */
7198 int POOL_resize(POOL_ctx* ctx, size_t numThreads)
7201 if (ctx==NULL) return 1;
7202 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
7203 result = POOL_resize_internal(ctx, numThreads);
7204 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
7205 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
7210 * Returns 1 if the queue is full and 0 otherwise.
7212 * When queueSize is 1 (pool was created with an intended queueSize of 0),
7213 * then a queue is empty if there is a thread free _and_ no job is waiting.
/* Caller must hold queueMutex. For a real queue, full means head is one slot
 * ahead of tail (one slot is sacrificed to tell full from empty).
 * NOTE(review): the tail of the second return expression (the `!queueEmpty`
 * operand) and the closing braces are missing from this extraction. */
7215 static int isQueueFull(POOL_ctx const* ctx) {
7216 if (ctx->queueSize > 1) {
7217 return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
7219 return (ctx->numThreadsBusy == ctx->threadLimit) ||
/* Enqueues one job and wakes a waiting worker. Caller must hold queueMutex
 * and must have verified the queue is not full. No-op once shutdown started. */
7225 static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
7227 POOL_job const job = {function, opaque};
7228 assert(ctx != NULL);
7229 if (ctx->shutdown) return;
7231 ctx->queueEmpty = 0;
7232 ctx->queue[ctx->queueTail] = job;
7233 ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
7234 ZSTD_pthread_cond_signal(&ctx->queuePopCond);
/* Blocking enqueue: waits on queuePushCond until a slot frees up (or the
 * pool starts shutting down), then hands off to POOL_add_internal().
 * `opaque` must stay alive until the job has executed (see pool.h). */
7237 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
7239 assert(ctx != NULL);
7240 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
7241 /* Wait until there is space in the queue for the new job */
7242 while (isQueueFull(ctx) && (!ctx->shutdown)) {
7243 ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
7245 POOL_add_internal(ctx, function, opaque);
7246 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
/* Non-blocking enqueue: returns immediately when the queue is full.
 * NOTE(review): the `return 0;` after the early unlock and the final
 * `return 1;` are missing from this extraction (declared contract in pool.h:
 * 1 on success, 0 if not added). */
7250 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
7252 assert(ctx != NULL);
7253 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
7254 if (isQueueFull(ctx)) {
7255 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
7258 POOL_add_internal(ctx, function, opaque);
7259 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
7264 #else /* ZSTD_MULTITHREAD not defined */
7266 /* ========================== */
7267 /* No multi-threading support */
7268 /* ========================== */
/* Single-threaded fallback of the POOL_* API (ZSTD_MULTITHREAD undefined):
 * a single static dummy context is handed out, POOL_add runs the job
 * synchronously on the calling thread.
 * NOTE(review): several stub bodies are truncated in this extraction
 * (POOL_create_advanced's return of &g_ctx, POOL_add's direct
 * function(opaque) call, POOL_tryAdd's return value, closing braces) --
 * confirm against the full source. */
7271 /* We don't need any data, but if it is empty, malloc() might return NULL. */
7275 static POOL_ctx g_ctx;
7277 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
7278 return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
7281 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
7288 void POOL_free(POOL_ctx* ctx) {
/* only the shared static context can legally reach here */
7289 assert(!ctx || ctx == &g_ctx);
7293 int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
7294 (void)ctx; (void)numThreads;
7298 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
7303 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
7309 size_t POOL_sizeof(POOL_ctx* ctx) {
7310 if (ctx==NULL) return 0; /* supports sizeof NULL */
7311 assert(ctx == &g_ctx);
7312 return sizeof(*ctx);
7315 #endif /* ZSTD_MULTITHREAD */
7316 /**** ended inlining common/pool.c ****/
7317 /**** start inlining common/zstd_common.c ****/
7319 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
7320 * All rights reserved.
7322 * This source code is licensed under both the BSD-style license (found in the
7323 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7324 * in the COPYING file in the root directory of this source tree).
7325 * You may select, at your option, one of the above-listed licenses.
7330 /*-*************************************
7332 ***************************************/
7333 #include <stdlib.h> /* malloc, calloc, free */
7334 #include <string.h> /* memset */
7335 /**** skipping file: error_private.h ****/
7336 /**** skipping file: zstd_internal.h ****/
7339 /*-****************************************
7341 ******************************************/
7342 unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
7344 const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
7347 /*-****************************************
7348 * ZSTD Error Management
7349 ******************************************/
#undef ZSTD_isError /* defined within zstd_internal.h */
/*! ZSTD_isError() :
 *  Tells whether a size_t function result is an error code.
 *  Exported as a real symbol (not just the internal macro) because external
 *  callers need to link against it. */
unsigned ZSTD_isError(size_t code)
{
    return ERR_isError(code);
}
/*! ZSTD_getErrorName() :
 *  Maps a function result to a readable error string (useful for debugging).
 *  Thin public wrapper over the internal ERR_getErrorName(). */
const char* ZSTD_getErrorName(size_t code)
{
    return ERR_getErrorName(code);
}
7360 /*! ZSTD_getError() :
7361 * convert a `size_t` function result into a proper ZSTD_errorCode enum */
7362 ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
7364 /*! ZSTD_getErrorString() :
7365 * provides error code string from enum */
7366 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
7370 /*=**************************************************************
7372 ****************************************************************/
/* Allocation honoring an optional user-supplied allocator: uses
 * customMem.customAlloc when provided, else the C library malloc().
 * Caller releases the buffer with ZSTD_free() using the same customMem.
 * NOTE(review): the function's enclosing braces are missing from this
 * extraction. */
7373 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
7375 if (customMem.customAlloc)
7376 return customMem.customAlloc(customMem.opaque, size);
7377 return malloc(size);
7380 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem)
7382 if (customMem.customAlloc) {
7383 /* calloc implemented as malloc+memset;
7384 * not as efficient as calloc, but next best guess for custom malloc */
7385 void* const ptr = customMem.customAlloc(customMem.opaque, size);
7386 memset(ptr, 0, size);
7389 return calloc(1, size);
/* Releases memory obtained from ZSTD_malloc()/ZSTD_calloc(): routes through
 * customMem.customFree when one was provided.
 * NOTE(review): this extraction is missing lines -- presumably the NULL-ptr
 * guard, the `else free(ptr)` fallback matching ZSTD_malloc's default path,
 * and the closing braces. Confirm against the full source. */
7392 void ZSTD_free(void* ptr, ZSTD_customMem customMem)
7395 if (customMem.customFree)
7396 customMem.customFree(customMem.opaque, ptr);
7401 /**** ended inlining common/zstd_common.c ****/
7403 /**** start inlining compress/fse_compress.c ****/
7404 /* ******************************************************************
7405 * FSE : Finite State Entropy encoder
7406 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
7408 * You can contact the author at :
7409 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7410 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
7412 * This source code is licensed under both the BSD-style license (found in the
7413 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7414 * in the COPYING file in the root directory of this source tree).
7415 * You may select, at your option, one of the above-listed licenses.
7416 ****************************************************************** */
7418 /* **************************************************************
7420 ****************************************************************/
7421 #include <stdlib.h> /* malloc, free, qsort */
7422 #include <string.h> /* memcpy, memset */
7423 /**** skipping file: ../common/compiler.h ****/
7424 /**** skipping file: ../common/mem.h ****/
7425 /**** skipping file: ../common/debug.h ****/
7426 /**** start inlining hist.h ****/
7427 /* ******************************************************************
7428 * hist : Histogram functions
7429 * part of Finite State Entropy project
7430 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
7432 * You can contact the author at :
7433 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7434 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
7436 * This source code is licensed under both the BSD-style license (found in the
7437 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7438 * in the COPYING file in the root directory of this source tree).
7439 * You may select, at your option, one of the above-listed licenses.
7440 ****************************************************************** */
7442 /* --- dependencies --- */
7443 #include <stddef.h> /* size_t */
7446 /* --- simple histogram functions --- */
7449 * Provides the precise count of each byte within a table 'count'.
7450 * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
7451 * Updates *maxSymbolValuePtr with actual largest symbol value detected.
7452 * @return : count of the most frequent symbol (which isn't identified).
7453 * or an error code, which can be tested using HIST_isError().
7454 * note : if return == srcSize, there is only one symbol.
7456 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
7457 const void* src, size_t srcSize);
7459 unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */
7462 /* --- advanced histogram functions --- */
7464 #define HIST_WKSP_SIZE_U32 1024
7465 #define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
7466 /** HIST_count_wksp() :
7467 * Same as HIST_count(), but using an externally provided scratch buffer.
7468 * Benefit is this function will use very little stack space.
7469 * `workSpace` is a writable buffer which must be 4-bytes aligned,
7470 * `workSpaceSize` must be >= HIST_WKSP_SIZE
7472 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
7473 const void* src, size_t srcSize,
7474 void* workSpace, size_t workSpaceSize);
7476 /** HIST_countFast() :
7477 * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
7478 * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
7480 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
7481 const void* src, size_t srcSize);
7483 /** HIST_countFast_wksp() :
7484 * Same as HIST_countFast(), but using an externally provided scratch buffer.
7485 * `workSpace` is a writable buffer which must be 4-bytes aligned,
7486 * `workSpaceSize` must be >= HIST_WKSP_SIZE
7488 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
7489 const void* src, size_t srcSize,
7490 void* workSpace, size_t workSpaceSize);
7492 /*! HIST_count_simple() :
7493 * Same as HIST_countFast(), this function is unsafe,
7494 * and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
7495 * It is also a bit slower for large inputs.
7496 * However, it does not need any additional memory (not even on stack).
7497 * @return : count of the most frequent symbol.
7498 * Note this function doesn't produce any error (i.e. it must succeed).
7500 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
7501 const void* src, size_t srcSize);
7502 /**** ended inlining hist.h ****/
7503 /**** skipping file: ../common/bitstream.h ****/
7504 #define FSE_STATIC_LINKING_ONLY
7505 /**** skipping file: ../common/fse.h ****/
7506 /**** skipping file: ../common/error_private.h ****/
7509 /* **************************************************************
7511 ****************************************************************/
7512 #define FSE_isError ERR_isError
7515 /* **************************************************************
7517 ****************************************************************/
7519 designed to be included
7520 for type-specific functions (template emulation in C)
7521 Objective is to write these functions only once, for improved maintenance
7525 #ifndef FSE_FUNCTION_EXTENSION
7526 # error "FSE_FUNCTION_EXTENSION must be defined"
7528 #ifndef FSE_FUNCTION_TYPE
7529 # error "FSE_FUNCTION_TYPE must be defined"
7532 /* Function names */
7533 #define FSE_CAT(X,Y) X##Y
7534 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
7535 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
7538 /* Function templates */
7540 /* FSE_buildCTable_wksp() :
7541 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
7542 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
7543 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
/* Builds the FSE compression table `ct` from a normalized symbol
 * distribution: (1) compute cumulative symbol start positions, (2) spread
 * symbols over the state table, (3) build the next-state table, (4) build
 * the per-symbol transformation table (deltaNbBits/deltaFindState).
 * NOTE(review): this extraction is missing numerous lines (closing braces,
 * `cumul[0]` initialization, the `position`/`s` declarations, the `default:`
 * case of the switch, and the final `return 0;`) -- treat the body below as
 * a partial view and confirm against the full source. */
7545 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
7546 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
7547 void* workSpace, size_t wkspSize)
7549 U32 const tableSize = 1 << tableLog;
7550 U32 const tableMask = tableSize - 1;
7551 void* const ptr = ct;
7552 U16* const tableU16 = ( (U16*) ptr) + 2;
7553 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
7554 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
7555 U32 const step = FSE_TABLESTEP(tableSize);
7556 U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
7558 FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
/* low-probability symbols are packed at the high end of the table */
7559 U32 highThreshold = tableSize-1;
7562 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
7563 tableU16[-2] = (U16) tableLog;
7564 tableU16[-1] = (U16) maxSymbolValue;
7565 assert(tableLog < 16); /* required for threshold strategy to work */
7567 /* For explanations on how to distribute symbol values over the table :
7568 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
7570 #ifdef __clang_analyzer__
7571 memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
7574 /* symbol start positions */
7577 for (u=1; u <= maxSymbolValue+1; u++) {
7578 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
7579 cumul[u] = cumul[u-1] + 1;
7580 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
7582 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
7584 cumul[maxSymbolValue+1] = tableSize+1;
7587 /* Spread symbols */
7590 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
7592 int const freq = normalizedCounter[symbol];
7593 for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
7594 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
7595 position = (position + step) & tableMask;
7596 while (position > highThreshold)
7597 position = (position + step) & tableMask; /* Low proba area */
7600 assert(position==0); /* Must have initialized all positions */
7604 { U32 u; for (u=0; u<tableSize; u++) {
7605 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
7606 tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
7609 /* Build Symbol Transformation Table */
7610 { unsigned total = 0;
7612 for (s=0; s<=maxSymbolValue; s++) {
7613 switch (normalizedCounter[s])
7616 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
7617 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
/* case -1 / case 1 : single-state symbol, always emits tableLog bits */
7622 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
7623 symbolTT[s].deltaFindState = total - 1;
7628 U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
7629 U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
7630 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
7631 symbolTT[s].deltaFindState = total - normalizedCounter[s];
7632 total += normalizedCounter[s];
7635 #if 0 /* debug : symbol costs */
7636 DEBUGLOG(5, "\n --- table statistics : ");
7638 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
7639 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
7640 symbol, normalizedCounter[symbol],
7641 FSE_getMaxNbBits(symbolTT, symbol),
7642 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
/* Stack-workspace convenience wrapper around FSE_buildCTable_wksp().
 * NOTE(review): the function's enclosing braces are missing from this
 * extraction. */
7651 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
7653 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
7654 return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
7659 #ifndef FSE_COMMONDEFS_ONLY
7662 /*-**************************************************************
7663 * FSE NCount encoding
7664 ****************************************************************/
/* Upper bound, in bytes, of a serialized NCount header for the given
 * alphabet size and table log: ~tableLog bits per symbol, rounded up, +3.
 * NOTE(review): the function's enclosing braces are missing from this
 * extraction. */
7665 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
7667 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
7668 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
/* Serializes a normalized counter table into the compact FSE "NCount" bit
 * stream: 4 bits for (tableLog - FSE_MIN_TABLELOG), then per-symbol counts
 * with run-length encoding of zero runs and an adaptive bit width that
 * shrinks as `remaining` probability mass decreases. Flushes the 16-bit
 * accumulator to `out` two bytes at a time.
 * @param writeIsSafe : non-zero when the caller guarantees the buffer is
 *   large enough, allowing bound checks to be skipped.
 * NOTE(review): this extraction is missing lines (the return-type line of
 * the declaration, `out`/`bitStream`/`bitCount` declarations, bitCount
 * updates, loop braces, and the end-of-distribution check header) --
 * confirm against the full source. */
7672 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
7673 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
7674 unsigned writeIsSafe)
7676 BYTE* const ostart = (BYTE*) header;
7678 BYTE* const oend = ostart + headerBufferSize;
7680 const int tableSize = 1 << tableLog;
7685 unsigned symbol = 0;
7686 unsigned const alphabetSize = maxSymbolValue + 1;
7687 int previousIs0 = 0;
/* header starts with tableLog, biased by the minimum supported value */
7690 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
7694 remaining = tableSize+1; /* +1 for extra accuracy */
7695 threshold = tableSize;
7696 nbBits = tableLog+1;
7698 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
7700 unsigned start = symbol;
/* zero runs: each 0xFFFF chunk encodes 24 consecutive zero-count symbols */
7701 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
7702 if (symbol == alphabetSize) break; /* incorrect distribution */
7703 while (symbol >= start+24) {
7705 bitStream += 0xFFFFU << bitCount;
7706 if ((!writeIsSafe) && (out > oend-2))
7707 return ERROR(dstSize_tooSmall); /* Buffer overflow */
7708 out[0] = (BYTE) bitStream;
7709 out[1] = (BYTE)(bitStream>>8);
7713 while (symbol >= start+3) {
7715 bitStream += 3 << bitCount;
7718 bitStream += (symbol-start) << bitCount;
7721 if ((!writeIsSafe) && (out > oend - 2))
7722 return ERROR(dstSize_tooSmall); /* Buffer overflow */
7723 out[0] = (BYTE)bitStream;
7724 out[1] = (BYTE)(bitStream>>8);
7729 { int count = normalizedCounter[symbol++];
7730 int const max = (2*threshold-1) - remaining;
7731 remaining -= count < 0 ? -count : count;
7732 count++; /* +1 for extra accuracy */
7733 if (count>=threshold)
7734 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
7735 bitStream += count << bitCount;
7737 bitCount -= (count<max);
7738 previousIs0 = (count==1);
7739 if (remaining<1) return ERROR(GENERIC);
/* fewer symbols left to account for => fewer bits needed per count */
7740 while (remaining<threshold) { nbBits--; threshold>>=1; }
7743 if ((!writeIsSafe) && (out > oend - 2))
7744 return ERROR(dstSize_tooSmall); /* Buffer overflow */
7745 out[0] = (BYTE)bitStream;
7746 out[1] = (BYTE)(bitStream>>8);
7753 return ERROR(GENERIC); /* incorrect normalized distribution */
7754 assert(symbol <= alphabetSize);
7756 /* flush remaining bitStream */
7757 if ((!writeIsSafe) && (out > oend - 2))
7758 return ERROR(dstSize_tooSmall); /* Buffer overflow */
7759 out[0] = (BYTE)bitStream;
7760 out[1] = (BYTE)(bitStream>>8);
7761 out+= (bitCount+7) /8;
7763 return (out-ostart);
7767 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
7768 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
7770 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
7771 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
7773 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
7774 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
7776 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
7780 /*-**************************************************************
7781 * FSE Compression Code
7782 ****************************************************************/
7784 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
7787 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
7788 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
7789 return (FSE_CTable*)malloc(size);
7792 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
7794 /* provides the minimum logSize to safely represent a distribution */
7795 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
7797 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
7798 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
7799 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
7800 assert(srcSize > 1); /* Not supported, RLE should be used instead */
7804 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
7806 U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
7807 U32 tableLog = maxTableLog;
7808 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
7809 assert(srcSize > 1); /* Not supported, RLE should be used instead */
7810 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
7811 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
7812 if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
7813 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
7814 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
7818 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
7820 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
7824 /* Secondary normalization method.
7825 To be used when primary method fails. */
/* FSE_normalizeM2() :
 * Fallback normalization, used when the primary FSE_normalizeCount() method
 * over-corrects. Low-frequency symbols are pinned first, then the remaining
 * probability points are redistributed over the unassigned symbols.
 * NOTE(review): several branch bodies and closing braces fall on lines
 * elided from this excerpt. */
7827 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
7829 short const NOT_YET_ASSIGNED = -2;
7831 U32 distributed = 0;
7835 U32 const lowThreshold = (U32)(total >> tableLog);
7836 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
/* first pass: classify each symbol as zero / low / low-one / unassigned */
7838 for (s=0; s<=maxSymbolValue; s++) {
7839 if (count[s] == 0) {
7843 if (count[s] <= lowThreshold) {
7849 if (count[s] <= lowOne) {
7856 norm[s]=NOT_YET_ASSIGNED;
7858 ToDistribute = (1 << tableLog) - distributed;
7860 if (ToDistribute == 0)
7863 if ((total / ToDistribute) > lowOne) {
7864 /* risk of rounding to zero */
7865 lowOne = (U32)((total * 3) / (ToDistribute * 2));
7866 for (s=0; s<=maxSymbolValue; s++) {
7867 if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
7873 ToDistribute = (1 << tableLog) - distributed;
7876 if (distributed == maxSymbolValue+1) {
7877 /* all values are pretty poor;
7878 probably incompressible data (should have already been detected);
7879 find max, then give all remaining points to max */
7880 U32 maxV = 0, maxC = 0;
7881 for (s=0; s<=maxSymbolValue; s++)
7882 if (count[s] > maxC) { maxV=s; maxC=count[s]; }
7883 norm[maxV] += (short)ToDistribute;
7888 /* all of the symbols were low enough for the lowOne or lowThreshold */
7889 for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
7890 if (norm[s] > 0) { ToDistribute--; norm[s]++; }
/* general case: spread remaining points over unassigned symbols,
 * proportionally to their counts, using 62-bit fixed-point arithmetic */
7894 { U64 const vStepLog = 62 - tableLog;
7895 U64 const mid = (1ULL << (vStepLog-1)) - 1;
7896 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
7898 for (s=0; s<=maxSymbolValue; s++) {
7899 if (norm[s]==NOT_YET_ASSIGNED) {
7900 U64 const end = tmpTotal + (count[s] * rStep);
7901 U32 const sStart = (U32)(tmpTotal >> vStepLog);
7902 U32 const sEnd = (U32)(end >> vStepLog);
7903 U32 const weight = sEnd - sStart;
7905 return ERROR(GENERIC);
7906 norm[s] = (short)weight;
/* FSE_normalizeCount() :
 * Scales raw symbol counts so they sum to (1 << tableLog).
 * Writes results into normalizedCounter[]; -1 marks a low-probability symbol.
 * Returns 0 for the RLE special case, an error code on failure.
 * NOTE(review): declarations of `s`, `largest`, `largestP` and some closing
 * braces fall on lines elided from this excerpt. */
7914 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
7915 const unsigned* count, size_t total,
7916 unsigned maxSymbolValue)
7919 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
7920 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
7921 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
7922 if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
/* rtbTable: rounding-threshold lookup, indexed by the truncated probability */
7924 { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
7925 U64 const scale = 62 - tableLog;
7926 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
7927 U64 const vStep = 1ULL<<(scale-20);
7928 int stillToDistribute = 1<<tableLog;
7932 U32 lowThreshold = (U32)(total >> tableLog);
7934 for (s=0; s<=maxSymbolValue; s++) {
7935 if (count[s] == total) return 0; /* rle special case */
7936 if (count[s] == 0) { normalizedCounter[s]=0; continue; }
7937 if (count[s] <= lowThreshold) {
7938 normalizedCounter[s] = -1;
7939 stillToDistribute--;
/* scale count to the tableLog range via fixed-point multiply */
7941 short proba = (short)((count[s]*step) >> scale);
7943 U64 restToBeat = vStep * rtbTable[proba];
7944 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
7946 if (proba > largestP) { largestP=proba; largest=s; }
7947 normalizedCounter[s] = proba;
7948 stillToDistribute -= proba;
/* rounding slack is absorbed by the most probable symbol, unless it would
 * be distorted too much — then fall back to the secondary method */
7950 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
7951 /* corner case, need another normalization method */
7952 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
7953 if (FSE_isError(errorCode)) return errorCode;
7955 else normalizedCounter[largest] += (short)stillToDistribute;
7959 { /* Print Table (debug) */
7962 for (s=0; s<=maxSymbolValue; s++)
7963 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
7964 for (s=0; s<=maxSymbolValue; s++)
7965 nTotal += abs(normalizedCounter[s]);
7966 if (nTotal != (1U<<tableLog))
7967 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
7976 /* fake FSE_CTable, for raw (uncompressed) input */
/* Builds a pass-through table: every symbol costs exactly nbBits.
 * Table layout: 2 U16 header words (nbBits, maxSymbolValue), the state table,
 * then the symbol transformation table.
 * NOTE(review): the declaration of `s` and the trailing `return` fall on
 * lines elided from this excerpt. */
7977 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
7979 const unsigned tableSize = 1 << nbBits;
7980 const unsigned tableMask = tableSize - 1;
7981 const unsigned maxSymbolValue = tableMask;
7982 void* const ptr = ct;
7983 U16* const tableU16 = ( (U16*) ptr) + 2;
7984 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
7985 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
7989 if (nbBits < 1) return ERROR(GENERIC); /* min size */
/* header words sit just before the state table */
7992 tableU16[-2] = (U16) nbBits;
7993 tableU16[-1] = (U16) maxSymbolValue;
7996 for (s=0; s<tableSize; s++)
7997 tableU16[s] = (U16)(tableSize + s);
7999 /* Build Symbol Transformation Table */
8000 { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
8001 for (s=0; s<=maxSymbolValue; s++) {
8002 symbolTT[s].deltaNbBits = deltaNbBits;
8003 symbolTT[s].deltaFindState = s-1;
8009 /* fake FSE_CTable, for rle input (always same symbol) */
8010 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
8013 U16* tableU16 = ( (U16*) ptr) + 2;
8014 void* FSCTptr = (U32*)ptr + 2;
8015 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
8018 tableU16[-2] = (U16) 0;
8019 tableU16[-1] = (U16) symbolValue;
8023 tableU16[1] = 0; /* just in case */
8025 /* Build Symbol Transformation Table */
8026 symbolTT[symbolValue].deltaNbBits = 0;
8027 symbolTT[symbolValue].deltaFindState = 0;
/* FSE_compress_usingCTable_generic() :
 * Core FSE encoder. Consumes `src` BACKWARDS (ip starts at iend) through two
 * interleaved encoder states. `fast` selects unchecked bit flushes (only safe
 * when dst is large enough, see FSE_compress_usingCTable()).
 * NOTE(review): the `BIT_CStream_t bitC` declaration, the odd/even srcSize
 * branch structure, and several closing braces fall on lines elided from
 * this excerpt. */
8033 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
8034 const void* src, size_t srcSize,
8035 const FSE_CTable* ct, const unsigned fast)
8037 const BYTE* const istart = (const BYTE*) src;
8038 const BYTE* const iend = istart + srcSize;
8039 const BYTE* ip=iend;
8042 FSE_CState_t CState1, CState2;
8045 if (srcSize <= 2) return 0;
8046 { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
8047 if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
8049 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
/* initialization depends on srcSize parity, to keep the two states balanced */
8052 FSE_initCState2(&CState1, ct, *--ip);
8053 FSE_initCState2(&CState2, ct, *--ip);
8054 FSE_encodeSymbol(&bitC, &CState1, *--ip);
8055 FSE_FLUSHBITS(&bitC);
8057 FSE_initCState2(&CState2, ct, *--ip);
8058 FSE_initCState2(&CState1, ct, *--ip);
/* join to mod 4 : pre-consume 2 symbols when the container is wide enough */
8063 if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
8064 FSE_encodeSymbol(&bitC, &CState2, *--ip);
8065 FSE_encodeSymbol(&bitC, &CState1, *--ip);
8066 FSE_FLUSHBITS(&bitC);
8069 /* 2 or 4 encoding per loop */
8070 while ( ip>istart ) {
8072 FSE_encodeSymbol(&bitC, &CState2, *--ip);
8074 if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
8075 FSE_FLUSHBITS(&bitC);
8077 FSE_encodeSymbol(&bitC, &CState1, *--ip);
8079 if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */
8080 FSE_encodeSymbol(&bitC, &CState2, *--ip);
8081 FSE_encodeSymbol(&bitC, &CState1, *--ip);
8084 FSE_FLUSHBITS(&bitC);
8087 FSE_flushCState(&bitC, &CState2);
8088 FSE_flushCState(&bitC, &CState1);
8089 return BIT_closeCStream(&bitC);
8092 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
8093 const void* src, size_t srcSize,
8094 const FSE_CTable* ct)
8096 unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
8099 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
8101 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
8105 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
8107 /* FSE_compress_wksp() :
8108 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
8109 * `wkspSize` size must be `(1<<tableLog)`.
/* FSE_compress_wksp() :
 * Full FSE compression pipeline using a caller-supplied workspace:
 * histogram -> normalize -> write NCount header -> build CTable -> encode.
 * Returns 0 (not compressible), 1 (RLE), compressed size, or an error code.
 * NOTE(review): `op` advancement lines and several closing braces fall on
 * lines elided from this excerpt. */
8111 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
8113 BYTE* const ostart = (BYTE*) dst;
8115 BYTE* const oend = ostart + dstSize;
8117 unsigned count[FSE_MAX_SYMBOL_VALUE+1];
8118 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
/* workspace is partitioned: CTable first, remainder used as scratch */
8119 FSE_CTable* CTable = (FSE_CTable*)workSpace;
8120 size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
8121 void* scratchBuffer = (void*)(CTable + CTableSize);
8122 size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
8124 /* init conditions */
8125 if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
8126 if (srcSize <= 1) return 0; /* Not compressible */
8127 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
8128 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
8130 /* Scan input and build symbol stats */
8131 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
8132 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
8133 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
8134 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
8137 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
8138 CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
8140 /* Write table description header */
8141 { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
8146 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
8147 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
8148 if (cSize == 0) return 0; /* not enough space for compressed data */
8152 /* check compressibility */
8153 if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
8159 FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
8160 BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
8163 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
8165 fseWkspMax_t scratchBuffer;
8166 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
8167 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
8168 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
8171 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
8173 return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
8177 #endif /* FSE_COMMONDEFS_ONLY */
8178 /**** ended inlining compress/fse_compress.c ****/
8179 /**** start inlining compress/hist.c ****/
8180 /* ******************************************************************
8181 * hist : Histogram functions
8182 * part of Finite State Entropy project
8183 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
8185 * You can contact the author at :
8186 * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
8187 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8189 * This source code is licensed under both the BSD-style license (found in the
8190 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8191 * in the COPYING file in the root directory of this source tree).
8192 * You may select, at your option, one of the above-listed licenses.
8193 ****************************************************************** */
8195 /* --- dependencies --- */
8196 /**** skipping file: ../common/mem.h ****/
8197 /**** skipping file: ../common/debug.h ****/
8198 /**** skipping file: ../common/error_private.h ****/
8199 /**** skipping file: hist.h ****/
8202 /* --- Error management --- */
8203 unsigned HIST_isError(size_t code) { return ERR_isError(code); }
8205 /*-**************************************************************
8206 * Histogram functions
8207 ****************************************************************/
/* HIST_count_simple() :
 * Scalar byte-histogram. Fills count[0..maxSymbolValue], shrinks
 * *maxSymbolValuePtr to the largest symbol actually present, and returns
 * the largest single frequency.
 * NOTE(review): the main counting loop and the declaration of `s` fall on
 * lines elided from this excerpt; the assert at 8220 suggests it does not
 * bound-check symbols against maxSymbolValue — confirm against the full file. */
8208 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
8209 const void* src, size_t srcSize)
8211 const BYTE* ip = (const BYTE*)src;
8212 const BYTE* const end = ip + srcSize;
8213 unsigned maxSymbolValue = *maxSymbolValuePtr;
8214 unsigned largestCount=0;
8216 memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
8217 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
8220 assert(*ip <= maxSymbolValue);
/* trim trailing zero counters to report the true max symbol */
8224 while (!count[maxSymbolValue]) maxSymbolValue--;
8225 *maxSymbolValuePtr = maxSymbolValue;
8228 for (s=0; s<=maxSymbolValue; s++)
8229 if (count[s] > largestCount) largestCount = count[s];
8232 return largestCount;
8235 typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
8237 /* HIST_count_parallel_wksp() :
8238 * store histogram into 4 intermediate tables, recombined at the end.
8239 * this design makes better use of OoO cpus,
8240 * and is noticeably faster when some values are heavily repeated.
8241 * But it needs some additional workspace for intermediate tables.
8242 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
8243 * @return : largest histogram frequency,
8244 * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
/* HIST_count_parallel_wksp() :
 * Histogram over 4 independent 256-entry tables, recombined at the end —
 * reduces store-to-load dependencies on OoO cpus (see header comment above).
 * NOTE(review): declarations of `max`/`s` and some closing braces fall on
 * lines elided from this excerpt. */
8245 static size_t HIST_count_parallel_wksp(
8246 unsigned* count, unsigned* maxSymbolValuePtr,
8247 const void* source, size_t sourceSize,
8248 HIST_checkInput_e check,
8249 U32* const workSpace)
8251 const BYTE* ip = (const BYTE*)source;
8252 const BYTE* const iend = ip+sourceSize;
8253 unsigned maxSymbolValue = *maxSymbolValuePtr;
8255 U32* const Counting1 = workSpace;
8256 U32* const Counting2 = Counting1 + 256;
8257 U32* const Counting3 = Counting2 + 256;
8258 U32* const Counting4 = Counting3 + 256;
8260 memset(workSpace, 0, 4*256*sizeof(unsigned));
8264 memset(count, 0, maxSymbolValue + 1);
8265 *maxSymbolValuePtr = 0;
8268 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
8270 /* by stripes of 16 bytes */
/* `cached` pre-reads the next word so loads overlap the counter updates */
8271 { U32 cached = MEM_read32(ip); ip += 4;
8272 while (ip < iend-15) {
8273 U32 c = cached; cached = MEM_read32(ip); ip += 4;
8274 Counting1[(BYTE) c ]++;
8275 Counting2[(BYTE)(c>>8) ]++;
8276 Counting3[(BYTE)(c>>16)]++;
8277 Counting4[ c>>24 ]++;
8278 c = cached; cached = MEM_read32(ip); ip += 4;
8279 Counting1[(BYTE) c ]++;
8280 Counting2[(BYTE)(c>>8) ]++;
8281 Counting3[(BYTE)(c>>16)]++;
8282 Counting4[ c>>24 ]++;
8283 c = cached; cached = MEM_read32(ip); ip += 4;
8284 Counting1[(BYTE) c ]++;
8285 Counting2[(BYTE)(c>>8) ]++;
8286 Counting3[(BYTE)(c>>16)]++;
8287 Counting4[ c>>24 ]++;
8288 c = cached; cached = MEM_read32(ip); ip += 4;
8289 Counting1[(BYTE) c ]++;
8290 Counting2[(BYTE)(c>>8) ]++;
8291 Counting3[(BYTE)(c>>16)]++;
8292 Counting4[ c>>24 ]++;
8297 /* finish last symbols */
8298 while (ip<iend) Counting1[*ip++]++;
8300 if (check) { /* verify stats will fit into destination table */
8301 U32 s; for (s=255; s>maxSymbolValue; s--) {
8302 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
8303 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
8307 if (maxSymbolValue > 255) maxSymbolValue = 255;
/* recombine the 4 partial tables into the output histogram */
8308 for (s=0; s<=maxSymbolValue; s++) {
8309 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
8310 if (count[s] > max) max = count[s];
8313 while (!count[maxSymbolValue]) maxSymbolValue--;
8314 *maxSymbolValuePtr = maxSymbolValue;
8318 /* HIST_countFast_wksp() :
8319 * Same as HIST_countFast(), but using an externally provided scratch buffer.
8320 * `workSpace` is a writable buffer which must be 4-bytes aligned,
8321 * `workSpaceSize` must be >= HIST_WKSP_SIZE
8323 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
8324 const void* source, size_t sourceSize,
8325 void* workSpace, size_t workSpaceSize)
8327 if (sourceSize < 1500) /* heuristic threshold */
8328 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
8329 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
8330 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
8331 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
8334 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
8335 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
8336 const void* source, size_t sourceSize)
8338 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
8339 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
8342 /* HIST_count_wksp() :
8343 * Same as HIST_count(), but using an externally provided scratch buffer.
8344 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
8345 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
8346 const void* source, size_t sourceSize,
8347 void* workSpace, size_t workSpaceSize)
8349 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
8350 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
8351 if (*maxSymbolValuePtr < 255)
8352 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
8353 *maxSymbolValuePtr = 255;
8354 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
8357 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
8358 const void* src, size_t srcSize)
8360 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
8361 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
8363 /**** ended inlining compress/hist.c ****/
8364 /**** start inlining compress/huf_compress.c ****/
8365 /* ******************************************************************
8366 * Huffman encoder, part of New Generation Entropy library
8367 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
8369 * You can contact the author at :
8370 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8371 * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8373 * This source code is licensed under both the BSD-style license (found in the
8374 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
8375 * in the COPYING file in the root directory of this source tree).
8376 * You may select, at your option, one of the above-listed licenses.
8377 ****************************************************************** */
8379 /* **************************************************************
8380 * Compiler specifics
8381 ****************************************************************/
8382 #ifdef _MSC_VER /* Visual Studio */
8383 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
8387 /* **************************************************************
8389 ****************************************************************/
8390 #include <string.h> /* memcpy, memset */
8391 #include <stdio.h> /* printf (debug) */
8392 /**** skipping file: ../common/compiler.h ****/
8393 /**** skipping file: ../common/bitstream.h ****/
8394 /**** skipping file: hist.h ****/
8395 #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
8396 /**** skipping file: ../common/fse.h ****/
8397 #define HUF_STATIC_LINKING_ONLY
8398 /**** skipping file: ../common/huf.h ****/
8399 /**** skipping file: ../common/error_private.h ****/
8402 /* **************************************************************
8404 ****************************************************************/
8405 #define HUF_isError ERR_isError
8406 #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
8409 /* **************************************************************
8411 ****************************************************************/
8412 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
8414 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
8418 /* *******************************************************
8419 * HUF : Huffman block compression
8420 *********************************************************/
8421 /* HUF_compressWeights() :
8422 * Same as FSE_compress(), but dedicated to huff0's weights compression.
8423 * The use case needs much less stack memory.
8424 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
8426 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
/* HUF_compressWeights() :
 * FSE-compresses the Huffman weight table (values <= HUF_TABLELOG_MAX),
 * with a small dedicated table log to minimize stack usage.
 * Returns 0 (not compressible), 1 (rle) or the compressed size.
 * NOTE(review): the `op` declaration/advancement and some closing braces
 * fall on lines elided from this excerpt. */
8427 static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
8429 BYTE* const ostart = (BYTE*) dst;
8431 BYTE* const oend = ostart + dstSize;
8433 unsigned maxSymbolValue = HUF_TABLELOG_MAX;
8434 U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
8436 FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
8437 BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
8439 unsigned count[HUF_TABLELOG_MAX+1];
8440 S16 norm[HUF_TABLELOG_MAX+1];
8442 /* init conditions */
8443 if (wtSize <= 1) return 0; /* Not compressible */
8445 /* Scan input and build symbol stats */
8446 { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
8447 if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
8448 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
8451 tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
8452 CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
8454 /* Write table description header */
8455 { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
8460 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
8461 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
8462 if (cSize == 0) return 0; /* not enough space for compressed data */
8466 return (size_t)(op-ostart);
8473 }; /* typedef'd to HUF_CElt within "huf.h" */
8475 /*! HUF_writeCTable() :
8476 `CTable` : Huffman tree to save, using huf representation.
8477 @return : size of saved CTable */
/* HUF_writeCTable() :
 * Serializes a Huffman tree: converts bit-lengths to weights, tries FSE
 * compression of the weights, and falls back to raw 4-bit nibbles when FSE
 * does not help. First output byte encodes which representation follows.
 * NOTE(review): the `U32 n;` declaration and the `return hSize+1` of the
 * FSE-compressed branch fall on lines elided from this excerpt. */
8478 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
8479 const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
8481 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
8482 BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
8483 BYTE* op = (BYTE*)dst;
8486 /* check conditions */
8487 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
8489 /* convert to weight : weight = huffLog + 1 - nbBits (0 stays 0) */
8490 bitsToWeight[0] = 0;
8491 for (n=1; n<huffLog+1; n++)
8492 bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
8493 for (n=0; n<maxSymbolValue; n++)
8494 huffWeight[n] = bitsToWeight[CTable[n].nbBits];
8496 /* attempt weights compression by FSE */
8497 { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
8498 if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
8499 op[0] = (BYTE)hSize;
8503 /* write raw values as 4-bits (max : 15) */
8504 if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
8505 if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
8506 op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
8507 huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
8508 for (n=0; n<maxSymbolValue; n+=2)
8509 op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
8510 return ((maxSymbolValue+1)/2) + 1;
/* HUF_readCTable() :
 * Rebuilds a Huffman compression table (nbBits + canonical codes) from a
 * serialized weight header. Sets *hasZeroWeights when any symbol has
 * weight 0, and updates *maxSymbolValuePtr to the decoded alphabet size - 1.
 * NOTE(review): declarations of `readSize`, `nbSymbols`, `tableLog`, `min`,
 * and some closing braces fall on lines elided from this excerpt. */
8514 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
8516 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
8517 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
8521 /* get symbol weights */
8522 CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
8525 if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
8526 if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
8528 /* Prepare base value per rank */
8529 { U32 n, nextRankStart = 0;
8530 for (n=1; n<=tableLog; n++) {
8531 U32 current = nextRankStart;
8532 nextRankStart += (rankVal[n] << (n-1));
8533 rankVal[n] = current;
/* convert weights to bit lengths : nbBits = tableLog + 1 - w, 0 when w==0 */
8537 *hasZeroWeights = 0;
8538 { U32 n; for (n=0; n<nbSymbols; n++) {
8539 const U32 w = huffWeight[n];
8540 *hasZeroWeights |= (w == 0);
8541 CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
8545 { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
8546 U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
8547 { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
8548 /* determine starting value per rank */
8549 valPerRank[tableLog+1] = 0; /* for w==0 */
8551 U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */
8552 valPerRank[n] = min; /* get starting value within each rank */
8553 min += nbPerRank[n];
8556 /* assign value within rank, symbol order */
8557 { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
8560 *maxSymbolValuePtr = nbSymbols - 1;
8564 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
8566 const HUF_CElt* table = (const HUF_CElt*)symbolTable;
8567 assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
8568 return table[symbolValue].nbBits;
8572 typedef struct nodeElt_s {
/* HUF_setMaxHeight() :
 * Enforces a maximum code length on a sorted Huffman node list: truncates
 * all codes to maxNbBits, then "repays" the accumulated Kraft cost by
 * lengthening the cheapest suitable symbols, tracked per rank in rankLast[].
 * Returns the resulting largest bit length.
 * NOTE(review): some loop-closing braces and decrement statements fall on
 * lines elided from this excerpt. */
8579 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
8581 const U32 largestBits = huffNode[lastNonNull].nbBits;
8582 if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
8584 /* there are several too large elements (at least >= 2) */
8585 { int totalCost = 0;
8586 const U32 baseCost = 1 << (largestBits - maxNbBits);
8587 int n = (int)lastNonNull;
/* truncate every over-long code to maxNbBits, accumulating the cost */
8589 while (huffNode[n].nbBits > maxNbBits) {
8590 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
8591 huffNode[n].nbBits = (BYTE)maxNbBits;
8593 } /* n stops at huffNode[n].nbBits <= maxNbBits */
8594 while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
8596 /* renorm totalCost */
8597 totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
8599 /* repay normalized cost */
8600 { U32 const noSymbol = 0xF0F0F0F0;
8601 U32 rankLast[HUF_TABLELOG_MAX+2];
8603 /* Get pos of last (smallest) symbol per rank */
8604 memset(rankLast, 0xF0, sizeof(rankLast));
8605 { U32 currentNbBits = maxNbBits;
8607 for (pos=n ; pos >= 0; pos--) {
8608 if (huffNode[pos].nbBits >= currentNbBits) continue;
8609 currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
8610 rankLast[maxNbBits-currentNbBits] = (U32)pos;
8613 while (totalCost > 0) {
/* pick the deepest rank whose lengthening is profitable */
8614 U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
8615 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
8616 U32 const highPos = rankLast[nBitsToDecrease];
8617 U32 const lowPos = rankLast[nBitsToDecrease-1];
8618 if (highPos == noSymbol) continue;
8619 if (lowPos == noSymbol) break;
8620 { U32 const highTotal = huffNode[highPos].count;
8621 U32 const lowTotal = 2 * huffNode[lowPos].count;
8622 if (highTotal <= lowTotal) break;
8624 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
8625 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
8626 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
8628 totalCost -= 1 << (nBitsToDecrease-1);
8629 if (rankLast[nBitsToDecrease-1] == noSymbol)
8630 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
8631 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
8632 if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
8633 rankLast[nBitsToDecrease] = noSymbol;
8635 rankLast[nBitsToDecrease]--;
8636 if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
8637 rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
8638 } } /* while (totalCost > 0) */
8640 while (totalCost < 0) { /* Sometimes, cost correction overshoot */
8641 if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
8642 while (huffNode[n].nbBits == maxNbBits) n--;
8643 huffNode[n+1].nbBits--;
8645 rankLast[1] = (U32)(n+1);
8649 huffNode[ rankLast[1] + 1 ].nbBits--;
8652 } } } /* there are several too large elements (at least >= 2) */
8662 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
8664 #define RANK_POSITION_TABLE_SIZE 32
8667 huffNodeTable huffNodeTbl;
8668 rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
8669 } HUF_buildCTable_wksp_tables;
8671 static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
8675 memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
8676 for (n=0; n<=maxSymbolValue; n++) {
8677 U32 r = BIT_highbit32(count[n] + 1);
8678 rankPosition[r].base ++;
8680 for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
8681 for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
8682 for (n=0; n<=maxSymbolValue; n++) {
8683 U32 const c = count[n];
8684 U32 const r = BIT_highbit32(c+1) + 1;
8685 U32 pos = rankPosition[r].current++;
8686 while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
8687 huffNode[pos] = huffNode[pos-1];
8690 huffNode[pos].count = c;
8691 huffNode[pos].byte = (BYTE)n;
8696 /** HUF_buildCTable_wksp() :
8697 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
8698 * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
8700 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
8702 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
8704 HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
8705 nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
8706 nodeElt* const huffNode = huffNode0+1;
8709 int nodeNb = STARTNODE;
8713 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
8714 if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
8715 return ERROR(workSpace_tooSmall);
8716 if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
8717 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
8718 return ERROR(maxSymbolValue_tooLarge);
8719 memset(huffNode0, 0, sizeof(huffNodeTable));
8721 /* sort, decreasing order */
8722 HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
8724 /* init for parents */
8725 nonNullRank = (int)maxSymbolValue;
8726 while(huffNode[nonNullRank].count == 0) nonNullRank--;
8727 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
8728 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
8729 huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
8731 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
8732 huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
8734 /* create parents */
8735 while (nodeNb <= nodeRoot) {
8736 int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
8737 int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
8738 huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
8739 huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
8743 /* distribute weights (unlimited tree height) */
8744 huffNode[nodeRoot].nbBits = 0;
8745 for (n=nodeRoot-1; n>=STARTNODE; n--)
8746 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
8747 for (n=0; n<=nonNullRank; n++)
8748 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
8750 /* enforce maxTableLog */
8751 maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
8753 /* fill result into tree (val, nbBits) */
8754 { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
8755 U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
8756 int const alphabetSize = (int)(maxSymbolValue + 1);
8757 if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
8758 for (n=0; n<=nonNullRank; n++)
8759 nbPerRank[huffNode[n].nbBits]++;
8760 /* determine stating value per rank */
8762 for (n=(int)maxNbBits; n>0; n--) {
8763 valPerRank[n] = min; /* get starting value within each rank */
8764 min += nbPerRank[n];
8767 for (n=0; n<alphabetSize; n++)
8768 tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
8769 for (n=0; n<alphabetSize; n++)
8770 tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
8776 /** HUF_buildCTable() :
8777 * @return : maxNbBits
8778 * Note : count is used before tree is written, so they can safely overlap
8780 size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
8782 HUF_buildCTable_wksp_tables workspace;
8783 return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
8786 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
8790 for (s = 0; s <= (int)maxSymbolValue; ++s) {
8791 nbBits += CTable[s].nbBits * count[s];
8796 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
8799 for (s = 0; s <= (int)maxSymbolValue; ++s) {
8800 bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
/* HUF_compressBound() : worst-case compressed size for an input of `size` bytes */
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
8807 FORCE_INLINE_TEMPLATE void
8808 HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
8810 BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
/* unconditional flush of the bit accumulator */
#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* flush only if the container could overflow after 2 more max-length symbols
 * (i.e. on narrow bit containers); compiles away on 64-bit containers */
#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

/* same idea, threshold for 4 pending max-length symbols */
#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
8821 FORCE_INLINE_TEMPLATE size_t
8822 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
8823 const void* src, size_t srcSize,
8824 const HUF_CElt* CTable)
8826 const BYTE* ip = (const BYTE*) src;
8827 BYTE* const ostart = (BYTE*)dst;
8828 BYTE* const oend = ostart + dstSize;
8834 if (dstSize < 8) return 0; /* not enough space to compress */
8835 { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
8836 if (HUF_isError(initErr)) return 0; }
8838 n = srcSize & ~3; /* join to mod 4 */
8839 switch (srcSize & 3)
8841 case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
8842 HUF_FLUSHBITS_2(&bitC);
8844 case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
8845 HUF_FLUSHBITS_1(&bitC);
8847 case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
8848 HUF_FLUSHBITS(&bitC);
8850 case 0 : /* fall-through */
8854 for (; n>0; n-=4) { /* note : n&3==0 at this stage */
8855 HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
8856 HUF_FLUSHBITS_1(&bitC);
8857 HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
8858 HUF_FLUSHBITS_2(&bitC);
8859 HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
8860 HUF_FLUSHBITS_1(&bitC);
8861 HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
8862 HUF_FLUSHBITS(&bitC);
8865 return BIT_closeCStream(&bitC);
8870 static TARGET_ATTRIBUTE("bmi2") size_t
8871 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
8872 const void* src, size_t srcSize,
8873 const HUF_CElt* CTable)
8875 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
8879 HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
8880 const void* src, size_t srcSize,
8881 const HUF_CElt* CTable)
8883 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
8887 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
8888 const void* src, size_t srcSize,
8889 const HUF_CElt* CTable, const int bmi2)
8892 return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
8894 return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
8900 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
8901 const void* src, size_t srcSize,
8902 const HUF_CElt* CTable, const int bmi2)
8905 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
8910 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
8912 return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
8917 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
8918 const void* src, size_t srcSize,
8919 const HUF_CElt* CTable, int bmi2)
8921 size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
8922 const BYTE* ip = (const BYTE*) src;
8923 const BYTE* const iend = ip + srcSize;
8924 BYTE* const ostart = (BYTE*) dst;
8925 BYTE* const oend = ostart + dstSize;
8928 if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */
8929 if (srcSize < 12) return 0; /* no saving possible : too small input */
8930 op += 6; /* jumpTable */
8933 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
8934 if (cSize==0) return 0;
8935 assert(cSize <= 65535);
8936 MEM_writeLE16(ostart, (U16)cSize);
8942 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
8943 if (cSize==0) return 0;
8944 assert(cSize <= 65535);
8945 MEM_writeLE16(ostart+2, (U16)cSize);
8951 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
8952 if (cSize==0) return 0;
8953 assert(cSize <= 65535);
8954 MEM_writeLE16(ostart+4, (U16)cSize);
8961 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
8962 if (cSize==0) return 0;
8966 return (size_t)(op-ostart);
8969 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
8971 return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
/* selects between the 1-stream and 4-stream block formats */
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
8976 static size_t HUF_compressCTable_internal(
8977 BYTE* const ostart, BYTE* op, BYTE* const oend,
8978 const void* src, size_t srcSize,
8979 HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
8981 size_t const cSize = (nbStreams==HUF_singleStream) ?
8982 HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
8983 HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
8984 if (HUF_isError(cSize)) { return cSize; }
8985 if (cSize==0) { return 0; } /* uncompressible */
8987 /* check compressibility */
8988 assert(op >= ostart);
8989 if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
8990 return (size_t)(op-ostart);
8994 unsigned count[HUF_SYMBOLVALUE_MAX + 1];
8995 HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
8996 HUF_buildCTable_wksp_tables buildCTable_wksp;
8997 } HUF_compress_tables_t;
8999 /* HUF_compress_internal() :
9000 * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
9002 HUF_compress_internal (void* dst, size_t dstSize,
9003 const void* src, size_t srcSize,
9004 unsigned maxSymbolValue, unsigned huffLog,
9005 HUF_nbStreams_e nbStreams,
9006 void* workSpace, size_t wkspSize,
9007 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
9010 HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
9011 BYTE* const ostart = (BYTE*)dst;
9012 BYTE* const oend = ostart + dstSize;
9015 HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
9017 /* checks & inits */
9018 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
9019 if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
9020 if (!srcSize) return 0; /* Uncompressed */
9021 if (!dstSize) return 0; /* cannot fit anything within dst budget */
9022 if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
9023 if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
9024 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
9025 if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
9026 if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
9028 /* Heuristic : If old table is valid, use it for small inputs */
9029 if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
9030 return HUF_compressCTable_internal(ostart, op, oend,
9032 nbStreams, oldHufTable, bmi2);
9035 /* Scan input and build symbol stats */
9036 { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
9037 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
9038 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
9041 /* Check validity of previous table */
9043 && *repeat == HUF_repeat_check
9044 && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
9045 *repeat = HUF_repeat_none;
9047 /* Heuristic : use existing table for small inputs */
9048 if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
9049 return HUF_compressCTable_internal(ostart, op, oend,
9051 nbStreams, oldHufTable, bmi2);
9054 /* Build Huffman Tree */
9055 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
9056 { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
9057 maxSymbolValue, huffLog,
9058 &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
9060 huffLog = (U32)maxBits;
9061 /* Zero unused symbols in CTable, so we can check it for validity */
9062 memset(table->CTable + (maxSymbolValue + 1), 0,
9063 sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
9066 /* Write table description header */
9067 { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
9068 /* Check if using previous huffman table is beneficial */
9069 if (repeat && *repeat != HUF_repeat_none) {
9070 size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
9071 size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
9072 if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
9073 return HUF_compressCTable_internal(ostart, op, oend,
9075 nbStreams, oldHufTable, bmi2);
9078 /* Use the new huffman table */
9079 if (hSize + 12ul >= srcSize) { return 0; }
9081 if (repeat) { *repeat = HUF_repeat_none; }
9083 memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
9085 return HUF_compressCTable_internal(ostart, op, oend,
9087 nbStreams, table->CTable, bmi2);
9091 size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
9092 const void* src, size_t srcSize,
9093 unsigned maxSymbolValue, unsigned huffLog,
9094 void* workSpace, size_t wkspSize)
9096 return HUF_compress_internal(dst, dstSize, src, srcSize,
9097 maxSymbolValue, huffLog, HUF_singleStream,
9098 workSpace, wkspSize,
9099 NULL, NULL, 0, 0 /*bmi2*/);
9102 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
9103 const void* src, size_t srcSize,
9104 unsigned maxSymbolValue, unsigned huffLog,
9105 void* workSpace, size_t wkspSize,
9106 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
9108 return HUF_compress_internal(dst, dstSize, src, srcSize,
9109 maxSymbolValue, huffLog, HUF_singleStream,
9110 workSpace, wkspSize, hufTable,
9111 repeat, preferRepeat, bmi2);
9114 size_t HUF_compress1X (void* dst, size_t dstSize,
9115 const void* src, size_t srcSize,
9116 unsigned maxSymbolValue, unsigned huffLog)
9118 unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
9119 return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
9122 /* HUF_compress4X_repeat():
9123 * compress input using 4 streams.
9124 * provide workspace to generate compression tables */
9125 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
9126 const void* src, size_t srcSize,
9127 unsigned maxSymbolValue, unsigned huffLog,
9128 void* workSpace, size_t wkspSize)
9130 return HUF_compress_internal(dst, dstSize, src, srcSize,
9131 maxSymbolValue, huffLog, HUF_fourStreams,
9132 workSpace, wkspSize,
9133 NULL, NULL, 0, 0 /*bmi2*/);
9136 /* HUF_compress4X_repeat():
9137 * compress input using 4 streams.
9138 * re-use an existing huffman compression table */
9139 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
9140 const void* src, size_t srcSize,
9141 unsigned maxSymbolValue, unsigned huffLog,
9142 void* workSpace, size_t wkspSize,
9143 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
9145 return HUF_compress_internal(dst, dstSize, src, srcSize,
9146 maxSymbolValue, huffLog, HUF_fourStreams,
9147 workSpace, wkspSize,
9148 hufTable, repeat, preferRepeat, bmi2);
9151 size_t HUF_compress2 (void* dst, size_t dstSize,
9152 const void* src, size_t srcSize,
9153 unsigned maxSymbolValue, unsigned huffLog)
9155 unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
9156 return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
9159 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
9161 return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
9163 /**** ended inlining compress/huf_compress.c ****/
9164 /**** start inlining compress/zstd_compress_literals.c ****/
9166 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
9167 * All rights reserved.
9169 * This source code is licensed under both the BSD-style license (found in the
9170 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
9171 * in the COPYING file in the root directory of this source tree).
9172 * You may select, at your option, one of the above-listed licenses.
9175 /*-*************************************
9177 ***************************************/
9178 /**** start inlining zstd_compress_literals.h ****/
9180 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
9181 * All rights reserved.
9183 * This source code is licensed under both the BSD-style license (found in the
9184 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
9185 * in the COPYING file in the root directory of this source tree).
9186 * You may select, at your option, one of the above-listed licenses.
9189 #ifndef ZSTD_COMPRESS_LITERALS_H
9190 #define ZSTD_COMPRESS_LITERALS_H
9192 /**** start inlining zstd_compress_internal.h ****/
9194 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
9195 * All rights reserved.
9197 * This source code is licensed under both the BSD-style license (found in the
9198 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
9199 * in the COPYING file in the root directory of this source tree).
9200 * You may select, at your option, one of the above-listed licenses.
9203 /* This header contains definitions
9204 * that shall **only** be used by modules within lib/compress.
9207 #ifndef ZSTD_COMPRESS_H
9208 #define ZSTD_COMPRESS_H
9210 /*-*************************************
9212 ***************************************/
9213 /**** skipping file: ../common/zstd_internal.h ****/
9214 /**** start inlining zstd_cwksp.h ****/
9216 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
9217 * All rights reserved.
9219 * This source code is licensed under both the BSD-style license (found in the
9220 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
9221 * in the COPYING file in the root directory of this source tree).
9222 * You may select, at your option, one of the above-listed licenses.
9225 #ifndef ZSTD_CWKSP_H
9226 #define ZSTD_CWKSP_H
9228 /*-*************************************
9230 ***************************************/
9231 /**** skipping file: ../common/zstd_internal.h ****/
9233 #if defined (__cplusplus)
9237 /*-*************************************
9239 ***************************************/
9241 /* Since the workspace is effectively its own little malloc implementation /
9242 * arena, when we run under ASAN, we should similarly insert redzones between
9243 * each internal element of the workspace, so ASAN will catch overruns that
9244 * reach outside an object but that stay inside the workspace.
9246 * This defines the size of that redzone.
9248 #ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
9249 #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
9252 /*-*************************************
9254 ***************************************/
/* Allocation phases of the workspace. Categories must be reserved strictly in
 * this order (objects, then buffers, then aligned/tables); see ZSTD_cwksp docs.
 * (reconstructed: the `typedef enum {` opener was lost in extraction) */
typedef enum {
    ZSTD_cwksp_alloc_objects,
    ZSTD_cwksp_alloc_buffers,
    ZSTD_cwksp_alloc_aligned
} ZSTD_cwksp_alloc_phase_e;
9262 * Zstd fits all its internal datastructures into a single continuous buffer,
9263 * so that it only needs to perform a single OS allocation (or so that a buffer
9264 * can be provided to it and it can perform no allocations at all). This buffer
9265 * is called the workspace.
9267 * Several optimizations complicate that process of allocating memory ranges
9268 * from this workspace for each internal datastructure:
9270 * - These different internal datastructures have different setup requirements:
9272 * - The static objects need to be cleared once and can then be trivially
9273 * reused for each compression.
9275 * - Various buffers don't need to be initialized at all--they are always
9276 * written into before they're read.
9278 * - The matchstate tables have a unique requirement that they don't need
9279 * their memory to be totally cleared, but they do need the memory to have
9280 * some bound, i.e., a guarantee that all values in the memory they've been
9281 * allocated is less than some maximum value (which is the starting value
9282 * for the indices that they will then use for compression). When this
9283 * guarantee is provided to them, they can use the memory without any setup
9284 * work. When it can't, they have to clear the area.
9286 * - These buffers also have different alignment requirements.
9288 * - We would like to reuse the objects in the workspace for multiple
9289 * compressions without having to perform any expensive reallocation or
9290 * reinitialization work.
9292 * - We would like to be able to efficiently reuse the workspace across
9293 * multiple compressions **even when the compression parameters change** and
9294 * we need to resize some of the objects (where possible).
9296 * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
9297 * abstraction was created. It works as follows:
9301 * [ ... workspace ... ]
9302 * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
9304 * The various objects that live in the workspace are divided into the
9305 * following categories, and are allocated separately:
9307 * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
9308 * so that literally everything fits in a single buffer. Note: if present,
9309 * this must be the first object in the workspace, since ZSTD_free{CCtx,
9310 * CDict}() rely on a pointer comparison to see whether one or two frees are
9313 * - Fixed size objects: these are fixed-size, fixed-count objects that are
9314 * nonetheless "dynamically" allocated in the workspace so that we can
9315 * control how they're initialized separately from the broader ZSTD_CCtx.
9317 * - Entropy Workspace
9318 * - 2 x ZSTD_compressedBlockState_t
9319 * - CDict dictionary contents
9321 * - Tables: these are any of several different datastructures (hash tables,
9322 * chain tables, binary trees) that all respect a common format: they are
9323 * uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
9324 * Their sizes depend on the cparams.
9326 * - Aligned: these buffers are used for various purposes that require 4 byte
9327 * alignment, but don't require any initialization before they're used.
9329 * - Buffers: these buffers are used for various purposes that don't require
9330 * any alignment or initialization before they're used. This means they can
9331 * be moved around at no cost for a new compression.
9333 * Allocating Memory:
9335 * The various types of objects must be allocated in order, so they can be
9336 * correctly packed into the workspace buffer. That order is:
9343 * Attempts to reserve objects of different types out of order will fail.
9351 void* tableValidEnd;
9355 int workspaceOversizedDuration;
9356 ZSTD_cwksp_alloc_phase_e phase;
9359 /*-*************************************
9361 ***************************************/
9363 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
9365 MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
9367 assert(ws->workspace <= ws->objectEnd);
9368 assert(ws->objectEnd <= ws->tableEnd);
9369 assert(ws->objectEnd <= ws->tableValidEnd);
9370 assert(ws->tableEnd <= ws->allocStart);
9371 assert(ws->tableValidEnd <= ws->allocStart);
9372 assert(ws->allocStart <= ws->workspaceEnd);
9376 * Align must be a power of 2.
9378 MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
9379 size_t const mask = align - 1;
9380 assert((align & mask) == 0);
9381 return (size + mask) & ~mask;
9385 * Use this to determine how much space in the workspace we will consume to
9386 * allocate this object. (Normally it should be exactly the size of the object,
9387 * but under special conditions, like ASAN, where we pad each object, it might
9390 * Since tables aren't currently redzoned, you don't need to call through this
9391 * to figure out how much space you need for the matchState tables. Everything
9394 MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
9395 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9396 return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
9402 MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
9403 ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
9404 assert(phase >= ws->phase);
9405 if (phase > ws->phase) {
9406 if (ws->phase < ZSTD_cwksp_alloc_buffers &&
9407 phase >= ZSTD_cwksp_alloc_buffers) {
9408 ws->tableValidEnd = ws->objectEnd;
9410 if (ws->phase < ZSTD_cwksp_alloc_aligned &&
9411 phase >= ZSTD_cwksp_alloc_aligned) {
9412 /* If unaligned allocations down from a too-large top have left us
9413 * unaligned, we need to realign our alloc ptr. Technically, this
9414 * can consume space that is unaccounted for in the neededSpace
9415 * calculation. However, I believe this can only happen when the
9416 * workspace is too large, and specifically when it is too large
9417 * by a larger margin than the space that will be consumed. */
9418 /* TODO: cleaner, compiler warning friendly way to do this??? */
9419 ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
9420 if (ws->allocStart < ws->tableValidEnd) {
9421 ws->tableValidEnd = ws->allocStart;
9429 * Returns whether this object/buffer/etc was allocated in this workspace.
9431 MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
9432 return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
9436 * Internal function. Do not use directly.
9438 MEM_STATIC void* ZSTD_cwksp_reserve_internal(
9439 ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
9441 void* bottom = ws->tableEnd;
9442 ZSTD_cwksp_internal_advance_phase(ws, phase);
9443 alloc = (BYTE *)ws->allocStart - bytes;
9445 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9446 /* over-reserve space */
9447 alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
9450 DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
9451 alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
9452 ZSTD_cwksp_assert_internal_consistency(ws);
9453 assert(alloc >= bottom);
9454 if (alloc < bottom) {
9455 DEBUGLOG(4, "cwksp: alloc failed!");
9456 ws->allocFailed = 1;
9459 if (alloc < ws->tableValidEnd) {
9460 ws->tableValidEnd = alloc;
9462 ws->allocStart = alloc;
9464 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9465 /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
9467 alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
9468 __asan_unpoison_memory_region(alloc, bytes);
9475 * Reserves and returns unaligned memory.
9477 MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
9478 return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
9482 * Reserves and returns memory sized on and aligned on sizeof(unsigned).
9484 MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
9485 assert((bytes & (sizeof(U32)-1)) == 0);
9486 return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
9490 * Aligned on sizeof(unsigned). These buffers have the special property that
9491 * their values remain constrained, allowing us to re-use them without
9492 * memset()-ing them.
9494 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
9495 const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
9496 void* alloc = ws->tableEnd;
9497 void* end = (BYTE *)alloc + bytes;
9498 void* top = ws->allocStart;
9500 DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
9501 alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
9502 assert((bytes & (sizeof(U32)-1)) == 0);
9503 ZSTD_cwksp_internal_advance_phase(ws, phase);
9504 ZSTD_cwksp_assert_internal_consistency(ws);
9507 DEBUGLOG(4, "cwksp: table alloc failed!");
9508 ws->allocFailed = 1;
9513 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9514 __asan_unpoison_memory_region(alloc, bytes);
9521 * Aligned on sizeof(void*).
9523 MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
9524 size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
9525 void* alloc = ws->objectEnd;
9526 void* end = (BYTE*)alloc + roundedBytes;
9528 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9529 /* over-reserve space */
9530 end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
9534 "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
9535 alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
9536 assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
9537 assert((bytes & (sizeof(void*)-1)) == 0);
9538 ZSTD_cwksp_assert_internal_consistency(ws);
9539 /* we must be in the first phase, no advance is possible */
9540 if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
9541 DEBUGLOG(4, "cwksp: object alloc failed!");
9542 ws->allocFailed = 1;
9545 ws->objectEnd = end;
9547 ws->tableValidEnd = end;
9549 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9550 /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
9552 alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
9553 __asan_unpoison_memory_region(alloc, bytes);
9559 MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
9560 DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
9562 #if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
9563 /* To validate that the table re-use logic is sound, and that we don't
9564 * access table space that we haven't cleaned, we re-"poison" the table
9565 * space every time we mark it dirty. */
9567 size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
9568 assert(__msan_test_shadow(ws->objectEnd, size) == -1);
9569 __msan_poison(ws->objectEnd, size);
9573 assert(ws->tableValidEnd >= ws->objectEnd);
9574 assert(ws->tableValidEnd <= ws->allocStart);
9575 ws->tableValidEnd = ws->objectEnd;
9576 ZSTD_cwksp_assert_internal_consistency(ws);
9579 MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
9580 DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
9581 assert(ws->tableValidEnd >= ws->objectEnd);
9582 assert(ws->tableValidEnd <= ws->allocStart);
9583 if (ws->tableValidEnd < ws->tableEnd) {
9584 ws->tableValidEnd = ws->tableEnd;
9586 ZSTD_cwksp_assert_internal_consistency(ws);
9590 * Zero the part of the allocated tables not already marked clean.
9592 MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
9593 DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
9594 assert(ws->tableValidEnd >= ws->objectEnd);
9595 assert(ws->tableValidEnd <= ws->allocStart);
9596 if (ws->tableValidEnd < ws->tableEnd) {
9597 memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
9599 ZSTD_cwksp_mark_tables_clean(ws);
9603 * Invalidates table allocations.
9604 * All other allocations remain valid.
9606 MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
9607 DEBUGLOG(4, "cwksp: clearing tables!");
9609 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9611 size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
9612 __asan_poison_memory_region(ws->objectEnd, size);
9616 ws->tableEnd = ws->objectEnd;
9617 ZSTD_cwksp_assert_internal_consistency(ws);
9621 * Invalidates all buffer, aligned, and table allocations.
9622 * Object allocations remain valid.
9624 MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
9625 DEBUGLOG(4, "cwksp: clearing!");
9627 #if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
9628 /* To validate that the context re-use logic is sound, and that we don't
9629 * access stuff that this compression hasn't initialized, we re-"poison"
9630 * the workspace (or at least the non-static, non-table parts of it)
9631 * every time we start a new compression. */
9633 size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
9634 __msan_poison(ws->tableValidEnd, size);
9638 #if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
9640 size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
9641 __asan_poison_memory_region(ws->objectEnd, size);
9645 ws->tableEnd = ws->objectEnd;
9646 ws->allocStart = ws->workspaceEnd;
9647 ws->allocFailed = 0;
9648 if (ws->phase > ZSTD_cwksp_alloc_buffers) {
9649 ws->phase = ZSTD_cwksp_alloc_buffers;
9651 ZSTD_cwksp_assert_internal_consistency(ws);
9655 * The provided workspace takes ownership of the buffer [start, start+size).
9656 * Any existing values in the workspace are ignored (the previously managed
9657 * buffer, if present, must be separately freed).
9659 MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
9660 DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
9661 assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
9662 ws->workspace = start;
9663 ws->workspaceEnd = (BYTE*)start + size;
9664 ws->objectEnd = ws->workspace;
9665 ws->tableValidEnd = ws->objectEnd;
9666 ws->phase = ZSTD_cwksp_alloc_objects;
9667 ZSTD_cwksp_clear(ws);
9668 ws->workspaceOversizedDuration = 0;
9669 ZSTD_cwksp_assert_internal_consistency(ws);
9672 MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
9673 void* workspace = ZSTD_malloc(size, customMem);
9674 DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
9675 RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
9676 ZSTD_cwksp_init(ws, workspace, size);
9680 MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
9681 void *ptr = ws->workspace;
9682 DEBUGLOG(4, "cwksp: freeing workspace");
9683 memset(ws, 0, sizeof(ZSTD_cwksp));
9684 ZSTD_free(ptr, customMem);
9688 * Moves the management of a workspace from one cwksp to another. The src cwksp
9689 * is left in an invalid state (src must be re-init()'ed before its used again).
9691 MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
9693 memset(src, 0, sizeof(ZSTD_cwksp));
9696 MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
9697 return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
9700 MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
9701 return ws->allocFailed;
9704 /*-*************************************
9705 * Functions Checking Free Space
9706 ***************************************/
9708 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
9709 return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
9712 MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
9713 return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
9716 MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
9717 return ZSTD_cwksp_check_available(
9718 ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
9721 MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
9722 return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
9723 && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
9726 MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
9727 ZSTD_cwksp* ws, size_t additionalNeededSpace) {
9728 if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
9729 ws->workspaceOversizedDuration++;
9731 ws->workspaceOversizedDuration = 0;
9735 #if defined (__cplusplus)
9739 #endif /* ZSTD_CWKSP_H */
9740 /**** ended inlining zstd_cwksp.h ****/
9741 #ifdef ZSTD_MULTITHREAD
9742 /**** start inlining zstdmt_compress.h ****/
9744 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
9745 * All rights reserved.
9747 * This source code is licensed under both the BSD-style license (found in the
9748 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
9749 * in the COPYING file in the root directory of this source tree).
9750 * You may select, at your option, one of the above-listed licenses.
9753 #ifndef ZSTDMT_COMPRESS_H
9754 #define ZSTDMT_COMPRESS_H
9756 #if defined (__cplusplus)
9761 /* Note : This is an internal API.
9762 * These APIs used to be exposed with ZSTDLIB_API,
9763 * because it used to be the only way to invoke MT compression.
9764 * Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
9767 * If you depend on these APIs and can't switch, then define
9768 * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
9769 * However, we may completely remove these functions in a future
9770 * release, so please switch soon.
9772 * This API requires ZSTD_MULTITHREAD to be defined during compilation,
9773 * otherwise ZSTDMT_createCCtx*() will fail.
9776 #ifdef ZSTD_LEGACY_MULTITHREADED_API
9777 # define ZSTDMT_API ZSTDLIB_API
9782 /* === Dependencies === */
9783 #include <stddef.h> /* size_t */
9784 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
9785 /**** skipping file: ../zstd.h ****/
9788 /* === Constants === */
9789 #ifndef ZSTDMT_NBWORKERS_MAX
9790 # define ZSTDMT_NBWORKERS_MAX 200
9792 #ifndef ZSTDMT_JOBSIZE_MIN
9793 # define ZSTDMT_JOBSIZE_MIN (1 MB)
9795 #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
9796 #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
9799 /* === Memory management === */
9800 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
9801 /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
9802 ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
9803 /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
9804 ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
9805 ZSTD_customMem cMem);
9806 ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
9808 ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
9811 /* === Simple one-pass compression function === */
9813 ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
9814 void* dst, size_t dstCapacity,
9815 const void* src, size_t srcSize,
9816 int compressionLevel);
9820 /* === Streaming functions === */
9822 ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
9823 ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
9825 ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
9826 ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
9828 ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
9829 ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
9832 /* === Advanced functions and parameters === */
9834 ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
9835 void* dst, size_t dstCapacity,
9836 const void* src, size_t srcSize,
9837 const ZSTD_CDict* cdict,
9838 ZSTD_parameters params,
9841 ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
9842 const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
9843 ZSTD_parameters params,
9844 unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
9846 ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
9847 const ZSTD_CDict* cdict,
9848 ZSTD_frameParameters fparams,
9849 unsigned long long pledgedSrcSize); /* note : zero means empty */
9851 /* ZSTDMT_parameter :
9852 * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
9854 ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
9855 ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
9856 ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
9859 /* ZSTDMT_setMTCtxParameter() :
9860 * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
9861 * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
9862 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
9863 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
9864 ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
9866 /* ZSTDMT_getMTCtxParameter() :
9867 * Query the ZSTDMT_CCtx for a parameter value.
9868 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
9869 ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
9872 /*! ZSTDMT_compressStream_generic() :
9873 * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
9874 * depending on flush directive.
9875 * @return : minimum amount of data still to be flushed
9876 * 0 if fully flushed
9878 * note : needs to be init using any ZSTD_initCStream*() variant */
9879 ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
9880 ZSTD_outBuffer* output,
9881 ZSTD_inBuffer* input,
9882 ZSTD_EndDirective endOp);
9885 /* ========================================================
9886 * === Private interface, for use by ZSTD_compress.c ===
9887 * === Not exposed in libzstd. Never invoke directly ===
9888 * ======================================================== */
9890 /*! ZSTDMT_toFlushNow()
9891 * Tell how many bytes are ready to be flushed immediately.
9892 * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
9893 * If return 0, it means there is no active job,
9894 * or, it means oldest job is still active, but everything produced has been flushed so far,
9895 * therefore flushing is limited by speed of oldest job. */
9896 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
9898 /*! ZSTDMT_CCtxParam_setMTCtxParameter()
9899 * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
9900 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
9902 /*! ZSTDMT_CCtxParam_setNbWorkers()
9903 * Set nbWorkers, and clamp it.
9904 * Also reset jobSize and overlapLog */
9905 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
9907 /*! ZSTDMT_updateCParams_whileCompressing() :
9908 * Updates only a selected set of compression parameters, to remain compatible with current frame.
9909 * New parameters will be applied to next compression job. */
9910 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
9912 /*! ZSTDMT_getFrameProgression():
9913 * tells how much data has been consumed (input) and produced (output) for current frame.
9914 * able to count progression inside worker threads.
9916 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
9919 /*! ZSTDMT_initCStream_internal() :
9920 * Private use only. Init streaming operation.
9921 * expects params to be valid.
9922 * must receive dict, or cdict, or none, but not both.
9923 * @return : 0, or an error code */
9924 size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
9925 const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
9926 const ZSTD_CDict* cdict,
9927 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
9930 #if defined (__cplusplus)
9934 #endif /* ZSTDMT_COMPRESS_H */
9935 /**** ended inlining zstdmt_compress.h ****/
9938 #if defined (__cplusplus)
9943 /*-*************************************
9945 ***************************************/
9946 #define kSearchStrength 8
9947 #define HASH_READ_SIZE 8
9948 #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
9949 It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
9950 It's not a big deal though : candidate will just be sorted again.
9951 Additionally, candidate position 1 will be lost.
9952 But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
9953 The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
9954 This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
9957 /*-*************************************
9958 * Context memory management
9959 ***************************************/
9960 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
9961 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
9963 typedef struct ZSTD_prefixDict_s {
9966 ZSTD_dictContentType_e dictContentType;
9973 ZSTD_dictContentType_e dictContentType;
9978 U32 CTable[HUF_CTABLE_SIZE_U32(255)];
9979 HUF_repeat repeatMode;
9980 } ZSTD_hufCTables_t;
9983 FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
9984 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
9985 FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
9986 FSE_repeat offcode_repeatMode;
9987 FSE_repeat matchlength_repeatMode;
9988 FSE_repeat litlength_repeatMode;
9989 } ZSTD_fseCTables_t;
9992 ZSTD_hufCTables_t huf;
9993 ZSTD_fseCTables_t fse;
9994 } ZSTD_entropyCTables_t;
10006 U32 rep[ZSTD_REP_NUM];
10009 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
10012 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
10013 unsigned* litFreq; /* table of literals statistics, of size 256 */
10014 unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
10015 unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
10016 unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
10017 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
10018 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
10020 U32 litSum; /* nb of literals */
10021 U32 litLengthSum; /* nb of litLength codes */
10022 U32 matchLengthSum; /* nb of matchLength codes */
10023 U32 offCodeSum; /* nb of offset codes */
10024 U32 litSumBasePrice; /* to compare to log2(litfreq) */
10025 U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
10026 U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
10027 U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
10028 ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
10029 const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
10030 ZSTD_literalCompressionMode_e literalCompressionMode;
10034 ZSTD_entropyCTables_t entropy;
10035 U32 rep[ZSTD_REP_NUM];
10036 } ZSTD_compressedBlockState_t;
10039 BYTE const* nextSrc; /* next block here to continue on current prefix */
10040 BYTE const* base; /* All regular indexes relative to this position */
10041 BYTE const* dictBase; /* extDict indexes relative to this position */
10042 U32 dictLimit; /* below that point, need extDict */
10043 U32 lowLimit; /* below that point, no more valid data */
10046 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
10047 struct ZSTD_matchState_t {
10048 ZSTD_window_t window; /* State for window round buffer management */
10049 U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
10050 * When loadedDictEnd != 0, a dictionary is in use, and still valid.
10051 * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
10052 * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
10053 * When dict referential is copied into active context (i.e. not attached),
10054 * loadedDictEnd == dictSize, since referential starts from zero.
10056 U32 nextToUpdate; /* index from which to continue table update */
10057 U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
10061 optState_t opt; /* optimal parser state */
10062 const ZSTD_matchState_t* dictMatchState;
10063 ZSTD_compressionParameters cParams;
10067 ZSTD_compressedBlockState_t* prevCBlock;
10068 ZSTD_compressedBlockState_t* nextCBlock;
10069 ZSTD_matchState_t matchState;
10070 } ZSTD_blockState_t;
10078 ZSTD_window_t window; /* State for the window round buffer management */
10079 ldmEntry_t* hashTable;
10081 BYTE* bucketOffsets; /* Next position in bucket to insert entry */
10082 U64 hashPower; /* Used to compute the rolling hash.
10083 * Depends on ldmParams.minMatchLength */
10087 U32 enableLdm; /* 1 if enable long distance matching */
10088 U32 hashLog; /* Log size of hashTable */
10089 U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
10090 U32 minMatchLength; /* Minimum match length */
10091 U32 hashRateLog; /* Log number of entries to skip */
10092 U32 windowLog; /* Window log for the LDM */
10102 rawSeq* seq; /* The start of the sequences */
10103 size_t pos; /* The position where reading stopped. <= size. */
10104 size_t size; /* The number of sequences. <= capacity. */
10105 size_t capacity; /* The capacity starting from `seq` pointer */
10109 int collectSequences;
10110 ZSTD_Sequence* seqStart;
10112 size_t maxSequences;
10115 struct ZSTD_CCtx_params_s {
10116 ZSTD_format_e format;
10117 ZSTD_compressionParameters cParams;
10118 ZSTD_frameParameters fParams;
10120 int compressionLevel;
10121 int forceWindow; /* force back-references to respect limit of
10122 * 1<<wLog, even for dictionary */
10123 size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
10124 * No target when targetCBlockSize == 0.
10125 * There is no guarantee on compressed block size */
10126 int srcSizeHint; /* User's best guess of source size.
10127 * Hint is not valid when srcSizeHint == 0.
10128 * There is no guarantee that hint is close to actual source size */
10130 ZSTD_dictAttachPref_e attachDictPref;
10131 ZSTD_literalCompressionMode_e literalCompressionMode;
10133 /* Multithreading: used to pass parameters to mtctx */
10139 /* Long distance matching parameters */
10140 ldmParams_t ldmParams;
10142 /* Internal use, for createCCtxParams() and freeCCtxParams() only */
10143 ZSTD_customMem customMem;
10144 }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
10146 struct ZSTD_CCtx_s {
10147 ZSTD_compressionStage_e stage;
10148 int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
10149 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
10150 ZSTD_CCtx_params requestedParams;
10151 ZSTD_CCtx_params appliedParams;
10154 ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
10156 unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
10157 unsigned long long consumedSrcSize;
10158 unsigned long long producedCSize;
10159 XXH64_state_t xxhState;
10160 ZSTD_customMem customMem;
10162 SeqCollector seqCollector;
10166 seqStore_t seqStore; /* sequences storage ptrs */
10167 ldmState_t ldmState; /* long distance matching state */
10168 rawSeq* ldmSequences; /* Storage for the ldm output sequences */
10169 size_t maxNbLdmSequences;
10170 rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
10171 ZSTD_blockState_t blockState;
10172 U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
10177 size_t inToCompress;
10179 size_t inBuffTarget;
10181 size_t outBuffSize;
10182 size_t outBuffContentSize;
10183 size_t outBuffFlushedSize;
10184 ZSTD_cStreamStage streamStage;
10188 ZSTD_localDict localDict;
10189 const ZSTD_CDict* cdict;
10190 ZSTD_prefixDict prefixDict; /* single-usage dictionary */
10192 /* Multi-threading */
10193 #ifdef ZSTD_MULTITHREAD
10194 ZSTDMT_CCtx* mtctx;
10198 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
10200 typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
10203 typedef size_t (*ZSTD_blockCompressor) (
10204 ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
10205 void const* src, size_t srcSize);
10206 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
10209 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
10211 static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
10212 8, 9, 10, 11, 12, 13, 14, 15,
10213 16, 16, 17, 17, 18, 18, 19, 19,
10214 20, 20, 20, 20, 21, 21, 21, 21,
10215 22, 22, 22, 22, 22, 22, 22, 22,
10216 23, 23, 23, 23, 23, 23, 23, 23,
10217 24, 24, 24, 24, 24, 24, 24, 24,
10218 24, 24, 24, 24, 24, 24, 24, 24 };
10219 static const U32 LL_deltaCode = 19;
10220 return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
10224 * note : mlBase = matchLength - MINMATCH;
10225 * because it's the format it's stored in seqStore->sequences */
10226 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
10228 static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
10229 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
10230 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
10231 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
10232 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
10233 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
10234 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
10235 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
10236 static const U32 ML_deltaCode = 36;
10237 return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
10240 typedef struct repcodes_s {
10244 MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
10246 repcodes_t newReps;
10247 if (offset >= ZSTD_REP_NUM) { /* full offset */
10248 newReps.rep[2] = rep[1];
10249 newReps.rep[1] = rep[0];
10250 newReps.rep[0] = offset - ZSTD_REP_MOVE;
10251 } else { /* repcode */
10252 U32 const repCode = offset + ll0;
10253 if (repCode > 0) { /* note : if repCode==0, no change */
10254 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
10255 newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
10256 newReps.rep[1] = rep[0];
10257 newReps.rep[0] = currentOffset;
10258 } else { /* repCode == 0 */
10259 memcpy(&newReps, rep, sizeof(newReps));
10265 /* ZSTD_cParam_withinBounds:
10266 * @return 1 if value is within cParam bounds,
10268 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
10270 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
10271 if (ZSTD_isError(bounds.error)) return 0;
10272 if (value < bounds.lowerBound) return 0;
10273 if (value > bounds.upperBound) return 0;
10277 /* ZSTD_noCompressBlock() :
10278 * Writes uncompressed block to dst buffer from given src.
10279 * Returns the size of the block */
10280 MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
10282 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
10283 RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
10284 dstSize_tooSmall, "dst buf too small for uncompressed block");
10285 MEM_writeLE24(dst, cBlockHeader24);
10286 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
10287 return ZSTD_blockHeaderSize + srcSize;
10290 MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
10292 BYTE* const op = (BYTE*)dst;
10293 U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
10294 RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
10295 MEM_writeLE24(op, cBlockHeader);
10301 /* ZSTD_minGain() :
10302 * minimum compression required
10303 * to generate a compress block or a compressed literals section.
10304 * note : use same formula for both situations */
10305 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
10307 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
10308 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
10309 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
10310 return (srcSize >> minlog) + 2;
10313 MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
10315 switch (cctxParams->literalCompressionMode) {
10316 case ZSTD_lcm_huffman:
10318 case ZSTD_lcm_uncompressed:
10321 assert(0 /* impossible: pre-validated */);
10323 case ZSTD_lcm_auto:
10324 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
10328 /*! ZSTD_safecopyLiterals() :
10329 * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
10330 * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
10333 static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
10334 assert(iend > ilimit_w);
10335 if (ip <= ilimit_w) {
10336 ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
10337 op += ilimit_w - ip;
10340 while (ip < iend) *op++ = *ip++;
10343 /*! ZSTD_storeSeq() :
10344 * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
10345 * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
10346 * `mlBase` : matchLength - MINMATCH
10347 * Allowed to overread literals up to litLimit.
10349 HINT_INLINE UNUSED_ATTR
10350 void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
10352 BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
10353 BYTE const* const litEnd = literals + litLength;
10354 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
10355 static const BYTE* g_start = NULL;
10356 if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
10357 { U32 const pos = (U32)((const BYTE*)literals - g_start);
10358 DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
10359 pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
10362 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
10363 /* copy Literals */
10364 assert(seqStorePtr->maxNbLit <= 128 KB);
10365 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
10366 assert(literals + litLength <= litLimit);
10367 if (litEnd <= litLimit_w) {
10368 /* Common case we can use wildcopy.
10369 * First copy 16 bytes, because literals are likely short.
10371 assert(WILDCOPY_OVERLENGTH >= 16);
10372 ZSTD_copy16(seqStorePtr->lit, literals);
10373 if (litLength > 16) {
10374 ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
10377 ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
10379 seqStorePtr->lit += litLength;
10381 /* literal Length */
10382 if (litLength>0xFFFF) {
10383 assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
10384 seqStorePtr->longLengthID = 1;
10385 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
10387 seqStorePtr->sequences[0].litLength = (U16)litLength;
10390 seqStorePtr->sequences[0].offset = offCode + 1;
10393 if (mlBase>0xFFFF) {
10394 assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
10395 seqStorePtr->longLengthID = 2;
10396 seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
10398 seqStorePtr->sequences[0].matchLength = (U16)mlBase;
10400 seqStorePtr->sequences++;
10404 /*-*************************************
10405 * Match length counter
10406 ***************************************/
/* ZSTD_NbCommonBytes() :
 * @val is expected to be the XOR of two machine words read from matching
 * positions; returns the number of identical bytes, i.e. a bit-scan of the
 * lowest set bit on little-endian (trailing zero bytes) or the highest set
 * bit on big-endian (leading zero bytes), divided by 8 (>> 3).
 * Uses MSVC/GCC intrinsics when available, De Bruijn multiplication or a
 * shift cascade otherwise.
 * NOTE(review): several `# else` / `# endif` lines and closing braces are
 * absent from this extract (extraction damage) — restore from upstream zstd
 * before compiling. */
10407 static unsigned ZSTD_NbCommonBytes (size_t val)
10409 if (MEM_isLittleEndian()) {
10410 if (MEM_64bits()) {
10411 # if defined(_MSC_VER) && defined(_WIN64)
10412 unsigned long r = 0;
10413 return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
10414 # elif defined(__GNUC__) && (__GNUC__ >= 4)
10415 return (__builtin_ctzll((U64)val) >> 3);
10417 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
10418 0, 3, 1, 3, 1, 4, 2, 7,
10419 0, 2, 3, 6, 1, 5, 3, 5,
10420 1, 3, 4, 4, 2, 5, 6, 7,
10421 7, 0, 1, 2, 3, 3, 4, 6,
10422 2, 6, 5, 5, 3, 4, 5, 6,
10423 7, 1, 2, 4, 6, 4, 4, 5,
10424 7, 2, 6, 5, 7, 6, 7, 7 };
10425 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
10427 } else { /* 32 bits */
10428 # if defined(_MSC_VER)
10430 return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
10431 # elif defined(__GNUC__) && (__GNUC__ >= 3)
10432 return (__builtin_ctz((U32)val) >> 3);
10434 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
10435 3, 2, 2, 1, 3, 2, 0, 1,
10436 3, 3, 1, 2, 2, 2, 2, 0,
10437 3, 1, 2, 0, 1, 0, 1, 1 };
10438 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
10441 } else { /* Big Endian CPU */
10442 if (MEM_64bits()) {
10443 # if defined(_MSC_VER) && defined(_WIN64)
10444 unsigned long r = 0;
10445 return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
10446 # elif defined(__GNUC__) && (__GNUC__ >= 4)
10447 return (__builtin_clzll(val) >> 3);
10450 const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
10451 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
10452 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
10456 } else { /* 32 bits */
10457 # if defined(_MSC_VER)
10458 unsigned long r = 0;
10459 return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
10460 # elif defined(__GNUC__) && (__GNUC__ >= 3)
10461 return (__builtin_clz((U32)val) >> 3);
10464 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
/* ZSTD_count() :
 * Returns the length (in bytes) of the common prefix of pIn and pMatch.
 * Compares one machine word at a time while pIn < pInLoopLimit (using
 * XOR + ZSTD_NbCommonBytes to locate the first differing byte), then
 * finishes with 4/2/1-byte tail comparisons; pIn never reads at or beyond
 * pInLimit.
 * NOTE(review): closing braces of the if/while scopes are missing from
 * this extract. */
10472 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
10474 const BYTE* const pStart = pIn;
10475 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
10477 if (pIn < pInLoopLimit) {
10478 { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
10479 if (diff) return ZSTD_NbCommonBytes(diff); }
10480 pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
10481 while (pIn < pInLoopLimit) {
10482 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
10483 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
10484 pIn += ZSTD_NbCommonBytes(diff);
10485 return (size_t)(pIn - pStart);
10487 if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
10488 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
10489 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
10490 return (size_t)(pIn - pStart);
10493 /** ZSTD_count_2segments() :
10494 * can count match length with `ip` & `match` in 2 different segments.
10495 * convention : on reaching mEnd, match count continue starting from iStart
/* Counts up to the virtual end vEnd within the first segment; if the match
 * reached mEnd exactly, continues counting in the second segment from iStart.
 * NOTE(review): the `MEM_STATIC size_t` return-type line and the function's
 * braces are missing from this extract. */
10498 ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
10499 const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
10501 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
10502 size_t const matchLength = ZSTD_count(ip, match, vEnd);
10503 if (match + matchLength != mEnd) return matchLength;
10504 DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
10505 DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
10506 DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
10507 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
10508 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
10509 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
10513 /*-*************************************
10515 ***************************************/
10516 static const U32 prime3bytes = 506832829U;
10517 static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
10518 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
10520 static const U32 prime4bytes = 2654435761U;
10521 static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
10522 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
10524 static const U64 prime5bytes = 889523592379ULL;
10525 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
10526 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
10528 static const U64 prime6bytes = 227718039650203ULL;
10529 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
10530 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
10532 static const U64 prime7bytes = 58295818150454627ULL;
10533 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
10534 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
10536 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
10537 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
10538 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
/* ZSTD_hashPtr() :
 * Dispatches to the ZSTD_hashNPtr() helper matching the match-length
 * setting mls (4..8 bytes hashed into hBits bits).
 * NOTE(review): the opening brace, `switch(mls)` header and default case
 * are missing from this extract. */
10540 MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
10545 case 4: return ZSTD_hash4Ptr(p, hBits);
10546 case 5: return ZSTD_hash5Ptr(p, hBits);
10547 case 6: return ZSTD_hash6Ptr(p, hBits);
10548 case 7: return ZSTD_hash7Ptr(p, hBits);
10549 case 8: return ZSTD_hash8Ptr(p, hBits);
/* Rolling-hash helpers (used by long-distance matching).
 * The hash is a polynomial in prime8bytes over the bytes, each offset by
 * ZSTD_ROLL_HASH_CHAR_OFFSET so a zero byte still contributes.
 * NOTE(review): function braces and parts of ZSTD_ipow's loop are missing
 * from this extract. */
10554 * Return base^exponent.
10556 static U64 ZSTD_ipow(U64 base, U64 exponent)
10560 if (exponent & 1) power *= base;
10567 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
10569 /** ZSTD_rollingHash_append() :
10570 * Add the buffer to the hash value.
10572 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
10574 BYTE const* istart = (BYTE const*)buf;
10576 for (pos = 0; pos < size; ++pos) {
10577 hash *= prime8bytes;
10578 hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
10583 /** ZSTD_rollingHash_compute() :
10584 * Compute the rolling hash value of the buffer.
10586 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
10588 return ZSTD_rollingHash_append(0, buf, size);
10591 /** ZSTD_rollingHash_primePower() :
10592 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
10593 * over a window of length bytes.
10595 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
10597 return ZSTD_ipow(prime8bytes, length - 1);
10600 /** ZSTD_rollingHash_rotate() :
10601 * Rotate the rolling hash by one byte.
10603 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
10605 hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
10606 hash *= prime8bytes;
10607 hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
10611 /*-*************************************
10612 * Round buffer management
10613 ***************************************/
10614 #if (ZSTD_WINDOWLOG_MAX_64 > 31)
10615 # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
10617 /* Max current allowed */
10618 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
10619 /* Maximum chunk size before overflow correction needs to be called again */
10620 #define ZSTD_CHUNKSIZE_MAX \
10621 ( ((U32)-1) /* Maximum ending current index */ \
10622 - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
/* Small ZSTD_window_t helpers.
 * NOTE(review): function braces and the ZSTD_extDict / ZSTD_noDict arms of
 * ZSTD_matchState_dictMode's ternary are missing from this extract. */
10625 * ZSTD_window_clear():
10626 * Clears the window containing the history by simply setting it to empty.
10628 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
10630 size_t const endT = (size_t)(window->nextSrc - window->base);
10631 U32 const end = (U32)endT;
10633 window->lowLimit = end;
10634 window->dictLimit = end;
10638 * ZSTD_window_hasExtDict():
10639 * Returns non-zero if the window has a non-empty extDict.
10641 MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
10643 return window.lowLimit < window.dictLimit;
10647 * ZSTD_matchState_dictMode():
10648 * Inspects the provided matchState and figures out what dictMode should be
10649 * passed to the compressor.
10651 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
10653 return ZSTD_window_hasExtDict(ms->window) ?
10655 ms->dictMatchState != NULL ?
10656 ZSTD_dictMatchState :
10661 * ZSTD_window_needOverflowCorrection():
10662 * Returns non-zero if the indices are getting too large and need overflow
10665 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
10666 void const* srcEnd)
10668 U32 const current = (U32)((BYTE const*)srcEnd - window.base);
10669 return current > ZSTD_CURRENT_MAX;
/* NOTE(review): the function's braces and its trailing `return correction;`
 * are missing from this extract; the visible logic rebases base/dictBase and
 * clamps lowLimit/dictLimit by `correction`. */
10673 * ZSTD_window_correctOverflow():
10674 * Reduces the indices to protect from index overflow.
10675 * Returns the correction made to the indices, which must be applied to every
10678 * The least significant cycleLog bits of the indices must remain the same,
10679 * which may be 0. Every index up to maxDist in the past must be valid.
10680 * NOTE: (maxDist & cycleMask) must be zero.
10682 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
10683 U32 maxDist, void const* src)
10685 /* preemptive overflow correction:
10686 * 1. correction is large enough:
10687 * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
10688 * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
10690 * current - newCurrent
10691 * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
10692 * > (3<<29) - (1<<chainLog)
10693 * > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
10696 * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
10697 * After correction, current is less than (1<<chainLog + 1<<windowLog).
10698 * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
10699 * In 32-bit mode we are safe, because (chainLog <= 29), so
10700 * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
10701 * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
10702 * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
10704 U32 const cycleMask = (1U << cycleLog) - 1;
10705 U32 const current = (U32)((BYTE const*)src - window->base);
10706 U32 const currentCycle0 = current & cycleMask;
10707 /* Exclude zero so that newCurrent - maxDist >= 1. */
10708 U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
10709 U32 const newCurrent = currentCycle1 + maxDist;
10710 U32 const correction = current - newCurrent;
10711 assert((maxDist & cycleMask) == 0);
10712 assert(current > newCurrent);
10713 /* Loose bound, should be around 1<<29 (see above) */
10714 assert(correction > 1<<28);
10716 window->base += correction;
10717 window->dictBase += correction;
10718 if (window->lowLimit <= correction) window->lowLimit = 1;
10719 else window->lowLimit -= correction;
10720 if (window->dictLimit <= correction) window->dictLimit = 1;
10721 else window->dictLimit -= correction;
10723 /* Ensure we can still reference the full window. */
10724 assert(newCurrent >= maxDist);
10725 assert(newCurrent - maxDist >= 1);
10726 /* Ensure that lowLimit and dictLimit didn't underflow. */
10727 assert(window->lowLimit <= newCurrent);
10728 assert(window->dictLimit <= newCurrent);
10730 DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
/* NOTE(review): the `MEM_STATC void` return-type line (presumably
 * `MEM_STATIC void`), the `U32 maxDist` parameter line, and several braces
 * are missing from this extract. */
10736 * ZSTD_window_enforceMaxDist():
10737 * Updates lowLimit so that:
10738 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
10740 * It ensures index is valid as long as index >= lowLimit.
10741 * This must be called before a block compression call.
10743 * loadedDictEnd is only defined if a dictionary is in use for current compression.
10744 * As the name implies, loadedDictEnd represents the index at end of dictionary.
10745 * The value lies within context's referential, it can be directly compared to blockEndIdx.
10747 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
10748 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
10749 * This is because dictionaries are allowed to be referenced fully
10750 * as long as the last byte of the dictionary is in the window.
10751 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
10753 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
10754 * In dictMatchState mode, lowLimit and dictLimit are the same,
10755 * and the dictionary is below them.
10756 * forceWindow and dictMatchState are therefore incompatible.
10759 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
10760 const void* blockEnd,
10762 U32* loadedDictEndPtr,
10763 const ZSTD_matchState_t** dictMatchStatePtr)
10765 U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
10766 U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
10767 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
10768 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
10770 /* - When there is no dictionary : loadedDictEnd == 0.
10771 In which case, the test (blockEndIdx > maxDist) is merely to avoid
10772 overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
10773 - When there is a standard dictionary :
10774 Index referential is copied from the dictionary,
10775 which means it starts from 0.
10776 In which case, loadedDictEnd == dictSize,
10777 and it makes sense to compare `blockEndIdx > maxDist + dictSize`
10778 since `blockEndIdx` also starts from zero.
10779 - When there is an attached dictionary :
10780 loadedDictEnd is expressed within the referential of the context,
10781 so it can be directly compared against blockEndIdx.
10783 if (blockEndIdx > maxDist + loadedDictEnd) {
10784 U32 const newLowLimit = blockEndIdx - maxDist;
10785 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
10786 if (window->dictLimit < window->lowLimit) {
10787 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
10788 (unsigned)window->dictLimit, (unsigned)window->lowLimit);
10789 window->dictLimit = window->lowLimit;
10791 /* On reaching window size, dictionaries are invalidated */
10792 if (loadedDictEndPtr) *loadedDictEndPtr = 0;
10793 if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
/* NOTE(review): the return-type line, the `U32 maxDist` parameter line and
 * closing braces are missing from this extract. */
10797 /* Similar to ZSTD_window_enforceMaxDist(),
10798 * but only invalidates dictionary
10799 * when input progresses beyond window size.
10800 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
10801 * loadedDictEnd uses same referential as window->base
10802 * maxDist is the window size */
10804 ZSTD_checkDictValidity(const ZSTD_window_t* window,
10805 const void* blockEnd,
10807 U32* loadedDictEndPtr,
10808 const ZSTD_matchState_t** dictMatchStatePtr)
10810 assert(loadedDictEndPtr != NULL);
10811 assert(dictMatchStatePtr != NULL);
10812 { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
10813 U32 const loadedDictEnd = *loadedDictEndPtr;
10814 DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
10815 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
10816 assert(blockEndIdx >= loadedDictEnd);
10818 if (blockEndIdx > loadedDictEnd + maxDist) {
10819 /* On reaching window size, dictionaries are invalidated.
10820 * For simplification, if window size is reached anywhere within next block,
10821 * the dictionary is invalidated for the full block.
10823 DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
10824 *loadedDictEndPtr = 0;
10825 *dictMatchStatePtr = NULL;
10827 if (*loadedDictEndPtr != 0) {
10828 DEBUGLOG(6, "dictionary considered valid for current block");
/* ZSTD_window_init() :
 * Zeroes the window and points base/dictBase at a valid 1-byte object so
 * index 0 is never dereferenced; limits start at 1 so the first real
 * position is index 1 (reproducible across CCtx reuses, see issue #1241).
 * NOTE(review): the closing brace is missing from this extract. */
10832 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
10833 memset(window, 0, sizeof(*window));
10834 window->base = (BYTE const*)"";
10835 window->dictBase = (BYTE const*)"";
10836 window->dictLimit = 1; /* start from 1, so that 1st position is valid */
10837 window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
10838 window->nextSrc = window->base + 1; /* see issue #1241 */
/* NOTE(review): the `MEM_STATIC U32` line is present but the function's
 * braces and final `return contiguous;` are missing from this extract. */
10842 * ZSTD_window_update():
10843 * Updates the window by appending [src, src + srcSize) to the window.
10844 * If it is not contiguous, the current prefix becomes the extDict, and we
10845 * forget about the extDict. Handles overlap of the prefix and extDict.
10846 * Returns non-zero if the segment is contiguous.
10848 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
10849 void const* src, size_t srcSize)
10851 BYTE const* const ip = (BYTE const*)src;
10852 U32 contiguous = 1;
10853 DEBUGLOG(5, "ZSTD_window_update");
10856 assert(window->base != NULL);
10857 assert(window->dictBase != NULL);
10858 /* Check if blocks follow each other */
10859 if (src != window->nextSrc) {
10860 /* not contiguous */
10861 size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
10862 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
10863 window->lowLimit = window->dictLimit;
10864 assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
10865 window->dictLimit = (U32)distanceFromBase;
10866 window->dictBase = window->base;
10867 window->base = ip - distanceFromBase;
10868 /* ms->nextToUpdate = window->dictLimit; */
10869 if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
10872 window->nextSrc = ip + srcSize;
10873 /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
10874 if ( (ip+srcSize > window->dictBase + window->lowLimit)
10875 & (ip < window->dictBase + window->dictLimit)) {
10876 ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
10877 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
10878 window->lowLimit = lowLimitMax;
10879 DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
/* Lowest-valid-index helpers : clamp the search range to the window size
 * (1<<windowLog) behind `current`, except when a dictionary is loaded, in
 * which case the full valid range down to lowestValid stays referencable.
 * NOTE(review): function braces are missing from this extract. */
10885 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
10887 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
10889 U32 const maxDistance = 1U << windowLog;
10890 U32 const lowestValid = ms->window.lowLimit;
10891 U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
10892 U32 const isDictionary = (ms->loadedDictEnd != 0);
10893 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
10894 return matchLowest;
10898 * Returns the lowest allowed match index in the prefix.
10900 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
10902 U32 const maxDistance = 1U << windowLog;
10903 U32 const lowestValid = ms->window.dictLimit;
10904 U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
10905 U32 const isDictionary = (ms->loadedDictEnd != 0);
10906 U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
10907 return matchLowest;
/* Debug-only statistics helpers (compiled when DEBUGLEVEL >= 2).
 * ZSTD_fWeight() returns an approximate -log2 weight of a raw count using
 * fixed-point arithmetic (8 fractional bits); ZSTD_debugTable() logs each
 * table entry with its estimated bit cost.
 * NOTE(review): function braces, ZSTD_debugTable's local declarations, and
 * the matching #endif are missing from this extract. */
10912 /* debug functions */
10913 #if (DEBUGLEVEL>=2)
10915 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
10917 U32 const fp_accuracy = 8;
10918 U32 const fp_multiplier = (1 << fp_accuracy);
10919 U32 const newStat = rawStat + 1;
10920 U32 const hb = ZSTD_highbit32(newStat);
10921 U32 const BWeight = hb * fp_multiplier;
10922 U32 const FWeight = (newStat << fp_accuracy) >> hb;
10923 U32 const weight = BWeight + FWeight;
10924 assert(hb + fp_accuracy < 31);
10925 return (double)weight / fp_multiplier;
10928 /* display a table content,
10929 * listing each element, its frequency, and its predicted bit cost */
10930 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
10933 for (u=0, sum=0; u<=max; u++) sum += table[u];
10934 DEBUGLOG(2, "total nb elts: %u", sum);
10935 for (u=0; u<=max; u++) {
10936 DEBUGLOG(2, "%2u: %5u (%.2f)",
10937 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
10944 #if defined (__cplusplus)
10948 /* ===============================================================
10949 * Shared internal declarations
10950 * These prototypes may be called from sources not in lib/compress
10951 * =============================================================== */
10953 /* ZSTD_loadCEntropy() :
10954 * dict : must point at beginning of a valid zstd dictionary.
10955 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
10956 * assumptions : magic number supposed already checked
10957 * and dictSize >= 8 */
10958 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
10959 short* offcodeNCount, unsigned* offcodeMaxValue,
10960 const void* const dict, size_t dictSize);
10962 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
10964 /* ==============================================================
10965 * Private declarations
10966 * These prototypes shall only be called from within lib/compress
10967 * ============================================================== */
10969 /* ZSTD_getCParamsFromCCtxParams() :
10970 * cParams are built depending on compressionLevel, src size hints,
10971 * LDM and manually set compression parameters.
10972 * Note: srcSizeHint == 0 means 0!
10974 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
10975 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
10977 /*! ZSTD_initCStream_internal() :
10978 * Private use only. Init streaming operation.
10979 * expects params to be valid.
10980 * must receive dict, or cdict, or none, but not both.
10981 * @return : 0, or an error code */
10982 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
10983 const void* dict, size_t dictSize,
10984 const ZSTD_CDict* cdict,
10985 const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
10987 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
10989 /*! ZSTD_getCParamsFromCDict() :
10990 * as the name implies */
10991 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
10993 /* ZSTD_compressBegin_advanced_internal() :
10994 * Private use only. To be called from zstdmt_compress.c. */
10995 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
10996 const void* dict, size_t dictSize,
10997 ZSTD_dictContentType_e dictContentType,
10998 ZSTD_dictTableLoadMethod_e dtlm,
10999 const ZSTD_CDict* cdict,
11000 const ZSTD_CCtx_params* params,
11001 unsigned long long pledgedSrcSize);
11003 /* ZSTD_compress_advanced_internal() :
11004 * Private use only. To be called from zstdmt_compress.c. */
11005 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
11006 void* dst, size_t dstCapacity,
11007 const void* src, size_t srcSize,
11008 const void* dict,size_t dictSize,
11009 const ZSTD_CCtx_params* params);
11012 /* ZSTD_writeLastEmptyBlock() :
11013 * output an empty Block with end-of-frame mark to complete a frame
11014 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
11015 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
11017 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
11020 /* ZSTD_referenceExternalSequences() :
11021 * Must be called before starting a compression operation.
11022 * seqs must parse a prefix of the source.
11023 * This cannot be used when long range matching is enabled.
11024 * Zstd will use these sequences, and pass the literals to a secondary block
11026 * @return : An error code on failure.
11027 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
11028 * access and data corruption.
11030 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
11032 /** ZSTD_cycleLog() :
11033 * condition for correct operation : hashLog > 1 */
11034 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
11036 #endif /* ZSTD_COMPRESS_H */
11037 /**** ended inlining zstd_compress_internal.h ****/
11040 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
11042 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
11044 size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
11045 ZSTD_hufCTables_t* nextHuf,
11046 ZSTD_strategy strategy, int disableLiteralCompression,
11047 void* dst, size_t dstCapacity,
11048 const void* src, size_t srcSize,
11049 void* entropyWorkspace, size_t entropyWorkspaceSize,
11052 #endif /* ZSTD_COMPRESS_LITERALS_H */
11053 /**** ended inlining zstd_compress_literals.h ****/
/* ZSTD_noCompressLiterals() :
 * Emits a Raw_Literals_Block : a 1/2/3-byte header (set_basic, size format
 * selected by flSize) followed by the literals copied verbatim.
 * Returns total bytes written (srcSize + flSize), or an error if dst is
 * too small.
 * NOTE(review): the `switch(flSize)` header, `break` statements and braces
 * are missing from this extract. */
11055 size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
11057 BYTE* const ostart = (BYTE* const)dst;
11058 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
11060 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
11064 case 1: /* 2 - 1 - 5 */
11065 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
11067 case 2: /* 2 - 2 - 12 */
11068 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
11070 case 3: /* 2 - 2 - 20 */
11071 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
11073 default: /* not necessary : flSize is {1,2,3} */
11077 memcpy(ostart + flSize, src, srcSize);
11078 DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
11079 return srcSize + flSize;
/* ZSTD_compressRleLiteralsBlock() :
 * Emits an RLE_Literals_Block : header (set_rle) encoding srcSize, followed
 * by the single repeated byte.
 * NOTE(review): the `switch(flSize)` header, `break`s, and the trailing
 * `return flSize+1;` are missing from this extract. */
11082 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
11084 BYTE* const ostart = (BYTE* const)dst;
11085 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
11087 (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
11091 case 1: /* 2 - 1 - 5 */
11092 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
11094 case 2: /* 2 - 2 - 12 */
11095 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
11097 case 3: /* 2 - 2 - 20 */
11098 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
11100 default: /* not necessary : flSize is {1,2,3} */
11104 ostart[flSize] = *(const BYTE*)src;
11105 DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
/* ZSTD_compressLiterals() :
 * Huffman-compresses the literals section : tries 1-stream or 4-stream
 * HUF compression (possibly reusing the previous table), falls back to a
 * raw block when compression doesn't gain at least minGain, and (per the
 * visible fallback path) to an RLE block; finally writes the 3/4/5-byte
 * literals-section header.
 * NOTE(review): several lines are missing from this extract — the `int bmi2`
 * parameter line, `size_t cLitSize`/`srcSize==1` RLE condition, the
 * `switch(lhSize)` header, `break`s and braces. Restore from upstream
 * before modifying. */
11109 size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
11110 ZSTD_hufCTables_t* nextHuf,
11111 ZSTD_strategy strategy, int disableLiteralCompression,
11112 void* dst, size_t dstCapacity,
11113 const void* src, size_t srcSize,
11114 void* entropyWorkspace, size_t entropyWorkspaceSize,
11117 size_t const minGain = ZSTD_minGain(srcSize, strategy);
11118 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
11119 BYTE* const ostart = (BYTE*)dst;
11120 U32 singleStream = srcSize < 256;
11121 symbolEncodingType_e hType = set_compressed;
11124 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
11125 disableLiteralCompression, (U32)srcSize);
11127 /* Prepare nextEntropy assuming reusing the existing table */
11128 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
11130 if (disableLiteralCompression)
11131 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
11133 /* small ? don't even attempt compression (speed opt) */
11134 # define COMPRESS_LITERALS_SIZE_MIN 63
11135 { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
11136 if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
11139 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
11140 { HUF_repeat repeat = prevHuf->repeatMode;
11141 int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
11142 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
11143 cLitSize = singleStream ?
11144 HUF_compress1X_repeat(
11145 ostart+lhSize, dstCapacity-lhSize, src, srcSize,
11146 HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
11147 (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
11148 HUF_compress4X_repeat(
11149 ostart+lhSize, dstCapacity-lhSize, src, srcSize,
11150 HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
11151 (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
11152 if (repeat != HUF_repeat_none) {
11153 /* reused the existing table */
11154 DEBUGLOG(5, "Reusing previous huffman table");
11155 hType = set_repeat;
11159 if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
11160 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
11161 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
11164 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
11165 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
11168 if (hType == set_compressed) {
11169 /* using a newly constructed table */
11170 nextHuf->repeatMode = HUF_repeat_check;
11176 case 3: /* 2 - 2 - 10 - 10 */
11177 { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
11178 MEM_writeLE24(ostart, lhc);
11181 case 4: /* 2 - 2 - 14 - 14 */
11182 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
11183 MEM_writeLE32(ostart, lhc);
11186 case 5: /* 2 - 2 - 18 - 18 */
11187 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
11188 MEM_writeLE32(ostart, lhc);
11189 ostart[4] = (BYTE)(cLitSize >> 10);
11192 default: /* not possible : lhSize is {3,4,5} */
11195 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
11196 return lhSize+cLitSize;
11198 /**** ended inlining compress/zstd_compress_literals.c ****/
11199 /**** start inlining compress/zstd_compress_sequences.c ****/
11201 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
11202 * All rights reserved.
11204 * This source code is licensed under both the BSD-style license (found in the
11205 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11206 * in the COPYING file in the root directory of this source tree).
11207 * You may select, at your option, one of the above-listed licenses.
11210 /*-*************************************
11212 ***************************************/
11213 /**** start inlining zstd_compress_sequences.h ****/
11215 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
11216 * All rights reserved.
11218 * This source code is licensed under both the BSD-style license (found in the
11219 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11220 * in the COPYING file in the root directory of this source tree).
11221 * You may select, at your option, one of the above-listed licenses.
11224 #ifndef ZSTD_COMPRESS_SEQUENCES_H
11225 #define ZSTD_COMPRESS_SEQUENCES_H
11227 /**** skipping file: ../common/fse.h ****/
11228 /**** skipping file: ../common/zstd_internal.h ****/
11231 ZSTD_defaultDisallowed = 0,
11232 ZSTD_defaultAllowed = 1
11233 } ZSTD_defaultPolicy_e;
11235 symbolEncodingType_e
11236 ZSTD_selectEncodingType(
11237 FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
11238 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
11239 FSE_CTable const* prevCTable,
11240 short const* defaultNorm, U32 defaultNormLog,
11241 ZSTD_defaultPolicy_e const isDefaultAllowed,
11242 ZSTD_strategy const strategy);
11245 ZSTD_buildCTable(void* dst, size_t dstCapacity,
11246 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
11247 unsigned* count, U32 max,
11248 const BYTE* codeTable, size_t nbSeq,
11249 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
11250 const FSE_CTable* prevCTable, size_t prevCTableSize,
11251 void* entropyWorkspace, size_t entropyWorkspaceSize);
11253 size_t ZSTD_encodeSequences(
11254 void* dst, size_t dstCapacity,
11255 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
11256 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
11257 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
11258 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
11260 size_t ZSTD_fseBitCost(
11261 FSE_CTable const* ctable,
11262 unsigned const* count,
11263 unsigned const max);
11265 size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
11266 unsigned const* count, unsigned const max);
11267 #endif /* ZSTD_COMPRESS_SEQUENCES_H */
11268 /**** ended inlining zstd_compress_sequences.h ****/
11271 * -log2(x / 256) lookup table for x in [0, 256).
11272 * If x == 0: Return 0
11273 * Else: Return floor(-log2(x / 256) * 256)
/* Values are 8-bit fixed point (units of 1/256 bit), matching kAccuracyLog == 8
 * used by ZSTD_fseBitCost below.
 * NOTE(review): this listing shows rows only up to x = 239 ("... 8, 7,"); the
 * final row ("5, 4, 2, 1,") and closing brace appear dropped by the extraction —
 * verify against upstream zstd before relying on this copy. */
11275 static unsigned const kInverseProbabilityLog256[256] = {
11276 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
11277 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
11278 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
11279 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
11280 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
11281 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
11282 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
11283 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
11284 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
11285 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
11286 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
11287 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
11288 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
11289 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
11290 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
11291 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
11292 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
11293 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
11294 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
11295 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
11296 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
/* Reads the maxSymbolValue recorded in an FSE_CTable's header: the value is
 * stored as the second 16-bit field of the table (hence `u16ptr + 1`).
 * The void* hop avoids a direct FSE_CTable* -> U16* cast (strict-aliasing hygiene). */
11300 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
11301 void const* ptr = ctable;
11302 U16 const* u16ptr = (U16 const*)ptr;
11303 U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
11304 return maxSymbolValue;
11308 * Returns the cost in bytes of encoding the normalized count header.
11309 * Returns an error if any of the helper functions return an error.
/* Implementation: normalizes `count` (nbSeq total) into a scratch `norm` table at
 * the optimal tableLog, then measures the size FSE_writeNCount would emit into a
 * throwaway FSE_NCOUNTBOUND-sized buffer. Pure cost probe — no output is kept. */
11311 static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
11312 size_t const nbSeq, unsigned const FSELog)
11314 BYTE wksp[FSE_NCOUNTBOUND];
11315 S16 norm[MaxSeq + 1];
11316 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
11317 FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
11318 return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
11322 * Returns the cost in bits of encoding the distribution described by count
11323 * using the entropy bound.
/* For each symbol s, its probability scaled to /256 indexes the
 * kInverseProbabilityLog256 table (cost in 1/256-bit units), accumulated over
 * all occurrences.
 * NOTE(review): the statement guarded by `count[s] != 0 && norm == 0` is not
 * visible in this listing (a line was dropped); upstream clamps norm to 1 there
 * so rare symbols are not costed as free — verify against upstream. The final
 * `return cost >> kAccuracyLog` style conversion is likewise not visible. */
11325 static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
11329 for (s = 0; s <= max; ++s) {
11330 unsigned norm = (unsigned)((256 * count[s]) / total);
11331 if (count[s] != 0 && norm == 0)
11333 assert(count[s] < total);
11334 cost += count[s] * kInverseProbabilityLog256[norm];
11340 * Returns the cost in bits of encoding the distribution in count using ctable.
11341 * Returns an error if ctable cannot represent all the symbols in count.
/* Cost is accumulated in 8-bit fixed point (kAccuracyLog == 8) and converted to
 * whole bits on return. Two failure modes, both reported as ERROR(GENERIC):
 *  - the ctable's recorded maxSymbolValue is smaller than `max`;
 *  - a present symbol has probability 0 in the ctable (FSE_bitCost returns a
 *    value >= badCost, i.e. more than tableLog+1 bits). */
11343 size_t ZSTD_fseBitCost(
11344 FSE_CTable const* ctable,
11345 unsigned const* count,
11346 unsigned const max)
11348 unsigned const kAccuracyLog = 8;
11351 FSE_CState_t cstate;
11352 FSE_initCState(&cstate, ctable);
11353 if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
11354 DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
11355 ZSTD_getFSEMaxSymbolValue(ctable), max);
11356 return ERROR(GENERIC);
11358 for (s = 0; s <= max; ++s) {
11359 unsigned const tableLog = cstate.stateLog;
11360 unsigned const badCost = (tableLog + 1) << kAccuracyLog; /* sentinel: no symbol costs more than tableLog+1 bits */
11361 unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
11364 if (bitCost >= badCost) {
11365 DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
11366 return ERROR(GENERIC);
11368 cost += (size_t)count[s] * bitCost;
11370 return cost >> kAccuracyLog;
11374 * Returns the cost in bits of encoding the distribution in count using the
11375 * table described by norm. The max symbol support by norm is assumed >= max.
11376 * norm must be valid for every symbol with non-zero probability in count.
/* norm[s] == -1 is FSE's "low probability" marker and is costed as probability 1.
 * Probabilities are rescaled from 1<<accuracyLog to /256 (shift = 8 - accuracyLog)
 * so kInverseProbabilityLog256 can be reused; accumulation is in 1/256-bit units.
 * NOTE(review): the final `return cost >> 8` conversion is not visible in this
 * listing (line dropped) — verify against upstream. */
11378 size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
11379 unsigned const* count, unsigned const max)
11381 unsigned const shift = 8 - accuracyLog;
11384 assert(accuracyLog <= 8);
11385 for (s = 0; s <= max; ++s) {
11386 unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
11387 unsigned const norm256 = normAcc << shift;
11388 assert(norm256 > 0);
11389 assert(norm256 < 256);
11390 cost += count[s] * kInverseProbabilityLog256[norm256];
/* Chooses how a symbol stream (LL/OF/ML codes) will be encoded:
 *   set_rle / set_basic (predefined table) / set_repeat (reuse prevCTable) /
 *   set_compressed (build + transmit a fresh FSE table).
 * Updates *repeatMode to reflect whether the table chosen for this block can be
 * repeated by the next one. Fast strategies (< ZSTD_lazy) use cheap heuristics;
 * otherwise all candidate costs are estimated and the cheapest wins.
 * NOTE(review): several `return set_*;` lines are missing from this listing
 * (e.g. after the "Selected set_basic"/"Selected set_rle" DEBUGLOGs) — the
 * control flow below is incomplete as shown; verify against upstream. */
11395 symbolEncodingType_e
11396 ZSTD_selectEncodingType(
11397 FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
11398 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
11399 FSE_CTable const* prevCTable,
11400 short const* defaultNorm, U32 defaultNormLog,
11401 ZSTD_defaultPolicy_e const isDefaultAllowed,
11402 ZSTD_strategy const strategy)
11404 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
11405 if (mostFrequent == nbSeq) { /* every sequence uses the same symbol */
11406 *repeatMode = FSE_repeat_none;
11407 if (isDefaultAllowed && nbSeq <= 2) {
11408 /* Prefer set_basic over set_rle when there are 2 or less symbols,
11409 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
11410 * If basic encoding isn't possible, always choose RLE.
11412 DEBUGLOG(5, "Selected set_basic");
11415 DEBUGLOG(5, "Selected set_rle");
11418 if (strategy < ZSTD_lazy) { /* fast strategies: heuristic selection, no cost estimation */
11419 if (isDefaultAllowed) {
11420 size_t const staticFse_nbSeq_max = 1000;
11421 size_t const mult = 10 - strategy;
11422 size_t const baseLog = 3;
11423 size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
11424 assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
11425 assert(mult <= 9 && mult >= 7);
11426 if ( (*repeatMode == FSE_repeat_valid)
11427 && (nbSeq < staticFse_nbSeq_max) ) {
11428 DEBUGLOG(5, "Selected set_repeat");
11431 if ( (nbSeq < dynamicFse_nbSeq_min)
11432 || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
11433 DEBUGLOG(5, "Selected set_basic");
11434 /* The format allows default tables to be repeated, but it isn't useful.
11435 * When using simple heuristics to select encoding type, we don't want
11436 * to confuse these tables with dictionaries. When running more careful
11437 * analysis, we don't need to waste time checking both repeating tables
11438 * and default tables.
11440 *repeatMode = FSE_repeat_none;
/* slower strategies: estimate each candidate's cost and pick the minimum;
 * disallowed candidates are given an error value so they never win */
11445 size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
11446 size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
11447 size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
11448 size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); /* header bytes -> bits, plus payload */
11450 if (isDefaultAllowed) {
11451 assert(!ZSTD_isError(basicCost));
11452 assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
11454 assert(!ZSTD_isError(NCountCost));
11455 assert(compressedCost < ERROR(maxCode));
11456 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
11457 (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
11458 if (basicCost <= repeatCost && basicCost <= compressedCost) {
11459 DEBUGLOG(5, "Selected set_basic");
11460 assert(isDefaultAllowed);
11461 *repeatMode = FSE_repeat_none;
11464 if (repeatCost <= compressedCost) {
11465 DEBUGLOG(5, "Selected set_repeat");
11466 assert(!ZSTD_isError(repeatCost));
11469 assert(compressedCost < basicCost && compressedCost < repeatCost);
11471 DEBUGLOG(5, "Selected set_compressed");
11472 *repeatMode = FSE_repeat_check;
11473 return set_compressed;
/* Materializes nextCTable according to the encoding `type` selected by
 * ZSTD_selectEncodingType, writing header bytes to dst only when needed:
 *   - set_rle        : 1-byte RLE symbol written to dst;
 *   - set_repeat     : prevCTable copied verbatim, nothing written;
 *   - set_basic      : CTable built from the predefined defaultNorm, nothing written;
 *   - set_compressed : count normalized + NCount header written to dst, CTable built.
 * Returns the number of header bytes written (or an error code).
 * NOTE(review): the `switch(type)` / `case` / `return` scaffolding lines are
 * missing from this listing (inner numbering jumps) — flow below is incomplete
 * as shown; verify against upstream. */
11477 ZSTD_buildCTable(void* dst, size_t dstCapacity,
11478 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
11479 unsigned* count, U32 max,
11480 const BYTE* codeTable, size_t nbSeq,
11481 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
11482 const FSE_CTable* prevCTable, size_t prevCTableSize,
11483 void* entropyWorkspace, size_t entropyWorkspaceSize)
11485 BYTE* op = (BYTE*)dst;
11486 const BYTE* const oend = op + dstCapacity;
11487 DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
11491 FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
11492 RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
11493 *op = codeTable[0]; /* the single RLE symbol is the whole header */
11496 memcpy(nextCTable, prevCTable, prevCTableSize);
11499 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
11501 case set_compressed: {
11502 S16 norm[MaxSeq + 1];
11503 size_t nbSeq_1 = nbSeq;
11504 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
11505 if (count[codeTable[nbSeq-1]] > 1) { /* last symbol's state is never read by the decoder; drop one occurrence so normalization matches */
11506 count[codeTable[nbSeq-1]]--;
11509 assert(nbSeq_1 > 1);
11510 FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), "");
11511 { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
11512 FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
11513 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
11517 default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
/* Core sequence bitstream writer (template inlined into the _default and _bmi2
 * variants). Sequences are emitted BACKWARDS (last first) because the FSE
 * decoder reads the stream in reverse. The /* 15 *;/-style margin comments are
 * running worst-case bit counts proving the accumulator cannot overflow between
 * flushes (64-bit accumulator; 32-bit builds flush more often).
 * `longOffsets` splits offsets wider than the accumulator into extra low bits +
 * a flush + the remaining high bits.
 * Returns the written stream size, or dstSize_tooSmall. */
11521 FORCE_INLINE_TEMPLATE size_t
11522 ZSTD_encodeSequences_body(
11523 void* dst, size_t dstCapacity,
11524 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
11525 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
11526 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
11527 seqDef const* sequences, size_t nbSeq, int longOffsets)
11529 BIT_CStream_t blockStream;
11530 FSE_CState_t stateMatchLength;
11531 FSE_CState_t stateOffsetBits;
11532 FSE_CState_t stateLitLength;
11535 ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
11536 dstSize_tooSmall, "not enough space remaining");
11537 DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
11538 (int)(blockStream.endPtr - blockStream.startPtr),
11539 (unsigned)dstCapacity);
11541 /* first symbols */
11542 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
11543 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
11544 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
11545 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
11546 if (MEM_32bits()) BIT_flushBits(&blockStream);
11547 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
11548 if (MEM_32bits()) BIT_flushBits(&blockStream);
11550 U32 const ofBits = ofCodeTable[nbSeq-1];
11551 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
11553 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
11554 BIT_flushBits(&blockStream);
11556 BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
11557 ofBits - extraBits);
11559 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
11561 BIT_flushBits(&blockStream);
11564 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
11565 BYTE const llCode = llCodeTable[n];
11566 BYTE const ofCode = ofCodeTable[n];
11567 BYTE const mlCode = mlCodeTable[n];
11568 U32 const llBits = LL_bits[llCode];
11569 U32 const ofBits = ofCode; /* offset code == number of raw offset bits */
11570 U32 const mlBits = ML_bits[mlCode];
11571 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
11572 (unsigned)sequences[n].litLength,
11573 (unsigned)sequences[n].matchLength + MINMATCH,
11574 (unsigned)sequences[n].offset);
11577 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
11578 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
11579 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
11580 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
11581 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
11582 BIT_flushBits(&blockStream); /* (7)*/
11583 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
11584 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
11585 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
11586 if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
11588 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
11590 BIT_addBits(&blockStream, sequences[n].offset, extraBits);
11591 BIT_flushBits(&blockStream); /* (7)*/
11593 BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
11594 ofBits - extraBits); /* 31 */
11596 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
11598 BIT_flushBits(&blockStream); /* (7)*/
11599 DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
/* final states are flushed last; the decoder reads them first */
11602 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
11603 FSE_flushCState(&blockStream, &stateMatchLength);
11604 DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
11605 FSE_flushCState(&blockStream, &stateOffsetBits);
11606 DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
11607 FSE_flushCState(&blockStream, &stateLitLength);
11609 { size_t const streamSize = BIT_closeCStream(&blockStream);
11610 RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
/* Portable (non-BMI2) instantiation of ZSTD_encodeSequences_body. */
11616 ZSTD_encodeSequences_default(
11617 void* dst, size_t dstCapacity,
11618 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
11619 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
11620 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
11621 seqDef const* sequences, size_t nbSeq, int longOffsets)
11623 return ZSTD_encodeSequences_body(dst, dstCapacity,
11624 CTable_MatchLength, mlCodeTable,
11625 CTable_OffsetBits, ofCodeTable,
11626 CTable_LitLength, llCodeTable,
11627 sequences, nbSeq, longOffsets);
/* BMI2-targeted instantiation: identical source to _default, but compiled with
 * TARGET_ATTRIBUTE("bmi2") so the inlined body can use BMI2 instructions.
 * Only compiled when DYNAMIC_BMI2 is enabled (see the dispatcher below). */
11633 static TARGET_ATTRIBUTE("bmi2") size_t
11634 ZSTD_encodeSequences_bmi2(
11635 void* dst, size_t dstCapacity,
11636 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
11637 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
11638 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
11639 seqDef const* sequences, size_t nbSeq, int longOffsets)
11641 return ZSTD_encodeSequences_body(dst, dstCapacity,
11642 CTable_MatchLength, mlCodeTable,
11643 CTable_OffsetBits, ofCodeTable,
11644 CTable_LitLength, llCodeTable,
11645 sequences, nbSeq, longOffsets);
/* Public entry point: dispatches to the BMI2 or portable sequence encoder at
 * runtime based on the `bmi2` flag (the surrounding #if DYNAMIC_BMI2 guards are
 * not visible in this listing). */
11650 size_t ZSTD_encodeSequences(
11651 void* dst, size_t dstCapacity,
11652 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
11653 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
11654 FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
11655 seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
11657 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
11660 return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
11661 CTable_MatchLength, mlCodeTable,
11662 CTable_OffsetBits, ofCodeTable,
11663 CTable_LitLength, llCodeTable,
11664 sequences, nbSeq, longOffsets);
11668 return ZSTD_encodeSequences_default(dst, dstCapacity,
11669 CTable_MatchLength, mlCodeTable,
11670 CTable_OffsetBits, ofCodeTable,
11671 CTable_LitLength, llCodeTable,
11672 sequences, nbSeq, longOffsets);
11674 /**** ended inlining compress/zstd_compress_sequences.c ****/
11675 /**** start inlining compress/zstd_compress_superblock.c ****/
11677 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
11678 * All rights reserved.
11680 * This source code is licensed under both the BSD-style license (found in the
11681 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11682 * in the COPYING file in the root directory of this source tree).
11683 * You may select, at your option, one of the above-listed licenses.
11686 /*-*************************************
11688 ***************************************/
11689 /**** start inlining zstd_compress_superblock.h ****/
11691 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
11692 * All rights reserved.
11694 * This source code is licensed under both the BSD-style license (found in the
11695 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11696 * in the COPYING file in the root directory of this source tree).
11697 * You may select, at your option, one of the above-listed licenses.
11700 #ifndef ZSTD_COMPRESS_ADVANCED_H
11701 #define ZSTD_COMPRESS_ADVANCED_H
11703 /*-*************************************
11705 ***************************************/
11707 /**** skipping file: ../zstd.h ****/
11709 /*-*************************************
11710 * Target Compressed Block Size
11711 ***************************************/
11713 /* ZSTD_compressSuperBlock() :
11714 * Used to compress a super block when targetCBlockSize is being used.
11715 * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
/* @return : presumably the total compressed size or an error code checkable
 * with ZSTD_isError() — the definition is outside this chunk; confirm there. */
11716 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
11717 void* dst, size_t dstCapacity,
11718 void const* src, size_t srcSize,
11719 unsigned lastBlock);
11721 #endif /* ZSTD_COMPRESS_ADVANCED_H */
11722 /**** ended inlining zstd_compress_superblock.h ****/
11724 /**** skipping file: ../common/zstd_internal.h ****/
11725 /**** skipping file: hist.h ****/
11726 /**** skipping file: zstd_compress_internal.h ****/
11727 /**** skipping file: zstd_compress_sequences.h ****/
11728 /**** skipping file: zstd_compress_literals.h ****/
11730 /*-*************************************
11731 * Superblock entropy buffer structs
11732 ***************************************/
11733 /** ZSTD_hufCTablesMetadata_t :
11734 * Stores Literals Block Type for a super-block in hType, and
11735 * huffman tree description in hufDesBuffer.
11736 * hufDesSize refers to the size of huffman tree description in bytes.
11737 * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
/* NOTE(review): the `typedef struct {` opener and the hufDesSize member line are
 * not visible in this listing (lines dropped) — verify against upstream. */
11739 symbolEncodingType_e hType;
11740 BYTE hufDesBuffer[500]; /* TODO give name to this value */
11742 } ZSTD_hufCTablesMetadata_t;
11744 /** ZSTD_fseCTablesMetadata_t :
11745 * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
11746 * fse tables in fseTablesBuffer.
11747 * fseTablesSize refers to the size of fse tables in bytes.
11748 * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
11750 symbolEncodingType_e llType;
11751 symbolEncodingType_e ofType;
11752 symbolEncodingType_e mlType;
11753 BYTE fseTablesBuffer[500]; /* TODO give name to this value */
11754 size_t fseTablesSize;
11755 size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
11756 } ZSTD_fseCTablesMetadata_t;
/* Aggregate of the literal (Huffman) and sequence (FSE) metadata for one super-block. */
11759 ZSTD_hufCTablesMetadata_t hufMetadata;
11760 ZSTD_fseCTablesMetadata_t fseMetadata;
11761 } ZSTD_entropyCTablesMetadata_t;
11764 /** ZSTD_buildSuperBlockEntropy_literal() :
11765 * Builds entropy for the super-block literals.
11766 * Stores literals block type (raw, rle, compressed, repeat) and
11767 * huffman description table to hufMetadata.
11768 * @return : size of huffman description table or error code */
/* Decision ladder, in order: compression disabled -> set_basic; input too small
 * -> set_basic; one dominant symbol -> set_rle; negligible gain -> set_basic;
 * otherwise build a Huffman CTable and choose set_repeat / set_basic /
 * set_compressed by comparing estimated sizes. `workspace` is partitioned into
 * a histogram area (countWksp) followed by HUF tree-building scratch (nodeWksp).
 * NOTE(review): several `return ...;` lines after the hType assignments are not
 * visible in this listing — verify flow against upstream. */
11769 static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
11770 const ZSTD_hufCTables_t* prevHuf,
11771 ZSTD_hufCTables_t* nextHuf,
11772 ZSTD_hufCTablesMetadata_t* hufMetadata,
11773 const int disableLiteralsCompression,
11774 void* workspace, size_t wkspSize)
11776 BYTE* const wkspStart = (BYTE*)workspace;
11777 BYTE* const wkspEnd = wkspStart + wkspSize;
11778 BYTE* const countWkspStart = wkspStart;
11779 unsigned* const countWksp = (unsigned*)workspace;
11780 const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
11781 BYTE* const nodeWksp = countWkspStart + countWkspSize;
11782 const size_t nodeWkspSize = wkspEnd-nodeWksp;
11783 unsigned maxSymbolValue = 255;
11784 unsigned huffLog = HUF_TABLELOG_DEFAULT;
11785 HUF_repeat repeat = prevHuf->repeatMode;
11787 DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
11789 /* Prepare nextEntropy assuming reusing the existing table */
11790 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
11792 if (disableLiteralsCompression) {
11793 DEBUGLOG(5, "set_basic - disabled");
11794 hufMetadata->hType = set_basic;
11798 /* small ? don't even attempt compression (speed opt) */
11799 # define COMPRESS_LITERALS_SIZE_MIN 63
11800 { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
11801 if (srcSize <= minLitSize) {
11802 DEBUGLOG(5, "set_basic - too small");
11803 hufMetadata->hType = set_basic;
11808 /* Scan input and build symbol stats */
11809 { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
11810 FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
11811 if (largest == srcSize) { /* single repeated byte -> RLE */
11812 DEBUGLOG(5, "set_rle");
11813 hufMetadata->hType = set_rle;
11816 if (largest <= (srcSize >> 7)+4) { /* distribution too flat to gain */
11817 DEBUGLOG(5, "set_basic - no gain");
11818 hufMetadata->hType = set_basic;
11823 /* Validate the previous Huffman table */
11824 if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
11825 repeat = HUF_repeat_none;
11828 /* Build Huffman Tree */
11829 memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
11830 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
11831 { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
11832 maxSymbolValue, huffLog,
11833 nodeWksp, nodeWkspSize);
11834 FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
11835 huffLog = (U32)maxBits;
11836 { /* Build and write the CTable */
11837 size_t const newCSize = HUF_estimateCompressedSize(
11838 (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
11839 size_t const hSize = HUF_writeCTable(
11840 hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
11841 (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
11842 /* Check against repeating the previous CTable */
11843 if (repeat != HUF_repeat_none) {
11844 size_t const oldCSize = HUF_estimateCompressedSize(
11845 (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
11846 if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
11847 DEBUGLOG(5, "set_repeat - smaller");
11848 memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); /* keep previous table as next */
11849 hufMetadata->hType = set_repeat;
11853 if (newCSize + hSize >= srcSize) { /* new table + payload would expand */
11854 DEBUGLOG(5, "set_basic - no gains");
11855 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
11856 hufMetadata->hType = set_basic;
11859 DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
11860 hufMetadata->hType = set_compressed;
11861 nextHuf->repeatMode = HUF_repeat_check;
11867 /** ZSTD_buildSuperBlockEntropy_sequences() :
11868 * Builds entropy for the super-block sequences.
11869 * Stores symbol compression modes and fse table to fseMetadata.
11870 * @return : size of fse tables or error code */
/* For each of the three code streams (LL, OF, ML), in order: histogram the codes,
 * let ZSTD_selectEncodingType pick the mode, then ZSTD_buildCTable writes any
 * NCount header into fseMetadata->fseTablesBuffer (op advances; the `BYTE* op =
 * ostart;` line and the LLtype/Offtype/MLtype declarations are not visible in
 * this listing — verify against upstream). lastCountSize tracks the size of the
 * last set_compressed header for the 1.3.4-bug workaround noted on the struct.
 * Workspace layout mirrors the literal builder: histogram area then CTable scratch. */
11871 static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
11872 const ZSTD_fseCTables_t* prevEntropy,
11873 ZSTD_fseCTables_t* nextEntropy,
11874 const ZSTD_CCtx_params* cctxParams,
11875 ZSTD_fseCTablesMetadata_t* fseMetadata,
11876 void* workspace, size_t wkspSize)
11878 BYTE* const wkspStart = (BYTE*)workspace;
11879 BYTE* const wkspEnd = wkspStart + wkspSize;
11880 BYTE* const countWkspStart = wkspStart;
11881 unsigned* const countWksp = (unsigned*)workspace;
11882 const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
11883 BYTE* const cTableWksp = countWkspStart + countWkspSize;
11884 const size_t cTableWkspSize = wkspEnd-cTableWksp;
11885 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
11886 FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
11887 FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
11888 FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
11889 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
11890 const BYTE* const llCodeTable = seqStorePtr->llCode;
11891 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
11892 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
11893 BYTE* const ostart = fseMetadata->fseTablesBuffer;
11894 BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
11897 assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
11898 DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
11899 memset(workspace, 0, wkspSize);
11901 fseMetadata->lastCountSize = 0;
11902 /* convert length/distances into codes */
11903 ZSTD_seqToCodes(seqStorePtr);
11904 /* build CTable for Literal Lengths */
11906 unsigned max = MaxLL;
11907 size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
11908 DEBUGLOG(5, "Building LL table");
11909 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
11910 LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
11911 countWksp, max, mostFrequent, nbSeq,
11912 LLFSELog, prevEntropy->litlengthCTable,
11913 LL_defaultNorm, LL_defaultNormLog,
11914 ZSTD_defaultAllowed, strategy);
11915 assert(set_basic < set_compressed && set_rle < set_compressed);
11916 assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
11917 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
11918 countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
11919 prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
11920 cTableWksp, cTableWkspSize);
11921 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
11922 if (LLtype == set_compressed)
11923 fseMetadata->lastCountSize = countSize;
11925 fseMetadata->llType = (symbolEncodingType_e) LLtype;
11927 /* build CTable for Offsets */
11929 unsigned max = MaxOff;
11930 size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
11931 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
11932 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
11933 DEBUGLOG(5, "Building OF table");
11934 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
11935 Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
11936 countWksp, max, mostFrequent, nbSeq,
11937 OffFSELog, prevEntropy->offcodeCTable,
11938 OF_defaultNorm, OF_defaultNormLog,
11939 defaultPolicy, strategy);
11940 assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
11941 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
11942 countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
11943 prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
11944 cTableWksp, cTableWkspSize);
11945 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
11946 if (Offtype == set_compressed)
11947 fseMetadata->lastCountSize = countSize;
11949 fseMetadata->ofType = (symbolEncodingType_e) Offtype;
11951 /* build CTable for MatchLengths */
11953 unsigned max = MaxML;
11954 size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
11955 DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
11956 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
11957 MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
11958 countWksp, max, mostFrequent, nbSeq,
11959 MLFSELog, prevEntropy->matchlengthCTable,
11960 ML_defaultNorm, ML_defaultNormLog,
11961 ZSTD_defaultAllowed, strategy);
11962 assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
11963 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
11964 countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
11965 prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
11966 cTableWksp, cTableWkspSize);
11967 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
11968 if (MLtype == set_compressed)
11969 fseMetadata->lastCountSize = countSize;
11971 fseMetadata->mlType = (symbolEncodingType_e) MLtype;
11973 assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
11978 /** ZSTD_buildSuperBlockEntropy() :
11979 * Builds entropy for the super-block.
11980 * @return : 0 on success or error code */
/* Thin orchestrator: fills entropyMetadata by running the literal (Huffman)
 * builder then the sequence (FSE) builder, forwarding any error from either.
 * NOTE(review): the `static size_t` return-type line and final `return 0;` are
 * not visible in this listing — verify against upstream. */
11982 ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
11983 const ZSTD_entropyCTables_t* prevEntropy,
11984 ZSTD_entropyCTables_t* nextEntropy,
11985 const ZSTD_CCtx_params* cctxParams,
11986 ZSTD_entropyCTablesMetadata_t* entropyMetadata,
11987 void* workspace, size_t wkspSize)
11989 size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
11990 DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
11991 entropyMetadata->hufMetadata.hufDesSize =
11992 ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
11993 &prevEntropy->huf, &nextEntropy->huf,
11994 &entropyMetadata->hufMetadata,
11995 ZSTD_disableLiteralsCompression(cctxParams),
11996 workspace, wkspSize);
11997 FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
11998 entropyMetadata->fseMetadata.fseTablesSize =
11999 ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
12000 &prevEntropy->fse, &nextEntropy->fse,
12002 &entropyMetadata->fseMetadata,
12003 workspace, wkspSize);
12004 FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
12008 /** ZSTD_compressSubBlock_literal() :
12009 * Compresses literals section for a sub-block.
12010 * When we have to write the Huffman table we will sometimes choose a header
12011 * size larger than necessary. This is because we have to pick the header size
12012 * before we know the table size + compressed size, so we have a bound on the
12013 * table size. If we guessed incorrectly, we fall back to uncompressed literals.
12015 * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
12016 * in writing the header, otherwise it is set to 0.
12018 * hufMetadata->hType has literals block type info.
12019 * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
12020 * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
12021 * If it is set_compressed, the first sub-block's literals section will be Compressed_Literals_Block
12022 * (it carries the Huffman tree description),
12023 * and the following sub-blocks' literals sections will be Treeless_Literals_Block.
12024 * @return : compressed size of literals section of a sub-block
12025 * Or 0 if it is unable to compress.
/* NOTE(review): several return statements and closing braces are not visible in
 * this listing (inner numbering jumps, e.g. around 12043, 12063-12065, 12086-12090) —
 * flow below is incomplete as shown; verify against upstream. */
12027 static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
12028 const ZSTD_hufCTablesMetadata_t* hufMetadata,
12029 const BYTE* literals, size_t litSize,
12030 void* dst, size_t dstSize,
12031 const int bmi2, int writeEntropy, int* entropyWritten)
12033 size_t const header = writeEntropy ? 200 : 0; /* reserve room for a worst-case tree description */
12034 size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
12035 BYTE* const ostart = (BYTE*)dst;
12036 BYTE* const oend = ostart + dstSize;
12037 BYTE* op = ostart + lhSize;
12038 U32 const singleStream = lhSize == 3;
12039 symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
12040 size_t cLitSize = 0;
12042 (void)bmi2; /* TODO bmi2... */
12044 DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
12046 *entropyWritten = 0;
12047 if (litSize == 0 || hufMetadata->hType == set_basic) {
12048 DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
12049 return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
12050 } else if (hufMetadata->hType == set_rle) {
12051 DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
12052 return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
12055 assert(litSize > 0);
12056 assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
12058 if (writeEntropy && hufMetadata->hType == set_compressed) {
12059 memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); /* prepend tree description */
12060 op += hufMetadata->hufDesSize;
12061 cLitSize += hufMetadata->hufDesSize;
12062 DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
12066 { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
12067 : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
12070 if (cSize == 0 || ERR_isError(cSize)) {
12071 DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
12074 /* If we expand and we aren't writing a header then emit uncompressed */
12075 if (!writeEntropy && cLitSize >= litSize) {
12076 DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
12077 return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
12079 /* If we are writing headers then allow expansion that doesn't change our header size. */
12080 if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
12081 assert(cLitSize > litSize);
12082 DEBUGLOG(5, "Literals expanded beyond allowed header size");
12083 return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
12085 DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
/* write the literals-section header; layout per RFC 8878 literals block header */
12091 case 3: /* 2 - 2 - 10 - 10 */
12092 { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
12093 MEM_writeLE24(ostart, lhc);
12096 case 4: /* 2 - 2 - 14 - 14 */
12097 { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
12098 MEM_writeLE32(ostart, lhc);
12101 case 5: /* 2 - 2 - 18 - 18 */
12102 { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
12103 MEM_writeLE32(ostart, lhc);
12104 ostart[4] = (BYTE)(cLitSize >> 10); /* 5th byte carries the high bits of cLitSize */
12107 default: /* not possible : lhSize is {3,4,5} */
12110 *entropyWritten = 1;
12111 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
/* Returns the regenerated (decompressed) size covered by `nbSeq` sequences
 * starting at `sequences`, plus `litSize` literals: sum of match lengths +
 * litSize. For the last group of a block (`lastSequence`), litSize may
 * include a literals tail beyond the per-sequence litLength sum.
 * NOTE(review): the loop below has no visible `sp++` and several closing
 * braces are absent -- lines were elided from this dump; code kept
 * byte-identical. */
12115 static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
12116 const seqDef* const sstart = sequences;
12117 const seqDef* const send = sequences + nbSeq;
12118 const seqDef* sp = sstart;
12119 size_t matchLengthSum = 0;
12120 size_t litLengthSum = 0;
12121 while (send-sp > 0) {
/* ZSTD_getSequenceLength resolves the true lengths (incl. longLength fixup). */
12122 ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
12123 litLengthSum += seqLen.litLength;
12124 matchLengthSum += seqLen.matchLength;
/* When not the last group, every literal must be accounted for by a sequence. */
12127 assert(litLengthSum <= litSize);
12128 if (!lastSequence) {
12129 assert(litLengthSum == litSize);
12131 return matchLengthSum + litSize;
12134 /** ZSTD_compressSubBlock_sequences() :
12135 * Compresses sequences section for a sub-block.
12136 * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
12137 * symbol compression modes for the super-block.
12138 * The first successfully compressed block will have these in its header.
12139 * We set entropyWritten=1 when we succeed in compressing the sequences.
12140 * The following sub-blocks will always have repeat mode.
12141 * @return : compressed size of sequences section of a sub-block
12142 * Or 0 if it is unable to compress
12143 * Or error code. */
12144 static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
12145 const ZSTD_fseCTablesMetadata_t* fseMetadata,
12146 const seqDef* sequences, size_t nbSeq,
12147 const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
12148 const ZSTD_CCtx_params* cctxParams,
12149 void* dst, size_t dstCapacity,
12150 const int bmi2, int writeEntropy, int* entropyWritten)
/* Compresses the sequences section of one sub-block: nbSeq header, seqHead
 * (FSE mode flags), optional FSE table descriptions, then the interleaved
 * LL/OF/ML bitstream. Sets *entropyWritten=1 on success.
 * NOTE(review): this dump is missing lines (e.g. the opening brace, the
 * `if (nbSeq < 0x7F)` guard before line 12165, `op`/`seqHead` declarations,
 * and several closing braces). Code kept byte-identical; comments only. */
/* Long offsets are needed once the window exceeds the bit accumulator. */
12152 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
12153 BYTE* const ostart = (BYTE*)dst;
12154 BYTE* const oend = ostart + dstCapacity;
12158 DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
12160 *entropyWritten = 0;
12161 /* Sequences Header */
12162 RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
12163 dstSize_tooSmall, "");
/* nbSeq encoding: 1 byte (<0x7F), 2 bytes (<LONGNBSEQ), else 3 bytes. */
12165 *op++ = (BYTE)nbSeq;
12166 else if (nbSeq < LONGNBSEQ)
12167 op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
12169 op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
/* nbSeq == 0: a sequences section with only its header is complete. */
12171 return op - ostart;
12174 /* seqHead : flags for FSE encoding type */
12177 DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
/* First sub-block: emit the compression modes chosen for the super-block
 * together with the serialized FSE table descriptions. */
12179 if (writeEntropy) {
12180 const U32 LLtype = fseMetadata->llType;
12181 const U32 Offtype = fseMetadata->ofType;
12182 const U32 MLtype = fseMetadata->mlType;
12183 DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
12184 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
12185 memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
12186 op += fseMetadata->fseTablesSize;
/* Subsequent sub-blocks: all three symbol types use Repeat_Mode. */
12188 const U32 repeat = set_repeat;
12189 *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
/* Encode the sequences bitstream with the shared FSE CTables. */
12192 { size_t const bitstreamSize = ZSTD_encodeSequences(
12194 fseTables->matchlengthCTable, mlCode,
12195 fseTables->offcodeCTable, ofCode,
12196 fseTables->litlengthCTable, llCode,
12198 longOffsets, bmi2);
12199 FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
12200 op += bitstreamSize;
12201 /* zstd versions <= 1.3.4 mistakenly report corruption when
12202 * FSE_readNCount() receives a buffer < 4 bytes.
12203 * Fixed by https://github.com/facebook/zstd/pull/1146.
12204 * This can happen when the last set_compressed table present is 2
12205 * bytes and the bitstream is only one byte.
12206 * In this exceedingly rare case, we will simply emit an uncompressed
12207 * block, since it isn't worth optimizing.
12209 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
12210 if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
12211 /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
12212 assert(fseMetadata->lastCountSize + bitstreamSize == 3);
12213 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
12214 "emitting an uncompressed block.");
12218 DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
12221 /* zstd versions <= 1.4.0 mistakenly report error when
12222 * sequences section body size is less than 3 bytes.
12223 * Fixed by https://github.com/facebook/zstd/pull/1664.
12224 * This can happen when the previous sequences section block is compressed
12225 * with rle mode and the current block's sequences section is compressed
12226 * with repeat mode where sequences section body size can be 1 byte.
12228 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
12229 if (op-seqHead < 4) {
12230 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
12231 "an uncompressed block when sequences are < 4 bytes");
12236 *entropyWritten = 1;
12237 return op - ostart;
12240 /** ZSTD_compressSubBlock() :
12241 * Compresses a single sub-block.
12242 * @return : compressed size of the sub-block
12243 * Or 0 if it failed to compress. */
12244 static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
12245 const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
12246 const seqDef* sequences, size_t nbSeq,
12247 const BYTE* literals, size_t litSize,
12248 const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
12249 const ZSTD_CCtx_params* cctxParams,
12250 void* dst, size_t dstCapacity,
12252 int writeLitEntropy, int writeSeqEntropy,
12253 int* litEntropyWritten, int* seqEntropyWritten,
/* Compresses a single sub-block: literals section, then sequences section,
 * then back-patches the 3-byte block header at ostart. Returns the total
 * compressed size, or 0 when either section declined to compress.
 * NOTE(review): parameter lines 12251 (lastBlock) and others appear elided
 * from this dump, as are closing braces; code kept byte-identical. */
12256 BYTE* const ostart = (BYTE*)dst;
12257 BYTE* const oend = ostart + dstCapacity;
/* Reserve the block header up front; it is written last, once cSize is known. */
12258 BYTE* op = ostart + ZSTD_blockHeaderSize;
12259 DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
12260 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
12261 { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
12262 &entropyMetadata->hufMetadata, literals, litSize,
12263 op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
12264 FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
/* 0 means "not compressible as a sub-block"; caller falls back. */
12265 if (cLitSize == 0) return 0;
12268 { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
12269 &entropyMetadata->fseMetadata,
12271 llCode, mlCode, ofCode,
12274 bmi2, writeSeqEntropy, seqEntropyWritten);
12275 FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
12276 if (cSeqSize == 0) return 0;
12279 /* Write block header */
12280 { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
/* Block header: 1 bit lastBlock, 2 bits block type, 21 bits block size. */
12281 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
12282 MEM_writeLE24(ostart, cBlockHeader24);
/* Estimates the compressed size of a sub-block's literals section without
 * actually compressing it: raw -> litSize, RLE -> 1, otherwise a Huffman
 * cost estimate from a histogram of the literals, plus the header and
 * (when writeEntropy) the table description size.
 * NOTE(review): lines are elided from this dump (e.g. the writeEntropy
 * parameter line and closing braces); code kept byte-identical. */
12287 static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
12288 const ZSTD_hufCTables_t* huf,
12289 const ZSTD_hufCTablesMetadata_t* hufMetadata,
12290 void* workspace, size_t wkspSize,
/* The histogram is computed into the shared workspace. */
12293 unsigned* const countWksp = (unsigned*)workspace;
12294 unsigned maxSymbolValue = 255;
12295 size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
12297 if (hufMetadata->hType == set_basic) return litSize;
12298 else if (hufMetadata->hType == set_rle) return 1;
12299 else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
12300 size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
/* On histogram failure, fall back to the pessimistic raw size. */
12301 if (ZSTD_isError(largest)) return litSize;
12302 { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
12303 if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
12304 return cLitSizeEstimate + literalSectionHeaderSize;
12306 assert(0); /* impossible */
/* Estimates the encoded size in bytes of one symbol type (LL, ML or OF)
 * over `nbSeq` codes: FSE/cross-entropy bit cost from a histogram of the
 * code table, plus each code's additional raw bits.
 * NOTE(review): the while-loop below has no visible `ctp++` and closing
 * braces are absent -- lines elided from this dump; code kept
 * byte-identical. */
12310 static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
12311 const BYTE* codeTable, unsigned maxCode,
12312 size_t nbSeq, const FSE_CTable* fseCTable,
12313 const U32* additionalBits,
12314 short const* defaultNorm, U32 defaultNormLog,
12315 void* workspace, size_t wkspSize)
12317 unsigned* const countWksp = (unsigned*)workspace;
12318 const BYTE* ctp = codeTable;
12319 const BYTE* const ctStart = ctp;
12320 const BYTE* const ctEnd = ctStart + nbSeq;
12321 size_t cSymbolTypeSizeEstimateInBits = 0;
12322 unsigned max = maxCode;
12324 HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
/* set_basic costs against the predefined distribution; RLE is ~free;
 * compressed/repeat cost against the actual FSE table. */
12325 if (type == set_basic) {
12326 cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
12327 } else if (type == set_rle) {
12328 cSymbolTypeSizeEstimateInBits = 0;
12329 } else if (type == set_compressed || type == set_repeat) {
12330 cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
/* On estimation failure, return a pessimistic 10 bits... per sequence
 * (NOTE: returned without the /8 conversion -- value is used as bytes). */
12332 if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
/* Add the raw additional bits carried alongside each code. */
12333 while (ctp < ctEnd) {
12334 if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
12335 else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
12338 return cSymbolTypeSizeEstimateInBits / 8;
/* Estimates the compressed size of a sub-block's sequences section by
 * summing the per-symbol-type estimates for offsets, literal lengths and
 * match lengths, plus the FSE table descriptions (first sub-block only)
 * and a fixed 3-byte section header.
 * NOTE(review): parameter lines (nbSeq at 12344, writeEntropy at ~12348)
 * are elided from this dump; code kept byte-identical. */
12341 static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
12342 const BYTE* llCodeTable,
12343 const BYTE* mlCodeTable,
12345 const ZSTD_fseCTables_t* fseTables,
12346 const ZSTD_fseCTablesMetadata_t* fseMetadata,
12347 void* workspace, size_t wkspSize,
12350 size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
12351 size_t cSeqSizeEstimate = 0;
/* Offsets: the code itself doubles as the extra-bit count, hence NULL. */
12352 cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
12353 nbSeq, fseTables->offcodeCTable, NULL,
12354 OF_defaultNorm, OF_defaultNormLog,
12355 workspace, wkspSize);
12356 cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
12357 nbSeq, fseTables->litlengthCTable, LL_bits,
12358 LL_defaultNorm, LL_defaultNormLog,
12359 workspace, wkspSize);
12360 cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
12361 nbSeq, fseTables->matchlengthCTable, ML_bits,
12362 ML_defaultNorm, ML_defaultNormLog,
12363 workspace, wkspSize);
12364 if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
12365 return cSeqSizeEstimate + sequencesSectionHeaderSize;
/* Estimates the total compressed size of one sub-block: literals estimate
 * + sequences estimate + the fixed block header.
 * NOTE(review): the nbSeq parameter line (12372) is elided from this dump;
 * code kept byte-identical. */
12368 static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
12369 const BYTE* ofCodeTable,
12370 const BYTE* llCodeTable,
12371 const BYTE* mlCodeTable,
12373 const ZSTD_entropyCTables_t* entropy,
12374 const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
12375 void* workspace, size_t wkspSize,
12376 int writeLitEntropy, int writeSeqEntropy) {
12377 size_t cSizeEstimate = 0;
12378 cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
12379 &entropy->huf, &entropyMetadata->hufMetadata,
12380 workspace, wkspSize, writeLitEntropy);
12381 cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
12382 nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
12383 workspace, wkspSize, writeSeqEntropy);
12384 return cSizeEstimate + ZSTD_blockHeaderSize;
/* Returns non-zero when any of the three sequence symbol types requires its
 * entropy table (or RLE byte) to be present in the bitstream -- i.e. when
 * the super-block contract demands the first sub-block to carry them.
 * NOTE(review): the `return 1;` / `return 0;` lines (12390/12392/12394/
 * 12396) are elided from this dump; code kept byte-identical. */
12387 static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
12389 if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
12391 if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
12393 if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
12398 /** ZSTD_compressSubBlock_multi() :
12399 * Breaks super-block into multiple sub-blocks and compresses them.
12400 * Entropy will be written to the first block.
12401 * The following blocks will use repeat mode to compress.
12402 * All sub-blocks are compressed blocks (no raw or rle blocks).
12403 * @return : compressed size of the super block (which is multiple ZSTD blocks)
12404 * Or 0 if it failed to compress. */
12405 static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
12406 const ZSTD_compressedBlockState_t* prevCBlock,
12407 ZSTD_compressedBlockState_t* nextCBlock,
12408 const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
12409 const ZSTD_CCtx_params* cctxParams,
12410 void* dst, size_t dstCapacity,
12411 const void* src, size_t srcSize,
12412 const int bmi2, U32 lastBlock,
12413 void* workspace, size_t wkspSize)
/* Splits one super-block's seqStore into multiple compressed sub-blocks,
 * greedily accumulating sequences until the estimated sub-block size
 * exceeds targetCBlockSize, then emitting that sub-block. The first
 * successful sub-block carries the entropy tables; the rest use repeat
 * mode. Any uncommitted tail is emitted as an uncompressed block, and
 * repcodes are regenerated when sequences were skipped.
 * NOTE(review): many physical lines are elided from this dump (opening
 * brace, `op` declaration, the do/for loop headers, `sp += seqCount` and
 * `lp += litSize` bookkeeping, `op += cSize`, and various closing braces).
 * Code kept byte-identical; comments only. */
12415 const seqDef* const sstart = seqStorePtr->sequencesStart;
12416 const seqDef* const send = seqStorePtr->sequences;
/* sp tracks how many sequences have been committed to output so far. */
12417 const seqDef* sp = sstart;
12418 const BYTE* const lstart = seqStorePtr->litStart;
12419 const BYTE* const lend = seqStorePtr->lit;
12420 const BYTE* lp = lstart;
12421 BYTE const* ip = (BYTE const*)src;
12422 BYTE const* const iend = ip + srcSize;
12423 BYTE* const ostart = (BYTE*)dst;
12424 BYTE* const oend = ostart + dstCapacity;
12426 const BYTE* llCodePtr = seqStorePtr->llCode;
12427 const BYTE* mlCodePtr = seqStorePtr->mlCode;
12428 const BYTE* ofCodePtr = seqStorePtr->ofCode;
12429 size_t targetCBlockSize = cctxParams->targetCBlockSize;
12430 size_t litSize, seqCount;
/* Literal entropy only needs writing when a fresh Huffman table was built. */
12431 int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
12432 int writeSeqEntropy = 1;
12433 int lastSequence = 0;
12435 DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
12436 (unsigned)(lend-lp), (unsigned)(send-sstart));
12441 size_t cBlockSizeEstimate = 0;
/* No sequences at all: the whole block is a single literals-only sub-block. */
12442 if (sstart == send) {
12445 const seqDef* const sequence = sp + seqCount;
12446 lastSequence = sequence == send - 1;
12447 litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
/* The final sub-block also absorbs the trailing literals. */
12450 if (lastSequence) {
12451 assert(lp <= lend);
12452 assert(litSize <= (size_t)(lend - lp));
12453 litSize = (size_t)(lend - lp);
12455 /* I think there is an optimization opportunity here.
12456 * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
12457 * since it recalculates estimate from scratch.
12458 * For example, it would recount literal distribution and symbol codes everytime.
12460 cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
12461 &nextCBlock->entropy, entropyMetadata,
12462 workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
/* Flush a sub-block once the estimate crosses the target, or at the end. */
12463 if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
12464 int litEntropyWritten = 0;
12465 int seqEntropyWritten = 0;
12466 const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
12467 const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
12470 llCodePtr, mlCodePtr, ofCodePtr,
12473 bmi2, writeLitEntropy, writeSeqEntropy,
12474 &litEntropyWritten, &seqEntropyWritten,
12475 lastBlock && lastSequence);
12476 FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
/* Commit only when the sub-block actually compressed. */
12477 if (cSize > 0 && cSize < decompressedSize) {
12478 DEBUGLOG(5, "Committed the sub-block");
12479 assert(ip + decompressedSize <= iend);
12480 ip += decompressedSize;
/* Advance the per-sequence code cursors past the committed sequences. */
12484 llCodePtr += seqCount;
12485 mlCodePtr += seqCount;
12486 ofCodePtr += seqCount;
12489 /* Entropy only needs to be written once */
12490 if (litEntropyWritten) {
12491 writeLitEntropy = 0;
12493 if (seqEntropyWritten) {
12494 writeSeqEntropy = 0;
12498 } while (!lastSequence);
/* Huffman table never emitted: keep the previous block's table so repeat
 * mode in future blocks remains valid. */
12499 if (writeLitEntropy) {
12500 DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
12501 memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
12503 if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
12504 /* If we haven't written our entropy tables, then we've violated our contract and
12505 * must emit an uncompressed block.
12507 DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
/* Emit whatever input was never covered by a committed sub-block raw. */
12511 size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
12512 DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
12513 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
12514 assert(cSize != 0);
12516 /* We have to regenerate the repcodes because we've skipped some sequences */
12520 memcpy(&rep, prevCBlock->rep, sizeof(rep));
12521 for (seq = sstart; seq < sp; ++seq) {
12522 rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
12524 memcpy(nextCBlock->rep, &rep, sizeof(rep));
12527 DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
/* Public entry point for targetCBlockSize: builds the super-block entropy
 * tables from the CCtx's seqStore, then splits and compresses it into
 * multiple sub-blocks via ZSTD_compressSubBlock_multi.
 * NOTE(review): several argument lines of both calls (e.g. 12536, 12541,
 * 12547, 12549-12550) are elided from this dump; code kept byte-identical. */
12531 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
12532 void* dst, size_t dstCapacity,
12533 void const* src, size_t srcSize,
12534 unsigned lastBlock) {
12535 ZSTD_entropyCTablesMetadata_t entropyMetadata;
12537 FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
12538 &zc->blockState.prevCBlock->entropy,
12539 &zc->blockState.nextCBlock->entropy,
12540 &zc->appliedParams,
12542 zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
12544 return ZSTD_compressSubBlock_multi(&zc->seqStore,
12545 zc->blockState.prevCBlock,
12546 zc->blockState.nextCBlock,
12548 &zc->appliedParams,
12551 zc->bmi2, lastBlock,
12552 zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
12554 /**** ended inlining compress/zstd_compress_superblock.c ****/
12555 /**** start inlining compress/zstd_compress.c ****/
12557 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
12558 * All rights reserved.
12560 * This source code is licensed under both the BSD-style license (found in the
12561 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12562 * in the COPYING file in the root directory of this source tree).
12563 * You may select, at your option, one of the above-listed licenses.
12566 /*-*************************************
12568 ***************************************/
12569 #include <limits.h> /* INT_MAX */
12570 #include <string.h> /* memset */
12571 /**** start inlining ../common/cpu.h ****/
12573 * Copyright (c) 2018-2020, Facebook, Inc.
12574 * All rights reserved.
12576 * This source code is licensed under both the BSD-style license (found in the
12577 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12578 * in the COPYING file in the root directory of this source tree).
12579 * You may select, at your option, one of the above-listed licenses.
12582 #ifndef ZSTD_COMMON_CPU_H
12583 #define ZSTD_COMMON_CPU_H
12586 * Implementation taken from folly/CpuId.h
12587 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
12590 #include <string.h>
12592 /**** skipping file: mem.h ****/
12595 #include <intrin.h>
/* Queries x86 CPUID feature leaves (0, 1 and 7/subleaf 0) and returns them
 * packed in a ZSTD_cpuid_t, choosing the MSVC intrinsics, a PIC-safe i386
 * inline-asm variant, or plain inline asm depending on the toolchain.
 * On non-x86 targets the (elided) fallback presumably returns zeros --
 * TODO confirm against upstream cpu.h.
 * NOTE(review): the asm statement bodies, local declarations (reg, n, f1a,
 * f1c, f1d, f7a, f7b, f7c) and the closing assignments/return are largely
 * elided from this dump; code kept byte-identical. */
12605 MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
12610 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
/* MSVC path: __cpuid/__cpuidex intrinsics. Leaf 0 yields max leaf `n`. */
12612 __cpuid((int*)reg, 0);
12614 int const n = reg[0];
12616 __cpuid((int*)reg, 1);
12621 __cpuidex((int*)reg, 7, 0);
12626 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
12627 /* The following block like the normal cpuid branch below, but gcc
12628 * reserves ebx for use of its pic register so we must specially
12629 * handle the save and restore to avoid clobbering the register
12645 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
12652 "movl %%ebx, %%eax\n\t"
12654 : "=a"(f7b), "=c"(f7c)
12658 #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
/* Generic GCC/Clang path: raw cpuid, clobbering ebx where required. */
12660 __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
12663 __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
12668 : "=a"(f7a), "=b"(f7b), "=c"(f7c)
12674 ZSTD_cpuid_t cpuid;
12683 #define X(name, r, bit) \
12684 MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
12685 return ((cpuid.r) & (1U << bit)) != 0; \
12688 /* cpuid(1): Processor Info and Feature Bits. */
12689 #define C(name, bit) X(name, f1c, bit)
12720 #define D(name, bit) X(name, f1d, bit)
12752 /* cpuid(7): Extended Features. */
12753 #define B(name, bit) X(name, f7b, bit)
12779 #define C(name, bit) X(name, f7c, bit)
12786 #endif /* ZSTD_COMMON_CPU_H */
12787 /**** ended inlining ../common/cpu.h ****/
12788 /**** skipping file: ../common/mem.h ****/
12789 /**** skipping file: hist.h ****/
12790 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
12791 /**** skipping file: ../common/fse.h ****/
12792 #define HUF_STATIC_LINKING_ONLY
12793 /**** skipping file: ../common/huf.h ****/
12794 /**** skipping file: zstd_compress_internal.h ****/
12795 /**** skipping file: zstd_compress_sequences.h ****/
12796 /**** skipping file: zstd_compress_literals.h ****/
12797 /**** start inlining zstd_fast.h ****/
12799 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
12800 * All rights reserved.
12802 * This source code is licensed under both the BSD-style license (found in the
12803 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12804 * in the COPYING file in the root directory of this source tree).
12805 * You may select, at your option, one of the above-listed licenses.
12808 #ifndef ZSTD_FAST_H
12809 #define ZSTD_FAST_H
12811 #if defined (__cplusplus)
12815 /**** skipping file: ../common/mem.h ****/
12816 /**** skipping file: zstd_compress_internal.h ****/
12818 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
12819 void const* end, ZSTD_dictTableLoadMethod_e dtlm);
12820 size_t ZSTD_compressBlock_fast(
12821 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12822 void const* src, size_t srcSize);
12823 size_t ZSTD_compressBlock_fast_dictMatchState(
12824 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12825 void const* src, size_t srcSize);
12826 size_t ZSTD_compressBlock_fast_extDict(
12827 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12828 void const* src, size_t srcSize);
12830 #if defined (__cplusplus)
12834 #endif /* ZSTD_FAST_H */
12835 /**** ended inlining zstd_fast.h ****/
12836 /**** start inlining zstd_double_fast.h ****/
12838 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
12839 * All rights reserved.
12841 * This source code is licensed under both the BSD-style license (found in the
12842 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12843 * in the COPYING file in the root directory of this source tree).
12844 * You may select, at your option, one of the above-listed licenses.
12847 #ifndef ZSTD_DOUBLE_FAST_H
12848 #define ZSTD_DOUBLE_FAST_H
12850 #if defined (__cplusplus)
12854 /**** skipping file: ../common/mem.h ****/
12855 /**** skipping file: zstd_compress_internal.h ****/
12857 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
12858 void const* end, ZSTD_dictTableLoadMethod_e dtlm);
12859 size_t ZSTD_compressBlock_doubleFast(
12860 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12861 void const* src, size_t srcSize);
12862 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
12863 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12864 void const* src, size_t srcSize);
12865 size_t ZSTD_compressBlock_doubleFast_extDict(
12866 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12867 void const* src, size_t srcSize);
12870 #if defined (__cplusplus)
12874 #endif /* ZSTD_DOUBLE_FAST_H */
12875 /**** ended inlining zstd_double_fast.h ****/
12876 /**** start inlining zstd_lazy.h ****/
12878 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
12879 * All rights reserved.
12881 * This source code is licensed under both the BSD-style license (found in the
12882 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12883 * in the COPYING file in the root directory of this source tree).
12884 * You may select, at your option, one of the above-listed licenses.
12887 #ifndef ZSTD_LAZY_H
12888 #define ZSTD_LAZY_H
12890 #if defined (__cplusplus)
12894 /**** skipping file: zstd_compress_internal.h ****/
12896 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
12898 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
12900 size_t ZSTD_compressBlock_btlazy2(
12901 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12902 void const* src, size_t srcSize);
12903 size_t ZSTD_compressBlock_lazy2(
12904 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12905 void const* src, size_t srcSize);
12906 size_t ZSTD_compressBlock_lazy(
12907 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12908 void const* src, size_t srcSize);
12909 size_t ZSTD_compressBlock_greedy(
12910 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12911 void const* src, size_t srcSize);
12913 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
12914 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12915 void const* src, size_t srcSize);
12916 size_t ZSTD_compressBlock_lazy2_dictMatchState(
12917 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12918 void const* src, size_t srcSize);
12919 size_t ZSTD_compressBlock_lazy_dictMatchState(
12920 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12921 void const* src, size_t srcSize);
12922 size_t ZSTD_compressBlock_greedy_dictMatchState(
12923 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12924 void const* src, size_t srcSize);
12926 size_t ZSTD_compressBlock_greedy_extDict(
12927 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12928 void const* src, size_t srcSize);
12929 size_t ZSTD_compressBlock_lazy_extDict(
12930 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12931 void const* src, size_t srcSize);
12932 size_t ZSTD_compressBlock_lazy2_extDict(
12933 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12934 void const* src, size_t srcSize);
12935 size_t ZSTD_compressBlock_btlazy2_extDict(
12936 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12937 void const* src, size_t srcSize);
12939 #if defined (__cplusplus)
12943 #endif /* ZSTD_LAZY_H */
12944 /**** ended inlining zstd_lazy.h ****/
12945 /**** start inlining zstd_opt.h ****/
12947 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
12948 * All rights reserved.
12950 * This source code is licensed under both the BSD-style license (found in the
12951 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12952 * in the COPYING file in the root directory of this source tree).
12953 * You may select, at your option, one of the above-listed licenses.
12959 #if defined (__cplusplus)
12963 /**** skipping file: zstd_compress_internal.h ****/
12965 /* used in ZSTD_loadDictionaryContent() */
12966 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
12968 size_t ZSTD_compressBlock_btopt(
12969 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12970 void const* src, size_t srcSize);
12971 size_t ZSTD_compressBlock_btultra(
12972 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12973 void const* src, size_t srcSize);
12974 size_t ZSTD_compressBlock_btultra2(
12975 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12976 void const* src, size_t srcSize);
12979 size_t ZSTD_compressBlock_btopt_dictMatchState(
12980 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12981 void const* src, size_t srcSize);
12982 size_t ZSTD_compressBlock_btultra_dictMatchState(
12983 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12984 void const* src, size_t srcSize);
12986 size_t ZSTD_compressBlock_btopt_extDict(
12987 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12988 void const* src, size_t srcSize);
12989 size_t ZSTD_compressBlock_btultra_extDict(
12990 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
12991 void const* src, size_t srcSize);
12993 /* note : no btultra2 variant for extDict nor dictMatchState,
12994 * because btultra2 is not meant to work with dictionaries
12995 * and is only specific for the first block (no prefix) */
12997 #if defined (__cplusplus)
13001 #endif /* ZSTD_OPT_H */
13002 /**** ended inlining zstd_opt.h ****/
13003 /**** start inlining zstd_ldm.h ****/
13005 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
13006 * All rights reserved.
13008 * This source code is licensed under both the BSD-style license (found in the
13009 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
13010 * in the COPYING file in the root directory of this source tree).
13011 * You may select, at your option, one of the above-listed licenses.
13017 #if defined (__cplusplus)
13021 /**** skipping file: zstd_compress_internal.h ****/
13022 /**** skipping file: ../zstd.h ****/
13024 /*-*************************************
13025 * Long distance matching
13026 ***************************************/
13028 #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
13030 void ZSTD_ldm_fillHashTable(
13031 ldmState_t* state, const BYTE* ip,
13032 const BYTE* iend, ldmParams_t const* params);
13035 * ZSTD_ldm_generateSequences():
13037 * Generates the sequences using the long distance match finder.
13038 * Generates long range matching sequences in `sequences`, which parse a prefix
13039 * of the source. `sequences` must be large enough to store every sequence,
13040 * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
13041 * @returns 0 or an error code.
13043 * NOTE: The user must have called ZSTD_window_update() for all of the input
13044 * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
13045 * NOTE: This function returns an error if it runs out of space to store
13048 size_t ZSTD_ldm_generateSequences(
13049 ldmState_t* ldms, rawSeqStore_t* sequences,
13050 ldmParams_t const* params, void const* src, size_t srcSize);
13053 * ZSTD_ldm_blockCompress():
13055 * Compresses a block using the predefined sequences, along with a secondary
13056 * block compressor. The literals section of every sequence is passed to the
13057 * secondary block compressor, and those sequences are interspersed with the
13058 * predefined sequences. Returns the length of the last literals.
13059 * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
13060 * `rawSeqStore.seq` may also be updated to split the last sequence between two
13062 * @return The length of the last literals.
13064 * NOTE: The source must be at most the maximum block size, but the predefined
13065 * sequences can be any size, and may be longer than the block. In the case that
13066 * they are longer than the block, the last sequences may need to be split into
13067 * two. We handle that case correctly, and update `rawSeqStore` appropriately.
13068 * NOTE: This function does not return any errors.
13070 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
13071 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
13072 void const* src, size_t srcSize);
13075 * ZSTD_ldm_skipSequences():
13077 * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
13078 * Avoids emitting matches less than `minMatch` bytes.
13079 * Must be called for data which is not passed to ZSTD_ldm_blockCompress().
13081 void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
13082 U32 const minMatch);
13085 /** ZSTD_ldm_getTableSize() :
13086 * Estimate the space needed for long distance matching tables or 0 if LDM is
13089 size_t ZSTD_ldm_getTableSize(ldmParams_t params);
13091 /** ZSTD_ldm_getMaxNbSeq() :
13092 * Return an upper bound on the number of sequences that can be produced by
13093 * the long distance matcher, or 0 if LDM is disabled.
13095 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
13097 /** ZSTD_ldm_adjustParameters() :
13098 * If the params->hashRateLog is not set, set it to its default value based on
13099 * windowLog and params->hashLog.
13101 * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
13102 * params->hashLog if it is not).
13104 * Ensures that the minMatchLength >= targetLength during optimal parsing.
13106 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
13107 ZSTD_compressionParameters const* cParams);
13109 #if defined (__cplusplus)
13113 #endif /* ZSTD_LDM_H */
13114 /**** ended inlining zstd_ldm.h ****/
13115 /**** skipping file: zstd_compress_superblock.h ****/
13118 /*-*************************************
13120 ***************************************/
13121 /* ZSTD_compressBound()
13122 * Note that the result from this function is only compatible with the "normal"
13123 * full-block strategy.
13124 * When there are a lot of small blocks due to frequent flush in streaming mode
13125 * the overhead of headers can make the compressed data to be larger than the
13126 * return value of ZSTD_compressBound().
13128 size_t ZSTD_compressBound(size_t srcSize) {
/* Pure macro expansion: worst-case compressed size for a one-shot compression. */
13129 return ZSTD_COMPRESSBOUND(srcSize);
13133 /*-*************************************
13134 * Context memory management
13135 ***************************************/
/* Digested compression dictionary.
 * Bundles the (referenced or copied) dictionary content with the match state
 * and entropy tables built from it, all carved out of a single workspace. */
13136 struct ZSTD_CDict_s {
13137 const void* dictContent;
13138 size_t dictContentSize;
13139 U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
13140 ZSTD_cwksp workspace;
13141 ZSTD_matchState_t matchState;
13142 ZSTD_compressedBlockState_t cBlockState;
13143 ZSTD_customMem customMem;
13145 int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
13146 }; /* typedef'd to ZSTD_CDict within "zstd.h" */
/* Allocate a compression context using the default allocator. */
13148 ZSTD_CCtx* ZSTD_createCCtx(void)
13150 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
/* Zero-initialize a freshly allocated CCtx: record the allocator, probe the
 * CPU for BMI2, and reset all parameters to their defaults. */
13153 static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
13155 assert(cctx != NULL);
13156 memset(cctx, 0, sizeof(*cctx));
13157 cctx->customMem = memManager;
13158 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
/* Resetting a zeroed context cannot fail; the assert documents that. */
13159 { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
13160 assert(!ZSTD_isError(err));
/* Allocate a CCtx with a caller-supplied allocator.
 * Both customAlloc and customFree must be set, or both unset; a half-set
 * pair is rejected (XOR check). Returns NULL on invalid input or OOM. */
13165 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
13167 ZSTD_STATIC_ASSERT(zcss_init==0);
13168 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
13169 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
13170 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
13171 if (!cctx) return NULL;
13172 ZSTD_initCCtx(cctx, customMem);
13177 ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
13181 if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */
13182 if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */
13183 ZSTD_cwksp_init(&ws, workspace, workspaceSize);
13185 cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
13186 if (cctx == NULL) return NULL;
13188 memset(cctx, 0, sizeof(ZSTD_CCtx));
13189 ZSTD_cwksp_move(&cctx->workspace, &ws);
13190 cctx->staticSize = workspaceSize;
13192 /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
13193 if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
13194 cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
13195 cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
13196 cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE);
13197 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
13202 * Clears and frees all of the dictionaries in the CCtx.
13204 static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
/* free(NULL)-safe: both calls below accept NULL members. */
13206 ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem);
13207 ZSTD_freeCDict(cctx->localDict.cdict);
13208 memset(&cctx->localDict, 0, sizeof(cctx->localDict));
13209 memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
13210 cctx->cdict = NULL;
/* Memory attributable to the local dictionary: the copied buffer (0 when
 * the dict is held by reference) plus the derived CDict, if any. */
13213 static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
13215 size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
13216 size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
13217 return bufferSize + cdictSize;
/* Release everything a CCtx owns (dicts, MT context, workspace) without
 * freeing the CCtx struct itself. Must not be called on a static CCtx. */
13220 static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
13222 assert(cctx != NULL);
13223 assert(cctx->staticSize == 0);
13224 ZSTD_clearAllDicts(cctx);
13225 #ifdef ZSTD_MULTITHREAD
13226 ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
13228 ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
/* Free a CCtx created by ZSTD_createCCtx*(). NULL is a no-op.
 * Static contexts (staticSize != 0) cannot be freed and return an error. */
13231 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
13233 if (cctx==NULL) return 0; /* support free on NULL */
13234 RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
13235 "not compatible with static CCtx");
/* The struct may live inside its own workspace; only free it separately
 * when it does not (the workspace free above would otherwise cover it). */
13237 int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
13238 ZSTD_freeCCtxContent(cctx);
13239 if (!cctxInWorkspace) {
13240 ZSTD_free(cctx, cctx->customMem);
/* Size of the multithreading sub-context; 0 when MT support is compiled out. */
13247 static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
13249 #ifdef ZSTD_MULTITHREAD
13250 return ZSTDMT_sizeof_CCtx(cctx->mtctx);
/* Total memory used by a CCtx, including workspace, local dict and MT state.
 * NULL is a supported input and reports 0. */
13258 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
13260 if (cctx==NULL) return 0; /* support sizeof on NULL */
13261 /* cctx may be in the workspace */
13262 return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
13263 + ZSTD_cwksp_sizeof(&cctx->workspace)
13264 + ZSTD_sizeof_localDict(cctx->localDict)
13265 + ZSTD_sizeof_mtctx(cctx);
/* ZSTD_CStream and ZSTD_CCtx are the same underlying object. */
13268 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
13270 return ZSTD_sizeof_CCtx(zcs); /* same object */
/* Private API (used by dictBuilder only): expose the context's internal
 * sequence store. Read-only access; ownership stays with the CCtx. */
13274 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &ctx->seqStore; }
/* Wrap raw compression parameters in a zeroed ZSTD_CCtx_params.
 * cParams are presumed already validated (see the assert). */
13276 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
13277 ZSTD_compressionParameters cParams)
13279 ZSTD_CCtx_params cctxParams;
13280 memset(&cctxParams, 0, sizeof(cctxParams));
13281 cctxParams.cParams = cParams;
13282 cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
13283 assert(!ZSTD_checkCParams(cParams));
13284 cctxParams.fParams.contentSizeFlag = 1;
/* Heap-allocate a zeroed parameter set with the given allocator.
 * Same all-or-nothing customAlloc/customFree rule as the CCtx allocator.
 * Returns NULL on invalid allocator or OOM. */
13288 static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
13289 ZSTD_customMem customMem)
13291 ZSTD_CCtx_params* params;
13292 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
13293 params = (ZSTD_CCtx_params*)ZSTD_calloc(
13294 sizeof(ZSTD_CCtx_params), customMem);
13295 if (!params) { return NULL; }
13296 params->customMem = customMem;
13297 params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
13298 params->fParams.contentSizeFlag = 1;
/* Allocate a parameter set with the default allocator. */
13302 ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
13304 return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
/* Free a parameter set; NULL is a no-op. Uses the allocator stored inside. */
13307 size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
13309 if (params == NULL) { return 0; }
13310 ZSTD_free(params, params->customMem);
/* Reset a parameter set to the default compression level. */
13314 size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
13316 return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
/* Zero the parameter set, then install the requested compression level and
 * the default contentSize flag. Errors only on a NULL pointer. */
13319 size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
13320 RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
13321 memset(cctxParams, 0, sizeof(*cctxParams));
13322 cctxParams->compressionLevel = compressionLevel;
13323 cctxParams->fParams.contentSizeFlag = 1;
/* Initialize a parameter set from fully-specified ZSTD_parameters.
 * cParams are validated first; invalid values propagate an error. */
13327 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
13329 RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
13330 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
13331 memset(cctxParams, 0, sizeof(*cctxParams));
13332 assert(!ZSTD_checkCParams(params.cParams));
13333 cctxParams->cParams = params.cParams;
13334 cctxParams->fParams = params.fParams;
13335 cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
13339 /* ZSTD_assignParamsToCCtxParams() :
13340 * params is presumed valid at this stage */
/* Copy the existing cctxParams, then overwrite c/f params from `params`. */
13341 static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
13342 const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
13344 ZSTD_CCtx_params ret = *cctxParams;
13345 assert(!ZSTD_checkCParams(params->cParams));
13346 ret.cParams = params->cParams;
13347 ret.fParams = params->fParams;
13348 ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
/* Report the valid [lowerBound, upperBound] range for one compression
 * parameter. Unknown parameters set bounds.error instead.
 * MT-only parameters collapse to [0,0] when ZSTD_MULTITHREAD is off. */
13352 ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
13354 ZSTD_bounds bounds = { 0, 0, 0 };
13358 case ZSTD_c_compressionLevel:
13359 bounds.lowerBound = ZSTD_minCLevel();
13360 bounds.upperBound = ZSTD_maxCLevel();
13363 case ZSTD_c_windowLog:
13364 bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
13365 bounds.upperBound = ZSTD_WINDOWLOG_MAX;
13368 case ZSTD_c_hashLog:
13369 bounds.lowerBound = ZSTD_HASHLOG_MIN;
13370 bounds.upperBound = ZSTD_HASHLOG_MAX;
13373 case ZSTD_c_chainLog:
13374 bounds.lowerBound = ZSTD_CHAINLOG_MIN;
13375 bounds.upperBound = ZSTD_CHAINLOG_MAX;
13378 case ZSTD_c_searchLog:
13379 bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
13380 bounds.upperBound = ZSTD_SEARCHLOG_MAX;
13383 case ZSTD_c_minMatch:
13384 bounds.lowerBound = ZSTD_MINMATCH_MIN;
13385 bounds.upperBound = ZSTD_MINMATCH_MAX;
13388 case ZSTD_c_targetLength:
13389 bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
13390 bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
13393 case ZSTD_c_strategy:
13394 bounds.lowerBound = ZSTD_STRATEGY_MIN;
13395 bounds.upperBound = ZSTD_STRATEGY_MAX;
/* Boolean frame flags: 0 or 1. */
13398 case ZSTD_c_contentSizeFlag:
13399 bounds.lowerBound = 0;
13400 bounds.upperBound = 1;
13403 case ZSTD_c_checksumFlag:
13404 bounds.lowerBound = 0;
13405 bounds.upperBound = 1;
13408 case ZSTD_c_dictIDFlag:
13409 bounds.lowerBound = 0;
13410 bounds.upperBound = 1;
/* Multithreading parameters: usable range depends on build config. */
13413 case ZSTD_c_nbWorkers:
13414 bounds.lowerBound = 0;
13415 #ifdef ZSTD_MULTITHREAD
13416 bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
13418 bounds.upperBound = 0;
13422 case ZSTD_c_jobSize:
13423 bounds.lowerBound = 0;
13424 #ifdef ZSTD_MULTITHREAD
13425 bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
13427 bounds.upperBound = 0;
13431 case ZSTD_c_overlapLog:
13432 #ifdef ZSTD_MULTITHREAD
13433 bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
13434 bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
13436 bounds.lowerBound = 0;
13437 bounds.upperBound = 0;
/* Long-distance-matching parameters. */
13441 case ZSTD_c_enableLongDistanceMatching:
13442 bounds.lowerBound = 0;
13443 bounds.upperBound = 1;
13446 case ZSTD_c_ldmHashLog:
13447 bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
13448 bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
13451 case ZSTD_c_ldmMinMatch:
13452 bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
13453 bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
13456 case ZSTD_c_ldmBucketSizeLog:
13457 bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
13458 bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
13461 case ZSTD_c_ldmHashRateLog:
13462 bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
13463 bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
13466 /* experimental parameters */
13467 case ZSTD_c_rsyncable:
13468 bounds.lowerBound = 0;
13469 bounds.upperBound = 1;
13472 case ZSTD_c_forceMaxWindow :
13473 bounds.lowerBound = 0;
13474 bounds.upperBound = 1;
13477 case ZSTD_c_format:
13478 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
13479 bounds.lowerBound = ZSTD_f_zstd1;
13480 bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */
13483 case ZSTD_c_forceAttachDict:
13484 ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
13485 bounds.lowerBound = ZSTD_dictDefaultAttach;
13486 bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */
13489 case ZSTD_c_literalCompressionMode:
13490 ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
13491 bounds.lowerBound = ZSTD_lcm_auto;
13492 bounds.upperBound = ZSTD_lcm_uncompressed;
13495 case ZSTD_c_targetCBlockSize:
13496 bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
13497 bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
13500 case ZSTD_c_srcSizeHint:
13501 bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
13502 bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
/* Unknown parameter: signal through bounds.error. */
13506 bounds.error = ERROR(parameter_unsupported);
13511 /* ZSTD_cParam_clampBounds:
13512 * Clamps the value into the bounded range.
/* In-place clamp of *value to the parameter's valid range; forwards the
 * bounds lookup error for unknown parameters. */
13514 static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
13516 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
13517 if (ZSTD_isError(bounds.error)) return bounds.error;
13518 if (*value < bounds.lowerBound) *value = bounds.lowerBound;
13519 if (*value > bounds.upperBound) *value = bounds.upperBound;
/* Error out of the enclosing function when `val` is outside the valid
 * range of `cParam` (strict check, unlike the clamping helper above). */
13523 #define BOUNDCHECK(cParam, val) { \
13524 RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
13525 parameter_outOfBound, "Param out of bounds"); \
/* Whether a parameter may still be changed after compression has started
 * (i.e. outside the init stage). Only the first group below is allowed. */
13529 static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
13533 case ZSTD_c_compressionLevel:
13534 case ZSTD_c_hashLog:
13535 case ZSTD_c_chainLog:
13536 case ZSTD_c_searchLog:
13537 case ZSTD_c_minMatch:
13538 case ZSTD_c_targetLength:
13539 case ZSTD_c_strategy:
/* Everything below is fixed once the stream leaves the init stage. */
13542 case ZSTD_c_format:
13543 case ZSTD_c_windowLog:
13544 case ZSTD_c_contentSizeFlag:
13545 case ZSTD_c_checksumFlag:
13546 case ZSTD_c_dictIDFlag:
13547 case ZSTD_c_forceMaxWindow :
13548 case ZSTD_c_nbWorkers:
13549 case ZSTD_c_jobSize:
13550 case ZSTD_c_overlapLog:
13551 case ZSTD_c_rsyncable:
13552 case ZSTD_c_enableLongDistanceMatching:
13553 case ZSTD_c_ldmHashLog:
13554 case ZSTD_c_ldmMinMatch:
13555 case ZSTD_c_ldmBucketSizeLog:
13556 case ZSTD_c_ldmHashRateLog:
13557 case ZSTD_c_forceAttachDict:
13558 case ZSTD_c_literalCompressionMode:
13559 case ZSTD_c_targetCBlockSize:
13560 case ZSTD_c_srcSizeHint:
/* Public entry point: validate stage/param combination, then delegate the
 * actual storage to ZSTD_CCtxParams_setParameter() on requestedParams. */
13566 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
13568 DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
/* After compression has started, only hot-updatable params are accepted. */
13569 if (cctx->streamStage != zcss_init) {
13570 if (ZSTD_isUpdateAuthorized(param)) {
13571 cctx->cParamsChanged = 1;
13573 RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
13578 case ZSTD_c_nbWorkers:
/* A static CCtx cannot allocate the MT machinery. */
13579 RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
13580 "MT not compatible with static alloc");
13583 case ZSTD_c_compressionLevel:
13584 case ZSTD_c_windowLog:
13585 case ZSTD_c_hashLog:
13586 case ZSTD_c_chainLog:
13587 case ZSTD_c_searchLog:
13588 case ZSTD_c_minMatch:
13589 case ZSTD_c_targetLength:
13590 case ZSTD_c_strategy:
13591 case ZSTD_c_ldmHashRateLog:
13592 case ZSTD_c_format:
13593 case ZSTD_c_contentSizeFlag:
13594 case ZSTD_c_checksumFlag:
13595 case ZSTD_c_dictIDFlag:
13596 case ZSTD_c_forceMaxWindow:
13597 case ZSTD_c_forceAttachDict:
13598 case ZSTD_c_literalCompressionMode:
13599 case ZSTD_c_jobSize:
13600 case ZSTD_c_overlapLog:
13601 case ZSTD_c_rsyncable:
13602 case ZSTD_c_enableLongDistanceMatching:
13603 case ZSTD_c_ldmHashLog:
13604 case ZSTD_c_ldmMinMatch:
13605 case ZSTD_c_ldmBucketSizeLog:
13606 case ZSTD_c_targetCBlockSize:
13607 case ZSTD_c_srcSizeHint:
13610 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
13612 return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
/* Store one parameter value into a ZSTD_CCtx_params structure.
 * Per zstd convention, value 0 usually means "use default" and skips the
 * bound check. @return the stored value (as size_t), or an error code. */
13615 size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
13616 ZSTD_cParameter param, int value)
13618 DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
13621 case ZSTD_c_format :
13622 BOUNDCHECK(ZSTD_c_format, value);
13623 CCtxParams->format = (ZSTD_format_e)value;
13624 return (size_t)CCtxParams->format;
13626 case ZSTD_c_compressionLevel : {
13627 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
13628 if (value) { /* 0 : does not change current level */
13629 CCtxParams->compressionLevel = value;
13631 if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
13632 return 0; /* return type (size_t) cannot represent negative values */
13635 case ZSTD_c_windowLog :
13636 if (value!=0) /* 0 => use default */
13637 BOUNDCHECK(ZSTD_c_windowLog, value);
13638 CCtxParams->cParams.windowLog = (U32)value;
13639 return CCtxParams->cParams.windowLog;
13641 case ZSTD_c_hashLog :
13642 if (value!=0) /* 0 => use default */
13643 BOUNDCHECK(ZSTD_c_hashLog, value);
13644 CCtxParams->cParams.hashLog = (U32)value;
13645 return CCtxParams->cParams.hashLog;
13647 case ZSTD_c_chainLog :
13648 if (value!=0) /* 0 => use default */
13649 BOUNDCHECK(ZSTD_c_chainLog, value);
13650 CCtxParams->cParams.chainLog = (U32)value;
13651 return CCtxParams->cParams.chainLog;
13653 case ZSTD_c_searchLog :
13654 if (value!=0) /* 0 => use default */
13655 BOUNDCHECK(ZSTD_c_searchLog, value);
13656 CCtxParams->cParams.searchLog = (U32)value;
13657 return (size_t)value;
13659 case ZSTD_c_minMatch :
13660 if (value!=0) /* 0 => use default */
13661 BOUNDCHECK(ZSTD_c_minMatch, value);
13662 CCtxParams->cParams.minMatch = value;
13663 return CCtxParams->cParams.minMatch;
13665 case ZSTD_c_targetLength :
/* NOTE(review): targetLength is bound-checked even for 0, unlike its
 * siblings — ZSTD_TARGETLENGTH_MIN permits it; verify intentional. */
13666 BOUNDCHECK(ZSTD_c_targetLength, value);
13667 CCtxParams->cParams.targetLength = value;
13668 return CCtxParams->cParams.targetLength;
13670 case ZSTD_c_strategy :
13671 if (value!=0) /* 0 => use default */
13672 BOUNDCHECK(ZSTD_c_strategy, value);
13673 CCtxParams->cParams.strategy = (ZSTD_strategy)value;
13674 return (size_t)CCtxParams->cParams.strategy;
13676 case ZSTD_c_contentSizeFlag :
13677 /* Content size written in frame header _when known_ (default:1) */
13678 DEBUGLOG(4, "set content size flag = %u", (value!=0));
13679 CCtxParams->fParams.contentSizeFlag = value != 0;
13680 return CCtxParams->fParams.contentSizeFlag;
13682 case ZSTD_c_checksumFlag :
13683 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
13684 CCtxParams->fParams.checksumFlag = value != 0;
13685 return CCtxParams->fParams.checksumFlag;
13687 case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
13688 DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
/* Stored inverted: the struct field is "noDictIDFlag". */
13689 CCtxParams->fParams.noDictIDFlag = !value;
13690 return !CCtxParams->fParams.noDictIDFlag;
13692 case ZSTD_c_forceMaxWindow :
13693 CCtxParams->forceWindow = (value != 0);
13694 return CCtxParams->forceWindow;
13696 case ZSTD_c_forceAttachDict : {
13697 const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
13698 BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
13699 CCtxParams->attachDictPref = pref;
13700 return CCtxParams->attachDictPref;
13703 case ZSTD_c_literalCompressionMode : {
13704 const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
13705 BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
13706 CCtxParams->literalCompressionMode = lcm;
13707 return CCtxParams->literalCompressionMode;
13710 case ZSTD_c_nbWorkers :
13711 #ifndef ZSTD_MULTITHREAD
13712 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
13715 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
13716 CCtxParams->nbWorkers = value;
13717 return CCtxParams->nbWorkers;
13720 case ZSTD_c_jobSize :
13721 #ifndef ZSTD_MULTITHREAD
13722 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
13725 /* Adjust to the minimum non-default value. */
13726 if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
13727 value = ZSTDMT_JOBSIZE_MIN;
13728 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
13729 assert(value >= 0);
13730 CCtxParams->jobSize = value;
13731 return CCtxParams->jobSize;
13734 case ZSTD_c_overlapLog :
13735 #ifndef ZSTD_MULTITHREAD
13736 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
13739 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
13740 CCtxParams->overlapLog = value;
13741 return CCtxParams->overlapLog;
13744 case ZSTD_c_rsyncable :
13745 #ifndef ZSTD_MULTITHREAD
13746 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
/* NOTE(review): clamps with ZSTD_c_overlapLog bounds rather than
 * ZSTD_c_rsyncable — looks like a copy-paste from the case above;
 * verify against upstream intent before changing. */
13749 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
13750 CCtxParams->rsyncable = value;
13751 return CCtxParams->rsyncable;
13754 case ZSTD_c_enableLongDistanceMatching :
13755 CCtxParams->ldmParams.enableLdm = (value!=0);
13756 return CCtxParams->ldmParams.enableLdm;
13758 case ZSTD_c_ldmHashLog :
13759 if (value!=0) /* 0 ==> auto */
13760 BOUNDCHECK(ZSTD_c_ldmHashLog, value);
13761 CCtxParams->ldmParams.hashLog = value;
13762 return CCtxParams->ldmParams.hashLog;
13764 case ZSTD_c_ldmMinMatch :
13765 if (value!=0) /* 0 ==> default */
13766 BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
13767 CCtxParams->ldmParams.minMatchLength = value;
13768 return CCtxParams->ldmParams.minMatchLength;
13770 case ZSTD_c_ldmBucketSizeLog :
13771 if (value!=0) /* 0 ==> default */
13772 BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
13773 CCtxParams->ldmParams.bucketSizeLog = value;
13774 return CCtxParams->ldmParams.bucketSizeLog;
13776 case ZSTD_c_ldmHashRateLog :
13777 RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN,
13778 parameter_outOfBound, "Param out of bounds!");
13779 CCtxParams->ldmParams.hashRateLog = value;
13780 return CCtxParams->ldmParams.hashRateLog;
13782 case ZSTD_c_targetCBlockSize :
13783 if (value!=0) /* 0 ==> default */
13784 BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
13785 CCtxParams->targetCBlockSize = value;
13786 return CCtxParams->targetCBlockSize;
13788 case ZSTD_c_srcSizeHint :
13789 if (value!=0) /* 0 ==> default */
13790 BOUNDCHECK(ZSTD_c_srcSizeHint, value);
13791 CCtxParams->srcSizeHint = value;
13792 return CCtxParams->srcSizeHint;
13794 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
/* Read one parameter back from the context's requested parameters. */
13798 size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
13800 return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
/* Mirror of ZSTD_CCtxParams_setParameter(): write the stored value of one
 * parameter into *value. Errors on unknown parameters, and on MT-only
 * parameters when multithreading is compiled out. */
13803 size_t ZSTD_CCtxParams_getParameter(
13804 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
13808 case ZSTD_c_format :
13809 *value = CCtxParams->format;
13811 case ZSTD_c_compressionLevel :
13812 *value = CCtxParams->compressionLevel;
13814 case ZSTD_c_windowLog :
13815 *value = (int)CCtxParams->cParams.windowLog;
13817 case ZSTD_c_hashLog :
13818 *value = (int)CCtxParams->cParams.hashLog;
13820 case ZSTD_c_chainLog :
13821 *value = (int)CCtxParams->cParams.chainLog;
13823 case ZSTD_c_searchLog :
13824 *value = CCtxParams->cParams.searchLog;
13826 case ZSTD_c_minMatch :
13827 *value = CCtxParams->cParams.minMatch;
13829 case ZSTD_c_targetLength :
13830 *value = CCtxParams->cParams.targetLength;
13832 case ZSTD_c_strategy :
13833 *value = (unsigned)CCtxParams->cParams.strategy;
13835 case ZSTD_c_contentSizeFlag :
13836 *value = CCtxParams->fParams.contentSizeFlag;
13838 case ZSTD_c_checksumFlag :
13839 *value = CCtxParams->fParams.checksumFlag;
13841 case ZSTD_c_dictIDFlag :
/* Field is stored inverted (noDictIDFlag); un-invert for the caller. */
13842 *value = !CCtxParams->fParams.noDictIDFlag;
13844 case ZSTD_c_forceMaxWindow :
13845 *value = CCtxParams->forceWindow;
13847 case ZSTD_c_forceAttachDict :
13848 *value = CCtxParams->attachDictPref;
13850 case ZSTD_c_literalCompressionMode :
13851 *value = CCtxParams->literalCompressionMode;
13853 case ZSTD_c_nbWorkers :
13854 #ifndef ZSTD_MULTITHREAD
13855 assert(CCtxParams->nbWorkers == 0);
13857 *value = CCtxParams->nbWorkers;
13859 case ZSTD_c_jobSize :
13860 #ifndef ZSTD_MULTITHREAD
13861 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
13863 assert(CCtxParams->jobSize <= INT_MAX);
13864 *value = (int)CCtxParams->jobSize;
13867 case ZSTD_c_overlapLog :
13868 #ifndef ZSTD_MULTITHREAD
13869 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
13871 *value = CCtxParams->overlapLog;
13874 case ZSTD_c_rsyncable :
13875 #ifndef ZSTD_MULTITHREAD
13876 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
13878 *value = CCtxParams->rsyncable;
13881 case ZSTD_c_enableLongDistanceMatching :
13882 *value = CCtxParams->ldmParams.enableLdm;
13884 case ZSTD_c_ldmHashLog :
13885 *value = CCtxParams->ldmParams.hashLog;
13887 case ZSTD_c_ldmMinMatch :
13888 *value = CCtxParams->ldmParams.minMatchLength;
13890 case ZSTD_c_ldmBucketSizeLog :
13891 *value = CCtxParams->ldmParams.bucketSizeLog;
13893 case ZSTD_c_ldmHashRateLog :
13894 *value = CCtxParams->ldmParams.hashRateLog;
13896 case ZSTD_c_targetCBlockSize :
13897 *value = (int)CCtxParams->targetCBlockSize;
13899 case ZSTD_c_srcSizeHint :
13900 *value = (int)CCtxParams->srcSizeHint;
13902 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
13907 /** ZSTD_CCtx_setParametersUsingCCtxParams() :
13908 * just applies `params` into `cctx`
13909 * no action is performed, parameters are merely stored.
13910 * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
13911 * This is possible even if a compression is ongoing.
13912 * In which case, new parameters will be applied on the fly, starting with next compression job.
13914 size_t ZSTD_CCtx_setParametersUsingCCtxParams(
13915 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
13917 DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
/* Whole-struct assignment is only allowed in init stage, and not while a
 * cdict is attached (cdict dictates some of these parameters). */
13918 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
13919 "The context is in the wrong stage!");
13920 RETURN_ERROR_IF(cctx->cdict, stage_wrong,
13921 "Can't override parameters with cdict attached (some must "
13922 "be inherited from the cdict).");
13924 cctx->requestedParams = *params;
/* Record the promised total source size. Stored +1 so that the value 0
 * can mean "unknown" (ZSTD_CONTENTSIZE_UNKNOWN maps to 0 after +1). */
13928 ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
13930 DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
13931 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
13932 "Can't set pledgedSrcSize when not in init stage.");
13933 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
13938 * Initializes the local dict using the requested parameters.
13939 * NOTE: This does not use the pledged src size, because it may be used for more
13940 * than one compression.
13942 static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
13944 ZSTD_localDict* const dl = &cctx->localDict;
13945 ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
13946 &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize);
13947 if (dl->dict == NULL) {
13948 /* No local dictionary. */
13949 assert(dl->dictBuffer == NULL);
13950 assert(dl->cdict == NULL);
13951 assert(dl->dictSize == 0);
13954 if (dl->cdict != NULL) {
13955 assert(cctx->cdict == dl->cdict);
13956 /* Local dictionary already initialized. */
13959 assert(dl->dictSize > 0);
13960 assert(cctx->cdict == NULL);
13961 assert(cctx->prefixDict.dict == NULL);
/* Digest the raw dict bytes into a CDict and attach it to the context. */
13963 dl->cdict = ZSTD_createCDict_advanced(
13967 dl->dictContentType,
13970 RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
13971 cctx->cdict = dl->cdict;
/* Register a raw dictionary with the context, either by reference or by
 * copy. Actual digestion into a CDict is deferred (see ZSTD_initLocalDict).
 * Replaces any previously loaded dictionary. Static CCtx cannot copy. */
13975 size_t ZSTD_CCtx_loadDictionary_advanced(
13976 ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
13977 ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
13979 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
13980 "Can't load a dictionary when ctx is not in init stage.");
13981 RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
13982 "no malloc for static CCtx");
13983 DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
13984 ZSTD_clearAllDicts(cctx); /* in case one already exists */
13985 if (dict == NULL || dictSize == 0) /* no dictionary mode */
13987 if (dictLoadMethod == ZSTD_dlm_byRef) {
13988 cctx->localDict.dict = dict;
13990 void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem);
13991 RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
13992 memcpy(dictBuffer, dict, dictSize);
/* dictBuffer set => the context owns the copy and must free it later. */
13993 cctx->localDict.dictBuffer = dictBuffer;
13994 cctx->localDict.dict = dictBuffer;
13996 cctx->localDict.dictSize = dictSize;
13997 cctx->localDict.dictContentType = dictContentType;
/* Convenience wrapper: load by reference with auto content-type detection. */
14001 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
14002 ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
14004 return ZSTD_CCtx_loadDictionary_advanced(
14005 cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
/* Convenience wrapper: load by copy with auto content-type detection. */
14008 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
14010 return ZSTD_CCtx_loadDictionary_advanced(
14011 cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
/* Attach an externally owned CDict. The context does NOT take ownership;
 * the caller must keep cdict alive for the duration of its use. */
14015 size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
14017 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
14018 "Can't ref a dict when ctx not in init stage.");
14019 /* Free the existing local cdict (if any) to save memory. */
14020 ZSTD_clearAllDicts(cctx);
14021 cctx->cdict = cdict;
/* Reference a raw-content prefix for the next compression only. */
14025 size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
14027 return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
/* Reference a prefix with explicit content type. Clears any previously
 * set dictionary first; the prefix is held by reference, not copied. */
14030 size_t ZSTD_CCtx_refPrefix_advanced(
14031 ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
14033 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
14034 "Can't ref a prefix when ctx not in init stage.");
14035 ZSTD_clearAllDicts(cctx);
14036 if (prefix != NULL && prefixSize > 0) {
14037 cctx->prefixDict.dict = prefix;
14038 cctx->prefixDict.dictSize = prefixSize;
14039 cctx->prefixDict.dictContentType = dictContentType;
14044 /*! ZSTD_CCtx_reset() :
14045 * Also dumps dictionary */
14046 size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
/* Session reset: abandon the current frame and forget the pledged size. */
14048 if ( (reset == ZSTD_reset_session_only)
14049 || (reset == ZSTD_reset_session_and_parameters) ) {
14050 cctx->streamStage = zcss_init;
14051 cctx->pledgedSrcSizePlusOne = 0;
/* Parameter reset: only valid in init stage; also drops all dictionaries. */
14053 if ( (reset == ZSTD_reset_parameters)
14054 || (reset == ZSTD_reset_session_and_parameters) ) {
14055 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
14056 "Can't reset parameters only when not in init stage.");
14057 ZSTD_clearAllDicts(cctx);
14058 return ZSTD_CCtxParams_reset(&cctx->requestedParams);
14064 /** ZSTD_checkCParams() :
14065 control CParam values remain within authorized range.
14066 @return : 0, or an error code if one value is beyond authorized range */
14067 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
/* BOUNDCHECK returns an error code from this function on violation. */
14069 BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
14070 BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog);
14071 BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog);
14072 BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
14073 BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch);
14074 BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
14075 BOUNDCHECK(ZSTD_c_strategy, cParams.strategy);
14079 /** ZSTD_clampCParams() :
14080 * make CParam values within valid range.
14081 * @return : valid CParams */
14082 static ZSTD_compressionParameters
14083 ZSTD_clampCParams(ZSTD_compressionParameters cParams)
14085 # define CLAMP_TYPE(cParam, val, type) { \
14086 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \
14087 if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \
14088 else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
14090 # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
14091 CLAMP(ZSTD_c_windowLog, cParams.windowLog);
14092 CLAMP(ZSTD_c_chainLog, cParams.chainLog);
14093 CLAMP(ZSTD_c_hashLog, cParams.hashLog);
14094 CLAMP(ZSTD_c_searchLog, cParams.searchLog);
14095 CLAMP(ZSTD_c_minMatch, cParams.minMatch);
14096 CLAMP(ZSTD_c_targetLength,cParams.targetLength);
14097 CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
14101 /** ZSTD_cycleLog() :
14102 * condition for correct operation : hashLog > 1 */
14103 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
14105 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
14106 return hashLog - btScale;
14109 /** ZSTD_adjustCParams_internal() :
14110 * optimize `cPar` for a specified input (`srcSize` and `dictSize`).
14111 * mostly downsize to reduce memory consumption and initialization latency.
14112 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
14113 * note : `srcSize==0` means 0!
14114 * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
14115 static ZSTD_compressionParameters
14116 ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
14117 unsigned long long srcSize,
14120 static const U64 minSrcSize = 513; /* (1<<9) + 1 */
14121 static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
14122 assert(ZSTD_checkCParams(cPar)==0);
14124 if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
14125 srcSize = minSrcSize;
14127 /* resize windowLog if input is small enough, to use less memory */
14128 if ( (srcSize < maxWindowResize)
14129 && (dictSize < maxWindowResize) ) {
14130 U32 const tSize = (U32)(srcSize + dictSize);
14131 static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
14132 U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
14133 ZSTD_highbit32(tSize-1) + 1;
14134 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
14136 if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
14137 { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
14138 if (cycleLog > cPar.windowLog)
14139 cPar.chainLog -= (cycleLog - cPar.windowLog);
14142 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
14143 cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
14148 ZSTD_compressionParameters
14149 ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
14150 unsigned long long srcSize,
14153 cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
14154 if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
14155 return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
14158 static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
14159 static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize);
14161 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
14162 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
14164 ZSTD_compressionParameters cParams;
14165 if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
14166 srcSizeHint = CCtxParams->srcSizeHint;
14168 cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize);
14169 if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
14170 if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
14171 if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
14172 if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
14173 if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
14174 if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch;
14175 if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
14176 if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
14177 assert(!ZSTD_checkCParams(cParams));
14178 /* srcSizeHint == 0 means 0 */
14179 return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
14183 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
14186 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
14187 size_t const hSize = ((size_t)1) << cParams->hashLog;
14188 U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
14189 size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
14190 /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
14191 * surrounded by redzones in ASAN. */
14192 size_t const tableSpace = chainSize * sizeof(U32)
14193 + hSize * sizeof(U32)
14194 + h3Size * sizeof(U32);
14195 size_t const optPotentialSpace =
14196 ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
14197 + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
14198 + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
14199 + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
14200 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
14201 + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
14202 size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
14203 ? optPotentialSpace
14205 DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
14206 (U32)chainSize, (U32)hSize, (U32)h3Size);
14207 return tableSpace + optSpace;
14210 size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
14212 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
14213 { ZSTD_compressionParameters const cParams =
14214 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
14215 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
14216 U32 const divider = (cParams.minMatch==3) ? 3 : 4;
14217 size_t const maxNbSeq = blockSize / divider;
14218 size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
14219 + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
14220 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
14221 size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
14222 size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
14223 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
14225 size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
14226 size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq));
14228 /* estimateCCtxSize is for one-shot compression. So no buffers should
14229 * be needed. However, we still allocate two 0-sized buffers, which can
14230 * take space under ASAN. */
14231 size_t const bufferSpace = ZSTD_cwksp_alloc_size(0)
14232 + ZSTD_cwksp_alloc_size(0);
14234 size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx));
14236 size_t const neededSpace =
14246 DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
14247 return neededSpace;
14251 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
14253 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
14254 return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms);
14257 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
14259 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
14260 return ZSTD_estimateCCtxSize_usingCParams(cParams);
/*! ZSTD_estimateCCtxSize() :
 *  Return the largest estimate across levels 1..compressionLevel
 *  (negative levels start the scan at the requested level). */
size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
14274 size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
14276 RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
14277 { ZSTD_compressionParameters const cParams =
14278 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0);
14279 size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
14280 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
14281 size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
14282 size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
14283 size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize)
14284 + ZSTD_cwksp_alloc_size(outBuffSize);
14286 return CCtxSize + streamingSize;
14290 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
14292 ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
14293 return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms);
14296 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
14298 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
14299 return ZSTD_estimateCStreamSize_usingCParams(cParams);
/*! ZSTD_estimateCStreamSize() :
 *  Return the largest streaming estimate across levels 1..compressionLevel. */
size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
14313 /* ZSTD_getFrameProgression():
14314 * tells how much data has been consumed (input) and produced (output) for current frame.
14315 * able to count progression inside worker threads (non-blocking mode).
14317 ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
14319 #ifdef ZSTD_MULTITHREAD
14320 if (cctx->appliedParams.nbWorkers > 0) {
14321 return ZSTDMT_getFrameProgression(cctx->mtctx);
14324 { ZSTD_frameProgression fp;
14325 size_t const buffered = (cctx->inBuff == NULL) ? 0 :
14326 cctx->inBuffPos - cctx->inToCompress;
14327 if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
14328 assert(buffered <= ZSTD_BLOCKSIZE_MAX);
14329 fp.ingested = cctx->consumedSrcSize + buffered;
14330 fp.consumed = cctx->consumedSrcSize;
14331 fp.produced = cctx->producedCSize;
14332 fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */
14333 fp.currentJobID = 0;
14334 fp.nbActiveWorkers = 0;
14338 /*! ZSTD_toFlushNow()
14339 * Only useful for multithreading scenarios currently (nbWorkers >= 1).
14341 size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
14343 #ifdef ZSTD_MULTITHREAD
14344 if (cctx->appliedParams.nbWorkers > 0) {
14345 return ZSTDMT_toFlushNow(cctx->mtctx);
14349 return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
14352 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
14353 ZSTD_compressionParameters cParams2)
14357 assert(cParams1.windowLog == cParams2.windowLog);
14358 assert(cParams1.chainLog == cParams2.chainLog);
14359 assert(cParams1.hashLog == cParams2.hashLog);
14360 assert(cParams1.searchLog == cParams2.searchLog);
14361 assert(cParams1.minMatch == cParams2.minMatch);
14362 assert(cParams1.targetLength == cParams2.targetLength);
14363 assert(cParams1.strategy == cParams2.strategy);
14366 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
14369 for (i = 0; i < ZSTD_REP_NUM; ++i)
14370 bs->rep[i] = repStartValue[i];
14371 bs->entropy.huf.repeatMode = HUF_repeat_none;
14372 bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
14373 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
14374 bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
14377 /*! ZSTD_invalidateMatchState()
14378 * Invalidate all the matches in the match finder tables.
14379 * Requires nextSrc and base to be set (can be NULL).
14381 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
14383 ZSTD_window_clear(&ms->window);
14385 ms->nextToUpdate = ms->window.dictLimit;
14386 ms->loadedDictEnd = 0;
14387 ms->opt.litLengthSum = 0; /* force reset of btopt stats */
14388 ms->dictMatchState = NULL;
/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Controls, for this matchState reset, whether the tables need to be cleared /
 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
 * subsequent operation will overwrite the table space anyways (e.g., copying
 * the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean,
    ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;

/**
 * Controls, for this matchState reset, whether indexing can continue where it
 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
 * (ZSTDirp_reset).
 */
typedef enum {
    ZSTDirp_continue,
    ZSTDirp_reset
} ZSTD_indexResetPolicy_e;

/* Distinguishes whether a matchState reset targets a CDict or a CCtx. */
typedef enum {
    ZSTD_resetTarget_CDict,
    ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
14429 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
14431 const ZSTD_compressionParameters* cParams,
14432 const ZSTD_compResetPolicy_e crp,
14433 const ZSTD_indexResetPolicy_e forceResetIndex,
14434 const ZSTD_resetTarget_e forWho)
14436 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
14437 size_t const hSize = ((size_t)1) << cParams->hashLog;
14438 U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
14439 size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
14441 DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
14442 if (forceResetIndex == ZSTDirp_reset) {
14443 ZSTD_window_init(&ms->window);
14444 ZSTD_cwksp_mark_tables_dirty(ws);
14447 ms->hashLog3 = hashLog3;
14449 ZSTD_invalidateMatchState(ms);
14451 assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
14453 ZSTD_cwksp_clear_tables(ws);
14455 DEBUGLOG(5, "reserving table space");
14457 ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
14458 ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
14459 ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
14460 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
14461 "failed a workspace allocation in ZSTD_reset_matchState");
14463 DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
14464 if (crp!=ZSTDcrp_leaveDirty) {
14465 /* reset tables only */
14466 ZSTD_cwksp_clean_tables(ws);
14469 /* opt parser space */
14470 if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
14471 DEBUGLOG(4, "reserving optimal parser space");
14472 ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
14473 ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
14474 ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
14475 ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
14476 ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
14477 ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
14480 ms->cParams = *cParams;
14482 RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
14483 "failed a workspace allocation in ZSTD_reset_matchState");
14488 /* ZSTD_indexTooCloseToMax() :
14489 * minor optimization : prefer memset() rather than reduceIndex()
14490 * which is measurably slow in some circumstances (reported for Visual Studio).
14491 * Works when re-using a context for a lot of smallish inputs :
14492 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
14493 * memset() will be triggered before reduceIndex().
14495 #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
14496 static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
14498 return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
14501 /*! ZSTD_resetCCtx_internal() :
14502 note : `params` are assumed fully validated at this stage */
14503 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
14504 ZSTD_CCtx_params params,
14505 U64 const pledgedSrcSize,
14506 ZSTD_compResetPolicy_e const crp,
14507 ZSTD_buffered_policy_e const zbuff)
14509 ZSTD_cwksp* const ws = &zc->workspace;
14510 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
14511 (U32)pledgedSrcSize, params.cParams.windowLog);
14512 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
14514 zc->isFirstBlock = 1;
14516 if (params.ldmParams.enableLdm) {
14517 /* Adjust long distance matching parameters */
14518 ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams);
14519 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
14520 assert(params.ldmParams.hashRateLog < 32);
14521 zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
14524 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
14525 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
14526 U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
14527 size_t const maxNbSeq = blockSize / divider;
14528 size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
14529 + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
14530 + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
14531 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
14532 size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
14533 size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1);
14534 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
14536 ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset;
14538 if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
14539 needsIndexReset = ZSTDirp_reset;
14542 if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
14544 /* Check if workspace is large enough, alloc a new one if needed */
14545 { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
14546 size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE);
14547 size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
14548 size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize);
14549 size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
14550 size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq));
14552 size_t const neededSpace =
14562 int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
14563 int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
14565 DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
14566 neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
14567 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
14569 if (workspaceTooSmall || workspaceWasteful) {
14570 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
14571 ZSTD_cwksp_sizeof(ws) >> 10,
14572 neededSpace >> 10);
14574 RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
14576 needsIndexReset = ZSTDirp_reset;
14578 ZSTD_cwksp_free(ws, zc->customMem);
14579 FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
14581 DEBUGLOG(5, "reserving object space");
14582 /* Statically sized space.
14583 * entropyWorkspace never moves,
14584 * though prev/next block swap places */
14585 assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
14586 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
14587 RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
14588 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
14589 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
14590 zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE);
14591 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
14594 ZSTD_cwksp_clear(ws);
14597 zc->appliedParams = params;
14598 zc->blockState.matchState.cParams = params.cParams;
14599 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
14600 zc->consumedSrcSize = 0;
14601 zc->producedCSize = 0;
14602 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
14603 zc->appliedParams.fParams.contentSizeFlag = 0;
14604 DEBUGLOG(4, "pledged content size : %u ; flag : %u",
14605 (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
14606 zc->blockSize = blockSize;
14608 XXH64_reset(&zc->xxhState, 0);
14609 zc->stage = ZSTDcs_init;
14612 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
14614 /* ZSTD_wildcopy() is used to copy into the literals buffer,
14615 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
14617 zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
14618 zc->seqStore.maxNbLit = blockSize;
14621 zc->inBuffSize = buffInSize;
14622 zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
14623 zc->outBuffSize = buffOutSize;
14624 zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
14626 /* ldm bucketOffsets table */
14627 if (params.ldmParams.enableLdm) {
14628 /* TODO: avoid memset? */
14629 size_t const ldmBucketSize =
14630 ((size_t)1) << (params.ldmParams.hashLog -
14631 params.ldmParams.bucketSizeLog);
14632 zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize);
14633 memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize);
14636 /* sequences storage */
14637 ZSTD_referenceExternalSequences(zc, NULL, 0);
14638 zc->seqStore.maxNbSeq = maxNbSeq;
14639 zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
14640 zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
14641 zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
14642 zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
14644 FORWARD_IF_ERROR(ZSTD_reset_matchState(
14645 &zc->blockState.matchState,
14650 ZSTD_resetTarget_CCtx), "");
14652 /* ldm hash table */
14653 if (params.ldmParams.enableLdm) {
14654 /* TODO: avoid memset? */
14655 size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
14656 zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
14657 memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
14658 zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
14659 zc->maxNbLdmSequences = maxNbLdmSeq;
14661 ZSTD_window_init(&zc->ldmState.window);
14662 ZSTD_window_clear(&zc->ldmState.window);
14663 zc->ldmState.loadedDictEnd = 0;
14666 DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
14667 zc->initialized = 1;
14673 /* ZSTD_invalidateRepCodes() :
14674 * ensures next compression will not use repcodes from previous block.
14675 * Note : only works with regular variant;
14676 * do not use with extDict variant ! */
14677 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
14679 for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
14680 assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
14683 /* These are the approximate sizes for each strategy past which copying the
14684 * dictionary tables into the working context is faster than using them
14687 static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
14689 8 KB, /* ZSTD_fast */
14690 16 KB, /* ZSTD_dfast */
14691 32 KB, /* ZSTD_greedy */
14692 32 KB, /* ZSTD_lazy */
14693 32 KB, /* ZSTD_lazy2 */
14694 32 KB, /* ZSTD_btlazy2 */
14695 32 KB, /* ZSTD_btopt */
14696 8 KB, /* ZSTD_btultra */
14697 8 KB /* ZSTD_btultra2 */
14700 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
14701 const ZSTD_CCtx_params* params,
14702 U64 pledgedSrcSize)
14704 size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
14705 return ( pledgedSrcSize <= cutoff
14706 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
14707 || params->attachDictPref == ZSTD_dictForceAttach )
14708 && params->attachDictPref != ZSTD_dictForceCopy
14709 && !params->forceWindow; /* dictMatchState isn't correctly
14710 * handled in _enforceMaxDist */
14714 ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
14715 const ZSTD_CDict* cdict,
14716 ZSTD_CCtx_params params,
14717 U64 pledgedSrcSize,
14718 ZSTD_buffered_policy_e zbuff)
14720 { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
14721 unsigned const windowLog = params.cParams.windowLog;
14722 assert(windowLog != 0);
14723 /* Resize working context table params for input only, since the dict
14724 * has its own tables. */
14725 /* pledgeSrcSize == 0 means 0! */
14726 params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
14727 params.cParams.windowLog = windowLog;
14728 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
14729 ZSTDcrp_makeClean, zbuff), "");
14730 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
14733 { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
14734 - cdict->matchState.window.base);
14735 const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
14736 if (cdictLen == 0) {
14737 /* don't even attach dictionaries with no contents */
14738 DEBUGLOG(4, "skipping attaching empty dictionary");
14740 DEBUGLOG(4, "attaching dictionary into context");
14741 cctx->blockState.matchState.dictMatchState = &cdict->matchState;
14743 /* prep working match state so dict matches never have negative indices
14744 * when they are translated to the working context's index space. */
14745 if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
14746 cctx->blockState.matchState.window.nextSrc =
14747 cctx->blockState.matchState.window.base + cdictEnd;
14748 ZSTD_window_clear(&cctx->blockState.matchState.window);
14750 /* loadedDictEnd is expressed within the referential of the active context */
14751 cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
14754 cctx->dictID = cdict->dictID;
14756 /* copy block state */
14757 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
14762 static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
14763 const ZSTD_CDict* cdict,
14764 ZSTD_CCtx_params params,
14765 U64 pledgedSrcSize,
14766 ZSTD_buffered_policy_e zbuff)
14768 const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
14770 DEBUGLOG(4, "copying dictionary into context");
14772 { unsigned const windowLog = params.cParams.windowLog;
14773 assert(windowLog != 0);
14774 /* Copy only compression parameters related to tables. */
14775 params.cParams = *cdict_cParams;
14776 params.cParams.windowLog = windowLog;
14777 FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
14778 ZSTDcrp_leaveDirty, zbuff), "");
14779 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
14780 assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
14781 assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
14784 ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
14787 { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
14788 size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
14790 memcpy(cctx->blockState.matchState.hashTable,
14791 cdict->matchState.hashTable,
14792 hSize * sizeof(U32));
14793 memcpy(cctx->blockState.matchState.chainTable,
14794 cdict->matchState.chainTable,
14795 chainSize * sizeof(U32));
14798 /* Zero the hashTable3, since the cdict never fills it */
14799 { int const h3log = cctx->blockState.matchState.hashLog3;
14800 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
14801 assert(cdict->matchState.hashLog3 == 0);
14802 memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
14805 ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
14807 /* copy dictionary offsets */
14808 { ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
14809 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
14810 dstMatchState->window = srcMatchState->window;
14811 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
14812 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
14815 cctx->dictID = cdict->dictID;
14817 /* copy block state */
14818 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
14823 /* We have a choice between copying the dictionary context into the working
14824 * context, or referencing the dictionary context from the working context
14825 * in-place. We decide here which strategy to use. */
14826 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
14827 const ZSTD_CDict* cdict,
14828 const ZSTD_CCtx_params* params,
14829 U64 pledgedSrcSize,
14830 ZSTD_buffered_policy_e zbuff)
14833 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
14834 (unsigned)pledgedSrcSize);
14836 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
14837 return ZSTD_resetCCtx_byAttachingCDict(
14838 cctx, cdict, *params, pledgedSrcSize, zbuff);
14840 return ZSTD_resetCCtx_byCopyingCDict(
14841 cctx, cdict, *params, pledgedSrcSize, zbuff);
14845 /*! ZSTD_copyCCtx_internal() :
14846 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
14847 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
14848 * The "context", in this case, refers to the hash and chain tables,
14849 * entropy tables, and dictionary references.
14850 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
14851 * @return : 0, or an error code */
14852 static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
14853 const ZSTD_CCtx* srcCCtx,
14854 ZSTD_frameParameters fParams,
14855 U64 pledgedSrcSize,
14856 ZSTD_buffered_policy_e zbuff)
14858 DEBUGLOG(5, "ZSTD_copyCCtx_internal");
14859 RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
14860 "Can't copy a ctx that's not in init stage.");
/* Destination inherits the source's allocator before any (re)allocation happens. */
14862 memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
14863 { ZSTD_CCtx_params params = dstCCtx->requestedParams;
14864 /* Copy only compression parameters related to tables. */
14865 params.cParams = srcCCtx->appliedParams.cParams;
14866 params.fParams = fParams;
/* ZSTDcrp_leaveDirty: skip zeroing the tables here; they are fully
 * overwritten by the memcpy()s below, then marked clean. */
14867 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
14868 ZSTDcrp_leaveDirty, zbuff);
/* After reset, table geometry must match the source exactly,
 * otherwise the raw table copies below would be invalid. */
14869 assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
14870 assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
14871 assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
14872 assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
14873 assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
14876 ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
/* Copy hash / chain / hash3 tables verbatim.
 * chainSize is 0 for ZSTD_fast (no chain table); h3Size is 0 when hashLog3 == 0. */
14879 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
14880 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
14881 int const h3log = srcCCtx->blockState.matchState.hashLog3;
14882 size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
14884 memcpy(dstCCtx->blockState.matchState.hashTable,
14885 srcCCtx->blockState.matchState.hashTable,
14886 hSize * sizeof(U32));
14887 memcpy(dstCCtx->blockState.matchState.chainTable,
14888 srcCCtx->blockState.matchState.chainTable,
14889 chainSize * sizeof(U32));
14890 memcpy(dstCCtx->blockState.matchState.hashTable3,
14891 srcCCtx->blockState.matchState.hashTable3,
14892 h3Size * sizeof(U32));
14895 ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
14897 /* copy dictionary offsets */
14899 const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
14900 ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
14901 dstMatchState->window = srcMatchState->window;
14902 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
14903 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
14905 dstCCtx->dictID = srcCCtx->dictID;
14907 /* copy block state */
14908 memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
14913 /*! ZSTD_copyCCtx() :
14914 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
14915 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
14916 * pledgedSrcSize==0 means "unknown".
14917 * @return : 0, or an error code */
14918 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
/* Default frame parameters: content size flag on, no checksum, no dictID. */
14920 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
/* Buffered policy is inferred from whether src allocated an input buffer;
 * the static assert below guarantees the cast to the enum is the value 1. */
14921 ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
14922 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
14923 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
/* Only write the content size into the frame header when it is actually known. */
14924 fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
14926 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
14927 fParams, pledgedSrcSize,
14932 #define ZSTD_ROWSIZE 16
14933 /*! ZSTD_reduceTable() :
14934 * reduce table indexes by `reducerValue`, or squash to zero.
14935 * PreserveMark preserves "unsorted mark" for btlazy2 strategy.
14936 * It must be set to a clear 0/1 value, to remove branch during inlining.
14937 * Presume table size is a multiple of ZSTD_ROWSIZE
14938 * to help auto-vectorization */
14939 FORCE_INLINE_TEMPLATE void
14940 ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
14942 int const nbRows = (int)size / ZSTD_ROWSIZE;
14945 assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
14946 assert(size < (1U<<31)); /* can be casted to int */
14948 #if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
14949 /* To validate that the table re-use logic is sound, and that we don't
14950 * access table space that we haven't cleaned, we re-"poison" the table
14951 * space every time we mark it dirty.
14953 * This function however is intended to operate on those dirty tables and
14954 * re-clean them. So when this function is used correctly, we can unpoison
14955 * the memory it operated on. This introduces a blind spot though, since
14956 * if we now try to operate on __actually__ poisoned memory, we will not
14958 __msan_unpoison(table, size * sizeof(U32));
14961 for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
14963 for (column=0; column<ZSTD_ROWSIZE; column++) {
/* preserveMark trick: temporarily bump the DUBT "unsorted mark" by
 * reducerValue so the unconditional subtraction below restores it
 * unchanged (branchless preservation of the sentinel). */
14964 if (preserveMark) {
14965 U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
14966 table[cellNb] += adder;
/* Indexes smaller than reducerValue point before the new window start:
 * squash them to 0 (invalid); otherwise rescale by reducerValue. */
14968 if (table[cellNb] < reducerValue) table[cellNb] = 0;
14969 else table[cellNb] -= reducerValue;
/* Non-preserving variant: plain index rescale (see ZSTD_reduceTable_internal). */
14974 static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
14976 ZSTD_reduceTable_internal(table, size, reducerValue, 0);
/* btlazy2 variant: rescales indexes while preserving ZSTD_DUBT_UNSORTED_MARK. */
14979 static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
14981 ZSTD_reduceTable_internal(table, size, reducerValue, 1);
14984 /*! ZSTD_reduceIndex() :
14985 * rescale all indexes to avoid future overflow (indexes are U32) */
14986 static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
/* Hash table always exists. */
14988 { U32 const hSize = (U32)1 << params->cParams.hashLog;
14989 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
/* Chain table exists for every strategy except ZSTD_fast;
 * btlazy2 stores a sentinel mark that must survive the rescale. */
14992 if (params->cParams.strategy != ZSTD_fast) {
14993 U32 const chainSize = (U32)1 << params->cParams.chainLog;
14994 if (params->cParams.strategy == ZSTD_btlazy2)
14995 ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
14997 ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
/* Optional 3-byte hash table (hashLog3 == 0 means absent). */
15000 if (ms->hashLog3) {
15001 U32 const h3Size = (U32)1 << ms->hashLog3;
15002 ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
15007 /*-*******************************************************
15008 * Block entropic compression
15009 *********************************************************/
15011 /* See doc/zstd_compression_format.md for detailed format description */
/* Translate each sequence's raw (litLength, offset, matchLength) into the
 * symbol codes the FSE tables encode, filling the parallel code tables. */
15013 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
15015 const seqDef* const sequences = seqStorePtr->sequencesStart;
15016 BYTE* const llCodeTable = seqStorePtr->llCode;
15017 BYTE* const ofCodeTable = seqStorePtr->ofCode;
15018 BYTE* const mlCodeTable = seqStorePtr->mlCode;
15019 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
15021 assert(nbSeq <= seqStorePtr->maxNbSeq);
15022 for (u=0; u<nbSeq; u++) {
15023 U32 const llv = sequences[u].litLength;
15024 U32 const mlv = sequences[u].matchLength;
15025 llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
/* Offset code is simply the position of the highest set bit. */
15026 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
15027 mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
/* longLengthID: 1 = the literal length at longLengthPos overflowed its
 * field, 2 = the match length did; force the max code so the decoder
 * reads the extended length. */
15029 if (seqStorePtr->longLengthID==1)
15030 llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
15031 if (seqStorePtr->longLengthID==2)
15032 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
15035 /* ZSTD_useTargetCBlockSize():
15036 * Returns if target compressed block size param is being used.
15037 * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
15038 * Returns 1 if true, 0 otherwise. */
15039 static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
15041 DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
/* 0 is the "disabled" sentinel for targetCBlockSize. */
15042 return (cctxParams->targetCBlockSize != 0);
15045 /* ZSTD_compressSequences_internal():
15046 * actually compresses both literals and sequences */
/* Layout of the emitted block body: [compressed literals][nbSeq header]
 * [seqHead symbol-type flags][LL NCount?][OF NCount?][ML NCount?][bitstream].
 * May return ERROR(dstSize_tooSmall); the caller decides whether to fall
 * back to a raw block. */
15048 ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
15049 const ZSTD_entropyCTables_t* prevEntropy,
15050 ZSTD_entropyCTables_t* nextEntropy,
15051 const ZSTD_CCtx_params* cctxParams,
15052 void* dst, size_t dstCapacity,
15053 void* entropyWorkspace, size_t entropyWkspSize,
/* longOffsets: offsets may exceed what a single bitstream accumulator
 * reload can carry, so the encoder must split offset bits. */
15056 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
15057 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
15058 unsigned count[MaxSeq+1];
15059 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
15060 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
15061 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
15062 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
15063 const seqDef* const sequences = seqStorePtr->sequencesStart;
15064 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
15065 const BYTE* const llCodeTable = seqStorePtr->llCode;
15066 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
15067 BYTE* const ostart = (BYTE*)dst;
15068 BYTE* const oend = ostart + dstCapacity;
15070 size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
/* Tracks the last FSE NCount header written; used below to work around a
 * decoder bug in zstd <= 1.3.4 (see comment near the end). */
15072 BYTE* lastNCount = NULL;
15074 DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
15075 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
15077 /* Compress literals */
15078 { const BYTE* const literals = seqStorePtr->litStart;
15079 size_t const litSize = (size_t)(seqStorePtr->lit - literals);
15080 size_t const cSize = ZSTD_compressLiterals(
15081 &prevEntropy->huf, &nextEntropy->huf,
15082 cctxParams->cParams.strategy,
15083 ZSTD_disableLiteralsCompression(cctxParams),
15086 entropyWorkspace, entropyWkspSize,
15088 FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
15089 assert(cSize <= dstCapacity);
15093 /* Sequences Header */
15094 RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
15095 dstSize_tooSmall, "Can't fit seq hdr in output buf!");
/* nbSeq is encoded in 1, 2 or 3 bytes depending on its magnitude,
 * per the Zstandard frame format. */
15097 *op++ = (BYTE)nbSeq;
15098 } else if (nbSeq < LONGNBSEQ) {
15099 op[0] = (BYTE)((nbSeq>>8) + 0x80);
15100 op[1] = (BYTE)nbSeq;
15104 MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
15107 assert(op <= oend);
/* No sequences: block is literals-only; repeat previous FSE tables. */
15109 /* Copy the old tables over as if we repeated them */
15110 memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
15111 return (size_t)(op - ostart);
15114 /* seqHead : flags for FSE encoding type */
15116 assert(op <= oend);
15118 /* convert length/distances into codes */
15119 ZSTD_seqToCodes(seqStorePtr);
15120 /* build CTable for Literal Lengths */
15121 { unsigned max = MaxLL;
15122 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
15123 DEBUGLOG(5, "Building LL table");
15124 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
/* Choose between repeat / basic / rle / compressed encodings based on
 * the histogram; updates repeatMode for the next block. */
15125 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
15126 count, max, mostFrequent, nbSeq,
15127 LLFSELog, prevEntropy->fse.litlengthCTable,
15128 LL_defaultNorm, LL_defaultNormLog,
15129 ZSTD_defaultAllowed, strategy);
15130 assert(set_basic < set_compressed && set_rle < set_compressed);
15131 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
15132 { size_t const countSize = ZSTD_buildCTable(
15133 op, (size_t)(oend - op),
15134 CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
15135 count, max, llCodeTable, nbSeq,
15136 LL_defaultNorm, LL_defaultNormLog, MaxLL,
15137 prevEntropy->fse.litlengthCTable,
15138 sizeof(prevEntropy->fse.litlengthCTable),
15139 entropyWorkspace, entropyWkspSize);
15140 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
15141 if (LLtype == set_compressed)
15144 assert(op <= oend);
15146 /* build CTable for Offsets */
15147 { unsigned max = MaxOff;
15148 size_t const mostFrequent = HIST_countFast_wksp(
15149 count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
15150 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
15151 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
15152 DEBUGLOG(5, "Building OF table");
15153 nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
15154 Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
15155 count, max, mostFrequent, nbSeq,
15156 OffFSELog, prevEntropy->fse.offcodeCTable,
15157 OF_defaultNorm, OF_defaultNormLog,
15158 defaultPolicy, strategy);
15159 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
15160 { size_t const countSize = ZSTD_buildCTable(
15161 op, (size_t)(oend - op),
15162 CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
15163 count, max, ofCodeTable, nbSeq,
15164 OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
15165 prevEntropy->fse.offcodeCTable,
15166 sizeof(prevEntropy->fse.offcodeCTable),
15167 entropyWorkspace, entropyWkspSize);
15168 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
15169 if (Offtype == set_compressed)
15172 assert(op <= oend);
15174 /* build CTable for MatchLengths */
15175 { unsigned max = MaxML;
15176 size_t const mostFrequent = HIST_countFast_wksp(
15177 count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
15178 DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
15179 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
15180 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
15181 count, max, mostFrequent, nbSeq,
15182 MLFSELog, prevEntropy->fse.matchlengthCTable,
15183 ML_defaultNorm, ML_defaultNormLog,
15184 ZSTD_defaultAllowed, strategy);
15185 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
15186 { size_t const countSize = ZSTD_buildCTable(
15187 op, (size_t)(oend - op),
15188 CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
15189 count, max, mlCodeTable, nbSeq,
15190 ML_defaultNorm, ML_defaultNormLog, MaxML,
15191 prevEntropy->fse.matchlengthCTable,
15192 sizeof(prevEntropy->fse.matchlengthCTable),
15193 entropyWorkspace, entropyWkspSize);
15194 FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
15195 if (MLtype == set_compressed)
15198 assert(op <= oend);
/* Pack the three 2-bit symbol encoding types into the seqHead byte. */
15201 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
15203 { size_t const bitstreamSize = ZSTD_encodeSequences(
15204 op, (size_t)(oend - op),
15205 CTable_MatchLength, mlCodeTable,
15206 CTable_OffsetBits, ofCodeTable,
15207 CTable_LitLength, llCodeTable,
15209 longOffsets, bmi2);
15210 FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
15211 op += bitstreamSize;
15212 assert(op <= oend);
15213 /* zstd versions <= 1.3.4 mistakenly report corruption when
15214 * FSE_readNCount() receives a buffer < 4 bytes.
15215 * Fixed by https://github.com/facebook/zstd/pull/1146.
15216 * This can happen when the last set_compressed table present is 2
15217 * bytes and the bitstream is only one byte.
15218 * In this exceedingly rare case, we will simply emit an uncompressed
15219 * block, since it isn't worth optimizing.
15221 if (lastNCount && (op - lastNCount) < 4) {
15222 /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
15223 assert(op - lastNCount == 3);
15224 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
15225 "emitting an uncompressed block.");
15230 DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
15231 return (size_t)(op - ostart);
/* Wrapper around ZSTD_compressSequences_internal() that converts
 * "did not compress well enough" outcomes into a 0 return, which callers
 * interpret as "emit a raw uncompressed block instead". */
15235 ZSTD_compressSequences(seqStore_t* seqStorePtr,
15236 const ZSTD_entropyCTables_t* prevEntropy,
15237 ZSTD_entropyCTables_t* nextEntropy,
15238 const ZSTD_CCtx_params* cctxParams,
15239 void* dst, size_t dstCapacity,
15241 void* entropyWorkspace, size_t entropyWkspSize,
15244 size_t const cSize = ZSTD_compressSequences_internal(
15245 seqStorePtr, prevEntropy, nextEntropy, cctxParams,
15247 entropyWorkspace, entropyWkspSize, bmi2);
15248 if (cSize == 0) return 0;
15249 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
15250 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
15252 if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
15253 return 0; /* block not compressed */
15254 FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed");
15256 /* Check compressibility */
/* The compressed form must beat srcSize by at least the minimum gain,
 * otherwise a raw block is cheaper overall. */
15257 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
15258 if (cSize >= maxCSize) return 0; /* block not compressed */
15264 /* ZSTD_selectBlockCompressor() :
15265 * Not static, but internal use only (used by long distance matcher)
15266 * assumption : strat is a valid strategy */
15267 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
/* Dispatch table indexed as [dictMode][strategy]:
 * row 0 = no dict, row 1 = extDict, row 2 = dictMatchState.
 * Index 0 of each row duplicates the fast compressor as a default. */
15269 static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = {
15270 { ZSTD_compressBlock_fast /* default for 0 */,
15271 ZSTD_compressBlock_fast,
15272 ZSTD_compressBlock_doubleFast,
15273 ZSTD_compressBlock_greedy,
15274 ZSTD_compressBlock_lazy,
15275 ZSTD_compressBlock_lazy2,
15276 ZSTD_compressBlock_btlazy2,
15277 ZSTD_compressBlock_btopt,
15278 ZSTD_compressBlock_btultra,
15279 ZSTD_compressBlock_btultra2 },
15280 { ZSTD_compressBlock_fast_extDict /* default for 0 */,
15281 ZSTD_compressBlock_fast_extDict,
15282 ZSTD_compressBlock_doubleFast_extDict,
15283 ZSTD_compressBlock_greedy_extDict,
15284 ZSTD_compressBlock_lazy_extDict,
15285 ZSTD_compressBlock_lazy2_extDict,
15286 ZSTD_compressBlock_btlazy2_extDict,
15287 ZSTD_compressBlock_btopt_extDict,
15288 ZSTD_compressBlock_btultra_extDict,
15289 ZSTD_compressBlock_btultra_extDict },
15290 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
15291 ZSTD_compressBlock_fast_dictMatchState,
15292 ZSTD_compressBlock_doubleFast_dictMatchState,
15293 ZSTD_compressBlock_greedy_dictMatchState,
15294 ZSTD_compressBlock_lazy_dictMatchState,
15295 ZSTD_compressBlock_lazy2_dictMatchState,
15296 ZSTD_compressBlock_btlazy2_dictMatchState,
15297 ZSTD_compressBlock_btopt_dictMatchState,
15298 ZSTD_compressBlock_btultra_dictMatchState,
15299 ZSTD_compressBlock_btultra_dictMatchState }
15301 ZSTD_blockCompressor selectedCompressor;
/* Guards the table layout: ZSTD_fast must be strategy index 1. */
15302 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
15304 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
15305 selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
15306 assert(selectedCompressor != NULL);
15307 return selectedCompressor;
/* Append the trailing literals (bytes after the last match) to the seqStore's
 * literal buffer. Caller guarantees the buffer has room for lastLLSize bytes. */
15310 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
15311 const BYTE* anchor, size_t lastLLSize)
15313 memcpy(seqStorePtr->lit, anchor, lastLLSize);
15314 seqStorePtr->lit += lastLLSize;
/* Rewind the seqStore to empty: literal and sequence cursors back to their
 * buffer starts, and clear the long-length overflow marker. */
15317 void ZSTD_resetSeqStore(seqStore_t* ssPtr)
15319 ssPtr->lit = ssPtr->litStart;
15320 ssPtr->sequences = ssPtr->sequencesStart;
15321 ssPtr->longLengthID = 0;
15324 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
/* Run the match finder over [src, src+srcSize) and fill zc->seqStore with
 * the resulting sequences + literals.
 * @return ZSTDbss_compress when sequences were produced, ZSTDbss_noCompress
 * when the input is too small to bother, or an error code. */
15326 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
15328 ZSTD_matchState_t* const ms = &zc->blockState.matchState;
15329 DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
15330 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
15331 /* Assert that we have correctly flushed the ctx params into the ms's copy */
15332 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
15333 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
/* Still consume any externally-provided LDM sequences covering this input,
 * so the external sequence stream stays in sync with the source position. */
15334 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
15335 return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
15337 ZSTD_resetSeqStore(&(zc->seqStore));
15338 /* required for optimal parser to read stats from dictionary */
15339 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
15340 /* tell the optimal parser how we expect to compress literals */
15341 ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
15342 /* a gap between an attached dict and the current window is not safe,
15343 * they must remain adjacent,
15344 * and when that stops being the case, the dict must be unset */
15345 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
15347 /* limited update after a very long match */
/* Cap the catch-up work: if the last match skipped far ahead, only re-index
 * at most 192 positions instead of the whole gap. */
15348 { const BYTE* const base = ms->window.base;
15349 const BYTE* const istart = (const BYTE*)src;
15350 const U32 current = (U32)(istart-base);
15351 if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */
15352 if (current > ms->nextToUpdate + 384)
15353 ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
15356 /* select and store sequences */
15357 { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
/* Seed next block's repcodes from the previous block's. */
15360 for (i = 0; i < ZSTD_REP_NUM; ++i)
15361 zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
/* Three sources of sequences, in priority order:
 * 1) externally supplied LDM sequences, 2) in-context LDM, 3) plain
 * block compressor chosen by strategy + dict mode. */
15363 if (zc->externSeqStore.pos < zc->externSeqStore.size) {
15364 assert(!zc->appliedParams.ldmParams.enableLdm);
15365 /* Updates ldmSeqStore.pos */
15367 ZSTD_ldm_blockCompress(&zc->externSeqStore,
15369 zc->blockState.nextCBlock->rep,
15371 assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
15372 } else if (zc->appliedParams.ldmParams.enableLdm) {
15373 rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
15375 ldmSeqStore.seq = zc->ldmSequences;
15376 ldmSeqStore.capacity = zc->maxNbLdmSequences;
15377 /* Updates ldmSeqStore.size */
15378 FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
15379 &zc->appliedParams.ldmParams,
15380 src, srcSize), "");
15381 /* Updates ldmSeqStore.pos */
15383 ZSTD_ldm_blockCompress(&ldmSeqStore,
15385 zc->blockState.nextCBlock->rep,
15387 assert(ldmSeqStore.pos == ldmSeqStore.size);
15388 } else { /* not long range mode */
15389 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
15390 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
15392 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
15393 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
15395 return ZSTDbss_compress;
/* Export the current block's sequences from the internal seqStore into the
 * public ZSTD_Sequence array held by zc->seqCollector, resolving repcodes
 * into absolute offsets and re-applying long-length corrections. */
15398 static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
15400 const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
15401 const seqDef* seqs = seqStore->sequencesStart;
15402 size_t seqsSize = seqStore->sequences - seqs;
15404 ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
15405 size_t i; size_t position; int repIdx;
15407 assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
15408 for (i = 0, position = 0; i < seqsSize; ++i) {
15409 outSeqs[i].offset = seqs[i].offset;
15410 outSeqs[i].litLength = seqs[i].litLength;
/* Internal matchLength is stored biased by MINMATCH; undo the bias. */
15411 outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH;
/* Re-apply the 0x10000 overflow the seqStore recorded via longLengthID. */
15413 if (i == seqStore->longLengthPos) {
15414 if (seqStore->longLengthID == 1) {
15415 outSeqs[i].litLength += 0x10000;
15416 } else if (seqStore->longLengthID == 2) {
15417 outSeqs[i].matchLength += 0x10000;
/* Offsets <= ZSTD_REP_NUM are repcodes: resolve them against a previously
 * emitted sequence's offset, or against repStartValue[] at stream start.
 * NOTE(review): repIdx walk relies on surrounding (not visible) lines for
 * the full repcode-history rules — confirm against upstream zstd. */
15421 if (outSeqs[i].offset <= ZSTD_REP_NUM) {
15422 outSeqs[i].rep = outSeqs[i].offset;
15423 repIdx = (unsigned int)i - outSeqs[i].offset;
15425 if (outSeqs[i].litLength == 0) {
15426 if (outSeqs[i].offset < 3) {
15429 repIdx = (unsigned int)i - 1;
15433 assert(repIdx >= -3);
15434 outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : repStartValue[-repIdx - 1];
15435 if (outSeqs[i].rep == 4) {
15436 --outSeqs[i].offset;
/* Non-repcode offsets carry a +ZSTD_REP_NUM bias internally; remove it. */
15439 outSeqs[i].offset -= ZSTD_REP_NUM;
/* Track absolute position so matchPos points at the match start. */
15442 position += outSeqs[i].litLength;
15443 outSeqs[i].matchPos = (unsigned int)position;
15444 position += outSeqs[i].matchLength;
15446 zc->seqCollector.seqIndex += seqsSize;
/* Public entry point: run a full compression pass purely to harvest the
 * sequences it generates into outSeqs (the compressed output itself is
 * written to a throwaway buffer and discarded).
 * @return number of sequences collected, or an error code (via RETURN_ERROR_IF). */
15449 size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
15450 size_t outSeqsSize, const void* src, size_t srcSize)
15452 const size_t dstCapacity = ZSTD_compressBound(srcSize);
15453 void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem);
15454 SeqCollector seqCollector;
15456 RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
/* Arm the collector; ZSTD_compressBlock_internal() checks collectSequences
 * and routes each block through ZSTD_copyBlockSequences(). */
15458 seqCollector.collectSequences = 1;
15459 seqCollector.seqStart = outSeqs;
15460 seqCollector.seqIndex = 0;
15461 seqCollector.maxSequences = outSeqsSize;
15462 zc->seqCollector = seqCollector;
15464 ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
15465 ZSTD_free(dst, ZSTD_defaultCMem);
15466 return zc->seqCollector.seqIndex;
15469 /* Returns true if the given block is a RLE block */
/* i.e. every byte equals the first byte; blocks of length 0 or 1 trivially qualify. */
15470 static int ZSTD_isRLE(const BYTE *ip, size_t length) {
15472 if (length < 2) return 1;
15473 for (i = 1; i < length; ++i) {
15474 if (ip[0] != ip[i]) return 0;
15479 /* Returns true if the given block may be RLE.
15480 * This is just a heuristic based on the compressibility.
15481 * It may return both false positives and false negatives.
/* Cheap pre-filter: a truly RLE block yields almost no sequences and
 * almost no literals, so only such blocks pay for the exact ZSTD_isRLE scan. */
15483 static int ZSTD_maybeRLE(seqStore_t const* seqStore)
15485 size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
15486 size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
15488 return nbSeqs < 4 && nbLits < 10;
/* Commit this block's state: swap prev/next compressed-block state pointers
 * so the just-built repcodes and entropy tables become "previous" for the
 * next block. Called only after the block is known to be emitted as-is. */
15491 static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
15493 ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
15494 zc->blockState.prevCBlock = zc->blockState.nextCBlock;
15495 zc->blockState.nextCBlock = tmp;
/* Compress one block: build the seqStore, then entropy-code it.
 * @return compressed size, 0 meaning "not compressible, emit raw block",
 * or an error code. With frame!=0 the caller handles RLE detection results. */
15498 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
15499 void* dst, size_t dstCapacity,
15500 const void* src, size_t srcSize, U32 frame)
15502 /* This the upper bound for the length of an rle block.
15503 * This isn't the actual upper bound. Finding the real threshold
15504 * needs further investigation.
15506 const U32 rleMaxLength = 25;
15508 const BYTE* ip = (const BYTE*)src;
15509 BYTE* op = (BYTE*)dst;
15510 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
15511 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
15512 (unsigned)zc->blockState.matchState.nextToUpdate);
15514 { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
15515 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
15516 if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
/* Sequence-collection mode (ZSTD_getSequences): harvest and skip encoding. */
15519 if (zc->seqCollector.collectSequences) {
15520 ZSTD_copyBlockSequences(zc);
15524 /* encode sequences and literals */
15525 cSize = ZSTD_compressSequences(&zc->seqStore,
15526 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
15527 &zc->appliedParams,
15530 zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
/* Possibly downgrade a tiny compressed block to an RLE block. */
15534 /* We don't want to emit our first block as a RLE even if it qualifies because
15535 * doing so will cause the decoder (cli only) to throw a "should consume all input error."
15536 * This is only an issue for zstd <= v1.4.3
15538 !zc->isFirstBlock &&
15539 cSize < rleMaxLength &&
15540 ZSTD_isRLE(ip, srcSize))
/* Only confirm (swap in) the new repcodes/entropy tables when the block
 * was genuinely compressed (cSize > 1); raw/RLE blocks keep the old state. */
15547 if (!ZSTD_isError(cSize) && cSize > 1) {
15548 ZSTD_confirmRepcodesAndEntropyTables(zc);
15550 /* We check that dictionaries have offset codes available for the first
15551 * block. After the first block, the offcode table might not have large
15552 * enough codes to represent the offsets in the data.
15554 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
15555 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
/* Body of targetCBlockSize mode: try RLE, then superblock compression
 * (splitting output into sub-blocks near the target size), finally fall
 * back to a single raw block. Always returns a complete block (with header),
 * unlike ZSTD_compressBlock_internal. */
15560 static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
15561 void* dst, size_t dstCapacity,
15562 const void* src, size_t srcSize,
15563 const size_t bss, U32 lastBlock)
15565 DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
15566 if (bss == ZSTDbss_compress) {
15567 if (/* We don't want to emit our first block as a RLE even if it qualifies because
15568 * doing so will cause the decoder (cli only) to throw a "should consume all input error."
15569 * This is only an issue for zstd <= v1.4.3
15571 !zc->isFirstBlock &&
15572 ZSTD_maybeRLE(&zc->seqStore) &&
15573 ZSTD_isRLE((BYTE const*)src, srcSize))
15575 return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
15577 /* Attempt superblock compression.
15579 * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
15580 * standard ZSTD_compressBound(). This is a problem, because even if we have
15581 * space now, taking an extra byte now could cause us to run out of space later
15582 * and violate ZSTD_compressBound().
15584 * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
15586 * In order to respect ZSTD_compressBound() we must attempt to emit a raw
15587 * uncompressed block in these cases:
15588 * * cSize == 0: Return code for an uncompressed block.
15589 * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
15590 * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
15592 * * cSize >= blockBound(srcSize): We have expanded the block too much so
15593 * emit an uncompressed block.
15596 size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
15597 if (cSize != ERROR(dstSize_tooSmall)) {
15598 size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
15599 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
/* Accept the superblock result only when it both compressed (cSize != 0)
 * and stayed within the acceptable expansion bound. */
15600 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
15601 ZSTD_confirmRepcodesAndEntropyTables(zc);
15608 DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
15609 /* Superblock compression failed, attempt to emit a single no compress block.
15610 * The decoder will be able to stream this block since it is uncompressed.
15612 return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
/* targetCBlockSize entry point: build the seqStore, delegate to the body,
 * then apply the same first-block offcode repeat-mode downgrade that
 * ZSTD_compressBlock_internal performs. */
15615 static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
15616 void* dst, size_t dstCapacity,
15617 const void* src, size_t srcSize,
15621 const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
15622 DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
15623 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
15624 FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
15626 cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
15627 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");
/* A dictionary's offcode table is only guaranteed valid for the first block;
 * force re-validation on subsequent blocks. */
15629 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
15630 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
/* If the window's U32 indexes are close to overflowing, rescale them:
 * correct the window, reduce every table index by the same amount, and
 * invalidate any dictionary (its indexes are no longer reachable). */
15635 static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
15637 ZSTD_CCtx_params const* params,
15641 if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
15642 U32 const maxDist = (U32)1 << params->cParams.windowLog;
15643 U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
15644 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
15645 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
15646 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
15647 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
/* Tables are rewritten wholesale during the reduce; dirty/clean bracketing
 * keeps the workspace's MSAN poisoning bookkeeping consistent. */
15648 ZSTD_cwksp_mark_tables_dirty(ws);
15649 ZSTD_reduceIndex(ms, params, correction);
15650 ZSTD_cwksp_mark_tables_clean(ws);
/* Shift the insertion cursor by the same correction (clamping at 0). */
15651 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
15652 else ms->nextToUpdate -= correction;
15653 /* invalidate dictionaries on overflow correction */
15654 ms->loadedDictEnd = 0;
15655 ms->dictMatchState = NULL;
15659 /*! ZSTD_compress_frameChunk() :
15660 * Compress a chunk of data into one or multiple blocks.
15661 * All blocks will be terminated, all input will be consumed.
15662 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
15663 * Frame is supposed already started (header already produced)
15664 * @return : compressed size, or an error code
/* Splits the chunk into blocks of cctx->blockSize, compressing each one and
 * emitting a raw (uncompressed) block whenever compression does not help.
 * NOTE(review): listing has dropped lines (op declaration, several closing
 * braces); see numbering gaps. */
15666 static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
15667 void* dst, size_t dstCapacity,
15668 const void* src, size_t srcSize,
15669 U32 lastFrameChunk)
15671 size_t blockSize = cctx->blockSize;
15672 size_t remaining = srcSize;
15673 const BYTE* ip = (const BYTE*)src;
15674 BYTE* const ostart = (BYTE*)dst;
15676 U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
15678 assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
15680 DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
/* frame checksum is computed over the raw input, fed block-chunk by chunk */
15681 if (cctx->appliedParams.fParams.checksumFlag && srcSize)
15682 XXH64_update(&cctx->xxhState, src, srcSize);
15684 while (remaining) {
15685 ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
/* lastBlock only when this is the final chunk AND the final block of it */
15686 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
15688 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
15690 "not enough space to store compressed block");
15691 if (remaining < blockSize) blockSize = remaining;
15693 ZSTD_overflowCorrectIfNeeded(
15694 ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
15695 ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
15697 /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
15698 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
15701 if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
15702 cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
15703 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
15705 assert(cSize <= blockSize + ZSTD_blockHeaderSize);
/* default path: compress past the 3-byte header, then decide the block type */
15707 cSize = ZSTD_compressBlock_internal(cctx,
15708 op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
15709 ip, blockSize, 1 /* frame */);
15710 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
15712 if (cSize == 0) { /* block is not compressible */
15713 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
15714 FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
/* cSize==1 means a single repeated byte : emit an RLE block instead */
15716 U32 const cBlockHeader = cSize == 1 ?
15717 lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
15718 lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
15719 MEM_writeLE24(op, cBlockHeader);
15720 cSize += ZSTD_blockHeaderSize;
15726 assert(remaining >= blockSize);
15727 remaining -= blockSize;
15729 assert(dstCapacity >= cSize);
15730 dstCapacity -= cSize;
15731 cctx->isFirstBlock = 0;
15732 DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
15736 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
15737 return (size_t)(op-ostart);
/* Serializes the zstd frame header : magic number (zstd1 format only),
 * frame-header-descriptor byte, optional window descriptor, optional dictID
 * (1/2/4 bytes), optional frame content size (1/2/4/8 bytes).
 * @return : number of bytes written, or dstSize_tooSmall. */
15741 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
15742 const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
15743 { BYTE* const op = (BYTE*)dst;
15744 U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
15745 U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */
15746 U32 const checksumFlag = params->fParams.checksumFlag>0;
15747 U32 const windowSize = (U32)1 << params->cParams.windowLog;
/* singleSegment : whole content fits in one window, so no window descriptor */
15748 U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
15749 BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
15750 U32 const fcsCode = params->fParams.contentSizeFlag ?
15751 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */
15752 BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
15755 assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
15756 RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
15757 "dst buf is too small to fit worst-case frame header size.");
15758 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
15759 !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
15761 if (params->format == ZSTD_f_zstd1) {
15762 MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
15765 op[pos++] = frameHeaderDescriptionByte;
15766 if (!singleSegment) op[pos++] = windowLogByte;
/* dictID field width is encoded by dictIDSizeCode computed above */
15767 switch(dictIDSizeCode)
15769 default: assert(0); /* impossible */
15771 case 1 : op[pos] = (BYTE)(dictID); pos++; break;
15772 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
15773 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
/* frame content size field width is encoded by fcsCode */
15777 default: assert(0); /* impossible */
15778 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
15779 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
15780 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
15781 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
15786 /* ZSTD_writeLastEmptyBlock() :
15787 * output an empty Block with end-of-frame mark to complete a frame
15788 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
15789 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
/* Writes a 3-byte raw-block header with size 0 and the last-block bit set,
 * which terminates a frame without adding any payload. */
15791 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
15793 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
15794 "dst buf is too small to write frame trailer empty block.");
15795 { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */
15796 MEM_writeLE24(dst, cBlockHeader24);
15797 return ZSTD_blockHeaderSize;
/* Registers a caller-provided sequence array (rawSeq) to be consumed by the
 * next compression job. Only legal before any data is compressed
 * (ZSTDcs_init) and incompatible with long-distance matching, which would
 * generate its own sequences. The cctx borrows `seq`; the caller keeps
 * ownership — presumably it must stay valid until compression completes
 * (TODO confirm against upstream docs). */
15801 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
15803 RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
15804 "wrong cctx stage");
15805 RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
15806 parameter_unsupported,
15807 "incompatible with ldm");
15808 cctx->externSeqStore.seq = seq;
15809 cctx->externSeqStore.size = nbSeq;
15810 cctx->externSeqStore.capacity = nbSeq;
15811 cctx->externSeqStore.pos = 0;
/* Core streaming entry point : optionally writes the frame header (first call
 * in frame mode), updates the match-state window with the new input, applies
 * overflow correction, then compresses either a frame chunk or a single raw
 * block depending on `frame`. Also enforces the pledged source size. */
15816 static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
15817 void* dst, size_t dstCapacity,
15818 const void* src, size_t srcSize,
15819 U32 frame, U32 lastFrameChunk)
15821 ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
15824 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
15825 cctx->stage, (unsigned)srcSize);
15826 RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
15827 "missing init (ZSTD_compressBegin)");
/* first call of a frame : emit the frame header before any block */
15829 if (frame && (cctx->stage==ZSTDcs_init)) {
15830 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
15831 cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
15832 FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
15833 assert(fhSize <= dstCapacity);
15834 dstCapacity -= fhSize;
15835 dst = (char*)dst + fhSize;
15836 cctx->stage = ZSTDcs_ongoing;
15839 if (!srcSize) return fhSize; /* do not generate an empty block if no input */
/* window_update returning 0 indicates a non-contiguous segment : restart insertion */
15841 if (!ZSTD_window_update(&ms->window, src, srcSize)) {
15842 ms->nextToUpdate = ms->window.dictLimit;
15844 if (cctx->appliedParams.ldmParams.enableLdm) {
15845 ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
15849 /* overflow check and correction for block mode */
15850 ZSTD_overflowCorrectIfNeeded(
15851 ms, &cctx->workspace, &cctx->appliedParams,
15852 src, (BYTE const*)src + srcSize);
15855 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
15856 { size_t const cSize = frame ?
15857 ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
15858 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
15859 FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
15860 cctx->consumedSrcSize += srcSize;
15861 cctx->producedCSize += (cSize + fhSize);
15862 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
15863 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
15864 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
15866 cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
15868 "error : pledgedSrcSize = %u, while realSrcSize >= %u",
15869 (unsigned)cctx->pledgedSrcSizePlusOne-1,
15870 (unsigned)cctx->consumedSrcSize);
15872 return cSize + fhSize;
/* Public streaming wrapper : frame mode, not the last chunk. */
15876 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
15877 void* dst, size_t dstCapacity,
15878 const void* src, size_t srcSize)
15880 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
15881 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
/* Maximum block size for this cctx : window size, capped at ZSTD_BLOCKSIZE_MAX. */
15885 size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
15887 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
15888 assert(!ZSTD_checkCParams(cParams));
15889 return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
/* Compresses exactly one block (no frame header/footer). Input must not
 * exceed ZSTD_getBlockSize(cctx). */
15892 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
15894 DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
15895 { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
15896 RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
15898 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
15901 /*! ZSTD_loadDictionaryContent() :
15902  * @return : 0, or an error code
/* Feeds dictionary content into the match-state window and fills the
 * strategy-appropriate hash/chain/binary-tree tables, one ZSTD_CHUNKSIZE_MAX
 * chunk at a time so overflow correction can run between chunks.
 * Fix(review): `¶ms` was a mojibake of `&params` (HTML entity `&para;`
 * corrupted during extraction) — restored on the ZSTD_ldm_fillHashTable call.
 * NOTE(review): listing dropped lines (case labels, closing braces); compare
 * with upstream zstd before further edits. */
15904 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
15907 ZSTD_CCtx_params const* params,
15908 const void* src, size_t srcSize,
15909 ZSTD_dictTableLoadMethod_e dtlm)
15911 const BYTE* ip = (const BYTE*) src;
15912 const BYTE* const iend = ip + srcSize;
15914 ZSTD_window_update(&ms->window, src, srcSize);
15915 ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
15917 if (params->ldmParams.enableLdm && ls != NULL) {
15918 ZSTD_window_update(&ls->window, src, srcSize);
15919 ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
15922 /* Assert that the ms params match the params we're being given */
15923 ZSTD_assertEqualCParams(params->cParams, ms->cParams);
15925 if (srcSize <= HASH_READ_SIZE) return 0;
15927 while (iend - ip > HASH_READ_SIZE) {
15928 size_t const remaining = (size_t)(iend - ip);
15929 size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
15930 const BYTE* const ichunk = ip + chunk;
15932 ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
15934 if (params->ldmParams.enableLdm && ls != NULL)
15935 ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
15937 switch(params->cParams.strategy)
15940 ZSTD_fillHashTable(ms, ichunk, dtlm);
15943 ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
15949 if (chunk >= HASH_READ_SIZE)
15950 ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
15953 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
15956 case ZSTD_btultra2:
15957 if (chunk >= HASH_READ_SIZE)
15958 ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
15962 assert(0); /* not possible : not a valid strategy id */
15968 ms->nextToUpdate = (U32)(iend - ms->window.base);
15973 /* Dictionaries that assign zero probability to symbols that show up causes problems
15974 when FSE encoding. Refuse dictionaries that assign zero probability to symbols
15975 that we may encounter during compression.
15976 NOTE: This behavior is not standard and could be improved in the future. */
/* @return : 0 if every symbol in [0, maxSymbolValue] has non-zero probability,
 * dictionary_corrupted otherwise. */
15977 static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
15979 RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols");
15980 for (s = 0; s <= maxSymbolValue; ++s) {
15981 RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols");
/* Parses the entropy section of a zstd dictionary in order : Huffman literal
 * table, offset-code FSE table, match-length FSE table, literal-length FSE
 * table, then the 3 initial repcodes. Builds the corresponding compression
 * tables into `bs`. Offcode count validation is deferred to the caller
 * (ZSTD_loadZstdDictionary), which knows the dictionary content size.
 * @return : number of header bytes consumed, or an error code. */
15986 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
15987 short* offcodeNCount, unsigned* offcodeMaxValue,
15988 const void* const dict, size_t dictSize)
15990 const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */
15991 const BYTE* const dictEnd = dictPtr + dictSize;
15993 bs->entropy.huf.repeatMode = HUF_repeat_check;
15995 { unsigned maxSymbolValue = 255;
15996 unsigned hasZeroWeights = 1;
15997 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
15998 dictEnd-dictPtr, &hasZeroWeights);
16000 /* We only set the loaded table as valid if it contains all non-zero
16001 * weights. Otherwise, we set it to check */
16002 if (!hasZeroWeights)
16003 bs->entropy.huf.repeatMode = HUF_repeat_valid;
16005 RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
16006 RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
16007 dictPtr += hufHeaderSize;
16010 { unsigned offcodeLog;
16011 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
16012 RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
16013 RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
16014 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
16015 /* fill all offset symbols to avoid garbage at end of table */
16016 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
16017 bs->entropy.fse.offcodeCTable,
16018 offcodeNCount, MaxOff, offcodeLog,
16019 workspace, HUF_WORKSPACE_SIZE)),
16020 dictionary_corrupted, "");
16021 dictPtr += offcodeHeaderSize;
16024 { short matchlengthNCount[MaxML+1];
16025 unsigned matchlengthMaxValue = MaxML, matchlengthLog;
16026 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
16027 RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
16028 RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
16029 /* Every match length code must have non-zero probability */
16030 FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), "");
16031 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
16032 bs->entropy.fse.matchlengthCTable,
16033 matchlengthNCount, matchlengthMaxValue, matchlengthLog,
16034 workspace, HUF_WORKSPACE_SIZE)),
16035 dictionary_corrupted, "");
16036 dictPtr += matchlengthHeaderSize;
16039 { short litlengthNCount[MaxLL+1];
16040 unsigned litlengthMaxValue = MaxLL, litlengthLog;
16041 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
16042 RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
16043 RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
16044 /* Every literal length code must have non-zero probability */
16045 FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), "");
16046 RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
16047 bs->entropy.fse.litlengthCTable,
16048 litlengthNCount, litlengthMaxValue, litlengthLog,
16049 workspace, HUF_WORKSPACE_SIZE)),
16050 dictionary_corrupted, "");
16051 dictPtr += litlengthHeaderSize;
/* trailing 12 bytes : the 3 initial repeat offsets, little-endian */
16054 RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
16055 bs->rep[0] = MEM_readLE32(dictPtr+0);
16056 bs->rep[1] = MEM_readLE32(dictPtr+4);
16057 bs->rep[2] = MEM_readLE32(dictPtr+8);
16060 return dictPtr - (const BYTE*)dict;
16063 /* Dictionary format :
16065 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
16067 /*! ZSTD_loadZstdDictionary() :
16068  * @return : dictID, or an error code
16069  * assumptions : magic number supposed already checked
16070  * dictSize supposed >= 8
/* Loads a full-format zstd dictionary : reads the dictID, parses the entropy
 * tables (ZSTD_loadCEntropy), validates offset codes and repcodes against the
 * dictionary content size, then indexes the content itself. */
16072 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
16073 ZSTD_matchState_t* ms,
16075 ZSTD_CCtx_params const* params,
16076 const void* dict, size_t dictSize,
16077 ZSTD_dictTableLoadMethod_e dtlm,
16080 const BYTE* dictPtr = (const BYTE*)dict;
16081 const BYTE* const dictEnd = dictPtr + dictSize;
16082 short offcodeNCount[MaxOff+1];
16083 unsigned offcodeMaxValue = MaxOff;
16087 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
16088 assert(dictSize >= 8);
16089 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
16091 dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
16092 eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize);
16093 FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
16096 { size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
16097 U32 offcodeMax = MaxOff;
16098 if (dictContentSize <= ((U32)-1) - 128 KB) {
16099 U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
16100 offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
16102 /* All offset values <= dictContentSize + 128 KB must be representable */
16103 FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), "");
16104 /* All repCodes must be <= dictContentSize and != 0*/
16106 for (u=0; u<3; u++) {
16107 RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
16108 RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
/* entropy tables fully validated at this point : mark them reusable as-is */
16111 bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
16112 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
16113 bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
16114 FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
16115 ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
16120 /** ZSTD_compress_insertDictionary() :
16121  * @return : dictID, or an error code */
/* Dispatches on dictContentType : raw content goes straight to
 * ZSTD_loadDictionaryContent; content starting with ZSTD_MAGIC_DICTIONARY
 * goes through ZSTD_loadZstdDictionary; ZSTD_dct_auto falls back to raw
 * content when the magic number is absent. Dictionaries < 8 bytes are
 * ignored (unless fullDict was explicitly required, which is an error). */
16123 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
16124 ZSTD_matchState_t* ms,
16127 const ZSTD_CCtx_params* params,
16128 const void* dict, size_t dictSize,
16129 ZSTD_dictContentType_e dictContentType,
16130 ZSTD_dictTableLoadMethod_e dtlm,
16133 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
16134 if ((dict==NULL) || (dictSize<8)) {
16135 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
16139 ZSTD_reset_compressedBlockState(bs);
16141 /* dict restricted modes */
16142 if (dictContentType == ZSTD_dct_rawContent)
16143 return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
16145 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
16146 if (dictContentType == ZSTD_dct_auto) {
16147 DEBUGLOG(4, "raw content dictionary detected");
16148 return ZSTD_loadDictionaryContent(
16149 ms, ls, ws, params, dict, dictSize, dtlm);
16151 RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
16152 assert(0); /* impossible */
16155 /* dict as full zstd dictionary */
16156 return ZSTD_loadZstdDictionary(
16157 bs, ms, ws, params, dict, dictSize, dtlm, workspace);
/* Heuristic thresholds used by ZSTD_compressBegin_internal : attach the
 * CDict's embedded parameters when the source is small in absolute terms
 * (cutoff) or relative to the dictionary size (multiplier). */
16160 #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
16161 #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6)
16163 /*! ZSTD_compressBegin_internal() :
16164  * @return : 0, or an error code */
/* Initializes the cctx for a new frame : either fast-path attaches a CDict
 * (small source relative to dictionary, unless ZSTD_dictForceLoad), or fully
 * resets the cctx and loads the dictionary (from cdict content or raw dict
 * buffer) into fresh tables. Exactly one of dict/cdict may be non-NULL. */
16165 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
16166 const void* dict, size_t dictSize,
16167 ZSTD_dictContentType_e dictContentType,
16168 ZSTD_dictTableLoadMethod_e dtlm,
16169 const ZSTD_CDict* cdict,
16170 const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
16171 ZSTD_buffered_policy_e zbuff)
16173 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
16174 /* params are supposed to be fully validated at this point */
16175 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
16176 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
/* CDict fast path : reuse the digested dictionary when worthwhile */
16178 && (cdict->dictContentSize > 0)
16179 && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
16180 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
16181 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
16182 || cdict->compressionLevel == 0)
16183 && (params->attachDictPref != ZSTD_dictForceLoad) ) {
16184 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
16187 FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
16188 ZSTDcrp_makeClean, zbuff) , "");
16189 { size_t const dictID = cdict ?
16190 ZSTD_compress_insertDictionary(
16191 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
16192 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
16193 cdict->dictContentSize, dictContentType, dtlm,
16194 cctx->entropyWorkspace)
16195 : ZSTD_compress_insertDictionary(
16196 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
16197 &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
16198 dictContentType, dtlm, cctx->entropyWorkspace);
16199 FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
16200 assert(dictID <= UINT_MAX);
16201 cctx->dictID = (U32)dictID;
/* Validates cParams, then forwards to ZSTD_compressBegin_internal in
 * non-buffered mode. */
16206 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
16207 const void* dict, size_t dictSize,
16208 ZSTD_dictContentType_e dictContentType,
16209 ZSTD_dictTableLoadMethod_e dtlm,
16210 const ZSTD_CDict* cdict,
16211 const ZSTD_CCtx_params* params,
16212 unsigned long long pledgedSrcSize)
16214 DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
16215 /* compression parameters verification and optimization */
16216 FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
16217 return ZSTD_compressBegin_internal(cctx,
16218 dict, dictSize, dictContentType, dtlm,
16220 params, pledgedSrcSize,
16221 ZSTDb_not_buffered);
16224 /*! ZSTD_compressBegin_advanced() :
16225  * @return : 0, or an error code */
/* Converts the caller-supplied ZSTD_parameters into CCtx params and begins a
 * frame with auto dictionary detection.
 * Fix(review): `¶ms` was a mojibake of `&params` (HTML entity `&para;`
 * corrupted during extraction) — restored in the
 * ZSTD_assignParamsToCCtxParams call. */
16226 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
16227 const void* dict, size_t dictSize,
16228 ZSTD_parameters params, unsigned long long pledgedSrcSize)
16230 ZSTD_CCtx_params const cctxParams =
16231 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
16232 return ZSTD_compressBegin_advanced_internal(cctx,
16233 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
16235 &cctxParams, pledgedSrcSize);
/* Begins a frame at the given compression level with an optional raw dict
 * buffer; content size is unknown up front.
 * Fix(review): `¶ms` was a mojibake of `&params` (HTML entity `&para;`
 * corrupted during extraction) — restored in the
 * ZSTD_assignParamsToCCtxParams call. */
16238 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
16240 ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
16241 ZSTD_CCtx_params const cctxParams =
16242 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
16243 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
16244 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
16245 &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
/* Dictionary-less convenience wrapper around ZSTD_compressBegin_usingDict. */
16248 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
16250 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
16254 /*! ZSTD_writeEpilogue() :
16256  * @return : nb of bytes written into dst (or an error code) */
/* Finishes the frame : writes the header for an empty frame if nothing was
 * compressed, appends a last empty raw block if one was not already emitted,
 * then the optional XXH64 content checksum. Resets the cctx stage. */
16257 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
16259 BYTE* const ostart = (BYTE*)dst;
16263 DEBUGLOG(4, "ZSTD_writeEpilogue");
16264 RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
16266 /* special case : empty frame */
16267 if (cctx->stage == ZSTDcs_init) {
16268 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
16269 FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
16270 dstCapacity -= fhSize;
16272 cctx->stage = ZSTDcs_ongoing;
16275 if (cctx->stage != ZSTDcs_ending) {
16276 /* write one last empty block, make it the "last" block */
16277 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
16278 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
16279 MEM_writeLE32(op, cBlockHeader24);
16280 op += ZSTD_blockHeaderSize;
16281 dstCapacity -= ZSTD_blockHeaderSize;
16284 if (cctx->appliedParams.fParams.checksumFlag) {
16285 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
16286 RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
16287 DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
16288 MEM_writeLE32(op, checksum);
16292 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
/* Compresses the final chunk, writes the frame epilogue, and verifies the
 * total consumed size matches the pledged source size (exactly, this time —
 * compare with the >= check during streaming). */
16296 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
16297 void* dst, size_t dstCapacity,
16298 const void* src, size_t srcSize)
16301 size_t const cSize = ZSTD_compressContinue_internal(cctx,
16302 dst, dstCapacity, src, srcSize,
16303 1 /* frame mode */, 1 /* last chunk */);
16304 FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
16305 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
16306 FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
16307 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
16308 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
16309 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
16310 DEBUGLOG(4, "end of frame : controlling src size");
16312 cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
16314 "error : pledgedSrcSize = %u, while realSrcSize = %u",
16315 (unsigned)cctx->pledgedSrcSizePlusOne-1,
16316 (unsigned)cctx->consumedSrcSize);
16318 return cSize + endResult;
/* Converts ZSTD_parameters to CCtx params and forwards to
 * ZSTD_compress_advanced_internal.
 * NOTE(review): the argument list of the forwarded call is truncated in this
 * listing (numbering jumps 16331 -> 16338); verify against upstream zstd. */
16322 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
16323 void* dst, size_t dstCapacity,
16324 const void* src, size_t srcSize,
16325 const void* dict,size_t dictSize,
16326 const ZSTD_parameters* params)
16328 ZSTD_CCtx_params const cctxParams =
16329 ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params);
16330 DEBUGLOG(4, "ZSTD_compress_internal");
16331 return ZSTD_compress_advanced_internal(cctx,
/* Public one-shot API with explicit ZSTD_parameters : validates cParams then
 * forwards to ZSTD_compress_internal.
 * NOTE(review): the forwarded argument list is truncated in this listing
 * (numbering jumps 16346 -> 16354); verify against upstream zstd. */
16338 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
16339 void* dst, size_t dstCapacity,
16340 const void* src, size_t srcSize,
16341 const void* dict,size_t dictSize,
16342 ZSTD_parameters params)
16344 DEBUGLOG(4, "ZSTD_compress_advanced");
16345 FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
16346 return ZSTD_compress_internal(cctx,
/* One-shot compression with pre-built CCtx params : begin (srcSize pledged,
 * non-buffered), then compress-and-end in a single call. */
16354 size_t ZSTD_compress_advanced_internal(
16356 void* dst, size_t dstCapacity,
16357 const void* src, size_t srcSize,
16358 const void* dict,size_t dictSize,
16359 const ZSTD_CCtx_params* params)
16361 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
16362 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
16363 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
16364 params, srcSize, ZSTDb_not_buffered) , "");
16365 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
/* One-shot compression with an optional raw dictionary buffer at the given
 * compression level; srcSize is known, so contentSizeFlag is asserted set.
 * Fix(review): `¶ms` was a mojibake of `&params` (HTML entity `&para;`
 * corrupted during extraction) — restored in the
 * ZSTD_assignParamsToCCtxParams call. */
16368 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
16369 void* dst, size_t dstCapacity,
16370 const void* src, size_t srcSize,
16371 const void* dict, size_t dictSize,
16372 int compressionLevel)
16374 ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0);
16375 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, &params);
16376 DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
16377 assert(params.fParams.contentSizeFlag == 1);
16378 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
/* Dictionary-less one-shot wrapper around ZSTD_compress_usingDict. */
16381 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
16382 void* dst, size_t dstCapacity,
16383 const void* src, size_t srcSize,
16384 int compressionLevel)
16386 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
16387 assert(cctx != NULL);
16388 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
/* Simplest public API : creates a transient CCtx on the stack, compresses,
 * then frees only the heap-allocated content (the ctx body itself is stack
 * memory and must not be freed). */
16391 size_t ZSTD_compress(void* dst, size_t dstCapacity,
16392 const void* src, size_t srcSize,
16393 int compressionLevel)
16397 ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
16398 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
16399 ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */
16404 /* ===== Dictionary API ===== */
16406 /*! ZSTD_estimateCDictSize_advanced() :
16407  * Estimate amount of memory that will be needed to create a dictionary with following arguments */
/* Sum of : CDict struct + entropy workspace + match-state tables + (byCopy
 * only) an aligned copy of the dictionary content. Mirrors the allocation in
 * ZSTD_createCDict_advanced. */
16408 size_t ZSTD_estimateCDictSize_advanced(
16409 size_t dictSize, ZSTD_compressionParameters cParams,
16410 ZSTD_dictLoadMethod_e dictLoadMethod)
16412 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
16413 return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
16414 + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
16415 + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
16416 + (dictLoadMethod == ZSTD_dlm_byRef ? 0
16417 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
/* Level-based convenience wrapper; assumes byCopy loading. */
16420 size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
16422 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
16423 return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
/* Total memory owned by a CDict. When the struct lives inside its own
 * workspace (static CDict case), it must not be counted twice. */
16426 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
16428 if (cdict==NULL) return 0; /* support sizeof on NULL */
16429 DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
16430 /* cdict may be in the workspace */
16431 return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
16432 + ZSTD_cwksp_sizeof(&cdict->workspace);
/* Fills a freshly allocated CDict : stores (byRef) or copies (byCopy) the
 * dictionary content into the workspace, reserves the entropy workspace,
 * resets block/match state, then digests the dictionary via
 * ZSTD_compress_insertDictionary with full table loading. */
16435 static size_t ZSTD_initCDict_internal(
16437 const void* dictBuffer, size_t dictSize,
16438 ZSTD_dictLoadMethod_e dictLoadMethod,
16439 ZSTD_dictContentType_e dictContentType,
16440 ZSTD_compressionParameters cParams)
16442 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
16443 assert(!ZSTD_checkCParams(cParams));
16444 cdict->matchState.cParams = cParams;
16445 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
16446 cdict->dictContent = dictBuffer;
16448 void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
16449 RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
16450 cdict->dictContent = internalBuffer;
16451 memcpy(internalBuffer, dictBuffer, dictSize);
16453 cdict->dictContentSize = dictSize;
16455 cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
16458 /* Reset the state to no dictionary */
16459 ZSTD_reset_compressedBlockState(&cdict->cBlockState);
16460 FORWARD_IF_ERROR(ZSTD_reset_matchState(
16461 &cdict->matchState,
16466 ZSTD_resetTarget_CDict), "");
16467 /* (Maybe) load the dictionary
16468 * Skips loading the dictionary if it is < 8 bytes.
16470 { ZSTD_CCtx_params params;
16471 memset(&params, 0, sizeof(params));
16472 params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
16473 params.fParams.contentSizeFlag = 1;
16474 params.cParams = cParams;
16475 { size_t const dictID = ZSTD_compress_insertDictionary(
16476 &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
16477 &params, cdict->dictContent, cdict->dictContentSize,
16478 dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
16479 FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
16480 assert(dictID <= (size_t)(U32)-1);
16481 cdict->dictID = (U32)dictID;
/* Allocates one contiguous workspace sized for the CDict struct, entropy
 * workspace, match-state tables, and (byCopy) the dictionary copy, places the
 * CDict at its head, then initializes it. Returns NULL on allocation or init
 * failure. Custom allocators must supply both alloc and free, or neither. */
16488 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
16489 ZSTD_dictLoadMethod_e dictLoadMethod,
16490 ZSTD_dictContentType_e dictContentType,
16491 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
16493 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
16494 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
16496 { size_t const workspaceSize =
16497 ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
16498 ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
16499 ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
16500 (dictLoadMethod == ZSTD_dlm_byRef ? 0
16501 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
16502 void* const workspace = ZSTD_malloc(workspaceSize, customMem);
16507 ZSTD_free(workspace, customMem);
16511 ZSTD_cwksp_init(&ws, workspace, workspaceSize);
16513 cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
16514 assert(cdict != NULL);
16515 ZSTD_cwksp_move(&cdict->workspace, &ws);
16516 cdict->customMem = customMem;
16517 cdict->compressionLevel = 0; /* signals advanced API usage */
16519 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
16520 dictBuffer, dictSize,
16521 dictLoadMethod, dictContentType,
16523 ZSTD_freeCDict(cdict);
16531 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
16533 ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
16534 ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize,
16535 ZSTD_dlm_byCopy, ZSTD_dct_auto,
16536 cParams, ZSTD_defaultCMem);
16538 cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
16542 ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
16544 ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
16545 return ZSTD_createCDict_advanced(dict, dictSize,
16546 ZSTD_dlm_byRef, ZSTD_dct_auto,
16547 cParams, ZSTD_defaultCMem);
16550 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
16552 if (cdict==NULL) return 0; /* support free on NULL */
16553 { ZSTD_customMem const cMem = cdict->customMem;
16554 int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
16555 ZSTD_cwksp_free(&cdict->workspace, cMem);
16556 if (!cdictInWorkspace) {
16557 ZSTD_free(cdict, cMem);
16563 /*! ZSTD_initStaticCDict_advanced() :
16564 * Generate a digested dictionary in provided memory area.
16565 * workspace: The memory area to emplace the dictionary into.
16566 * Provided pointer must be 8-bytes aligned.
16567 * It must outlive dictionary usage.
16568 * workspaceSize: Use ZSTD_estimateCDictSize()
16569 * to determine how large workspace must be.
16570 * cParams : use ZSTD_getCParams() to transform a compression level
16571 * into its relevant cParams.
16572 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
16573 * Note : there is no corresponding "free" function.
16574 * Since workspace was allocated externally, it must be freed externally.
/* Build a CDict entirely inside caller-provided memory: no allocation, and no
 * corresponding free function (the caller owns `workspace`).
 * NOTE(review): several original lines are absent from this excerpt (e.g. the
 * matchState term of neededSize, declarations of ws/cdict, dictContent field
 * assignments, and the final return) — confirm against the upstream file. */
16576 const ZSTD_CDict* ZSTD_initStaticCDict(
16577 void* workspace, size_t workspaceSize,
16578 const void* dict, size_t dictSize,
16579 ZSTD_dictLoadMethod_e dictLoadMethod,
16580 ZSTD_dictContentType_e dictContentType,
16581 ZSTD_compressionParameters cParams)
/* space required inside `workspace`: CDict struct + (optional) dict copy + entropy tables */
16583 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
16584 size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
16585 + (dictLoadMethod == ZSTD_dlm_byRef ? 0
16586 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
16587 + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
/* caller's pointer must be 8-byte aligned (documented contract) */
16591 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
16595 ZSTD_cwksp_init(&ws, workspace, workspaceSize);
/* carve the CDict object out of the provided workspace */
16596 cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
16597 if (cdict == NULL) return NULL;
16598 ZSTD_cwksp_move(&cdict->workspace, &ws);
16601 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
16602 (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
/* reject undersized workspaces up-front */
16603 if (workspaceSize < neededSize) return NULL;
/* digest the dictionary; any failure yields NULL */
16605 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
16607 dictLoadMethod, dictContentType,
16614 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
16616 assert(cdict != NULL);
16617 return cdict->matchState.cParams;
16620 /* ZSTD_compressBegin_usingCDict_advanced() :
16621 * cdict must be != NULL */
16622 size_t ZSTD_compressBegin_usingCDict_advanced(
16623 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
16624 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
16626 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
16627 RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
16628 { ZSTD_CCtx_params params = cctx->requestedParams;
16629 params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
16630 || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
16631 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
16632 || cdict->compressionLevel == 0 )
16633 && (params.attachDictPref != ZSTD_dictForceLoad) ?
16634 ZSTD_getCParamsFromCDict(cdict)
16635 : ZSTD_getCParams(cdict->compressionLevel,
16637 cdict->dictContentSize);
16638 /* Increase window log to fit the entire dictionary and source if the
16639 * source size is known. Limit the increase to 19, which is the
16640 * window log for compression level 1 with the largest source size.
16642 if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
16643 U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
16644 U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
16645 params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
16647 params.fParams = fParams;
16648 return ZSTD_compressBegin_internal(cctx,
16649 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
16651 ¶ms, pledgedSrcSize,
16652 ZSTDb_not_buffered);
16656 /* ZSTD_compressBegin_usingCDict() :
16657 * pledgedSrcSize=0 means "unknown"
16658 * if pledgedSrcSize>0, it will enable contentSizeFlag */
16659 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
16661 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
16662 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
16663 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
16666 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
16667 void* dst, size_t dstCapacity,
16668 const void* src, size_t srcSize,
16669 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
16671 FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
16672 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
16675 /*! ZSTD_compress_usingCDict() :
16676 * Compression using a digested Dictionary.
16677 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
16678 * Note that compression parameters are decided at CDict creation time
16679 * while frame parameters are hardcoded */
16680 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
16681 void* dst, size_t dstCapacity,
16682 const void* src, size_t srcSize,
16683 const ZSTD_CDict* cdict)
16685 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
16686 return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
16691 /* ******************************************************************
16693 ********************************************************************/
16695 ZSTD_CStream* ZSTD_createCStream(void)
16697 DEBUGLOG(3, "ZSTD_createCStream");
16698 return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
16701 ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
16703 return ZSTD_initStaticCCtx(workspace, workspaceSize);
16706 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
16707 { /* CStream and CCtx are now same object */
16708 return ZSTD_createCCtx_advanced(customMem);
16711 size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
16713 return ZSTD_freeCCtx(zcs); /* same object */
16718 /*====== Initialization ======*/
16720 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }
16722 size_t ZSTD_CStreamOutSize(void)
16724 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
16727 static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
16728 const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
16729 const ZSTD_CDict* const cdict,
16730 ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
16732 DEBUGLOG(4, "ZSTD_resetCStream_internal");
16733 /* Finalize the compression parameters */
16734 params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize);
16735 /* params are supposed to be fully validated at this point */
16736 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
16737 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
16739 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
16740 dict, dictSize, dictContentType, ZSTD_dtlm_fast,
16742 ¶ms, pledgedSrcSize,
16743 ZSTDb_buffered) , "");
16745 cctx->inToCompress = 0;
16746 cctx->inBuffPos = 0;
16747 cctx->inBuffTarget = cctx->blockSize
16748 + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
16749 cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
16750 cctx->streamStage = zcss_load;
16751 cctx->frameEnded = 0;
16752 return 0; /* ready to go */
16755 /* ZSTD_resetCStream():
16756 * pledgedSrcSize == 0 means "unknown" */
16757 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
16759 /* temporary : 0 interpreted as "unknown" during transition period.
16760 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
16761 * 0 will be interpreted as "empty" in the future.
16763 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
16764 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
16765 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16766 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
16770 /*! ZSTD_initCStream_internal() :
16771 * Note : for lib/compress only. Used by zstdmt_compress.c.
16772 * Assumption 1 : params are valid
16773 * Assumption 2 : either dict, or cdict, is defined, not both */
16774 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
16775 const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
16776 const ZSTD_CCtx_params* params,
16777 unsigned long long pledgedSrcSize)
16779 DEBUGLOG(4, "ZSTD_initCStream_internal");
16780 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16781 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
16782 assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
16783 zcs->requestedParams = *params;
16784 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
16786 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
16788 /* Dictionary is cleared if !cdict */
16789 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
16794 /* ZSTD_initCStream_usingCDict_advanced() :
16795 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
16796 size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
16797 const ZSTD_CDict* cdict,
16798 ZSTD_frameParameters fParams,
16799 unsigned long long pledgedSrcSize)
16801 DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
16802 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16803 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
16804 zcs->requestedParams.fParams = fParams;
16805 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
16809 /* note : cdict must outlive compression session */
16810 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
16812 DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
16813 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16814 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
16819 /* ZSTD_initCStream_advanced() :
16820 * pledgedSrcSize must be exact.
16821 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
16822 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
16823 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
16824 const void* dict, size_t dictSize,
16825 ZSTD_parameters params, unsigned long long pss)
16827 /* for compatibility with older programs relying on this behavior.
16828 * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
16829 * This line will be removed in the future.
16831 U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
16832 DEBUGLOG(4, "ZSTD_initCStream_advanced");
16833 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16834 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
16835 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
16836 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms);
16837 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
16841 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
16843 DEBUGLOG(4, "ZSTD_initCStream_usingDict");
16844 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16845 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
16846 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
16850 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
16852 /* temporary : 0 interpreted as "unknown" during transition period.
16853 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
16854 * 0 will be interpreted as "empty" in the future.
16856 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
16857 DEBUGLOG(4, "ZSTD_initCStream_srcSize");
16858 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16859 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
16860 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
16861 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
16865 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
16867 DEBUGLOG(4, "ZSTD_initCStream");
16868 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
16869 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
16870 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
16874 /*====== Compression ======*/
16876 static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
16878 size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
16879 if (hintInSize==0) hintInSize = cctx->blockSize;
16883 /** ZSTD_compressStream_generic():
16884 * internal function for all *compressStream*() variants
16885 * non-static, because can be called from zstdmt_compress.c
16886 * @return : hint size for next input */
16887 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
16888 ZSTD_outBuffer* output,
16889 ZSTD_inBuffer* input,
16890 ZSTD_EndDirective const flushMode)
/* Cursor setup : ip/op walk the caller's buffers; istart/ostart anchor the
 * final pos updates at the bottom of the function. */
16892 const char* const istart = (const char*)input->src;
16893 const char* const iend = input->size != 0 ? istart + input->size : istart;
16894 const char* ip = input->pos != 0 ? istart + input->pos : istart;
16895 char* const ostart = (char*)output->dst;
16896 char* const oend = output->size != 0 ? ostart + output->size : ostart;
16897 char* op = output->pos != 0 ? ostart + output->pos : ostart;
16898 U32 someMoreWork = 1;
16900 /* check expectations */
16901 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
16902 assert(zcs->inBuff != NULL);
16903 assert(zcs->inBuffSize > 0);
16904 assert(zcs->outBuff != NULL);
16905 assert(zcs->outBuffSize > 0);
16906 assert(output->pos <= output->size);
16907 assert(input->pos <= input->size);
/* State machine over zcs->streamStage (load -> compress -> flush).
 * NOTE(review): the `case zcss_init:` / `case zcss_load:` / `case zcss_flush:`
 * labels are missing from this excerpt of the switch — confirm against upstream. */
16909 while (someMoreWork) {
16910 switch(zcs->streamStage)
/* zcss_init : streaming session was never initialized */
16913 RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
/* zcss_load : gather input, or compress directly into dst when it surely fits */
16916 if ( (flushMode == ZSTD_e_end)
16917 && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */
16918 && (zcs->inBuffPos == 0) ) {
16919 /* shortcut to compression pass directly into output buffer */
16920 size_t const cSize = ZSTD_compressEnd(zcs,
16921 op, oend-op, ip, iend-ip);
16922 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
16923 FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
/* NOTE(review): the lines advancing ip/op after the direct pass are absent here */
16926 zcs->frameEnded = 1;
16927 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
16928 someMoreWork = 0; break;
16930 /* complete loading into inBuffer */
16931 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
16932 size_t const loaded = ZSTD_limitCopy(
16933 zcs->inBuff + zcs->inBuffPos, toLoad,
16935 zcs->inBuffPos += loaded;
16938 if ( (flushMode == ZSTD_e_continue)
16939 && (zcs->inBuffPos < zcs->inBuffTarget) ) {
16940 /* not enough input to fill full block : stop here */
16941 someMoreWork = 0; break;
16943 if ( (flushMode == ZSTD_e_flush)
16944 && (zcs->inBuffPos == zcs->inToCompress) ) {
/* nothing new buffered since last compress : nothing to flush */
16946 someMoreWork = 0; break;
16949 /* compress current block (note : this stage cannot be stopped in the middle) */
16950 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
16953 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
16954 size_t oSize = oend-op;
16955 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
16956 if (oSize >= ZSTD_compressBound(iSize))
16957 cDst = op; /* compress into output buffer, to skip flush stage */
16959 cDst = zcs->outBuff, oSize = zcs->outBuffSize;
16960 cSize = lastBlock ?
16961 ZSTD_compressEnd(zcs, cDst, oSize,
16962 zcs->inBuff + zcs->inToCompress, iSize) :
16963 ZSTD_compressContinue(zcs, cDst, oSize,
16964 zcs->inBuff + zcs->inToCompress, iSize);
16965 FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
16966 zcs->frameEnded = lastBlock;
16967 /* prepare next block */
16968 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
16969 if (zcs->inBuffTarget > zcs->inBuffSize)
16970 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* wrap the ring buffer */
16971 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
16972 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
16974 assert(zcs->inBuffTarget <= zcs->inBuffSize);
16975 zcs->inToCompress = zcs->inBuffPos;
16976 if (cDst == op) { /* no need to flush */
16978 if (zcs->frameEnded) {
16979 DEBUGLOG(5, "Frame completed directly in outBuffer");
16981 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
16985 zcs->outBuffContentSize = cSize;
16986 zcs->outBuffFlushedSize = 0;
16987 zcs->streamStage = zcss_flush; /* pass-through to flush stage */
/* zcss_flush : drain outBuff into the caller's output buffer */
16991 DEBUGLOG(5, "flush stage");
16992 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
16993 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
16994 zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
16995 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
16996 (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
16999 zcs->outBuffFlushedSize += flushed;
17000 if (toFlush!=flushed) {
17001 /* flush not fully completed, presumably because dst is too small */
17006 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
17007 if (zcs->frameEnded) {
17008 DEBUGLOG(5, "Frame completed on flush");
17010 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
17013 zcs->streamStage = zcss_load;
17017 default: /* impossible */
/* report progress back through the caller-visible pos fields */
17022 input->pos = ip - istart;
17023 output->pos = op - ostart;
17024 if (zcs->frameEnded) return 0;
17025 return ZSTD_nextInputSizeHint(zcs);
17028 static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
17030 #ifdef ZSTD_MULTITHREAD
17031 if (cctx->appliedParams.nbWorkers >= 1) {
17032 assert(cctx->mtctx != NULL);
17033 return ZSTDMT_nextInputSizeHint(cctx->mtctx);
17036 return ZSTD_nextInputSizeHint(cctx);
17040 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
17042 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
17043 return ZSTD_nextInputSizeHint_MTorST(zcs);
17047 size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
17048 ZSTD_outBuffer* output,
17049 ZSTD_inBuffer* input,
17050 ZSTD_EndDirective endOp)
17052 DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
17053 /* check conditions */
17054 RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer");
17055 RETURN_ERROR_IF(input->pos > input->size, GENERIC, "invalid buffer");
17056 assert(cctx!=NULL);
17058 /* transparent initialization stage */
17059 if (cctx->streamStage == zcss_init) {
17060 ZSTD_CCtx_params params = cctx->requestedParams;
17061 ZSTD_prefixDict const prefixDict = cctx->prefixDict;
17062 FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
17063 memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
17064 assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
17065 DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
17066 if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
17067 params.cParams = ZSTD_getCParamsFromCCtxParams(
17068 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
17071 #ifdef ZSTD_MULTITHREAD
17072 if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
17073 params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
17075 if (params.nbWorkers > 0) {
17076 /* mt context creation */
17077 if (cctx->mtctx == NULL) {
17078 DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
17080 cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem);
17081 RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!");
17083 /* mt compression */
17084 DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
17085 FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
17087 prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
17088 cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , "");
17089 cctx->streamStage = zcss_load;
17090 cctx->appliedParams.nbWorkers = params.nbWorkers;
17093 { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
17094 prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
17096 params, cctx->pledgedSrcSizePlusOne-1) , "");
17097 assert(cctx->streamStage == zcss_load);
17098 assert(cctx->appliedParams.nbWorkers == 0);
17100 /* end of transparent initialization stage */
17102 /* compression stage */
17103 #ifdef ZSTD_MULTITHREAD
17104 if (cctx->appliedParams.nbWorkers > 0) {
17105 int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
17107 assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
17108 if (cctx->cParamsChanged) {
17109 ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
17110 cctx->cParamsChanged = 0;
17113 flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
17114 if ( ZSTD_isError(flushMin)
17115 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
17116 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
17118 FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed");
17119 } while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
17120 DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
17121 /* Either we don't require maximum forward progress, we've finished the
17122 * flush, or we are out of output space.
17124 assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
17128 FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
17129 DEBUGLOG(5, "completed ZSTD_compressStream2");
17130 return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
17133 size_t ZSTD_compressStream2_simpleArgs (
17135 void* dst, size_t dstCapacity, size_t* dstPos,
17136 const void* src, size_t srcSize, size_t* srcPos,
17137 ZSTD_EndDirective endOp)
17139 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
17140 ZSTD_inBuffer input = { src, srcSize, *srcPos };
17141 /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
17142 size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
17143 *dstPos = output.pos;
17144 *srcPos = input.pos;
17148 size_t ZSTD_compress2(ZSTD_CCtx* cctx,
17149 void* dst, size_t dstCapacity,
17150 const void* src, size_t srcSize)
17152 DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
17153 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
17156 size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
17157 dst, dstCapacity, &oPos,
17158 src, srcSize, &iPos,
17160 FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
17161 if (result != 0) { /* compression not completed, due to lack of output space */
17162 assert(oPos == dstCapacity);
17163 RETURN_ERROR(dstSize_tooSmall, "");
17165 assert(iPos == srcSize); /* all input is expected consumed */
17170 /*====== Finalize ======*/
17172 /*! ZSTD_flushStream() :
17173 * @return : amount of data remaining to flush */
17174 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
17176 ZSTD_inBuffer input = { NULL, 0, 0 };
17177 return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
17181 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
17183 ZSTD_inBuffer input = { NULL, 0, 0 };
17184 size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
17185 FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
17186 if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
17187 /* single thread mode : attempt to calculate remaining to flush more precisely */
17188 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
17189 size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
17190 size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
17191 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
17197 /*-===== Pre-defined compression levels =====-*/
/* Advertised compression-level bounds. */
17199 #define ZSTD_MAX_CLEVEL 22
17200 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
/* negative levels trade ratio for speed; they extend down to -ZSTD_TARGETLENGTH_MAX */
17201 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
/* Built-in per-level parameter tables, indexed [tableID][level], where tableID
 * selects a source-size band (see the tableID computation in ZSTD_getCParams_internal).
 * Columns follow ZSTD_compressionParameters field order —
 * W=windowLog, C=chainLog, H=hashLog, S=searchLog, L=minMatch, TL=targetLength, strat=strategy
 * (presumed from the column headers; confirm against zstd.h).
 * NOTE(review): the closing braces of each sub-table and of the array are not
 * visible in this excerpt. */
17203 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
17204 { /* "default" - for any srcSize > 256 KB */
17205 /* W, C, H, S, L, TL, strat */
17206 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
17207 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
17208 { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
17209 { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
17210 { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
17211 { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */
17212 { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */
17213 { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */
17214 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
17215 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
17216 { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
17217 { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
17218 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
17219 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */
17220 { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
17221 { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
17222 { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
17223 { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
17224 { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
17225 { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
17226 { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
17227 { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
17228 { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
17230 { /* for srcSize <= 256 KB */
17231 /* W, C, H, S, L, T, strat */
17232 { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
17233 { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
17234 { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
17235 { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
17236 { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/
17237 { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/
17238 { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
17239 { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
17240 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
17241 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
17242 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
17243 { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
17244 { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
17245 { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
17246 { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
17247 { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
17248 { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
17249 { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
17250 { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
17251 { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
17252 { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
17253 { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
17254 { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
17256 { /* for srcSize <= 128 KB */
17257 /* W, C, H, S, L, T, strat */
17258 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
17259 { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
17260 { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
17261 { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
17262 { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
17263 { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
17264 { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
17265 { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
17266 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
17267 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
17268 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
17269 { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
17270 { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
17271 { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
17272 { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
17273 { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
17274 { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
17275 { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
17276 { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
17277 { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
17278 { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
17279 { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
17280 { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
17282 { /* for srcSize <= 16 KB */
17283 /* W, C, H, S, L, T, strat */
17284 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
17285 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
17286 { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
17287 { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
17288 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
17289 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
17290 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
17291 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
17292 { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
17293 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
17294 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
17295 { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
17296 { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
17297 { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
17298 { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
17299 { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
17300 { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
17301 { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
17302 { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
17303 { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
17304 { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
17305 { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
17306 { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
17310 /*! ZSTD_getCParams_internal() :
17311 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
17312 * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
17313 * Use dictSize == 0 for unknown or unused. */
17314 static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
17316 int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
17317 size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
17318 U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
17319 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
17320 int row = compressionLevel;
17321 DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
17322 if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
17323 if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
17324 if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
17325 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
17326 if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
17327 /* refine parameters based on srcSize & dictSize */
17328 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize);
17332 /*! ZSTD_getCParams() :
17333 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
17334 * Size values are optional, provide 0 if not known or unused */
17335 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
17337 if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
17338 return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
17341 /*! ZSTD_getParams() :
17342 * same idea as ZSTD_getCParams()
17343 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
17344 * Fields of `ZSTD_frameParameters` are set to default values */
17345 static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
17346 ZSTD_parameters params;
17347 ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize);
17348 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
17349 memset(¶ms, 0, sizeof(params));
17350 params.cParams = cParams;
17351 params.fParams.contentSizeFlag = 1;
17355 /*! ZSTD_getParams() :
17356 * same idea as ZSTD_getCParams()
17357 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
17358 * Fields of `ZSTD_frameParameters` are set to default values */
17359 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
17360 if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
17361 return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize);
17363 /**** ended inlining compress/zstd_compress.c ****/
17364 /**** start inlining compress/zstd_double_fast.c ****/
17366 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
17367 * All rights reserved.
17369 * This source code is licensed under both the BSD-style license (found in the
17370 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
17371 * in the COPYING file in the root directory of this source tree).
17372 * You may select, at your option, one of the above-listed licenses.
17375 /**** skipping file: zstd_compress_internal.h ****/
17376 /**** skipping file: zstd_double_fast.h ****/
17379 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
17380 void const* end, ZSTD_dictTableLoadMethod_e dtlm)
17382 const ZSTD_compressionParameters* const cParams = &ms->cParams;
17383 U32* const hashLarge = ms->hashTable;
17384 U32 const hBitsL = cParams->hashLog;
17385 U32 const mls = cParams->minMatch;
17386 U32* const hashSmall = ms->chainTable;
17387 U32 const hBitsS = cParams->chainLog;
17388 const BYTE* const base = ms->window.base;
17389 const BYTE* ip = base + ms->nextToUpdate;
17390 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
17391 const U32 fastHashFillStep = 3;
17393 /* Always insert every fastHashFillStep position into the hash tables.
17394 * Insert the other positions into the large hash table if their entry
17397 for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
17398 U32 const current = (U32)(ip - base);
17400 for (i = 0; i < fastHashFillStep; ++i) {
17401 size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
17402 size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
17404 hashSmall[smHash] = current + i;
17405 if (i == 0 || hashLarge[lgHash] == 0)
17406 hashLarge[lgHash] = current + i;
17407 /* Only load extra positions for ZSTD_dtlm_full */
17408 if (dtlm == ZSTD_dtlm_fast)
17414 FORCE_INLINE_TEMPLATE
17415 size_t ZSTD_compressBlock_doubleFast_generic(
17416 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
17417 void const* src, size_t srcSize,
17418 U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
17420 ZSTD_compressionParameters const* cParams = &ms->cParams;
17421 U32* const hashLong = ms->hashTable;
17422 const U32 hBitsL = cParams->hashLog;
17423 U32* const hashSmall = ms->chainTable;
17424 const U32 hBitsS = cParams->chainLog;
17425 const BYTE* const base = ms->window.base;
17426 const BYTE* const istart = (const BYTE*)src;
17427 const BYTE* ip = istart;
17428 const BYTE* anchor = istart;
17429 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
17430 /* presumes that, if there is a dictionary, it must be using Attach mode */
17431 const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
17432 const BYTE* const prefixLowest = base + prefixLowestIndex;
17433 const BYTE* const iend = istart + srcSize;
17434 const BYTE* const ilimit = iend - HASH_READ_SIZE;
17435 U32 offset_1=rep[0], offset_2=rep[1];
17436 U32 offsetSaved = 0;
17438 const ZSTD_matchState_t* const dms = ms->dictMatchState;
17439 const ZSTD_compressionParameters* const dictCParams =
17440 dictMode == ZSTD_dictMatchState ?
17441 &dms->cParams : NULL;
17442 const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
17443 dms->hashTable : NULL;
17444 const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
17445 dms->chainTable : NULL;
17446 const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
17447 dms->window.dictLimit : 0;
17448 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
17449 dms->window.base : NULL;
17450 const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
17451 dictBase + dictStartIndex : NULL;
17452 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
17453 dms->window.nextSrc : NULL;
17454 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
17455 prefixLowestIndex - (U32)(dictEnd - dictBase) :
17457 const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
17458 dictCParams->hashLog : hBitsL;
17459 const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
17460 dictCParams->chainLog : hBitsS;
17461 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
17463 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
17465 assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
17467 /* if a dictionary is attached, it must be within window range */
17468 if (dictMode == ZSTD_dictMatchState) {
17469 assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
17473 ip += (dictAndPrefixLength == 0);
17474 if (dictMode == ZSTD_noDict) {
17475 U32 const current = (U32)(ip - base);
17476 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
17477 U32 const maxRep = current - windowLow;
17478 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
17479 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
17481 if (dictMode == ZSTD_dictMatchState) {
17482 /* dictMatchState repCode checks don't currently handle repCode == 0
17484 assert(offset_1 <= dictAndPrefixLength);
17485 assert(offset_2 <= dictAndPrefixLength);
17488 /* Main Search Loop */
17489 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
17492 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
17493 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
17494 size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
17495 size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
17496 U32 const current = (U32)(ip-base);
17497 U32 const matchIndexL = hashLong[h2];
17498 U32 matchIndexS = hashSmall[h];
17499 const BYTE* matchLong = base + matchIndexL;
17500 const BYTE* match = base + matchIndexS;
17501 const U32 repIndex = current + 1 - offset_1;
17502 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
17503 && repIndex < prefixLowestIndex) ?
17504 dictBase + (repIndex - dictIndexDelta) :
17506 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
17508 /* check dictMatchState repcode */
17509 if (dictMode == ZSTD_dictMatchState
17510 && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
17511 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
17512 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
17513 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
17515 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
17516 goto _match_stored;
17519 /* check noDict repcode */
17520 if ( dictMode == ZSTD_noDict
17521 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
17522 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
17524 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
17525 goto _match_stored;
17528 if (matchIndexL > prefixLowestIndex) {
17529 /* check prefix long match */
17530 if (MEM_read64(matchLong) == MEM_read64(ip)) {
17531 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
17532 offset = (U32)(ip-matchLong);
17533 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
17536 } else if (dictMode == ZSTD_dictMatchState) {
17537 /* check dictMatchState long match */
17538 U32 const dictMatchIndexL = dictHashLong[dictHL];
17539 const BYTE* dictMatchL = dictBase + dictMatchIndexL;
17540 assert(dictMatchL < dictEnd);
17542 if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
17543 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
17544 offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
17545 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
17549 if (matchIndexS > prefixLowestIndex) {
17550 /* check prefix short match */
17551 if (MEM_read32(match) == MEM_read32(ip)) {
17552 goto _search_next_long;
17554 } else if (dictMode == ZSTD_dictMatchState) {
17555 /* check dictMatchState short match */
17556 U32 const dictMatchIndexS = dictHashSmall[dictHS];
17557 match = dictBase + dictMatchIndexS;
17558 matchIndexS = dictMatchIndexS + dictIndexDelta;
17560 if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
17561 goto _search_next_long;
17564 ip += ((ip-anchor) >> kSearchStrength) + 1;
17565 #if defined(__aarch64__)
17566 PREFETCH_L1(ip+256);
17572 { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
17573 size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
17574 U32 const matchIndexL3 = hashLong[hl3];
17575 const BYTE* matchL3 = base + matchIndexL3;
17576 hashLong[hl3] = current + 1;
17578 /* check prefix long +1 match */
17579 if (matchIndexL3 > prefixLowestIndex) {
17580 if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
17581 mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
17583 offset = (U32)(ip-matchL3);
17584 while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
17587 } else if (dictMode == ZSTD_dictMatchState) {
17588 /* check dict long +1 match */
17589 U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
17590 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
17591 assert(dictMatchL3 < dictEnd);
17592 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
17593 mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
17595 offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
17596 while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
17600 /* if no long +1 match, explore the short match we found */
17601 if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
17602 mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
17603 offset = (U32)(current - matchIndexS);
17604 while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
17606 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
17607 offset = (U32)(ip - match);
17608 while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
17614 offset_2 = offset_1;
17617 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
17624 if (ip <= ilimit) {
17625 /* Complementary insertion */
17626 /* done after iLimit test, as candidates could be > iend-8 */
17627 { U32 const indexToInsert = current+2;
17628 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
17629 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
17630 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
17631 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
17634 /* check immediate repcode */
17635 if (dictMode == ZSTD_dictMatchState) {
17636 while (ip <= ilimit) {
17637 U32 const current2 = (U32)(ip-base);
17638 U32 const repIndex2 = current2 - offset_2;
17639 const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
17640 && repIndex2 < prefixLowestIndex ?
17641 dictBase + repIndex2 - dictIndexDelta :
17643 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
17644 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
17645 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
17646 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
17647 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
17648 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
17649 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
17650 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
17658 if (dictMode == ZSTD_noDict) {
17659 while ( (ip <= ilimit)
17661 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
17662 /* store sequence */
17663 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
17664 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
17665 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
17666 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
17667 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
17670 continue; /* faster when present ... (?) */
17672 } /* while (ip < ilimit) */
17674 /* save reps for next block */
17675 rep[0] = offset_1 ? offset_1 : offsetSaved;
17676 rep[1] = offset_2 ? offset_2 : offsetSaved;
17678 /* Return the last literals size */
17679 return (size_t)(iend - anchor);
17683 size_t ZSTD_compressBlock_doubleFast(
17684 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
17685 void const* src, size_t srcSize)
17687 const U32 mls = ms->cParams.minMatch;
17690 default: /* includes case 3 */
17692 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
17694 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
17696 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
17698 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
17703 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
17704 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
17705 void const* src, size_t srcSize)
17707 const U32 mls = ms->cParams.minMatch;
17710 default: /* includes case 3 */
17712 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
17714 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
17716 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
17718 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
17723 static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
17724 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
17725 void const* src, size_t srcSize,
17726 U32 const mls /* template */)
17728 ZSTD_compressionParameters const* cParams = &ms->cParams;
17729 U32* const hashLong = ms->hashTable;
17730 U32 const hBitsL = cParams->hashLog;
17731 U32* const hashSmall = ms->chainTable;
17732 U32 const hBitsS = cParams->chainLog;
17733 const BYTE* const istart = (const BYTE*)src;
17734 const BYTE* ip = istart;
17735 const BYTE* anchor = istart;
17736 const BYTE* const iend = istart + srcSize;
17737 const BYTE* const ilimit = iend - 8;
17738 const BYTE* const base = ms->window.base;
17739 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
17740 const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
17741 const U32 dictStartIndex = lowLimit;
17742 const U32 dictLimit = ms->window.dictLimit;
17743 const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
17744 const BYTE* const prefixStart = base + prefixStartIndex;
17745 const BYTE* const dictBase = ms->window.dictBase;
17746 const BYTE* const dictStart = dictBase + dictStartIndex;
17747 const BYTE* const dictEnd = dictBase + prefixStartIndex;
17748 U32 offset_1=rep[0], offset_2=rep[1];
17750 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
17752 /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
17753 if (prefixStartIndex == dictStartIndex)
17754 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
17757 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
17758 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
17759 const U32 matchIndex = hashSmall[hSmall];
17760 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
17761 const BYTE* match = matchBase + matchIndex;
17763 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
17764 const U32 matchLongIndex = hashLong[hLong];
17765 const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
17766 const BYTE* matchLong = matchLongBase + matchLongIndex;
17768 const U32 current = (U32)(ip-base);
17769 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
17770 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
17771 const BYTE* const repMatch = repBase + repIndex;
17773 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
17775 if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
17776 & (repIndex > dictStartIndex))
17777 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
17778 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
17779 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
17781 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
17783 if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
17784 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
17785 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
17787 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
17788 offset = current - matchLongIndex;
17789 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
17790 offset_2 = offset_1;
17792 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
17794 } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
17795 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
17796 U32 const matchIndex3 = hashLong[h3];
17797 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
17798 const BYTE* match3 = match3Base + matchIndex3;
17800 hashLong[h3] = current + 1;
17801 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
17802 const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
17803 const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
17804 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
17806 offset = current+1 - matchIndex3;
17807 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
17809 const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
17810 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
17811 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
17812 offset = current - matchIndex;
17813 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
17815 offset_2 = offset_1;
17817 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
17820 ip += ((ip-anchor) >> kSearchStrength) + 1;
17824 /* move to next sequence start */
17828 if (ip <= ilimit) {
17829 /* Complementary insertion */
17830 /* done after iLimit test, as candidates could be > iend-8 */
17831 { U32 const indexToInsert = current+2;
17832 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
17833 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
17834 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
17835 hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
17838 /* check immediate repcode */
17839 while (ip <= ilimit) {
17840 U32 const current2 = (U32)(ip-base);
17841 U32 const repIndex2 = current2 - offset_2;
17842 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
17843 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
17844 & (repIndex2 > dictStartIndex))
17845 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
17846 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
17847 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
17848 U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
17849 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
17850 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
17851 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
17859 /* save reps for next block */
17863 /* Return the last literals size */
17864 return (size_t)(iend - anchor);
17868 size_t ZSTD_compressBlock_doubleFast_extDict(
17869 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
17870 void const* src, size_t srcSize)
17872 U32 const mls = ms->cParams.minMatch;
17875 default: /* includes case 3 */
17877 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
17879 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
17881 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
17883 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
17886 /**** ended inlining compress/zstd_double_fast.c ****/
17887 /**** start inlining compress/zstd_fast.c ****/
17889 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
17890 * All rights reserved.
17892 * This source code is licensed under both the BSD-style license (found in the
17893 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
17894 * in the COPYING file in the root directory of this source tree).
17895 * You may select, at your option, one of the above-listed licenses.
17898 /**** skipping file: zstd_compress_internal.h ****/
17899 /**** skipping file: zstd_fast.h ****/
17902 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
17903 const void* const end,
17904 ZSTD_dictTableLoadMethod_e dtlm)
17906 const ZSTD_compressionParameters* const cParams = &ms->cParams;
17907 U32* const hashTable = ms->hashTable;
17908 U32 const hBits = cParams->hashLog;
17909 U32 const mls = cParams->minMatch;
17910 const BYTE* const base = ms->window.base;
17911 const BYTE* ip = base + ms->nextToUpdate;
17912 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
17913 const U32 fastHashFillStep = 3;
17915 /* Always insert every fastHashFillStep position into the hash table.
17916 * Insert the other positions if their hash entry is empty.
17918 for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
17919 U32 const current = (U32)(ip - base);
17920 size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
17921 hashTable[hash0] = current;
17922 if (dtlm == ZSTD_dtlm_fast) continue;
17923 /* Only load extra positions for ZSTD_dtlm_full */
17925 for (p = 1; p < fastHashFillStep; ++p) {
17926 size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
17927 if (hashTable[hash] == 0) { /* not yet filled */
17928 hashTable[hash] = current + p;
17933 FORCE_INLINE_TEMPLATE size_t
17934 ZSTD_compressBlock_fast_generic(
17935 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
17936 void const* src, size_t srcSize,
17939 const ZSTD_compressionParameters* const cParams = &ms->cParams;
17940 U32* const hashTable = ms->hashTable;
17941 U32 const hlog = cParams->hashLog;
17942 /* support stepSize of 0 */
17943 size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
17944 const BYTE* const base = ms->window.base;
17945 const BYTE* const istart = (const BYTE*)src;
17946 /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
17947 const BYTE* ip0 = istart;
17949 const BYTE* anchor = istart;
17950 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
17951 const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
17952 const BYTE* const prefixStart = base + prefixStartIndex;
17953 const BYTE* const iend = istart + srcSize;
17954 const BYTE* const ilimit = iend - HASH_READ_SIZE;
17955 U32 offset_1=rep[0], offset_2=rep[1];
17956 U32 offsetSaved = 0;
17959 DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
17960 ip0 += (ip0 == prefixStart);
17962 { U32 const current = (U32)(ip0 - base);
17963 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
17964 U32 const maxRep = current - windowLow;
17965 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
17966 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
17969 /* Main Search Loop */
17970 #ifdef __INTEL_COMPILER
17971 /* From intel 'The vector pragma indicates that the loop should be
17972 * vectorized if it is legal to do so'. Can be used together with
17973 * #pragma ivdep (but have opted to exclude that because intel
17974 * warns against using it).*/
17975 #pragma vector always
17977 while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
17979 BYTE const* ip2 = ip0 + 2;
17980 size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
17981 U32 const val0 = MEM_read32(ip0);
17982 size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
17983 U32 const val1 = MEM_read32(ip1);
17984 U32 const current0 = (U32)(ip0-base);
17985 U32 const current1 = (U32)(ip1-base);
17986 U32 const matchIndex0 = hashTable[h0];
17987 U32 const matchIndex1 = hashTable[h1];
17988 BYTE const* repMatch = ip2 - offset_1;
17989 const BYTE* match0 = base + matchIndex0;
17990 const BYTE* match1 = base + matchIndex1;
17993 #if defined(__aarch64__)
17994 PREFETCH_L1(ip0+256);
17997 hashTable[h0] = current0; /* update hash table */
17998 hashTable[h1] = current1; /* update hash table */
18000 assert(ip0 + 1 == ip1);
18002 if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
18003 mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
18004 ip0 = ip2 - mLength;
18005 match0 = repMatch - mLength;
18010 if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
18011 /* found a regular match */
18014 if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
18015 /* found a regular match after one literal */
18020 { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
18026 _offset: /* Requires: ip0, match0 */
18027 /* Compute the offset code */
18028 offset_2 = offset_1;
18029 offset_1 = (U32)(ip0-match0);
18030 offcode = offset_1 + ZSTD_REP_MOVE;
18032 /* Count the backwards match length */
18033 while (((ip0>anchor) & (match0>prefixStart))
18034 && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
18036 _match: /* Requires: ip0, match0, offcode */
18037 /* Count the forward length */
18038 mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
18039 ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
18044 if (ip0 <= ilimit) {
18046 assert(base+current0+2 > istart); /* check base overflow */
18047 hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
18048 hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
18050 if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
18051 while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
18052 /* store sequence */
18053 size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
18054 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
18055 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
18057 ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
18059 continue; /* faster when present (confirmed on gcc-8) ... (?) */
18064 /* save reps for next block */
18065 rep[0] = offset_1 ? offset_1 : offsetSaved;
18066 rep[1] = offset_2 ? offset_2 : offsetSaved;
18068 /* Return the last literals size */
18069 return (size_t)(iend - anchor);
18073 size_t ZSTD_compressBlock_fast(
18074 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
18075 void const* src, size_t srcSize)
18077 U32 const mls = ms->cParams.minMatch;
18078 assert(ms->dictMatchState == NULL);
18081 default: /* includes case 3 */
18083 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
18085 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
18087 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
18089 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
/* ZSTD_compressBlock_fast_dictMatchState_generic() :
 * "fast" block compressor with an attached dictionary match state (dms).
 * Looks up candidates in both the current window's hash table and the
 * dictionary's own hash table; dictionary indices are translated into the
 * local index space through dictIndexDelta.  Template-expanded on `mls`
 * (the hash read length) via FORCE_INLINE_TEMPLATE.
 * Writes found sequences into seqStore and returns the size of the last
 * run of literals (those after the final match).
 * NOTE(review): several structural lines (braces, `else`, loop closers) are
 * elided in this extract; comments below describe only what is visible. */
18093 FORCE_INLINE_TEMPLATE
18094 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
18095 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
18096 void const* src, size_t srcSize, U32 const mls)
18098 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18099 U32* const hashTable = ms->hashTable;
18100 U32 const hlog = cParams->hashLog;
18101 /* support stepSize of 0 */
18102 U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
18103 const BYTE* const base = ms->window.base;
18104 const BYTE* const istart = (const BYTE*)src;
18105 const BYTE* ip = istart;
18106 const BYTE* anchor = istart;
18107 const U32 prefixStartIndex = ms->window.dictLimit;
18108 const BYTE* const prefixStart = base + prefixStartIndex;
18109 const BYTE* const iend = istart + srcSize;
18110 const BYTE* const ilimit = iend - HASH_READ_SIZE;
/* offset_1/offset_2 : repeat-offset history carried across blocks via rep[] */
18111 U32 offset_1=rep[0], offset_2=rep[1];
18112 U32 offsetSaved = 0;
/* dictionary (dms) geometry : used to map dict indices into local space */
18114 const ZSTD_matchState_t* const dms = ms->dictMatchState;
18115 const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
18116 const U32* const dictHashTable = dms->hashTable;
18117 const U32 dictStartIndex = dms->window.dictLimit;
18118 const BYTE* const dictBase = dms->window.base;
18119 const BYTE* const dictStart = dictBase + dictStartIndex;
18120 const BYTE* const dictEnd = dms->window.nextSrc;
18121 const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
18122 const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
18123 const U32 dictHLog = dictCParams->hashLog;
18125 /* if a dictionary is still attached, it necessarily means that
18126 * it is within window size. So we just check it. */
18127 const U32 maxDistance = 1U << cParams->windowLog;
18128 const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
18129 assert(endIndex - prefixStartIndex <= maxDistance);
18130 (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
18132 /* ensure there will be no no underflow
18133 * when translating a dict index into a local index */
18134 assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
18137 DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
/* skip first byte when dict+prefix is empty, so ip-1 reads stay valid */
18138 ip += (dictAndPrefixLength == 0);
18139 /* dictMatchState repCode checks don't currently handle repCode == 0
18141 assert(offset_1 <= dictAndPrefixLength);
18142 assert(offset_2 <= dictAndPrefixLength);
18144 /* Main Search Loop */
18145 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
18147 size_t const h = ZSTD_hashPtr(ip, hlog, mls);
18148 U32 const current = (U32)(ip-base);
18149 U32 const matchIndex = hashTable[h];
18150 const BYTE* match = base + matchIndex;
18151 const U32 repIndex = current + 1 - offset_1;
/* repMatch may live in the dictionary segment or the local prefix */
18152 const BYTE* repMatch = (repIndex < prefixStartIndex) ?
18153 dictBase + (repIndex - dictIndexDelta) :
18155 hashTable[h] = current; /* update hash table */
/* 1) repeat-offset check at ip+1 */
18157 if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
18158 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
18159 const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
18160 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
18162 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
/* 2) no local candidate beyond prefix start : probe the dictionary table */
18163 } else if ( (matchIndex <= prefixStartIndex) ) {
18164 size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
18165 U32 const dictMatchIndex = dictHashTable[dictHash];
18166 const BYTE* dictMatch = dictBase + dictMatchIndex;
18167 if (dictMatchIndex <= dictStartIndex ||
18168 MEM_read32(dictMatch) != MEM_read32(ip)) {
18169 assert(stepSize >= 1);
/* no match : skip ahead, accelerating with the distance since last match */
18170 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
18173 /* found a dict match */
18174 U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
18175 mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
18176 while (((ip>anchor) & (dictMatch>dictStart))
18177 && (ip[-1] == dictMatch[-1])) {
18178 ip--; dictMatch--; mLength++;
18180 offset_2 = offset_1;
18182 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
/* 3) local candidate exists : verify 4-byte prefix */
18184 } else if (MEM_read32(match) != MEM_read32(ip)) {
18185 /* it's not a match, and we're not going to check the dictionary */
18186 assert(stepSize >= 1);
18187 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
18190 /* found a regular match */
18191 U32 const offset = (U32)(ip-match);
18192 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
18193 while (((ip>anchor) & (match>prefixStart))
18194 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
18195 offset_2 = offset_1;
18197 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
/* after a match : refresh hash entries and greedily extend repcodes */
18204 if (ip <= ilimit) {
18206 assert(base+current+2 > istart); /* check base overflow */
18207 hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
18208 hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
18210 /* check immediate repcode */
18211 while (ip <= ilimit) {
18212 U32 const current2 = (U32)(ip-base);
18213 U32 const repIndex2 = current2 - offset_2;
18214 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
18215 dictBase - dictIndexDelta + repIndex2 :
18217 if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
18218 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
18219 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
18220 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
18221 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
18222 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
18223 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
18233 /* save reps for next block */
18234 rep[0] = offset_1 ? offset_1 : offsetSaved;
18235 rep[1] = offset_2 ? offset_2 : offsetSaved;
18237 /* Return the last literals size */
18238 return (size_t)(iend - anchor);
/* ZSTD_compressBlock_fast_dictMatchState() :
 * Entry point for the "fast" compressor with an attached dictionary.
 * Dispatches on cParams.minMatch to the generic template with a
 * compile-time `mls` of 4..7.
 * NOTE(review): the `switch(mls)` header and `case` labels are elided in
 * this extract — confirm against the full file. */
18241 size_t ZSTD_compressBlock_fast_dictMatchState(
18242 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
18243 void const* src, size_t srcSize)
18245 U32 const mls = ms->cParams.minMatch;
18246 assert(ms->dictMatchState != NULL); /* this variant requires an attached dictionary state */
18249 default: /* includes case 3 */
18251 return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
18253 return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
18255 return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
18257 return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
/* ZSTD_compressBlock_fast_extDict_generic() :
 * "fast" compressor variant for the external-dictionary (extDict) window
 * layout : previous content lives in a separate segment addressed via
 * ms->window.dictBase, with indices below prefixStartIndex.
 * Candidate and repcode matches may therefore span two segments, hence the
 * use of ZSTD_count_2segments().  Falls back to the regular variant when
 * the extDict segment has been invalidated by the window constraint. */
18262 static size_t ZSTD_compressBlock_fast_extDict_generic(
18263 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
18264 void const* src, size_t srcSize, U32 const mls)
18266 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18267 U32* const hashTable = ms->hashTable;
18268 U32 const hlog = cParams->hashLog;
18269 /* support stepSize of 0 */
18270 U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
18271 const BYTE* const base = ms->window.base;
18272 const BYTE* const dictBase = ms->window.dictBase;
18273 const BYTE* const istart = (const BYTE*)src;
18274 const BYTE* ip = istart;
18275 const BYTE* anchor = istart;
18276 const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
18277 const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
18278 const U32 dictStartIndex = lowLimit;
18279 const BYTE* const dictStart = dictBase + dictStartIndex;
18280 const U32 dictLimit = ms->window.dictLimit;
18281 const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
18282 const BYTE* const prefixStart = base + prefixStartIndex;
18283 const BYTE* const dictEnd = dictBase + prefixStartIndex;
18284 const BYTE* const iend = istart + srcSize;
18285 const BYTE* const ilimit = iend - 8;
18286 U32 offset_1=rep[0], offset_2=rep[1];
18288 DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
18290 /* switch to "regular" variant if extDict is invalidated due to maxDistance */
18291 if (prefixStartIndex == dictStartIndex)
18292 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
18295 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
18296 const size_t h = ZSTD_hashPtr(ip, hlog, mls);
18297 const U32 matchIndex = hashTable[h];
/* select segment : indices below prefixStartIndex address the extDict */
18298 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
18299 const BYTE* match = matchBase + matchIndex;
18300 const U32 current = (U32)(ip-base);
18301 const U32 repIndex = current + 1 - offset_1;
18302 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
18303 const BYTE* const repMatch = repBase + repIndex;
18304 hashTable[h] = current; /* update hash table */
18305 DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
18306 assert(offset_1 <= current +1); /* check repIndex */
/* 1) repeat-offset check at ip+1, guarded against segment-boundary overlap */
18308 if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
18309 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
18310 const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
18311 size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
18313 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
/* 2) regular candidate : reject if out of range or 4-byte prefix mismatch */
18317 if ( (matchIndex < dictStartIndex) ||
18318 (MEM_read32(match) != MEM_read32(ip)) ) {
18319 assert(stepSize >= 1);
18320 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
/* 3) match found : extend forward (2 segments) and backward (catch up) */
18323 { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
18324 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
18325 U32 const offset = current - matchIndex;
18326 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
18327 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
18328 offset_2 = offset_1; offset_1 = offset; /* update offset history */
18329 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
/* after a match : refresh hash entries and greedily extend repcodes */
18334 if (ip <= ilimit) {
18336 hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
18337 hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
18338 /* check immediate repcode */
18339 while (ip <= ilimit) {
18340 U32 const current2 = (U32)(ip-base);
18341 U32 const repIndex2 = current2 - offset_2;
18342 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
18343 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
18344 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
18345 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
18346 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
18347 { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
18348 ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
18349 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
18357 /* save reps for next block */
18361 /* Return the last literals size */
18362 return (size_t)(iend - anchor);
/* ZSTD_compressBlock_fast_extDict() :
 * Entry point for the "fast" compressor in extDict mode.
 * Dispatches on cParams.minMatch to the extDict generic template (mls 4..7).
 * NOTE(review): the `switch(mls)` header and `case` labels are elided in
 * this extract — confirm against the full file. */
18366 size_t ZSTD_compressBlock_fast_extDict(
18367 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
18368 void const* src, size_t srcSize)
18370 U32 const mls = ms->cParams.minMatch;
18373 default: /* includes case 3 */
18375 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
18377 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
18379 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
18381 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
18384 /**** ended inlining compress/zstd_fast.c ****/
18385 /**** start inlining compress/zstd_lazy.c ****/
18387 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
18388 * All rights reserved.
18390 * This source code is licensed under both the BSD-style license (found in the
18391 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
18392 * in the COPYING file in the root directory of this source tree).
18393 * You may select, at your option, one of the above-listed licenses.
18396 /**** skipping file: zstd_compress_internal.h ****/
18397 /**** skipping file: zstd_lazy.h ****/
18400 /*-*************************************
18401 * Binary Tree search
18402 ***************************************/
/* ZSTD_updateDUBT() :
 * Catch up the Dynamic Unsorted Binary Tree (DUBT) from ms->nextToUpdate
 * to `ip` : each skipped position is inserted into the hash table and
 * prepended, chain-style, to its tree slot, tagged with
 * ZSTD_DUBT_UNSORTED_MARK so it can be sorted lazily at search time.
 * NOTE(review): the return type / storage-class line is elided in this
 * extract (presumably a static helper) — confirm against the full file. */
18405 ZSTD_updateDUBT(ZSTD_matchState_t* ms,
18406 const BYTE* ip, const BYTE* iend,
18409 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18410 U32* const hashTable = ms->hashTable;
18411 U32 const hashLog = cParams->hashLog;
/* the binary tree is stored in chainTable : 2 slots (smaller/larger) per position */
18413 U32* const bt = ms->chainTable;
18414 U32 const btLog = cParams->chainLog - 1;
18415 U32 const btMask = (1 << btLog) - 1;
18417 const BYTE* const base = ms->window.base;
18418 U32 const target = (U32)(ip - base);
18419 U32 idx = ms->nextToUpdate;
18422 DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
18423 idx, target, ms->window.dictLimit);
18424 assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
18427 assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
18428 for ( ; idx < target ; idx++) {
18429 size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
18430 U32 const matchIndex = hashTable[h];
18432 U32* const nextCandidatePtr = bt + 2*(idx&btMask);
18433 U32* const sortMarkPtr = nextCandidatePtr + 1;
18435 DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
18436 hashTable[h] = idx; /* Update Hash Table */
18437 *nextCandidatePtr = matchIndex; /* update BT like a chain */
18438 *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
18440 ms->nextToUpdate = target;
18444 /** ZSTD_insertDUBT1() :
18445 * sort one already inserted but unsorted position
18446 * assumption : current >= btlow == (current - btmask)
/* Performs a classic binary-tree insertion of position `current` : walks the
 * tree comparing byte strings, maintaining commonLengthSmaller/Larger as
 * guaranteed shared-prefix lengths, and re-links smaller/larger children.
 * Handles extDict mode, where candidates may live in a separate segment. */
18449 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
18450 U32 current, const BYTE* inputEnd,
18451 U32 nbCompares, U32 btLow,
18452 const ZSTD_dictMode_e dictMode)
18454 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18455 U32* const bt = ms->chainTable;
18456 U32 const btLog = cParams->chainLog - 1;
18457 U32 const btMask = (1 << btLog) - 1;
18458 size_t commonLengthSmaller=0, commonLengthLarger=0;
18459 const BYTE* const base = ms->window.base;
18460 const BYTE* const dictBase = ms->window.dictBase;
18461 const U32 dictLimit = ms->window.dictLimit;
/* ip/iend depend on which segment `current` belongs to (prefix vs extDict) */
18462 const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
18463 const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
18464 const BYTE* const dictEnd = dictBase + dictLimit;
18465 const BYTE* const prefixStart = base + dictLimit;
18467 U32* smallerPtr = bt + 2*(current&btMask);
18468 U32* largerPtr = smallerPtr + 1;
18469 U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
18470 U32 dummy32; /* to be nullified at the end */
18471 U32 const windowValid = ms->window.lowLimit;
18472 U32 const maxDistance = 1U << cParams->windowLog;
/* clamp the search floor to the window constraint */
18473 U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
18476 DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
18477 current, dictLimit, windowLow);
18478 assert(current >= btLow);
18479 assert(ip < iend); /* condition for ZSTD_count */
18481 while (nbCompares-- && (matchIndex > windowLow)) {
18482 U32* const nextPtr = bt + 2*(matchIndex & btMask);
18483 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
18484 assert(matchIndex < current);
18485 /* note : all candidates are now supposed sorted,
18486 * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
18487 * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
18489 if ( (dictMode != ZSTD_extDict)
18490 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
18491 || (current < dictLimit) /* both in extDict */) {
18492 const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
18493 || (matchIndex+matchLength >= dictLimit)) ?
18495 assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
18496 || (current < dictLimit) );
18497 match = mBase + matchIndex;
18498 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
/* candidate starts in extDict : count across the segment boundary */
18500 match = dictBase + matchIndex;
18501 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
18502 if (matchIndex+matchLength >= dictLimit)
18503 match = base + matchIndex; /* preparation for next read of match[matchLength] */
18506 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
18507 current, matchIndex, (U32)matchLength);
18509 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
18510 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
18513 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
18514 /* match is smaller than current */
18515 *smallerPtr = matchIndex; /* update smaller idx */
18516 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
18517 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
18518 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
18519 matchIndex, btLow, nextPtr[1]);
18520 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
18521 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
18523 /* match is larger than current */
18524 *largerPtr = matchIndex;
18525 commonLengthLarger = matchLength;
18526 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
18527 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
18528 matchIndex, btLow, nextPtr[0]);
18529 largerPtr = nextPtr;
18530 matchIndex = nextPtr[0];
/* terminate both dangling child links */
18533 *smallerPtr = *largerPtr = 0;
/* ZSTD_DUBT_findBetterDictMatch() :
 * After the local tree search, walk the attached dictionary's binary tree
 * (read-only : the dict tree is never modified here) looking for a match
 * longer than `bestLength`.  A dictionary candidate only replaces the
 * current best when its extra length outweighs the cost of its larger
 * offset (the highbit32 heuristic below).  Returns the (possibly updated)
 * best length; *offsetPtr is updated in tandem.
 * NOTE(review): the storage-class/return-type line and some parameter
 * lines are elided in this extract. */
18538 ZSTD_DUBT_findBetterDictMatch (
18539 ZSTD_matchState_t* ms,
18540 const BYTE* const ip, const BYTE* const iend,
18545 const ZSTD_dictMode_e dictMode)
18547 const ZSTD_matchState_t * const dms = ms->dictMatchState;
18548 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
18549 const U32 * const dictHashTable = dms->hashTable;
18550 U32 const hashLog = dmsCParams->hashLog;
18551 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
18552 U32 dictMatchIndex = dictHashTable[h];
18554 const BYTE* const base = ms->window.base;
18555 const BYTE* const prefixStart = base + ms->window.dictLimit;
18556 U32 const current = (U32)(ip-base);
18557 const BYTE* const dictBase = dms->window.base;
18558 const BYTE* const dictEnd = dms->window.nextSrc;
18559 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
18560 U32 const dictLowLimit = dms->window.lowLimit;
/* delta mapping dict indices into the local index space */
18561 U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
18563 U32* const dictBt = dms->chainTable;
18564 U32 const btLog = dmsCParams->chainLog - 1;
18565 U32 const btMask = (1 << btLog) - 1;
18566 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
18568 size_t commonLengthSmaller=0, commonLengthLarger=0;
18571 assert(dictMode == ZSTD_dictMatchState);
18573 while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
18574 U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
18575 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
18576 const BYTE* match = dictBase + dictMatchIndex;
18577 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
18578 if (dictMatchIndex+matchLength >= dictHighLimit)
18579 match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
18581 if (matchLength > bestLength) {
18582 U32 matchIndex = dictMatchIndex + dictIndexDelta;
/* accept only if extra length beats the offset-cost difference (log2 scale) */
18583 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
18584 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
18585 current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
18586 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
18588 if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
18589 break; /* drop, to guarantee consistency (miss a little bit of compression) */
18593 if (match[matchLength] < ip[matchLength]) {
18594 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
18595 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
18596 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
18598 /* match is larger than current */
18599 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
18600 commonLengthLarger = matchLength;
18601 dictMatchIndex = nextPtr[0];
18605 if (bestLength >= MINMATCH) {
18606 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
18607 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
18608 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
/* ZSTD_DUBT_findBestMatch() :
 * Core binary-tree match finder.  Three phases :
 *   1) walk the unsorted chain hanging off the hash entry, reversing it
 *      into `previousCandidate` links;
 *   2) batch-sort those stacked candidates into the tree (ZSTD_insertDUBT1);
 *   3) descend the now-sorted tree from the hash head, tracking the best
 *      (length, offset) trade-off, re-linking children as it goes.
 * In dictMatchState mode, finishes with a read-only probe of the
 * dictionary's tree.  Returns best match length; *offsetPtr receives the
 * corresponding offset code.
 * NOTE(review): the storage-class/return-type line and some parameter
 * lines are elided in this extract. */
18616 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
18617 const BYTE* const ip, const BYTE* const iend,
18620 const ZSTD_dictMode_e dictMode)
18622 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18623 U32* const hashTable = ms->hashTable;
18624 U32 const hashLog = cParams->hashLog;
18625 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
18626 U32 matchIndex = hashTable[h];
18628 const BYTE* const base = ms->window.base;
18629 U32 const current = (U32)(ip-base);
18630 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
18632 U32* const bt = ms->chainTable;
18633 U32 const btLog = cParams->chainLog - 1;
18634 U32 const btMask = (1 << btLog) - 1;
18635 U32 const btLow = (btMask >= current) ? 0 : current - btMask;
18636 U32 const unsortLimit = MAX(btLow, windowLow);
18638 U32* nextCandidate = bt + 2*(matchIndex&btMask);
18639 U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
18640 U32 nbCompares = 1U << cParams->searchLog;
18641 U32 nbCandidates = nbCompares;
18642 U32 previousCandidate = 0;
18644 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
18645 assert(ip <= iend-8); /* required for h calculation */
18647 /* reach end of unsorted candidates list */
18648 while ( (matchIndex > unsortLimit)
18649 && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
18650 && (nbCandidates > 1) ) {
18651 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
18653 *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
18654 previousCandidate = matchIndex;
18655 matchIndex = *nextCandidate;
18656 nextCandidate = bt + 2*(matchIndex&btMask);
18657 unsortedMark = bt + 2*(matchIndex&btMask) + 1;
18661 /* nullify last candidate if it's still unsorted
18662 * simplification, detrimental to compression ratio, beneficial for speed */
18663 if ( (matchIndex > unsortLimit)
18664 && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
18665 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
18667 *nextCandidate = *unsortedMark = 0;
18670 /* batch sort stacked candidates */
18671 matchIndex = previousCandidate;
18672 while (matchIndex) { /* will end on matchIndex == 0 */
18673 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
18674 U32 const nextCandidateIdx = *nextCandidateIdxPtr;
18675 ZSTD_insertDUBT1(ms, matchIndex, iend,
18676 nbCandidates, unsortLimit, dictMode);
18677 matchIndex = nextCandidateIdx;
18681 /* find longest match */
18682 { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
18683 const BYTE* const dictBase = ms->window.dictBase;
18684 const U32 dictLimit = ms->window.dictLimit;
18685 const BYTE* const dictEnd = dictBase + dictLimit;
18686 const BYTE* const prefixStart = base + dictLimit;
18687 U32* smallerPtr = bt + 2*(current&btMask);
18688 U32* largerPtr = bt + 2*(current&btMask) + 1;
/* matchEndIdx tracks how far matches extend, to advance nextToUpdate past repetitive runs */
18689 U32 matchEndIdx = current + 8 + 1;
18690 U32 dummy32; /* to be nullified at the end */
18691 size_t bestLength = 0;
18693 matchIndex = hashTable[h];
18694 hashTable[h] = current; /* Update Hash Table */
18696 while (nbCompares-- && (matchIndex > windowLow)) {
18697 U32* const nextPtr = bt + 2*(matchIndex & btMask);
18698 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
18701 if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
18702 match = base + matchIndex;
18703 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
18705 match = dictBase + matchIndex;
18706 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
18707 if (matchIndex+matchLength >= dictLimit)
18708 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
18711 if (matchLength > bestLength) {
18712 if (matchLength > matchEndIdx - matchIndex)
18713 matchEndIdx = matchIndex + (U32)matchLength;
/* same length-vs-offset trade-off heuristic as the dict search */
18714 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
18715 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
18716 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
18717 if (dictMode == ZSTD_dictMatchState) {
18718 nbCompares = 0; /* in addition to avoiding checking any
18719 * further in this loop, make sure we
18720 * skip checking in the dictionary. */
18722 break; /* drop, to guarantee consistency (miss a little bit of compression) */
18726 if (match[matchLength] < ip[matchLength]) {
18727 /* match is smaller than current */
18728 *smallerPtr = matchIndex; /* update smaller idx */
18729 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
18730 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
18731 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
18732 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
18734 /* match is larger than current */
18735 *largerPtr = matchIndex;
18736 commonLengthLarger = matchLength;
18737 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
18738 largerPtr = nextPtr;
18739 matchIndex = nextPtr[0];
18742 *smallerPtr = *largerPtr = 0;
18744 if (dictMode == ZSTD_dictMatchState && nbCompares) {
18745 bestLength = ZSTD_DUBT_findBetterDictMatch(
18747 offsetPtr, bestLength, nbCompares,
18751 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
18752 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
18753 if (bestLength >= MINMATCH) {
18754 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
18755 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
18756 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
18763 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
/* Catches up the tree to `ip` (ZSTD_updateDUBT) then searches it
 * (ZSTD_DUBT_findBestMatch).  Returns 0 when ip lies in a region already
 * skipped past by nextToUpdate (nothing inserted there to match against).
 * NOTE(review): the `size_t* offsetPtr` parameter line is elided in this
 * extract. */
18764 FORCE_INLINE_TEMPLATE size_t
18765 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
18766 const BYTE* const ip, const BYTE* const iLimit,
18768 const U32 mls /* template */,
18769 const ZSTD_dictMode_e dictMode)
18771 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
18772 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
18773 ZSTD_updateDUBT(ms, ip, iLimit, mls);
18774 return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
/* ZSTD_BtFindBestMatch_selectMLS() :
 * minMatch dispatcher for the binary-tree search, no-dict mode.
 * NOTE(review): the storage-class/return-type line, `case 7` and closing
 * lines are elided in this extract. */
18779 ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
18780 const BYTE* ip, const BYTE* const iLimit,
18783 switch(ms->cParams.minMatch)
18785 default : /* includes case 3 */
18786 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
18787 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
18789 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
/* ZSTD_BtFindBestMatch_dictMatchState_selectMLS() :
 * minMatch dispatcher for the binary-tree search, dictMatchState mode.
 * NOTE(review): `case 7` and closing lines are elided in this extract. */
18794 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
18795 ZSTD_matchState_t* ms,
18796 const BYTE* ip, const BYTE* const iLimit,
18799 switch(ms->cParams.minMatch)
18801 default : /* includes case 3 */
18802 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
18803 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
18805 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
/* ZSTD_BtFindBestMatch_extDict_selectMLS() :
 * minMatch dispatcher for the binary-tree search, extDict mode.
 * NOTE(review): `case 7` and closing lines are elided in this extract. */
18810 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
18811 ZSTD_matchState_t* ms,
18812 const BYTE* ip, const BYTE* const iLimit,
18815 switch(ms->cParams.minMatch)
18817 default : /* includes case 3 */
18818 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
18819 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
18821 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
18827 /* *********************************
18829 ***********************************/
/* chainTable cell for position d : the previous position sharing d's hash */
18830 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
18832 /* Update chains up to ip (excluded)
18833 Assumption : always within prefix (i.e. not within extDict) */
/* Inserts every position in [nextToUpdate, ip) into the hash-chain
 * (head in hashTable, links in chainTable), then returns the chain head
 * for ip itself — i.e. the most recent earlier position with ip's hash.
 * NOTE(review): the `idx++` / loop-closing lines are elided in this
 * extract. */
18834 static U32 ZSTD_insertAndFindFirstIndex_internal(
18835 ZSTD_matchState_t* ms,
18836 const ZSTD_compressionParameters* const cParams,
18837 const BYTE* ip, U32 const mls)
18839 U32* const hashTable = ms->hashTable;
18840 const U32 hashLog = cParams->hashLog;
18841 U32* const chainTable = ms->chainTable;
18842 const U32 chainMask = (1 << cParams->chainLog) - 1;
18843 const BYTE* const base = ms->window.base;
18844 const U32 target = (U32)(ip - base);
18845 U32 idx = ms->nextToUpdate;
18847 while(idx < target) { /* catch up */
18848 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
/* prepend idx to its chain : old head becomes idx's successor */
18849 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
18850 hashTable[h] = idx;
18854 ms->nextToUpdate = target;
18855 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
/* ZSTD_insertAndFindFirstIndex() :
 * Public wrapper : update chains up to ip using the match state's own
 * compression parameters and minMatch, and return ip's chain head. */
18858 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
18859 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18860 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
18864 /* inlining is important to hardwire a hot branch (template emulation) */
18865 FORCE_INLINE_TEMPLATE
18866 size_t ZSTD_HcFindBestMatch_generic (
18867 ZSTD_matchState_t* ms,
18868 const BYTE* const ip, const BYTE* const iLimit,
18870 const U32 mls, const ZSTD_dictMode_e dictMode)
18872 const ZSTD_compressionParameters* const cParams = &ms->cParams;
18873 U32* const chainTable = ms->chainTable;
18874 const U32 chainSize = (1 << cParams->chainLog);
18875 const U32 chainMask = chainSize-1;
18876 const BYTE* const base = ms->window.base;
18877 const BYTE* const dictBase = ms->window.dictBase;
18878 const U32 dictLimit = ms->window.dictLimit;
18879 const BYTE* const prefixStart = base + dictLimit;
18880 const BYTE* const dictEnd = dictBase + dictLimit;
18881 const U32 current = (U32)(ip-base);
18882 const U32 maxDistance = 1U << cParams->windowLog;
18883 const U32 lowestValid = ms->window.lowLimit;
18884 const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
18885 const U32 isDictionary = (ms->loadedDictEnd != 0);
18886 const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
18887 const U32 minChain = current > chainSize ? current - chainSize : 0;
18888 U32 nbAttempts = 1U << cParams->searchLog;
18891 /* HC4 match finder */
18892 U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
18894 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
18895 size_t currentMl=0;
18896 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
18897 const BYTE* const match = base + matchIndex;
18898 assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
18899 if (match[ml] == ip[ml]) /* potentially better */
18900 currentMl = ZSTD_count(ip, match, iLimit);
18902 const BYTE* const match = dictBase + matchIndex;
18903 assert(match+4 <= dictEnd);
18904 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
18905 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
18908 /* save best solution */
18909 if (currentMl > ml) {
18911 *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
18912 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
18915 if (matchIndex <= minChain) break;
18916 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
18919 if (dictMode == ZSTD_dictMatchState) {
18920 const ZSTD_matchState_t* const dms = ms->dictMatchState;
18921 const U32* const dmsChainTable = dms->chainTable;
18922 const U32 dmsChainSize = (1 << dms->cParams.chainLog);
18923 const U32 dmsChainMask = dmsChainSize - 1;
18924 const U32 dmsLowestIndex = dms->window.dictLimit;
18925 const BYTE* const dmsBase = dms->window.base;
18926 const BYTE* const dmsEnd = dms->window.nextSrc;
18927 const U32 dmsSize = (U32)(dmsEnd - dmsBase);
18928 const U32 dmsIndexDelta = dictLimit - dmsSize;
18929 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
18931 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
18933 for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
18934 size_t currentMl=0;
18935 const BYTE* const match = dmsBase + matchIndex;
18936 assert(match+4 <= dmsEnd);
18937 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
18938 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
18940 /* save best solution */
18941 if (currentMl > ml) {
18943 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
18944 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
18947 if (matchIndex <= dmsMinChain) break;
18948 matchIndex = dmsChainTable[matchIndex & dmsChainMask];
18956 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
18957 ZSTD_matchState_t* ms,
18958 const BYTE* ip, const BYTE* const iLimit,
18961 switch(ms->cParams.minMatch)
18963 default : /* includes case 3 */
18964 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
18965 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
18967 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
18972 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
18973 ZSTD_matchState_t* ms,
18974 const BYTE* ip, const BYTE* const iLimit,
18977 switch(ms->cParams.minMatch)
18979 default : /* includes case 3 */
18980 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
18981 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
18983 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
18988 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
18989 ZSTD_matchState_t* ms,
18990 const BYTE* ip, const BYTE* const iLimit,
18993 switch(ms->cParams.minMatch)
18995 default : /* includes case 3 */
18996 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
18997 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
18999 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
19004 /* *******************************
19005 * Common parser - lazy strategy
19006 *********************************/
/* Selects which match finder the generic lazy parsers use:
 * hash-chain (HC) or binary-tree (Bt). */
19007 typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
19009 FORCE_INLINE_TEMPLATE size_t
19010 ZSTD_compressBlock_lazy_generic(
19011 ZSTD_matchState_t* ms, seqStore_t* seqStore,
19012 U32 rep[ZSTD_REP_NUM],
19013 const void* src, size_t srcSize,
19014 const searchMethod_e searchMethod, const U32 depth,
19015 ZSTD_dictMode_e const dictMode)
19017 const BYTE* const istart = (const BYTE*)src;
19018 const BYTE* ip = istart;
19019 const BYTE* anchor = istart;
19020 const BYTE* const iend = istart + srcSize;
19021 const BYTE* const ilimit = iend - 8;
19022 const BYTE* const base = ms->window.base;
19023 const U32 prefixLowestIndex = ms->window.dictLimit;
19024 const BYTE* const prefixLowest = base + prefixLowestIndex;
19026 typedef size_t (*searchMax_f)(
19027 ZSTD_matchState_t* ms,
19028 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
19029 searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
19030 (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
19031 : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
19032 (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
19033 : ZSTD_HcFindBestMatch_selectMLS);
19034 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
19036 const ZSTD_matchState_t* const dms = ms->dictMatchState;
19037 const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
19038 dms->window.dictLimit : 0;
19039 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
19040 dms->window.base : NULL;
19041 const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
19042 dictBase + dictLowestIndex : NULL;
19043 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
19044 dms->window.nextSrc : NULL;
19045 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
19046 prefixLowestIndex - (U32)(dictEnd - dictBase) :
19048 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
19050 DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
19053 ip += (dictAndPrefixLength == 0);
19054 if (dictMode == ZSTD_noDict) {
19055 U32 const current = (U32)(ip - base);
19056 U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
19057 U32 const maxRep = current - windowLow;
19058 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
19059 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
19061 if (dictMode == ZSTD_dictMatchState) {
19062 /* dictMatchState repCode checks don't currently handle repCode == 0
19064 assert(offset_1 <= dictAndPrefixLength);
19065 assert(offset_2 <= dictAndPrefixLength);
19069 #if defined(__GNUC__) && defined(__x86_64__)
19070 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
19071 * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
19073 __asm__(".p2align 5");
19075 while (ip < ilimit) {
19076 size_t matchLength=0;
19078 const BYTE* start=ip+1;
19080 /* check repCode */
19081 if (dictMode == ZSTD_dictMatchState) {
19082 const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
19083 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
19084 && repIndex < prefixLowestIndex) ?
19085 dictBase + (repIndex - dictIndexDelta) :
19087 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
19088 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
19089 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
19090 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
19091 if (depth==0) goto _storeSequence;
19094 if ( dictMode == ZSTD_noDict
19095 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
19096 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
19097 if (depth==0) goto _storeSequence;
19100 /* first search (depth 0) */
19101 { size_t offsetFound = 999999999;
19102 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
19103 if (ml2 > matchLength)
19104 matchLength = ml2, start = ip, offset=offsetFound;
19107 if (matchLength < 4) {
19108 ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
19112 /* let's try to find a better solution */
19114 while (ip<ilimit) {
19116 if ( (dictMode == ZSTD_noDict)
19117 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
19118 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
19119 int const gain2 = (int)(mlRep * 3);
19120 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
19121 if ((mlRep >= 4) && (gain2 > gain1))
19122 matchLength = mlRep, offset = 0, start = ip;
19124 if (dictMode == ZSTD_dictMatchState) {
19125 const U32 repIndex = (U32)(ip - base) - offset_1;
19126 const BYTE* repMatch = repIndex < prefixLowestIndex ?
19127 dictBase + (repIndex - dictIndexDelta) :
19129 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
19130 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
19131 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
19132 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
19133 int const gain2 = (int)(mlRep * 3);
19134 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
19135 if ((mlRep >= 4) && (gain2 > gain1))
19136 matchLength = mlRep, offset = 0, start = ip;
19139 { size_t offset2=999999999;
19140 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
19141 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
19142 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
19143 if ((ml2 >= 4) && (gain2 > gain1)) {
19144 matchLength = ml2, offset = offset2, start = ip;
19145 continue; /* search a better one */
19148 /* let's find an even better one */
19149 if ((depth==2) && (ip<ilimit)) {
19151 if ( (dictMode == ZSTD_noDict)
19152 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
19153 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
19154 int const gain2 = (int)(mlRep * 4);
19155 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
19156 if ((mlRep >= 4) && (gain2 > gain1))
19157 matchLength = mlRep, offset = 0, start = ip;
19159 if (dictMode == ZSTD_dictMatchState) {
19160 const U32 repIndex = (U32)(ip - base) - offset_1;
19161 const BYTE* repMatch = repIndex < prefixLowestIndex ?
19162 dictBase + (repIndex - dictIndexDelta) :
19164 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
19165 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
19166 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
19167 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
19168 int const gain2 = (int)(mlRep * 4);
19169 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
19170 if ((mlRep >= 4) && (gain2 > gain1))
19171 matchLength = mlRep, offset = 0, start = ip;
19174 { size_t offset2=999999999;
19175 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
19176 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
19177 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
19178 if ((ml2 >= 4) && (gain2 > gain1)) {
19179 matchLength = ml2, offset = offset2, start = ip;
19182 break; /* nothing found : store previous solution */
19186 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
19187 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
19188 * overflows the pointer, which is undefined behavior.
19192 if (dictMode == ZSTD_noDict) {
19193 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
19194 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
19195 { start--; matchLength++; }
19197 if (dictMode == ZSTD_dictMatchState) {
19198 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
19199 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
19200 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
19201 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
19203 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
19205 /* store sequence */
19207 { size_t const litLength = start - anchor;
19208 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
19209 anchor = ip = start + matchLength;
19212 /* check immediate repcode */
19213 if (dictMode == ZSTD_dictMatchState) {
19214 while (ip <= ilimit) {
19215 U32 const current2 = (U32)(ip-base);
19216 U32 const repIndex = current2 - offset_2;
19217 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
19218 && repIndex < prefixLowestIndex ?
19219 dictBase - dictIndexDelta + repIndex :
19221 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
19222 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
19223 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
19224 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
19225 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
19226 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
19235 if (dictMode == ZSTD_noDict) {
19236 while ( ((ip <= ilimit) & (offset_2>0))
19237 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
19238 /* store sequence */
19239 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
19240 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
19241 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
19244 continue; /* faster when present ... (?) */
19247 /* Save reps for next block */
19248 rep[0] = offset_1 ? offset_1 : savedOffset;
19249 rep[1] = offset_2 ? offset_2 : savedOffset;
19251 /* Return the last literals size */
19252 return (size_t)(iend - anchor);
19256 size_t ZSTD_compressBlock_btlazy2(
19257 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19258 void const* src, size_t srcSize)
19260 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
19263 size_t ZSTD_compressBlock_lazy2(
19264 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19265 void const* src, size_t srcSize)
19267 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
19270 size_t ZSTD_compressBlock_lazy(
19271 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19272 void const* src, size_t srcSize)
19274 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
19277 size_t ZSTD_compressBlock_greedy(
19278 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19279 void const* src, size_t srcSize)
19281 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
19284 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
19285 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19286 void const* src, size_t srcSize)
19288 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
19291 size_t ZSTD_compressBlock_lazy2_dictMatchState(
19292 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19293 void const* src, size_t srcSize)
19295 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
19298 size_t ZSTD_compressBlock_lazy_dictMatchState(
19299 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19300 void const* src, size_t srcSize)
19302 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
19305 size_t ZSTD_compressBlock_greedy_dictMatchState(
19306 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19307 void const* src, size_t srcSize)
19309 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
19313 FORCE_INLINE_TEMPLATE
19314 size_t ZSTD_compressBlock_lazy_extDict_generic(
19315 ZSTD_matchState_t* ms, seqStore_t* seqStore,
19316 U32 rep[ZSTD_REP_NUM],
19317 const void* src, size_t srcSize,
19318 const searchMethod_e searchMethod, const U32 depth)
19320 const BYTE* const istart = (const BYTE*)src;
19321 const BYTE* ip = istart;
19322 const BYTE* anchor = istart;
19323 const BYTE* const iend = istart + srcSize;
19324 const BYTE* const ilimit = iend - 8;
19325 const BYTE* const base = ms->window.base;
19326 const U32 dictLimit = ms->window.dictLimit;
19327 const BYTE* const prefixStart = base + dictLimit;
19328 const BYTE* const dictBase = ms->window.dictBase;
19329 const BYTE* const dictEnd = dictBase + dictLimit;
19330 const BYTE* const dictStart = dictBase + ms->window.lowLimit;
19331 const U32 windowLog = ms->cParams.windowLog;
19333 typedef size_t (*searchMax_f)(
19334 ZSTD_matchState_t* ms,
19335 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
19336 searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
19338 U32 offset_1 = rep[0], offset_2 = rep[1];
19340 DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
19343 ip += (ip == prefixStart);
19346 #if defined(__GNUC__) && defined(__x86_64__)
19347 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
19348 * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
19350 __asm__(".p2align 5");
19352 while (ip < ilimit) {
19353 size_t matchLength=0;
19355 const BYTE* start=ip+1;
19356 U32 current = (U32)(ip-base);
19358 /* check repCode */
19359 { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
19360 const U32 repIndex = (U32)(current+1 - offset_1);
19361 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
19362 const BYTE* const repMatch = repBase + repIndex;
19363 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
19364 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
19365 /* repcode detected we should take it */
19366 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
19367 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
19368 if (depth==0) goto _storeSequence;
19371 /* first search (depth 0) */
19372 { size_t offsetFound = 999999999;
19373 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
19374 if (ml2 > matchLength)
19375 matchLength = ml2, start = ip, offset=offsetFound;
19378 if (matchLength < 4) {
19379 ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
19383 /* let's try to find a better solution */
19385 while (ip<ilimit) {
19388 /* check repCode */
19390 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
19391 const U32 repIndex = (U32)(current - offset_1);
19392 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
19393 const BYTE* const repMatch = repBase + repIndex;
19394 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
19395 if (MEM_read32(ip) == MEM_read32(repMatch)) {
19396 /* repcode detected */
19397 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
19398 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
19399 int const gain2 = (int)(repLength * 3);
19400 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
19401 if ((repLength >= 4) && (gain2 > gain1))
19402 matchLength = repLength, offset = 0, start = ip;
19405 /* search match, depth 1 */
19406 { size_t offset2=999999999;
19407 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
19408 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
19409 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
19410 if ((ml2 >= 4) && (gain2 > gain1)) {
19411 matchLength = ml2, offset = offset2, start = ip;
19412 continue; /* search a better one */
19415 /* let's find an even better one */
19416 if ((depth==2) && (ip<ilimit)) {
19419 /* check repCode */
19421 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
19422 const U32 repIndex = (U32)(current - offset_1);
19423 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
19424 const BYTE* const repMatch = repBase + repIndex;
19425 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
19426 if (MEM_read32(ip) == MEM_read32(repMatch)) {
19427 /* repcode detected */
19428 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
19429 size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
19430 int const gain2 = (int)(repLength * 4);
19431 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
19432 if ((repLength >= 4) && (gain2 > gain1))
19433 matchLength = repLength, offset = 0, start = ip;
19436 /* search match, depth 2 */
19437 { size_t offset2=999999999;
19438 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
19439 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
19440 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
19441 if ((ml2 >= 4) && (gain2 > gain1)) {
19442 matchLength = ml2, offset = offset2, start = ip;
19445 break; /* nothing found : store previous solution */
19450 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
19451 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
19452 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
19453 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
19454 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
19457 /* store sequence */
19459 { size_t const litLength = start - anchor;
19460 ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
19461 anchor = ip = start + matchLength;
19464 /* check immediate repcode */
19465 while (ip <= ilimit) {
19466 const U32 repCurrent = (U32)(ip-base);
19467 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
19468 const U32 repIndex = repCurrent - offset_2;
19469 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
19470 const BYTE* const repMatch = repBase + repIndex;
19471 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
19472 if (MEM_read32(ip) == MEM_read32(repMatch)) {
19473 /* repcode detected we should take it */
19474 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
19475 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
19476 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
19477 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
19480 continue; /* faster when present ... (?) */
19485 /* Save reps for next block */
19489 /* Return the last literals size */
19490 return (size_t)(iend - anchor);
19494 size_t ZSTD_compressBlock_greedy_extDict(
19495 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19496 void const* src, size_t srcSize)
19498 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
19501 size_t ZSTD_compressBlock_lazy_extDict(
19502 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19503 void const* src, size_t srcSize)
19506 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
19509 size_t ZSTD_compressBlock_lazy2_extDict(
19510 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19511 void const* src, size_t srcSize)
19514 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
19517 size_t ZSTD_compressBlock_btlazy2_extDict(
19518 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
19519 void const* src, size_t srcSize)
19522 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
19524 /**** ended inlining compress/zstd_lazy.c ****/
19525 /**** start inlining compress/zstd_ldm.c ****/
19527 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
19528 * All rights reserved.
19530 * This source code is licensed under both the BSD-style license (found in the
19531 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
19532 * in the COPYING file in the root directory of this source tree).
19533 * You may select, at your option, one of the above-listed licenses.
19536 /**** skipping file: zstd_ldm.h ****/
19538 /**** skipping file: ../common/debug.h ****/
19539 /**** skipping file: zstd_fast.h ****/
19540 /**** skipping file: zstd_double_fast.h ****/
19542 #define LDM_BUCKET_SIZE_LOG 3
19543 #define LDM_MIN_MATCH_LENGTH 64
19544 #define LDM_HASH_RLOG 7
19545 #define LDM_HASH_CHAR_OFFSET 10
19547 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
19548 ZSTD_compressionParameters const* cParams)
19550 params->windowLog = cParams->windowLog;
19551 ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
19552 DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
19553 if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
19554 if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
19555 if (cParams->strategy >= ZSTD_btopt) {
19556 /* Get out of the way of the optimal parser */
19557 U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
19558 assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
19559 assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
19560 params->minMatchLength = minMatch;
19562 if (params->hashLog == 0) {
19563 params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
19564 assert(params->hashLog <= ZSTD_HASHLOG_MAX);
19566 if (params->hashRateLog == 0) {
19567 params->hashRateLog = params->windowLog < params->hashLog
19569 : params->windowLog - params->hashLog;
19571 params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
19574 size_t ZSTD_ldm_getTableSize(ldmParams_t params)
19576 size_t const ldmHSize = ((size_t)1) << params.hashLog;
19577 size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
19578 size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
19579 size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
19580 + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
19581 return params.enableLdm ? totalSize : 0;
19584 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
19586 return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
19589 /** ZSTD_ldm_getSmallHash() :
19590 * numBits should be <= 32
19591 * If numBits==0, returns 0.
19592 * @return : the most significant numBits of value. */
19593 static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
19595 assert(numBits <= 32);
19596 return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
19599 /** ZSTD_ldm_getChecksum() :
19600 * numBitsToDiscard should be <= 32
19601 * @return : the next most significant 32 bits after numBitsToDiscard */
19602 static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
19604 assert(numBitsToDiscard <= 32);
19605 return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
19608 /** ZSTD_ldm_getTag() ;
19609 * Given the hash, returns the most significant numTagBits bits
19610 * after (32 + hbits) bits.
19612 * If there are not enough bits remaining, return the last
19613 * numTagBits bits. */
19614 static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
19616 assert(numTagBits < 32 && hbits <= 32);
19617 if (32 - hbits < numTagBits) {
19618 return hash & (((U32)1 << numTagBits) - 1);
19620 return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
19624 /** ZSTD_ldm_getBucket() :
19625 * Returns a pointer to the start of the bucket associated with hash. */
19626 static ldmEntry_t* ZSTD_ldm_getBucket(
19627 ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
19629 return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
19632 /** ZSTD_ldm_insertEntry() :
19633 * Insert the entry with corresponding hash into the hash table */
19634 static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
19635 size_t const hash, const ldmEntry_t entry,
19636 ldmParams_t const ldmParams)
19638 BYTE* const bucketOffsets = ldmState->bucketOffsets;
19639 *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
19640 bucketOffsets[hash]++;
19641 bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
19644 /** ZSTD_ldm_makeEntryAndInsertByTag() :
19646 * Gets the small hash, checksum, and tag from the rollingHash.
19648 * If the tag matches (1 << ldmParams.hashRateLog)-1, then
19649 * creates an ldmEntry from the offset, and inserts it into the hash table.
19651 * hBits is the length of the small hash, which is the most significant hBits
19652 * of rollingHash. The checksum is the next 32 most significant bits, followed
19653 * by ldmParams.hashRateLog bits that make up the tag. */
19654 static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
19655 U64 const rollingHash,
19658 ldmParams_t const ldmParams)
19660 U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
19661 U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
19662 if (tag == tagMask) {
19663 U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
19664 U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
19666 entry.offset = offset;
19667 entry.checksum = checksum;
19668 ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
19672 /** ZSTD_ldm_countBackwardsMatch() :
19673 * Returns the number of bytes that match backwards before pIn and pMatch.
19675 * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
19676 static size_t ZSTD_ldm_countBackwardsMatch(
19677 const BYTE* pIn, const BYTE* pAnchor,
19678 const BYTE* pMatch, const BYTE* pBase)
19680 size_t matchLength = 0;
19681 while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
19686 return matchLength;
19689 /** ZSTD_ldm_fillFastTables() :
19691 * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
19692 * This is similar to ZSTD_loadDictionaryContent.
19694 * The tables for the other strategies are filled within their
19695 * block compressors. */
19696 static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
19699 const BYTE* const iend = (const BYTE*)end;
19701 switch(ms->cParams.strategy)
19704 ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
19708 ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
19717 case ZSTD_btultra2:
19720 assert(0); /* not possible : not a valid strategy id */
19726 /** ZSTD_ldm_fillLdmHashTable() :
19728 * Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
19729 * lastHash is the rolling hash that corresponds to lastHashed.
19731 * Returns the rolling hash corresponding to position iend-1. */
19732 static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
19733 U64 lastHash, const BYTE* lastHashed,
19734 const BYTE* iend, const BYTE* base,
19735 U32 hBits, ldmParams_t const ldmParams)
19737 U64 rollingHash = lastHash;
19738 const BYTE* cur = lastHashed + 1;
19740 while (cur < iend) {
/* Advance the rolling hash by one position: drop the byte leaving the
 * window (cur[-1]) and add the byte entering it
 * (cur[minMatchLength-1]). The third argument line (the hash power)
 * appears to be missing from this extract — line numbers jump
 * 19742->19744. */
19741 rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
19742 cur[ldmParams.minMatchLength-1],
19744 ZSTD_ldm_makeEntryAndInsertByTag(state,
19745 rollingHash, hBits,
19746 (U32)(cur - base), ldmParams);
/* NOTE(review): the `++cur;` / closing-brace lines (19747-19748) are
 * missing from this extract. */
19749 return rollingHash;
/* Public entry point: seed the LDM hash table over [ip, iend) by computing
 * the initial rolling hash and delegating to ZSTD_ldm_fillLdmHashTable(). */
19752 void ZSTD_ldm_fillHashTable(
19753 ldmState_t* state, const BYTE* ip,
19754 const BYTE* iend, ldmParams_t const* params)
19756 DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
/* Nothing to hash if the segment is shorter than one minimum match. */
19757 if ((size_t)(iend - ip) >= params->minMatchLength) {
19758 U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
19759 ZSTD_ldm_fillLdmHashTable(
19760 state, startingHash, ip, iend - params->minMatchLength, state->window.base,
19761 params->hashLog - params->bucketSizeLog,
/* NOTE(review): the final argument line(s) and closing braces
 * (19762-19765) are missing from this extract. */
19767 /** ZSTD_ldm_limitTableUpdate() :
19769 * Sets cctx->nextToUpdate to a position corresponding closer to anchor
19771 * (after a long match, only update tables a limited amount). */
19772 static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
19774 U32 const current = (U32)(anchor - ms->window.base);
/* If more than 1024 positions are pending, skip ahead so at most ~512+1024
 * positions are (re)inserted — caps table-fill cost after a long LDM match.
 * NOTE(review): the assignment target line (19776, `ms->nextToUpdate =`)
 * is missing from this extract. */
19775 if (current > ms->nextToUpdate + 1024) {
19777 current - MIN(512, current - ms->nextToUpdate - 1024);
/* Core LDM search over one chunk of input: rolls a hash across src, probes
 * the LDM hash table for long-distance matches, appends found matches to
 * rawSeqStore, and returns the number of trailing bytes left unmatched
 * (iend - anchor). Returns an ERROR() code if rawSeqStore is full.
 * NOTE(review): this extract has many dropped lines (embedded numbers jump
 * repeatedly), including declarations, braces and `continue`/increment
 * statements — do not build from this text. */
19781 static size_t ZSTD_ldm_generateSequences_internal(
19782 ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
19783 ldmParams_t const* params, void const* src, size_t srcSize)
19785 /* LDM parameters */
19786 int const extDict = ZSTD_window_hasExtDict(ldmState->window);
19787 U32 const minMatchLength = params->minMatchLength;
19788 U64 const hashPower = ldmState->hashPower;
19789 U32 const hBits = params->hashLog - params->bucketSizeLog;
19790 U32 const ldmBucketSize = 1U << params->bucketSizeLog;
19791 U32 const hashRateLog = params->hashRateLog;
19792 U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
19793 /* Prefix and extDict parameters */
19794 U32 const dictLimit = ldmState->window.dictLimit;
19795 U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
19796 BYTE const* const base = ldmState->window.base;
19797 BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
19798 BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
19799 BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
19800 BYTE const* const lowPrefixPtr = base + dictLimit;
19802 BYTE const* const istart = (BYTE const*)src;
19803 BYTE const* const iend = istart + srcSize;
19804 BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
19805 /* Input positions */
19806 BYTE const* anchor = istart;
19807 BYTE const* ip = istart;
19809 BYTE const* lastHashed = NULL;
19810 U64 rollingHash = 0;
19812 while (ip <= ilimit) {
19814 U32 const current = (U32)(ip - base);
19815 size_t forwardMatchLength = 0, backwardMatchLength = 0;
19816 ldmEntry_t* bestEntry = NULL;
/* First iteration computes the hash from scratch; later iterations
 * rotate the previous hash by one byte. */
19817 if (ip != istart) {
19818 rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
19819 lastHashed[minMatchLength],
19822 rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
19826 /* Do not insert and do not look for a match */
19827 if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
19832 /* Get the best entry and compute the match lengths */
19834 ldmEntry_t* const bucket =
19835 ZSTD_ldm_getBucket(ldmState,
19836 ZSTD_ldm_getSmallHash(rollingHash, hBits),
19839 size_t bestMatchLength = 0;
19840 U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
19842 for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
19843 size_t curForwardMatchLength, curBackwardMatchLength,
19844 curTotalMatchLength;
/* Reject entries whose 32-bit checksum differs or whose offset has
 * been invalidated (<= lowestIndex). */
19845 if (cur->checksum != checksum || cur->offset <= lowestIndex) {
/* extDict case: the candidate match may live in the dictionary
 * segment, so counting must use the 2-segment variants. */
19849 BYTE const* const curMatchBase =
19850 cur->offset < dictLimit ? dictBase : base;
19851 BYTE const* const pMatch = curMatchBase + cur->offset;
19852 BYTE const* const matchEnd =
19853 cur->offset < dictLimit ? dictEnd : iend;
19854 BYTE const* const lowMatchPtr =
19855 cur->offset < dictLimit ? dictStart : lowPrefixPtr;
19857 curForwardMatchLength = ZSTD_count_2segments(
19859 matchEnd, lowPrefixPtr);
19860 if (curForwardMatchLength < minMatchLength) {
19863 curBackwardMatchLength =
19864 ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
19866 curTotalMatchLength = curForwardMatchLength +
19867 curBackwardMatchLength;
19868 } else { /* !extDict */
19869 BYTE const* const pMatch = base + cur->offset;
19870 curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
19871 if (curForwardMatchLength < minMatchLength) {
19874 curBackwardMatchLength =
19875 ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
19877 curTotalMatchLength = curForwardMatchLength +
19878 curBackwardMatchLength;
/* Keep the bucket entry with the greatest total match length. */
19881 if (curTotalMatchLength > bestMatchLength) {
19882 bestMatchLength = curTotalMatchLength;
19883 forwardMatchLength = curForwardMatchLength;
19884 backwardMatchLength = curBackwardMatchLength;
19890 /* No match found -- continue searching */
19891 if (bestEntry == NULL) {
19892 ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
/* Match found: extend it backwards into the pending literals. */
19900 mLength = forwardMatchLength + backwardMatchLength;
19901 ip -= backwardMatchLength;
19904 /* Store the sequence:
19905 * ip = current - backwardMatchLength
19906 * The match is at (bestEntry->offset - backwardMatchLength)
19908 U32 const matchIndex = bestEntry->offset;
19909 U32 const offset = current - matchIndex;
19910 rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
19912 /* Out of sequence storage */
19913 if (rawSeqStore->size == rawSeqStore->capacity)
19914 return ERROR(dstSize_tooSmall);
19915 seq->litLength = (U32)(ip - anchor);
19916 seq->matchLength = (U32)mLength;
19917 seq->offset = offset;
19918 rawSeqStore->size++;
19921 /* Insert the current entry into the hash table */
19922 ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
19923 (U32)(lastHashed - base),
19926 assert(ip + backwardMatchLength == lastHashed);
19928 /* Fill the hash table from lastHashed+1 to ip+mLength*/
19929 /* Heuristic: don't need to fill the entire table at end of block */
19930 if (ip + mLength <= ilimit) {
19931 rollingHash = ZSTD_ldm_fillLdmHashTable(
19932 ldmState, rollingHash, lastHashed,
19933 ip + mLength, base, hBits, *params);
19934 lastHashed = ip + mLength - 1;
/* Leftover bytes after the last match are returned to the caller so it
 * can prepend them to the next chunk's first sequence. */
19939 return iend - anchor;
19942 /*! ZSTD_ldm_reduceTable() :
19943 * reduce table indexes by `reducerValue` */
19944 static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
19945 U32 const reducerValue)
/* Offsets below reducerValue become 0 (invalid entry); others are shifted
 * down — used after window overflow correction. NOTE(review): the loop
 * variable declaration line (19947, `U32 u;`) is missing from this
 * extract. */
19948 for (u = 0; u < size; u++) {
19949 if (table[u].offset < reducerValue) table[u].offset = 0;
19950 else table[u].offset -= reducerValue;
/* Public driver: splits src into <=1MB chunks, performs overflow correction
 * and max-distance enforcement per chunk, then calls
 * ZSTD_ldm_generateSequences_internal() and folds each chunk's leftover
 * literals into the next generated sequence. */
19954 size_t ZSTD_ldm_generateSequences(
19955 ldmState_t* ldmState, rawSeqStore_t* sequences,
19956 ldmParams_t const* params, void const* src, size_t srcSize)
19958 U32 const maxDist = 1U << params->windowLog;
19959 BYTE const* const istart = (BYTE const*)src;
19960 BYTE const* const iend = istart + srcSize;
19961 size_t const kMaxChunkSize = 1 << 20;
19962 size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
19964 size_t leftoverSize = 0;
19966 assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
19967 /* Check that ZSTD_window_update() has been called for this chunk prior
19968 * to passing it to this function.
19970 assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
19971 /* The input could be very large (in zstdmt), so it must be broken up into
19972 * chunks to enforce the maximum distance and handle overflow correction.
19974 assert(sequences->pos <= sequences->size);
19975 assert(sequences->size <= sequences->capacity);
/* NOTE(review): the `size_t chunk;` declaration line (19965 region) is
 * missing from this extract. */
19976 for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
19977 BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
19978 size_t const remaining = (size_t)(iend - chunkStart);
19979 BYTE const *const chunkEnd =
19980 (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
19981 size_t const chunkSize = chunkEnd - chunkStart;
19982 size_t newLeftoverSize;
19983 size_t const prevSize = sequences->size;
19985 assert(chunkStart < iend);
19986 /* 1. Perform overflow correction if necessary. */
19987 if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
19988 U32 const ldmHSize = 1U << params->hashLog;
19989 U32 const correction = ZSTD_window_correctOverflow(
19990 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
/* Keep the LDM hash table consistent with the corrected window. */
19991 ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
19992 /* invalidate dictionaries on overflow correction */
19993 ldmState->loadedDictEnd = 0;
19995 /* 2. We enforce the maximum offset allowed.
19997 * kMaxChunkSize should be small enough that we don't lose too much of
19998 * the window through early invalidation.
19999 * TODO: * Test the chunk size.
20000 * * Try invalidation after the sequence generation and test the
20001 * the offset against maxDist directly.
20003 * NOTE: Because of dictionaries + sequence splitting we MUST make sure
20004 * that any offset used is valid at the END of the sequence, since it may
20005 * be split into two sequences. This condition holds when using
20006 * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
20007 * against maxDist directly, we'll have to carefully handle that case.
20009 ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
20010 /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
20011 newLeftoverSize = ZSTD_ldm_generateSequences_internal(
20012 ldmState, sequences, params, chunkStart, chunkSize);
20013 if (ZSTD_isError(newLeftoverSize))
20014 return newLeftoverSize;
20015 /* 4. We add the leftover literals from previous iterations to the first
20016 * newly generated sequence, or add the `newLeftoverSize` if none are
20019 /* Prepend the leftover literals from the last call */
20020 if (prevSize < sequences->size) {
20021 sequences->seq[prevSize].litLength += (U32)leftoverSize;
20022 leftoverSize = newLeftoverSize;
/* No sequence generated this chunk: the whole chunk is leftover. */
20024 assert(newLeftoverSize == chunkSize);
20025 leftoverSize += chunkSize;
/* Advance rawSeqStore past `srcSize` bytes of input, consuming literals
 * first, then match bytes; a match shortened below minMatch is dropped and
 * its remainder is converted into literals of the following sequence.
 * NOTE(review): embedded line numbers jump (20036->20039, 20047->20049,
 * 20049->20053), so `return`/brace lines are missing from this extract. */
20031 void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
20032 while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
20033 rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
20034 if (srcSize <= seq->litLength) {
20035 /* Skip past srcSize literals */
20036 seq->litLength -= (U32)srcSize;
20039 srcSize -= seq->litLength;
20040 seq->litLength = 0;
20041 if (srcSize < seq->matchLength) {
20042 /* Skip past the first srcSize of the match */
20043 seq->matchLength -= (U32)srcSize;
20044 if (seq->matchLength < minMatch) {
20045 /* The match is too short, omit it */
20046 if (rawSeqStore->pos + 1 < rawSeqStore->size) {
20047 seq[1].litLength += seq[0].matchLength;
20049 rawSeqStore->pos++;
/* Sequence fully consumed: zero it out and move to the next one. */
20053 srcSize -= seq->matchLength;
20054 seq->matchLength = 0;
20055 rawSeqStore->pos++;
20060 * If the sequence length is longer than remaining then the sequence is split
20061 * between this block and the next.
20063 * Returns the current sequence to handle, or if the rest of the block should
20064 * be literals, it returns a sequence with offset == 0.
20066 static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
20067 U32 const remaining, U32 const minMatch)
20069 rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
20070 assert(sequence.offset > 0);
20071 /* Likely: No partial sequence */
20072 if (remaining >= sequence.litLength + sequence.matchLength) {
/* Whole sequence fits in this block: consume it and return as-is.
 * NOTE(review): the `return sequence;` line (20074) is missing from
 * this extract. */
20073 rawSeqStore->pos++;
20076 /* Cut the sequence short (offset == 0 ==> rest is literals). */
20077 if (remaining <= sequence.litLength) {
20078 sequence.offset = 0;
20079 } else if (remaining < sequence.litLength + sequence.matchLength) {
20080 sequence.matchLength = remaining - sequence.litLength;
/* If the truncated match is too short to encode, emit literals instead. */
20081 if (sequence.matchLength < minMatch) {
20082 sequence.offset = 0;
20085 /* Skip past `remaining` bytes for the future sequences. */
20086 ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
/* Compress one block using pre-generated LDM sequences: for each rawSeq,
 * compress its literal segment with the selected block compressor, then
 * store the long-distance match itself via ZSTD_storeSeq(), updating the
 * repcode history. Returns the block compressor's result on the trailing
 * literals. */
20090 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
20091 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
20092 void const* src, size_t srcSize)
20094 const ZSTD_compressionParameters* const cParams = &ms->cParams;
20095 unsigned const minMatch = cParams->minMatch;
20096 ZSTD_blockCompressor const blockCompressor =
20097 ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
20099 BYTE const* const istart = (BYTE const*)src;
20100 BYTE const* const iend = istart + srcSize;
20101 /* Input positions */
20102 BYTE const* ip = istart;
20104 DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
20105 assert(rawSeqStore->pos <= rawSeqStore->size);
20106 assert(rawSeqStore->size <= rawSeqStore->capacity);
20107 /* Loop through each sequence and apply the block compressor to the lits */
20108 while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
20109 /* maybeSplitSequence updates rawSeqStore->pos */
20110 rawSeq const sequence = maybeSplitSequence(rawSeqStore,
20111 (U32)(iend - ip), minMatch);
/* offset == 0 signals "rest of block is literals": stop looping. */
20114 if (sequence.offset == 0)
20117 assert(ip + sequence.litLength + sequence.matchLength <= iend);
20119 /* Fill tables for block compressor */
20120 ZSTD_ldm_limitTableUpdate(ms, ip);
20121 ZSTD_ldm_fillFastTables(ms, ip);
20122 /* Run the block compressor */
20123 DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
/* NOTE(review): the declaration of `i` and the repcode shift body
 * (line 20130) are missing from this extract. */
20125 size_t const newLitLength =
20126 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
20127 ip += sequence.litLength;
20128 /* Update the repcodes */
20129 for (i = ZSTD_REP_NUM - 1; i > 0; i--)
20131 rep[0] = sequence.offset;
20132 /* Store the sequence */
20133 ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
20134 sequence.offset + ZSTD_REP_MOVE,
20135 sequence.matchLength - MINMATCH);
20136 ip += sequence.matchLength;
20139 /* Fill the tables for the block compressor */
20140 ZSTD_ldm_limitTableUpdate(ms, ip);
20141 ZSTD_ldm_fillFastTables(ms, ip);
20142 /* Compress the last literals */
20143 return blockCompressor(ms, seqStore, rep, ip, iend - ip);
20145 /**** ended inlining compress/zstd_ldm.c ****/
20146 /**** start inlining compress/zstd_opt.c ****/
20148 * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
20149 * All rights reserved.
20151 * This source code is licensed under both the BSD-style license (found in the
20152 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
20153 * in the COPYING file in the root directory of this source tree).
20154 * You may select, at your option, one of the above-listed licenses.
20157 /**** skipping file: zstd_compress_internal.h ****/
20158 /**** skipping file: hist.h ****/
20159 /**** skipping file: zstd_opt.h ****/
20162 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
20163 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
20164 #define ZSTD_MAX_PRICE (1<<30)
20166 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
20169 /*-*************************************
20170 * Price functions for optimal parser
20171 ***************************************/
20173 #if 0 /* approximation at bit level */
20174 # define BITCOST_ACCURACY 0
20175 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
20176 # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
20177 #elif 0 /* fractional bit accuracy */
20178 # define BITCOST_ACCURACY 8
20179 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
20180 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
20181 #else /* opt==approx, ultra==accurate */
20182 # define BITCOST_ACCURACY 8
20183 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
20184 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
/* Coarse price of a symbol with frequency `stat`: integer bit-length of
 * stat+1, scaled by BITCOST_MULTIPLIER. NOTE(review): the surrounding
 * braces (lines 20188/20190) are missing from this extract. */
20187 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
20189 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
/* Fractional-bit price of a symbol: BWeight is the integer bit count,
 * FWeight adds BITCOST_ACCURACY fractional bits from the mantissa.
 * NOTE(review): the `return weight;` line and braces are missing from this
 * extract (numbers jump 20199->20203). */
20192 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
20194 U32 const stat = rawStat + 1;
20195 U32 const hb = ZSTD_highbit32(stat);
20196 U32 const BWeight = hb * BITCOST_MULTIPLIER;
20197 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
20198 U32 const weight = BWeight + FWeight;
/* Guards against overflow of the shifted mantissa above. */
20199 assert(hb + BITCOST_ACCURACY < 31);
20203 #if (DEBUGLEVEL>=2)
20204 /* debugging function,
20205 * @return price in bytes as fractional value
20206 * for debug messages only */
20207 MEM_STATIC double ZSTD_fCost(U32 price)
20209 return (double)price / (BITCOST_MULTIPLIER*8);
/* Returns non-zero when literals are entropy-compressed (i.e. the literal
 * compression mode is anything other than "uncompressed"). */
20213 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
20215 return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
/* Precompute the WEIGHT() of each frequency-table total; individual symbol
 * prices are later derived as basePrice - WEIGHT(symbolFreq). The literal
 * base price is only needed when literals are compressed. */
20218 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
20220 if (ZSTD_compressedLiterals(optPtr))
20221 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
20222 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
20223 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
20224 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
20228 /* ZSTD_downscaleStat() :
20229 * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
20230 * return the resulting sum of elements */
20231 static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
/* NOTE(review): the declarations of `s` and `sum` and the accumulation /
 * return lines are missing from this extract (numbers jump 20231->20234
 * and 20237->20243). Each entry keeps a floor of 1 so every symbol stays
 * priceable. */
20234 DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
20235 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
20236 for (s=0; s<lastEltIndex+1; s++) {
20237 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
20243 /* ZSTD_rescaleFreqs() :
20244 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
20245 * take hints from dictionary if there is one
20246 * or init from zero, using src for literals stats, or flat 1 for match symbols
20247 * otherwise downscale existing stats, to be used as seed for next block.
/* NOTE(review): this extract drops many lines (declarations such as
 * `unsigned lit/ll/ml/of`, closing braces, and the static return-type line
 * of the function) — restore from the full zstd_opt.c. */
20250 ZSTD_rescaleFreqs(optState_t* const optPtr,
20251 const BYTE* const src, size_t const srcSize,
20252 int const optLevel)
20254 int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
20255 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
20256 optPtr->priceType = zop_dynamic;
20258 if (optPtr->litLengthSum == 0) { /* first block : init */
/* Tiny inputs: per-block statistics would be noise, use the predefined
 * distributions instead. */
20259 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
20260 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
20261 optPtr->priceType = zop_predef;
20264 assert(optPtr->symbolCosts != NULL);
20265 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
20266 /* huffman table presumed generated by dictionary */
20267 optPtr->priceType = zop_dynamic;
/* Seed each frequency table from the dictionary's entropy tables:
 * a symbol costing b bits gets frequency 2^(scaleLog-b). */
20269 if (compressedLiterals) {
20271 assert(optPtr->litFreq != NULL);
20272 optPtr->litSum = 0;
20273 for (lit=0; lit<=MaxLit; lit++) {
20274 U32 const scaleLog = 11; /* scale to 2K */
20275 U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
20276 assert(bitCost <= scaleLog);
20277 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
20278 optPtr->litSum += optPtr->litFreq[lit];
20282 FSE_CState_t llstate;
20283 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
20284 optPtr->litLengthSum = 0;
20285 for (ll=0; ll<=MaxLL; ll++) {
20286 U32 const scaleLog = 10; /* scale to 1K */
20287 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
20288 assert(bitCost < scaleLog);
20289 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
20290 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
20294 FSE_CState_t mlstate;
20295 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
20296 optPtr->matchLengthSum = 0;
20297 for (ml=0; ml<=MaxML; ml++) {
20298 U32 const scaleLog = 10;
20299 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
20300 assert(bitCost < scaleLog);
20301 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
20302 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
20306 FSE_CState_t ofstate;
20307 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
20308 optPtr->offCodeSum = 0;
20309 for (of=0; of<=MaxOff; of++) {
20310 U32 const scaleLog = 10;
20311 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
20312 assert(bitCost < scaleLog);
20313 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
20314 optPtr->offCodeSum += optPtr->offCodeFreq[of];
20317 } else { /* not a dictionary */
20319 assert(optPtr->litFreq != NULL);
20320 if (compressedLiterals) {
20321 unsigned lit = MaxLit;
20322 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
20323 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
/* No dictionary: length/offset codes start from a flat distribution. */
20327 for (ll=0; ll<=MaxLL; ll++)
20328 optPtr->litLengthFreq[ll] = 1;
20330 optPtr->litLengthSum = MaxLL+1;
20333 for (ml=0; ml<=MaxML; ml++)
20334 optPtr->matchLengthFreq[ml] = 1;
20336 optPtr->matchLengthSum = MaxML+1;
20339 for (of=0; of<=MaxOff; of++)
20340 optPtr->offCodeFreq[of] = 1;
20342 optPtr->offCodeSum = MaxOff+1;
20346 } else { /* new block : re-use previous statistics, scaled down */
20348 if (compressedLiterals)
20349 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
20350 optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
20351 optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
20352 optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
20355 ZSTD_setBasePrices(optPtr, optLevel);
20358 /* ZSTD_rawLiteralsCost() :
20359 * price of literals (only) in specified segment (which length can be 0).
20360 * does not include price of literalLength symbol */
20361 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
20362 const optState_t* const optPtr,
/* NOTE(review): the `int optLevel` parameter line and opening brace
 * (20363-20364) are missing from this extract, as are the closing
 * return/brace lines after 20378. */
20365 if (litLength == 0) return 0;
20367 if (!ZSTD_compressedLiterals(optPtr))
20368 return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */
20370 if (optPtr->priceType == zop_predef)
20371 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
20373 /* dynamic statistics */
/* Each literal costs basePrice - WEIGHT(freq); start from the maximum and
 * subtract per-literal discounts. */
20374 { U32 price = litLength * optPtr->litSumBasePrice;
20376 for (u=0; u < litLength; u++) {
20377 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
20378 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
20384 /* ZSTD_litLengthPrice() :
20385 * cost of literalLength symbol */
20386 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
20388 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
20390 /* dynamic statistics */
/* Price = extra bits of the LL code + FSE symbol cost derived from the
 * running litLength frequency table. */
20391 { U32 const llCode = ZSTD_LLcode(litLength);
20392 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
20393 + optPtr->litLengthSumBasePrice
20394 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
20398 /* ZSTD_getMatchPrice() :
20399 * Provides the cost of the match part (offset + matchLength) of a sequence
20400 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
20401 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
20402 FORCE_INLINE_TEMPLATE U32
20403 ZSTD_getMatchPrice(U32 const offset,
20404 U32 const matchLength,
20405 const optState_t* const optPtr,
20406 int const optLevel)
/* NOTE(review): the opening brace and the `U32 price;` declaration
 * (20407-20408 region) are missing from this extract, as is the final
 * `return price;`. */
20409 U32 const offCode = ZSTD_highbit32(offset+1);
20410 U32 const mlBase = matchLength - MINMATCH;
20411 assert(matchLength >= MINMATCH);
20413 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
20414 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
20416 /* dynamic statistics */
20417 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
20418 if ((optLevel<2) /*static*/ && offCode >= 20)
20419 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
20422 { U32 const mlCode = ZSTD_MLcode(mlBase);
20423 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
20426 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
20428 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
20432 /* ZSTD_updateStats() :
20433 * assumption : literals + litLengtn <= iend */
20434 static void ZSTD_updateStats(optState_t* const optPtr,
20435 U32 litLength, const BYTE* literals,
20436 U32 offsetCode, U32 matchLength)
/* Fold one emitted sequence back into the adaptive frequency tables so
 * subsequent price queries reflect it. */
20439 if (ZSTD_compressedLiterals(optPtr)) {
20441 for (u=0; u < litLength; u++)
20442 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
20443 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
20446 /* literal Length */
20447 { U32 const llCode = ZSTD_LLcode(litLength);
20448 optPtr->litLengthFreq[llCode]++;
20449 optPtr->litLengthSum++;
20452 /* match offset code (0-2=>repCode; 3+=>offset+2) */
20453 { U32 const offCode = ZSTD_highbit32(offsetCode+1);
20454 assert(offCode <= MaxOff);
20455 optPtr->offCodeFreq[offCode]++;
20456 optPtr->offCodeSum++;
/* match length */
20460 { U32 const mlBase = matchLength - MINMATCH;
20461 U32 const mlCode = ZSTD_MLcode(mlBase);
20462 optPtr->matchLengthFreq[mlCode]++;
20463 optPtr->matchLengthSum++;
20468 /* ZSTD_readMINMATCH() :
20469 * function safe only for comparisons
20470 * assumption : memPtr must be at least 4 bytes before end of buffer */
20471 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* NOTE(review): the `switch(length)` / `default:` lines (20472-20475) and
 * the length==3 big-endian `else` line (20479) are missing from this
 * extract. For 3-byte reads the 4th byte is shifted out so only the first
 * `length` bytes participate in the comparison. */
20476 case 4 : return MEM_read32(memPtr);
20477 case 3 : if (MEM_isLittleEndian())
20478 return MEM_read32(memPtr)<<8;
20480 return MEM_read32(memPtr)>>8;
20485 /* Update hashTable3 up to ip (excluded)
20486 Assumption : always within prefix (i.e. not within extDict) */
20487 static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
20488 U32* nextToUpdate3,
20489 const BYTE* const ip)
20491 U32* const hashTable3 = ms->hashTable3;
20492 U32 const hashLog3 = ms->hashLog3;
20493 const BYTE* const base = ms->window.base;
20494 U32 idx = *nextToUpdate3;
20495 U32 const target = (U32)(ip - base);
20496 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
20497 assert(hashLog3 > 0);
/* Insert every not-yet-indexed position up to (but excluding) ip.
 * NOTE(review): the `idx++;` / closing-brace lines (20501-20502) are
 * missing from this extract. */
20499 while(idx < target) {
20500 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
20504 *nextToUpdate3 = target;
/* Return the previous candidate stored at ip's 3-byte hash slot. */
20505 return hashTable3[hash3];
20509 /*-*************************************
20510 * Binary Tree search
20511 ***************************************/
20512 /** ZSTD_insertBt1() : add one or multiple positions to tree.
20513 * ip : assumed <= iend-8 .
20514 * @return : nb of positions added */
/* Inserts position ip into the binary-tree match structure (chainTable used
 * as a rolling pair-array of smaller/larger child links), descending the
 * tree and re-linking nodes. NOTE(review): this extract drops several lines
 * (e.g. the opening brace, `const BYTE* match;`, `continue;` statements in
 * the ZSTD_C_PREDICT branches, and braces around the extDict arm) — the
 * embedded numbering jumps repeatedly. */
20515 static U32 ZSTD_insertBt1(
20516 ZSTD_matchState_t* ms,
20517 const BYTE* const ip, const BYTE* const iend,
20518 U32 const mls, const int extDict)
20520 const ZSTD_compressionParameters* const cParams = &ms->cParams;
20521 U32* const hashTable = ms->hashTable;
20522 U32 const hashLog = cParams->hashLog;
20523 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
20524 U32* const bt = ms->chainTable;
20525 U32 const btLog = cParams->chainLog - 1;
20526 U32 const btMask = (1 << btLog) - 1;
20527 U32 matchIndex = hashTable[h];
20528 size_t commonLengthSmaller=0, commonLengthLarger=0;
20529 const BYTE* const base = ms->window.base;
20530 const BYTE* const dictBase = ms->window.dictBase;
20531 const U32 dictLimit = ms->window.dictLimit;
20532 const BYTE* const dictEnd = dictBase + dictLimit;
20533 const BYTE* const prefixStart = base + dictLimit;
20535 const U32 current = (U32)(ip-base);
20536 const U32 btLow = btMask >= current ? 0 : current - btMask;
20537 U32* smallerPtr = bt + 2*(current&btMask);
20538 U32* largerPtr = smallerPtr + 1;
20539 U32 dummy32; /* to be nullified at the end */
20540 U32 const windowLow = ms->window.lowLimit;
20541 U32 matchEndIdx = current+8+1;
20542 size_t bestLength = 8;
20543 U32 nbCompares = 1U << cParams->searchLog;
20544 #ifdef ZSTD_C_PREDICT
20545 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
20546 U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
20547 predictedSmall += (predictedSmall>0);
20548 predictedLarge += (predictedLarge>0);
20549 #endif /* ZSTD_C_PREDICT */
20551 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
20553 assert(ip <= iend-8); /* required for h calculation */
20554 hashTable[h] = current; /* Update Hash Table */
20556 assert(windowLow > 0);
20557 while (nbCompares-- && (matchIndex >= windowLow)) {
20558 U32* const nextPtr = bt + 2*(matchIndex & btMask);
20559 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
20560 assert(matchIndex < current);
20562 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
20563 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
20564 if (matchIndex == predictedSmall) {
20565 /* no need to check length, result known */
20566 *smallerPtr = matchIndex;
20567 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
20568 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
20569 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
20570 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
20573 if (matchIndex == predictedLarge) {
20574 *largerPtr = matchIndex;
20575 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
20576 largerPtr = nextPtr;
20577 matchIndex = nextPtr[0];
20578 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
/* Extend the guaranteed common prefix; extDict matches may straddle
 * the dictionary/prefix boundary and need the 2-segment counter. */
20583 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
20584 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
20585 match = base + matchIndex;
20586 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
20588 match = dictBase + matchIndex;
20589 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
20590 if (matchIndex+matchLength >= dictLimit)
20591 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
20594 if (matchLength > bestLength) {
20595 bestLength = matchLength;
20596 if (matchLength > matchEndIdx - matchIndex)
20597 matchEndIdx = matchIndex + (U32)matchLength;
20600 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
20601 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
20604 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
20605 /* match is smaller than current */
20606 *smallerPtr = matchIndex; /* update smaller idx */
20607 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
20608 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
20609 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
20610 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
20612 /* match is larger than current */
20613 *largerPtr = matchIndex;
20614 commonLengthLarger = matchLength;
20615 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
20616 largerPtr = nextPtr;
20617 matchIndex = nextPtr[0];
20620 *smallerPtr = *largerPtr = 0;
/* Return how many positions the caller may skip: long best matches allow
 * skipping ahead (speed optimization), bounded by matchEndIdx. */
20621 { U32 positions = 0;
20622 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
20623 assert(matchEndIdx > current + 8);
20624 return MAX(positions, matchEndIdx - (current + 8));
20628 FORCE_INLINE_TEMPLATE
/* ZSTD_updateTree_internal() :
 * Inserts every not-yet-indexed position in [ms->nextToUpdate, ip) into the
 * binary-tree match structure, advancing by the step that ZSTD_insertBt1()
 * returns for each inserted position.
 * @ms       : match state holding the window, hash table and chain/bt table
 * @ip       : current position; insertion stops just before this index
 * @iend     : end of input, forwarded to ZSTD_insertBt1() for match bounds
 * @mls      : minimum match length to hash on (template constant)
 * @dictMode : only ZSTD_extDict changes insertion behavior here */
20629 void ZSTD_updateTree_internal(
20630 ZSTD_matchState_t* ms,
20631 const BYTE* const ip, const BYTE* const iend,
20632 const U32 mls, const ZSTD_dictMode_e dictMode)
20634 const BYTE* const base = ms->window.base;
20635 U32 const target = (U32)(ip - base);
20636 U32 idx = ms->nextToUpdate;
20637 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
20638 idx, target, dictMode);
/* insert every position from idx up to (but excluding) target */
20640 while(idx < target) {
20641 U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
/* forward must be positive, otherwise the loop would not progress */
20642 assert(idx < (U32)(idx + forward));
/* indexes must remain representable as U32 */
20645 assert((size_t)(ip - base) <= (size_t)(U32)(-1));
20646 assert((size_t)(iend - base) <= (size_t)(U32)(-1));
20647 ms->nextToUpdate = target;
/* Public entry point : update the match tree for the no-dictionary mode,
 * using the minMatch currently configured in the compression parameters. */
20650 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
20651 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
20654 FORCE_INLINE_TEMPLATE
/* ZSTD_insertBtAndGetAllMatches() :
 * Inserts the current position into the binary tree, and collects into
 * `matches` every candidate of length > lengthToBeat-1, in strictly
 * increasing length order. Candidates are gathered from three sources :
 * 1) repcodes, 2) an optional hash3 short-match finder (mls==3 only),
 * 3) the binary-tree search, optionally followed by a dictMatchState tree.
 * @return : number of matches stored in `matches`.
 * NOTE(review): also updates ms->nextToUpdate when a repetitive pattern
 * is detected (see matchEndIdx handling at the end). */
20655 U32 ZSTD_insertBtAndGetAllMatches (
20656 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
20657 ZSTD_matchState_t* ms,
20658 U32* nextToUpdate3,
20659 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
20660 const U32 rep[ZSTD_REP_NUM],
20661 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
20662 const U32 lengthToBeat,
20663 U32 const mls /* template */)
20665 const ZSTD_compressionParameters* const cParams = &ms->cParams;
20666 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
20667 const BYTE* const base = ms->window.base;
20668 U32 const current = (U32)(ip-base);
20669 U32 const hashLog = cParams->hashLog;
20670 U32 const minMatch = (mls==3) ? 3 : 4;
20671 U32* const hashTable = ms->hashTable;
20672 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
20673 U32 matchIndex = hashTable[h];
20674 U32* const bt = ms->chainTable;
/* the bt "chain table" stores 2 children per position, hence chainLog - 1 */
20675 U32 const btLog = cParams->chainLog - 1;
20676 U32 const btMask= (1U << btLog) - 1;
20677 size_t commonLengthSmaller=0, commonLengthLarger=0;
20678 const BYTE* const dictBase = ms->window.dictBase;
20679 U32 const dictLimit = ms->window.dictLimit;
20680 const BYTE* const dictEnd = dictBase + dictLimit;
20681 const BYTE* const prefixStart = base + dictLimit;
20682 U32 const btLow = (btMask >= current) ? 0 : current - btMask;
20683 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
20684 U32 const matchLow = windowLow ? windowLow : 1;
20685 U32* smallerPtr = bt + 2*(current&btMask);
20686 U32* largerPtr = bt + 2*(current&btMask) + 1;
20687 U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
20688 U32 dummy32; /* to be nullified at the end */
20690 U32 nbCompares = 1U << cParams->searchLog;
/* dictMatchState-only setup; all of these collapse to NULL/0 (or local
 * equivalents) in the other dict modes, so the optimizer can drop them. */
20692 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
20693 const ZSTD_compressionParameters* const dmsCParams =
20694 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
20695 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
20696 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
20697 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
20698 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
20699 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
20700 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
20701 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
20702 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
20703 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
/* bestLength starts just below the threshold, so any stored match beats it */
20705 size_t bestLength = lengthToBeat-1;
20706 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
20708 /* check repCode */
20709 assert(ll0 <= 1); /* necessarily 1 or 0 */
20710 { U32 const lastR = ZSTD_REP_NUM + ll0;
20712 for (repCode = ll0; repCode < lastR; repCode++) {
/* repCode==ZSTD_REP_NUM encodes the "rep[0] - 1" virtual repcode */
20713 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
20714 U32 const repIndex = current - repOffset;
20716 assert(current >= dictLimit);
20717 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
20718 /* We must validate the repcode offset because when we're using a dictionary the
20719 * valid offset range shrinks when the dictionary goes out of bounds.
20721 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
20722 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
20724 } else { /* repIndex < dictLimit || repIndex >= current */
20725 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
20726 dmsBase + repIndex - dmsIndexDelta :
20727 dictBase + repIndex;
20728 assert(current >= windowLow);
20729 if ( dictMode == ZSTD_extDict
20730 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
20731 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
20732 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
20733 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
20735 if (dictMode == ZSTD_dictMatchState
20736 && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
20737 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
20738 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
20739 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
20741 /* save longer solution */
20742 if (repLen > bestLength) {
20743 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
20744 repCode, ll0, repOffset, repLen);
20745 bestLength = repLen;
20746 matches[mnum].off = repCode - ll0;
20747 matches[mnum].len = (U32)repLen;
/* a good-enough repcode match, or one reaching iLimit, ends the search early */
20749 if ( (repLen > sufficient_len)
20750 | (ip+repLen == iLimit) ) { /* best possible */
20754 /* HC3 match finder */
20755 if ((mls == 3) /*static*/ && (bestLength < mls)) {
20756 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
20757 if ((matchIndex3 >= matchLow)
20758 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
20760 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
20761 const BYTE* const match = base + matchIndex3;
20762 mlen = ZSTD_count(ip, match, iLimit);
20764 const BYTE* const match = dictBase + matchIndex3;
20765 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
20768 /* save best solution */
20769 if (mlen >= mls /* == 3 > bestLength */) {
20770 DEBUGLOG(8, "found small match with hlog3, of length %u",
20773 assert(current > matchIndex3);
20774 assert(mnum==0); /* no prior solution */
20775 matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
20776 matches[0].len = (U32)mlen;
20778 if ( (mlen > sufficient_len) |
20779 (ip+mlen == iLimit) ) { /* best possible length */
20780 ms->nextToUpdate = current+1; /* skip insertion */
20783 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
/* binary-tree search : descend the tree rooted at hashTable[h],
 * while simultaneously re-inserting `current` into the tree */
20786 hashTable[h] = current; /* Update Hash Table */
20788 while (nbCompares-- && (matchIndex >= matchLow)) {
20789 U32* const nextPtr = bt + 2*(matchIndex & btMask);
20791 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
20792 assert(current > matchIndex);
20794 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
20795 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
20796 match = base + matchIndex;
20797 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
20798 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
20800 match = dictBase + matchIndex;
20801 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
20802 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
20803 if (matchIndex+matchLength >= dictLimit)
20804 match = base + matchIndex; /* prepare for match[matchLength] read */
20807 if (matchLength > bestLength) {
20808 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
20809 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
20810 assert(matchEndIdx > matchIndex);
20811 if (matchLength > matchEndIdx - matchIndex)
20812 matchEndIdx = matchIndex + (U32)matchLength;
20813 bestLength = matchLength;
20814 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
20815 matches[mnum].len = (U32)matchLength;
20817 if ( (matchLength > ZSTD_OPT_NUM)
20818 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
20819 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
20820 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
20824 if (match[matchLength] < ip[matchLength]) {
20825 /* match smaller than current */
20826 *smallerPtr = matchIndex; /* update smaller idx */
20827 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
20828 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
20829 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
20830 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
20832 *largerPtr = matchIndex;
20833 commonLengthLarger = matchLength;
20834 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
20835 largerPtr = nextPtr;
20836 matchIndex = nextPtr[0];
/* terminate both dangling child links of the freshly inserted node */
20839 *smallerPtr = *largerPtr = 0;
/* continue the search inside the dictMatchState tree (read-only :
 * the dms tree is never modified by this search) */
20841 if (dictMode == ZSTD_dictMatchState && nbCompares) {
20842 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
20843 U32 dictMatchIndex = dms->hashTable[dmsH];
20844 const U32* const dmsBt = dms->chainTable;
20845 commonLengthSmaller = commonLengthLarger = 0;
20846 while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
20847 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
20848 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
20849 const BYTE* match = dmsBase + dictMatchIndex;
20850 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
20851 if (dictMatchIndex+matchLength >= dmsHighLimit)
20852 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
20854 if (matchLength > bestLength) {
/* translate the dms index into the current window's index space */
20855 matchIndex = dictMatchIndex + dmsIndexDelta;
20856 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
20857 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
20858 if (matchLength > matchEndIdx - matchIndex)
20859 matchEndIdx = matchIndex + (U32)matchLength;
20860 bestLength = matchLength;
20861 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
20862 matches[mnum].len = (U32)matchLength;
20864 if ( (matchLength > ZSTD_OPT_NUM)
20865 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
20866 break; /* drop, to guarantee consistency (miss a little bit of compression) */
20870 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
20871 if (match[matchLength] < ip[matchLength]) {
20872 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
20873 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
20875 /* match is larger than current */
20876 commonLengthLarger = matchLength;
20877 dictMatchIndex = nextPtr[0];
20882 assert(matchEndIdx > current+8);
20883 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
/* ZSTD_BtGetAllMatches() :
 * Wrapper around ZSTD_insertBtAndGetAllMatches() :
 * first catches up the tree insertion (positions up to ip), then dispatches
 * on minMatch so that each case instantiates a specialized (templated)
 * version of the search with `mls` known at compile time.
 * @return : number of matches found (0 if ip lies in an area already skipped). */
20888 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
20889 ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
20890 ZSTD_matchState_t* ms,
20891 U32* nextToUpdate3,
20892 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
20893 const U32 rep[ZSTD_REP_NUM],
20895 U32 const lengthToBeat)
20897 const ZSTD_compressionParameters* const cParams = &ms->cParams;
20898 U32 const matchLengthSearch = cParams->minMatch;
20899 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
20900 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
20901 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
20902 switch(matchLengthSearch)
20904 case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
20906 case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
20907 case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
20909 case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
20914 /*-*******************************
20916 *********************************/
/* total span (in bytes) covered by an optimal-parser node :
 * its literal run plus its match length */
20919 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
20921 return sol.litlen + sol.mlen;
/* debug helper : dumps the nbElts==lastEltID+1 entries of a statistics
 * table through RAWLOG (debug builds only) */
20927 listStats(const U32* table, int lastEltID)
20929 int const nbElts = lastEltID + 1;
20931 for (enb=0; enb < nbElts; enb++) {
20933 /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
20934 RAWLOG(2, "%4i,", table[enb]);
20941 FORCE_INLINE_TEMPLATE size_t
/* ZSTD_compressBlock_opt_generic() :
 * Core of the btopt/btultra "optimal parser" :
 * for each starting position, collects all candidate matches, then runs a
 * forward dynamic-programming pass pricing every reachable position
 * (opt[] = price table), and finally walks the cheapest path backwards,
 * emitting the selected sequences into seqStore.
 * `optLevel` (0..2) and `dictMode` are compile-time template parameters.
 * @return : size of the last (unemitted) literal run, in bytes. */
20942 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
20943 seqStore_t* seqStore,
20944 U32 rep[ZSTD_REP_NUM],
20945 const void* src, size_t srcSize,
20946 const int optLevel,
20947 const ZSTD_dictMode_e dictMode)
20949 optState_t* const optStatePtr = &ms->opt;
20950 const BYTE* const istart = (const BYTE*)src;
20951 const BYTE* ip = istart;
20952 const BYTE* anchor = istart;
20953 const BYTE* const iend = istart + srcSize;
20954 const BYTE* const ilimit = iend - 8;
20955 const BYTE* const base = ms->window.base;
20956 const BYTE* const prefixStart = base + ms->window.dictLimit;
20957 const ZSTD_compressionParameters* const cParams = &ms->cParams;
20959 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
20960 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
20961 U32 nextToUpdate3 = ms->nextToUpdate;
20963 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
20964 ZSTD_match_t* const matches = optStatePtr->matchTable;
20965 ZSTD_optimal_t lastSequence;
20968 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
20969 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
20970 assert(optLevel <= 2);
20971 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
/* skip first position if it is the very start of the prefix */
20972 ip += (ip==prefixStart);
/* main loop : one iteration per optimization series */
20975 while (ip < ilimit) {
20976 U32 cur, last_pos = 0;
20978 /* find first match */
20979 { U32 const litlen = (U32)(ip - anchor);
20980 U32 const ll0 = !litlen;
20981 U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
20982 if (!nbMatches) { ip++; continue; }
20984 /* initialize opt[0] */
20985 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
20986 opt[0].mlen = 0; /* means is_a_literal */
20987 opt[0].litlen = litlen;
20988 /* We don't need to include the actual price of the literals because
20989 * it is static for the duration of the forward pass, and is included
20990 * in every price. We include the literal length to avoid negative
20991 * prices when we subtract the previous literal length.
20993 opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
20995 /* large match -> immediate encoding */
20996 { U32 const maxML = matches[nbMatches-1].len;
20997 U32 const maxOffset = matches[nbMatches-1].off;
20998 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
20999 nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
21001 if (maxML > sufficient_len) {
21002 lastSequence.litlen = litlen;
21003 lastSequence.mlen = maxML;
21004 lastSequence.off = maxOffset;
21005 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
21006 maxML, sufficient_len);
21008 last_pos = ZSTD_totalLen(lastSequence);
21009 goto _shortestPath;
21012 /* set prices for first matches starting position == 0 */
21013 { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
21016 for (pos = 1; pos < minMatch; pos++) {
21017 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
/* matches[] is sorted by increasing length, so each pass prices a new range */
21019 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
21020 U32 const offset = matches[matchNb].off;
21021 U32 const end = matches[matchNb].len;
21022 for ( ; pos <= end ; pos++ ) {
21023 U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
21024 U32 const sequencePrice = literalsPrice + matchPrice;
21025 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
21026 pos, ZSTD_fCost(sequencePrice));
21027 opt[pos].mlen = pos;
21028 opt[pos].off = offset;
21029 opt[pos].litlen = litlen;
21030 opt[pos].price = sequencePrice;
21036 /* check further positions */
21037 for (cur = 1; cur <= last_pos; cur++) {
21038 const BYTE* const inr = ip + cur;
21039 assert(cur < ZSTD_OPT_NUM);
21040 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
21042 /* Fix current position with one literal if cheaper */
21043 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
21044 int const price = opt[cur-1].price
21045 + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
21046 + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
21047 - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
21048 assert(price < 1000000000); /* overflow check */
21049 if (price <= opt[cur].price) {
21050 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
21051 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
21052 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
21055 opt[cur].litlen = litlen;
21056 opt[cur].price = price;
21058 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
21059 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
21060 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
21064 /* Set the repcodes of the current position. We must do it here
21065 * because we rely on the repcodes of the 2nd to last sequence being
21066 * correct to set the next chunks repcodes during the backward
21069 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
21070 assert(cur >= opt[cur].mlen);
21071 if (opt[cur].mlen != 0) {
21072 U32 const prev = cur - opt[cur].mlen;
21073 repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
21074 memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
21076 memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
21079 /* last match must start at a minimum distance of 8 from oend */
21080 if (inr > ilimit) continue;
21082 if (cur == last_pos) break;
21084 if ( (optLevel==0) /*static_test*/
21085 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
21086 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
21087 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
/* search for new matches starting at this intermediate position */
21090 { U32 const ll0 = (opt[cur].mlen != 0);
21091 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
21092 U32 const previousPrice = opt[cur].price;
21093 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
21094 U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
21097 DEBUGLOG(7, "rPos:%u : no match found", cur);
21101 { U32 const maxML = matches[nbMatches-1].len;
21102 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
21103 inr-istart, cur, nbMatches, maxML);
21105 if ( (maxML > sufficient_len)
21106 || (cur + maxML >= ZSTD_OPT_NUM) ) {
21107 lastSequence.mlen = maxML;
21108 lastSequence.off = matches[nbMatches-1].off;
21109 lastSequence.litlen = litlen;
21110 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
21111 last_pos = cur + ZSTD_totalLen(lastSequence);
21112 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
21113 goto _shortestPath;
21116 /* set prices using matches found at position == cur */
21117 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
21118 U32 const offset = matches[matchNb].off;
21119 U32 const lastML = matches[matchNb].len;
21120 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
21123 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
21124 matchNb, matches[matchNb].off, lastML, litlen);
21126 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
21127 U32 const pos = cur + mlen;
21128 int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
21130 if ((pos > last_pos) || (price < opt[pos].price)) {
21131 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
21132 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
21133 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
21134 opt[pos].mlen = mlen;
21135 opt[pos].off = offset;
21136 opt[pos].litlen = litlen;
21137 opt[pos].price = price;
21139 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
21140 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
21141 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
21144 } /* for (cur = 1; cur <= last_pos; cur++) */
21146 lastSequence = opt[last_pos];
21147 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
21148 assert(cur < ZSTD_OPT_NUM); /* control overflow*/
21150 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
21151 assert(opt[0].mlen == 0);
21153 /* Set the next chunk's repcodes based on the repcodes of the beginning
21154 * of the last match, and the last sequence. This avoids us having to
21155 * update them while traversing the sequences.
21157 if (lastSequence.mlen != 0) {
21158 repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
21159 memcpy(rep, &reps, sizeof(reps));
21161 memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
/* backward traversal : compact the chosen path into opt[storeStart..storeEnd] */
21164 { U32 const storeEnd = cur + 1;
21165 U32 storeStart = storeEnd;
21168 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
21169 last_pos, cur); (void)last_pos;
21170 assert(storeEnd < ZSTD_OPT_NUM);
21171 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
21172 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
21173 opt[storeEnd] = lastSequence;
21174 while (seqPos > 0) {
21175 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
21177 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
21178 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
21179 opt[storeStart] = opt[seqPos];
21180 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
21183 /* save sequences */
21184 DEBUGLOG(6, "sending selected sequences into seqStore")
21186 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
21187 U32 const llen = opt[storePos].litlen;
21188 U32 const mlen = opt[storePos].mlen;
21189 U32 const offCode = opt[storePos].off;
21190 U32 const advance = llen + mlen;
21191 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
21192 anchor - istart, (unsigned)llen, (unsigned)mlen);
21194 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
21195 assert(storePos == storeEnd); /* must be last sequence */
21196 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
21197 continue; /* will finish */
21200 assert(anchor + llen <= iend);
21201 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
21202 ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
21206 ZSTD_setBasePrices(optStatePtr, optLevel);
21208 } /* while (ip < ilimit) */
21210 /* Return the last literals size */
21211 return (size_t)(iend - anchor);
/* btopt strategy : optimal parser at optLevel 0, no dictionary */
21215 size_t ZSTD_compressBlock_btopt(
21216 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
21217 const void* src, size_t srcSize)
21219 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
21220 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
21224 /* used in 2-pass strategy */
/* Left-shifts every entry of a frequency table by (ZSTD_FREQ_DIV + bonus),
 * increasing the relative weight of already-collected statistics.
 * Returns the table's new total (accumulated in code not shown in this view —
 * callers assign the result to their *Sum counters). */
21225 static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
21228 assert(ZSTD_FREQ_DIV+bonus >= 0);
21229 for (s=0; s<lastEltIndex+1; s++) {
21230 table[s] <<= ZSTD_FREQ_DIV+bonus;
21237 /* used in 2-pass strategy */
/* Upscales all four statistics tables (literals only when literals are
 * actually compressed), refreshing the cached *Sum totals. */
21238 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
21240 if (ZSTD_compressedLiterals(optPtr))
21241 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
21242 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
21243 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
21244 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
21247 /* ZSTD_initStats_ultra():
21248 * make a first compression pass, just to seed stats with more accurate starting values.
21249 * only works on first block, with no dictionary and no ldm.
21250 * this function cannot error, hence its contract must be respected.
21253 ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
21254 seqStore_t* seqStore,
21255 U32 rep[ZSTD_REP_NUM],
21256 const void* src, size_t srcSize)
21258 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
21259 memcpy(tmpRep, rep, sizeof(tmpRep));
21261 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
21262 assert(ms->opt.litLengthSum == 0); /* first block */
21263 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
21264 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
21265 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
/* throw-away first pass at maximum optLevel, used only to seed ms->opt */
21267 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
21269 /* invalidate first scan from history */
21270 ZSTD_resetSeqStore(seqStore);
/* rewind the window so the real pass sees the block as fresh input */
21271 ms->window.base -= srcSize;
21272 ms->window.dictLimit += (U32)srcSize;
21273 ms->window.lowLimit = ms->window.dictLimit;
21274 ms->nextToUpdate = ms->window.dictLimit;
21276 /* reinforce weight of collected statistics */
21277 ZSTD_upscaleStats(&ms->opt);
/* btultra strategy : optimal parser at optLevel 2, no dictionary */
21280 size_t ZSTD_compressBlock_btultra(
21281 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
21282 const void* src, size_t srcSize)
21284 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
21285 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
/* btultra2 strategy : like btultra, but may run an extra statistics-seeding
 * pass over the very first block (see the 2-pass comment below). */
21288 size_t ZSTD_compressBlock_btultra2(
21289 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
21290 const void* src, size_t srcSize)
21292 U32 const current = (U32)((const BYTE*)src - ms->window.base);
21293 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
21295 /* 2-pass strategy:
21296 * this strategy makes a first pass over first block to collect statistics
21297 * and seed next round's statistics with it.
21298 * After 1st pass, function forgets everything, and starts a new block.
21299 * Consequently, this can only work if no data has been previously loaded in tables,
21300 * aka, no dictionary, no prefix, no ldm preprocessing.
21301 * The compression ratio gain is generally small (~0.5% on first block),
21302 * the cost is 2x cpu time on first block. */
21303 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
21304 if ( (ms->opt.litLengthSum==0) /* first block */
21305 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
21306 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
21307 && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
21308 && (srcSize > ZSTD_PREDEF_THRESHOLD)
21310 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
/* real pass, compressed output is produced here */
21313 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
/* btopt variant searching an attached dictionary's match state as well */
21316 size_t ZSTD_compressBlock_btopt_dictMatchState(
21317 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
21318 const void* src, size_t srcSize)
21320 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
/* btultra variant searching an attached dictionary's match state as well */
21323 size_t ZSTD_compressBlock_btultra_dictMatchState(
21324 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
21325 const void* src, size_t srcSize)
21327 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
/* btopt variant for windows with an external (out-of-prefix) segment */
21330 size_t ZSTD_compressBlock_btopt_extDict(
21331 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
21332 const void* src, size_t srcSize)
21334 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
/* btultra variant for windows with an external (out-of-prefix) segment */
21337 size_t ZSTD_compressBlock_btultra_extDict(
21338 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
21339 const void* src, size_t srcSize)
21341 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
21344 /* note : no btultra2 variant for extDict nor dictMatchState,
21345 * because btultra2 is not meant to work with dictionaries
21346 * and is only specific for the first block (no prefix) */
21347 /**** ended inlining compress/zstd_opt.c ****/
21349 /**** start inlining decompress/huf_decompress.c ****/
21350 /* ******************************************************************
21351 * huff0 huffman decoder,
21352 * part of Finite State Entropy library
21353 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
21355 * You can contact the author at :
21356 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
21358 * This source code is licensed under both the BSD-style license (found in the
21359 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
21360 * in the COPYING file in the root directory of this source tree).
21361 * You may select, at your option, one of the above-listed licenses.
21362 ****************************************************************** */
21364 /* **************************************************************
21366 ****************************************************************/
21367 #include <string.h> /* memcpy, memset */
21368 /**** skipping file: ../common/compiler.h ****/
21369 /**** skipping file: ../common/bitstream.h ****/
21370 /**** skipping file: ../common/fse.h ****/
21371 #define HUF_STATIC_LINKING_ONLY
21372 /**** skipping file: ../common/huf.h ****/
21373 /**** skipping file: ../common/error_private.h ****/
21375 /* **************************************************************
21377 ****************************************************************/
21379 /* These two optional macros force the use one way or another of the two
21380 * Huffman decompression implementations. You can't force in both directions
21381 * at the same time.
21383 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
21384 defined(HUF_FORCE_DECOMPRESS_X2)
21385 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
21389 /* **************************************************************
21391 ****************************************************************/
/* HUF_isError : alias onto the shared error-framework predicate. */
21392 #define HUF_isError ERR_isError
21395 /* **************************************************************
21396 * Byte alignment for workSpace management
21397 ****************************************************************/
/* HUF_ALIGN(x, a) : round x up to the next multiple of a
 * (a must be a power of 2 for the mask trick below to be valid). */
21398 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
21399 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
21402 /* **************************************************************
21403 * BMI2 Variant Wrappers
21404 ****************************************************************/
/* HUF_DGEN(fn) :
 * generates the dispatch machinery around a fn##_body implementation.
 * First definition: emits fn##_default plus a TARGET_ATTRIBUTE("bmi2")
 * fn##_bmi2 clone, and a fn() wrapper that picks one at runtime from the
 * `bmi2` argument. Second definition: single-body fallback that ignores
 * `bmi2`. NOTE(review): the #if/#else/#endif guards selecting between the
 * two definitions are not visible in this excerpt — confirm upstream. */
21407 #define HUF_DGEN(fn) \
21409 static size_t fn##_default( \
21410 void* dst, size_t dstSize, \
21411 const void* cSrc, size_t cSrcSize, \
21412 const HUF_DTable* DTable) \
21414 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
21417 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
21418 void* dst, size_t dstSize, \
21419 const void* cSrc, size_t cSrcSize, \
21420 const HUF_DTable* DTable) \
21422 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
21425 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
21426 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
21429 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
21431 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
21436 #define HUF_DGEN(fn) \
21437 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
21438 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
21441 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
21447 /*-***************************/
21448 /* generic DTableDesc */
21449 /*-***************************/
/* DTableDesc : byte-level view of the HUF_DTable header word
 * (DEBUG_STATIC_ASSERT below confirms sizeof(DTableDesc)==sizeof(HUF_DTable)). */
21450 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
/* HUF_getDTableDesc() :
 * extract the descriptor from the first cell of a DTable.
 * Uses memcpy rather than a pointer cast to avoid strict-aliasing issues. */
21452 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
21455 memcpy(&dtd, table, sizeof(dtd));
21460 #ifndef HUF_FORCE_DECOMPRESS_X2
21462 /*-***************************/
21463 /* single-symbol decoding */
21464 /*-***************************/
/* HUF_DEltX1 : one cell of the single-symbol decoding table. */
21465 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
/* HUF_readDTableX1_wksp() :
 * build a single-symbol (X1) decoding table from a serialized Huffman tree
 * description, using the caller-supplied workSpace as scratch memory.
 * @return : size of the consumed header, or an error code. */
21467 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
21472 void* const dtPtr = DTable + 1;
21473 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
/* carve rankVal[] and huffWeight[] out of workSpace, tracked in U32 units */
21477 size_t spaceUsed32 = 0;
21479 rankVal = (U32 *)workSpace + spaceUsed32;
21480 spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
21481 huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
21482 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
21484 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
21486 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
21487 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
/* decode symbol weights and per-rank counts from the header */
21489 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
21490 if (HUF_isError(iSize)) return iSize;
/* record the effective tableLog into the DTable header */
21493 { DTableDesc dtd = HUF_getDTableDesc(DTable);
21494 if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
21496 dtd.tableLog = (BYTE)tableLog;
21497 memcpy(DTable, &dtd, sizeof(dtd));
21500 /* Calculate starting value for each rank */
21501 { U32 n, nextRankStart = 0;
21502 for (n=1; n<tableLog+1; n++) {
21503 U32 const current = nextRankStart;
21504 nextRankStart += (rankVal[n] << (n-1));
21505 rankVal[n] = current;
/* fill the DTable : each symbol occupies 2^(tableLog+1-w)/2 cells */
21510 size_t const nEnd = nbSymbols;
21511 for (n=0; n<nEnd; n++) {
21512 size_t const w = huffWeight[n];
21513 size_t const length = (1 << w) >> 1;
21514 size_t const uStart = rankVal[w];
21515 size_t const uEnd = uStart + length;
21519 D.nbBits = (BYTE)(tableLog + 1 - w);
21520 rankVal[w] = (U32)uEnd;
21522 /* Use length in the loop bound so the compiler knows it is short. */
21523 for (u = 0; u < length; ++u)
21524 dt[uStart + u] = D;
21526 /* Unroll the loop 4 times, we know it is a power of 2. */
21527 for (u = uStart; u < uEnd; u += 4) {
21536 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
21538 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
21539 return HUF_readDTableX1_wksp(DTable, src, srcSize,
21540 workSpace, sizeof(workSpace));
/* HUF_decodeSymbolX1() :
 * decode one symbol from the bitstream using the single-symbol table :
 * peek dtLog bits, look up the cell, then consume the cell's nbBits. */
21543 FORCE_INLINE_TEMPLATE BYTE
21544 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
21546 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
21547 BYTE const c = dt[val].byte;
21548 BIT_skipBits(Dstream, dt[val].nbBits);
/* emit one symbol unconditionally */
21552 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
21553 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
/* emit only when the bit container is guaranteed deep enough
 * (64-bit container, or small max table log) */
21555 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
21556 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
21557 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
/* emit only on 64-bit targets */
21559 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
21560 if (MEM_64bits()) \
21561 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
/* HUF_decodeStreamX1() :
 * decode symbols from bitDPtr into [p, pEnd) using the X1 table.
 * Fast path emits up to 4 symbols per bitstream reload; the tail loops
 * finish the remaining [0-3] symbols and drain without reloading.
 * @return : number of bytes decoded (pEnd - pStart). */
21564 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
21566 BYTE* const pStart = p;
21568 /* up to 4 symbols at a time */
21569 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
21570 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
21571 HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
21572 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
21573 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
21576 /* [0-3] symbols remaining */
21578 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
21579 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
21581 /* no more data to retrieve from bitstream, no need to reload */
21583 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
21585 return pEnd-pStart;
/* HUF_decompress1X1_usingDTable_internal_body() :
 * single-stream X1 decoder : initializes one bit-stream over the whole
 * compressed input and decodes dstSize bytes with HUF_decodeStreamX1().
 * @return : dstSize on success, or corruption_detected if the bitstream
 *           is not fully consumed. */
21588 FORCE_INLINE_TEMPLATE size_t
21589 HUF_decompress1X1_usingDTable_internal_body(
21590 void* dst, size_t dstSize,
21591 const void* cSrc, size_t cSrcSize,
21592 const HUF_DTable* DTable)
21594 BYTE* op = (BYTE*)dst;
21595 BYTE* const oend = op + dstSize;
21596 const void* dtPtr = DTable + 1;
21597 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
21598 BIT_DStream_t bitD;
21599 DTableDesc const dtd = HUF_getDTableDesc(DTable);
21600 U32 const dtLog = dtd.tableLog;
21602 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
21604 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
/* a valid stream must be entirely consumed once oend is reached */
21606 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
/* HUF_decompress4X1_usingDTable_internal_body() :
 * 4-stream X1 decoder. The input starts with a 6-byte jump table giving the
 * lengths of streams 1-3; stream 4's length is deduced from cSrcSize.
 * Output is split into 4 equal segments decoded in lock-step, which keeps
 * 4 independent bitstreams in flight for ILP. */
21611 FORCE_INLINE_TEMPLATE size_t
21612 HUF_decompress4X1_usingDTable_internal_body(
21613 void* dst, size_t dstSize,
21614 const void* cSrc, size_t cSrcSize,
21615 const HUF_DTable* DTable)
21618 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
21620 { const BYTE* const istart = (const BYTE*) cSrc;
21621 BYTE* const ostart = (BYTE*) dst;
21622 BYTE* const oend = ostart + dstSize;
21623 BYTE* const olimit = oend - 3;
21624 const void* const dtPtr = DTable + 1;
21625 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
21628 BIT_DStream_t bitD1;
21629 BIT_DStream_t bitD2;
21630 BIT_DStream_t bitD3;
21631 BIT_DStream_t bitD4;
/* read the jump table; length4 is whatever remains after streams 1-3 */
21632 size_t const length1 = MEM_readLE16(istart);
21633 size_t const length2 = MEM_readLE16(istart+2);
21634 size_t const length3 = MEM_readLE16(istart+4);
21635 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
21636 const BYTE* const istart1 = istart + 6; /* jumpTable */
21637 const BYTE* const istart2 = istart1 + length1;
21638 const BYTE* const istart3 = istart2 + length2;
21639 const BYTE* const istart4 = istart3 + length3;
21640 const size_t segmentSize = (dstSize+3) / 4;
21641 BYTE* const opStart2 = ostart + segmentSize;
21642 BYTE* const opStart3 = opStart2 + segmentSize;
21643 BYTE* const opStart4 = opStart3 + segmentSize;
21644 BYTE* op1 = ostart;
21645 BYTE* op2 = opStart2;
21646 BYTE* op3 = opStart3;
21647 BYTE* op4 = opStart4;
21648 DTableDesc const dtd = HUF_getDTableDesc(DTable);
21649 U32 const dtLog = dtd.tableLog;
/* length4 computed by subtraction : a wrap-around means lengths 1-3 lied */
21652 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
21653 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
21654 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
21655 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
21656 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
21658 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
21659 for ( ; (endSignal) & (op4 < olimit) ; ) {
21660 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
21661 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
21662 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
21663 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
21664 HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
21665 HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
21666 HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
21667 HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
21668 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
21669 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
21670 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
21671 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
21672 HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
21673 HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
21674 HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
21675 HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
21676 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
21677 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
21678 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
21679 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
21682 /* check corruption */
21683 /* note : should not be necessary : op# advance in lock step, and we control op4.
21684 * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
21685 if (op1 > opStart2) return ERROR(corruption_detected);
21686 if (op2 > opStart3) return ERROR(corruption_detected);
21687 if (op3 > opStart4) return ERROR(corruption_detected);
21688 /* note : op4 supposed already verified within main loop */
21690 /* finish bitStreams one by one */
21691 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
21692 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
21693 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
21694 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
/* all four streams must be exactly exhausted */
21697 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
21698 if (!endCheck) return ERROR(corruption_detected); }
/* Function-pointer type matching the usingDTable decoder signature. */
21706 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
21709 const HUF_DTable *DTable);
/* Instantiate dispatchers around the X1 internal bodies (see HUF_DGEN). */
21711 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
21712 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
21716 size_t HUF_decompress1X1_usingDTable(
21717 void* dst, size_t dstSize,
21718 const void* cSrc, size_t cSrcSize,
21719 const HUF_DTable* DTable)
21721 DTableDesc dtd = HUF_getDTableDesc(DTable);
21722 if (dtd.tableType != 0) return ERROR(GENERIC);
21723 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
21726 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
21727 const void* cSrc, size_t cSrcSize,
21728 void* workSpace, size_t wkspSize)
21730 const BYTE* ip = (const BYTE*) cSrc;
21732 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
21733 if (HUF_isError(hSize)) return hSize;
21734 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
21735 ip += hSize; cSrcSize -= hSize;
21737 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
21741 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
21742 const void* cSrc, size_t cSrcSize)
21744 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
21745 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
21746 workSpace, sizeof(workSpace));
21749 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
21751 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
21752 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
21755 size_t HUF_decompress4X1_usingDTable(
21756 void* dst, size_t dstSize,
21757 const void* cSrc, size_t cSrcSize,
21758 const HUF_DTable* DTable)
21760 DTableDesc dtd = HUF_getDTableDesc(DTable);
21761 if (dtd.tableType != 0) return ERROR(GENERIC);
21762 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
21765 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
21766 const void* cSrc, size_t cSrcSize,
21767 void* workSpace, size_t wkspSize, int bmi2)
21769 const BYTE* ip = (const BYTE*) cSrc;
21771 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
21772 workSpace, wkspSize);
21773 if (HUF_isError(hSize)) return hSize;
21774 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
21775 ip += hSize; cSrcSize -= hSize;
21777 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
21780 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
21781 const void* cSrc, size_t cSrcSize,
21782 void* workSpace, size_t wkspSize)
21784 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
21788 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
21790 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
21791 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
21792 workSpace, sizeof(workSpace));
21794 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
21796 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
21797 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
21800 #endif /* HUF_FORCE_DECOMPRESS_X2 */
21803 #ifndef HUF_FORCE_DECOMPRESS_X1
21805 /* *************************/
21806 /* double-symbols decoding */
21807 /* *************************/
/* HUF_DEltX2 : one cell of the double-symbol table : up to 2 decoded bytes
 * (sequence), total consumed bits (nbBits), and how many bytes are valid
 * (length, 1 or 2). */
21809 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
21810 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
21811 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
21812 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
21815 /* HUF_fillDTableX2Level2() :
21816 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
21817 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
21818 const U32* rankValOrigin, const int minWeight,
21819 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
21820 U32 nbBitsBaseline, U16 baseSeq)
21823 U32 rankVal[HUF_TABLELOG_MAX + 1];
21825 /* get pre-calculated rankVal */
21826 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
21828 /* fill skipped values */
/* cells before the first weight >= minWeight get a 1-byte entry (baseSeq) */
21830 U32 i, skipSize = rankVal[minWeight];
21831 MEM_writeLE16(&(DElt.sequence), baseSeq);
21832 DElt.nbBits = (BYTE)(consumed);
21834 for (i = 0; i < skipSize; i++)
/* fill the remaining cells with 2-byte entries : baseSeq + second symbol */
21839 { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
21840 const U32 symbol = sortedSymbols[s].symbol;
21841 const U32 weight = sortedSymbols[s].weight;
21842 const U32 nbBits = nbBitsBaseline - weight;
21843 const U32 length = 1 << (sizeLog-nbBits);
21844 const U32 start = rankVal[weight];
21846 const U32 end = start + length;
21848 MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
21849 DElt.nbBits = (BYTE)(nbBits + consumed);
21851 do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
21853 rankVal[weight] += length;
/* HUF_fillDTableX2() :
 * fill the double-symbol table : for each sorted symbol, either recurse into
 * HUF_fillDTableX2Level2() when a second symbol can fit in the remaining
 * bits, or write plain 1-byte entries across the symbol's range. */
21858 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
21859 const sortedSymbol_t* sortedList, const U32 sortedListSize,
21860 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
21861 const U32 nbBitsBaseline)
21863 U32 rankVal[HUF_TABLELOG_MAX + 1];
21864 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
21865 const U32 minBits = nbBitsBaseline - maxWeight;
21868 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
21871 for (s=0; s<sortedListSize; s++) {
21872 const U16 symbol = sortedList[s].symbol;
21873 const U32 weight = sortedList[s].weight;
21874 const U32 nbBits = nbBitsBaseline - weight;
21875 const U32 start = rankVal[weight];
21876 const U32 length = 1 << (targetLog-nbBits);
21878 if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
21880 int minWeight = nbBits + scaleLog;
21881 if (minWeight < 1) minWeight = 1;
21882 sortedRank = rankStart[minWeight];
21883 HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
21884 rankValOrigin[nbBits], minWeight,
21885 sortedList+sortedRank, sortedListSize-sortedRank,
21886 nbBitsBaseline, symbol);
/* not enough room : plain single-symbol entry replicated over the range */
21889 MEM_writeLE16(&(DElt.sequence), symbol);
21890 DElt.nbBits = (BYTE)(nbBits);
21892 { U32 const end = start + length;
21894 for (u = start; u < end; u++) DTable[u] = DElt;
21896 rankVal[weight] += length;
/* HUF_readDTableX2_wksp() :
 * build a double-symbol (X2) decoding table from a serialized Huffman tree
 * description. Uses workSpace for all scratch structures (rank tables,
 * sorted symbol list, weight list).
 * @return : size of the consumed header, or an error code. */
21900 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
21901 const void* src, size_t srcSize,
21902 void* workSpace, size_t wkspSize)
21904 U32 tableLog, maxW, sizeOfSort, nbSymbols;
21905 DTableDesc dtd = HUF_getDTableDesc(DTable);
21906 U32 const maxTableLog = dtd.maxTableLog;
21908 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
21909 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
21912 rankValCol_t* rankVal;
21915 sortedSymbol_t* sortedSymbol;
21917 size_t spaceUsed32 = 0;
/* carve all scratch arrays out of workSpace, tracked in U32 units */
21919 rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
21920 spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
21921 rankStats = (U32 *)workSpace + spaceUsed32;
21922 spaceUsed32 += HUF_TABLELOG_MAX + 1;
21923 rankStart0 = (U32 *)workSpace + spaceUsed32;
21924 spaceUsed32 += HUF_TABLELOG_MAX + 2;
21925 sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
21926 spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
21927 weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
21928 spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
21930 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
21932 rankStart = rankStart0 + 1;
21933 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
21935 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
21936 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
21937 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
/* decode symbol weights and per-rank counts from the header */
21939 iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
21940 if (HUF_isError(iSize)) return iSize;
21943 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
21945 /* find maxWeight */
21946 for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
21948 /* Get start index of each weight */
21949 { U32 w, nextRankStart = 0;
21950 for (w=1; w<maxW+1; w++) {
21951 U32 current = nextRankStart;
21952 nextRankStart += rankStats[w];
21953 rankStart[w] = current;
21955 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
21956 sizeOfSort = nextRankStart;
21959 /* sort symbols by weight */
21961 for (s=0; s<nbSymbols; s++) {
21962 U32 const w = weightList[s];
21963 U32 const r = rankStart[w]++;
21964 sortedSymbol[r].symbol = (BYTE)s;
21965 sortedSymbol[r].weight = (BYTE)w;
21967 rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
21970 /* Build rankVal */
21971 { U32* const rankVal0 = rankVal[0];
21972 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
21973 U32 nextRankVal = 0;
21975 for (w=1; w<maxW+1; w++) {
21976 U32 current = nextRankVal;
21977 nextRankVal += rankStats[w] << (w+rescale);
21978 rankVal0[w] = current;
/* pre-shift rankVal for every possible number of already-consumed bits */
21980 { U32 const minBits = tableLog+1 - maxW;
21982 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
21983 U32* const rankValPtr = rankVal[consumed];
21985 for (w = 1; w < maxW+1; w++) {
21986 rankValPtr[w] = rankVal0[w] >> consumed;
21989 HUF_fillDTableX2(dt, maxTableLog,
21990 sortedSymbol, sizeOfSort,
21991 rankStart0, rankVal, maxW,
/* publish the effective tableLog through the DTable header */
21994 dtd.tableLog = (BYTE)maxTableLog;
21996 memcpy(DTable, &dtd, sizeof(dtd));
22000 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
22002 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
22003 return HUF_readDTableX2_wksp(DTable, src, srcSize,
22004 workSpace, sizeof(workSpace));
22008 FORCE_INLINE_TEMPLATE U32
22009 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
22011 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
22012 memcpy(op, dt+val, 2);
22013 BIT_skipBits(DStream, dt[val].nbBits);
22014 return dt[val].length;
/* HUF_decodeLastSymbolX2() :
 * decode the final symbol of a stream : only 1 byte is written. When the
 * cell encodes 2 symbols, bit accounting is clamped to the container size
 * (acceptable only because this is the very last symbol). */
22017 FORCE_INLINE_TEMPLATE U32
22018 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
22020 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
22021 memcpy(op, dt+val, 1);
22022 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
22024 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
22025 BIT_skipBits(DStream, dt[val].nbBits);
22026 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
22027 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
22028 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
/* emit one double-symbol cell unconditionally */
22033 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
22034 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
/* emit only when the bit container is guaranteed deep enough */
22036 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
22037 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
22038 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
/* emit only on 64-bit targets */
22040 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
22041 if (MEM_64bits()) \
22042 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
/* HUF_decodeStreamX2() :
 * decode symbols from bitDPtr into [p, pEnd) using the X2 table.
 * Each lookup may emit 2 bytes, so the fast-path bound keeps a full
 * bit-container's worth of slack; a possible trailing single byte is
 * finished with HUF_decodeLastSymbolX2(). */
22045 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
22046 const HUF_DEltX2* const dt, const U32 dtLog)
22048 BYTE* const pStart = p;
22050 /* up to 8 symbols at a time */
22051 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
22052 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
22053 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
22054 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
22055 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
22058 /* closer to end : up to 2 symbols at a time */
22059 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
22060 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
22062 while (p <= pEnd-2)
22063 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
/* at most one byte left : decode it without risking a 2-byte write */
22066 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
/* HUF_decompress1X2_usingDTable_internal_body() :
 * single-stream X2 decoder : one bit-stream over the whole compressed
 * input, decoded with HUF_decodeStreamX2().
 * @return : dstSize on success, or corruption_detected if the bitstream
 *           is not fully consumed. */
22071 FORCE_INLINE_TEMPLATE size_t
22072 HUF_decompress1X2_usingDTable_internal_body(
22073 void* dst, size_t dstSize,
22074 const void* cSrc, size_t cSrcSize,
22075 const HUF_DTable* DTable)
22077 BIT_DStream_t bitD;
22080 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
22083 { BYTE* const ostart = (BYTE*) dst;
22084 BYTE* const oend = ostart + dstSize;
22085 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
22086 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
22087 DTableDesc const dtd = HUF_getDTableDesc(DTable);
22088 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
/* a valid stream must be entirely consumed once oend is reached */
22092 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
/* HUF_decompress4X2_usingDTable_internal_body() :
 * 4-stream X2 decoder. Same framing as the X1 variant : 6-byte jump table
 * for streams 1-3, stream 4 length deduced from cSrcSize, output split into
 * 4 equal segments decoded in lock-step. A clang/x86-specific loop body
 * pairs streams differently to help the scheduler. */
22098 FORCE_INLINE_TEMPLATE size_t
22099 HUF_decompress4X2_usingDTable_internal_body(
22100 void* dst, size_t dstSize,
22101 const void* cSrc, size_t cSrcSize,
22102 const HUF_DTable* DTable)
22104 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
22106 { const BYTE* const istart = (const BYTE*) cSrc;
22107 BYTE* const ostart = (BYTE*) dst;
22108 BYTE* const oend = ostart + dstSize;
22109 BYTE* const olimit = oend - (sizeof(size_t)-1);
22110 const void* const dtPtr = DTable+1;
22111 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
22114 BIT_DStream_t bitD1;
22115 BIT_DStream_t bitD2;
22116 BIT_DStream_t bitD3;
22117 BIT_DStream_t bitD4;
/* read the jump table; length4 is whatever remains after streams 1-3 */
22118 size_t const length1 = MEM_readLE16(istart);
22119 size_t const length2 = MEM_readLE16(istart+2);
22120 size_t const length3 = MEM_readLE16(istart+4);
22121 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
22122 const BYTE* const istart1 = istart + 6; /* jumpTable */
22123 const BYTE* const istart2 = istart1 + length1;
22124 const BYTE* const istart3 = istart2 + length2;
22125 const BYTE* const istart4 = istart3 + length3;
22126 size_t const segmentSize = (dstSize+3) / 4;
22127 BYTE* const opStart2 = ostart + segmentSize;
22128 BYTE* const opStart3 = opStart2 + segmentSize;
22129 BYTE* const opStart4 = opStart3 + segmentSize;
22130 BYTE* op1 = ostart;
22131 BYTE* op2 = opStart2;
22132 BYTE* op3 = opStart3;
22133 BYTE* op4 = opStart4;
22135 DTableDesc const dtd = HUF_getDTableDesc(DTable);
22136 U32 const dtLog = dtd.tableLog;
22138 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
22139 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
22140 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
22141 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
22142 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
22144 /* 16-32 symbols per loop (4-8 symbols per stream) */
22145 for ( ; (endSignal) & (op4 < olimit); ) {
22146 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
/* clang/x86 ordering : streams handled in pairs, reloads interleaved */
22147 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
22148 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
22149 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
22150 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
22151 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
22152 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
22153 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
22154 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
22155 endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
22156 endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
22157 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
22158 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
22159 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
22160 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
22161 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
22162 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
22163 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
22164 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
22165 endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
22166 endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
/* generic ordering : all 4 streams advanced together, then one reload */
22168 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
22169 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
22170 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
22171 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
22172 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
22173 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
22174 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
22175 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
22176 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
22177 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
22178 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
22179 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
22180 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
22181 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
22182 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
22183 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
22184 endSignal = (U32)LIKELY(
22185 (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
22186 & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
22187 & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
22188 & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
22192 /* check corruption */
22193 if (op1 > opStart2) return ERROR(corruption_detected);
22194 if (op2 > opStart3) return ERROR(corruption_detected);
22195 if (op3 > opStart4) return ERROR(corruption_detected);
22196 /* note : op4 already verified within main loop */
22198 /* finish bitStreams one by one */
22199 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
22200 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
22201 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
22202 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* all four streams must be exactly exhausted */
22205 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
22206 if (!endCheck) return ERROR(corruption_detected); }
/* Instantiate dispatchers around the X2 internal bodies (see HUF_DGEN). */
22213 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
22214 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
22216 size_t HUF_decompress1X2_usingDTable(
22217 void* dst, size_t dstSize,
22218 const void* cSrc, size_t cSrcSize,
22219 const HUF_DTable* DTable)
22221 DTableDesc dtd = HUF_getDTableDesc(DTable);
22222 if (dtd.tableType != 1) return ERROR(GENERIC);
22223 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22226 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
22227 const void* cSrc, size_t cSrcSize,
22228 void* workSpace, size_t wkspSize)
22230 const BYTE* ip = (const BYTE*) cSrc;
22232 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
22233 workSpace, wkspSize);
22234 if (HUF_isError(hSize)) return hSize;
22235 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
22236 ip += hSize; cSrcSize -= hSize;
22238 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
22242 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
22243 const void* cSrc, size_t cSrcSize)
22245 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
22246 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
22247 workSpace, sizeof(workSpace));
22250 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
22252 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
22253 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
22256 size_t HUF_decompress4X2_usingDTable(
22257 void* dst, size_t dstSize,
22258 const void* cSrc, size_t cSrcSize,
22259 const HUF_DTable* DTable)
22261 DTableDesc dtd = HUF_getDTableDesc(DTable);
22262 if (dtd.tableType != 1) return ERROR(GENERIC);
22263 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22266 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
22267 const void* cSrc, size_t cSrcSize,
22268 void* workSpace, size_t wkspSize, int bmi2)
22270 const BYTE* ip = (const BYTE*) cSrc;
22272 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
22273 workSpace, wkspSize);
22274 if (HUF_isError(hSize)) return hSize;
22275 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
22276 ip += hSize; cSrcSize -= hSize;
22278 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
22281 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
22282 const void* cSrc, size_t cSrcSize,
22283 void* workSpace, size_t wkspSize)
22285 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
22289 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
22290 const void* cSrc, size_t cSrcSize)
22292 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
22293 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
22294 workSpace, sizeof(workSpace));
22297 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
22299 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
22300 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
22303 #endif /* HUF_FORCE_DECOMPRESS_X1 */
22306 /* ***********************************/
22307 /* Universal decompression selectors */
22308 /* ***********************************/
22310 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
22311 const void* cSrc, size_t cSrcSize,
22312 const HUF_DTable* DTable)
22314 DTableDesc const dtd = HUF_getDTableDesc(DTable);
22315 #if defined(HUF_FORCE_DECOMPRESS_X1)
22317 assert(dtd.tableType == 0);
22318 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22319 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22321 assert(dtd.tableType == 1);
22322 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22324 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
22325 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22329 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
22330 const void* cSrc, size_t cSrcSize,
22331 const HUF_DTable* DTable)
22333 DTableDesc const dtd = HUF_getDTableDesc(DTable);
22334 #if defined(HUF_FORCE_DECOMPRESS_X1)
22336 assert(dtd.tableType == 0);
22337 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22338 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22340 assert(dtd.tableType == 1);
22341 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22343 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
22344 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
22349 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
/* Pre-measured decoder cost model :
 * tableTime = fixed cost of building the decoding table,
 * decode256Time = marginal cost per 256 bytes of regenerated output.
 * Rows are indexed by the quantized compression ratio Q = cSrcSize*16/dstSize
 * (see HUF_selectDecoder below) ; only columns 0 (single) and 1 (double) are
 * read by HUF_selectDecoder — column 2 (quad) appears unused here, presumably
 * kept for historical reasons (TODO confirm). */
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
    /* single, double, quad */
    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
22373 /** HUF_selectDecoder() :
22374 * Tells which decoder is likely to decode faster,
22375 * based on a set of pre-computed metrics.
22376 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
22377 * Assumption : 0 < dstSize <= 128 KB */
22378 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
22380 assert(dstSize > 0);
22381 assert(dstSize <= 128*1024);
22382 #if defined(HUF_FORCE_DECOMPRESS_X1)
22386 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22391 /* decoder timing evaluation */
22392 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
22393 U32 const D256 = (U32)(dstSize >> 8);
22394 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
22395 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
22396 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
22397 return DTime1 < DTime0;
22403 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
22405 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
22407 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
22408 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
22411 /* validation checks */
22412 if (dstSize == 0) return ERROR(dstSize_tooSmall);
22413 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
22414 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
22415 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
22417 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
22418 #if defined(HUF_FORCE_DECOMPRESS_X1)
22420 assert(algoNb == 0);
22421 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
22422 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22424 assert(algoNb == 1);
22425 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
22427 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
22432 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
22434 /* validation checks */
22435 if (dstSize == 0) return ERROR(dstSize_tooSmall);
22436 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
22437 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
22438 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
22440 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
22441 #if defined(HUF_FORCE_DECOMPRESS_X1)
22443 assert(algoNb == 0);
22444 return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
22445 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22447 assert(algoNb == 1);
22448 return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
22450 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
22451 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
22456 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
22458 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
22459 return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
22460 workSpace, sizeof(workSpace));
22464 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
22465 size_t dstSize, const void* cSrc,
22466 size_t cSrcSize, void* workSpace,
22469 /* validation checks */
22470 if (dstSize == 0) return ERROR(dstSize_tooSmall);
22471 if (cSrcSize == 0) return ERROR(corruption_detected);
22473 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
22474 #if defined(HUF_FORCE_DECOMPRESS_X1)
22476 assert(algoNb == 0);
22477 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
22478 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22480 assert(algoNb == 1);
22481 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
22483 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
22484 cSrcSize, workSpace, wkspSize):
22485 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
22490 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
22491 const void* cSrc, size_t cSrcSize,
22492 void* workSpace, size_t wkspSize)
22494 /* validation checks */
22495 if (dstSize == 0) return ERROR(dstSize_tooSmall);
22496 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
22497 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
22498 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
22500 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
22501 #if defined(HUF_FORCE_DECOMPRESS_X1)
22503 assert(algoNb == 0);
22504 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
22505 cSrcSize, workSpace, wkspSize);
22506 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22508 assert(algoNb == 1);
22509 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
22510 cSrcSize, workSpace, wkspSize);
22512 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
22513 cSrcSize, workSpace, wkspSize):
22514 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
22515 cSrcSize, workSpace, wkspSize);
22520 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
22521 const void* cSrc, size_t cSrcSize)
22523 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
22524 return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
22525 workSpace, sizeof(workSpace));
22529 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
22531 DTableDesc const dtd = HUF_getDTableDesc(DTable);
22532 #if defined(HUF_FORCE_DECOMPRESS_X1)
22534 assert(dtd.tableType == 0);
22535 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
22536 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22538 assert(dtd.tableType == 1);
22539 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
22541 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
22542 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
22546 #ifndef HUF_FORCE_DECOMPRESS_X2
22547 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
22549 const BYTE* ip = (const BYTE*) cSrc;
22551 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
22552 if (HUF_isError(hSize)) return hSize;
22553 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
22554 ip += hSize; cSrcSize -= hSize;
22556 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
22560 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
22562 DTableDesc const dtd = HUF_getDTableDesc(DTable);
22563 #if defined(HUF_FORCE_DECOMPRESS_X1)
22565 assert(dtd.tableType == 0);
22566 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
22567 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22569 assert(dtd.tableType == 1);
22570 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
22572 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
22573 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
22577 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
22579 /* validation checks */
22580 if (dstSize == 0) return ERROR(dstSize_tooSmall);
22581 if (cSrcSize == 0) return ERROR(corruption_detected);
22583 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
22584 #if defined(HUF_FORCE_DECOMPRESS_X1)
22586 assert(algoNb == 0);
22587 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
22588 #elif defined(HUF_FORCE_DECOMPRESS_X2)
22590 assert(algoNb == 1);
22591 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
22593 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
22594 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
22598 /**** ended inlining decompress/huf_decompress.c ****/
22599 /**** start inlining decompress/zstd_ddict.c ****/
22601 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
22602 * All rights reserved.
22604 * This source code is licensed under both the BSD-style license (found in the
22605 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
22606 * in the COPYING file in the root directory of this source tree).
22607 * You may select, at your option, one of the above-listed licenses.
22611 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
22613 /*-*******************************************************
22615 *********************************************************/
22616 #include <string.h> /* memcpy, memmove, memset */
22617 /**** skipping file: ../common/cpu.h ****/
22618 /**** skipping file: ../common/mem.h ****/
22619 #define FSE_STATIC_LINKING_ONLY
22620 /**** skipping file: ../common/fse.h ****/
22621 #define HUF_STATIC_LINKING_ONLY
22622 /**** skipping file: ../common/huf.h ****/
22623 /**** start inlining zstd_decompress_internal.h ****/
22625 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
22626 * All rights reserved.
22628 * This source code is licensed under both the BSD-style license (found in the
22629 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
22630 * in the COPYING file in the root directory of this source tree).
22631 * You may select, at your option, one of the above-listed licenses.
22635 /* zstd_decompress_internal:
22636 * objects and definitions shared within lib/decompress modules */
22638 #ifndef ZSTD_DECOMPRESS_INTERNAL_H
22639 #define ZSTD_DECOMPRESS_INTERNAL_H
22642 /*-*******************************************************
22644 *********************************************************/
22645 /**** skipping file: ../common/mem.h ****/
22646 /**** skipping file: ../common/zstd_internal.h ****/
22650 /*-*******************************************************
22652 *********************************************************/
/* LL_base : base value for each of the MaxLL+1 literal-length codes.
 * Presumably the extra bits decoded from the stream are added to this base,
 * per the zstd format's literal-length code table — verify against the
 * sequence-decoding loop. */
static const U32 LL_base[MaxLL+1] = {
                 0,    1,    2,     3,     4,     5,     6,      7,
                 8,    9,   10,    11,    12,    13,    14,     15,
                16,   18,   20,    22,    24,    28,    32,     40,
                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
                0x2000, 0x4000, 0x8000, 0x10000 };
/* OF_base : base value for each of the MaxOff+1 offset codes.
 * Presumably combined with OF_bits extra bits read from the stream, per the
 * zstd format's offset code table — verify against the sequence-decoding
 * loop. */
static const U32 OF_base[MaxOff+1] = {
                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
/* OF_bits : number of extra bits carried by each offset code.
 * Entry i equals i, matching the doubling progression of OF_base above. */
static const U32 OF_bits[MaxOff+1] = {
                     0,  1,  2,  3,  4,  5,  6,  7,
                     8,  9, 10, 11, 12, 13, 14, 15,
                    16, 17, 18, 19, 20, 21, 22, 23,
                    24, 25, 26, 27, 28, 29, 30, 31 };
/* ML_base : base match length for each of the MaxML+1 match-length codes.
 * Note the minimum match length of 3 at code 0. Presumably the extra bits
 * decoded from the stream are added to this base, per the zstd format's
 * match-length code table — verify against the sequence-decoding loop. */
static const U32 ML_base[MaxML+1] = {
                     3,  4,  5,    6,     7,     8,     9,    10,
                    11, 12, 13,   14,    15,    16,    17,    18,
                    19, 20, 21,   22,    23,    24,    25,    26,
                    27, 28, 29,   30,    31,    32,    33,    34,
                    35, 37, 39,   41,    43,    47,    51,    59,
                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
22682 /*-*******************************************************
22683 * Decompression types
22684 *********************************************************/
22688 } ZSTD_seqSymbol_header;
22692 BYTE nbAdditionalBits;
22697 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
22700 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
22701 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
22702 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
22703 HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
22704 U32 rep[ZSTD_REP_NUM];
22705 } ZSTD_entropyDTables_t;
/* ZSTD_dStage : stages of the frame-decoding state machine.
 * The comment on the DCtx's bType field below indicates these are driven by
 * ZSTD_decompressContinue() — names follow the frame layout : header, block
 * header, block payload(s), checksum, and the skippable-frame path. */
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
/* ZSTD_dStreamStage : stages of the streaming-decompression loop
 * (presumably : init, header accumulation, read/load input, flush output —
 * confirm against the streaming implementation). */
typedef enum { zdss_init=0, zdss_loadHeader,
               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
22716 ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */
22717 ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */
22718 ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
22722 ZSTD_obm_buffered = 0, /* Buffer the output */
22723 ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
22724 } ZSTD_outBufferMode_e;
22728 const ZSTD_seqSymbol* LLTptr;
22729 const ZSTD_seqSymbol* MLTptr;
22730 const ZSTD_seqSymbol* OFTptr;
22731 const HUF_DTable* HUFptr;
22732 ZSTD_entropyDTables_t entropy;
22733 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
22734 const void* previousDstEnd; /* detect continuity */
22735 const void* prefixStart; /* start of current segment */
22736 const void* virtualStart; /* virtual start of previous segment if it was just before current one */
22737 const void* dictEnd; /* end of previous segment */
22739 ZSTD_frameHeader fParams;
22741 blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
22745 XXH64_state_t xxhState;
22747 ZSTD_format_e format;
22748 const BYTE* litPtr;
22749 ZSTD_customMem customMem;
22753 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
22756 ZSTD_DDict* ddictLocal;
22757 const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
22759 int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
22760 ZSTD_dictUses_e dictUses;
22763 ZSTD_dStreamStage streamStage;
22767 size_t maxWindowSize;
22769 size_t outBuffSize;
22773 void* legacyContext;
22774 U32 previousLegacyVersion;
22777 int noForwardProgress;
22778 ZSTD_outBufferMode_e outBufferMode;
22779 ZSTD_outBuffer expectedOutBuffer;
22782 BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
22783 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
22785 size_t oversizedDuration;
22787 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
22788 void const* dictContentBeginForFuzzing;
22789 void const* dictContentEndForFuzzing;
22791 }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
22794 /*-*******************************************************
22795 * Shared internal functions
22796 *********************************************************/
22798 /*! ZSTD_loadDEntropy() :
22799 * dict : must point at beginning of a valid zstd dictionary.
22800 * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
22801 size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
22802 const void* const dict, size_t const dictSize);
22804 /*! ZSTD_checkContinuity() :
22805 * check if next `dst` follows previous position, where decompression ended.
22806 * If yes, do nothing (continue on current segment).
22807 * If not, classify previous segment as "external dictionary", and start a new segment.
22808 * This function cannot fail. */
22809 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
22812 #endif /* ZSTD_DECOMPRESS_INTERNAL_H */
22813 /**** ended inlining zstd_decompress_internal.h ****/
22814 /**** start inlining zstd_ddict.h ****/
22816 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
22817 * All rights reserved.
22819 * This source code is licensed under both the BSD-style license (found in the
22820 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
22821 * in the COPYING file in the root directory of this source tree).
22822 * You may select, at your option, one of the above-listed licenses.
22826 #ifndef ZSTD_DDICT_H
22827 #define ZSTD_DDICT_H
22829 /*-*******************************************************
22831 *********************************************************/
22832 #include <stddef.h> /* size_t */
22833 /**** skipping file: ../zstd.h ****/
22836 /*-*******************************************************
22838 *********************************************************/
22840 /* note: several prototypes are already published in `zstd.h` :
22841 * ZSTD_createDDict()
22842 * ZSTD_createDDict_byReference()
22843 * ZSTD_createDDict_advanced()
22845 * ZSTD_initStaticDDict()
22846 * ZSTD_sizeof_DDict()
22847 * ZSTD_estimateDDictSize()
22848 * ZSTD_getDictID_fromDict()
22851 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
22852 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
22854 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
22858 #endif /* ZSTD_DDICT_H */
22859 /**** ended inlining zstd_ddict.h ****/
22861 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
22862 /**** start inlining ../legacy/zstd_legacy.h ****/
22864 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
22865 * All rights reserved.
22867 * This source code is licensed under both the BSD-style license (found in the
22868 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
22869 * in the COPYING file in the root directory of this source tree).
22870 * You may select, at your option, one of the above-listed licenses.
22873 #ifndef ZSTD_LEGACY_H
22874 #define ZSTD_LEGACY_H
22876 #if defined (__cplusplus)
22880 /* *************************************
22882 ***************************************/
22883 /**** skipping file: ../common/mem.h ****/
22884 /**** skipping file: ../common/error_private.h ****/
22885 /**** skipping file: ../common/zstd_internal.h ****/
22887 #if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
22888 # undef ZSTD_LEGACY_SUPPORT
22889 # define ZSTD_LEGACY_SUPPORT 8
22892 #if (ZSTD_LEGACY_SUPPORT <= 1)
22893 /**** start inlining zstd_v01.h ****/
22895 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
22896 * All rights reserved.
22898 * This source code is licensed under both the BSD-style license (found in the
22899 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
22900 * in the COPYING file in the root directory of this source tree).
22901 * You may select, at your option, one of the above-listed licenses.
22904 #ifndef ZSTD_V01_H_28739879432
22905 #define ZSTD_V01_H_28739879432
22907 #if defined (__cplusplus)
22911 /* *************************************
22913 ***************************************/
22914 #include <stddef.h> /* size_t */
22917 /* *************************************
22918 * Simple one-step function
22919 ***************************************/
22921 ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
22922 compressedSize : is the exact source size
22923 maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
22924 It must be equal or larger than originalSize, otherwise decompression will fail.
22925 return : the number of bytes decompressed into destination buffer (originalSize)
22926 or an errorCode if it fails (which can be tested using ZSTDv01_isError())
22928 size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
22929 const void* src, size_t compressedSize);
22932 ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
22933 srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
22934 cSize (output parameter) : the number of bytes that would be read to decompress this frame
22935 or an error code if it fails (which can be tested using ZSTDv01_isError())
22936 dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
22937 or ZSTD_CONTENTSIZE_ERROR if an error occurs
22939 note : assumes `cSize` and `dBound` are _not_ NULL.
22941 void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
22942 size_t* cSize, unsigned long long* dBound);
22945 ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
22947 unsigned ZSTDv01_isError(size_t code);
22950 /* *************************************
22951 * Advanced functions
22952 ***************************************/
22953 typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
22954 ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
22955 size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
22957 size_t ZSTDv01_decompressDCtx(void* ctx,
22958 void* dst, size_t maxOriginalSize,
22959 const void* src, size_t compressedSize);
22961 /* *************************************
22962 * Streaming functions
22963 ***************************************/
22964 size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
22966 size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
22967 size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
22969 Use above functions alternatively.
22970 ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
22971 ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
22972 Result is the number of bytes regenerated within 'dst'.
22973 It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
22976 /* *************************************
22977 * Prefix - version detection
22978 ***************************************/
22979 #define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */
22980 #define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */
22983 #if defined (__cplusplus)
22987 #endif /* ZSTD_V01_H_28739879432 */
22988 /**** ended inlining zstd_v01.h ****/
22990 #if (ZSTD_LEGACY_SUPPORT <= 2)
22991 /**** start inlining zstd_v02.h ****/
22993 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
22994 * All rights reserved.
22996 * This source code is licensed under both the BSD-style license (found in the
22997 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
22998 * in the COPYING file in the root directory of this source tree).
22999 * You may select, at your option, one of the above-listed licenses.
23002 #ifndef ZSTD_V02_H_4174539423
23003 #define ZSTD_V02_H_4174539423
23005 #if defined (__cplusplus)
23009 /* *************************************
23011 ***************************************/
23012 #include <stddef.h> /* size_t */
23015 /* *************************************
23016 * Simple one-step function
23017 ***************************************/
23019 ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
23020 compressedSize : is the exact source size
23021 maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
23022 It must be equal or larger than originalSize, otherwise decompression will fail.
23023 return : the number of bytes decompressed into destination buffer (originalSize)
             or an errorCode if it fails (which can be tested using ZSTDv02_isError())
23026 size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
23027 const void* src, size_t compressedSize);
23030 ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format
23031 srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
23032 cSize (output parameter) : the number of bytes that would be read to decompress this frame
                        or an error code if it fails (which can be tested using ZSTDv02_isError())
23034 dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
23035 or ZSTD_CONTENTSIZE_ERROR if an error occurs
23037 note : assumes `cSize` and `dBound` are _not_ NULL.
23039 void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
23040 size_t* cSize, unsigned long long* dBound);
23043 ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
23045 unsigned ZSTDv02_isError(size_t code);
23048 /* *************************************
23049 * Advanced functions
23050 ***************************************/
23051 typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx;
23052 ZSTDv02_Dctx* ZSTDv02_createDCtx(void);
23053 size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx);
23055 size_t ZSTDv02_decompressDCtx(void* ctx,
23056 void* dst, size_t maxOriginalSize,
23057 const void* src, size_t compressedSize);
23059 /* *************************************
23060 * Streaming functions
23061 ***************************************/
23062 size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx);
23064 size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx);
23065 size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
23067 Use above functions alternatively.
23068 ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue().
23069 ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
23070 Result is the number of bytes regenerated within 'dst'.
23071 It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
23074 /* *************************************
23075 * Prefix - version detection
23076 ***************************************/
23077 #define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */
23080 #if defined (__cplusplus)
23084 #endif /* ZSTD_V02_H_4174539423 */
23085 /**** ended inlining zstd_v02.h ****/
23087 #if (ZSTD_LEGACY_SUPPORT <= 3)
23088 /**** start inlining zstd_v03.h ****/
23090 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
23091 * All rights reserved.
23093 * This source code is licensed under both the BSD-style license (found in the
23094 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
23095 * in the COPYING file in the root directory of this source tree).
23096 * You may select, at your option, one of the above-listed licenses.
23099 #ifndef ZSTD_V03_H_298734209782
23100 #define ZSTD_V03_H_298734209782
23102 #if defined (__cplusplus)
23106 /* *************************************
23108 ***************************************/
23109 #include <stddef.h> /* size_t */
23112 /* *************************************
23113 * Simple one-step function
23114 ***************************************/
23116 ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
23117 compressedSize : is the exact source size
23118 maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
23119 It must be equal or larger than originalSize, otherwise decompression will fail.
23120 return : the number of bytes decompressed into destination buffer (originalSize)
             or an errorCode if it fails (which can be tested using ZSTDv03_isError())
23123 size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
23124 const void* src, size_t compressedSize);
23127 ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format
23128 srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
23129 cSize (output parameter) : the number of bytes that would be read to decompress this frame
                        or an error code if it fails (which can be tested using ZSTDv03_isError())
23131 dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
23132 or ZSTD_CONTENTSIZE_ERROR if an error occurs
23134 note : assumes `cSize` and `dBound` are _not_ NULL.
23136 void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
23137 size_t* cSize, unsigned long long* dBound);
23140 ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
23142 unsigned ZSTDv03_isError(size_t code);
23145 /* *************************************
23146 * Advanced functions
23147 ***************************************/
23148 typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx;
23149 ZSTDv03_Dctx* ZSTDv03_createDCtx(void);
23150 size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx);
23152 size_t ZSTDv03_decompressDCtx(void* ctx,
23153 void* dst, size_t maxOriginalSize,
23154 const void* src, size_t compressedSize);
23156 /* *************************************
23157 * Streaming functions
23158 ***************************************/
23159 size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx);
23161 size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx);
23162 size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
23164 Use above functions alternatively.
23165 ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
23166 ZSTD_decompressContinue() will use previous data blocks to improve decompression if they are located prior to current block.
23167 Result is the number of bytes regenerated within 'dst'.
23168 It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
23171 /* *************************************
23172 * Prefix - version detection
23173 ***************************************/
23174 #define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */
23177 #if defined (__cplusplus)
23181 #endif /* ZSTD_V03_H_298734209782 */
23182 /**** ended inlining zstd_v03.h ****/
23184 #if (ZSTD_LEGACY_SUPPORT <= 4)
23185 /**** start inlining zstd_v04.h ****/
23187 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
23188 * All rights reserved.
23190 * This source code is licensed under both the BSD-style license (found in the
23191 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
23192 * in the COPYING file in the root directory of this source tree).
23193 * You may select, at your option, one of the above-listed licenses.
23196 #ifndef ZSTD_V04_H_91868324769238
23197 #define ZSTD_V04_H_91868324769238
23199 #if defined (__cplusplus)
23203 /* *************************************
23205 ***************************************/
23206 #include <stddef.h> /* size_t */
23209 /* *************************************
23210 * Simple one-step function
23211 ***************************************/
23213 ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
23214 compressedSize : is the exact source size
23215 maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
23216 It must be equal or larger than originalSize, otherwise decompression will fail.
23217 return : the number of bytes decompressed into destination buffer (originalSize)
23218 or an errorCode if it fails (which can be tested using ZSTDv04_isError())
23220 size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
23221 const void* src, size_t compressedSize);
23224 ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format
23225 srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
23226 cSize (output parameter) : the number of bytes that would be read to decompress this frame
23227 or an error code if it fails (which can be tested using ZSTDv04_isError())
23228 dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
23229 or ZSTD_CONTENTSIZE_ERROR if an error occurs
23231 note : assumes `cSize` and `dBound` are _not_ NULL.
23233 void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
23234 size_t* cSize, unsigned long long* dBound);
23237 ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
23239 unsigned ZSTDv04_isError(size_t code);
23242 /* *************************************
23243 * Advanced functions
23244 ***************************************/
23245 typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx;
23246 ZSTDv04_Dctx* ZSTDv04_createDCtx(void);
23247 size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx);
23249 size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx,
23250 void* dst, size_t maxOriginalSize,
23251 const void* src, size_t compressedSize);
23254 /* *************************************
23256 ***************************************/
23257 size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx);
23259 size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx);
23260 size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
23262 Use above functions alternatively.
23263 ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
23264 ZSTD_decompressContinue() will use previous data blocks to improve decompression if they are located prior to current block.
23265 Result is the number of bytes regenerated within 'dst'.
23266 It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
23270 /* *************************************
23271 * Buffered Streaming
23272 ***************************************/
23273 typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx;
23274 ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void);
23275 size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx);
23277 size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx);
23278 size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize);
23280 size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr);
23282 /** ************************************************
23283 * Streaming decompression
23285 * A ZBUFF_DCtx object is required to track streaming operation.
23286 * Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
23287 * Use ZBUFF_decompressInit() to start a new decompression operation.
23288 * ZBUFF_DCtx objects can be reused multiple times.
23290 * Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary()
23291 * It must be the same content as the one set during compression phase.
23292 * Dictionary content must remain accessible during the decompression process.
23294 * Use ZBUFF_decompressContinue() repetitively to consume your input.
23295 * *srcSizePtr and *maxDstSizePtr can be any size.
23296 * The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr.
23297 * Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
23298 * The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst.
23299 * @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
23300 * or 0 when a frame is completely decoded
23301 * or an error code, which can be tested using ZBUFF_isError().
23303 * Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize
23304 * output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded.
23305 * input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
23306 * **************************************************/
23307 unsigned ZBUFFv04_isError(size_t errorCode);
23308 const char* ZBUFFv04_getErrorName(size_t errorCode);
23311 /** The below functions provide recommended buffer sizes for Compression or Decompression operations.
23312 * These sizes are not compulsory, they just tend to offer better latency */
23313 size_t ZBUFFv04_recommendedDInSize(void);
23314 size_t ZBUFFv04_recommendedDOutSize(void);
23317 /* *************************************
23318 * Prefix - version detection
23319 ***************************************/
23320 #define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */
23323 #if defined (__cplusplus)
23327 #endif /* ZSTD_V04_H_91868324769238 */
23328 /**** ended inlining zstd_v04.h ****/
23330 #if (ZSTD_LEGACY_SUPPORT <= 5)
23331 /**** start inlining zstd_v05.h ****/
23333 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
23334 * All rights reserved.
23336 * This source code is licensed under both the BSD-style license (found in the
23337 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
23338 * in the COPYING file in the root directory of this source tree).
23339 * You may select, at your option, one of the above-listed licenses.
23345 #if defined (__cplusplus)
23349 /*-*************************************
23351 ***************************************/
23352 #include <stddef.h> /* size_t */
23353 /**** skipping file: ../common/mem.h ****/
23356 /* *************************************
23358 ***************************************/
23359 /*! ZSTDv05_decompress() :
23360 `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
23361 `dstCapacity` must be large enough, equal or larger than originalSize.
23362 @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
23363 or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */
23364 size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
23365 const void* src, size_t compressedSize);
23368 ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format
23369 srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
23370 cSize (output parameter) : the number of bytes that would be read to decompress this frame
23371 or an error code if it fails (which can be tested using ZSTDv05_isError())
23372 dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
23373 or ZSTD_CONTENTSIZE_ERROR if an error occurs
23375 note : assumes `cSize` and `dBound` are _not_ NULL.
23377 void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
23378 size_t* cSize, unsigned long long* dBound);
23380 /* *************************************
23382 ***************************************/
23383 /* Error Management */
23384 unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
23385 const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */
23388 /* *************************************
23389 * Explicit memory management
23390 ***************************************/
23391 /** Decompression context */
23392 typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx;
23393 ZSTDv05_DCtx* ZSTDv05_createDCtx(void);
23394 size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */
23396 /** ZSTDv05_decompressDCtx() :
23397 * Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */
23398 size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
23401 /*-***********************
23402 * Simple Dictionary API
23403 *************************/
23404 /*! ZSTDv05_decompress_usingDict() :
23405 * Decompression using a pre-defined Dictionary content (see dictBuilder).
23406 * Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
23407 * Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */
23408 size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx,
23409 void* dst, size_t dstCapacity,
23410 const void* src, size_t srcSize,
23411 const void* dict,size_t dictSize);
23413 /*-************************
23414 * Advanced Streaming API
23415 ***************************/
23416 typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy;
23419 U32 windowLog; /* the only useful information to retrieve */
23420 U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy;
23421 } ZSTDv05_parameters;
23422 size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize);
23424 size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize);
23425 void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx);
23426 size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx);
23427 size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
23430 /*-***********************
23432 *************************/
23433 typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx;
23434 ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void);
23435 size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx);
23437 size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx);
23438 size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize);
23440 size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx,
23441 void* dst, size_t* dstCapacityPtr,
23442 const void* src, size_t* srcSizePtr);
23444 /*-***************************************************************************
23445 * Streaming decompression
23447 * A ZBUFFv05_DCtx object is required to track streaming operations.
23448 * Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources.
23449 * Use ZBUFFv05_decompressInit() to start a new decompression operation,
23450 * or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary.
23451 * Note that ZBUFFv05_DCtx objects can be reused multiple times.
23453 * Use ZBUFFv05_decompressContinue() repetitively to consume your input.
23454 * *srcSizePtr and *dstCapacityPtr can be any size.
23455 * The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
23456 * Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
23457 * The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst.
23458 * @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency)
23459 * or 0 when a frame is completely decoded
23460 * or an error code, which can be tested using ZBUFFv05_isError().
23462 * Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize()
23463 * output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
23464 * input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
23465 * *******************************************************************************/
23468 /* *************************************
23470 ***************************************/
23471 unsigned ZBUFFv05_isError(size_t errorCode);
23472 const char* ZBUFFv05_getErrorName(size_t errorCode);
23474 /** Functions below provide recommended buffer sizes for Compression or Decompression operations.
23475 * These sizes are just hints, and tend to offer better latency */
23476 size_t ZBUFFv05_recommendedDInSize(void);
23477 size_t ZBUFFv05_recommendedDOutSize(void);
23481 /*-*************************************
23483 ***************************************/
23484 #define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */
23489 #if defined (__cplusplus)
23493 #endif /* ZSTDv05_H */
23494 /**** ended inlining zstd_v05.h ****/
23496 #if (ZSTD_LEGACY_SUPPORT <= 6)
23497 /**** start inlining zstd_v06.h ****/
23499 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
23500 * All rights reserved.
23502 * This source code is licensed under both the BSD-style license (found in the
23503 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
23504 * in the COPYING file in the root directory of this source tree).
23505 * You may select, at your option, one of the above-listed licenses.
23511 #if defined (__cplusplus)
23515 /*====== Dependency ======*/
23516 #include <stddef.h> /* size_t */
23519 /*====== Export for Windows ======*/
23521 * ZSTDv06_DLL_EXPORT :
23522 * Enable exporting of functions when building a Windows DLL
23524 #if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1)
23525 # define ZSTDLIBv06_API __declspec(dllexport)
23527 # define ZSTDLIBv06_API
23531 /* *************************************
23533 ***************************************/
23534 /*! ZSTDv06_decompress() :
23535 `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail.
23536 `dstCapacity` must be large enough, equal or larger than originalSize.
23537 @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
23538 or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */
23539 ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
23540 const void* src, size_t compressedSize);
23543 ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format
23544 srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
23545 cSize (output parameter) : the number of bytes that would be read to decompress this frame
23546 or an error code if it fails (which can be tested using ZSTDv06_isError())
23547 dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
23548 or ZSTD_CONTENTSIZE_ERROR if an error occurs
23550 note : assumes `cSize` and `dBound` are _not_ NULL.
23552 void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
23553 size_t* cSize, unsigned long long* dBound);
23555 /* *************************************
23557 ***************************************/
23558 ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */
23560 /* Error Management */
23561 ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
23562 ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */
23565 /* *************************************
23566 * Explicit memory management
23567 ***************************************/
23568 /** Decompression context */
23569 typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx;
23570 ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void);
23571 ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */
23573 /** ZSTDv06_decompressDCtx() :
23574 * Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */
23575 ZSTDLIBv06_API size_t ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
23578 /*-***********************
23580 *************************/
23581 /*! ZSTDv06_decompress_usingDict() :
23582 * Decompression using a pre-defined Dictionary content (see dictBuilder).
23583 * Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted.
23584 * Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */
23585 ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx,
23586 void* dst, size_t dstCapacity,
23587 const void* src, size_t srcSize,
23588 const void* dict,size_t dictSize);
23591 /*-************************
23592 * Advanced Streaming API
23593 ***************************/
23594 struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; };
23595 typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams;
23597 ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */
23598 ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize);
23599 ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx);
23601 ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx);
23602 ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
23606 /* *************************************
23608 ***************************************/
23610 typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx;
23611 ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void);
23612 ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx);
23614 ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx);
23615 ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize);
23617 ZSTDLIBv06_API size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx,
23618 void* dst, size_t* dstCapacityPtr,
23619 const void* src, size_t* srcSizePtr);
23621 /*-***************************************************************************
23622 * Streaming decompression howto
23624 * A ZBUFFv06_DCtx object is required to track streaming operations.
23625 * Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources.
23626 * Use ZBUFFv06_decompressInit() to start a new decompression operation,
23627 * or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary.
23628 * Note that ZBUFFv06_DCtx objects can be re-init multiple times.
23630 * Use ZBUFFv06_decompressContinue() repetitively to consume your input.
23631 * *srcSizePtr and *dstCapacityPtr can be any size.
23632 * The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
23633 * Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
23634 * The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
23635 * @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
23636 * or 0 when a frame is completely decoded,
23637 * or an error code, which can be tested using ZBUFFv06_isError().
23639 * Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize()
23640 * output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
23641 * input : ZBUFFv06_recommendedDInSize == 128KB + 3;
23642 * just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
23643 * *******************************************************************************/
23646 /* *************************************
23648 ***************************************/
23649 ZSTDLIBv06_API unsigned ZBUFFv06_isError(size_t errorCode);
23650 ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode);
23652 /** Functions below provide recommended buffer sizes for Compression or Decompression operations.
23653 * These sizes are just hints, they tend to offer better latency */
23654 ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void);
23655 ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void);
23658 /*-*************************************
23660 ***************************************/
23661 #define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */
23665 #if defined (__cplusplus)
23669 #endif /* ZSTDv06_H */
23670 /**** ended inlining zstd_v06.h ****/
23672 #if (ZSTD_LEGACY_SUPPORT <= 7)
23673 /**** start inlining zstd_v07.h ****/
23675 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
23676 * All rights reserved.
23678 * This source code is licensed under both the BSD-style license (found in the
23679 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
23680 * in the COPYING file in the root directory of this source tree).
23681 * You may select, at your option, one of the above-listed licenses.
23684 #ifndef ZSTDv07_H_235446
23685 #define ZSTDv07_H_235446
23687 #if defined (__cplusplus)
23691 /*====== Dependency ======*/
23692 #include <stddef.h> /* size_t */
23695 /*====== Export for Windows ======*/
23697 * ZSTDv07_DLL_EXPORT :
23698 * Enable exporting of functions when building a Windows DLL
23700 #if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1)
23701 # define ZSTDLIBv07_API __declspec(dllexport)
23703 # define ZSTDLIBv07_API
23707 /* *************************************
23709 ***************************************/
23710 /*! ZSTDv07_getDecompressedSize() :
23711 * @return : decompressed size if known, 0 otherwise.
23712 note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause.
23713 note 2 : decompressed size could be wrong or intentionally modified !
23714 always ensure results fit within application's authorized limits */
23715 unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize);
23717 /*! ZSTDv07_decompress() :
23718 `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail.
23719 `dstCapacity` must be equal or larger than originalSize.
23720 @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
23721 or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */
23722 ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
23723 const void* src, size_t compressedSize);
23726 ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format
23727 srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
23728 cSize (output parameter) : the number of bytes that would be read to decompress this frame
23729 or an error code if it fails (which can be tested using ZSTDv07_isError())
23730 dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
23731 or ZSTD_CONTENTSIZE_ERROR if an error occurs
23733 note : assumes `cSize` and `dBound` are _not_ NULL.
23735 void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
23736 size_t* cSize, unsigned long long* dBound);
23738 /*====== Helper functions ======*/
23739 ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
23740 ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */
23743 /*-*************************************
23744 * Explicit memory management
23745 ***************************************/
23746 /** Decompression context */
23747 typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx;
23748 ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void);
23749 ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */
23751 /** ZSTDv07_decompressDCtx() :
23752 * Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */
23753 ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
23756 /*-************************
23757 * Simple dictionary API
23758 ***************************/
23759 /*! ZSTDv07_decompress_usingDict() :
23760 * Decompression using a pre-defined Dictionary content (see dictBuilder).
23761 * Dictionary must be identical to the one used during compression.
23762 * Note : This function load the dictionary, resulting in a significant startup time */
23763 ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx,
23764 void* dst, size_t dstCapacity,
23765 const void* src, size_t srcSize,
23766 const void* dict,size_t dictSize);
23769 /*-**************************
23770 * Advanced Dictionary API
23771 ****************************/
23772 /*! ZSTDv07_createDDict() :
23773 * Create a digested dictionary, ready to start decompression operation without startup delay.
23774 * `dict` can be released after creation */
23775 typedef struct ZSTDv07_DDict_s ZSTDv07_DDict;
23776 ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize);
23777 ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict);
23779 /*! ZSTDv07_decompress_usingDDict() :
23780 * Decompression using a pre-digested Dictionary
23781 * Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. */
23782 ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx,
23783 void* dst, size_t dstCapacity,
23784 const void* src, size_t srcSize,
23785 const ZSTDv07_DDict* ddict);
23788 unsigned long long frameContentSize;
23789 unsigned windowSize;
23791 unsigned checksumFlag;
23792 } ZSTDv07_frameParams;
23794 ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */
23799 /* *************************************
23800 * Streaming functions
23801 ***************************************/
23802 typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx;
23803 ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void);
23804 ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx);
23806 ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx);
23807 ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize);
23809 ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx,
23810 void* dst, size_t* dstCapacityPtr,
23811 const void* src, size_t* srcSizePtr);
23813 /*-***************************************************************************
23814 * Streaming decompression howto
23816 * A ZBUFFv07_DCtx object is required to track streaming operations.
23817 * Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources.
23818 * Use ZBUFFv07_decompressInit() to start a new decompression operation,
23819 * or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary.
23820 * Note that ZBUFFv07_DCtx objects can be re-init multiple times.
23822 * Use ZBUFFv07_decompressContinue() repetitively to consume your input.
23823 * *srcSizePtr and *dstCapacityPtr can be any size.
23824 * The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
23825 * Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
23826 * The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
23827 * @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency),
23828 * or 0 when a frame is completely decoded,
23829 * or an error code, which can be tested using ZBUFFv07_isError().
23831 * Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize()
23832 * output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
23833 * input : ZBUFFv07_recommendedDInSize == 128KB + 3;
23834 * just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
23835 * *******************************************************************************/
23838 /* *************************************
23840 ***************************************/
23841 ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode);
23842 ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode);
23844 /** Functions below provide recommended buffer sizes for Compression or Decompression operations.
23845 * These sizes are just hints, they tend to offer better latency */
23846 ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void);
23847 ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void);
23850 /*-*************************************
23852 ***************************************/
23853 #define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */
23856 #if defined (__cplusplus)
23860 #endif /* ZSTDv07_H_235446 */
23861 /**** ended inlining zstd_v07.h ****/
23864 /** ZSTD_isLegacy() :
23865 @return : > 0 if supported by legacy decoder. 0 otherwise.
23866 return value is the version.
/* ZSTD_isLegacy() :
 * Reads the 4-byte little-endian magic number at `src` and maps it to a
 * legacy zstd format version (1..7). Returns 0 when srcSize < 4 or the
 * magic matches no known legacy identifier. Each case is compiled in only
 * when ZSTD_LEGACY_SUPPORT permits that version.
 * NOTE(review): the opening brace, the `U32 magicNumberLE;` declaration and
 * the per-case #endif lines are elided in this extracted chunk — confirm
 * against the original zstd_legacy.h. */
23868 MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize)
23871 if (srcSize<4) return 0;
23872 magicNumberLE = MEM_readLE32(src);
23873 switch(magicNumberLE)
23875 #if (ZSTD_LEGACY_SUPPORT <= 1)
23876 case ZSTDv01_magicNumberLE:return 1;
23878 #if (ZSTD_LEGACY_SUPPORT <= 2)
23879 case ZSTDv02_magicNumber : return 2;
23881 #if (ZSTD_LEGACY_SUPPORT <= 3)
23882 case ZSTDv03_magicNumber : return 3;
23884 #if (ZSTD_LEGACY_SUPPORT <= 4)
23885 case ZSTDv04_magicNumber : return 4;
23887 #if (ZSTD_LEGACY_SUPPORT <= 5)
23888 case ZSTDv05_MAGICNUMBER : return 5;
23890 #if (ZSTD_LEGACY_SUPPORT <= 6)
23891 case ZSTDv06_MAGICNUMBER : return 6;
23893 #if (ZSTD_LEGACY_SUPPORT <= 7)
23894 case ZSTDv07_MAGICNUMBER : return 7;
23896 default : return 0;
/* ZSTD_getDecompressedSize_legacy() :
 * For legacy frames of version 5..7, returns the decompressed size declared
 * in the frame header (0 on parsing error or when unknown). Versions < 5
 * carry no decompressed size in their header, so 0 is returned for them
 * and for non-legacy input.
 * NOTE(review): the `if (version==N)` dispatch lines and closing braces /
 * #endif lines are elided in this extracted chunk. */
23901 MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize)
23903 U32 const version = ZSTD_isLegacy(src, srcSize);
23904 if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */
23905 #if (ZSTD_LEGACY_SUPPORT <= 5)
23907 ZSTDv05_parameters fParams;
23908 size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize);
23909 if (frResult != 0) return 0;
23910 return fParams.srcSize;
23913 #if (ZSTD_LEGACY_SUPPORT <= 6)
23915 ZSTDv06_frameParams fParams;
23916 size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize);
23917 if (frResult != 0) return 0;
23918 return fParams.frameContentSize;
23921 #if (ZSTD_LEGACY_SUPPORT <= 7)
23923 ZSTDv07_frameParams fParams;
23924 size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize);
23925 if (frResult != 0) return 0;
23926 return fParams.frameContentSize;
23929 return 0; /* should not be possible */
/* ZSTD_decompressLegacy() :
 * Single-shot decompression of one legacy frame, dispatching on the detected
 * version. v0.1-v0.4 decoders take no dictionary; v0.5-v0.7 allocate a
 * temporary version-specific DCtx and use the _usingDict variant, freeing
 * the DCtx before returning. Unknown versions yield ERROR(prefix_unknown).
 * NOTE(review): the `if (version==N)` dispatch lines, `size_t result;`
 * declarations and `return result;` statements are elided in this chunk. */
23933 MEM_STATIC size_t ZSTD_decompressLegacy(
23934 void* dst, size_t dstCapacity,
23935 const void* src, size_t compressedSize,
23936 const void* dict,size_t dictSize)
23938 U32 const version = ZSTD_isLegacy(src, compressedSize);
23939 (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
23942 #if (ZSTD_LEGACY_SUPPORT <= 1)
23944 return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize);
23946 #if (ZSTD_LEGACY_SUPPORT <= 2)
23948 return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize);
23950 #if (ZSTD_LEGACY_SUPPORT <= 3)
23952 return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize);
23954 #if (ZSTD_LEGACY_SUPPORT <= 4)
23956 return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize);
23958 #if (ZSTD_LEGACY_SUPPORT <= 5)
23961 ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx();
23962 if (zd==NULL) return ERROR(memory_allocation);
23963 result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
23964 ZSTDv05_freeDCtx(zd);
23968 #if (ZSTD_LEGACY_SUPPORT <= 6)
23971 ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx();
23972 if (zd==NULL) return ERROR(memory_allocation);
23973 result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
23974 ZSTDv06_freeDCtx(zd);
23978 #if (ZSTD_LEGACY_SUPPORT <= 7)
23981 ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx();
23982 if (zd==NULL) return ERROR(memory_allocation);
23983 result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize);
23984 ZSTDv07_freeDCtx(zd);
23989 return ERROR(prefix_unknown);
/* ZSTD_findFrameSizeInfoLegacy() :
 * Fills a ZSTD_frameSizeInfo (compressedSize + decompressedBound) for the
 * legacy frame starting at `src`, dispatching on the detected version.
 * Unknown version => compressedSize = ERROR(prefix_unknown). A final sanity
 * check rejects results whose compressedSize exceeds srcSize.
 * NOTE(review): the `if (version==N)` dispatch lines, `break`-equivalents and
 * #endif lines are elided in this extracted chunk. */
23993 MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
23995 ZSTD_frameSizeInfo frameSizeInfo;
23996 U32 const version = ZSTD_isLegacy(src, srcSize);
23999 #if (ZSTD_LEGACY_SUPPORT <= 1)
24001 ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
24002 &frameSizeInfo.compressedSize,
24003 &frameSizeInfo.decompressedBound);
24006 #if (ZSTD_LEGACY_SUPPORT <= 2)
24008 ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
24009 &frameSizeInfo.compressedSize,
24010 &frameSizeInfo.decompressedBound);
24013 #if (ZSTD_LEGACY_SUPPORT <= 3)
24015 ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
24016 &frameSizeInfo.compressedSize,
24017 &frameSizeInfo.decompressedBound);
24020 #if (ZSTD_LEGACY_SUPPORT <= 4)
24022 ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
24023 &frameSizeInfo.compressedSize,
24024 &frameSizeInfo.decompressedBound);
24027 #if (ZSTD_LEGACY_SUPPORT <= 5)
24029 ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
24030 &frameSizeInfo.compressedSize,
24031 &frameSizeInfo.decompressedBound);
24034 #if (ZSTD_LEGACY_SUPPORT <= 6)
24036 ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
24037 &frameSizeInfo.compressedSize,
24038 &frameSizeInfo.decompressedBound);
24041 #if (ZSTD_LEGACY_SUPPORT <= 7)
24043 ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
24044 &frameSizeInfo.compressedSize,
24045 &frameSizeInfo.decompressedBound);
24049 frameSizeInfo.compressedSize = ERROR(prefix_unknown);
24050 frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
24053 if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
24054 frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
24055 frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
24057 return frameSizeInfo;
24060 MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
24062 ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
24063 return frameSizeInfo.compressedSize;
/* ZSTD_freeLegacyStreamContext() :
 * Releases a streaming context previously created for legacy `version`
 * (only v0.4-v0.7 support streaming). Versions without streaming support,
 * or excluded at compile time, fall through to version_unsupported.
 * NOTE(review): the `switch(version)` line, the early `case` labels and the
 * closing braces are elided in this extracted chunk. */
24066 MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
24074 (void)legacyContext;
24075 return ERROR(version_unsupported);
24076 #if (ZSTD_LEGACY_SUPPORT <= 4)
24077 case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext);
24079 #if (ZSTD_LEGACY_SUPPORT <= 5)
24080 case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext);
24082 #if (ZSTD_LEGACY_SUPPORT <= 6)
24083 case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext);
24085 #if (ZSTD_LEGACY_SUPPORT <= 7)
24086 case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext);
/* ZSTD_initLegacyStream() :
 * (Re)initializes *legacyContext for streaming decompression with legacy
 * version `newVersion`. A context built for a different `prevVersion` is
 * freed first and a fresh version-specific DCtx is created; otherwise the
 * existing context is re-initialized in place with the given dictionary.
 * On success *legacyContext points to the (possibly new) context.
 * NOTE(review): the `switch(newVersion)` construct, `case` labels, `return 0;`
 * statements and closing braces are elided in this extracted chunk. */
24092 MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
24093 const void* dict, size_t dictSize)
24095 DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
24096 if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
24103 (void)dict; (void)dictSize;
24105 #if (ZSTD_LEGACY_SUPPORT <= 4)
24108 ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext;
24109 if (dctx==NULL) return ERROR(memory_allocation);
24110 ZBUFFv04_decompressInit(dctx);
24111 ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize);
24112 *legacyContext = dctx;
24116 #if (ZSTD_LEGACY_SUPPORT <= 5)
24119 ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext;
24120 if (dctx==NULL) return ERROR(memory_allocation);
24121 ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize);
24122 *legacyContext = dctx;
24126 #if (ZSTD_LEGACY_SUPPORT <= 6)
24129 ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext;
24130 if (dctx==NULL) return ERROR(memory_allocation);
24131 ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize);
24132 *legacyContext = dctx;
24136 #if (ZSTD_LEGACY_SUPPORT <= 7)
24139 ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext;
24140 if (dctx==NULL) return ERROR(memory_allocation);
24141 ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize);
24142 *legacyContext = dctx;
/* ZSTD_decompressLegacyStream() :
 * One round of streaming decompression through the version-specific
 * ZBUFFvXX_decompressContinue(). Each case computes the remaining input
 * span and free output span from the in/out buffer cursors, then advances
 * input->pos / output->pos by what was actually consumed / produced.
 * The ZBUFF hint (next recommended input size, or error code) is returned.
 * NOTE(review): the `switch(version)` construct, `case` labels,
 * `return hintSize;` statements and closing braces are elided in this chunk. */
24151 MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
24152 ZSTD_outBuffer* output, ZSTD_inBuffer* input)
24154 DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
24161 (void)legacyContext; (void)output; (void)input;
24162 return ERROR(version_unsupported);
24163 #if (ZSTD_LEGACY_SUPPORT <= 4)
24166 ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext;
24167 const void* src = (const char*)input->src + input->pos;
24168 size_t readSize = input->size - input->pos;
24169 void* dst = (char*)output->dst + output->pos;
24170 size_t decodedSize = output->size - output->pos;
24171 size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
24172 output->pos += decodedSize;
24173 input->pos += readSize;
24177 #if (ZSTD_LEGACY_SUPPORT <= 5)
24180 ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext;
24181 const void* src = (const char*)input->src + input->pos;
24182 size_t readSize = input->size - input->pos;
24183 void* dst = (char*)output->dst + output->pos;
24184 size_t decodedSize = output->size - output->pos;
24185 size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
24186 output->pos += decodedSize;
24187 input->pos += readSize;
24191 #if (ZSTD_LEGACY_SUPPORT <= 6)
24194 ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext;
24195 const void* src = (const char*)input->src + input->pos;
24196 size_t readSize = input->size - input->pos;
24197 void* dst = (char*)output->dst + output->pos;
24198 size_t decodedSize = output->size - output->pos;
24199 size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
24200 output->pos += decodedSize;
24201 input->pos += readSize;
24205 #if (ZSTD_LEGACY_SUPPORT <= 7)
24208 ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext;
24209 const void* src = (const char*)input->src + input->pos;
24210 size_t readSize = input->size - input->pos;
24211 void* dst = (char*)output->dst + output->pos;
24212 size_t decodedSize = output->size - output->pos;
24213 size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize);
24214 output->pos += decodedSize;
24215 input->pos += readSize;
24223 #if defined (__cplusplus)
24227 #endif /* ZSTD_LEGACY_H */
24228 /**** ended inlining ../legacy/zstd_legacy.h ****/
24233 /*-*******************************************************
24235 *********************************************************/
/* Digested dictionary for decompression.
 * NOTE(review): the `dictBuffer`, `dictSize` and `dictID` member declaration
 * lines are elided in this extracted chunk — `dictSize`/`dictID`/`dictBuffer`
 * are referenced throughout the functions below. */
24236 struct ZSTD_DDict_s {
24238 const void* dictContent;   /* points to dictBuffer (owned copy) or to caller memory (byRef) */
24240 ZSTD_entropyDTables_t entropy;   /* pre-decoded FSE/Huffman tables */
24242 U32 entropyPresent;   /* 1 when `entropy` was successfully loaded from the dictionary */
24243 ZSTD_customMem cMem;   /* allocator used to create this DDict; used again to free it */
24244 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
24246 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
24248 assert(ddict != NULL);
24249 return ddict->dictContent;
24252 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
24254 assert(ddict != NULL);
24255 return ddict->dictSize;
/* ZSTD_copyDDictParameters() :
 * Installs the dictionary held by `ddict` into `dctx` : sets the dictID and
 * the prefix/window pointers, then — when entropy tables were pre-decoded —
 * points the DCtx table pointers at the DDict's tables (shared by reference,
 * not copied) and copies the 3 repcodes. Otherwise entropy flags are cleared.
 * NOTE(review): the `} else {` line between the two branches and the closing
 * braces are elided in this extracted chunk. */
24258 void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
24260 DEBUGLOG(4, "ZSTD_copyDDictParameters");
24261 assert(dctx != NULL);
24262 assert(ddict != NULL);
24263 dctx->dictID = ddict->dictID;
24264 dctx->prefixStart = ddict->dictContent;
24265 dctx->virtualStart = ddict->dictContent;
24266 dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
24267 dctx->previousDstEnd = dctx->dictEnd;
24268 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
24269 dctx->dictContentBeginForFuzzing = dctx->prefixStart;
24270 dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
24272 if (ddict->entropyPresent) {
24273 dctx->litEntropy = 1;
24274 dctx->fseEntropy = 1;
24275 dctx->LLTptr = ddict->entropy.LLTable;
24276 dctx->MLTptr = ddict->entropy.MLTable;
24277 dctx->OFTptr = ddict->entropy.OFTable;
24278 dctx->HUFptr = ddict->entropy.hufTable;
24279 dctx->entropy.rep[0] = ddict->entropy.rep[0];
24280 dctx->entropy.rep[1] = ddict->entropy.rep[1];
24281 dctx->entropy.rep[2] = ddict->entropy.rep[2];
24283 dctx->litEntropy = 0;
24284 dctx->fseEntropy = 0;
/* ZSTD_loadEntropy_intoDDict() :
 * Decides whether the DDict content is a structured zstd dictionary and, if
 * so, pre-decodes its entropy tables. Raw-content mode (no magic, or size
 * < 8) is accepted unless the caller demanded ZSTD_dct_fullDict, in which
 * case dictionary_corrupted is returned. On success with a structured
 * dictionary : dictID is read and entropyPresent is set.
 * NOTE(review): the return type line (`static size_t`), final `return 0;`
 * and closing braces are elided in this extracted chunk. */
24290 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
24291 ZSTD_dictContentType_e dictContentType)
24294 ddict->entropyPresent = 0;
24295 if (dictContentType == ZSTD_dct_rawContent) return 0;
24297 if (ddict->dictSize < 8) {
24298 if (dictContentType == ZSTD_dct_fullDict)
24299 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
24300 return 0; /* pure content mode */
24302 { U32 const magic = MEM_readLE32(ddict->dictContent);
24303 if (magic != ZSTD_MAGIC_DICTIONARY) {
24304 if (dictContentType == ZSTD_dct_fullDict)
24305 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
24306 return 0; /* pure content mode */
24309 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
24311 /* load entropy tables */
24312 RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
24313 &ddict->entropy, ddict->dictContent, ddict->dictSize)),
24314 dictionary_corrupted, "");
24315 ddict->entropyPresent = 1;
/* ZSTD_initDDict_internal() :
 * Attaches the dictionary to `ddict` — either by reference (no copy; caller
 * must keep `dict` alive) or by copying into a freshly-allocated buffer
 * owned by the DDict. Then seeds the Huffman table header and parses the
 * dictionary content via ZSTD_loadEntropy_intoDDict().
 * @return : 0 on success, or memory_allocation / dictionary_corrupted.
 * NOTE(review): the `} else {` between the byRef and byCopy branches, the
 * final `return 0;` and closing braces are elided in this extracted chunk. */
24320 static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
24321 const void* dict, size_t dictSize,
24322 ZSTD_dictLoadMethod_e dictLoadMethod,
24323 ZSTD_dictContentType_e dictContentType)
24325 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
24326 ddict->dictBuffer = NULL;
24327 ddict->dictContent = dict;
24328 if (!dict) dictSize = 0;
24330 void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
24331 ddict->dictBuffer = internalBuffer;
24332 ddict->dictContent = internalBuffer;
24333 if (!internalBuffer) return ERROR(memory_allocation);
24334 memcpy(internalBuffer, dict, dictSize);
24336 ddict->dictSize = dictSize;
24337 ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
24339 /* parse dictionary content */
24340 FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
/* ZSTD_createDDict_advanced() :
 * Allocates a DDict with the supplied custom allocator (customAlloc and
 * customFree must be both set or both NULL) and initializes it via
 * ZSTD_initDDict_internal(). On init failure the partially-built DDict is
 * freed and NULL is returned.
 * NOTE(review): the `dict, dictSize,` argument line, the `return NULL;` /
 * `return ddict;` statements and closing braces are elided in this chunk. */
24345 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
24346 ZSTD_dictLoadMethod_e dictLoadMethod,
24347 ZSTD_dictContentType_e dictContentType,
24348 ZSTD_customMem customMem)
24350 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
24352 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
24353 if (ddict == NULL) return NULL;
24354 ddict->cMem = customMem;
24355 { size_t const initResult = ZSTD_initDDict_internal(ddict,
24357 dictLoadMethod, dictContentType);
24358 if (ZSTD_isError(initResult)) {
24359 ZSTD_freeDDict(ddict);
24366 /*! ZSTD_createDDict() :
24367 * Create a digested dictionary, to start decompression without startup delay.
24368 * `dict` content is copied inside DDict.
24369 * Consequently, `dict` can be released after `ZSTD_DDict` creation */
24370 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
24372 ZSTD_customMem const allocator = { NULL, NULL, NULL };
24373 return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
24376 /*! ZSTD_createDDict_byReference() :
24377 * Create a digested dictionary, to start decompression without startup delay.
24378 * Dictionary content is simply referenced, it will be accessed during decompression.
24379 * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
24380 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
24382 ZSTD_customMem const allocator = { NULL, NULL, NULL };
24383 return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
/* ZSTD_initStaticDDict() :
 * Builds a DDict inside caller-provided memory `sBuffer` (no allocation).
 * Layout : the ZSTD_DDict struct first, followed (in byCopy mode) by a copy
 * of the dictionary at ddict+1. Requires 8-byte alignment and enough room;
 * returns NULL on any precondition or init failure.
 * NOTE(review): the assignment of the effective dict pointer, the final
 * `return ddict;` and closing braces are elided in this extracted chunk. */
24387 const ZSTD_DDict* ZSTD_initStaticDDict(
24388 void* sBuffer, size_t sBufferSize,
24389 const void* dict, size_t dictSize,
24390 ZSTD_dictLoadMethod_e dictLoadMethod,
24391 ZSTD_dictContentType_e dictContentType)
24393 size_t const neededSpace = sizeof(ZSTD_DDict)
24394 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
24395 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
24396 assert(sBuffer != NULL);
24397 assert(dict != NULL);
24398 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
24399 if (sBufferSize < neededSpace) return NULL;
24400 if (dictLoadMethod == ZSTD_dlm_byCopy) {
24401 memcpy(ddict+1, dict, dictSize); /* local copy */
24404 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
24406 ZSTD_dlm_byRef, dictContentType) ))
24412 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
24414 if (ddict==NULL) return 0; /* support free on NULL */
24415 { ZSTD_customMem const cMem = ddict->cMem;
24416 ZSTD_free(ddict->dictBuffer, cMem);
24417 ZSTD_free(ddict, cMem);
24422 /*! ZSTD_estimateDDictSize() :
24423 * Estimate amount of memory that will be needed to create a dictionary for decompression.
24424 * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
24425 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
24427 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
24430 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
24432 if (ddict==NULL) return 0; /* support sizeof on NULL */
24433 return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
24436 /*! ZSTD_getDictID_fromDDict() :
24437 * Provides the dictID of the dictionary loaded into `ddict`.
24438 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
24439 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
24440 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
24442 if (ddict==NULL) return 0;
24443 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
24445 /**** ended inlining decompress/zstd_ddict.c ****/
24446 /**** start inlining decompress/zstd_decompress.c ****/
24448 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
24449 * All rights reserved.
24451 * This source code is licensed under both the BSD-style license (found in the
24452 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
24453 * in the COPYING file in the root directory of this source tree).
24454 * You may select, at your option, one of the above-listed licenses.
24458 /* ***************************************************************
24459 * Tuning parameters
24460 *****************************************************************/
24463 * Select how default decompression function ZSTD_decompress() allocates its context,
24464 * on stack (0), or into heap (1, default; requires malloc()).
24465 * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
24467 #ifndef ZSTD_HEAPMODE
24468 # define ZSTD_HEAPMODE 1
24473 * if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
24475 #ifndef ZSTD_LEGACY_SUPPORT
24476 # define ZSTD_LEGACY_SUPPORT 0
24480 * MAXWINDOWSIZE_DEFAULT :
24481 * maximum window size accepted by DStream __by default__.
24482 * Frames requiring more memory will be rejected.
24483 * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
24485 #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
24486 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
24490 * NO_FORWARD_PROGRESS_MAX :
24491 * maximum allowed nb of calls to ZSTD_decompressStream()
24492 * without any forward progress
24493 * (defined as: no byte read from input, and no byte flushed to output)
24494 * before triggering an error.
24496 #ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
24497 # define ZSTD_NO_FORWARD_PROGRESS_MAX 16
24501 /*-*******************************************************
24503 *********************************************************/
24504 #include <string.h> /* memcpy, memmove, memset */
24505 /**** skipping file: ../common/cpu.h ****/
24506 /**** skipping file: ../common/mem.h ****/
24507 #define FSE_STATIC_LINKING_ONLY
24508 /**** skipping file: ../common/fse.h ****/
24509 #define HUF_STATIC_LINKING_ONLY
24510 /**** skipping file: ../common/huf.h ****/
24511 /**** skipping file: ../common/zstd_internal.h ****/
24512 /**** skipping file: zstd_decompress_internal.h ****/
24513 /**** skipping file: zstd_ddict.h ****/
24514 /**** start inlining zstd_decompress_block.h ****/
24516 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
24517 * All rights reserved.
24519 * This source code is licensed under both the BSD-style license (found in the
24520 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
24521 * in the COPYING file in the root directory of this source tree).
24522 * You may select, at your option, one of the above-listed licenses.
24526 #ifndef ZSTD_DEC_BLOCK_H
24527 #define ZSTD_DEC_BLOCK_H
24529 /*-*******************************************************
24531 *********************************************************/
24532 #include <stddef.h> /* size_t */
24533 /**** skipping file: ../zstd.h ****/
24534 /**** skipping file: ../common/zstd_internal.h ****/
24535 /**** skipping file: zstd_decompress_internal.h ****/
24538 /* === Prototypes === */
24540 /* note: prototypes already published within `zstd.h` :
24541 * ZSTD_decompressBlock()
24544 /* note: prototypes already published within `zstd_internal.h` :
24545 * ZSTD_getcBlockSize()
24546 * ZSTD_decodeSeqHeaders()
24550 /* ZSTD_decompressBlock_internal() :
24551 * decompress block, starting at `src`,
24552 * into destination buffer `dst`.
24553 * @return : decompressed block size,
24554 * or an error code (which can be tested using ZSTD_isError())
24556 size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
24557 void* dst, size_t dstCapacity,
24558 const void* src, size_t srcSize, const int frame);
24560 /* ZSTD_buildFSETable() :
24561 * generate FSE decoding table for one symbol (ll, ml or off)
24562 * this function must be called with valid parameters only
24563 * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
24564 * in which case it cannot fail.
24565 * Internal use only.
24567 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
24568 const short* normalizedCounter, unsigned maxSymbolValue,
24569 const U32* baseValue, const U32* nbAdditionalBits,
24570 unsigned tableLog);
24573 #endif /* ZSTD_DEC_BLOCK_H */
24574 /**** ended inlining zstd_decompress_block.h ****/
24576 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
24577 /**** skipping file: ../legacy/zstd_legacy.h ****/
24581 /*-*************************************************************
24582 * Context management
24583 ***************************************************************/
24584 size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
24586 if (dctx==NULL) return 0; /* support sizeof NULL */
24587 return sizeof(*dctx)
24588 + ZSTD_sizeof_DDict(dctx->ddictLocal)
24589 + dctx->inBuffSize + dctx->outBuffSize;
24592 size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
24595 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
24597 size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
24598 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
24599 assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
24600 return startingInputLength;
/* ZSTD_initDCtx_internal() :
 * Sets every DCtx field to its default : zstd1 format, default max window,
 * no dictionary, streaming state reset, BMI2 capability probed from cpuid,
 * buffered output mode.
 * NOTE(review): the closing #endif for the fuzzing guard and the closing
 * brace are elided in this extracted chunk. */
24603 static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
24605 dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */
24606 dctx->staticSize = 0;
24607 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
24608 dctx->ddict = NULL;
24609 dctx->ddictLocal = NULL;
24610 dctx->dictEnd = NULL;
24611 dctx->ddictIsCold = 0;
24612 dctx->dictUses = ZSTD_dont_use;
24613 dctx->inBuff = NULL;
24614 dctx->inBuffSize = 0;
24615 dctx->outBuffSize = 0;
24616 dctx->streamStage = zdss_init;
24617 dctx->legacyContext = NULL;
24618 dctx->previousLegacyVersion = 0;
24619 dctx->noForwardProgress = 0;
24620 dctx->oversizedDuration = 0;
24621 dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
24622 dctx->outBufferMode = ZSTD_obm_buffered;
24623 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
24624 dctx->dictContentEndForFuzzing = NULL;
/* ZSTD_initStaticDCtx() :
 * Builds a DCtx inside caller-provided `workspace` (no allocation) : checks
 * 8-byte alignment and minimum size, initializes defaults, records the
 * workspace size in staticSize (marks the DCtx as static / non-freeable),
 * and points inBuff at the memory just past the struct.
 * NOTE(review): the final `return dctx;` and closing brace are elided. */
24628 ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
24630 ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
24632 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
24633 if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */
24635 ZSTD_initDCtx_internal(dctx);
24636 dctx->staticSize = workspaceSize;
24637 dctx->inBuff = (char*)(dctx+1);
/* ZSTD_createDCtx_advanced() :
 * Allocates and initializes a DCtx with a custom allocator. customAlloc and
 * customFree must be both set or both NULL, else NULL is returned.
 * NOTE(review): the final `return dctx;` and closing braces are elided. */
24641 ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
24643 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
24645 { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
24646 if (!dctx) return NULL;
24647 dctx->customMem = customMem;
24648 ZSTD_initDCtx_internal(dctx);
24653 ZSTD_DCtx* ZSTD_createDCtx(void)
24655 DEBUGLOG(3, "ZSTD_createDCtx");
24656 return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
24659 static void ZSTD_clearDict(ZSTD_DCtx* dctx)
24661 ZSTD_freeDDict(dctx->ddictLocal);
24662 dctx->ddictLocal = NULL;
24663 dctx->ddict = NULL;
24664 dctx->dictUses = ZSTD_dont_use;
/* ZSTD_freeDCtx() :
 * Releases a dynamically-allocated DCtx : its dictionary, its input buffer,
 * any legacy streaming context, then the struct itself. Static DCtx (built
 * by ZSTD_initStaticDCtx, staticSize != 0) cannot be freed => error.
 * NULL is accepted (no-op).
 * NOTE(review): the final `return 0;` and closing braces are elided. */
24667 size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
24669 if (dctx==NULL) return 0; /* support free on NULL */
24670 RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
24671 { ZSTD_customMem const cMem = dctx->customMem;
24672 ZSTD_clearDict(dctx);
24673 ZSTD_free(dctx->inBuff, cMem);
24674 dctx->inBuff = NULL;
24675 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
24676 if (dctx->legacyContext)
24677 ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion)
24679 ZSTD_free(dctx, cMem);
24684 /* no longer useful */
24685 void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
24687 size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
24688 memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
24692 /*-*************************************************************
24693 * Frame header decoding
24694 ***************************************************************/
24696 /*! ZSTD_isFrame() :
24697 * Tells if the content of `buffer` starts with a valid Frame Identifier.
24698 * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
24699 * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
24700 * Note 3 : Skippable Frame Identifiers are considered valid. */
/* Accepts : standard zstd magic, skippable-frame magic range, and (when
 * compiled in) legacy magics. @return 1 when recognized, 0 otherwise.
 * NOTE(review): the final `return 0;` and closing braces are elided in this
 * extracted chunk. */
24701 unsigned ZSTD_isFrame(const void* buffer, size_t size)
24703 if (size < ZSTD_FRAMEIDSIZE) return 0;
24704 { U32 const magic = MEM_readLE32(buffer);
24705 if (magic == ZSTD_MAGICNUMBER) return 1;
24706 if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
24708 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
24709 if (ZSTD_isLegacy(buffer, size)) return 1;
24714 /** ZSTD_frameHeaderSize_internal() :
24715 * srcSize must be large enough to reach header size fields.
24716 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
24717 * @return : size of the Frame Header
24718 * or an error code, which can be tested with ZSTD_isError() */
/* Frame-header-descriptor byte layout (last byte of the minimal prefix) :
 *   bits 0-1 : dictID field size code, bit 5 : single-segment flag,
 *   bits 6-7 : frame-content-size field code. Header size = minimal prefix
 *   + window-descriptor byte (absent in single-segment mode) + dictID field
 *   + FCS field (+1 when single-segment with fcsId==0 : 1-byte FCS). */
24719 static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
24721 size_t const minInputSize = ZSTD_startingInputLength(format);
24722 RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
24724 { BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
24725 U32 const dictID= fhd & 3;
24726 U32 const singleSegment = (fhd >> 5) & 1;
24727 U32 const fcsId = fhd >> 6;
24728 return minInputSize + !singleSegment
24729 + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
24730 + (singleSegment && !fcsId);
24734 /** ZSTD_frameHeaderSize() :
24735 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
24736 * @return : size of the Frame Header,
24737 * or an error code (if srcSize is too small) */
24738 size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
24740 return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
24744 /** ZSTD_getFrameHeader_advanced() :
24745 * decode Frame Header, or require larger `srcSize`.
24746 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
24747 * @return : 0, `zfhPtr` is correctly filled,
24748 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
24749 * or an error code, which can be tested using ZSTD_isError() */
/* ZSTD_getFrameHeader_advanced() :
 * Parses a frame header into *zfhPtr. Handles skippable frames (frameType =
 * ZSTD_skippableFrame, frameContentSize = skippable payload size), rejects
 * unknown magic, then decodes the descriptor byte : window descriptor
 * (absent in single-segment mode), dictID field (0/1/2/4 bytes) and frame
 * content size field (per fcsID; single-segment implies windowSize =
 * frameContentSize).
 * NOTE(review): several structural lines are elided in this extracted chunk
 * (the `return 0;` after the skippable branch, `U32 dictID = 0;`, the
 * `switch(fcsID)` introducer, and the final `return 0;` with closing
 * braces) — confirm against the original zstd_decompress.c. */
24750 size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
24752 const BYTE* ip = (const BYTE*)src;
24753 size_t const minInputSize = ZSTD_startingInputLength(format);
24755 memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
24756 if (srcSize < minInputSize) return minInputSize;
24757 RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
24759 if ( (format != ZSTD_f_zstd1_magicless)
24760 && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
24761 if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
24762 /* skippable frame */
24763 if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
24764 return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
24765 memset(zfhPtr, 0, sizeof(*zfhPtr));
24766 zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
24767 zfhPtr->frameType = ZSTD_skippableFrame;
24770 RETURN_ERROR(prefix_unknown, "");
24773 /* ensure there is enough `srcSize` to fully read/decode frame header */
24774 { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
24775 if (srcSize < fhsize) return fhsize;
24776 zfhPtr->headerSize = (U32)fhsize;
24779 { BYTE const fhdByte = ip[minInputSize-1];
24780 size_t pos = minInputSize;
24781 U32 const dictIDSizeCode = fhdByte&3;
24782 U32 const checksumFlag = (fhdByte>>2)&1;
24783 U32 const singleSegment = (fhdByte>>5)&1;
24784 U32 const fcsID = fhdByte>>6;
24785 U64 windowSize = 0;
24787 U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
24788 RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
24789 "reserved bits, must be zero");
24791 if (!singleSegment) {
24792 BYTE const wlByte = ip[pos++];
24793 U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
24794 RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
24795 windowSize = (1ULL << windowLog);
24796 windowSize += (windowSize >> 3) * (wlByte&7);
24798 switch(dictIDSizeCode)
24800 default: assert(0); /* impossible */
24802 case 1 : dictID = ip[pos]; pos++; break;
24803 case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
24804 case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
24808 default: assert(0); /* impossible */
24809 case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
24810 case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
24811 case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
24812 case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
24814 if (singleSegment) windowSize = frameContentSize;
24816 zfhPtr->frameType = ZSTD_frame;
24817 zfhPtr->frameContentSize = frameContentSize;
24818 zfhPtr->windowSize = windowSize;
24819 zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
24820 zfhPtr->dictID = dictID;
24821 zfhPtr->checksumFlag = checksumFlag;
24826 /** ZSTD_getFrameHeader() :
24827 * decode Frame Header, or require larger `srcSize`.
24828 * note : this function does not consume input, it only reads it.
24829 * @return : 0, `zfhPtr` is correctly filled,
24830 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
24831 * or an error code, which can be tested using ZSTD_isError() */
24832 size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
/* thin wrapper : pins the format to standard zstd frames (ZSTD_f_zstd1) */
24834 return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
24838 /** ZSTD_getFrameContentSize() :
24839 * compatible with legacy mode
24840 * @return : decompressed size of the single frame pointed to by `src` if known, otherwise
24841 * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
24842 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
24843 unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
24845 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
24846 if (ZSTD_isLegacy(src, srcSize)) {
24847 unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
/* legacy API reports 0 for "unknown"; translate to the modern sentinel */
24848 return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
/* non-zero return from ZSTD_getFrameHeader() means "srcSize too small" or error :
 * either way, content size cannot be determined */
24851 { ZSTD_frameHeader zfh;
24852 if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
24853 return ZSTD_CONTENTSIZE_ERROR;
24854 if (zfh.frameType == ZSTD_skippableFrame) {
24857 return zfh.frameContentSize;
/** readSkippableFrameSize() :
 * @return : total size (header + content) of the skippable frame starting at `src`,
 * or an error code if `srcSize` is too small or the size field overflows */
24861 static size_t readSkippableFrameSize(void const* src, size_t srcSize)
24863 size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
24866 RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
/* content size field sits right after the 4-byte magic number */
24868 sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
/* reject a sizeU32 so large that adding the header size wraps around in U32 */
24869 RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
24870 frameParameter_unsupported, "");
24872 size_t const skippableSize = skippableHeaderSize + sizeU32;
24873 RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
24874 return skippableSize;
24878 /** ZSTD_findDecompressedSize() :
24879 * compatible with legacy mode
24880 * `srcSize` must be the exact length of some number of ZSTD compressed and/or
24882 * @return : decompressed size of the frames contained */
24883 unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
24885 unsigned long long totalDstSize = 0;
/* walk frame by frame, summing each frame's declared content size */
24887 while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
24888 U32 const magicNumber = MEM_readLE32(src);
24890 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
/* skippable frames contribute nothing to the decompressed size */
24891 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
24892 if (ZSTD_isError(skippableSize)) {
24893 return ZSTD_CONTENTSIZE_ERROR;
24895 assert(skippableSize <= srcSize);
24897 src = (const BYTE *)src + skippableSize;
24898 srcSize -= skippableSize;
/* ZSTD_CONTENTSIZE_UNKNOWN / ZSTD_CONTENTSIZE_ERROR are the two largest
 * unsigned long long values, so `>=` catches both sentinels */
24902 { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
24903 if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
24905 /* check for overflow */
24906 if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
24907 totalDstSize += ret;
/* advance past the whole frame (header + blocks + optional checksum) */
24909 { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
24910 if (ZSTD_isError(frameSrcSize)) {
24911 return ZSTD_CONTENTSIZE_ERROR;
24914 src = (const BYTE *)src + frameSrcSize;
24915 srcSize -= frameSrcSize;
24917 } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
/* trailing bytes that don't form a frame => input length was wrong */
24919 if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
24921 return totalDstSize;
24924 /** ZSTD_getDecompressedSize() :
24925 * compatible with legacy mode
24926 * @return : decompressed size if known, 0 otherwise
24927 note : 0 can mean any of the following :
24928 - frame content is empty
24929 - decompressed size field is not present in frame header
24930 - frame header unknown / not supported
24931 - frame header not complete (`srcSize` too small) */
24932 unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
24934 unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
/* both sentinels collapse to 0 in this older API; `>=` relies on ERROR < UNKNOWN */
24935 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
24936 return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
24940 /** ZSTD_decodeFrameHeader() :
24941 * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
24942 * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
24943 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
24945 size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
24946 if (ZSTD_isError(result)) return result; /* invalid header */
/* a positive result means "need more input", which is a caller error here */
24947 RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
24948 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
24949 /* Skip the dictID check in fuzzing mode, because it makes the search
24952 RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
24953 dictionary_wrong, "");
/* frame declares a content checksum : start the running xxhash now */
24955 if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
24959 static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
24961 ZSTD_frameSizeInfo frameSizeInfo;
24962 frameSizeInfo.compressedSize = ret;
24963 frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
24964 return frameSizeInfo;
/** ZSTD_findFrameSizeInfo() :
 * determine the compressed size of the frame starting at `src`, together with
 * an upper bound on its decompressed size, by walking its block headers */
24967 static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
24969 ZSTD_frameSizeInfo frameSizeInfo;
24970 memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
24972 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
24973 if (ZSTD_isLegacy(src, srcSize))
24974 return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
/* skippable frame : its size comes straight from the header, no blocks to walk */
24977 if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
24978 && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
24979 frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
24980 assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
24981 frameSizeInfo.compressedSize <= srcSize);
24982 return frameSizeInfo;
24984 const BYTE* ip = (const BYTE*)src;
24985 const BYTE* const ipstart = ip;
24986 size_t remainingSize = srcSize;
24987 size_t nbBlocks = 0;
24988 ZSTD_frameHeader zfh;
24990 /* Extract Frame Header */
24991 { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
24992 if (ZSTD_isError(ret))
24993 return ZSTD_errorFrameSizeInfo(ret);
24995 return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
24998 ip += zfh.headerSize;
24999 remainingSize -= zfh.headerSize;
25001 /* Iterate over each block */
25003 blockProperties_t blockProperties;
25004 size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
25005 if (ZSTD_isError(cBlockSize))
25006 return ZSTD_errorFrameSizeInfo(cBlockSize);
25008 if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
25009 return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
25011 ip += ZSTD_blockHeaderSize + cBlockSize;
25012 remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
25015 if (blockProperties.lastBlock) break;
25018 /* Final frame content checksum */
25019 if (zfh.checksumFlag) {
25020 if (remainingSize < 4)
25021 return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
/* bound : exact content size when declared, else every block at its max size */
25025 frameSizeInfo.compressedSize = ip - ipstart;
25026 frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
25027 ? zfh.frameContentSize
25028 : nbBlocks * zfh.blockSizeMax;
25029 return frameSizeInfo;
25033 /** ZSTD_findFrameCompressedSize() :
25034 * compatible with legacy mode
25035 * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
25036 * `srcSize` must be at least as large as the frame contained
25037 * @return : the compressed size of the frame starting at `src` */
25038 size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
/* compressedSize carries either the frame size or an error code (see ZSTD_errorFrameSizeInfo) */
25040 ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
25041 return frameSizeInfo.compressedSize;
25044 /** ZSTD_decompressBound() :
25045 * compatible with legacy mode
25046 * `src` must point to the start of a ZSTD frame or a skippable frame
25047 * `srcSize` must be at least as large as the frame contained
25048 * @return : the maximum decompressed size of the compressed source
25050 unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
25052 unsigned long long bound = 0;
25053 /* Iterate over each frame */
25054 while (srcSize > 0) {
25055 ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
25056 size_t const compressedSize = frameSizeInfo.compressedSize;
25057 unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
25058 if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
25059 return ZSTD_CONTENTSIZE_ERROR;
25060 assert(srcSize >= compressedSize);
25061 src = (const BYTE*)src + compressedSize;
25062 srcSize -= compressedSize;
/* sum of per-frame bounds; loop exits when the whole input is consumed */
25063 bound += decompressedBound;
25069 /*-*************************************************************
25071 ***************************************************************/
25073 /** ZSTD_insertBlock() :
25074 * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
25075 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
25077 DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
/* checkContinuity first, so the window bookkeeping sees the previous segment */
25078 ZSTD_checkContinuity(dctx, blockStart);
25079 dctx->previousDstEnd = (const char*)blockStart + blockSize;
/** ZSTD_copyRawBlock() :
 * copy a raw (uncompressed) block into `dst`.
 * @return : srcSize, or an error code */
25084 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
25085 const void* src, size_t srcSize)
25087 DEBUGLOG(5, "ZSTD_copyRawBlock");
/* NOTE(review): embedded numbering jumps 25087 -> 25089; an enclosing
 * `if (dst == NULL) {` guard appears to be missing from this extraction,
 * which would make the RETURN_ERROR below conditional — confirm upstream */
25089 if (srcSize == 0) return 0;
25090 RETURN_ERROR(dstBuffer_null, "");
25092 RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
25093 memcpy(dst, src, srcSize);
/** ZSTD_setRleBlock() :
 * expand an RLE block : fill `regenSize` bytes of `dst` with byte `b`.
 * @return : regenSize, or an error code */
25097 static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
/* NOTE(review): numbering jumps 25097 -> 25102; the remaining parameters and a
 * `if (dst == NULL) {` guard appear elided from this extraction — confirm upstream */
25102 if (regenSize == 0) return 0;
25103 RETURN_ERROR(dstBuffer_null, "");
25105 RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
25106 memset(dst, b, regenSize);
25111 /*! ZSTD_decompressFrame() :
25112 * @dctx must be properly initialized
25113 * will update *srcPtr and *srcSizePtr,
25114 * to make *srcPtr progress by one frame. */
25115 static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
25116 void* dst, size_t dstCapacity,
25117 const void** srcPtr, size_t *srcSizePtr)
25119 const BYTE* ip = (const BYTE*)(*srcPtr);
25120 BYTE* const ostart = (BYTE* const)dst;
/* with dstCapacity==0, keep oend==ostart so `oend-op` stays 0 (no write room) */
25121 BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
25123 size_t remainingSrcSize = *srcSizePtr;
25125 DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
/* input must hold at least a minimal frame header plus one block header */
25129 remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
25130 srcSize_wrong, "");
25133 { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
25134 ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
25135 if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
25136 RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
25137 srcSize_wrong, "");
25138 FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
25139 ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
25142 /* Loop on each block */
25144 size_t decodedSize;
25145 blockProperties_t blockProperties;
25146 size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
25147 if (ZSTD_isError(cBlockSize)) return cBlockSize;
25149 ip += ZSTD_blockHeaderSize;
25150 remainingSrcSize -= ZSTD_blockHeaderSize;
25151 RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
/* dispatch on block type : compressed, raw copy, or RLE expansion */
25153 switch(blockProperties.blockType)
25155 case bt_compressed:
25156 decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1);
25159 decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize);
25162 decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize);
25166 RETURN_ERROR(corruption_detected, "invalid block type");
25169 if (ZSTD_isError(decodedSize)) return decodedSize;
/* fold freshly decoded bytes into the running frame checksum */
25170 if (dctx->fParams.checksumFlag)
25171 XXH64_update(&dctx->xxhState, op, decodedSize);
25172 if (decodedSize != 0)
25174 assert(ip != NULL);
25176 remainingSrcSize -= cBlockSize;
25177 if (blockProperties.lastBlock) break;
/* if the header declared a content size, the decoded total must match exactly */
25180 if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
25181 RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
25182 corruption_detected, "");
25184 if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
25185 U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
25187 RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
25188 checkRead = MEM_readLE32(ip);
25189 RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
25191 remainingSrcSize -= 4;
25194 /* Allow caller to get size read */
25196 *srcSizePtr = remainingSrcSize;
25200 static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
25201 void* dst, size_t dstCapacity,
25202 const void* src, size_t srcSize,
25203 const void* dict, size_t dictSize,
25204 const ZSTD_DDict* ddict)
25206 void* const dststart = dst;
25207 int moreThan1Frame = 0;
25209 DEBUGLOG(5, "ZSTD_decompressMultiFrame");
25210 assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
25213 dict = ZSTD_DDict_dictContent(ddict);
25214 dictSize = ZSTD_DDict_dictSize(ddict);
25217 while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
25219 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
25220 if (ZSTD_isLegacy(src, srcSize)) {
25221 size_t decodedSize;
25222 size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
25223 if (ZSTD_isError(frameSize)) return frameSize;
25224 RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
25225 "legacy support is not compatible with static dctx");
25227 decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
25228 if (ZSTD_isError(decodedSize)) return decodedSize;
25230 assert(decodedSize <=- dstCapacity);
25231 dst = (BYTE*)dst + decodedSize;
25232 dstCapacity -= decodedSize;
25234 src = (const BYTE*)src + frameSize;
25235 srcSize -= frameSize;
25241 { U32 const magicNumber = MEM_readLE32(src);
25242 DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
25243 (unsigned)magicNumber, ZSTD_MAGICNUMBER);
25244 if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
25245 size_t const skippableSize = readSkippableFrameSize(src, srcSize);
25246 FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
25247 assert(skippableSize <= srcSize);
25249 src = (const BYTE *)src + skippableSize;
25250 srcSize -= skippableSize;
25255 /* we were called from ZSTD_decompress_usingDDict */
25256 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
25258 /* this will initialize correctly with no dict if dict == NULL, so
25259 * use this in all cases but ddict */
25260 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
25262 ZSTD_checkContinuity(dctx, dst);
25264 { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
25267 (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
25268 && (moreThan1Frame==1),
25270 "at least one frame successfully completed, but following "
25271 "bytes are garbage: it's more likely to be a srcSize error, "
25272 "specifying more bytes than compressed size of frame(s). This "
25273 "error message replaces ERROR(prefix_unknown), which would be "
25274 "confusing, as the first header is actually correct. Note that "
25275 "one could be unlucky, it might be a corruption error instead, "
25276 "happening right at the place where we expect zstd magic "
25277 "bytes. But this is _much_ less likely than a srcSize field "
25279 if (ZSTD_isError(res)) return res;
25280 assert(res <= dstCapacity);
25282 dst = (BYTE*)dst + res;
25283 dstCapacity -= res;
25285 moreThan1Frame = 1;
25286 } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
25288 RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
25290 return (BYTE*)dst - (BYTE*)dststart;
/** ZSTD_decompress_usingDict() :
 * decompress with an optional raw-content dictionary (no pre-digested ddict) */
25293 size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
25294 void* dst, size_t dstCapacity,
25295 const void* src, size_t srcSize,
25296 const void* dict, size_t dictSize)
25298 return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
/** ZSTD_getDDict() :
 * return the DDict currently attached to `dctx` according to its usage policy,
 * or clear it when it is no longer to be used */
25302 static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
25304 switch (dctx->dictUses) {
25306 assert(0 /* Impossible */);
25308 case ZSTD_dont_use:
25309 ZSTD_clearDict(dctx);
25311 case ZSTD_use_indefinitely:
25312 return dctx->ddict;
/* one-shot usage : hand the ddict out once, then disable it */
25313 case ZSTD_use_once:
25314 dctx->dictUses = ZSTD_dont_use;
25315 return dctx->ddict;
/* decompress using whatever dictionary (if any) is currently attached to dctx */
25319 size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
25321 return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
/** ZSTD_decompress() :
 * simple one-shot decompression with a transient DCtx.
 * ZSTD_HEAPMODE selects heap allocation of the context; otherwise it lives on the stack */
25325 size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
25327 #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
25329 ZSTD_DCtx* const dctx = ZSTD_createDCtx();
25330 RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
25331 regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
/* free the context even when decompression failed */
25332 ZSTD_freeDCtx(dctx);
25334 #else /* stack mode */
25336 ZSTD_initDCtx_internal(&dctx);
25337 return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
25342 /*-**************************************
25343 * Advanced Streaming Decompression API
25344 * Bufferless and synchronous
25345 ****************************************/
/* report how many input bytes the streaming state machine wants next */
25346 size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
25349 * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
25350 * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
25353 * For blocks that can be streamed, this allows us to reduce the latency until we produce
25354 * output, and avoid copying the input.
25356 * @param inputSize - The total amount of input that the caller currently has.
25358 static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
/* partial input only makes sense while decompressing a block */
25359 if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
25360 return dctx->expected;
/* only raw blocks are streamable */
25361 if (dctx->bType != bt_raw)
25362 return dctx->expected;
/* accept any non-empty prefix of the block, capped at the block's remaining size */
25363 return MIN(MAX(inputSize, 1), dctx->expected);
/* map the internal decoding stage onto the public ZSTD_nextInputType_e enum */
25366 ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
25367 switch(dctx->stage)
25369 default: /* should not happen */
25371 case ZSTDds_getFrameHeaderSize:
25372 case ZSTDds_decodeFrameHeader:
25373 return ZSTDnit_frameHeader;
25374 case ZSTDds_decodeBlockHeader:
25375 return ZSTDnit_blockHeader;
25376 case ZSTDds_decompressBlock:
25377 return ZSTDnit_block;
25378 case ZSTDds_decompressLastBlock:
25379 return ZSTDnit_lastBlock;
25380 case ZSTDds_checkChecksum:
25381 return ZSTDnit_checksum;
25382 case ZSTDds_decodeSkippableHeader:
25383 case ZSTDds_skipFrame:
25384 return ZSTDnit_skippableFrame;
/* non-zero when the state machine is currently skipping a skippable frame */
25388 static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
25390 /** ZSTD_decompressContinue() :
25391 * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
25392 * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
25393 * or an error code, which can be tested using ZSTD_isError() */
25394 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
25396 DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
/* caller must feed exactly what the state machine asked for (raw blocks may stream) */
25398 RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
25399 if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
/* state machine : each case consumes its expected input and sets up the next stage */
25401 switch (dctx->stage)
25403 case ZSTDds_getFrameHeaderSize :
25404 assert(src != NULL);
25405 if (dctx->format == ZSTD_f_zstd1) { /* allows header */
25406 assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
25407 if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
25408 memcpy(dctx->headerBuffer, src, srcSize);
25409 dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */
25410 dctx->stage = ZSTDds_decodeSkippableHeader;
25413 dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
25414 if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
25415 memcpy(dctx->headerBuffer, src, srcSize);
25416 dctx->expected = dctx->headerSize - srcSize;
25417 dctx->stage = ZSTDds_decodeFrameHeader;
25420 case ZSTDds_decodeFrameHeader:
25421 assert(src != NULL);
/* append the remaining header bytes after those already buffered */
25422 memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
25423 FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
25424 dctx->expected = ZSTD_blockHeaderSize;
25425 dctx->stage = ZSTDds_decodeBlockHeader;
25428 case ZSTDds_decodeBlockHeader:
25429 { blockProperties_t bp;
25430 size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
25431 if (ZSTD_isError(cBlockSize)) return cBlockSize;
25432 RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
25433 dctx->expected = cBlockSize;
25434 dctx->bType = bp.blockType;
25435 dctx->rleSize = bp.origSize;
25437 dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
/* zero-size block : nothing to decompress, jump straight to the next header/checksum */
25441 if (bp.lastBlock) {
25442 if (dctx->fParams.checksumFlag) {
25443 dctx->expected = 4;
25444 dctx->stage = ZSTDds_checkChecksum;
25446 dctx->expected = 0; /* end of frame */
25447 dctx->stage = ZSTDds_getFrameHeaderSize;
25450 dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */
25451 dctx->stage = ZSTDds_decodeBlockHeader;
25456 case ZSTDds_decompressLastBlock:
25457 case ZSTDds_decompressBlock:
25458 DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
25460 switch(dctx->bType)
25462 case bt_compressed:
25463 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
25464 rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
25465 dctx->expected = 0; /* Streaming not supported */
/* raw blocks may arrive in pieces : consume what we got, keep expecting the rest */
25468 assert(srcSize <= dctx->expected);
25469 rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
25470 FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
25471 assert(rSize == srcSize);
25472 dctx->expected -= rSize;
25475 rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
25476 dctx->expected = 0; /* Streaming not supported */
25478 case bt_reserved : /* should never happen */
25480 RETURN_ERROR(corruption_detected, "invalid block type");
25482 FORWARD_IF_ERROR(rSize, "");
25483 RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
25484 DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
25485 dctx->decodedSize += rSize;
25486 if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
25487 dctx->previousDstEnd = (char*)dst + rSize;
25489 /* Stay on the same stage until we are finished streaming the block. */
25490 if (dctx->expected > 0) {
25494 if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
25495 DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
25497 dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
25498 && dctx->decodedSize != dctx->fParams.frameContentSize,
25499 corruption_detected, "");
25500 if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
25501 dctx->expected = 4;
25502 dctx->stage = ZSTDds_checkChecksum;
25504 dctx->expected = 0; /* ends here */
25505 dctx->stage = ZSTDds_getFrameHeaderSize;
25508 dctx->stage = ZSTDds_decodeBlockHeader;
25509 dctx->expected = ZSTD_blockHeaderSize;
25514 case ZSTDds_checkChecksum:
25515 assert(srcSize == 4); /* guaranteed by dctx->expected */
25516 { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
25517 U32 const check32 = MEM_readLE32(src);
25518 DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
25519 RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
25520 dctx->expected = 0;
25521 dctx->stage = ZSTDds_getFrameHeaderSize;
25525 case ZSTDds_decodeSkippableHeader:
25526 assert(src != NULL);
25527 assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
25528 memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
25529 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
25530 dctx->stage = ZSTDds_skipFrame;
25533 case ZSTDds_skipFrame:
25534 dctx->expected = 0;
25535 dctx->stage = ZSTDds_getFrameHeaderSize;
25539 assert(0); /* impossible */
25540 RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
/** ZSTD_refDictContent() :
 * reference `dict` content as the decoding prefix window (no copy).
 * The previous output segment becomes the "extDict" segment. */
25545 static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
25547 dctx->dictEnd = dctx->previousDstEnd;
/* virtualStart preserves relative offsets across the non-contiguous segments */
25548 dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
25549 dctx->prefixStart = dict;
25550 dctx->previousDstEnd = (const char*)dict + dictSize;
25551 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
25552 dctx->dictContentBeginForFuzzing = dctx->prefixStart;
25553 dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
25558 /*! ZSTD_loadDEntropy() :
25559 * dict : must point at beginning of a valid zstd dictionary.
25560 * @return : size of entropy tables read */
25562 ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
25563 const void* const dict, size_t const dictSize)
25565 const BYTE* dictPtr = (const BYTE*)dict;
25566 const BYTE* const dictEnd = dictPtr + dictSize;
25568 RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
25569 assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
25570 dictPtr += 8; /* skip header = magic + dictID */
/* the three FSE tables are laid out contiguously (LL, OF, ML) so together they
 * can double as scratch space for the Huffman table reader */
25572 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
25573 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
25574 ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
25575 { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
25576 size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
25577 #ifdef HUF_FORCE_DECOMPRESS_X1
25578 /* in minimal huffman, we always use X1 variants */
25579 size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
25580 dictPtr, dictEnd - dictPtr,
25581 workspace, workspaceSize);
25583 size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
25584 dictPtr, dictEnd - dictPtr,
25585 workspace, workspaceSize);
25587 RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
/* offset-code FSE table */
25591 { short offcodeNCount[MaxOff+1];
25592 unsigned offcodeMaxValue = MaxOff, offcodeLog;
25593 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
25594 RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
25595 RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
25596 RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
25597 ZSTD_buildFSETable( entropy->OFTable,
25598 offcodeNCount, offcodeMaxValue,
25601 dictPtr += offcodeHeaderSize;
/* match-length FSE table */
25604 { short matchlengthNCount[MaxML+1];
25605 unsigned matchlengthMaxValue = MaxML, matchlengthLog;
25606 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
25607 RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
25608 RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
25609 RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
25610 ZSTD_buildFSETable( entropy->MLTable,
25611 matchlengthNCount, matchlengthMaxValue,
25614 dictPtr += matchlengthHeaderSize;
/* literal-length FSE table */
25617 { short litlengthNCount[MaxLL+1];
25618 unsigned litlengthMaxValue = MaxLL, litlengthLog;
25619 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
25620 RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
25621 RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
25622 RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
25623 ZSTD_buildFSETable( entropy->LLTable,
25624 litlengthNCount, litlengthMaxValue,
25627 dictPtr += litlengthHeaderSize;
/* 3 x 4-byte starting repcodes; each must be a valid offset within dict content */
25630 RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
25632 size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
25633 for (i=0; i<3; i++) {
25634 U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
25635 RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
25636 dictionary_corrupted, "");
25637 entropy->rep[i] = rep;
25640 return dictPtr - (const BYTE*)dict;
/** ZSTD_decompress_insertDictionary() :
 * load `dict` into `dctx` : structured dictionaries (magic present) get their
 * entropy tables parsed; anything else is referenced as raw content */
25643 static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
25645 if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
25646 { U32 const magic = MEM_readLE32(dict);
25647 if (magic != ZSTD_MAGIC_DICTIONARY) {
25648 return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
25650 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
25652 /* load entropy tables */
25653 { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
25654 RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
/* skip the parsed entropy section; the remainder is dictionary content */
25655 dict = (const char*)dict + eSize;
25658 dctx->litEntropy = dctx->fseEntropy = 1;
25660 /* reference dictionary content */
25661 return ZSTD_refDictContent(dctx, dict, dictSize);
/** ZSTD_decompressBegin() :
 * reset `dctx` to start decoding a new frame with no dictionary */
25664 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
25666 assert(dctx != NULL);
25667 dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */
25668 dctx->stage = ZSTDds_getFrameHeaderSize;
25669 dctx->decodedSize = 0;
25670 dctx->previousDstEnd = NULL;
25671 dctx->prefixStart = NULL;
25672 dctx->virtualStart = NULL;
25673 dctx->dictEnd = NULL;
25674 dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
25675 dctx->litEntropy = dctx->fseEntropy = 0;
25677 dctx->bType = bt_reserved;
/* entropy.rep mirrors repStartValue so starting repcodes are always defined */
25678 ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
25679 memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
/* active table pointers start out aiming at the context's own tables */
25680 dctx->LLTptr = dctx->entropy.LLTable;
25681 dctx->MLTptr = dctx->entropy.MLTable;
25682 dctx->OFTptr = dctx->entropy.OFTable;
25683 dctx->HUFptr = dctx->entropy.hufTable;
/** ZSTD_decompressBegin_usingDict() :
 * reset `dctx`, then load an optional raw dictionary */
25687 size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
25689 FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
25690 if (dict && dictSize)
25692 ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
25693 dictionary_corrupted, "");
25698 /* ====== ZSTD_DDict ====== */
/** ZSTD_decompressBegin_usingDDict() :
 * reset `dctx`, then attach the pre-digested dictionary `ddict` (NULL = none) */
25700 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
25702 DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
25703 assert(dctx != NULL);
25705 const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
25706 size_t const dictSize = ZSTD_DDict_dictSize(ddict);
25707 const void* const dictEnd = dictStart + dictSize;
/* "cold" = this ddict's content is not already the context's current window,
 * so its tables/content are unlikely to be in CPU cache */
25708 dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
25709 DEBUGLOG(4, "DDict is %s",
25710 dctx->ddictIsCold ? "~cold~" : "hot!");
25712 FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
25713 if (ddict) { /* NULL ddict is equivalent to no dictionary */
25714 ZSTD_copyDDictParameters(dctx, ddict);
25719 /*! ZSTD_getDictID_fromDict() :
25720 * Provides the dictID stored within dictionary.
25721 * if @return == 0, the dictionary is not conformant with Zstandard specification.
25722 * It can still be loaded, but as a content-only dictionary. */
25723 unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
25725 if (dictSize < 8) return 0;
25726 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
/* dictID is the 4 bytes following the dictionary magic */
25727 return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
25730 /*! ZSTD_getDictID_fromFrame() :
25731 * Provides the dictID required to decompress frame stored within `src`.
25732 * If @return == 0, the dictID could not be decoded.
25733 * This could for one of the following reasons :
25734 * - The frame does not require a dictionary (most common case).
25735 * - The frame was built with dictID intentionally removed.
25736 * Needed dictionary is a hidden information.
25737 * Note : this use case also happens when using a non-conformant dictionary.
25738 * - `srcSize` is too small, and as a result, frame header could not be decoded.
25739 * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
25740 * - This is not a Zstandard frame.
25741 * When identifying the exact failure cause, it's possible to use
25742 * ZSTD_getFrameHeader(), which will provide a more precise error code. */
25743 unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
25745 ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
25746 size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
25747 if (ZSTD_isError(hError)) return 0;
25752 /*! ZSTD_decompress_usingDDict() :
25753 * Decompression using a pre-digested Dictionary
25754 * Use dictionary without significant overhead. */
25755 size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
25756 void* dst, size_t dstCapacity,
25757 const void* src, size_t srcSize,
25758 const ZSTD_DDict* ddict)
25760 /* pass content and size in case legacy frames are encountered */
25761 return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
25767 /*=====================================
25768 * Streaming decompression
25769 *====================================*/
25771 ZSTD_DStream* ZSTD_createDStream(void)
25773 DEBUGLOG(3, "ZSTD_createDStream");
25774 return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
25777 ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
25779 return ZSTD_initStaticDCtx(workspace, workspaceSize);
25782 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
25784 return ZSTD_createDCtx_advanced(customMem);
25787 size_t ZSTD_freeDStream(ZSTD_DStream* zds)
25789 return ZSTD_freeDCtx(zds);
25793 /* *** Initialization *** */
25795 size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
25796 size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
25798 size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
25799 const void* dict, size_t dictSize,
25800 ZSTD_dictLoadMethod_e dictLoadMethod,
25801 ZSTD_dictContentType_e dictContentType)
25803 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
25804 ZSTD_clearDict(dctx);
25805 if (dict && dictSize != 0) {
25806 dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
25807 RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
25808 dctx->ddict = dctx->ddictLocal;
25809 dctx->dictUses = ZSTD_use_indefinitely;
25814 size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
25816 return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
25819 size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
25821 return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
25824 size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
25826 FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
25827 dctx->dictUses = ZSTD_use_once;
25831 size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
25833 return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
25837 /* ZSTD_initDStream_usingDict() :
25838 * return : expected size, aka ZSTD_startingInputLength().
25839 * this function cannot fail */
25840 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
25842 DEBUGLOG(4, "ZSTD_initDStream_usingDict");
25843 FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
25844 FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
25845 return ZSTD_startingInputLength(zds->format);
25848 /* note : this variant can't fail */
25849 size_t ZSTD_initDStream(ZSTD_DStream* zds)
25851 DEBUGLOG(4, "ZSTD_initDStream");
25852 return ZSTD_initDStream_usingDDict(zds, NULL);
25855 /* ZSTD_initDStream_usingDDict() :
25856 * ddict will just be referenced, and must outlive decompression session
25857 * this function cannot fail */
25858 size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
25860 FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
25861 FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
25862 return ZSTD_startingInputLength(dctx->format);
25865 /* ZSTD_resetDStream() :
25866 * return : expected size, aka ZSTD_startingInputLength().
25867 * this function cannot fail */
25868 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
25870 FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
25871 return ZSTD_startingInputLength(dctx->format);
25875 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
25877 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
25878 ZSTD_clearDict(dctx);
25880 dctx->ddict = ddict;
25881 dctx->dictUses = ZSTD_use_indefinitely;
25886 /* ZSTD_DCtx_setMaxWindowSize() :
25887 * note : no direct equivalence in ZSTD_DCtx_setParameter,
25888 * since this version sets windowSize, and the other sets windowLog */
25889 size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
25891 ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
25892 size_t const min = (size_t)1 << bounds.lowerBound;
25893 size_t const max = (size_t)1 << bounds.upperBound;
25894 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
25895 RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
25896 RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
25897 dctx->maxWindowSize = maxWindowSize;
25901 size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
25903 return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format);
25906 ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
25908 ZSTD_bounds bounds = { 0, 0, 0 };
25910 case ZSTD_d_windowLogMax:
25911 bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
25912 bounds.upperBound = ZSTD_WINDOWLOG_MAX;
25914 case ZSTD_d_format:
25915 bounds.lowerBound = (int)ZSTD_f_zstd1;
25916 bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
25917 ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
25919 case ZSTD_d_stableOutBuffer:
25920 bounds.lowerBound = (int)ZSTD_obm_buffered;
25921 bounds.upperBound = (int)ZSTD_obm_stable;
25925 bounds.error = ERROR(parameter_unsupported);
25929 /* ZSTD_dParam_withinBounds:
25930 * @return 1 if value is within dParam bounds,
25932 static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
25934 ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
25935 if (ZSTD_isError(bounds.error)) return 0;
25936 if (value < bounds.lowerBound) return 0;
25937 if (value > bounds.upperBound) return 0;
25941 #define CHECK_DBOUNDS(p,v) { \
25942 RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
25945 size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
25947 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
25949 case ZSTD_d_windowLogMax:
25950 if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
25951 CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
25952 dctx->maxWindowSize = ((size_t)1) << value;
25954 case ZSTD_d_format:
25955 CHECK_DBOUNDS(ZSTD_d_format, value);
25956 dctx->format = (ZSTD_format_e)value;
25958 case ZSTD_d_stableOutBuffer:
25959 CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
25960 dctx->outBufferMode = (ZSTD_outBufferMode_e)value;
25964 RETURN_ERROR(parameter_unsupported, "");
25967 size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
25969 if ( (reset == ZSTD_reset_session_only)
25970 || (reset == ZSTD_reset_session_and_parameters) ) {
25971 dctx->streamStage = zdss_init;
25972 dctx->noForwardProgress = 0;
25974 if ( (reset == ZSTD_reset_parameters)
25975 || (reset == ZSTD_reset_session_and_parameters) ) {
25976 RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
25977 ZSTD_clearDict(dctx);
25978 dctx->format = ZSTD_f_zstd1;
25979 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
25985 size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
25987 return ZSTD_sizeof_DCtx(dctx);
25990 size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
25992 size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
25993 unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
25994 unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
25995 size_t const minRBSize = (size_t) neededSize;
25996 RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
25997 frameParameter_windowTooLarge, "");
26001 size_t ZSTD_estimateDStreamSize(size_t windowSize)
26003 size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
26004 size_t const inBuffSize = blockSize; /* no block can be larger */
26005 size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
26006 return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
26009 size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
26011 U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
26012 ZSTD_frameHeader zfh;
26013 size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
26014 if (ZSTD_isError(err)) return err;
26015 RETURN_ERROR_IF(err>0, srcSize_wrong, "");
26016 RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
26017 frameParameter_windowTooLarge, "");
26018 return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
26022 /* ***** Decompression ***** */
26024 static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
26026 return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
26029 static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
26031 if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
26032 zds->oversizedDuration++;
26034 zds->oversizedDuration = 0;
26037 static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
26039 return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
26042 /* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */
26043 static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
26045 ZSTD_outBuffer const expect = zds->expectedOutBuffer;
26046 /* No requirement when ZSTD_obm_stable is not enabled. */
26047 if (zds->outBufferMode != ZSTD_obm_stable)
26049 /* Any buffer is allowed in zdss_init, this must be the same for every other call until
26050 * the context is reset.
26052 if (zds->streamStage == zdss_init)
26054 /* The buffer must match our expectation exactly. */
26055 if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
26057 RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!");
26060 /* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
26061 * and updates the stage and the output buffer state. This call is extracted so it can be
26062 * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
26063 * NOTE: You must break after calling this function since the streamStage is modified.
26065 static size_t ZSTD_decompressContinueStream(
26066 ZSTD_DStream* zds, char** op, char* oend,
26067 void const* src, size_t srcSize) {
26068 int const isSkipFrame = ZSTD_isSkipFrame(zds);
26069 if (zds->outBufferMode == ZSTD_obm_buffered) {
26070 size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
26071 size_t const decodedSize = ZSTD_decompressContinue(zds,
26072 zds->outBuff + zds->outStart, dstSize, src, srcSize);
26073 FORWARD_IF_ERROR(decodedSize, "");
26074 if (!decodedSize && !isSkipFrame) {
26075 zds->streamStage = zdss_read;
26077 zds->outEnd = zds->outStart + decodedSize;
26078 zds->streamStage = zdss_flush;
26081 /* Write directly into the output buffer */
26082 size_t const dstSize = isSkipFrame ? 0 : oend - *op;
26083 size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
26084 FORWARD_IF_ERROR(decodedSize, "");
26085 *op += decodedSize;
26086 /* Flushing is not needed. */
26087 zds->streamStage = zdss_read;
26088 assert(*op <= oend);
26089 assert(zds->outBufferMode == ZSTD_obm_stable);
26094 size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
26096 const char* const src = (const char*)input->src;
26097 const char* const istart = input->pos != 0 ? src + input->pos : src;
26098 const char* const iend = input->size != 0 ? src + input->size : src;
26099 const char* ip = istart;
26100 char* const dst = (char*)output->dst;
26101 char* const ostart = output->pos != 0 ? dst + output->pos : dst;
26102 char* const oend = output->size != 0 ? dst + output->size : dst;
26104 U32 someMoreWork = 1;
26106 DEBUGLOG(5, "ZSTD_decompressStream");
26108 input->pos > input->size,
26110 "forbidden. in: pos: %u vs size: %u",
26111 (U32)input->pos, (U32)input->size);
26113 output->pos > output->size,
26115 "forbidden. out: pos: %u vs size: %u",
26116 (U32)output->pos, (U32)output->size);
26117 DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
26118 FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
26120 while (someMoreWork) {
26121 switch(zds->streamStage)
26124 DEBUGLOG(5, "stage zdss_init => transparent reset ");
26125 zds->streamStage = zdss_loadHeader;
26126 zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
26127 zds->legacyVersion = 0;
26128 zds->hostageByte = 0;
26129 zds->expectedOutBuffer = *output;
26132 case zdss_loadHeader :
26133 DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
26134 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
26135 if (zds->legacyVersion) {
26136 RETURN_ERROR_IF(zds->staticSize, memory_allocation,
26137 "legacy support is incompatible with static dctx");
26138 { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
26139 if (hint==0) zds->streamStage = zdss_init;
26143 { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
26144 DEBUGLOG(5, "header size : %u", (U32)hSize);
26145 if (ZSTD_isError(hSize)) {
26146 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
26147 U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
26148 if (legacyVersion) {
26149 ZSTD_DDict const* const ddict = ZSTD_getDDict(zds);
26150 const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL;
26151 size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0;
26152 DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
26153 RETURN_ERROR_IF(zds->staticSize, memory_allocation,
26154 "legacy support is incompatible with static dctx");
26155 FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
26156 zds->previousLegacyVersion, legacyVersion,
26157 dict, dictSize), "");
26158 zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
26159 { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
26160 if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
26164 return hSize; /* error */
26166 if (hSize != 0) { /* need more input */
26167 size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
26168 size_t const remainingInput = (size_t)(iend-ip);
26169 assert(iend >= ip);
26170 if (toLoad > remainingInput) { /* not enough input to load full header */
26171 if (remainingInput > 0) {
26172 memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
26173 zds->lhSize += remainingInput;
26175 input->pos = input->size;
26176 return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
26178 assert(ip != NULL);
26179 memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
26183 /* check for single-pass mode opportunity */
26184 if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
26185 && zds->fParams.frameType != ZSTD_skippableFrame
26186 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
26187 size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
26188 if (cSize <= (size_t)(iend-istart)) {
26189 /* shortcut : using single-pass mode */
26190 size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
26191 if (ZSTD_isError(decompressedSize)) return decompressedSize;
26192 DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
26193 ip = istart + cSize;
26194 op += decompressedSize;
26196 zds->streamStage = zdss_init;
26201 /* Check output buffer is large enough for ZSTD_odm_stable. */
26202 if (zds->outBufferMode == ZSTD_obm_stable
26203 && zds->fParams.frameType != ZSTD_skippableFrame
26204 && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
26205 && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
26206 RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
26209 /* Consume header (see ZSTDds_decodeFrameHeader) */
26210 DEBUGLOG(4, "Consume header");
26211 FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
26213 if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
26214 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
26215 zds->stage = ZSTDds_skipFrame;
26217 FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
26218 zds->expected = ZSTD_blockHeaderSize;
26219 zds->stage = ZSTDds_decodeBlockHeader;
26222 /* control buffer memory usage */
26223 DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
26224 (U32)(zds->fParams.windowSize >>10),
26225 (U32)(zds->maxWindowSize >> 10) );
26226 zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
26227 RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
26228 frameParameter_windowTooLarge, "");
26230 /* Adapt buffer sizes to frame header instructions */
26231 { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
26232 size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered
26233 ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
26236 ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
26238 { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
26239 int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
26241 if (tooSmall || tooLarge) {
26242 size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
26243 DEBUGLOG(4, "inBuff : from %u to %u",
26244 (U32)zds->inBuffSize, (U32)neededInBuffSize);
26245 DEBUGLOG(4, "outBuff : from %u to %u",
26246 (U32)zds->outBuffSize, (U32)neededOutBuffSize);
26247 if (zds->staticSize) { /* static DCtx */
26248 DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
26249 assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */
26251 bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
26252 memory_allocation, "");
26254 ZSTD_free(zds->inBuff, zds->customMem);
26255 zds->inBuffSize = 0;
26256 zds->outBuffSize = 0;
26257 zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
26258 RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
26260 zds->inBuffSize = neededInBuffSize;
26261 zds->outBuff = zds->inBuff + zds->inBuffSize;
26262 zds->outBuffSize = neededOutBuffSize;
26264 zds->streamStage = zdss_read;
26268 DEBUGLOG(5, "stage zdss_read");
26269 { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip);
26270 DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
26271 if (neededInSize==0) { /* end of frame */
26272 zds->streamStage = zdss_init;
26276 if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
26277 FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
26278 ip += neededInSize;
26279 /* Function modifies the stage so we must break */
26282 if (ip==iend) { someMoreWork = 0; break; } /* no more input */
26283 zds->streamStage = zdss_load;
26287 { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
26288 size_t const toLoad = neededInSize - zds->inPos;
26289 int const isSkipFrame = ZSTD_isSkipFrame(zds);
26291 /* At this point we shouldn't be decompressing a block that we can stream. */
26292 assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
26294 loadedSize = MIN(toLoad, (size_t)(iend-ip));
26296 RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
26297 corruption_detected,
26298 "should never happen");
26299 loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
26302 zds->inPos += loadedSize;
26303 if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
26305 /* decode loaded input */
26306 zds->inPos = 0; /* input is consumed */
26307 FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
26308 /* Function modifies the stage so we must break */
26312 { size_t const toFlushSize = zds->outEnd - zds->outStart;
26313 size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
26315 zds->outStart += flushedSize;
26316 if (flushedSize == toFlushSize) { /* flush completed */
26317 zds->streamStage = zdss_read;
26318 if ( (zds->outBuffSize < zds->fParams.frameContentSize)
26319 && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
26320 DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
26321 (int)(zds->outBuffSize - zds->outStart),
26322 (U32)zds->fParams.blockSizeMax);
26323 zds->outStart = zds->outEnd = 0;
26327 /* cannot complete flush */
26332 assert(0); /* impossible */
26333 RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */
26337 input->pos = (size_t)(ip - (const char*)(input->src));
26338 output->pos = (size_t)(op - (char*)(output->dst));
26340 /* Update the expected output buffer for ZSTD_obm_stable. */
26341 zds->expectedOutBuffer = *output;
26343 if ((ip==istart) && (op==ostart)) { /* no forward progress */
26344 zds->noForwardProgress ++;
26345 if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
26346 RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
26347 RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
26351 zds->noForwardProgress = 0;
26353 { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
26354 if (!nextSrcSizeHint) { /* frame fully decoded */
26355 if (zds->outEnd == zds->outStart) { /* output fully flushed */
26356 if (zds->hostageByte) {
26357 if (input->pos >= input->size) {
26358 /* can't release hostage (not present) */
26359 zds->streamStage = zdss_read;
26362 input->pos++; /* release hostage */
26363 } /* zds->hostageByte */
26365 } /* zds->outEnd == zds->outStart */
26366 if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
26367 input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */
26368 zds->hostageByte=1;
26371 } /* nextSrcSizeHint==0 */
26372 nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
26373 assert(zds->inPos <= nextSrcSizeHint);
26374 nextSrcSizeHint -= zds->inPos; /* part already loaded*/
26375 return nextSrcSizeHint;
26379 size_t ZSTD_decompressStream_simpleArgs (
26381 void* dst, size_t dstCapacity, size_t* dstPos,
26382 const void* src, size_t srcSize, size_t* srcPos)
26384 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
26385 ZSTD_inBuffer input = { src, srcSize, *srcPos };
26386 /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
26387 size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
26388 *dstPos = output.pos;
26389 *srcPos = input.pos;
26392 /**** ended inlining decompress/zstd_decompress.c ****/
26393 /**** start inlining decompress/zstd_decompress_block.c ****/
26395 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
26396 * All rights reserved.
26398 * This source code is licensed under both the BSD-style license (found in the
26399 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
26400 * in the COPYING file in the root directory of this source tree).
26401 * You may select, at your option, one of the above-listed licenses.
26404 /* zstd_decompress_block :
26405 * this module takes care of decompressing _compressed_ block */
26407 /*-*******************************************************
26409 *********************************************************/
26410 #include <string.h> /* memcpy, memmove, memset */
26411 /**** skipping file: ../common/compiler.h ****/
26412 /**** skipping file: ../common/cpu.h ****/
26413 /**** skipping file: ../common/mem.h ****/
26414 #define FSE_STATIC_LINKING_ONLY
26415 /**** skipping file: ../common/fse.h ****/
26416 #define HUF_STATIC_LINKING_ONLY
26417 /**** skipping file: ../common/huf.h ****/
26418 /**** skipping file: ../common/zstd_internal.h ****/
26419 /**** skipping file: zstd_decompress_internal.h ****/
26420 /**** skipping file: zstd_ddict.h ****/
26421 /**** skipping file: zstd_decompress_block.h ****/
26423 /*_*******************************************************
26425 **********************************************************/
26427 /* These two optional macros force the use one way or another of the two
26428 * ZSTD_decompressSequences implementations. You can't force in both directions
26429 * at the same time.
26431 #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
26432 defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
26433 #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
26437 /*_*******************************************************
26438 * Memory operations
26439 **********************************************************/
26440 static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
26443 /*-*************************************************************
26445 ***************************************************************/
26447 /*! ZSTD_getcBlockSize() :
26448 * Provides the size of compressed block from block header `src` */
26449 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
26450 blockProperties_t* bpPtr)
26452 RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
26454 { U32 const cBlockHeader = MEM_readLE24(src);
26455 U32 const cSize = cBlockHeader >> 3;
26456 bpPtr->lastBlock = cBlockHeader & 1;
26457 bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
26458 bpPtr->origSize = cSize; /* only useful for RLE */
26459 if (bpPtr->blockType == bt_rle) return 1;
26460 RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
26466 /* Hidden declaration for fullbench */
26467 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
26468 const void* src, size_t srcSize);
26469 /*! ZSTD_decodeLiteralsBlock() :
26470 * @return : nb of bytes read from src (< srcSize )
26471 * note : symbol not declared but exposed for fullbench */
26472 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
26473 const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
26475 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
26476 RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
26478 { const BYTE* const istart = (const BYTE*) src;
26479 symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
26484 DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
26485 RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
26488 case set_compressed:
26489 RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
26490 { size_t lhSize, litSize, litCSize;
26491 U32 singleStream=0;
26492 U32 const lhlCode = (istart[0] >> 2) & 3;
26493 U32 const lhc = MEM_readLE32(istart);
26497 case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
26498 /* 2 - 2 - 10 - 10 */
26499 singleStream = !lhlCode;
26501 litSize = (lhc >> 4) & 0x3FF;
26502 litCSize = (lhc >> 14) & 0x3FF;
26505 /* 2 - 2 - 14 - 14 */
26507 litSize = (lhc >> 4) & 0x3FFF;
26508 litCSize = lhc >> 18;
26511 /* 2 - 2 - 18 - 18 */
26513 litSize = (lhc >> 4) & 0x3FFFF;
26514 litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
26517 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
26518 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
26520 /* prefetch huffman table if cold */
26521 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
26522 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
26525 if (litEncType==set_repeat) {
26526 if (singleStream) {
26527 hufSuccess = HUF_decompress1X_usingDTable_bmi2(
26528 dctx->litBuffer, litSize, istart+lhSize, litCSize,
26529 dctx->HUFptr, dctx->bmi2);
26531 hufSuccess = HUF_decompress4X_usingDTable_bmi2(
26532 dctx->litBuffer, litSize, istart+lhSize, litCSize,
26533 dctx->HUFptr, dctx->bmi2);
26536 if (singleStream) {
26537 #if defined(HUF_FORCE_DECOMPRESS_X2)
26538 hufSuccess = HUF_decompress1X_DCtx_wksp(
26539 dctx->entropy.hufTable, dctx->litBuffer, litSize,
26540 istart+lhSize, litCSize, dctx->workspace,
26541 sizeof(dctx->workspace));
26543 hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
26544 dctx->entropy.hufTable, dctx->litBuffer, litSize,
26545 istart+lhSize, litCSize, dctx->workspace,
26546 sizeof(dctx->workspace), dctx->bmi2);
26549 hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
26550 dctx->entropy.hufTable, dctx->litBuffer, litSize,
26551 istart+lhSize, litCSize, dctx->workspace,
26552 sizeof(dctx->workspace), dctx->bmi2);
26556 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
26558 dctx->litPtr = dctx->litBuffer;
26559 dctx->litSize = litSize;
26560 dctx->litEntropy = 1;
26561 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
26562 memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
26563 return litCSize + lhSize;
26567 { size_t litSize, lhSize;
26568 U32 const lhlCode = ((istart[0]) >> 2) & 3;
26571 case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
26573 litSize = istart[0] >> 3;
26577 litSize = MEM_readLE16(istart) >> 4;
26581 litSize = MEM_readLE24(istart) >> 4;
26585 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
26586 RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
26587 memcpy(dctx->litBuffer, istart+lhSize, litSize);
26588 dctx->litPtr = dctx->litBuffer;
26589 dctx->litSize = litSize;
26590 memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
26591 return lhSize+litSize;
26593 /* direct reference into compressed stream */
26594 dctx->litPtr = istart+lhSize;
26595 dctx->litSize = litSize;
26596 return lhSize+litSize;
26600 { U32 const lhlCode = ((istart[0]) >> 2) & 3;
26601 size_t litSize, lhSize;
26604 case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
26606 litSize = istart[0] >> 3;
26610 litSize = MEM_readLE16(istart) >> 4;
26614 litSize = MEM_readLE24(istart) >> 4;
26615 RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
26618 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
26619 memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
26620 dctx->litPtr = dctx->litBuffer;
26621 dctx->litSize = litSize;
26625 RETURN_ERROR(corruption_detected, "impossible");
26630 /* Default FSE distribution tables.
26631 * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
26632 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
26633 * They were generated programmatically with following method :
26634 * - start from default distributions, present in /lib/common/zstd_internal.h
26635 * - generate tables normally, using ZSTD_buildFSETable()
26636 * - printout the content of tables
26637 * - prettify output, report below, test with fuzzer to ensure it's correct */
26639 /* Default FSE distribution table for Literal Lengths */
26640 static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
26641 { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
26642 /* nextState, nbAddBits, nbBits, baseVal */
26643 { 0, 0, 4, 0}, { 16, 0, 4, 0},
26644 { 32, 0, 5, 1}, { 0, 0, 5, 3},
26645 { 0, 0, 5, 4}, { 0, 0, 5, 6},
26646 { 0, 0, 5, 7}, { 0, 0, 5, 9},
26647 { 0, 0, 5, 10}, { 0, 0, 5, 12},
26648 { 0, 0, 6, 14}, { 0, 1, 5, 16},
26649 { 0, 1, 5, 20}, { 0, 1, 5, 22},
26650 { 0, 2, 5, 28}, { 0, 3, 5, 32},
26651 { 0, 4, 5, 48}, { 32, 6, 5, 64},
26652 { 0, 7, 5, 128}, { 0, 8, 6, 256},
26653 { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
26654 { 32, 0, 4, 0}, { 0, 0, 4, 1},
26655 { 0, 0, 5, 2}, { 32, 0, 5, 4},
26656 { 0, 0, 5, 5}, { 32, 0, 5, 7},
26657 { 0, 0, 5, 8}, { 32, 0, 5, 10},
26658 { 0, 0, 5, 11}, { 0, 0, 6, 13},
26659 { 32, 1, 5, 16}, { 0, 1, 5, 18},
26660 { 32, 1, 5, 22}, { 0, 2, 5, 24},
26661 { 32, 3, 5, 32}, { 0, 3, 5, 40},
26662 { 0, 6, 4, 64}, { 16, 6, 4, 64},
26663 { 32, 7, 5, 128}, { 0, 9, 6, 512},
26664 { 0, 11, 6, 2048}, { 48, 0, 4, 0},
26665 { 16, 0, 4, 1}, { 32, 0, 5, 2},
26666 { 32, 0, 5, 3}, { 32, 0, 5, 5},
26667 { 32, 0, 5, 6}, { 32, 0, 5, 8},
26668 { 32, 0, 5, 9}, { 32, 0, 5, 11},
26669 { 32, 0, 5, 12}, { 0, 0, 6, 15},
26670 { 32, 1, 5, 18}, { 32, 1, 5, 20},
26671 { 32, 2, 5, 24}, { 32, 2, 5, 28},
26672 { 32, 3, 5, 40}, { 32, 4, 5, 48},
26673 { 0, 16, 6,65536}, { 0, 15, 6,32768},
26674 { 0, 14, 6,16384}, { 0, 13, 6, 8192},
26675 }; /* LL_defaultDTable */
26677 /* Default FSE distribution table for Offset Codes */
26678 static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
26679 { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
26680 /* nextState, nbAddBits, nbBits, baseVal */
26681 { 0, 0, 5, 0}, { 0, 6, 4, 61},
26682 { 0, 9, 5, 509}, { 0, 15, 5,32765},
26683 { 0, 21, 5,2097149}, { 0, 3, 5, 5},
26684 { 0, 7, 4, 125}, { 0, 12, 5, 4093},
26685 { 0, 18, 5,262141}, { 0, 23, 5,8388605},
26686 { 0, 5, 5, 29}, { 0, 8, 4, 253},
26687 { 0, 14, 5,16381}, { 0, 20, 5,1048573},
26688 { 0, 2, 5, 1}, { 16, 7, 4, 125},
26689 { 0, 11, 5, 2045}, { 0, 17, 5,131069},
26690 { 0, 22, 5,4194301}, { 0, 4, 5, 13},
26691 { 16, 8, 4, 253}, { 0, 13, 5, 8189},
26692 { 0, 19, 5,524285}, { 0, 1, 5, 1},
26693 { 16, 6, 4, 61}, { 0, 10, 5, 1021},
26694 { 0, 16, 5,65533}, { 0, 28, 5,268435453},
26695 { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
26696 { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
26697 }; /* OF_defaultDTable */
26700 /* Default FSE distribution table for Match Lengths */
26701 static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
26702 { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
26703 /* nextState, nbAddBits, nbBits, baseVal */
26704 { 0, 0, 6, 3}, { 0, 0, 4, 4},
26705 { 32, 0, 5, 5}, { 0, 0, 5, 6},
26706 { 0, 0, 5, 8}, { 0, 0, 5, 9},
26707 { 0, 0, 5, 11}, { 0, 0, 6, 13},
26708 { 0, 0, 6, 16}, { 0, 0, 6, 19},
26709 { 0, 0, 6, 22}, { 0, 0, 6, 25},
26710 { 0, 0, 6, 28}, { 0, 0, 6, 31},
26711 { 0, 0, 6, 34}, { 0, 1, 6, 37},
26712 { 0, 1, 6, 41}, { 0, 2, 6, 47},
26713 { 0, 3, 6, 59}, { 0, 4, 6, 83},
26714 { 0, 7, 6, 131}, { 0, 9, 6, 515},
26715 { 16, 0, 4, 4}, { 0, 0, 4, 5},
26716 { 32, 0, 5, 6}, { 0, 0, 5, 7},
26717 { 32, 0, 5, 9}, { 0, 0, 5, 10},
26718 { 0, 0, 6, 12}, { 0, 0, 6, 15},
26719 { 0, 0, 6, 18}, { 0, 0, 6, 21},
26720 { 0, 0, 6, 24}, { 0, 0, 6, 27},
26721 { 0, 0, 6, 30}, { 0, 0, 6, 33},
26722 { 0, 1, 6, 35}, { 0, 1, 6, 39},
26723 { 0, 2, 6, 43}, { 0, 3, 6, 51},
26724 { 0, 4, 6, 67}, { 0, 5, 6, 99},
26725 { 0, 8, 6, 259}, { 32, 0, 4, 4},
26726 { 48, 0, 4, 4}, { 16, 0, 4, 5},
26727 { 32, 0, 5, 7}, { 32, 0, 5, 8},
26728 { 32, 0, 5, 10}, { 32, 0, 5, 11},
26729 { 0, 0, 6, 14}, { 0, 0, 6, 17},
26730 { 0, 0, 6, 20}, { 0, 0, 6, 23},
26731 { 0, 0, 6, 26}, { 0, 0, 6, 29},
26732 { 0, 0, 6, 32}, { 0, 16, 6,65539},
26733 { 0, 15, 6,32771}, { 0, 14, 6,16387},
26734 { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
26735 { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
26736 }; /* ML_defaultDTable */
26739 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
26742 ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
26743 ZSTD_seqSymbol* const cell = dt + 1;
26745 DTableH->tableLog = 0;
26746 DTableH->fastMode = 0;
26749 cell->nextState = 0;
26750 assert(nbAddBits < 255);
26751 cell->nbAdditionalBits = (BYTE)nbAddBits;
26752 cell->baseValue = baseValue;
26756 /* ZSTD_buildFSETable() :
26757 * generate FSE decoding table for one symbol (ll, ml or off)
26758 * cannot fail if input is valid =>
26759 * all inputs are presumed validated at this stage */
26761 ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
26762 const short* normalizedCounter, unsigned maxSymbolValue,
26763 const U32* baseValue, const U32* nbAdditionalBits,
26766 ZSTD_seqSymbol* const tableDecode = dt+1;
26767 U16 symbolNext[MaxSeq+1];
26769 U32 const maxSV1 = maxSymbolValue + 1;
26770 U32 const tableSize = 1 << tableLog;
26771 U32 highThreshold = tableSize-1;
26773 /* Sanity Checks */
26774 assert(maxSymbolValue <= MaxSeq);
26775 assert(tableLog <= MaxFSELog);
26777 /* Init, lay down lowprob symbols */
26778 { ZSTD_seqSymbol_header DTableH;
26779 DTableH.tableLog = tableLog;
26780 DTableH.fastMode = 1;
26781 { S16 const largeLimit= (S16)(1 << (tableLog-1));
26783 for (s=0; s<maxSV1; s++) {
26784 if (normalizedCounter[s]==-1) {
26785 tableDecode[highThreshold--].baseValue = s;
26788 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
26789 assert(normalizedCounter[s]>=0);
26790 symbolNext[s] = (U16)normalizedCounter[s];
26792 memcpy(dt, &DTableH, sizeof(DTableH));
26795 /* Spread symbols */
26796 { U32 const tableMask = tableSize-1;
26797 U32 const step = FSE_TABLESTEP(tableSize);
26798 U32 s, position = 0;
26799 for (s=0; s<maxSV1; s++) {
26801 for (i=0; i<normalizedCounter[s]; i++) {
26802 tableDecode[position].baseValue = s;
26803 position = (position + step) & tableMask;
26804 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
26806 assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
26809 /* Build Decoding table */
26811 for (u=0; u<tableSize; u++) {
26812 U32 const symbol = tableDecode[u].baseValue;
26813 U32 const nextState = symbolNext[symbol]++;
26814 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
26815 tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
26816 assert(nbAdditionalBits[symbol] < 255);
26817 tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
26818 tableDecode[u].baseValue = baseValue[symbol];
26823 /*! ZSTD_buildSeqTable() :
26824 * @return : nb bytes read from src,
26825 * or an error code if it fails */
26826 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
26827 symbolEncodingType_e type, unsigned max, U32 maxLog,
26828 const void* src, size_t srcSize,
26829 const U32* baseValue, const U32* nbAdditionalBits,
26830 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
26831 int ddictIsCold, int nbSeq)
26836 RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
26837 RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
26838 { U32 const symbol = *(const BYTE*)src;
26839 U32 const baseline = baseValue[symbol];
26840 U32 const nbBits = nbAdditionalBits[symbol];
26841 ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
26843 *DTablePtr = DTableSpace;
26846 *DTablePtr = defaultTable;
26849 RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
26850 /* prefetch FSE table if used */
26851 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
26852 const void* const pStart = *DTablePtr;
26853 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
26854 PREFETCH_AREA(pStart, pSize);
26857 case set_compressed :
26858 { unsigned tableLog;
26859 S16 norm[MaxSeq+1];
26860 size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
26861 RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
26862 RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
26863 ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
26864 *DTablePtr = DTableSpace;
26869 RETURN_ERROR(GENERIC, "impossible");
26873 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
26874 const void* src, size_t srcSize)
26876 const BYTE* const istart = (const BYTE* const)src;
26877 const BYTE* const iend = istart + srcSize;
26878 const BYTE* ip = istart;
26880 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
26883 RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
26889 RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
26892 if (nbSeq > 0x7F) {
26893 if (nbSeq == 0xFF) {
26894 RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
26895 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
26897 RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
26898 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
26903 /* FSE table descriptors */
26904 RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
26905 { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
26906 symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
26907 symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
26910 /* Build DTables */
26911 { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
26912 LLtype, MaxLL, LLFSELog,
26915 LL_defaultDTable, dctx->fseEntropy,
26916 dctx->ddictIsCold, nbSeq);
26917 RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
26921 { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
26922 OFtype, MaxOff, OffFSELog,
26925 OF_defaultDTable, dctx->fseEntropy,
26926 dctx->ddictIsCold, nbSeq);
26927 RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
26931 { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
26932 MLtype, MaxML, MLFSELog,
26935 ML_defaultDTable, dctx->fseEntropy,
26936 dctx->ddictIsCold, nbSeq);
26937 RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
26948 size_t matchLength;
26955 const ZSTD_seqSymbol* table;
26959 BIT_DStream_t DStream;
26960 ZSTD_fseState stateLL;
26961 ZSTD_fseState stateOffb;
26962 ZSTD_fseState stateML;
26963 size_t prevOffset[ZSTD_REP_NUM];
26964 const BYTE* prefixStart;
26965 const BYTE* dictEnd;
26969 /*! ZSTD_overlapCopy8() :
26970 * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
26971 * If the offset is < 8 then the offset is spread to at least 8 bytes.
26973 * Precondition: *ip <= *op
26974 * Postcondition: *op - *ip >= 8
26976 HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
26977 assert(*ip <= *op);
26979 /* close range match, overlap */
26980 static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
26981 static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
26982 int const sub2 = dec64table[offset];
26983 (*op)[0] = (*ip)[0];
26984 (*op)[1] = (*ip)[1];
26985 (*op)[2] = (*ip)[2];
26986 (*op)[3] = (*ip)[3];
26987 *ip += dec32table[offset];
26988 ZSTD_copy4(*op+4, *ip);
26991 ZSTD_copy8(*op, *ip);
26995 assert(*op - *ip >= 8);
26998 /*! ZSTD_safecopy() :
26999 * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
27000 * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
27001 * This function is only called in the uncommon case where the sequence is near the end of the block. It
27002 * should be fast for a single long sequence, but can be slow for several short sequences.
27004 * @param ovtype controls the overlap detection
27005 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
27006 * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
27007 * The src buffer must be before the dst buffer.
27009 static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
27010 ptrdiff_t const diff = op - ip;
27011 BYTE* const oend = op + length;
27013 assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
27014 (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
27017 /* Handle short lengths. */
27018 while (op < oend) *op++ = *ip++;
27021 if (ovtype == ZSTD_overlap_src_before_dst) {
27022 /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
27023 assert(length >= 8);
27024 ZSTD_overlapCopy8(&op, &ip, diff);
27025 assert(op - ip >= 8);
27026 assert(op <= oend);
27029 if (oend <= oend_w) {
27030 /* No risk of overwrite. */
27031 ZSTD_wildcopy(op, ip, length, ovtype);
27034 if (op <= oend_w) {
27035 /* Wildcopy until we get close to the end. */
27036 assert(oend > oend_w);
27037 ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
27041 /* Handle the leftovers. */
27042 while (op < oend) *op++ = *ip++;
27045 /* ZSTD_execSequenceEnd():
27046 * This version handles cases that are near the end of the output buffer. It requires
27047 * more careful checks to make sure there is no overflow. By separating out these hard
27048 * and unlikely cases, we can speed up the common cases.
27050 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
27051 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
27054 size_t ZSTD_execSequenceEnd(BYTE* op,
27055 BYTE* const oend, seq_t sequence,
27056 const BYTE** litPtr, const BYTE* const litLimit,
27057 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
27059 BYTE* const oLitEnd = op + sequence.litLength;
27060 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
27061 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
27062 const BYTE* match = oLitEnd - sequence.offset;
27063 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
27065 /* bounds checks : careful of address space overflow in 32-bit mode */
27066 RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
27067 RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
27068 assert(op < op + sequenceLength);
27069 assert(oLitEnd < op + sequenceLength);
27071 /* copy literals */
27072 ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
27077 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
27078 /* offset beyond prefix */
27079 RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
27080 match = dictEnd - (prefixStart-match);
27081 if (match + sequence.matchLength <= dictEnd) {
27082 memmove(oLitEnd, match, sequence.matchLength);
27083 return sequenceLength;
27085 /* span extDict & currentPrefixSegment */
27086 { size_t const length1 = dictEnd - match;
27087 memmove(oLitEnd, match, length1);
27088 op = oLitEnd + length1;
27089 sequence.matchLength -= length1;
27090 match = prefixStart;
27092 ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
27093 return sequenceLength;
27097 size_t ZSTD_execSequence(BYTE* op,
27098 BYTE* const oend, seq_t sequence,
27099 const BYTE** litPtr, const BYTE* const litLimit,
27100 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
27102 BYTE* const oLitEnd = op + sequence.litLength;
27103 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
27104 BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
27105 BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
27106 const BYTE* const iLitEnd = *litPtr + sequence.litLength;
27107 const BYTE* match = oLitEnd - sequence.offset;
27109 assert(op != NULL /* Precondition */);
27110 assert(oend_w < oend /* No underflow */);
27111 /* Handle edge cases in a slow path:
27112 * - Read beyond end of literals
27113 * - Match end is within WILDCOPY_OVERLIMIT of oend
27114 * - 32-bit mode and the match length overflows
27117 iLitEnd > litLimit ||
27118 oMatchEnd > oend_w ||
27119 (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
27120 return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
27122 /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
27123 assert(op <= oLitEnd /* No overflow */);
27124 assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
27125 assert(oMatchEnd <= oend /* No underflow */);
27126 assert(iLitEnd <= litLimit /* Literal length is in bounds */);
27127 assert(oLitEnd <= oend_w /* Can wildcopy literals */);
27128 assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
27131 * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
27132 * We likely don't need the full 32-byte wildcopy.
27134 assert(WILDCOPY_OVERLENGTH >= 16);
27135 ZSTD_copy16(op, (*litPtr));
27136 if (UNLIKELY(sequence.litLength > 16)) {
27137 ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
27140 *litPtr = iLitEnd; /* update for next sequence */
27143 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
27144 /* offset beyond prefix -> go into extDict */
27145 RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
27146 match = dictEnd + (match - prefixStart);
27147 if (match + sequence.matchLength <= dictEnd) {
27148 memmove(oLitEnd, match, sequence.matchLength);
27149 return sequenceLength;
27151 /* span extDict & currentPrefixSegment */
27152 { size_t const length1 = dictEnd - match;
27153 memmove(oLitEnd, match, length1);
27154 op = oLitEnd + length1;
27155 sequence.matchLength -= length1;
27156 match = prefixStart;
27158 /* Match within prefix of 1 or more bytes */
27159 assert(op <= oMatchEnd);
27160 assert(oMatchEnd <= oend_w);
27161 assert(match >= prefixStart);
27162 assert(sequence.matchLength >= 1);
27164 /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
27165 * without overlap checking.
27167 if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
27168 /* We bet on a full wildcopy for matches, since we expect matches to be
27169 * longer than literals (in general). In silesia, ~10% of matches are longer
27172 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
27173 return sequenceLength;
27175 assert(sequence.offset < WILDCOPY_VECLEN);
27177 /* Copy 8 bytes and spread the offset to be >= 8. */
27178 ZSTD_overlapCopy8(&op, &match, sequence.offset);
27180 /* If the match length is > 8 bytes, then continue with the wildcopy. */
27181 if (sequence.matchLength > 8) {
27182 assert(op < oMatchEnd);
27183 ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
27185 return sequenceLength;
27189 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
27191 const void* ptr = dt;
27192 const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
27193 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
27194 DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
27195 (U32)DStatePtr->state, DTableH->tableLog);
27196 BIT_reloadDStream(bitD);
27197 DStatePtr->table = dt + 1;
27200 FORCE_INLINE_TEMPLATE void
27201 ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
27203 ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
27204 U32 const nbBits = DInfo.nbBits;
27205 size_t const lowBits = BIT_readBits(bitD, nbBits);
27206 DStatePtr->state = DInfo.nextState + lowBits;
27209 FORCE_INLINE_TEMPLATE void
27210 ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
27212 U32 const nbBits = DInfo.nbBits;
27213 size_t const lowBits = BIT_readBits(bitD, nbBits);
27214 DStatePtr->state = DInfo.nextState + lowBits;
27217 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
27218 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
27219 * bits before reloading. This value is the maximum number of bytes we read
27220 * after reloading when we are decoding long offsets.
27222 #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
27223 (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
27224 ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
27227 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
27228 typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
27230 FORCE_INLINE_TEMPLATE seq_t
27231 ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
27234 ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
27235 ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
27236 ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
27237 U32 const llBase = llDInfo.baseValue;
27238 U32 const mlBase = mlDInfo.baseValue;
27239 U32 const ofBase = ofDInfo.baseValue;
27240 BYTE const llBits = llDInfo.nbAdditionalBits;
27241 BYTE const mlBits = mlDInfo.nbAdditionalBits;
27242 BYTE const ofBits = ofDInfo.nbAdditionalBits;
27243 BYTE const totalBits = llBits+mlBits+ofBits;
27248 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
27249 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
27250 assert(ofBits <= MaxOff);
27251 if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
27252 U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
27253 offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
27254 BIT_reloadDStream(&seqState->DStream);
27255 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
27256 assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
27258 offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
27259 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
27261 seqState->prevOffset[2] = seqState->prevOffset[1];
27262 seqState->prevOffset[1] = seqState->prevOffset[0];
27263 seqState->prevOffset[0] = offset;
27265 U32 const ll0 = (llBase == 0);
27266 if (LIKELY((ofBits == 0))) {
27268 offset = seqState->prevOffset[0];
27270 offset = seqState->prevOffset[1];
27271 seqState->prevOffset[1] = seqState->prevOffset[0];
27272 seqState->prevOffset[0] = offset;
27275 offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
27276 { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
27277 temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
27278 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
27279 seqState->prevOffset[1] = seqState->prevOffset[0];
27280 seqState->prevOffset[0] = offset = temp;
27282 seq.offset = offset;
27285 seq.matchLength = mlBase;
27287 seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
27289 if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
27290 BIT_reloadDStream(&seqState->DStream);
27291 if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
27292 BIT_reloadDStream(&seqState->DStream);
27293 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
27294 ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
27296 seq.litLength = llBase;
27298 seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
27301 BIT_reloadDStream(&seqState->DStream);
27303 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
27304 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
27306 if (prefetch == ZSTD_p_prefetch) {
27307 size_t const pos = seqState->pos + seq.litLength;
27308 const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
27309 seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
27310 * No consequence though : no memory access will occur, offset is only used for prefetching */
27311 seqState->pos = pos + seq.matchLength;
27314 /* ANS state update
27315 * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
27316 * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
27317 * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
27318 * better option, so it is the default for other compilers. But, if you
27319 * measure that it is worse, please put up a pull request.
27322 #if defined(__GNUC__) && !defined(__clang__)
27323 const int kUseUpdateFseState = 1;
27325 const int kUseUpdateFseState = 0;
27327 if (kUseUpdateFseState) {
27328 ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
27329 ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
27330 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
27331 ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
27333 ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
27334 ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
27335 if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
27336 ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
27343 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
27344 static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
27346 size_t const windowSize = dctx->fParams.windowSize;
27347 /* No dictionary used. */
27348 if (dctx->dictContentEndForFuzzing == NULL) return 0;
27349 /* Dictionary is our prefix. */
27350 if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
27351 /* Dictionary is not our ext-dict. */
27352 if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
27353 /* Dictionary is not within our window size. */
27354 if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
27355 /* Dictionary is active. */
27359 MEM_STATIC void ZSTD_assertValidSequence(
27360 ZSTD_DCtx const* dctx,
27361 BYTE const* op, BYTE const* oend,
27363 BYTE const* prefixStart, BYTE const* virtualStart)
27365 size_t const windowSize = dctx->fParams.windowSize;
27366 size_t const sequenceSize = seq.litLength + seq.matchLength;
27367 BYTE const* const oLitEnd = op + seq.litLength;
27368 DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
27369 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
27370 assert(op <= oend);
27371 assert((size_t)(oend - op) >= sequenceSize);
27372 assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
27373 if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
27374 size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
27375 /* Offset must be within the dictionary. */
27376 assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
27377 assert(seq.offset <= windowSize + dictSize);
27379 /* Offset must be within our window. */
27380 assert(seq.offset <= windowSize);
27385 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
27386 FORCE_INLINE_TEMPLATE size_t
27388 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
27389 void* dst, size_t maxDstSize,
27390 const void* seqStart, size_t seqSize, int nbSeq,
27391 const ZSTD_longOffset_e isLongOffset,
27394 const BYTE* ip = (const BYTE*)seqStart;
27395 const BYTE* const iend = ip + seqSize;
27396 BYTE* const ostart = (BYTE* const)dst;
27397 BYTE* const oend = ostart + maxDstSize;
27399 const BYTE* litPtr = dctx->litPtr;
27400 const BYTE* const litEnd = litPtr + dctx->litSize;
27401 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
27402 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
27403 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
/* NOTE(review): tail of ZSTD_decompressSequences_body — the signature and
 * local declarations (ip, iend, op, oend, litPtr, prefixStart, vBase,
 * dictEnd, error, nbSeq) begin earlier in the file; several interior
 * lines are elided in this excerpt. Code below is unchanged.
 * This is the "short offsets" decoder: sequences are decoded and executed
 * one at a time, with no prefetch pipeline. */
27404 DEBUGLOG(5, "ZSTD_decompressSequences_body");
27407 /* Regen sequences */
27409 seqState_t seqState;
/* Load the repeat-offset history saved from the previous block into the
 * sequence-decoding state. */
27411 dctx->fseEntropy = 1;
27412 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
/* Open the backward bitstream over the sequences section; a failed init
 * is reported as corruption (the RETURN_ERROR_IF( opener for these two
 * continuation lines is elided in this excerpt). */
27414 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
27415 corruption_detected, "");
/* Prime the three interleaved FSE states (literal length, offset code,
 * match length) from the tables decoded by ZSTD_decodeSeqHeaders(). */
27416 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
27417 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
27418 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
27419 assert(dst != NULL);
/* The error-deferral logic below relies on this ordering of the
 * BIT_DStream_* enum values. */
27421 ZSTD_STATIC_ASSERT(
27422 BIT_DStream_unfinished < BIT_DStream_completed &&
27423 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
27424 BIT_DStream_completed < BIT_DStream_overflow);
27426 #if defined(__GNUC__) && defined(__x86_64__)
27427 /* Align the decompression loop to 32 + 16 bytes.
27429 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
27430 * speed swings based on the alignment of the decompression loop. This
27431 * performance swing is caused by parts of the decompression loop falling
27432 * out of the DSB. The entire decompression loop should fit in the DSB,
27433 * when it can't we get much worse performance. You can measure if you've
27434 * hit the good case or the bad case with this perf command for some
27435 * compressed file test.zst:
27437 * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
27438 * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
27440 * If you see most cycles served out of the MITE you've hit the bad case.
27441 * If you see most cycles served out of the DSB you've hit the good case.
27442 * If it is pretty even then you may be in an okay case.
27444 * I've been able to reproduce this issue on the following CPUs:
27445 * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
27446 * Use Instruments->Counters to get DSB/MITE cycles.
27447 * I never got performance swings, but I was able to
27448 * go from the good case of mostly DSB to half of the
27449 * cycles served from MITE.
27450 * - Coffeelake: Intel i9-9900k
27452 * I haven't been able to reproduce the instability or DSB misses on any
27453 * of the following CPUS:
27455 * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
27458 * If you are seeing performance stability this script can help test.
27459 * It tests on 4 commits in zstd where I saw performance change.
27461 * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
27463 __asm__(".p2align 5");
27465 __asm__(".p2align 4");
/* Main decode loop. NOTE(review): the `for (;;)` (or equivalent) loop
 * header is elided in this excerpt; the loop body starts here. Each
 * iteration decodes one sequence and immediately executes it (copies
 * literals then the match) into the output buffer. */
27468 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
27469 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
27470 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
27471 assert(!ZSTD_isError(oneSeqSize));
27472 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
27474 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
27475 BIT_reloadDStream(&(seqState.DStream));
27476 /* gcc and clang both don't like early returns in this loop.
27477 * gcc doesn't like early breaks either.
27478 * Instead save an error and report it at the end.
27479 * When there is an error, don't increment op, so we don't
27482 if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
27483 else op += oneSeqSize;
27484 if (UNLIKELY(!--nbSeq)) break;
27487 /* check if reached exact end */
27488 DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
/* Deferred error from inside the loop is surfaced only here. */
27489 if (ZSTD_isError(error)) return error;
/* A well-formed block consumes exactly nbSeq sequences and drains the
 * bitstream to the completed state; anything else is corruption. */
27490 RETURN_ERROR_IF(nbSeq, corruption_detected, "");
27491 RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
27492 /* save reps for next block */
27493 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
27496 /* last literal segment */
/* Whatever literals remain after the last sequence are appended verbatim. */
27497 { size_t const lastLLSize = litEnd - litPtr;
27498 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
27500 memcpy(op, litPtr, lastLLSize);
/* Non-BMI2 instantiation of the force-inlined ZSTD_decompressSequences_body
 * template. NOTE(review): the `static size_t` line, opening/closing braces,
 * and the `const int frame)` parameter line are elided in this excerpt. */
27509 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
27510 void* dst, size_t maxDstSize,
27511 const void* seqStart, size_t seqSize, int nbSeq,
27512 const ZSTD_longOffset_e isLongOffset,
27515 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27517 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
27519 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* "Long offsets" decoder: keeps a small ring of STORED_SEQS decoded-but-not-
 * yet-executed sequences so that each match source can be prefetched
 * ADVANCED_SEQS sequences ahead of its use, hiding main-memory latency for
 * far-away (out-of-cache) matches.
 * NOTE(review): interior lines (the `ZSTD_DCtx* dctx,` parameter line, the
 * `const int frame)` line, `BYTE* op = ostart;`, `int seqNb;`, and closing
 * braces/return) are elided in this excerpt. Code below is unchanged. */
27520 FORCE_INLINE_TEMPLATE size_t
27521 ZSTD_decompressSequencesLong_body(
27523 void* dst, size_t maxDstSize,
27524 const void* seqStart, size_t seqSize, int nbSeq,
27525 const ZSTD_longOffset_e isLongOffset,
27528 const BYTE* ip = (const BYTE*)seqStart;
27529 const BYTE* const iend = ip + seqSize;
27530 BYTE* const ostart = (BYTE* const)dst;
27531 BYTE* const oend = ostart + maxDstSize;
27533 const BYTE* litPtr = dctx->litPtr;
27534 const BYTE* const litEnd = litPtr + dctx->litSize;
27535 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
27536 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
27537 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
27540 /* Regen sequences */
/* Ring buffer of decoded sequences; STORED_SEQS must stay a power of two
 * so STORED_SEQS_MASK works as an index mask. */
27542 #define STORED_SEQS 4
27543 #define STORED_SEQS_MASK (STORED_SEQS-1)
27544 #define ADVANCED_SEQS 4
27545 seq_t sequences[STORED_SEQS];
27546 int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
27547 seqState_t seqState;
27549 dctx->fseEntropy = 1;
/* Carry the repeat-offset history from the previous block. */
27550 { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
/* The prefetch path (ZSTD_p_prefetch) resolves match addresses at decode
 * time, so the state needs the output geometry up front. */
27551 seqState.prefixStart = prefixStart;
27552 seqState.pos = (size_t)(op-prefixStart);
27553 seqState.dictEnd = dictEnd;
27554 assert(dst != NULL);
27555 assert(iend >= ip);
/* Bitstream init failure is corruption (the RETURN_ERROR_IF( opener for
 * these two continuation lines is elided in this excerpt). */
27557 ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
27558 corruption_detected, "");
27559 ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
27560 ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
27561 ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
27563 /* prepare in advance */
/* Fill the pipeline: decode the first seqAdvance sequences and start
 * prefetching their match sources before executing anything. */
27564 for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
27565 sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
27566 PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
/* If the bitstream ran dry before the pipeline filled, the block lied
 * about nbSeq. */
27568 RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
27570 /* decode and decompress */
/* Steady state: decode sequence N while executing sequence N-ADVANCED_SEQS
 * from the ring, whose match data the earlier prefetch should have pulled
 * into cache by now. */
27571 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
27572 seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
27573 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
27574 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
27575 assert(!ZSTD_isError(oneSeqSize));
27576 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
27578 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
27579 PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
27580 sequences[seqNb & STORED_SEQS_MASK] = sequence;
27583 RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
/* finish queue : drain the ADVANCED_SEQS sequences still sitting in the
 * ring once the bitstream is exhausted. */
27586 seqNb -= seqAdvance;
27587 for ( ; seqNb<nbSeq ; seqNb++) {
27588 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
27589 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
27590 assert(!ZSTD_isError(oneSeqSize));
27591 if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
27593 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
27597 /* save reps for next block */
27598 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
27601 /* last literal segment */
/* Remaining literals after the final sequence are copied verbatim. */
27602 { size_t const lastLLSize = litEnd - litPtr;
27603 RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
27605 memcpy(op, litPtr, lastLLSize);
/* Non-BMI2 instantiation of the force-inlined ZSTD_decompressSequencesLong_body
 * template. NOTE(review): the `static size_t` line, braces, and the
 * `const int frame)` parameter line are elided in this excerpt. */
27614 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
27615 void* dst, size_t maxDstSize,
27616 const void* seqStart, size_t seqSize, int nbSeq,
27617 const ZSTD_longOffset_e isLongOffset,
27620 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27622 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
27628 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* BMI2-targeted instantiation of the same inlined body: compiled with
 * TARGET_ATTRIBUTE("bmi2") so the inlined bit-reading helpers can use BMI2
 * instructions. Selected at runtime by ZSTD_decompressSequences() when the
 * CPU supports BMI2 (this region is presumably inside a DYNAMIC_BMI2 guard
 * — the #if line is elided; see the matching #endif /* DYNAMIC_BMI2 */
/* below). */
27629 static TARGET_ATTRIBUTE("bmi2") size_t
27631 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
27632 void* dst, size_t maxDstSize,
27633 const void* seqStart, size_t seqSize, int nbSeq,
27634 const ZSTD_longOffset_e isLongOffset,
27637 return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27639 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
27641 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* BMI2-targeted instantiation of the prefetching long-offsets body; runtime
 * counterpart selected by ZSTD_decompressSequencesLong() on BMI2-capable
 * CPUs. */
27642 static TARGET_ATTRIBUTE("bmi2") size_t
27643 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
27644 void* dst, size_t maxDstSize,
27645 const void* seqStart, size_t seqSize, int nbSeq,
27646 const ZSTD_longOffset_e isLongOffset,
27649 return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27651 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
27653 #endif /* DYNAMIC_BMI2 */
/* Common signature for all four decompressSequences variants.
 * NOTE(review): the `ZSTD_DCtx* dctx,` and `const int frame);` lines of the
 * typedef are elided in this excerpt. */
27655 typedef size_t (*ZSTD_decompressSequences_t)(
27657 void* dst, size_t maxDstSize,
27658 const void* seqStart, size_t seqSize, int nbSeq,
27659 const ZSTD_longOffset_e isLongOffset,
27662 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
/* Runtime dispatcher for the non-prefetching decoder: picks the BMI2 build
 * when available, otherwise the default build. NOTE(review): the
 * `static size_t` line and the DYNAMIC_BMI2 `if (dctx->bmi2)` guard lines
 * are elided in this excerpt — the two returns below are presumably the
 * BMI2 and default branches of that guard; confirm against upstream. */
27664 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
27665 const void* seqStart, size_t seqSize, int nbSeq,
27666 const ZSTD_longOffset_e isLongOffset,
27669 DEBUGLOG(5, "ZSTD_decompressSequences");
27672 return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27675 return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27677 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
27680 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
27681 /* ZSTD_decompressSequencesLong() :
27682 * decompression function triggered when a minimum share of offsets is considered "long",
27683 * aka out of cache.
27684 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
27685 * This function will try to mitigate main memory latency through the use of prefetching */
/* NOTE(review): as in ZSTD_decompressSequences above, the `static size_t`
 * line and the DYNAMIC_BMI2 `if (dctx->bmi2)` guard lines are elided here;
 * the two returns are presumably the BMI2 and default branches. */
27687 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
27688 void* dst, size_t maxDstSize,
27689 const void* seqStart, size_t seqSize, int nbSeq,
27690 const ZSTD_longOffset_e isLongOffset,
27693 DEBUGLOG(5, "ZSTD_decompressSequencesLong");
27696 return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27699 return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
27701 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
/* Only needed when both decoder variants are compiled in and the choice is
 * made at runtime. */
27705 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
27706 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
27707 /* ZSTD_getLongOffsetsShare() :
27708 * condition : offTable must be valid
27709 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
27710 * compared to maximum possible of (1<<OffFSELog) */
/* NOTE(review): the `static unsigned` line, `U32 u; U32 total = 0;`
 * declarations, and closing braces are elided in this excerpt. */
27712 ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
/* The first ZSTD_seqSymbol cell of the table doubles as a header holding
 * the table log; actual symbol cells start at offTable + 1. */
27714 const void* ptr = offTable;
27715 U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
27716 const ZSTD_seqSymbol* table = offTable + 1;
27717 U32 const max = 1 << tableLog;
27719 DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
27721 assert(max <= (1 << OffFSELog)); /* max not too large */
/* Count table cells whose offset code exceeds 22 extra bits, i.e. offsets
 * larger than 1<<23 — the "long" threshold in the function contract. */
27722 for (u=0; u<max; u++) {
27723 if (table[u].nbAdditionalBits > 22) total += 1;
27726 assert(tableLog <= OffFSELog);
/* Normalize the count to an OffFSELog-sized table so callers can compare
 * against fixed thresholds regardless of the actual tableLog. */
27727 total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
/* Decompresses one compressed block: decodes the literals section, builds the
 * FSE decoding tables from the sequences header, then dispatches to the
 * short- or long-offsets sequence decoder.
 * NOTE(review): the `static size_t` line, braces, and several interior lines
 * (e.g. `ip += litCSize;`, `int nbSeq;`, `ip += seqHSize;`) are elided in
 * this excerpt. Code below is unchanged. */
27734 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
27735 void* dst, size_t dstCapacity,
27736 const void* src, size_t srcSize, const int frame)
27737 { /* blockType == blockCompressed */
27738 const BYTE* ip = (const BYTE*)src;
27739 /* isLongOffset must be true if there are long offsets.
27740 * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
27741 * We don't expect that to be the case in 64-bit mode.
27742 * In block mode, window size is not known, so we have to be conservative.
27743 * (note: but it could be evaluated from current-lowLimit)
27745 ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
27746 DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
27748 RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
27750 /* Decode literals section */
27751 { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
27752 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
27753 if (ZSTD_isError(litCSize)) return litCSize;
27755 srcSize -= litCSize;
27758 /* Build Decoding Tables */
27760 /* These macros control at build-time which decompressor implementation
27761 * we use. If neither is defined, we do some inspection and dispatch at
/* Start with the prefetching decoder whenever the dictionary is cold
 * (freshly attached, likely out of cache). */
27764 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
27765 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
27766 int usePrefetchDecoder = dctx->ddictIsCold;
27769 size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
27770 if (ZSTD_isError(seqHSize)) return seqHSize;
27772 srcSize -= seqHSize;
27774 RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
/* Runtime heuristic: switch to the prefetching decoder when the window is
 * large (>16 MB, or unknown in block mode) and enough of the offset table's
 * probability mass lies beyond the 1<<23 "long" threshold. */
27776 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
27777 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
27778 if ( !usePrefetchDecoder
27779 && (!frame || (dctx->fParams.windowSize > (1<<24)))
27780 && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
27781 U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
27782 U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
27783 usePrefetchDecoder = (shareLongOffsets >= minShare);
/* Tables are now read; the dictionary is considered warm from here on. */
27787 dctx->ddictIsCold = 0;
27789 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
27790 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
27791 if (usePrefetchDecoder)
27793 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
27794 return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
27797 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
27799 return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
/* If the new destination does not continue exactly where the previous block
 * ended, re-anchor the windowing pointers: the old output range becomes the
 * "virtual dictionary" (dictEnd/virtualStart) and the new dst starts a fresh
 * prefix, so back-references across the discontinuity still resolve.
 * NOTE(review): the opening/closing brace lines are elided in this excerpt. */
27805 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
27807 if (dst != dctx->previousDstEnd) { /* not contiguous */
27808 dctx->dictEnd = dctx->previousDstEnd;
/* Place virtualStart so that (dst - virtualStart) preserves the distance
 * previously spanned by (previousDstEnd - prefixStart). */
27809 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
27810 dctx->prefixStart = dst;
27811 dctx->previousDstEnd = dst;
/* Public single-block entry point (block mode, frame == 0): fixes up window
 * continuity, decompresses the block, and records the new end of output for
 * the next call's continuity check.
 * NOTE(review): the opening brace, `size_t dSize;` declaration, `return
 * dSize;`, and closing brace lines are elided in this excerpt. */
27816 size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
27817 void* dst, size_t dstCapacity,
27818 const void* src, size_t srcSize)
27821 ZSTD_checkContinuity(dctx, dst);
27822 dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
27823 dctx->previousDstEnd = (char*)dst + dSize;
27826 /**** ended inlining decompress/zstd_decompress_block.c ****/