contrib/zstd/tests/decodecorpus.c

   1 /**
   2  * Copyright (c) 2017-present, Facebook, Inc.
   3  * All rights reserved.
   4  *
   5  * This source code is licensed under the BSD-style license found in the
   6  * LICENSE file in the root directory of this source tree. An additional grant
   7  * of patent rights can be found in the PATENTS file in the same directory.
   8  */
   9
  10 #include <limits.h>
  11 #include <math.h>
  12 #include <stddef.h>
  13 #include <stdio.h>
  14 #include <stdlib.h>
  15 #include <string.h>
  16 #include <time.h>
  17
  18 #include "zstd.h"
  19 #include "zstd_internal.h"
  20 #include "mem.h"
  21 #define ZDICT_STATIC_LINKING_ONLY
  22 #include "zdict.h"
  23
  24 // Direct access to internal compression functions is required
  25 #include "zstd_compress.c"
  26
  27 #define XXH_STATIC_LINKING_ONLY
  28 #include "xxhash.h"     /* XXH64 */
  29
  30 #ifndef MIN
  31     #define MIN(a, b) ((a) < (b) ? (a) : (b))
  32 #endif
  33
  34 #ifndef MAX_PATH
  35     #ifdef PATH_MAX
  36         #define MAX_PATH PATH_MAX
  37     #else
  38         #define MAX_PATH 256
  39     #endif
  40 #endif
  41
  42 /*-************************************
  43 *  DISPLAY Macros
  44 **************************************/
  45 #define DISPLAY(...)          fprintf(stderr, __VA_ARGS__)
  46 #define DISPLAYLEVEL(l, ...)  if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
  47 static U32 g_displayLevel = 0;
  48
  49 #define DISPLAYUPDATE(...)                                                     \
  50     do {                                                                       \
  51         if ((clockSpan(g_displayClock) > g_refreshRate) ||                     \
  52             (g_displayLevel >= 4)) {                                           \
  53             g_displayClock = clock();                                          \
  54             DISPLAY(__VA_ARGS__);                                              \
  55             if (g_displayLevel >= 4) fflush(stderr);                           \
  56         }                                                                      \
  57     } while (0)
  58 static const clock_t g_refreshRate = CLOCKS_PER_SEC / 6;
  59 static clock_t g_displayClock = 0;
  60
  61 static clock_t clockSpan(clock_t cStart)
  62 {
  63     return clock() - cStart;   /* works even when overflow; max span ~ 30mn */
  64 }
  65
  66 #define CHECKERR(code)                                                         \
  67     do {                                                                       \
  68         if (ZSTD_isError(code)) {                                              \
  69             DISPLAY("Error occurred while generating data: %s\n",              \
  70                     ZSTD_getErrorName(code));                                  \
  71             exit(1);                                                           \
  72         }                                                                      \
  73     } while (0)
  74
  75 /*-*******************************************************
  76 *  Random function
  77 *********************************************************/
  78 static unsigned RAND(unsigned* src)
  79 {
  80 #define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
  81     static const U32 prime1 = 2654435761U;
  82     static const U32 prime2 = 2246822519U;
  83     U32 rand32 = *src;
  84     rand32 *= prime1;
  85     rand32 += prime2;
  86     rand32  = RAND_rotl32(rand32, 13);
  87     *src = rand32;
  88     return RAND_rotl32(rand32, 27);
  89 #undef RAND_rotl32
  90 }
  91
  92 #define DISTSIZE (8192)
  93
  94 /* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
  95 static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb)
  96 {
  97     size_t i;
  98     BYTE* op = ptr;
  99
 100     for (i = 0; i < size; i++) {
 101         op[i] = (BYTE) (RAND(seed) % (maxSymb + 1));
 102     }
 103 }
 104
 105 /* Write `size` random bytes into `ptr` */
 106 static void RAND_buffer(U32* seed, void* ptr, size_t size)
 107 {
 108     size_t i;
 109     BYTE* op = ptr;
 110
 111     for (i = 0; i + 4 <= size; i += 4) {
 112         MEM_writeLE32(op + i, RAND(seed));
 113     }
 114     for (; i < size; i++) {
 115         op[i] = RAND(seed) & 0xff;
 116     }
 117 }
 118
 119 /* Write `size` bytes into `ptr` following the distribution `dist` */
 120 static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size)
 121 {
 122     size_t i;
 123     BYTE* op = ptr;
 124
 125     for (i = 0; i < size; i++) {
 126         op[i] = dist[RAND(seed) % DISTSIZE];
 127     }
 128 }
 129
 130 /* Generate a random distribution where the frequency of each symbol follows a
 131  * geometric distribution defined by `weight`
 132  * `dist` should have size at least `DISTSIZE` */
 133 static void RAND_genDist(U32* seed, BYTE* dist, double weight)
 134 {
 135     size_t i = 0;
 136     size_t statesLeft = DISTSIZE;
 137     BYTE symb = (BYTE) (RAND(seed) % 256);
 138     BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
 139
 140     while (i < DISTSIZE) {
 141         size_t states = ((size_t)(weight * statesLeft)) + 1;
 142         size_t j;
 143         for (j = 0; j < states && i < DISTSIZE; j++, i++) {
 144             dist[i] = symb;
 145         }
 146
 147         symb += step;
 148         statesLeft -= states;
 149     }
 150 }
 151
 152 /* Generates a random number in the range [min, max) */
 153 static inline U32 RAND_range(U32* seed, U32 min, U32 max)
 154 {
 155     return (RAND(seed) % (max-min)) + min;
 156 }
 157
 158 #define ROUND(x) ((U32)(x + 0.5))
 159
 160 /* Generates a random number in an exponential distribution with mean `mean` */
 161 static double RAND_exp(U32* seed, double mean)
 162 {
 163     double const u = RAND(seed) / (double) UINT_MAX;
 164     return log(1-u) * (-mean);
 165 }
 166
 167 /*-*******************************************************
 168 *  Constants and Structs
 169 *********************************************************/
 170 const char *BLOCK_TYPES[] = {"raw", "rle", "compressed"};
 171
 172 #define MAX_DECOMPRESSED_SIZE_LOG 20
 173 #define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
 174
 175 #define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
 176 #define MAX_BLOCK_SIZE (128ULL * 1024)
 177
 178 #define MIN_SEQ_LEN (3)
 179 #define MAX_NB_SEQ ((MAX_BLOCK_SIZE + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
 180
 181 BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE];
 182 BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2];
 183 BYTE LITERAL_BUFFER[MAX_BLOCK_SIZE];
 184
 185 seqDef SEQUENCE_BUFFER[MAX_NB_SEQ];
 186 BYTE SEQUENCE_LITERAL_BUFFER[MAX_BLOCK_SIZE]; /* storeSeq expects a place to copy literals to */
 187 BYTE SEQUENCE_LLCODE[MAX_BLOCK_SIZE];
 188 BYTE SEQUENCE_MLCODE[MAX_BLOCK_SIZE];
 189 BYTE SEQUENCE_OFCODE[MAX_BLOCK_SIZE];
 190
 191 unsigned WKSP[1024];
 192
 193 typedef struct {
 194     size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */
 195     unsigned windowSize; /* contentSize >= windowSize means single segment */
 196 } frameHeader_t;
 197
 198 /* For repeat modes */
 199 typedef struct {
 200     U32 rep[ZSTD_REP_NUM];
 201
 202     int hufInit;
 203     /* the distribution used in the previous block for repeat mode */
 204     BYTE hufDist[DISTSIZE];
 205     U32 hufTable [256]; /* HUF_CElt is an incomplete type */
 206
 207     int fseInit;
 208     FSE_CTable offcodeCTable  [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
 209     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
 210     FSE_CTable litlengthCTable  [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
 211
 212     /* Symbols that were present in the previous distribution, for use with
 213      * set_repeat */
 214     BYTE litlengthSymbolSet[36];
 215     BYTE offsetSymbolSet[29];
 216     BYTE matchlengthSymbolSet[53];
 217 } cblockStats_t;
 218
 219 typedef struct {
 220     void* data;
 221     void* dataStart;
 222     void* dataEnd;
 223
 224     void* src;
 225     void* srcStart;
 226     void* srcEnd;
 227
 228     frameHeader_t header;
 229
 230     cblockStats_t stats;
 231     cblockStats_t oldStats; /* so they can be rolled back if uncompressible */
 232 } frame_t;
 233
 234 typedef struct {
 235     int useDict;
 236     U32 dictID;
 237     size_t dictContentSize;
 238     BYTE* dictContent;
 239 } dictInfo;
 240 /*-*******************************************************
 241 *  Generator Functions
 242 *********************************************************/
 243
 244 struct {
 245     int contentSize; /* force the content size to be present */
 246 } opts; /* advanced options on generation */
 247
 248 /* Generate and write a random frame header */
 249 static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
 250 {
 251     BYTE* const op = frame->data;
 252     size_t pos = 0;
 253     frameHeader_t fh;
 254
 255     BYTE windowByte = 0;
 256
 257     int singleSegment = 0;
 258     int contentSizeFlag = 0;
 259     int fcsCode = 0;
 260
 261     memset(&fh, 0, sizeof(fh));
 262
 263     /* generate window size */
 264     {
 265         /* Follow window algorithm from specification */
 266         int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10);
 267         int const mantissa = RAND(seed) % 8;
 268         windowByte = (BYTE) ((exponent << 3) | mantissa);
 269         fh.windowSize = (1U << (exponent + 10));
 270         fh.windowSize += fh.windowSize / 8 * mantissa;
 271     }
 272
 273     {
 274         /* Generate random content size */
 275         size_t highBit;
 276         if (RAND(seed) & 7) {
 277             /* do content of at least 128 bytes */
 278             highBit = 1ULL << RAND_range(seed, 7, MAX_DECOMPRESSED_SIZE_LOG);
 279         } else if (RAND(seed) & 3) {
 280             /* do small content */
 281             highBit = 1ULL << RAND_range(seed, 0, 7);
 282         } else {
 283             /* 0 size frame */
 284             highBit = 0;
 285         }
 286         fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0;
 287
 288         /* provide size sometimes */
 289         contentSizeFlag = opts.contentSize | (RAND(seed) & 1);
 290
 291         if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) {
 292             /* do single segment sometimes */
 293             fh.windowSize = (U32) fh.contentSize;
 294             singleSegment = 1;
 295         }
 296     }
 297
 298     if (contentSizeFlag) {
 299         /* Determine how large fcs field has to be */
 300         int minFcsCode = (fh.contentSize >= 256) +
 301                                (fh.contentSize >= 65536 + 256) +
 302                                (fh.contentSize > 0xFFFFFFFFU);
 303         if (!singleSegment && !minFcsCode) {
 304             minFcsCode = 1;
 305         }
 306         fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode));
 307         if (fcsCode == 1 && fh.contentSize < 256) fcsCode++;
 308     }
 309
 310     /* write out the header */
 311     MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
 312     pos += 4;
 313
 314     {
 315         /*
 316          * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6)
 317          * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5)
 318          * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2)
 319          * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0)
 320          * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
 321          */
 322         int const dictBits = info.useDict ? 3 : 0;
 323         BYTE const frameHeaderDescriptor =
 324                 (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits);
 325         op[pos++] = frameHeaderDescriptor;
 326     }
 327
 328     if (!singleSegment) {
 329         op[pos++] = windowByte;
 330     }
 331     if (info.useDict) {
 332         MEM_writeLE32(op + pos, (U32) info.dictID);
 333         pos += 4;
 334     }
 335     if (contentSizeFlag) {
 336         switch (fcsCode) {
 337         default: /* Impossible */
 338         case 0: op[pos++] = (BYTE) fh.contentSize; break;
 339         case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break;
 340         case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break;
 341         case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break;
 342         }
 343     }
 344
 345     DISPLAYLEVEL(2, " frame content size:\t%u\n", (U32)fh.contentSize);
 346     DISPLAYLEVEL(2, " frame window size:\t%u\n", fh.windowSize);
 347     DISPLAYLEVEL(2, " content size flag:\t%d\n", contentSizeFlag);
 348     DISPLAYLEVEL(2, " single segment flag:\t%d\n", singleSegment);
 349
 350     frame->data = op + pos;
 351     frame->header = fh;
 352 }
 353
 354 /* Write a literal block in either raw or RLE form, return the literals size */
 355 static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
 356 {
 357     BYTE* op = (BYTE*)frame->data;
 358     int const type = RAND(seed) % 2;
 359     int const sizeFormatDesc = RAND(seed) % 8;
 360     size_t litSize;
 361     size_t maxLitSize = MIN(contentSize, MAX_BLOCK_SIZE);
 362
 363     if (sizeFormatDesc == 0) {
 364         /* Size_FormatDesc = ?0 */
 365         maxLitSize = MIN(maxLitSize, 31);
 366     } else if (sizeFormatDesc <= 4) {
 367         /* Size_FormatDesc = 01 */
 368         maxLitSize = MIN(maxLitSize, 4095);
 369     } else {
 370         /* Size_Format = 11 */
 371         maxLitSize = MIN(maxLitSize, 1048575);
 372     }
 373
 374     litSize = RAND(seed) % (maxLitSize + 1);
 375     if (frame->src == frame->srcStart && litSize == 0) {
 376         litSize = 1; /* no empty literals if there's nothing preceding this block */
 377     }
 378     if (litSize + 3 > contentSize) {
 379         litSize = contentSize; /* no matches shorter than 3 are allowed */
 380     }
 381     /* use smallest size format that fits */
 382     if (litSize < 32) {
 383         op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff;
 384         op += 1;
 385     } else if (litSize < 4096) {
 386         op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff;
 387         op[1] = (litSize >> 4) & 0xff;
 388         op += 2;
 389     } else {
 390         op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff;
 391         op[1] = (litSize >> 4) & 0xff;
 392         op[2] = (litSize >> 12) & 0xff;
 393         op += 3;
 394     }
 395
 396     if (type == 0) {
 397         /* Raw literals */
 398         DISPLAYLEVEL(4, "   raw literals\n");
 399
 400         RAND_buffer(seed, LITERAL_BUFFER, litSize);
 401         memcpy(op, LITERAL_BUFFER, litSize);
 402         op += litSize;
 403     } else {
 404         /* RLE literals */
 405         BYTE const symb = (BYTE) (RAND(seed) % 256);
 406
 407         DISPLAYLEVEL(4, "   rle literals: 0x%02x\n", (U32)symb);
 408
 409         memset(LITERAL_BUFFER, symb, litSize);
 410         op[0] = symb;
 411         op++;
 412     }
 413
 414     frame->data = op;
 415
 416     return litSize;
 417 }
 418
 419 /* Generate a Huffman header for the given source */
 420 static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize,
 421                                  const void* src, size_t srcSize)
 422 {
 423     BYTE* const ostart = (BYTE*)dst;
 424     BYTE* op = ostart;
 425
 426     unsigned huffLog = 11;
 427     U32 maxSymbolValue = 255;
 428
 429     U32 count[HUF_SYMBOLVALUE_MAX+1];
 430
 431     /* Scan input and build symbol stats */
 432     {   size_t const largest = FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP);
 433         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; }   /* single symbol, rle */
 434         if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
 435     }
 436
 437     /* Build Huffman Tree */
 438     /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
 439     huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1);
 440     DISPLAYLEVEL(6, "     huffman log: %u\n", huffLog);
 441     {   size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP));
 442         CHECKERR(maxBits);
 443         huffLog = (U32)maxBits;
 444     }
 445
 446     /* Write table description header */
 447     {   size_t const hSize = HUF_writeCTable (op, dstSize, hufTable, maxSymbolValue, huffLog);
 448         if (hSize + 12 >= srcSize) return 0;   /* not useful to try compression */
 449         op += hSize;
 450     }
 451
 452     return op - ostart;
 453 }
 454
 455 /* Write a Huffman coded literals block and return the litearls size */
 456 static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize)
 457 {
 458     BYTE* origop = (BYTE*)frame->data;
 459     BYTE* opend = (BYTE*)frame->dataEnd;
 460     BYTE* op;
 461     BYTE* const ostart = origop;
 462     int const sizeFormat = RAND(seed) % 4;
 463     size_t litSize;
 464     size_t hufHeaderSize = 0;
 465     size_t compressedSize = 0;
 466     size_t maxLitSize = MIN(contentSize-3, MAX_BLOCK_SIZE);
 467
 468     symbolEncodingType_e hType;
 469
 470     if (contentSize < 64) {
 471         /* make sure we get reasonably-sized literals for compression */
 472         return ERROR(GENERIC);
 473     }
 474
 475     DISPLAYLEVEL(4, "   compressed literals\n");
 476
 477     switch (sizeFormat) {
 478     case 0: /* fall through, size is the same as case 1 */
 479     case 1:
 480         maxLitSize = MIN(maxLitSize, 1023);
 481         origop += 3;
 482         break;
 483     case 2:
 484         maxLitSize = MIN(maxLitSize, 16383);
 485         origop += 4;
 486         break;
 487     case 3:
 488         maxLitSize = MIN(maxLitSize, 262143);
 489         origop += 5;
 490         break;
 491     default:; /* impossible */
 492     }
 493
 494     do {
 495         op = origop;
 496         do {
 497             litSize = RAND(seed) % (maxLitSize + 1);
 498         } while (litSize < 32); /* avoid small literal sizes */
 499         if (litSize + 3 > contentSize) {
 500             litSize = contentSize; /* no matches shorter than 3 are allowed */
 501         }
 502
 503         /* most of the time generate a new distribution */
 504         if ((RAND(seed) & 3) || !frame->stats.hufInit) {
 505             do {
 506                 if (RAND(seed) & 3) {
 507                     /* add 10 to ensure some compressability */
 508                     double const weight = ((RAND(seed) % 90) + 10) / 100.0;
 509
 510                     DISPLAYLEVEL(5, "    distribution weight: %d%%\n",
 511                                  (int)(weight * 100));
 512
 513                     RAND_genDist(seed, frame->stats.hufDist, weight);
 514                 } else {
 515                     /* sometimes do restricted range literals to force
 516                      * non-huffman headers */
 517                     DISPLAYLEVEL(5, "    small range literals\n");
 518                     RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE,
 519                                        15);
 520                 }
 521                 RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
 522                                 litSize);
 523
 524                 /* generate the header from the distribution instead of the
 525                  * actual data to avoid bugs with symbols that were in the
 526                  * distribution but never showed up in the output */
 527                 hufHeaderSize = writeHufHeader(
 528                         seed, (HUF_CElt*)frame->stats.hufTable, op, opend - op,
 529                         frame->stats.hufDist, DISTSIZE);
 530                 CHECKERR(hufHeaderSize);
 531                 /* repeat until a valid header is written */
 532             } while (hufHeaderSize == 0);
 533             op += hufHeaderSize;
 534             hType = set_compressed;
 535
 536             frame->stats.hufInit = 1;
 537         } else {
 538             /* repeat the distribution/table from last time */
 539             DISPLAYLEVEL(5, "    huffman repeat stats\n");
 540             RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER,
 541                             litSize);
 542             hufHeaderSize = 0;
 543             hType = set_repeat;
 544         }
 545
 546         do {
 547             compressedSize =
 548                     sizeFormat == 0
 549                             ? HUF_compress1X_usingCTable(
 550                                       op, opend - op, LITERAL_BUFFER, litSize,
 551                                       (HUF_CElt*)frame->stats.hufTable)
 552                             : HUF_compress4X_usingCTable(
 553                                       op, opend - op, LITERAL_BUFFER, litSize,
 554                                       (HUF_CElt*)frame->stats.hufTable);
 555             CHECKERR(compressedSize);
 556             /* this only occurs when it could not compress or similar */
 557         } while (compressedSize <= 0);
 558
 559         op += compressedSize;
 560
 561         compressedSize += hufHeaderSize;
 562         DISPLAYLEVEL(5, "    regenerated size: %u\n", (U32)litSize);
 563         DISPLAYLEVEL(5, "    compressed size: %u\n", (U32)compressedSize);
 564         if (compressedSize >= litSize) {
 565             DISPLAYLEVEL(5, "     trying again\n");
 566             /* if we have to try again, reset the stats so we don't accidentally
 567              * try to repeat a distribution we just made */
 568             frame->stats = frame->oldStats;
 569         } else {
 570             break;
 571         }
 572     } while (1);
 573
 574     /* write header */
 575     switch (sizeFormat) {
 576     case 0: /* fall through, size is the same as case 1 */
 577     case 1: {
 578         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
 579                            ((U32)compressedSize << 14);
 580         MEM_writeLE24(ostart, header);
 581         break;
 582     }
 583     case 2: {
 584         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
 585                            ((U32)compressedSize << 18);
 586         MEM_writeLE32(ostart, header);
 587         break;
 588     }
 589     case 3: {
 590         U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) |
 591                            ((U32)compressedSize << 22);
 592         MEM_writeLE32(ostart, header);
 593         ostart[4] = (BYTE)(compressedSize >> 10);
 594         break;
 595     }
 596     default:; /* impossible */
 597     }
 598
 599     frame->data = op;
 600     return litSize;
 601 }
 602
 603 static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
 604 {
 605     /* only do compressed for larger segments to avoid compressibility issues */
 606     if (RAND(seed) & 7 && contentSize >= 64) {
 607         return writeLiteralsBlockCompressed(seed, frame, contentSize);
 608     } else {
 609         return writeLiteralsBlockSimple(seed, frame, contentSize);
 610     }
 611 }
 612
 613 static inline void initSeqStore(seqStore_t *seqStore) {
 614     seqStore->sequencesStart = SEQUENCE_BUFFER;
 615     seqStore->litStart = SEQUENCE_LITERAL_BUFFER;
 616     seqStore->llCode = SEQUENCE_LLCODE;
 617     seqStore->mlCode = SEQUENCE_MLCODE;
 618     seqStore->ofCode = SEQUENCE_OFCODE;
 619
 620     ZSTD_resetSeqStore(seqStore);
 621 }
 622
 623 /* Randomly generate sequence commands */
 624 static U32 generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore,
 625                                 size_t contentSize, size_t literalsSize, dictInfo info)
 626 {
 627     /* The total length of all the matches */
 628     size_t const remainingMatch = contentSize - literalsSize;
 629     size_t excessMatch = 0;
 630     U32 numSequences = 0;
 631
 632     U32 i;
 633
 634
 635     const BYTE* literals = LITERAL_BUFFER;
 636     BYTE* srcPtr = frame->src;
 637
 638     if (literalsSize != contentSize) {
 639         /* each match must be at least MIN_SEQ_LEN, so this is the maximum
 640          * number of sequences we can have */
 641         U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN;
 642         numSequences = (RAND(seed) % maxSequences) + 1;
 643
 644         /* the extra match lengths we have to allocate to each sequence */
 645         excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN;
 646     }
 647
 648     DISPLAYLEVEL(5, "    total match lengths: %u\n", (U32)remainingMatch);
 649     for (i = 0; i < numSequences; i++) {
 650         /* Generate match and literal lengths by exponential distribution to
 651          * ensure nice numbers */
 652         U32 matchLen =
 653                 MIN_SEQ_LEN +
 654                 ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i)));
 655         U32 literalLen =
 656                 (RAND(seed) & 7)
 657                         ? ROUND(RAND_exp(seed,
 658                                          literalsSize /
 659                                                  (double)(numSequences - i)))
 660                         : 0;
 661         /* actual offset, code to send, and point to copy up to when shifting
 662          * codes in the repeat offsets history */
 663         U32 offset, offsetCode, repIndex;
 664
 665         /* bounds checks */
 666         matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN);
 667         literalLen = MIN(literalLen, (U32) literalsSize);
 668         if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1;
 669         if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch;
 670
 671         memcpy(srcPtr, literals, literalLen);
 672         srcPtr += literalLen;
 673         do {
 674             if (RAND(seed) & 7) {
 675                 /* do a normal offset */
 676                 U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart);
 677                 offset = (RAND(seed) %
 678                           MIN(frame->header.windowSize,
 679                               (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) +
 680                          1;
 681                 if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) {
 682                     /* need to occasionally generate offsets that go past the start */
 683                     /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
 684                     U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1;
 685                     offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart;
 686                     if (offset > frame->header.windowSize) {
 687                         if (lenPastStart < MIN_SEQ_LEN) {
 688                             /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
 689                             /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
 690                             /* make sure lenPastStart does not go past dictionary start though */
 691                             lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize);
 692                             offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart;
 693                         }
 694                         {
 695                             U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart);
 696                             matchLen = MIN(matchLen, matchLenBound);
 697                         }
 698                     }
 699                 }
 700                 offsetCode = offset + ZSTD_REP_MOVE;
 701                 repIndex = 2;
 702             } else {
 703                 /* do a repeat offset */
 704                 offsetCode = RAND(seed) % 3;
 705                 if (literalLen > 0) {
 706                     offset = frame->stats.rep[offsetCode];
 707                     repIndex = offsetCode;
 708                 } else {
 709                     /* special case */
 710                     offset = offsetCode == 2 ? frame->stats.rep[0] - 1
 711                                            : frame->stats.rep[offsetCode + 1];
 712                     repIndex = MIN(2, offsetCode + 1);
 713                 }
 714             }
 715         } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0);
 716
 717         {
 718             size_t j;
 719             BYTE* const dictEnd = info.dictContent + info.dictContentSize;
 720             for (j = 0; j < matchLen; j++) {
 721                 if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) {
 722                     /* copy from dictionary instead of literals */
 723                     size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart);
 724                     *srcPtr = *(dictEnd - dictOffset);
 725                 }
 726                 else {
 727                     *srcPtr = *(srcPtr-offset);
 728                 }
 729                 srcPtr++;
 730             }
 731         }
 732
 733         {   int r;
 734             for (r = repIndex; r > 0; r--) {
 735                 frame->stats.rep[r] = frame->stats.rep[r - 1];
 736             }
 737             frame->stats.rep[0] = offset;
 738         }
 739
 740         DISPLAYLEVEL(6, "      LL: %5u OF: %5u ML: %5u", literalLen, offset, matchLen);
 741         DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u",
 742                      (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart), i);
 743         DISPLAYLEVEL(6, "\n");
 744         if (offsetCode < 3) {
 745             DISPLAYLEVEL(7, "        repeat offset: %d\n", repIndex);
 746         }
 747         /* use libzstd sequence handling */
 748         ZSTD_storeSeq(seqStore, literalLen, literals, offsetCode,
 749                       matchLen - MINMATCH);
 750
 751         literalsSize -= literalLen;
 752         excessMatch -= (matchLen - MIN_SEQ_LEN);
 753         literals += literalLen;
 754     }
 755
 756     memcpy(srcPtr, literals, literalsSize);
 757     srcPtr += literalsSize;
 758     DISPLAYLEVEL(6, "      excess literals: %5u", (U32)literalsSize);
 759     DISPLAYLEVEL(7, " srcPos: %8u", (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart));
 760     DISPLAYLEVEL(6, "\n");
 761
 762     return numSequences;
 763 }
 764
 765 static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue)
 766 {
 767     size_t i;
 768
 769     memset(set, 0, (size_t)maxSymbolValue+1);
 770
 771     for (i = 0; i < len; i++) {
 772         set[symbols[i]] = 1;
 773     }
 774 }
 775
 776 static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue)
 777 {
 778     size_t i;
 779
 780     for (i = 0; i < len; i++) {
 781         if (symbols[i] > maxSymbolValue || !set[symbols[i]]) {
 782             return 0;
 783         }
 784     }
 785     return 1;
 786 }
 787
 788 static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
 789                              size_t nbSeq)
 790 {
 791     /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
 792     U32 count[MaxSeq+1];
 793     S16 norm[MaxSeq+1];
 794     FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable;
 795     FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable;
 796     FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable;
 797     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
 798     const seqDef* const sequences = seqStorePtr->sequencesStart;
 799     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
 800     const BYTE* const llCodeTable = seqStorePtr->llCode;
 801     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
 802     BYTE* const oend = (BYTE*)frame->dataEnd;
 803     BYTE* op = (BYTE*)frame->data;
 804     BYTE* seqHead;
 805     BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
 806
 807     /* literals compressing block removed so that can be done separately */
 808
 809     /* Sequences Header */
 810     if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
 811     if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
 812     else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
 813     else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
 814
 815     /* seqHead : flags for FSE encoding type */
 816     seqHead = op++;
 817
 818     if (nbSeq==0) {
 819         frame->data = op;
 820
 821         return 0;
 822     }
 823
 824     /* convert length/distances into codes */
 825     ZSTD_seqToCodes(seqStorePtr);
 826
 827     /* CTable for Literal Lengths */
 828     {   U32 max = MaxLL;
 829         size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP);
 830         if (mostFrequent == nbSeq) {
 831             /* do RLE if we have the chance */
 832             *op++ = llCodeTable[0];
 833             FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
 834             LLtype = set_rle;
 835         } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
 836                    isSymbolSubset(llCodeTable, nbSeq,
 837                                   frame->stats.litlengthSymbolSet, 35)) {
 838             /* maybe do repeat mode if we're allowed to */
 839             LLtype = set_repeat;
 840         } else if (!(RAND(seed) & 3)) {
 841             /* maybe use the default distribution */
 842             FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
 843             LLtype = set_basic;
 844         } else {
 845             /* fall back on a full table */
 846             size_t nbSeq_1 = nbSeq;
 847             const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
 848             if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
 849             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
 850             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
 851               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
 852               op += NCountSize; }
 853             FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
 854             LLtype = set_compressed;
 855     }   }
 856
 857     /* CTable for Offsets */
 858     /* see Literal Lengths for descriptions of mode choices */
 859     {   U32 max = MaxOff;
 860         size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP);
 861         if (mostFrequent == nbSeq) {
 862             *op++ = ofCodeTable[0];
 863             FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
 864             Offtype = set_rle;
 865         } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
 866                    isSymbolSubset(ofCodeTable, nbSeq,
 867                                   frame->stats.offsetSymbolSet, 28)) {
 868             Offtype = set_repeat;
 869         } else if (!(RAND(seed) & 3)) {
 870             FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
 871             Offtype = set_basic;
 872         } else {
 873             size_t nbSeq_1 = nbSeq;
 874             const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
 875             if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
 876             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
 877             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
 878               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
 879               op += NCountSize; }
 880             FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
 881             Offtype = set_compressed;
 882     }   }
 883
 884     /* CTable for MatchLengths */
 885     /* see Literal Lengths for descriptions of mode choices */
 886     {   U32 max = MaxML;
 887         size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP);
 888         if (mostFrequent == nbSeq) {
 889             *op++ = *mlCodeTable;
 890             FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
 891             MLtype = set_rle;
 892         } else if (frame->stats.fseInit && !(RAND(seed) & 3) &&
 893                    isSymbolSubset(mlCodeTable, nbSeq,
 894                                   frame->stats.matchlengthSymbolSet, 52)) {
 895             MLtype = set_repeat;
 896         } else if (!(RAND(seed) & 3)) {
 897             /* sometimes do default distribution */
 898             FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
 899             MLtype = set_basic;
 900         } else {
 901             /* fall back on table */
 902             size_t nbSeq_1 = nbSeq;
 903             const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
 904             if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
 905             FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
 906             { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
 907               if (FSE_isError(NCountSize)) return ERROR(GENERIC);
 908               op += NCountSize; }
 909             FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
 910             MLtype = set_compressed;
 911     }   }
 912     frame->stats.fseInit = 1;
 913     initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35);
 914     initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28);
 915     initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52);
 916
 917     DISPLAYLEVEL(5, "    LL type: %d OF type: %d ML type: %d\n", LLtype, Offtype, MLtype);
 918
 919     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
 920
 921     /* Encoding Sequences */
 922     {   BIT_CStream_t blockStream;
 923         FSE_CState_t  stateMatchLength;
 924         FSE_CState_t  stateOffsetBits;
 925         FSE_CState_t  stateLitLength;
 926
 927         CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
 928
 929         /* first symbols */
 930         FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
 931         FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
 932         FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
 933         BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
 934         if (MEM_32bits()) BIT_flushBits(&blockStream);
 935         BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
 936         if (MEM_32bits()) BIT_flushBits(&blockStream);
 937         BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
 938         BIT_flushBits(&blockStream);
 939
 940         {   size_t n;
 941             for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
 942                 BYTE const llCode = llCodeTable[n];
 943                 BYTE const ofCode = ofCodeTable[n];
 944                 BYTE const mlCode = mlCodeTable[n];
 945                 U32  const llBits = LL_bits[llCode];
 946                 U32  const ofBits = ofCode;                                     /* 32b*/  /* 64b*/
 947                 U32  const mlBits = ML_bits[mlCode];
 948                                                                                 /* (7)*/  /* (7)*/
 949                 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
 950                 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
 951                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
 952                 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
 953                 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
 954                     BIT_flushBits(&blockStream);                                /* (7)*/
 955                 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
 956                 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
 957                 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
 958                 if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
 959                 BIT_addBits(&blockStream, sequences[n].offset, ofBits);         /* 31 */
 960                 BIT_flushBits(&blockStream);                                    /* (7)*/
 961         }   }
 962
 963         FSE_flushCState(&blockStream, &stateMatchLength);
 964         FSE_flushCState(&blockStream, &stateOffsetBits);
 965         FSE_flushCState(&blockStream, &stateLitLength);
 966
 967         {   size_t const streamSize = BIT_closeCStream(&blockStream);
 968             if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
 969             op += streamSize;
 970     }   }
 971
 972     frame->data = op;
 973
 974     return 0;
 975 }
 976
 977 static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize,
 978                                   size_t literalsSize, dictInfo info)
 979 {
 980     seqStore_t seqStore;
 981     size_t numSequences;
 982
 983
 984     initSeqStore(&seqStore);
 985
 986     /* randomly generate sequences */
 987     numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info);
 988     /* write them out to the frame data */
 989     CHECKERR(writeSequences(seed, frame, &seqStore, numSequences));
 990
 991     return numSequences;
 992 }
 993
 994 static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info)
 995 {
 996     BYTE* const blockStart = (BYTE*)frame->data;
 997     size_t literalsSize;
 998     size_t nbSeq;
 999
1000     DISPLAYLEVEL(4, "  compressed block:\n");
1001
1002     literalsSize = writeLiteralsBlock(seed, frame, contentSize);
1003
1004     DISPLAYLEVEL(4, "   literals size: %u\n", (U32)literalsSize);
1005
1006     nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info);
1007
1008     DISPLAYLEVEL(4, "   number of sequences: %u\n", (U32)nbSeq);
1009
1010     return (BYTE*)frame->data - blockStart;
1011 }
1012
1013 static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
1014                        int lastBlock, dictInfo info)
1015 {
1016     int const blockTypeDesc = RAND(seed) % 8;
1017     size_t blockSize;
1018     int blockType;
1019
1020     BYTE *const header = (BYTE*)frame->data;
1021     BYTE *op = header + 3;
1022
1023     DISPLAYLEVEL(3, " block:\n");
1024     DISPLAYLEVEL(3, "  block content size: %u\n", (U32)contentSize);
1025     DISPLAYLEVEL(3, "  last block: %s\n", lastBlock ? "yes" : "no");
1026
1027     if (blockTypeDesc == 0) {
1028         /* Raw data frame */
1029
1030         RAND_buffer(seed, frame->src, contentSize);
1031         memcpy(op, frame->src, contentSize);
1032
1033         op += contentSize;
1034         blockType = 0;
1035         blockSize = contentSize;
1036     } else if (blockTypeDesc == 1) {
1037         /* RLE */
1038         BYTE const symbol = RAND(seed) & 0xff;
1039
1040         op[0] = symbol;
1041         memset(frame->src, symbol, contentSize);
1042
1043         op++;
1044         blockType = 1;
1045         blockSize = contentSize;
1046     } else {
1047         /* compressed, most common */
1048         size_t compressedSize;
1049         blockType = 2;
1050
1051         frame->oldStats = frame->stats;
1052
1053         frame->data = op;
1054         compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
1055         if (compressedSize > contentSize) {
1056             blockType = 0;
1057             memcpy(op, frame->src, contentSize);
1058
1059             op += contentSize;
1060             blockSize = contentSize; /* fall back on raw block if data doesn't
1061                                         compress */
1062
1063             frame->stats = frame->oldStats; /* don't update the stats */
1064         } else {
1065             op += compressedSize;
1066             blockSize = compressedSize;
1067         }
1068     }
1069     frame->src = (BYTE*)frame->src + contentSize;
1070
1071     DISPLAYLEVEL(3, "  block type: %s\n", BLOCK_TYPES[blockType]);
1072     DISPLAYLEVEL(3, "  block size field: %u\n", (U32)blockSize);
1073
1074     header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff);
1075     MEM_writeLE16(header + 1, (U16) (blockSize >> 5));
1076
1077     frame->data = op;
1078 }
1079
1080 static void writeBlocks(U32* seed, frame_t* frame, dictInfo info)
1081 {
1082     size_t contentLeft = frame->header.contentSize;
1083     size_t const maxBlockSize = MIN(MAX_BLOCK_SIZE, frame->header.windowSize);
1084     while (1) {
1085         /* 1 in 4 chance of ending frame */
1086         int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3);
1087         size_t blockContentSize;
1088         if (lastBlock) {
1089             blockContentSize = contentLeft;
1090         } else {
1091             if (contentLeft > 0 && (RAND(seed) & 7)) {
1092                 /* some variable size blocks */
1093                 blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1);
1094             } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) {
1095                 /* some full size blocks */
1096                 blockContentSize = maxBlockSize;
1097             } else {
1098                 /* some empty blocks */
1099                 blockContentSize = 0;
1100             }
1101         }
1102
1103         writeBlock(seed, frame, blockContentSize, lastBlock, info);
1104
1105         contentLeft -= blockContentSize;
1106         if (lastBlock) break;
1107     }
1108 }
1109
1110 static void writeChecksum(frame_t* frame)
1111 {
1112     /* write checksum so implementations can verify their output */
1113     U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0);
1114     DISPLAYLEVEL(2, "  checksum: %08x\n", (U32)digest);
1115     MEM_writeLE32(frame->data, (U32)digest);
1116     frame->data = (BYTE*)frame->data + 4;
1117 }
1118
1119 static void outputBuffer(const void* buf, size_t size, const char* const path)
1120 {
1121     /* write data out to file */
1122     const BYTE* ip = (const BYTE*)buf;
1123     FILE* out;
1124     if (path) {
1125         out = fopen(path, "wb");
1126     } else {
1127         out = stdout;
1128     }
1129     if (!out) {
1130         fprintf(stderr, "Failed to open file at %s: ", path);
1131         perror(NULL);
1132         exit(1);
1133     }
1134
1135     {
1136         size_t fsize = size;
1137         size_t written = 0;
1138         while (written < fsize) {
1139             written += fwrite(ip + written, 1, fsize - written, out);
1140             if (ferror(out)) {
1141                 fprintf(stderr, "Failed to write to file at %s: ", path);
1142                 perror(NULL);
1143                 exit(1);
1144             }
1145         }
1146     }
1147
1148     if (path) {
1149         fclose(out);
1150     }
1151 }
1152
1153 static void initFrame(frame_t* fr)
1154 {
1155     memset(fr, 0, sizeof(*fr));
1156     fr->data = fr->dataStart = FRAME_BUFFER;
1157     fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER);
1158     fr->src = fr->srcStart = CONTENT_BUFFER;
1159     fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER);
1160
1161     /* init repeat codes */
1162     fr->stats.rep[0] = 1;
1163     fr->stats.rep[1] = 4;
1164     fr->stats.rep[2] = 8;
1165 }
1166
1167 /* Return the final seed */
1168 static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
1169 {
1170     /* generate a complete frame */
1171     DISPLAYLEVEL(1, "frame seed: %u\n", seed);
1172     initFrame(fr);
1173
1174     writeFrameHeader(&seed, fr, info);
1175     writeBlocks(&seed, fr, info);
1176     writeChecksum(fr);
1177
1178     return seed;
1179 }
1180
1181 /*_*******************************************************
1182 *  Dictionary Helper Functions
1183 *********************************************************/
1184 /* returns 0 if successful, otherwise returns 1 upon error */
1185 static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict){
1186     /* allocate space for samples */
1187     int ret = 0;
1188     unsigned const numSamples = 4;
1189     size_t sampleSizes[4];
1190     BYTE* const samples = malloc(5000*sizeof(BYTE));
1191     if (samples == NULL) {
1192         DISPLAY("Error: could not allocate space for samples\n");
1193         return 1;
1194     }
1195
1196     /* generate samples */
1197     {
1198         unsigned literalValue = 1;
1199         unsigned samplesPos = 0;
1200         size_t currSize = 1;
1201         while (literalValue <= 4) {
1202             sampleSizes[literalValue - 1] = currSize;
1203             {
1204                 size_t k;
1205                 for (k = 0; k < currSize; k++) {
1206                     *(samples + (samplesPos++)) = (BYTE)literalValue;
1207                 }
1208             }
1209             literalValue++;
1210             currSize *= 16;
1211         }
1212     }
1213
1214
1215     {
1216         /* create variables */
1217         size_t dictWriteSize = 0;
1218         ZDICT_params_t zdictParams;
1219         size_t const headerSize = MAX(dictSize/4, 256);
1220         size_t const dictContentSize = dictSize - headerSize;
1221         BYTE* const dictContent = fullDict + headerSize;
1222         if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) {
1223             DISPLAY("Error: dictionary size is too small\n");
1224             ret = 1;
1225             goto exitGenRandomDict;
1226         }
1227
1228         /* init dictionary params */
1229         memset(&zdictParams, 0, sizeof(zdictParams));
1230         zdictParams.dictID = dictID;
1231         zdictParams.notificationLevel = 1;
1232
1233         /* fill in dictionary content */
1234         RAND_buffer(&seed, (void*)dictContent, dictContentSize);
1235
1236         /* finalize dictionary with random samples */
1237         dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize,
1238                                     dictContent, dictContentSize,
1239                                     samples, sampleSizes, numSamples,
1240                                     zdictParams);
1241
1242         if (ZDICT_isError(dictWriteSize)) {
1243             DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize));
1244             ret = 1;
1245         }
1246     }
1247
1248 exitGenRandomDict:
1249     free(samples);
1250     return ret;
1251 }
1252
1253 static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){
1254     /* allocate space statically */
1255     dictInfo dictOp;
1256     memset(&dictOp, 0, sizeof(dictOp));
1257     dictOp.useDict = useDict;
1258     dictOp.dictContentSize = dictContentSize;
1259     dictOp.dictContent = dictContent;
1260     dictOp.dictID = dictID;
1261     return dictOp;
1262 }
1263
1264 /*-*******************************************************
1265 *  Test Mode
1266 *********************************************************/
1267
1268 BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE];
1269
1270 static size_t testDecodeSimple(frame_t* fr)
1271 {
1272     /* test decoding the generated data with the simple API */
1273     size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1274                            fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart);
1275
1276     if (ZSTD_isError(ret)) return ret;
1277
1278     if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart,
1279                (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) {
1280         return ERROR(corruption_detected);
1281     }
1282
1283     return ret;
1284 }
1285
1286 static size_t testDecodeStreaming(frame_t* fr)
1287 {
1288     /* test decoding the generated data with the streaming API */
1289     ZSTD_DStream* zd = ZSTD_createDStream();
1290     ZSTD_inBuffer in;
1291     ZSTD_outBuffer out;
1292     size_t ret;
1293
1294     if (!zd) return ERROR(memory_allocation);
1295
1296     in.src = fr->dataStart;
1297     in.pos = 0;
1298     in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart;
1299
1300     out.dst = DECOMPRESSED_BUFFER;
1301     out.pos = 0;
1302     out.size = ZSTD_DStreamOutSize();
1303
1304     ZSTD_initDStream(zd);
1305     while (1) {
1306         ret = ZSTD_decompressStream(zd, &out, &in);
1307         if (ZSTD_isError(ret)) goto cleanup; /* error */
1308         if (ret == 0) break; /* frame is done */
1309
1310         /* force decoding to be done in chunks */
1311         out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size);
1312     }
1313
1314     ret = out.pos;
1315
1316     if (memcmp(out.dst, fr->srcStart, out.pos) != 0) {
1317         return ERROR(corruption_detected);
1318     }
1319
1320 cleanup:
1321     ZSTD_freeDStream(zd);
1322     return ret;
1323 }
1324
1325 static size_t testDecodeWithDict(U32 seed)
1326 {
1327     /* create variables */
1328     size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN;
1329     U32 const dictID = RAND(&seed);
1330     size_t errorDetected = 0;
1331     BYTE* const fullDict = malloc(dictSize);
1332     if (fullDict == NULL) {
1333         return ERROR(GENERIC);
1334     }
1335
1336     /* generate random dictionary */
1337     {
1338         int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
1339         if (ret != 0) {
1340             errorDetected = ERROR(GENERIC);
1341             goto dictTestCleanup;
1342         }
1343     }
1344
1345
1346     {
1347         frame_t fr;
1348
1349         /* generate frame */
1350         {
1351             size_t const headerSize = MAX(dictSize/4, 256);
1352             size_t const dictContentSize = dictSize-headerSize;
1353             BYTE* const dictContent = fullDict+headerSize;
1354             dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
1355             seed = generateFrame(seed, &fr, info);
1356         }
1357
1358         /* manually decompress and check difference */
1359         {
1360             ZSTD_DCtx* const dctx = ZSTD_createDCtx();
1361             {
1362                 size_t const returnValue = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE,
1363                                                        fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart,
1364                                                        fullDict, dictSize);
1365                 if (ZSTD_isError(returnValue)) {
1366                     errorDetected = returnValue;
1367                     goto dictTestCleanup;
1368                 }
1369             }
1370
1371             if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) {
1372                 errorDetected = ERROR(corruption_detected);
1373                 goto dictTestCleanup;
1374             }
1375             ZSTD_freeDCtx(dctx);
1376         }
1377     }
1378
1379 dictTestCleanup:
1380     free(fullDict);
1381     return errorDetected;
1382 }
1383
1384 static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS)
1385 {
1386     unsigned fnum;
1387
1388     clock_t const startClock = clock();
1389     clock_t const maxClockSpan = testDurationS * CLOCKS_PER_SEC;
1390
1391     if (numFiles == 0 && !testDurationS) numFiles = 1;
1392
1393     DISPLAY("seed: %u\n", seed);
1394
1395     for (fnum = 0; fnum < numFiles || clockSpan(startClock) < maxClockSpan; fnum++) {
1396         frame_t fr;
1397         U32 const seedCopy = seed;
1398         if (fnum < numFiles)
1399             DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
1400         else
1401             DISPLAYUPDATE("\r%u           ", fnum);
1402
1403         {
1404             dictInfo const info = initDictInfo(0, 0, NULL, 0);
1405             seed = generateFrame(seed, &fr, info);
1406         }
1407
1408         {   size_t const r = testDecodeSimple(&fr);
1409             if (ZSTD_isError(r)) {
1410                 DISPLAY("Error in simple mode on test seed %u: %s\n", seedCopy,
1411                         ZSTD_getErrorName(r));
1412                 return 1;
1413             }
1414         }
1415         {   size_t const r = testDecodeStreaming(&fr);
1416             if (ZSTD_isError(r)) {
1417                 DISPLAY("Error in streaming mode on test seed %u: %s\n", seedCopy,
1418                         ZSTD_getErrorName(r));
1419                 return 1;
1420             }
1421         }
1422         {
1423             /* don't create a dictionary that is too big */
1424             size_t const r = testDecodeWithDict(seed);
1425             if (ZSTD_isError(r)) {
1426                 DISPLAY("Error in dictionary mode on test seed %u: %s\n", seedCopy, ZSTD_getErrorName(r));
1427                 return 1;
1428             }
1429         }
1430     }
1431
1432     DISPLAY("\r%u tests completed: ", fnum);
1433     DISPLAY("OK\n");
1434
1435     return 0;
1436 }
1437
1438 /*-*******************************************************
1439 *  File I/O
1440 *********************************************************/
1441
1442 static int generateFile(U32 seed, const char* const path,
1443                          const char* const origPath)
1444 {
1445     frame_t fr;
1446
1447     DISPLAY("seed: %u\n", seed);
1448
1449     {
1450         dictInfo const info = initDictInfo(0, 0, NULL, 0);
1451         generateFrame(seed, &fr, info);
1452     }
1453
1454     outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1455     if (origPath) {
1456         outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1457     }
1458     return 0;
1459 }
1460
1461 static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
1462                          const char* const origPath)
1463 {
1464     char outPath[MAX_PATH];
1465     unsigned fnum;
1466
1467     DISPLAY("seed: %u\n", seed);
1468
1469     for (fnum = 0; fnum < numFiles; fnum++) {
1470         frame_t fr;
1471
1472         DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
1473
1474         {
1475             dictInfo const info = initDictInfo(0, 0, NULL, 0);
1476             seed = generateFrame(seed, &fr, info);
1477         }
1478
1479         if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1480             DISPLAY("Error: path too long\n");
1481             return 1;
1482         }
1483         outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1484
1485         if (origPath) {
1486             if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1487                 DISPLAY("Error: path too long\n");
1488                 return 1;
1489             }
1490             outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1491         }
1492     }
1493
1494     DISPLAY("\r%u/%u      \n", fnum, numFiles);
1495
1496     return 0;
1497 }
1498
1499 static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
1500                                     const char* const origPath, const size_t dictSize)
1501 {
1502     char outPath[MAX_PATH];
1503     BYTE* fullDict;
1504     U32 const dictID = RAND(&seed);
1505     int errorDetected = 0;
1506
1507     if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1508         DISPLAY("Error: path too long\n");
1509         return 1;
1510     }
1511
1512     /* allocate space for the dictionary */
1513     fullDict = malloc(dictSize);
1514     if (fullDict == NULL) {
1515         DISPLAY("Error: could not allocate space for full dictionary.\n");
1516         return 1;
1517     }
1518
1519     /* randomly generate the dictionary */
1520     {
1521         int const ret = genRandomDict(dictID, seed, dictSize, fullDict);
1522         if (ret != 0) {
1523             errorDetected = ret;
1524             goto dictCleanup;
1525         }
1526     }
1527
1528     /* write out dictionary */
1529     if (numFiles != 0) {
1530         if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
1531             DISPLAY("Error: dictionary path too long\n");
1532             errorDetected = 1;
1533             goto dictCleanup;
1534         }
1535         outputBuffer(fullDict, dictSize, outPath);
1536     }
1537     else {
1538         outputBuffer(fullDict, dictSize, "dictionary");
1539     }
1540
1541     /* generate random compressed/decompressed files */
1542     {
1543         unsigned fnum;
1544         for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) {
1545             frame_t fr;
1546             DISPLAYUPDATE("\r%u/%u        ", fnum, numFiles);
1547             {
1548                 size_t const headerSize = MAX(dictSize/4, 256);
1549                 size_t const dictContentSize = dictSize-headerSize;
1550                 BYTE* const dictContent = fullDict+headerSize;
1551                 dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID);
1552                 seed = generateFrame(seed, &fr, info);
1553             }
1554
1555             if (numFiles != 0) {
1556                 if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
1557                     DISPLAY("Error: path too long\n");
1558                     errorDetected = 1;
1559                     goto dictCleanup;
1560                 }
1561                 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);
1562
1563                 if (origPath) {
1564                     if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
1565                         DISPLAY("Error: path too long\n");
1566                         errorDetected = 1;
1567                         goto dictCleanup;
1568                     }
1569                     outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
1570                 }
1571             }
1572             else {
1573                 outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
1574                 if (origPath) {
1575                     outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath);
1576                 }
1577             }
1578         }
1579     }
1580
1581 dictCleanup:
1582     free(fullDict);
1583     return errorDetected;
1584 }
1585
1586
1587 /*_*******************************************************
1588 *  Command line
1589 *********************************************************/
1590 static U32 makeSeed(void)
1591 {
1592     U32 t = (U32) time(NULL);
1593     return XXH32(&t, sizeof(t), 0) % 65536;
1594 }
1595
1596 static unsigned readInt(const char** argument)
1597 {
1598     unsigned val = 0;
1599     while ((**argument>='0') && (**argument<='9')) {
1600         val *= 10;
1601         val += **argument - '0';
1602         (*argument)++;
1603     }
1604     return val;
1605 }
1606
1607 static void usage(const char* programName)
1608 {
1609     DISPLAY( "Usage :\n");
1610     DISPLAY( "      %s [args]\n", programName);
1611     DISPLAY( "\n");
1612     DISPLAY( "Arguments :\n");
1613     DISPLAY( " -p<path> : select output path (default:stdout)\n");
1614     DISPLAY( "                in multiple files mode this should be a directory\n");
1615     DISPLAY( " -o<path> : select path to output original file (default:no output)\n");
1616     DISPLAY( "                in multiple files mode this should be a directory\n");
1617     DISPLAY( " -s#      : select seed (default:random based on time)\n");
1618     DISPLAY( " -n#      : number of files to generate (default:1)\n");
1619     DISPLAY( " -t       : activate test mode (test files against libzstd instead of outputting them)\n");
1620     DISPLAY( " -T#      : length of time to run tests for\n");
1621     DISPLAY( " -v       : increase verbosity level (default:0, max:7)\n");
1622     DISPLAY( " -h/H     : display help/long help and exit\n");
1623 }
1624
1625 static void advancedUsage(const char* programName)
1626 {
1627     usage(programName);
1628     DISPLAY( "\n");
1629     DISPLAY( "Advanced arguments :\n");
1630     DISPLAY( " --content-size    : always include the content size in the frame header\n");
1631     DISPLAY( " --use-dict=#      : include a dictionary used to decompress the corpus\n");
1632 }
1633
1634 /*! readU32FromChar() :
1635     @return : unsigned integer value read from input in `char` format
1636     allows and interprets K, KB, KiB, M, MB and MiB suffix.
1637     Will also modify `*stringPtr`, advancing it to position where it stopped reading.
1638     Note : function result can overflow if digit string > MAX_UINT */
1639 static unsigned readU32FromChar(const char** stringPtr)
1640 {
1641     unsigned result = 0;
1642     while ((**stringPtr >='0') && (**stringPtr <='9'))
1643         result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
1644     if ((**stringPtr=='K') || (**stringPtr=='M')) {
1645         result <<= 10;
1646         if (**stringPtr=='M') result <<= 10;
1647         (*stringPtr)++ ;
1648         if (**stringPtr=='i') (*stringPtr)++;
1649         if (**stringPtr=='B') (*stringPtr)++;
1650     }
1651     return result;
1652 }
1653
1654 /** longCommandWArg() :
1655  *  check if *stringPtr is the same as longCommand.
1656  *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
1657  *  @return 0 and doesn't modify *stringPtr otherwise.
1658  */
1659 static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
1660 {
1661     size_t const comSize = strlen(longCommand);
1662     int const result = !strncmp(*stringPtr, longCommand, comSize);
1663     if (result) *stringPtr += comSize;
1664     return result;
1665 }
1666
1667 int main(int argc, char** argv)
1668 {
1669     U32 seed = 0;
1670     int seedset = 0;
1671     unsigned numFiles = 0;
1672     unsigned testDuration = 0;
1673     int testMode = 0;
1674     const char* path = NULL;
1675     const char* origPath = NULL;
1676     int useDict = 0;
1677     unsigned dictSize = (10 << 10); /* 10 kB default */
1678
1679     int argNb;
1680
1681     /* Check command line */
1682     for (argNb=1; argNb<argc; argNb++) {
1683         const char* argument = argv[argNb];
1684         if(!argument) continue;   /* Protection if argument empty */
1685
1686         /* Handle commands. Aggregated commands are allowed */
1687         if (argument[0]=='-') {
1688             argument++;
1689             while (*argument!=0) {
1690                 switch(*argument)
1691                 {
1692                 case 'h':
1693                     usage(argv[0]);
1694                     return 0;
1695                 case 'H':
1696                     advancedUsage(argv[0]);
1697                     return 0;
1698                 case 'v':
1699                     argument++;
1700                     g_displayLevel++;
1701                     break;
1702                 case 's':
1703                     argument++;
1704                     seedset=1;
1705                     seed = readInt(&argument);
1706                     break;
1707                 case 'n':
1708                     argument++;
1709                     numFiles = readInt(&argument);
1710                     break;
1711                 case 'T':
1712                     argument++;
1713                     testDuration = readInt(&argument);
1714                     if (*argument == 'm') {
1715                         testDuration *= 60;
1716                         argument++;
1717                         if (*argument == 'n') argument++;
1718                     }
1719                     break;
1720                 case 'o':
1721                     argument++;
1722                     origPath = argument;
1723                     argument += strlen(argument);
1724                     break;
1725                 case 'p':
1726                     argument++;
1727                     path = argument;
1728                     argument += strlen(argument);
1729                     break;
1730                 case 't':
1731                     argument++;
1732                     testMode = 1;
1733                     break;
1734                 case '-':
1735                     argument++;
1736                     if (strcmp(argument, "content-size") == 0) {
1737                         opts.contentSize = 1;
1738                     } else if (longCommandWArg(&argument, "use-dict=")) {
1739                         dictSize = readU32FromChar(&argument);
1740                         useDict = 1;
1741                     } else {
1742                         advancedUsage(argv[0]);
1743                         return 1;
1744                     }
1745                     argument += strlen(argument);
1746                     break;
1747                 default:
1748                     usage(argv[0]);
1749                     return 1;
1750     }   }   }   }   /* for (argNb=1; argNb<argc; argNb++) */
1751
1752     if (!seedset) {
1753         seed = makeSeed();
1754     }
1755
1756     if (testMode) {
1757         return runTestMode(seed, numFiles, testDuration);
1758     } else {
1759         if (testDuration) {
1760             DISPLAY("Error: -T requires test mode (-t)\n\n");
1761             usage(argv[0]);
1762             return 1;
1763         }
1764     }
1765
1766     if (!path) {
1767         DISPLAY("Error: path is required in file generation mode\n");
1768         usage(argv[0]);
1769         return 1;
1770     }
1771
1772     if (numFiles == 0 && useDict == 0) {
1773         return generateFile(seed, path, origPath);
1774     } else if (useDict == 0){
1775         return generateCorpus(seed, numFiles, path, origPath);
1776     } else {
1777         /* should generate files with a dictionary */
1778         return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize);
1779     }
1780
1781 }