1 /*-*************************************
3 ***************************************/
4 #include <stdio.h> /* fprintf */
5 #include <stdlib.h> /* malloc, free, qsort */
6 #include <string.h> /* memset */
7 #include <time.h> /* clock */
9 #include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
10 #ifndef ZDICT_STATIC_LINKING_ONLY
11 #define ZDICT_STATIC_LINKING_ONLY
15 /*-*************************************
17 ***************************************/
/* Print a formatted message to stderr. */
18 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* Print only when the in-scope variable `displayLevel` is at least l.
 * NOTE(review): expands to a bare `if (...) { }`, so using it directly
 * before an `else` is hazardous. */
19 #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
/* LOCALDISPLAYUPDATE prints when displayLevel >= l and either more than
 * refreshRate clock ticks have passed since g_time or displayLevel >= 4.
 * DISPLAYUPDATE forwards the in-scope `displayLevel` to it. */
21 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
22 if (displayLevel >= l) { \
23 if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
25 DISPLAY(__VA_ARGS__); \
28 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
/* Refresh threshold used by LOCALDISPLAYUPDATE: 15% of CLOCKS_PER_SEC. */
29 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
/* Reference time compared against clock() in LOCALDISPLAYUPDATE.
 * NOTE(review): presumably the time of the last displayed update; the
 * assignment is not visible in this chunk. */
30 static clock_t g_time = 0;
34 /* ********************************************************
35 * Random Dictionary Builder
36 **********************************************************/
38 * Returns the sum of the sample sizes.
40 static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
/* Add each of the nbSamples entries of samplesSizes to the running total. */
43 for (i = 0; i < nbSamples; ++i) {
44 sum += samplesSizes[i];
51 * A segment is an inclusive range in the source.
60 * Selects a random segment from totalSamplesSize - k + 1 possible segments
62 static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
63 ZDICT_random_params_t parameters) {
/* k is the segment length, taken from the caller's parameters. */
64 const U32 k = parameters.k;
65 RANDOM_segment_t segment;
68 /* Randomly generate a start offset from 0 to totalSamplesSize - k.
 * NOTE(review): rand() never exceeds RAND_MAX, so start offsets above
 * RAND_MAX can never be chosen. If totalSamplesSize < k the subtraction
 * wraps around (assuming U32 is unsigned); no guard is visible in this
 * chunk, so confirm the caller prevents it. */
69 index = rand()%(totalSamplesSize - k + 1);
/* Inclusive bounds: the segment covers exactly k positions. */
72 segment.begin = index;
73 segment.end = index + k - 1;
80 * Check the validity of the parameters.
81 * Returns non-zero if the parameters are valid and 0 otherwise.
83 static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
85 /* k is a required parameter: a value of 0 counts as missing */
86 if (parameters.k == 0) {
89 /* k must not exceed maxDictSize */
90 if (parameters.k > maxDictSize) {
98 * Given the samples and their total size, build the dictionary.
100 static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
101 void *dictBuffer, size_t dictBufferCapacity,
102 ZDICT_random_params_t parameters) {
103 BYTE *const dict = (BYTE *)dictBuffer;
/* tail starts at the full capacity of the dictionary buffer. */
104 size_t tail = dictBufferCapacity;
/* The display macros reference a variable named displayLevel. */
105 const int displayLevel = parameters.zParams.notificationLevel;
108 /* Select a random segment within the totalSamplesSize range */
109 RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters);
/* Inclusive segment length, capped at tail. */
112 segmentSize = MIN(segment.end - segment.begin + 1, tail);
/* Copy the segment's bytes from the samples to offset tail of the dictionary.
 * NOTE(review): presumably tail is lowered by segmentSize just before this
 * copy (line not visible here); confirm, since dict + tail is the end of
 * the buffer while tail == dictBufferCapacity. */
115 memcpy(dict + tail, samples + segment.begin, segmentSize);
118 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
127 ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
128 void *dictBuffer, size_t dictBufferCapacity,
129 const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
130 ZDICT_random_params_t parameters) {
131 const int displayLevel = parameters.zParams.notificationLevel;
132 BYTE* const dict = (BYTE*)dictBuffer;
134 if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
135 DISPLAYLEVEL(1, "k is incorrect\n");
136 return ERROR(GENERIC);
138 if (nbSamples == 0) {
139 DISPLAYLEVEL(1, "Random must have at least one input file\n");
140 return ERROR(GENERIC);
142 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
143 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
145 return ERROR(dstSize_tooSmall);
147 const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
148 const BYTE *const samples = (const BYTE *)samplesBuffer;
150 DISPLAYLEVEL(2, "Building dictionary\n");
152 const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
153 dictBuffer, dictBufferCapacity, parameters);
154 const size_t dictSize = ZDICT_finalizeDictionary(
155 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
156 samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
157 if (!ZSTD_isError(dictSize)) {
158 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",