/*
 * Copyright (c) 2017-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
/* *********************************************************
*  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
***********************************************************/
#if !defined(__64BIT__) || defined(__MINGW32__)  /* No point defining Large file for 64 bit but MinGW-w64 requires it */
#  if !defined(_FILE_OFFSET_BITS)
#    define _FILE_OFFSET_BITS 64   /* turn off_t into a 64-bit type for ftello, fseeko */
#  endif
#  if !defined(_LARGEFILE_SOURCE)  /* obsolete macro, replaced with _FILE_OFFSET_BITS */
#    define _LARGEFILE_SOURCE 1    /* Large File Support extension (LFS) - fseeko, ftello */
#  endif
#  if defined(_AIX) || defined(__hpux)
#    define _LARGE_FILES           /* Large file support on 32-bits AIX and HP-UX */
#  endif
#endif
/* ************************************************************
*  Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && _MSC_VER >= 1400
#   define LONG_SEEK _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
#   define LONG_SEEK fseeko
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
#   define LONG_SEEK fseeko64
#elif defined(_WIN32) && !defined(__DJGPP__)
#   include <windows.h>
    /* Raw Win32 fallback: plain fseek() takes a 32-bit long offset, so drive
     * the underlying OS handle with SetFilePointerEx to get 64-bit seeks.
     * Returns 0 on success, -1 on failure (same contract as fseek). */
    static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
        LARGE_INTEGER off;
        DWORD method;
        off.QuadPart = offset;
        if (origin == SEEK_END)
            method = FILE_END;
        else if (origin == SEEK_CUR)
            method = FILE_CURRENT;
        else
            method = FILE_BEGIN;

        if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
            return 0;
        else
            return -1;
    }
#else
#   define LONG_SEEK fseek
#endif
#include <stdlib.h> /* malloc, free */
#include <stdio.h>  /* FILE* */
#include <string.h> /* memcpy, memmove */

#define XXH_STATIC_LINKING_ONLY
#define XXH_NAMESPACE ZSTD_
#include "xxhash.h"

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include "mem.h"
#include "zstd_seekable.h"
/* Convert a ZSTD error enum name into the library's size_t error encoding. */
#define ERROR(name) ((size_t)-ZSTD_error_##name)

/* Evaluate a user-provided IO callback once; all callbacks report failure with
 * a negative return.  do/while(0) makes the macro safe as a single statement
 * (e.g. in an unbraced if/else). */
#define CHECK_IO(f) do { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); } while (0)

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/* Special-case callbacks for FILE* and in-memory modes, so that we can treat
 * them the same way as the advanced API */

/* FILE* read callback : read exactly n bytes into buffer.
 * Returns 0 on success, -1 on short read or stream error. */
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
{
    size_t const result = fread(buffer, 1, n, (FILE*)opaque);
    if (result != n) {
        return -1;
    }
    return 0;
}
91 static int ZSTD_seekable_seek_FILE(void* opaque, S64 offset, int origin)
93 int const ret = LONG_SEEK((FILE*)opaque, offset, origin);
95 return fflush((FILE*)opaque);
/* Wrapper around an in-memory source buffer, so buffer-backed sources can use
 * the same callback interface as the advanced API. */
typedef struct {
    const void* ptr; /* start of the wrapped buffer */
    size_t size;     /* total buffer size */
    size_t pos;      /* current read position */
} buffWrapper_t;

/* In-memory read callback : copy the next n bytes out of the wrapped buffer.
 * Returns 0 on success, -1 if fewer than n bytes remain. */
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
    buffWrapper_t* buff = (buffWrapper_t*) opaque;
    /* fixed: the bounds check was inverted (`buff->size + n > buff->pos`),
     * which rejected essentially every legitimate read; this form also
     * avoids overflow of pos + n */
    if (n > buff->size || buff->pos > buff->size - n) return -1;
    memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
    buff->pos += n;
    return 0;
}
113 static int ZSTD_seekable_seek_buff(void* opaque, S64 offset, int origin)
115 buffWrapper_t* buff = (buffWrapper_t*) opaque;
116 unsigned long long newOffset;
122 newOffset = (unsigned long long)buff->pos + offset;
125 newOffset = (unsigned long long)buff->size - offset;
128 if (newOffset < 0 || newOffset > buff->size) {
131 buff->pos = newOffset;
142 seekEntry_t* entries;
148 #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_ABSOLUTEMAX
150 struct ZSTD_seekable_s {
151 ZSTD_DStream* dstream;
152 seekTable_t seekTable;
153 ZSTD_seekable_customFile src;
155 U64 decompressedOffset;
158 BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
159 BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
160 starts of chunks before we get to the
162 ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
163 buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */
165 XXH64_state_t xxhState;
168 ZSTD_seekable* ZSTD_seekable_create(void)
170 ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
172 if (zs == NULL) return NULL;
174 /* also initializes stage to zsds_init */
175 memset(zs, 0, sizeof(*zs));
177 zs->dstream = ZSTD_createDStream();
178 if (zs->dstream == NULL) {
186 size_t ZSTD_seekable_free(ZSTD_seekable* zs)
188 if (zs == NULL) return 0; /* support free on null */
189 ZSTD_freeDStream(zs->dstream);
190 free(zs->seekTable.entries);
196 /** ZSTD_seekable_offsetToFrameIndex() :
197 * Performs a binary search to find the last frame with a decompressed offset
199 * @return : the frame's index */
200 U32 ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, U64 pos)
203 U32 hi = zs->seekTable.tableLen;
205 if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
206 return zs->seekTable.tableLen;
209 while (lo + 1 < hi) {
210 U32 const mid = lo + ((hi - lo) >> 1);
211 if (zs->seekTable.entries[mid].dOffset <= pos) {
220 U32 ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
222 return zs->seekTable.tableLen;
225 U64 ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
227 if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
228 return zs->seekTable.entries[frameIndex].cOffset;
231 U64 ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
233 if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
234 return zs->seekTable.entries[frameIndex].dOffset;
237 size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
239 if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
240 return zs->seekTable.entries[frameIndex + 1].cOffset -
241 zs->seekTable.entries[frameIndex].cOffset;
244 size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
246 if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
247 return zs->seekTable.entries[frameIndex + 1].dOffset -
248 zs->seekTable.entries[frameIndex].dOffset;
251 static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
254 ZSTD_seekable_customFile src = zs->src;
255 /* read the footer, fixed size */
256 CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
257 CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));
259 if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
260 return ERROR(prefix_unknown);
263 { BYTE const sfd = zs->inBuff[4];
264 checksumFlag = sfd >> 7;
266 /* check reserved bits */
267 if ((checksumFlag >> 2) & 0x1f) {
268 return ERROR(corruption_detected);
272 { U32 const numFrames = MEM_readLE32(zs->inBuff);
273 U32 const sizePerEntry = 8 + (checksumFlag?4:0);
274 U32 const tableSize = sizePerEntry * numFrames;
275 U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize;
277 U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
279 U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
281 CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
282 CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
287 if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
288 return ERROR(prefix_unknown);
290 if (MEM_readLE32(zs->inBuff+4) + ZSTD_skippableHeaderSize != frameSize) {
291 return ERROR(prefix_unknown);
294 { /* Allocate an extra entry at the end so that we can do size
295 * computations on the last element without special case */
296 seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
297 const BYTE* tableBase = zs->inBuff + ZSTD_skippableHeaderSize;
308 return ERROR(memory_allocation);
311 /* compute cumulative positions */
312 for (; idx < numFrames; idx++) {
313 if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
314 U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
315 U32 const offset = SEEKABLE_BUFF_SIZE - pos;
316 memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
317 CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead));
321 entries[idx].cOffset = cOffset;
322 entries[idx].dOffset = dOffset;
324 cOffset += MEM_readLE32(zs->inBuff + pos);
326 dOffset += MEM_readLE32(zs->inBuff + pos);
329 entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
333 entries[numFrames].cOffset = cOffset;
334 entries[numFrames].dOffset = dOffset;
336 zs->seekTable.entries = entries;
337 zs->seekTable.tableLen = numFrames;
338 zs->seekTable.checksumFlag = checksumFlag;
344 size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
346 zs->buffWrapper = (buffWrapper_t){src, srcSize, 0};
347 { ZSTD_seekable_customFile srcFile = {&zs->buffWrapper,
348 &ZSTD_seekable_read_buff,
349 &ZSTD_seekable_seek_buff};
350 return ZSTD_seekable_initAdvanced(zs, srcFile); }
353 size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
355 ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE,
356 &ZSTD_seekable_seek_FILE};
357 return ZSTD_seekable_initAdvanced(zs, srcFile);
360 size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
364 { const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs);
365 if (ZSTD_isError(seekTableInit)) return seekTableInit; }
367 zs->decompressedOffset = (U64)-1;
368 zs->curFrame = (U32)-1;
370 { const size_t dstreamInit = ZSTD_initDStream(zs->dstream);
371 if (ZSTD_isError(dstreamInit)) return dstreamInit; }
375 size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, U64 offset)
377 U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
379 /* check if we can continue from a previous decompress job */
380 if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
381 zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
382 zs->curFrame = targetFrame;
384 CHECK_IO(zs->src.seek(zs->src.opaque,
385 zs->seekTable.entries[targetFrame].cOffset,
387 zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
388 XXH64_reset(&zs->xxhState, 0);
389 ZSTD_resetDStream(zs->dstream);
392 while (zs->decompressedOffset < offset + len) {
394 ZSTD_outBuffer outTmp;
396 if (zs->decompressedOffset < offset) {
397 /* dummy decompressions until we get to the target offset */
398 outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
400 outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
403 prevOutPos = outTmp.pos;
404 toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
405 if (ZSTD_isError(toRead)) {
409 if (zs->seekTable.checksumFlag) {
410 XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
411 outTmp.pos - prevOutPos);
413 zs->decompressedOffset += outTmp.pos - prevOutPos;
418 /* verify checksum */
419 if (zs->seekTable.checksumFlag &&
420 (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
421 zs->seekTable.entries[targetFrame].checksum) {
422 return ERROR(corruption_detected);
425 if (zs->decompressedOffset < offset + len) {
426 /* go back to the start and force a reset of the stream */
427 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
432 /* read in more data if we're done with this buffer */
433 if (zs->in.pos == zs->in.size) {
434 toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
435 CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
436 zs->in.size = toRead;
440 } while (zs->decompressedOffset != offset + len);
445 size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, U32 frameIndex)
447 if (frameIndex >= zs->seekTable.tableLen) {
448 return ERROR(frameIndex_tooLarge);
452 size_t const decompressedSize =
453 zs->seekTable.entries[frameIndex + 1].dOffset -
454 zs->seekTable.entries[frameIndex].dOffset;
455 if (dstSize < decompressedSize) {
456 return ERROR(dstSize_tooSmall);
458 return ZSTD_seekable_decompress(
459 zs, dst, decompressedSize,
460 zs->seekTable.entries[frameIndex].dOffset);