/*
 * Copyright (c) 2017-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
/* *********************************************************
*  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
***********************************************************/
#if !defined(__64BIT__) || defined(__MINGW32__)  /* No point defining Large file for 64 bit but MinGW-w64 requires it */
#  if !defined(_FILE_OFFSET_BITS)
#    define _FILE_OFFSET_BITS 64   /* turn off_t into a 64-bit type for ftello, fseeko */
#  endif
#  if !defined(_LARGEFILE_SOURCE)  /* obsolete macro, replaced with _FILE_OFFSET_BITS */
#    define _LARGEFILE_SOURCE 1    /* Large File Support extension (LFS) - fseeko, ftello */
#  endif
#  if defined(_AIX) || defined(__hpux)
#    define _LARGE_FILES           /* Large file support on 32-bits AIX and HP-UX */
#  endif
#endif
/* ************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && _MSC_VER >= 1400
#   define LONG_SEEK _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
#   define LONG_SEEK fseeko
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
#   define LONG_SEEK fseeko64
#elif defined(_WIN32) && !defined(__DJGPP__)
#   include <windows.h>
    /* 64-bit seek shim for Windows CRTs without _fseeki64:
     * translates the stdio origin to a SetFilePointerEx move method
     * and seeks on the underlying OS handle.
     * @return : 0 on success, -1 on failure (matches fseek convention) */
    static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
        LARGE_INTEGER off;
        DWORD method;
        off.QuadPart = offset;
        if (origin == SEEK_END)
            method = FILE_END;
        else if (origin == SEEK_CUR)
            method = FILE_CURRENT;
        else
            method = FILE_BEGIN;

        if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
            return 0;
        else
            return -1;
    }
#else
#   define LONG_SEEK fseek
#endif
#include <stdlib.h> /* malloc, free */
#include <stdio.h>  /* FILE* */
#include <string.h> /* memset, memcpy, memmove */
#include <assert.h> /* assert */

#define XXH_STATIC_LINKING_ONLY
#define XXH_NAMESPACE ZSTD_
#include "xxhash.h" /* XXH64_* : per-frame checksum verification */

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"        /* ZSTD_DStream, streaming decompression API */
#include "zstd_errors.h" /* ZSTD_error_* codes */

#include "mem.h" /* BYTE, U32, U64, S64, MEM_readLE32 */
#include "zstd_seekable.h"
/* Build a zstd-style error code (size_t with high bit set) from an error name. */
#define ERROR(name) ((size_t)-ZSTD_error_##name)

/* Run an I/O callback and propagate failure as a seekableIO error.
 * Wrapped in do/while(0) so it behaves as a single statement
 * (safe inside un-braced if/else). */
#define CHECK_IO(f) do { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); } while (0)

/* NOTE: arguments are evaluated twice; do not pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
81 /* Special-case callbacks for FILE* and in-memory modes, so that we can treat
82 * them the same way as the advanced API */
/* FILE*-backed read callback.
 * Reads exactly `n` bytes into `buffer` from the FILE* in `opaque`.
 * @return : 0 on success, -1 on short read or error */
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
{
    size_t const result = fread(buffer, 1, n, (FILE*)opaque);
    if (result != n) {
        return -1;
    }
    return 0;
}
/* FILE*-backed seek callback.
 * Seeks the FILE* in `opaque`, then flushes so buffered state stays
 * consistent with the new position.
 * @return : 0 on success, non-zero on failure */
static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
{
    int const ret = LONG_SEEK((FILE*)opaque, offset, origin);
    if (ret) return ret;  /* don't mask a failed seek with fflush's result */
    return fflush((FILE*)opaque);
}
105 static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
107 buffWrapper_t* buff = (buffWrapper_t*) opaque;
108 if (buff->size + n > buff->pos) return -1;
109 memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n);
114 static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
116 buffWrapper_t* const buff = (buffWrapper_t*) opaque;
117 unsigned long long newOffset;
123 newOffset = (unsigned long long)buff->pos + offset;
126 newOffset = (unsigned long long)buff->size - offset;
129 assert(0); /* not possible */
131 if (newOffset > buff->size) {
134 buff->pos = newOffset;
145 seekEntry_t* entries;
151 #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX
153 struct ZSTD_seekable_s {
154 ZSTD_DStream* dstream;
155 seekTable_t seekTable;
156 ZSTD_seekable_customFile src;
158 U64 decompressedOffset;
161 BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
162 BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
163 starts of chunks before we get to the
165 ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
166 buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */
168 XXH64_state_t xxhState;
171 ZSTD_seekable* ZSTD_seekable_create(void)
173 ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
175 if (zs == NULL) return NULL;
177 /* also initializes stage to zsds_init */
178 memset(zs, 0, sizeof(*zs));
180 zs->dstream = ZSTD_createDStream();
181 if (zs->dstream == NULL) {
189 size_t ZSTD_seekable_free(ZSTD_seekable* zs)
191 if (zs == NULL) return 0; /* support free on null */
192 ZSTD_freeDStream(zs->dstream);
193 free(zs->seekTable.entries);
199 /** ZSTD_seekable_offsetToFrameIndex() :
200 * Performs a binary search to find the last frame with a decompressed offset
202 * @return : the frame's index */
203 U32 ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
206 U32 hi = zs->seekTable.tableLen;
208 if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
209 return zs->seekTable.tableLen;
212 while (lo + 1 < hi) {
213 U32 const mid = lo + ((hi - lo) >> 1);
214 if (zs->seekTable.entries[mid].dOffset <= pos) {
223 U32 ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
225 return zs->seekTable.tableLen;
228 unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
230 if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
231 return zs->seekTable.entries[frameIndex].cOffset;
234 unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
236 if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
237 return zs->seekTable.entries[frameIndex].dOffset;
240 size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
242 if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
243 return zs->seekTable.entries[frameIndex + 1].cOffset -
244 zs->seekTable.entries[frameIndex].cOffset;
247 size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
249 if (frameIndex > zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
250 return zs->seekTable.entries[frameIndex + 1].dOffset -
251 zs->seekTable.entries[frameIndex].dOffset;
254 static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
257 ZSTD_seekable_customFile src = zs->src;
258 /* read the footer, fixed size */
259 CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
260 CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));
262 if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
263 return ERROR(prefix_unknown);
266 { BYTE const sfd = zs->inBuff[4];
267 checksumFlag = sfd >> 7;
269 /* check reserved bits */
270 if ((checksumFlag >> 2) & 0x1f) {
271 return ERROR(corruption_detected);
275 { U32 const numFrames = MEM_readLE32(zs->inBuff);
276 U32 const sizePerEntry = 8 + (checksumFlag?4:0);
277 U32 const tableSize = sizePerEntry * numFrames;
278 U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize;
280 U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
282 U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
284 CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
285 CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
290 if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
291 return ERROR(prefix_unknown);
293 if (MEM_readLE32(zs->inBuff+4) + ZSTD_skippableHeaderSize != frameSize) {
294 return ERROR(prefix_unknown);
297 { /* Allocate an extra entry at the end so that we can do size
298 * computations on the last element without special case */
299 seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
310 return ERROR(memory_allocation);
313 /* compute cumulative positions */
314 for (; idx < numFrames; idx++) {
315 if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
316 U32 const offset = SEEKABLE_BUFF_SIZE - pos;
317 U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset);
318 memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
319 CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead));
323 entries[idx].cOffset = cOffset;
324 entries[idx].dOffset = dOffset;
326 cOffset += MEM_readLE32(zs->inBuff + pos);
328 dOffset += MEM_readLE32(zs->inBuff + pos);
331 entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
335 entries[numFrames].cOffset = cOffset;
336 entries[numFrames].dOffset = dOffset;
338 zs->seekTable.entries = entries;
339 zs->seekTable.tableLen = numFrames;
340 zs->seekTable.checksumFlag = checksumFlag;
346 size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
348 zs->buffWrapper = (buffWrapper_t){src, srcSize, 0};
349 { ZSTD_seekable_customFile srcFile = {&zs->buffWrapper,
350 &ZSTD_seekable_read_buff,
351 &ZSTD_seekable_seek_buff};
352 return ZSTD_seekable_initAdvanced(zs, srcFile); }
355 size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
357 ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE,
358 &ZSTD_seekable_seek_FILE};
359 return ZSTD_seekable_initAdvanced(zs, srcFile);
362 size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
366 { const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs);
367 if (ZSTD_isError(seekTableInit)) return seekTableInit; }
369 zs->decompressedOffset = (U64)-1;
370 zs->curFrame = (U32)-1;
372 { const size_t dstreamInit = ZSTD_initDStream(zs->dstream);
373 if (ZSTD_isError(dstreamInit)) return dstreamInit; }
377 size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
379 U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
381 /* check if we can continue from a previous decompress job */
382 if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
383 zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
384 zs->curFrame = targetFrame;
386 CHECK_IO(zs->src.seek(zs->src.opaque,
387 zs->seekTable.entries[targetFrame].cOffset,
389 zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
390 XXH64_reset(&zs->xxhState, 0);
391 ZSTD_resetDStream(zs->dstream);
394 while (zs->decompressedOffset < offset + len) {
396 ZSTD_outBuffer outTmp;
398 if (zs->decompressedOffset < offset) {
399 /* dummy decompressions until we get to the target offset */
400 outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
402 outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
405 prevOutPos = outTmp.pos;
406 toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
407 if (ZSTD_isError(toRead)) {
411 if (zs->seekTable.checksumFlag) {
412 XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
413 outTmp.pos - prevOutPos);
415 zs->decompressedOffset += outTmp.pos - prevOutPos;
420 /* verify checksum */
421 if (zs->seekTable.checksumFlag &&
422 (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
423 zs->seekTable.entries[targetFrame].checksum) {
424 return ERROR(corruption_detected);
427 if (zs->decompressedOffset < offset + len) {
428 /* go back to the start and force a reset of the stream */
429 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
434 /* read in more data if we're done with this buffer */
435 if (zs->in.pos == zs->in.size) {
436 toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
437 CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
438 zs->in.size = toRead;
442 } while (zs->decompressedOffset != offset + len);
447 size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, U32 frameIndex)
449 if (frameIndex >= zs->seekTable.tableLen) {
450 return ERROR(frameIndex_tooLarge);
454 size_t const decompressedSize =
455 zs->seekTable.entries[frameIndex + 1].dOffset -
456 zs->seekTable.entries[frameIndex].dOffset;
457 if (dstSize < decompressedSize) {
458 return ERROR(dstSize_tooSmall);
460 return ZSTD_seekable_decompress(
461 zs, dst, decompressedSize,
462 zs->seekTable.entries[frameIndex].dOffset);