From 512a7ff8a0eee1ed7c654a5cefbd90e9e5c75223 Mon Sep 17 00:00:00 2001 From: kientzle Date: Thu, 5 Mar 2009 02:37:05 +0000 Subject: [PATCH] Merge r551,r561 from libarchive.googlecode.com: Update gzip read filter to fully take advantage of the new peek/consume I/O support. In particular, this now properly handles concatenated gzip streams. --- .../archive_read_support_compression_gzip.c | 447 +++++++++--------- lib/libarchive/test/test_compat_gzip.c | 2 +- 2 files changed, 218 insertions(+), 231 deletions(-) diff --git a/lib/libarchive/archive_read_support_compression_gzip.c b/lib/libarchive/archive_read_support_compression_gzip.c index 1c7473d6d1c..91ba270fcc8 100644 --- a/lib/libarchive/archive_read_support_compression_gzip.c +++ b/lib/libarchive/archive_read_support_compression_gzip.c @@ -51,14 +51,11 @@ __FBSDID("$FreeBSD$"); #ifdef HAVE_ZLIB_H struct private_data { z_stream stream; + char in_stream; unsigned char *out_block; size_t out_block_size; int64_t total_out; unsigned long crc; - int header_count; - char header_done; - char header_state; - char header_flags; char eof; /* True = found end of compressed data. */ }; @@ -72,10 +69,14 @@ static int gzip_filter_close(struct archive_read_filter *); * them. (In fact, we like detecting them because we can give better * error messages.) So the bid framework here gets compiled even * if zlib is unavailable. + * + * TODO: If zlib is unavailable, gzip_bidder_init() should + * use the compress_program framework to try to fire up an external + * gunzip program. */ -static int gzip_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); +static int gzip_bidder_bid(struct archive_read_filter_bidder *, + struct archive_read_filter *); static int gzip_bidder_init(struct archive_read_filter *); -static int gzip_bidder_free(struct archive_read_filter_bidder *); int archive_read_support_compression_gzip(struct archive *_a) @@ -89,62 +90,116 @@ archive_read_support_compression_gzip(struct archive *_a) bidder->data = NULL; bidder->bid = gzip_bidder_bid; bidder->init = gzip_bidder_init; - bidder->free = gzip_bidder_free; - return (ARCHIVE_OK); -} - -static int -gzip_bidder_free(struct archive_read_filter_bidder *self){ - (void)self; /* UNUSED */ + bidder->free = NULL; /* No data, so no cleanup necessary. */ return (ARCHIVE_OK); } /* - * Test whether we can handle this data. + * Read and verify the header. * - * This logic returns zero if any part of the signature fails. It - * also tries to Do The Right Thing if a very short buffer prevents us - * from verifying as much as we would like. + * Returns zero if the header couldn't be validated, else returns + * number of bytes in header. If pbits is non-NULL, it receives a + * count of bits verified, suitable for use by bidder. */ static int -gzip_bidder_bid(struct archive_read_filter_bidder *self, - struct archive_read_filter *filter) +peek_at_header(struct archive_read_filter *filter, int *pbits) { - const unsigned char *buffer; - size_t avail; - int bits_checked; - - (void)self; /* UNUSED */ - - buffer = __archive_read_filter_ahead(filter, 8, &avail); - if (buffer == NULL) + const unsigned char *p; + ssize_t avail, len; + int bits = 0; + int header_flags; + + /* Start by looking at the first ten bytes of the header, which + * is all fixed layout. */ + len = 10; + p = __archive_read_filter_ahead(filter, len, &avail); + if (p == NULL || avail == 0) return (0); - - bits_checked = 0; - if (buffer[0] != 037) /* Verify first ID byte. */ + if (p[0] != 037) return (0); - bits_checked += 8; - - if (buffer[1] != 0213) /* Verify second ID byte. */ + bits += 8; + if (p[1] != 0213) return (0); - bits_checked += 8; - - if (buffer[2] != 8) /* Compression must be 'deflate'. */ + bits += 8; + if (p[2] != 8) /* We only support deflation. */ return (0); - bits_checked += 8; + bits += 8; + if ((p[3] & 0xE0)!= 0) /* No reserved flags set. */ + return (0); + bits += 3; + header_flags = p[3]; + /* Bytes 4-7 are mod time. */ + /* Byte 8 is deflate flags. */ + /* XXXX TODO: return deflate flags back to consume_header for use + in initializing the decompressor. */ + /* Byte 9 is OS. */ + + /* Optional extra data: 2 byte length plus variable body. */ + if (header_flags & 4) { + p = __archive_read_filter_ahead(filter, len + 2, &avail); + if (p == NULL) + return (0); + len += ((int)p[len + 1] << 8) | (int)p[len]; + } - if ((buffer[3] & 0xE0)!= 0) /* No reserved flags set. */ + /* Null-terminated optional filename. */ + if (header_flags & 8) { + do { + ++len; + if (avail < len) + p = __archive_read_filter_ahead(filter, + len, &avail); + if (p == NULL) + return (0); + } while (p[len - 1] != 0); + } + + /* Null-terminated optional comment. */ + if (header_flags & 16) { + do { + ++len; + if (avail < len) + p = __archive_read_filter_ahead(filter, + len, &avail); + if (p == NULL) + return (0); + } while (p[len - 1] != 0); + } + + /* Optional header CRC */ + if ((header_flags & 2)) { + p = __archive_read_filter_ahead(filter, len + 2, &avail); + if (p == NULL) + return (0); +#if 0 + int hcrc = ((int)p[len + 1] << 8) | (int)p[len]; + int crc = /* XXX TODO: Compute header CRC. */; + if (crc != hcrc) return (0); - bits_checked += 3; + bits += 16; +#endif + len += 2; + } + + if (pbits != NULL) + *pbits = bits; + return (len); +} + +/* + * Bidder just verifies the header and returns the number of verified bits. + */ +static int +gzip_bidder_bid(struct archive_read_filter_bidder *self, + struct archive_read_filter *filter) +{ + int bits_checked; - /* - * TODO: Verify more; in particular, gzip has an optional - * header CRC, which would give us 16 more verified bits. We - * may also be able to verify certain constraints on other - * fields. - */ + (void)self; /* UNUSED */ - return (bits_checked); + if (peek_at_header(filter, &bits_checked)) + return (bits_checked); + return (0); } @@ -185,8 +240,7 @@ gzip_bidder_init(struct archive_read_filter *self) free(out_block); free(state); archive_set_error(&self->archive->archive, ENOMEM, - "Can't allocate data for %s decompression", - self->name); + "Can't allocate data for gzip decompression"); return (ARCHIVE_FATAL); } @@ -197,148 +251,99 @@ gzip_bidder_init(struct archive_read_filter *self) self->skip = NULL; /* not supported */ self->close = gzip_filter_close; - state->crc = crc32(0L, NULL, 0); - state->header_done = 0; /* We've not yet begun to parse header... */ + state->in_stream = 0; /* We're not actually within a stream yet. */ return (ARCHIVE_OK); } static int -header(struct archive_read_filter *self) +consume_header(struct archive_read_filter *self) { struct private_data *state; - int ret, b; + ssize_t avail; + size_t len; + int ret; state = (struct private_data *)self->data; - /* - * If still parsing the header, interpret the - * next byte. - */ - b = *(state->stream.next_in++); - state->stream.avail_in--; - - /* - * Simple state machine to parse the GZip header one byte at - * a time. If you see a way to make this easier to understand, - * please let me know. ;-) - */ - switch (state->header_state) { - case 0: /* First byte of signature. */ - /* We only return EOF for a failure here. */ - if (b != 037) - return (ARCHIVE_EOF); - state->header_state = 1; + /* If this is a real header, consume it. */ + len = peek_at_header(self->upstream, NULL); + if (len == 0) + return (ARCHIVE_EOF); + __archive_read_filter_consume(self->upstream, len); + + /* Initialize CRC accumulator. */ + state->crc = crc32(0L, NULL, 0); + + /* Initialize compression library. */ + state->stream.next_in = (unsigned char *)(uintptr_t) + __archive_read_filter_ahead(self->upstream, 1, &avail); + state->stream.avail_in = avail; + ret = inflateInit2(&(state->stream), + -15 /* Don't check for zlib header */); + + /* Decipher the error code. */ + switch (ret) { + case Z_OK: + state->in_stream = 1; + return (ARCHIVE_OK); + case Z_STREAM_ERROR: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid setup parameter"); break; - case 1: /* Second byte of signature. */ - case 2: /* Compression type must be 8 == deflate. */ - if (b != (0xff & "\037\213\010"[(int)state->header_state])) { - archive_set_error(&self->archive->archive, - ARCHIVE_ERRNO_MISC, - "Invalid GZip header (saw %d at offset %d)", - b, state->header_state); - return (ARCHIVE_FATAL); - } - ++state->header_state; + case Z_MEM_ERROR: + archive_set_error(&self->archive->archive, ENOMEM, + "Internal error initializing compression library: " + "out of memory"); break; - case 3: /* GZip flags. */ - state->header_flags = b; - state->header_state = 4; + case Z_VERSION_ERROR: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid library version"); break; - case 4: case 5: case 6: case 7: /* Mod time. */ - case 8: /* Deflate flags. */ - case 9: /* OS. */ - ++state->header_state; + default: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + " Zlib error %d", ret); break; - case 10: /* Optional Extra: First byte of Length. */ - if ((state->header_flags & 4)) { - state->header_count = 255 & (int)b; - state->header_state = 11; - break; - } - /* Fall through if no Optional Extra field. */ - case 11: /* Optional Extra: Second byte of Length. */ - if ((state->header_flags & 4)) { - state->header_count - = (0xff00 & ((int)b << 8)) | state->header_count; - state->header_state = 12; - break; - } - /* Fall through if no Optional Extra field. */ - case 12: /* Optional Extra Field: counted length. */ - if ((state->header_flags & 4)) { - --state->header_count; - if (state->header_count == 0) state->header_state = 13; - else state->header_state = 12; - break; - } - /* Fall through if no Optional Extra field. */ - case 13: /* Optional Original Filename. */ - if ((state->header_flags & 8)) { - if (b == 0) state->header_state = 14; - else state->header_state = 13; - break; - } - /* Fall through if no Optional Original Filename. */ - case 14: /* Optional Comment. */ - if ((state->header_flags & 16)) { - if (b == 0) state->header_state = 15; - else state->header_state = 14; - break; - } - /* Fall through if no Optional Comment. */ - case 15: /* Optional Header CRC: First byte. */ - if ((state->header_flags & 2)) { - state->header_state = 16; - break; - } - /* Fall through if no Optional Header CRC. */ - case 16: /* Optional Header CRC: Second byte. */ - if ((state->header_flags & 2)) { - state->header_state = 17; - break; - } - /* Fall through if no Optional Header CRC. */ - case 17: /* First byte of compressed data. */ - state->header_done = 1; /* done with header */ - state->stream.avail_in++; /* Discard first byte. */ - state->stream.next_in--; + } + return (ARCHIVE_FATAL); +} - /* Initialize compression library. */ - ret = inflateInit2(&(state->stream), - -15 /* Don't check for zlib header */); +static int +consume_trailer(struct archive_read_filter *self) +{ + struct private_data *state; + const unsigned char *p; + ssize_t avail; - /* Decipher the error code. */ - switch (ret) { - case Z_OK: - return (ARCHIVE_OK); - case Z_STREAM_ERROR: - archive_set_error(&self->archive->archive, - ARCHIVE_ERRNO_MISC, - "Internal error initializing compression library: " - "invalid setup parameter"); - break; - case Z_MEM_ERROR: - archive_set_error(&self->archive->archive, ENOMEM, - "Internal error initializing compression library: " - "out of memory"); - break; - case Z_VERSION_ERROR: - archive_set_error(&self->archive->archive, - ARCHIVE_ERRNO_MISC, - "Internal error initializing compression library: " - "invalid library version"); - break; - default: - archive_set_error(&self->archive->archive, - ARCHIVE_ERRNO_MISC, - "Internal error initializing compression library: " - " Zlib error %d", ret); - break; - } + state = (struct private_data *)self->data; + + state->in_stream = 0; + switch (inflateEnd(&(state->stream))) { + case Z_OK: + break; + default: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Failed to clean up gzip decompressor"); return (ARCHIVE_FATAL); } + /* GZip trailer is a fixed 8 byte structure. */ + p = __archive_read_filter_ahead(self->upstream, 8, &avail); + if (p == NULL || avail == 0) + return (ARCHIVE_FATAL); + + /* XXX TODO: Verify the length and CRC. */ + + /* We've verified the trailer, so consume it now. */ + __archive_read_filter_consume(self->upstream, 8); + return (ARCHIVE_OK); } @@ -346,12 +351,11 @@ static ssize_t gzip_filter_read(struct archive_read_filter *self, const void **p) { struct private_data *state; - size_t read_avail, decompressed; - const void *read_buf; + size_t decompressed; + ssize_t avail_in; int ret; state = (struct private_data *)self->data; - read_avail = 0; /* Empty our output buffer. */ state->stream.next_out = state->out_block; @@ -359,62 +363,47 @@ gzip_filter_read(struct archive_read_filter *self, const void **p) /* Try to fill the output buffer. */ while (state->stream.avail_out > 0 && !state->eof) { - /* If the last upstream block is done, get another one. */ - if (state->stream.avail_in == 0) { - read_buf = __archive_read_filter_ahead(self->upstream, - 1, &ret); - if (read_buf == NULL) - return (ARCHIVE_FATAL); - /* stream.next_in is really const, but zlib - * doesn't declare it so. */ - state->stream.next_in - = (unsigned char *)(uintptr_t)read_buf; - state->stream.avail_in = ret; - /* There is no more data, return whatever we have. */ - if (ret == 0) { + /* If we're not in a stream, read a header + * and initialize the decompression library. */ + if (!state->in_stream) { + ret = consume_header(self); + if (ret == ARCHIVE_EOF) { state->eof = 1; break; } - __archive_read_filter_consume(self->upstream, ret); - } - - /* If we're still parsing header bytes, walk through those. */ - if (!state->header_done) { - ret = header(self); if (ret < ARCHIVE_OK) return (ret); - if (ret == ARCHIVE_EOF) - state->eof = 1; - } else { - /* Decompress as much as we can in one pass. */ - /* XXX Skip trailer XXX */ - ret = inflate(&(state->stream), 0); - switch (ret) { - case Z_STREAM_END: /* Found end of stream. */ - switch (inflateEnd(&(state->stream))) { - case Z_OK: - break; - default: - archive_set_error(&self->archive->archive, - ARCHIVE_ERRNO_MISC, - "Failed to clean up gzip decompressor"); - return (ARCHIVE_FATAL); - } - /* zlib has been torn down */ - state->header_done = 0; - state->eof = 1; - /* FALL THROUGH */ - case Z_OK: /* Decompressor made some progress. */ - /* If we filled our buffer, update stats and return. */ - break; - default: - /* Return an error. */ - archive_set_error(&self->archive->archive, - ARCHIVE_ERRNO_MISC, - "%s decompression failed", - self->archive->archive.compression_name); - return (ARCHIVE_FATAL); - } + } + + /* Peek at the next available data. */ + /* ZLib treats stream.next_in as const but doesn't declare + * it so, hence this ugly cast. */ + state->stream.next_in = (unsigned char *)(uintptr_t) + __archive_read_filter_ahead(self->upstream, 1, &avail_in); + if (state->stream.next_in == NULL) + return (ARCHIVE_FATAL); + state->stream.avail_in = avail_in; + + /* Decompress and consume some of that data. */ + ret = inflate(&(state->stream), 0); + switch (ret) { + case Z_OK: /* Decompressor made some progress. */ + __archive_read_filter_consume(self->upstream, + avail_in - state->stream.avail_in); + break; + case Z_STREAM_END: /* Found end of stream. */ + __archive_read_filter_consume(self->upstream, + avail_in - state->stream.avail_in); + /* Consume the stream trailer; release the + * decompression library. */ + ret = consume_trailer(self); + break; + default: + /* Return an error. */ + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "gzip decompression failed"); + return (ARCHIVE_FATAL); } } @@ -426,7 +415,6 @@ gzip_filter_read(struct archive_read_filter *self, const void **p) else *p = state->out_block; return (decompressed); - } /* @@ -441,15 +429,14 @@ gzip_filter_close(struct archive_read_filter *self) state = (struct private_data *)self->data; ret = ARCHIVE_OK; - if (state->header_done) { + if (state->in_stream) { switch (inflateEnd(&(state->stream))) { case Z_OK: break; default: archive_set_error(&(self->archive->archive), - ARCHIVE_ERRNO_MISC, - "Failed to clean up %s compressor", - self->archive->archive.compression_name); + ARCHIVE_ERRNO_MISC, + "Failed to clean up gzip compressor"); ret = ARCHIVE_FATAL; } } diff --git a/lib/libarchive/test/test_compat_gzip.c b/lib/libarchive/test/test_compat_gzip.c index 0140a448f06..b1d9be3ea23 100644 --- a/lib/libarchive/test/test_compat_gzip.c +++ b/lib/libarchive/test/test_compat_gzip.c @@ -86,7 +86,7 @@ DEFINE_TEST(test_compat_gzip) /* This sample has been 'split', each piece compressed separately, * then concatenated. Gunzip will emit the concatenated result. */ /* Not supported in libarchive 2.6 and earlier */ - /* verify("test_compat_gzip_1.tgz"); */ + verify("test_compat_gzip_1.tgz"); /* This sample has been compressed as a single stream, but then * some unrelated garbage text has been appended to the end. */ verify("test_compat_gzip_2.tgz"); -- 2.45.2