1 /* zlib.c --- interface to the zlib compression library
2 Ian Lance Taylor <ian@cygnus.com>
4 This file is part of GNU CVS.
6 GNU CVS is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details. */
16 /* The routines in this file are the interface between the CVS
17 client/server support and the zlib compression library. */
23 #if defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT)
27 /* OS/2 doesn't have EIO. FIXME: this whole notion of turning
28 a different error into EIO strikes me as pretty dubious. */
33 /* The compression interface is built upon the buffer data structure.
34 We provide a buffer type which compresses or decompresses the data
35 which passes through it. An input buffer decompresses the data
36 read from an underlying buffer, and an output buffer compresses the
37 data before writing it to an underlying buffer. */
39 /* This structure is the closure field of the buffer. */
/* NOTE(review): the member declarations are not visible in this listing.
   The routines below reach an underlying buffer through cb->buf and pass
   &cb->zstr to the zlib calls and to compress_error (which takes a
   z_stream pointer), so the two members are presumably a struct buffer
   pointer named buf and a z_stream named zstr -- confirm against the
   full file.  */
41 struct compress_buffer
43 /* The underlying buffer. */
45 /* The compression information. */
/* Forward declarations of the file-local routines.  Apart from
   compress_error, these are the callbacks that
   compress_buffer_initialize hands to buf_initialize: input, output,
   flush, block and the two shutdown routines.  The callbacks that take a
   void pointer receive the struct compress_buffer closure through it;
   the shutdown routines receive the struct buffer itself.
   NOTE(review): PROTO wraps each parameter list, presumably so the file
   still builds with pre-ANSI compilers -- its definition is not visible
   here.  */
49 static void compress_error PROTO((int, int, z_stream *, const char *));
50 static int compress_buffer_input PROTO((void *, char *, int, int, int *));
51 static int compress_buffer_output PROTO((void *, const char *, int, int *));
52 static int compress_buffer_flush PROTO((void *));
53 static int compress_buffer_block PROTO((void *, int));
54 static int compress_buffer_shutdown_input PROTO((struct buffer *));
55 static int compress_buffer_shutdown_output PROTO((struct buffer *));
57 /* Report an error from one of the zlib functions. */
/* ZSTATUS is the zlib return code and ZSTR the stream it came from.
   Callers in this file pass either 0 or 1 for STATUS, and pass either
   the name of the failing operation ("inflate", "deflate flush", ...) or
   the name of the file being processed as MSG.
   NOTE(review): presumably STATUS is forwarded to error () as its exit
   status and ZSTR supplies the message text from zlib -- the lines that
   would show this are not visible in this listing; confirm.  */
60 compress_error (status, zstatus, zstr, msg)
/* Build a fallback description from the numeric zlib code.
   NOTE(review): this sprintf is unbounded and the declaration of buf is
   not visible here; confirm buf can hold "error " plus a formatted int,
   or switch to a bounded formatter.  */
75 sprintf (buf, "error %d", zstatus);
/* An errno value is only passed along when zlib reported Z_ERRNO;
   otherwise 0 is used.  hold_errno is presumably errno saved on entry --
   its assignment is not visible here.  */
80 zstatus == Z_ERRNO ? hold_errno : 0,
84 /* Create a compression buffer. */
/* BUF is the existing buffer being wrapped (see the comment below about
   data that may already be buffered on it).  An input buffer (INPUT
   nonzero) is given only an input routine; an output buffer is given
   output and flush routines.  The z_stream is set up with inflateInit,
   or with deflateInit at compression level LEVEL; the test that selects
   between them is not visible in this listing but presumably follows
   INPUT in the same way.  MEMORY is a callback taking a struct buffer
   pointer; how it is used is not visible here.  The value returned is
   whatever buf_initialize returns.  */
87 compress_buffer_initialize (buf, input, level, memory)
91 void (*memory) PROTO((struct buffer *));
93 struct compress_buffer *n;
/* Allocate the closure and zero all of it, the embedded z_stream
   included, before it is handed to the zlib init routine.  */
96 n = (struct compress_buffer *) xmalloc (sizeof *n);
97 memset (n, 0, sizeof *n);
102 zstatus = inflateInit (&n->zstr);
104 zstatus = deflateInit (&n->zstr, level);
/* An init failure is reported with a STATUS of 1.  */
106 compress_error (1, zstatus, &n->zstr, "compression initialization");
108 /* There may already be data buffered on BUF. For an output
109 buffer, this is OK, because these routines will just use the
110 buffer routines to append data to the (uncompressed) data
111 already on BUF. An input buffer expects to handle a single
112 buffer_data of buffered input to be uncompressed, so that is OK
113 provided there is only one buffer. At present that is all
114 there ever will be; if this changes, compress_buffer_input must
115 be modified to handle multiple input buffers. */
116 assert (! input || buf->data == NULL || buf->data->next == NULL);
/* Both directions share compress_buffer_block.  The input, output and
   flush slots that do not apply to this direction are NULL, and the
   shutdown routine is chosen to match the direction.  */
118 return buf_initialize (input ? compress_buffer_input : NULL,
119 input ? NULL : compress_buffer_output,
120 input ? NULL : compress_buffer_flush,
121 compress_buffer_block,
123 ? compress_buffer_shutdown_input
124 : compress_buffer_shutdown_output),
129 /* Input data from a compression buffer. */
/* CLOSURE is the struct compress_buffer.  Inflated bytes are written to
   DATA, which has room for SIZE bytes, and the number written is stored
   in *GOT.  Per the comments below, the routine keeps reading from the
   underlying buffer until at least one byte and at least NEED bytes have
   been inflated, the stream ends, or the underlying buffer has nothing
   more to give.
   NOTE(review): braces, return statements and some conditions of this
   function are missing from this listing, so the exact return values
   cannot be confirmed from here.  */
132 compress_buffer_input (closure, data, need, size, got)
139 struct compress_buffer *cb = (struct compress_buffer *) closure;
140 struct buffer_data *bd;
/* Guard: the underlying buffer must provide an input routine.  The
   action taken when it does not is not visible in this listing.  */
142 if (cb->buf->input == NULL)
145 /* We use a single buffer_data structure to buffer up data which
146 the z_stream structure won't use yet. We can safely store this
147 on cb->buf->data, because we never call the buffer routines on
148 cb->buf; we only call the buffer input routine, since that
149 gives us the semantics we want. As noted in
150 compress_buffer_initialize, the buffer_data structure may
151 already exist, and hold data which was already read and
152 buffered before the decompression began. */
/* NOTE(review): bd->text is tested against NULL right after xmalloc.  If
   xmalloc never returns NULL (as the x prefix suggests -- confirm), that
   test is dead code.  */
156 bd = ((struct buffer_data *) xmalloc (sizeof (struct buffer_data)));
159 bd->text = (char *) xmalloc (BUFFER_DATA_SIZE);
160 if (bd->text == NULL)
/* Inflate directly into the caller's buffer.  */
170 cb->zstr.avail_out = size;
171 cb->zstr.next_out = (Bytef *) data;
175 int zstatus, sofar, status, nread;
177 /* First try to inflate any data we already have buffered up.
178 This is useful even if we don't have any buffered data,
179 because there may be data buffered inside the z_stream
182 cb->zstr.avail_in = bd->size;
183 cb->zstr.next_in = (Bytef *) bd->bufp;
187 zstatus = inflate (&cb->zstr, Z_NO_FLUSH);
188 if (zstatus == Z_STREAM_END)
190 if (zstatus != Z_OK && zstatus != Z_BUF_ERROR)
192 compress_error (0, zstatus, &cb->zstr, "inflate");
195 } while (cb->zstr.avail_in > 0
196 && cb->zstr.avail_out > 0);
198 bd->size = cb->zstr.avail_in;
199 bd->bufp = (char *) cb->zstr.next_in;
201 if (zstatus == Z_STREAM_END)
204 /* If we have obtained NEED bytes, then return, unless NEED is
205 zero and we haven't obtained anything at all. If NEED is
206 zero, we will keep reading from the underlying buffer until
207 we either can't read anything, or we have managed to
208 inflate at least one byte. */
/* SOFAR is the number of bytes inflated into DATA during this call.  */
209 sofar = size - cb->zstr.avail_out;
210 if (sofar > 0 && sofar >= need)
213 /* All our buffered data should have been processed at this
215 assert (bd->size == 0);
217 /* This will work well in the server, because this call will
218 do an unblocked read and fetch all the available data. In
219 the client, this will read a single byte from the stdio
220 stream, which will cause us to call inflate once per byte.
221 It would be more efficient if we could make a call which
222 would fetch all the available bytes, and at least one byte. */
/* Refill the holding area (bd->text) by calling the underlying buffer's
   own input routine with that buffer's closure; NREAD receives the
   number of bytes read.  */
224 status = (*cb->buf->input) (cb->buf->closure, bd->text,
226 BUFFER_DATA_SIZE, &nread);
230 /* If we didn't read anything, then presumably the buffer is
231 in nonblocking mode, and we should just get out now with
232 whatever we've inflated. */
/* Report how many bytes were placed in DATA.  */
243 *got = size - cb->zstr.avail_out;
248 /* Output data to a compression buffer. */
/* CLOSURE is the struct compress_buffer; DATA holds HAVE bytes to be
   compressed.  The input is deflated with Z_NO_FLUSH until zlib has
   consumed all of it, each round of compressed output is appended to the
   underlying buffer with buf_output, and finally buf_send_output is
   called on the underlying buffer and its result returned.
   NOTE(review): where WROTE is stored, and what is returned after a
   deflate failure, are not visible in this listing.  */
251 compress_buffer_output (closure, data, have, wrote)
257 struct compress_buffer *cb = (struct compress_buffer *) closure;
/* Hand the whole input to zlib; the loop below runs until avail_in
   reaches zero.  */
259 cb->zstr.avail_in = have;
260 cb->zstr.next_in = (unsigned char *) data;
262 while (cb->zstr.avail_in > 0)
/* Scratch space for one round of compressed output.  */
264 char buffer[BUFFER_DATA_SIZE];
267 cb->zstr.avail_out = BUFFER_DATA_SIZE;
268 cb->zstr.next_out = (unsigned char *) buffer;
270 zstatus = deflate (&cb->zstr, Z_NO_FLUSH);
273 compress_error (0, zstatus, &cb->zstr, "deflate");
/* Forward whatever deflate produced in this round, which may be
   nothing.  */
277 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
278 buf_output (cb->buf, buffer,
279 BUFFER_DATA_SIZE - cb->zstr.avail_out);
284 /* We will only be here because buf_send_output was called on the
285 compression buffer. That means that we should now call
286 buf_send_output on the underlying buffer. */
287 return buf_send_output (cb->buf);
290 /* Flush a compression buffer. */
/* CLOSURE is the struct compress_buffer.  deflate is called with
   Z_SYNC_FLUSH and no new input, each round of output is appended to the
   underlying buffer, and then the underlying buffer is flushed with
   buf_flush, whose result is returned.
   NOTE(review): the loop construct and the return taken after a deflate
   failure are not visible in this listing.  */
293 compress_buffer_flush (closure)
296 struct compress_buffer *cb = (struct compress_buffer *) closure;
/* No new input is supplied; deflate is only asked to emit what it is
   already holding.  */
298 cb->zstr.avail_in = 0;
299 cb->zstr.next_in = NULL;
/* Scratch space for one round of flushed output.  */
303 char buffer[BUFFER_DATA_SIZE];
306 cb->zstr.avail_out = BUFFER_DATA_SIZE;
307 cb->zstr.next_out = (unsigned char *) buffer;
309 zstatus = deflate (&cb->zstr, Z_SYNC_FLUSH);
311 /* The deflate function will return Z_BUF_ERROR if it can't do
312 anything, which in this case means that all data has been
314 if (zstatus == Z_BUF_ERROR)
319 compress_error (0, zstatus, &cb->zstr, "deflate flush");
323 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
324 buf_output (cb->buf, buffer,
325 BUFFER_DATA_SIZE - cb->zstr.avail_out);
327 /* If the deflate function did not fill the output buffer,
328 then all data has been flushed. */
329 if (cb->zstr.avail_out > 0)
333 /* Now flush the underlying buffer. Note that if the original
334 call to buf_flush passed 1 for the BLOCK argument, then the
335 buffer will already have been set into blocking mode, so we
336 should always pass 0 here. */
337 return buf_flush (cb->buf, 0);
340 /* The block routine for a compression buffer. */
/* CLOSURE is the struct compress_buffer.  The request is forwarded to
   the underlying buffer: either set_block or set_nonblock is called on
   cb->buf and its result returned.  The test that chooses between the
   two is not visible in this listing; presumably it is on BLOCK.  */
343 compress_buffer_block (closure, block)
347 struct compress_buffer *cb = (struct compress_buffer *) closure;
350 return set_block (cb->buf);
352 return set_nonblock (cb->buf);
355 /* Shut down an input buffer. */
/* BUF is the compression buffer itself; its closure is the struct
   compress_buffer.  zlib's inflate state is released with inflateEnd (a
   failure is reported through compress_error with STATUS 0), and then
   the underlying buffer is shut down with buf_shutdown, whose result is
   returned.
   NOTE(review): the test on the inflateEnd result and any early return
   are not visible in this listing.  */
358 compress_buffer_shutdown_input (buf)
361 struct compress_buffer *cb = (struct compress_buffer *) buf->closure;
364 /* Don't make any attempt to pick up trailing data since we are shutting
365 * down. If the client doesn't know we are shutting down, we might not
366 * see the EOF we are expecting.
369 zstatus = inflateEnd (&cb->zstr);
372 compress_error (0, zstatus, &cb->zstr, "inflateEnd");
376 return buf_shutdown (cb->buf);
379 /* Shut down an output buffer. */
/* BUF is the compression buffer itself; its closure is the struct
   compress_buffer.  The deflate stream is finished, zlib's state is
   released with deflateEnd, and the underlying buffer is then flushed
   and shut down.  The result of buf_shutdown is returned.
   NOTE(review): the tests on the deflateEnd and buf_flush results, and
   any early returns, are not visible in this listing.  */
382 compress_buffer_shutdown_output (buf)
385 struct compress_buffer *cb = (struct compress_buffer *) buf->closure;
/* Scratch space for one round of final output.  */
390 char buffer[BUFFER_DATA_SIZE];
392 cb->zstr.avail_out = BUFFER_DATA_SIZE;
393 cb->zstr.next_out = (unsigned char *) buffer;
/* Z_FINISH is repeated, forwarding each round of output, until zlib
   reports Z_STREAM_END.  */
395 zstatus = deflate (&cb->zstr, Z_FINISH);
396 if (zstatus != Z_OK && zstatus != Z_STREAM_END)
398 compress_error (0, zstatus, &cb->zstr, "deflate finish");
402 if (cb->zstr.avail_out != BUFFER_DATA_SIZE)
403 buf_output (cb->buf, buffer,
404 BUFFER_DATA_SIZE - cb->zstr.avail_out);
405 } while (zstatus != Z_STREAM_END);
407 zstatus = deflateEnd (&cb->zstr);
410 compress_error (0, zstatus, &cb->zstr, "deflateEnd");
/* Flush the underlying buffer with the BLOCK argument set to 1 before
   shutting it down.  */
414 status = buf_flush (cb->buf, 1);
418 return buf_shutdown (cb->buf);
423 /* Here is our librarified gzip implementation. It is very minimal
424 but attempts to be RFC1952 compliant. */
/* NOTE(review): GZIP_ID1, GZIP_ID2, GZIP_FNAME and GZIP_FHCRC are used by
   the routines below, but their definitions are not visible in this
   listing.  */
426 /* GZIP ID byte values */
430 /* Compression methods */
/* CM value 8 is the deflate method in RFC 1952; it is compared against
   header byte 2 and written there by the routines below.  */
431 #define GZIP_CDEFLATE 8
/* Bits of the FLG header byte (byte 3): in RFC 1952 FEXTRA is bit 2 and
   FCOMMENT is bit 4.  */
436 #define GZIP_FEXTRA 4
438 #define GZIP_FCOMMENT 16
440 /* BUF should contain SIZE bytes of gzipped data (RFC1952/RFC1951).
441 We are to uncompress the data and write the result to the file
442 descriptor FD. If something goes wrong, give a nonfatal error message
443 mentioning FULLNAME as the name of the file for FD. Return 1 if
444 it is an error we can't recover from. */
/* NOTE(review), based on the lines visible in this listing:
   - The result of write () is only tested for being negative, so a short
     write would drop output while the CRC (computed over the in-memory
     data) still matches; consider looping until every byte is written.
   - Several error paths follow inflateInit2.  If they return directly
     (the return statements are not visible here), inflateEnd is skipped
     on those paths and the inflate state is leaked -- confirm.
   - total_out is compared with the 32-bit ISIZE field.  RFC 1952 defines
     ISIZE as the size modulo 2^32, so where total_out is wider than 32
     bits an input of 4 GiB or more would be rejected -- confirm whether
     that matters here.  */
447 gunzip_and_write (fd, fullname, buf, size)
/* Inflated data is produced and written out in chunks of this size.  */
456 unsigned char outbuf[32768];
461 error (0, 0, "gzipped data too small - lacks complete header");
/* Validate the fixed header: the two identification bytes, then the
   compression method in byte 2.  */
464 if (buf[0] != GZIP_ID1 || buf[1] != GZIP_ID2)
466 error (0, 0, "gzipped data does not start with gzip identification");
469 if (buf[2] != GZIP_CDEFLATE)
471 error (0, 0, "only the deflate compression method is supported");
475 /* Skip over the fixed header, and then skip any of the variable-length
476 fields. As we skip each field, we keep pos <= size. The checks
477 on positions and lengths are really checks for malformed or
478 incomplete gzip data. */
480 if (buf[3] & GZIP_FEXTRA)
484 error (0, 0, "%s lacks proper gzip XLEN field", fullname);
/* XLEN is a two-byte little-endian length; skip the extra field it
   describes plus the two length bytes themselves.  */
487 pos += buf[pos] + (buf[pos + 1] << 8) + 2;
490 error (0, 0, "%s lacks proper gzip \"extra field\"", fullname);
495 if (buf[3] & GZIP_FNAME)
/* The name is NUL-terminated; memchr keeps the search for the terminator
   inside the remaining input.  */
497 unsigned char *p = memchr(buf + pos, '\0', size - pos);
500 error (0, 0, "%s has bad gzip filename field", fullname);
505 if (buf[3] & GZIP_FCOMMENT)
/* The comment is skipped the same way as the file name.  */
507 unsigned char *p = memchr(buf + pos, '\0', size - pos);
510 error (0, 0, "%s has bad gzip comment field", fullname);
515 if (buf[3] & GZIP_FHCRC)
520 error (0, 0, "%s has bad gzip CRC16 field", fullname);
525 /* There could be no data to decompress - check and short circuit. */
528 error (0, 0, "gzip data incomplete for %s (no data)", fullname);
/* Start from an all-zero z_stream before calling inflateInit2.  */
532 memset (&zstr, 0, sizeof zstr);
533 /* Passing a negative argument tells zlib not to look for a zlib
534 (RFC1950) header. This is an undocumented feature; I suppose if
535 we wanted to be anal we could synthesize a header instead,
537 zstatus = inflateInit2 (&zstr, -15);
540 compress_error (1, zstatus, &zstr, fullname);
542 /* I don't see why we should have to include the 8 byte trailer in
543 avail_in. But I see that zlib/gzio.c does, and it seemed to fix
544 a fairly rare bug in which we'd get a Z_BUF_ERROR for no obvious
546 zstr.avail_in = size - pos;
547 zstr.next_in = buf + pos;
549 crc = crc32 (0, NULL, 0);
553 zstr.avail_out = sizeof (outbuf);
554 zstr.next_out = outbuf;
555 zstatus = inflate (&zstr, Z_NO_FLUSH);
556 if (zstatus != Z_STREAM_END && zstatus != Z_OK)
558 compress_error (0, zstatus, &zstr, fullname);
561 if (write (fd, outbuf, sizeof (outbuf) - zstr.avail_out) < 0)
563 error (0, errno, "writing decompressed file %s", fullname);
566 crc = crc32 (crc, outbuf, sizeof (outbuf) - zstr.avail_out);
567 } while (zstatus != Z_STREAM_END);
568 zstatus = inflateEnd (&zstr);
570 compress_error (0, zstatus, &zstr, fullname);
572 /* Check that there is still 8 trailer bytes remaining (CRC32
573 and ISIZE). Check total decomp. data, plus header len (pos)
574 against input buffer total size. */
/* Advance POS past the compressed data that inflate consumed, so that it
   indexes the first trailer byte.  */
575 pos += zstr.total_in;
578 error (0, 0, "gzip data incomplete for %s (no trailer)", fullname);
/* Trailer bytes 0-3: the CRC-32 of the uncompressed data, stored
   little-endian.  */
582 if (crc != ((unsigned long)buf[pos]
583 + ((unsigned long)buf[pos + 1] << 8)
584 + ((unsigned long)buf[pos + 2] << 16)
585 + ((unsigned long)buf[pos + 3] << 24)))
587 error (0, 0, "CRC error uncompressing %s", fullname);
/* Trailer bytes 4-7: the uncompressed length, stored little-endian.  */
591 if (zstr.total_out != ((unsigned long)buf[pos + 4]
592 + ((unsigned long)buf[pos + 5] << 8)
593 + ((unsigned long)buf[pos + 6] << 16)
594 + ((unsigned long)buf[pos + 7] << 24)))
596 error (0, 0, "invalid length uncompressing %s", fullname);
603 /* Read all of FD and put the gzipped data (RFC1952/RFC1951) into *BUF,
604 replacing previous contents of *BUF. *BUF is xmalloc'd and *SIZE is
605 its allocated size. Put the actual number of bytes of data in
606 *LEN. If something goes wrong, give a nonfatal error mentioning
607 FULLNAME as the name of the file for FD, and return 1 if we can't
608 recover from it). LEVEL is the compression level (1-9). */
/* NOTE(review), based on the lines visible in this listing:
   - The output buffer is grown with xrealloc in two places and with
     plain realloc when room is made for the trailer, and each result is
     followed by an "out of memory" report.  If xrealloc aborts on
     failure (confirm), the reports after it can never fire, and the
     three sites should agree on one allocator.
   - The trailer stores the running CRC-32 and then the low 32 bits of
     total_in, each little-endian, which is the CRC32/ISIZE layout of
     RFC 1952.
   - Several error paths follow deflateInit2.  If they return directly
     (the return statements are not visible here), deflateEnd is skipped
     on those paths -- confirm.  */
611 read_and_gzip (fd, fullname, buf, size, len, level)
613 const char *fullname;
/* Input is read from FD in chunks of this size.  */
621 unsigned char inbuf[8192];
627 unsigned char *newbuf;
630 newbuf = xrealloc (*buf, *size);
633 error (0, 0, "out of memory");
/* Hand-written gzip header: the two identification bytes, the deflate
   method, and (bytes 4-7) a zero MTIME, which RFC 1952 defines as "no
   time stamp available".  */
638 (*buf)[0] = GZIP_ID1;
639 (*buf)[1] = GZIP_ID2;
640 (*buf)[2] = GZIP_CDEFLATE;
642 (*buf)[4] = (*buf)[5] = (*buf)[6] = (*buf)[7] = 0;
643 /* Could set this based on level, but why bother? */
647 memset (&zstr, 0, sizeof zstr);
/* A windowBits value of -15 asks zlib for a raw deflate stream with no
   zlib header or trailer; the gzip framing is written by this function
   itself.  */
648 zstatus = deflateInit2 (&zstr, level, Z_DEFLATED, -15, 8,
650 crc = crc32 (0, NULL, 0);
653 compress_error (0, zstatus, &zstr, fullname);
657 /* Adjust for 10-byte output header (filled in above) */
659 zstr.avail_out = *size - 10;
660 zstr.next_out = *buf + 10;
/* Each chunk read from FD is folded into the running CRC-32 and then
   offered to deflate as its input.  */
666 nread = read (fd, inbuf, sizeof inbuf);
669 error (0, errno, "cannot read %s", fullname);
675 crc = crc32 (crc, inbuf, nread);
676 zstr.next_in = inbuf;
677 zstr.avail_in = nread;
681 /* I don't see this documented anywhere, but deflate seems
682 to tend to dump core sometimes if we pass it Z_FINISH and
683 a small (e.g. 2147 byte) avail_out. So we insist on at
684 least 4096 bytes (that is what zlib/gzio.c uses). */
686 if (zstr.avail_out < 4096)
688 unsigned char *newbuf;
/* The asserts before and after the reallocation check that next_out and
   avail_out stay consistent with *BUF and *SIZE; next_out has to be
   recomputed because the buffer may have moved.  */
690 assert(zstr.avail_out + zstr.total_out == *size);
691 assert(zstr.next_out == *buf + zstr.total_out);
693 newbuf = xrealloc (*buf, *size);
696 error (0, 0, "out of memory");
700 zstr.next_out = *buf + zstr.total_out;
701 zstr.avail_out = *size - zstr.total_out;
702 assert(zstr.avail_out + zstr.total_out == *size);
703 assert(zstr.next_out == *buf + zstr.total_out);
/* The literal 0 here is Z_NO_FLUSH.  FINISH is presumably set once the
   input is exhausted -- its assignment is not visible in this
   listing.  */
706 zstatus = deflate (&zstr, finish ? Z_FINISH : 0);
707 if (zstatus == Z_STREAM_END)
709 else if (zstatus != Z_OK)
710 compress_error (0, zstatus, &zstr, fullname);
711 } while (zstr.avail_out == 0);
714 /* Need to add the CRC information (8 bytes)
715 to the end of the gzip'd output.
716 Ensure there is enough space in the output buffer
718 if (zstr.avail_out < 8)
720 unsigned char *newbuf;
722 assert(zstr.avail_out + zstr.total_out == *size);
723 assert(zstr.next_out == *buf + zstr.total_out);
724 *size += 8 - zstr.avail_out;
725 newbuf = realloc (*buf, *size);
728 error (0, 0, "out of memory");
732 zstr.next_out = *buf + zstr.total_out;
733 zstr.avail_out = *size - zstr.total_out;
734 assert(zstr.avail_out + zstr.total_out == *size);
735 assert(zstr.next_out == *buf + zstr.total_out);
737 *zstr.next_out++ = (unsigned char)(crc & 0xff);
738 *zstr.next_out++ = (unsigned char)((crc >> 8) & 0xff);
739 *zstr.next_out++ = (unsigned char)((crc >> 16) & 0xff);
740 *zstr.next_out++ = (unsigned char)((crc >> 24) & 0xff);
742 *zstr.next_out++ = (unsigned char)(zstr.total_in & 0xff);
743 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 8) & 0xff);
744 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 16) & 0xff);
745 *zstr.next_out++ = (unsigned char)((zstr.total_in >> 24) & 0xff);
749 assert(zstr.avail_out + zstr.total_out == *size);
750 assert(zstr.next_out == *buf + zstr.total_out);
752 *len = zstr.total_out;
754 zstatus = deflateEnd (&zstr);
756 compress_error (0, zstatus, &zstr, fullname);
760 #endif /* defined (SERVER_SUPPORT) || defined (CLIENT_SUPPORT) */