1 /* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
36 #include <sys/param.h>
39 #include <sys/types.h>
/* Byte count used for every read request below and for the heap-allocated read buffer. */
62 #define MAXBUFSIZ (32 * 1024)
/* zlib handle: created by gzdopen() in grep_open(), consumed by gzread() in grep_refill(). */
65 static gzFile gzbufdesc;
/* liblzma decoder state shared by the FILE_XZ and FILE_LZMA code paths. */
67 static lzma_stream lstrm = LZMA_STREAM_INIT;
/* Action handed to lzma_code(); grep_refill() sets it to LZMA_FINISH at some point. */
68 static lzma_action laction;
/* Staging area that receives compressed bytes from read() before they are decoded. */
69 static uint8_t lin_buf[MAXBUFSIZ];
/* bzip2 handle: created by BZ2_bzdopen() in grep_open(), consumed by BZ2_bzRead() in grep_refill(). */
72 static BZFILE* bzbufdesc;
/* Read buffer: either an mmap() mapping or a MAXBUFSIZ heap allocation (both set up in grep_open()). */
75 static unsigned char *buffer;
/* Start of the unconsumed data scanned by memchr() -- presumably a cursor into buffer; confirm. */
76 static unsigned char *bufpos;
/* Line buffer used to assemble lines that span refills; resized through grep_lnbufgrow(). */
80 static unsigned char *lnbuf;
/* Compared against the requested size in grep_lnbufgrow() -- presumably the current capacity of lnbuf. */
81 static size_t lnbuflen;
/*
 * Refills the shared read buffer for f, choosing the input method from the
 * current filebehave mode (mmap, gzip, bzip2, xz/lzma or plain read()).
 * Callers compare the result against 0 and stop reading when it is nonzero.
 * NOTE(review): the return statements are not visible in this listing, so
 * the exact meaning of a nonzero result (EOF vs. error) must be confirmed.
 */
84 grep_refill(struct file *f)
/* mmap mode is tested before any of the read paths below. */
88 if (filebehave == FILE_MMAP)
/* gzip: request up to MAXBUFSIZ decompressed bytes through zlib. */
94 if (filebehave == FILE_GZIP) {
95 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
/* bzip2: taken only while bzbufdesc is non-NULL. */
97 } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
100 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
104 /* No problem, nr will be okay */
106 case BZ_DATA_ERROR_MAGIC:
108 * As opposed to gzread(), which simply returns the
109 * plain file data, if it is not in the correct
110 * compressed format, BZ2_bzRead() instead aborts.
112 * So, just restart at the beginning of the file again,
113 * and use plain reads from now on.
115 BZ2_bzReadClose(&bzerr, bzbufdesc);
/* Rewind the descriptor so the plain read() that follows starts at the first byte. */
117 if (lseek(f->fd, 0, SEEK_SET) == -1)
119 nr = read(f->fd, buffer, MAXBUFSIZ);
122 /* Make sure we exit with an error */
/* xz and lzma inputs share a single liblzma decoding loop. */
127 } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) {
129 lstrm.next_out = buffer;
/* All staged input consumed: read the next chunk of compressed data into lin_buf. */
132 if (lstrm.avail_in == 0) {
133 lstrm.next_in = lin_buf;
134 nr = read(f->fd, lin_buf, MAXBUFSIZ);
139 laction = LZMA_FINISH;
144 ret = lzma_code(&lstrm, laction);
/* Only LZMA_OK and LZMA_STREAM_END pass this check; the handling of other codes is not visible here. */
146 if (ret != LZMA_OK && ret != LZMA_STREAM_END)
/* Output buffer full or stream finished: record the decoded byte count and reset the output window. */
149 if (lstrm.avail_out == 0 || ret == LZMA_STREAM_END) {
150 bufrem = MAXBUFSIZ - lstrm.avail_out;
151 lstrm.next_out = buffer;
152 lstrm.avail_out = MAXBUFSIZ;
/* Keep decoding until some output has been produced or the stream has ended. */
154 } while (bufrem == 0 && ret != LZMA_STREAM_END);
157 #endif /* WITHOUT_LZMA */
/* Plain read() of up to MAXBUFSIZ bytes -- presumably the fallback for uncompressed input; confirm. */
159 nr = read(f->fd, buffer, MAXBUFSIZ);
/*
 * Makes sure the line buffer lnbuf can hold at least newlen bytes.
 * NOTE(review): callers test the result for nonzero, presumably a failure
 * indicator; the return statements are not visible here -- confirm.
 */
169 grep_lnbufgrow(size_t newlen)
/* Reallocate only when the request exceeds the recorded size. */
172 if (lnbuflen < newlen) {
173 lnbuf = grep_realloc(lnbuf, newlen);
/*
 * Produces the next input line for f.  Two paths are visible: the newline
 * is found inside the data already buffered at bufpos, or the line spans
 * one or more refills and its pieces are assembled in lnbuf.
 * NOTE(review): the stores through lenp and the return statements are not
 * visible in this listing; presumably *lenp receives the line length, with
 * zero signalling EOF as the comment below states -- confirm.
 */
181 grep_fgetln(struct file *f, size_t *lenp)
189 /* Fill the buffer, if necessary */
190 if (bufrem == 0 && grep_refill(f) != 0)
194 /* Return zero length to indicate EOF */
199 /* Look for a newline in the remaining part of the buffer */
200 if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
201 ++p; /* advance over newline */
210 /* We have to copy the current buffered data to the line buffer */
/* len grows by bufrem on every pass; off is the write offset into lnbuf used by the memcpy() calls. */
211 for (len = bufrem, off = 0; ; len += bufrem) {
212 /* Make sure there is room for more data */
/* LNBUFBUMP extra bytes are requested on top of len. */
213 if (grep_lnbufgrow(len + LNBUFBUMP))
/* Append the len - off bytes that have not been copied yet. */
215 memcpy(lnbuf + off, bufpos, len - off);
/* Fetch more input; a nonzero result ends the loop on the path commented below. */
217 if (grep_refill(f) != 0)
220 /* EOF: return partial line */
/* No newline in the fresh data and not in mmap mode. */
222 if ((p = memchr(bufpos, '\n', bufrem)) == NULL &&
223 filebehave != FILE_MMAP)
226 /* mmap EOF: return partial line, consume buffer */
229 /* got it: finish up the line (like code above) */
/* Grow to exactly len this time, then copy the final diff bytes of the line. */
234 if (grep_lnbufgrow(len))
236 memcpy(lnbuf + off, bufpos, diff);
250 * Opens a file for processing.
/*
 * Allocates a struct file for path, obtains its descriptor, prepares the
 * read buffer and any decompressor required by the current filebehave mode,
 * and primes the buffer with a first grep_refill().
 * NOTE(review): error exits and the return statement are not visible in
 * this listing.
 */
253 grep_open(const char *path)
/* The file structure is heap-allocated and zero-filled before use. */
257 f = grep_malloc(sizeof *f);
258 memset(f, 0, sizeof *f);
260 /* Processing stdin implies --line-buffered. */
/* Standard input uses STDIN_FILENO directly; otherwise path is opened read-only. */
262 f->fd = STDIN_FILENO;
263 } else if ((f->fd = open(path, O_RDONLY)) == -1)
266 if (filebehave == FILE_MMAP) {
/* Fall back to FILE_STDIO if fstat() fails, the size exceeds OFF_MAX, or this is not a regular file. */
269 if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) ||
270 (!S_ISREG(st.st_mode)))
271 filebehave = FILE_STDIO;
273 int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
/* MAP_PREFAULT_READ is added only where the platform defines it. */
274 #ifdef MAP_PREFAULT_READ
275 flags |= MAP_PREFAULT_READ;
/* Map fsiz bytes of the file read-only. */
278 buffer = mmap(NULL, fsiz, PROT_READ, flags,
/* A failed mapping also falls back to FILE_STDIO. */
280 if (buffer == MAP_FAILED)
281 filebehave = FILE_STDIO;
/* Advise the kernel that the mapping will be accessed sequentially. */
285 madvise(buffer, st.st_size, MADV_SEQUENTIAL);
/* No usable mapping: allocate a MAXBUFSIZ heap read buffer instead. */
290 if ((buffer == NULL) || (buffer == MAP_FAILED))
291 buffer = grep_malloc(MAXBUFSIZ);
/* gzip mode: wrap the descriptor in a zlib read handle. */
293 if (filebehave == FILE_GZIP &&
294 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
/* bzip2 mode, compiled in unless WITHOUT_BZIP2 is defined: wrap the descriptor in a bzip2 read handle. */
297 #ifndef WITHOUT_BZIP2
298 if (filebehave == FILE_BZIP &&
299 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
303 else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) {
/* FILE_XZ selects lzma_stream_decoder(); FILE_LZMA selects lzma_alone_decoder(). */
306 ret = (filebehave == FILE_XZ) ?
307 lzma_stream_decoder(&lstrm, UINT64_MAX,
309 lzma_alone_decoder(&lstrm, UINT64_MAX);
/* Mark MAXBUFSIZ bytes of output space as available to the decoder. */
315 lstrm.avail_out = MAXBUFSIZ;
320 /* Fill read buffer, also catches errors early */
321 if (bufrem == 0 && grep_refill(f) != 0)
324 /* Check for binary stuff, if necessary */
/* Unless binbehave is BINFILE_TEXT, look for a NUL byte in the data just buffered; the action taken is not visible here. */
325 if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
341 grep_close(struct file *f)
346 /* Reset read buffer and line buffer */
347 if (filebehave == FILE_MMAP) {
348 munmap(buffer, fsiz);