1 /* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
6 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
8 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
11 * All rights reserved.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
38 #include <sys/param.h>
41 #include <sys/types.h>
/*
 * File-scope input state shared by the functions in this file.
 * MAXBUFSIZ (32 KiB) is the byte count requested by every gzread(),
 * BZ2_bzRead() and read() call below, the size of lin_buf, and the size
 * of the heap buffer allocated in grep_open.
 */
64 #define MAXBUFSIZ (32 * 1024)
/* zlib handle: assigned from gzdopen() in grep_open, read with gzread() in grep_refill. */
67 static gzFile gzbufdesc;
/* liblzma decoder state: set up in grep_open, driven by lzma_code() in grep_refill. */
69 static lzma_stream lstrm = LZMA_STREAM_INIT;
/* Action argument handed to lzma_code(); grep_refill switches it to LZMA_FINISH. */
70 static lzma_action laction;
/* Staging area for raw bytes read from the descriptor and fed to lstrm as next_in. */
71 static uint8_t lin_buf[MAXBUFSIZ];
/* bzip2 handle: assigned from BZ2_bzdopen() in grep_open, used by BZ2_bzRead()/BZ2_bzReadClose(). */
74 static BZFILE* bzbufdesc;
/* Read buffer: either the mmap()ed region or a MAXBUFSIZ heap allocation (see grep_open). */
77 static unsigned char *buffer;
/*
 * Pointer into the buffered data that memchr() scans from.
 * NOTE(review): presumably the current read position; its updates are
 * not visible in this excerpt.
 */
78 static unsigned char *bufpos;
/* Line buffer that grep_fgetln copies into; resized through grep_realloc() in grep_lnbufgrow. */
82 static unsigned char *lnbuf;
/*
 * Compared against the requested length in grep_lnbufgrow.
 * NOTE(review): presumably the current capacity of lnbuf; the assignment
 * is not visible in this excerpt.
 */
83 static size_t lnbuflen;
/*
 * grep_refill: fetch the next chunk of input for file f into buffer.
 * The visible paths read through gzread(), BZ2_bzRead(), the liblzma
 * decoder, or plain read(), each asking for up to MAXBUFSIZ bytes.
 * Callers in this file compare the result against 0.
 * NOTE(review): presumably non-zero signals an error. This excerpt omits
 * lines of the function (return type, braces, branch selection, error
 * returns), so confirm details against the full source.
 */
86 grep_refill(struct file *f)
/* mmap mode is tested first; grep_fgetln notes that there is nothing more to refill in that mode. */
93 if (filebehave == FILE_MMAP)
/* gzip path: a single gzread() into the shared buffer. */
101 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
103 #ifndef WITHOUT_BZIP2
/* bzip2 path: taken only while a bzip2 handle is open. */
105 if (bzbufdesc != NULL) {
108 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
112 /* No problem, nr will be okay */
114 case BZ_DATA_ERROR_MAGIC:
116 * As opposed to gzread(), which simply returns the
117 * plain file data, if it is not in the correct
118 * compressed format, BZ2_bzRead() instead aborts.
120 * So, just restart at the beginning of the file again,
121 * and use plain reads from now on.
123 BZ2_bzReadClose(&bzerr, bzbufdesc);
/* Rewind the descriptor so the plain read() that follows starts at offset 0. */
125 if (lseek(f->fd, 0, SEEK_SET) == -1)
127 nr = read(f->fd, buffer, MAXBUFSIZ);
130 /* Make sure we exit with an error */
135 * Also an error case; we should never have a scenario
136 * where we have an open file but no bzip descriptor
137 * at this point. See: grep_open
/* LZMA path: the decoder writes its decompressed output into buffer. */
145 lstrm.next_out = buffer;
/* The decoder has no pending input left: read another raw chunk into lin_buf. */
148 if (lstrm.avail_in == 0) {
149 lstrm.next_in = lin_buf;
150 nr = read(f->fd, lin_buf, MAXBUFSIZ);
/*
 * NOTE(review): the condition guarding this assignment is not visible
 * here; presumably it is taken when read() reports end of input.
 */
155 laction = LZMA_FINISH;
160 lzmaret = lzma_code(&lstrm, laction);
/* Any status other than LZMA_OK or LZMA_STREAM_END is singled out (handling not visible here). */
162 if (lzmaret != LZMA_OK && lzmaret != LZMA_STREAM_END)
/*
 * Output window is full or the stream ended: record how many bytes were
 * produced in bufrem and reset the output window to the whole buffer.
 */
165 if (lstrm.avail_out == 0 || lzmaret == LZMA_STREAM_END) {
166 bufrem = MAXBUFSIZ - lstrm.avail_out;
167 lstrm.next_out = buffer;
168 lstrm.avail_out = MAXBUFSIZ;
/* Keep decoding until some output has been recorded or the stream has ended. */
170 } while (bufrem == 0 && lzmaret != LZMA_STREAM_END);
173 #endif /* WITHOUT_LZMA */
/* NOTE(review): appears to be the fallback for uncompressed input; the selecting branch is not visible. */
175 nr = read(f->fd, buffer, MAXBUFSIZ);
/*
 * grep_lnbufgrow: make sure the line buffer can hold newlen bytes.
 * When lnbuflen is smaller than newlen, lnbuf is resized to newlen
 * through grep_realloc().
 * NOTE(review): callers test the result for non-zero, so presumably a
 * non-zero return signals failure; the return statements and the
 * lnbuflen update are not visible in this excerpt.
 */
185 grep_lnbufgrow(size_t newlen)
188 if (lnbuflen < newlen) {
189 lnbuf = grep_realloc(lnbuf, newlen);
/*
 * grep_fgetln: fetch the next line of file f, where lines end at the
 * fileeol byte. If the delimiter is found in the data already buffered,
 * the first memchr() branch handles it; otherwise the buffered bytes are
 * accumulated in lnbuf while grep_refill() supplies more input, until a
 * delimiter or end of input is reached.
 * NOTE(review): lenp presumably receives the line length; the stores to
 * it and the return statements are not visible in this excerpt.
 */
197 grep_fgetln(struct file *f, size_t *lenp)
205 /* Fill the buffer, if necessary */
206 if (bufrem == 0 && grep_refill(f) != 0)
210 /* Return zero length to indicate EOF */
215 /* Look for a newline in the remaining part of the buffer */
216 if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) {
217 ++p; /* advance over newline */
226 /* We have to copy the current buffered data to the line buffer */
/*
 * len is the total number of bytes gathered so far and grows by bufrem
 * on each pass; off is the count already stored in lnbuf (its update is
 * not visible in this excerpt).
 */
227 for (len = bufrem, off = 0; ; len += bufrem) {
228 /* Make sure there is room for more data */
229 if (grep_lnbufgrow(len + LNBUFBUMP))
/* Append only the bytes not yet copied (len - off) after the first off bytes. */
231 memcpy(lnbuf + off, bufpos, len - off);
232 /* With FILE_MMAP, this is EOF; there's no more to refill */
233 if (filebehave == FILE_MMAP) {
238 /* Fetch more to try and find EOL/EOF */
239 if (grep_refill(f) != 0)
242 /* EOF: return partial line */
/* No delimiter in the freshly fetched data either (the action taken is not visible here). */
244 if ((p = memchr(bufpos, fileeol, bufrem)) == NULL)
246 /* got it: finish up the line (like code above) */
/* Ensure lnbuf can hold len bytes before the final copy. */
250 if (grep_lnbufgrow(len))
/*
 * Copy the last diff bytes. NOTE(review): diff is computed on lines not
 * visible here; presumably the span up to and including the delimiter.
 */
252 memcpy(lnbuf + off, bufpos, diff);
266 * Opens a file for processing.
/*
 * grep_open: open path read-only (or use STDIN_FILENO) and prepare it
 * for reading according to filebehave. The visible steps are: try to
 * mmap() the file, attach a gzip, bzip2 or LZMA decoder to the
 * descriptor, prime the read buffer with grep_refill(), and look for
 * NUL bytes in the first chunk.
 * NOTE(review): the return type, the condition selecting standard
 * input, the switch labels and the return statements are not visible in
 * this excerpt.
 */
269 grep_open(const char *path)
276 f = grep_malloc(sizeof *f);
277 memset(f, 0, sizeof *f);
279 /* Processing stdin implies --line-buffered. */
281 f->fd = STDIN_FILENO;
282 } else if ((f->fd = open(path, O_RDONLY)) == -1)
285 if (filebehave == FILE_MMAP) {
/*
 * Fall back to FILE_STDIO when fstat() fails, the size exceeds OFF_MAX,
 * or the descriptor does not refer to a regular file.
 */
288 if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) ||
289 (!S_ISREG(st.st_mode)))
290 filebehave = FILE_STDIO;
292 int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC;
/* MAP_PREFAULT_READ is added only where the platform defines it. */
293 #ifdef MAP_PREFAULT_READ
294 flags |= MAP_PREFAULT_READ;
297 buffer = mmap(NULL, fsiz, PROT_READ, flags,
/* A failed mapping is not fatal: switch to FILE_STDIO instead. */
299 if (buffer == MAP_FAILED)
300 filebehave = FILE_STDIO;
/* Advise the kernel that the mapping will be accessed sequentially. */
304 madvise(buffer, st.st_size, MADV_SEQUENTIAL);
/* No usable mapping: allocate a MAXBUFSIZ heap buffer for read-based input. */
309 if ((buffer == NULL) || (buffer == MAP_FAILED))
310 buffer = grep_malloc(MAXBUFSIZ);
312 switch (filebehave) {
/* gzip: wrap the descriptor in a zlib handle. */
314 if ((gzbufdesc = gzdopen(f->fd, "r")) == NULL)
317 #ifndef WITHOUT_BZIP2
/* bzip2: wrap the descriptor in a libbz2 handle. */
319 if ((bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
/*
 * FILE_XZ uses the stream decoder; the other branch uses the "alone"
 * decoder. Both are given UINT64_MAX as the memory limit.
 */
327 if (filebehave == FILE_XZ)
328 lzmaret = lzma_stream_decoder(&lstrm, UINT64_MAX,
331 lzmaret = lzma_alone_decoder(&lstrm, UINT64_MAX);
333 if (lzmaret != LZMA_OK)
/* The decoder may emit up to a full buffer of output. */
337 lstrm.avail_out = MAXBUFSIZ;
343 /* Fill read buffer, also catches errors early */
344 if (bufrem == 0 && grep_refill(f) != 0)
347 /* Check for binary stuff, if necessary */
/*
 * The NUL scan is skipped when binary files are treated as text or when
 * NUL itself is the line delimiter (the action taken on a match is not
 * visible here).
 */
348 if (binbehave != BINFILE_TEXT && fileeol != '\0' &&
349 memchr(bufpos, '\0', bufrem) != NULL)
365 grep_close(struct file *f)
370 /* Reset read buffer and line buffer */
371 if (filebehave == FILE_MMAP) {
372 munmap(buffer, fsiz);