2 * Copyright (c) 2019 Christos Zoulas
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
28 * Parse CSV (comma-separated values) text format (RFC-4180, RFC-7111)
35 FILE_RCSID("@(#)$File: is_csv.c,v 1.6 2020/08/09 16:43:36 christos Exp $")
41 #include <sys/types.h>
47 #define DPRINTF(fmt, ...) printf(fmt, __VA_ARGS__)
49 #define DPRINTF(fmt, ...)
54 * check all the lines in the buffer
56 * check only up to the number of lines specified
58 * the last line count is always ignored if it does not end in CRLF
64 static int csv_parse(const unsigned char *, const unsigned char *);
66 static const unsigned char *
67 eatquote(const unsigned char *uc, const unsigned char *ue)
72 unsigned char c = *uc++;
74 // We already got one, done.
81 // quote-quote escapes
92 csv_parse(const unsigned char *uc, const unsigned char *ue)
94 size_t nf = 0, tf = 0, nl = 0;
99 // Eat until the matching quote
100 uc = eatquote(uc, ue);
106 DPRINTF("%zu %zu %zu\n", nl, nf, tf);
110 return tf != 0 && tf == nf;
113 // First time and no fields, give up
116 // First time, set the number of fields
118 } else if (tf != nf) {
119 // Field number mismatch, we are done.
133 file_is_csv(struct magic_set *ms, const struct buffer *b, int looks_text)
135 const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
136 const unsigned char *ue = uc + b->flen;
137 int mime = ms->flags & MAGIC_MIME;
142 if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
145 if (!csv_parse(uc, ue))
148 if (mime == MAGIC_MIME_ENCODING)
152 if (file_printf(ms, "text/csv") == -1)
157 if (file_printf(ms, "CSV text") == -1)
165 #include <sys/types.h>
166 #include <sys/stat.h>
175 main(int argc, char *argv[])
181 if ((fd = open(argv[1], O_RDONLY)) == -1)
182 err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
184 if (fstat(fd, &st) == -1)
185 err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
187 if ((p = malloc(st.st_size)) == NULL)
188 err(EXIT_FAILURE, "Can't allocate %jd bytes",
189 (intmax_t)st.st_size);
190 if (read(fd, p, st.st_size) != st.st_size)
191 err(EXIT_FAILURE, "Can't read %jd bytes",
192 (intmax_t)st.st_size);
193 printf("is csv %d\n", csv_parse(p, p + st.st_size));