1 /* $NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $ */
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
/* Hash parameters handed to dbopen() when the in-memory database is created; declared extern here. */
43 extern const HASHINFO hinfo;
/* Print the named file to stdout, keeping only the first instance of each content line. */
45 void uniq(const char *);
/* Whitespace-normalize one line; returns nonzero if the line had actual content. */
46 static int comp(const char *, char **, size_t *);
49 * Preserve only unique content lines in a file. Input lines that have
50 * content [alphanumeric characters before a comment] are white-space
51 * normalized and have their comments removed. Then they are placed
52 * in a hash table, and only the first instance of them is printed.
53 * Comment lines without any alphanumeric content are always printed
54 * since they are there to make the file "pretty". Comment lines with
55 * alphanumeric content are also placed into the hash table and only
59 uniq(const char *fname)
63 static const DBT data = { NULL, 0 };
68 if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
69 err(1, "Cannot create in memory database");
71 if ((fp = fopen(fname, "r")) == NULL)
72 err(1, "Cannot open `%s'", fname);
73 while ((line = fgetln(fp, &len)) != NULL) {
76 if (!comp(line, &compline, &complen)) {
77 (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
83 switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
85 (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
102 * Normalize whitespace in the original line and store in compline a new
103 * string with each run of whitespace collapsed to a single space. If the line
104 * contains just comments, we preserve them. If it contains data and
105 * comments, we kill the comments. Return 1 if the line had actual
106 * contents, or 0 if it was just a comment without alphanumeric characters.
109 comp(const char *origline, char **compline, size_t *len)
111 const unsigned char *p;
114 size_t l = *len, complen;
115 int hasalnum, iscomment;
117 /* Eat leading space */
118 for (p = (const unsigned char *)origline; l && *p && isspace(*p);
121 if ((cline = malloc(l + 1)) == NULL)
122 err(1, "Cannot allocate %zu bytes", l + 1);
123 (void)memcpy(cline, p, l);
132 for (q = (unsigned char *)cline; l && *p; p++, l--) {
134 if (complen && isspace(q[-1]))
139 if (!iscomment && *p == '#') {
144 hasalnum |= isalnum(*p);
150 /* Eat trailing space */
151 while (complen && isspace(q[-1])) {