1 /* $NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $ */
4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
6 * Copyright (c) 2007 The NetBSD Foundation, Inc.
9 * This code is derived from software contributed to The NetBSD Foundation
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
47 static int comp(const char *, char **, size_t *);
50 * Preserve only unique content lines in a file. Input lines that have
51 * content [alphanumeric characters before a comment] are white-space
52 * normalized and have their comments removed. Then they are placed
53 * in a hash table, and only the first instance of them is printed.
54 * Comment lines without any alphanumeric content are always printed
55 * since they are there to make the file "pretty". Comment lines with
56 * alphanumeric content are also placed into the hash table and only
60 uniq(const char *fname)
64 static const DBT data = { NULL, 0 };
69 if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
70 err(1, "Cannot create in memory database");
72 if ((fp = fopen(fname, "r")) == NULL)
73 err(1, "Cannot open `%s'", fname);
74 while ((line = fgetln(fp, &len)) != NULL) {
77 if (!comp(line, &compline, &complen)) {
78 (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
84 switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
86 (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
103 * Normalize whitespace in the original line and store in compline a new
104 * string with whitespace converted to a single space. If the line
105 * contains just comments, we preserve them. If it contains data and
106 * comments, we kill the comments. Return 1 if the line had actual
107 * contents, or 0 if it was just a comment without alphanumeric characters.
110 comp(const char *origline, char **compline, size_t *len)
112 const unsigned char *p;
115 size_t l = *len, complen;
116 int hasalnum, iscomment;
118 /* Eat leading space */
119 for (p = (const unsigned char *)origline; l && *p && isspace(*p);
122 if ((cline = malloc(l + 1)) == NULL)
123 err(1, "Cannot allocate %zu bytes", l + 1);
124 (void)memcpy(cline, p, l);
133 for (q = (unsigned char *)cline; l && *p; p++, l--) {
135 if (complen && isspace(q[-1]))
140 if (!iscomment && *p == '#') {
145 hasalnum |= isalnum(*p);
151 /* Eat trailing space */
152 while (complen && isspace(q[-1])) {