]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - usr.bin/cut/cut.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / usr.bin / cut / cut.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32
33 #ifndef lint
34 static const char copyright[] =
35 "@(#) Copyright (c) 1989, 1993\n\
36         The Regents of the University of California.  All rights reserved.\n";
37 static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95";
38 #endif /* not lint */
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <limits.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52
/* Command-line option flags; each is set to 1 when its option is seen. */
static int	bflag;		/* -b list: select by byte position */
static int	cflag;		/* -c list: select by character position */
static int	dflag;		/* -d delim: explicit field delimiter */
static int	fflag;		/* -f list: select by field */
static int	nflag;		/* -n: do not split multibyte characters */
static int	sflag;		/* -s: suppress lines with no delimiter */
static int	wflag;		/* -w: fields are separated by blanks */

/* Field delimiter, both as a wide character and as its multibyte string. */
static wchar_t	dchar;
static char	dcharmb[MB_LEN_MAX + 1];

/*
 * Selection list state, filled in by get_list():
 *   autostart	largest N seen in a leading "-N" item (0 if none)
 *   autostop	smallest stop value of an open-ended "N-" item (0 if none)
 *   maxval	highest position the per-line loops need to examine
 *   positions	byte map indexed by 1-based position; non-zero = selected
 */
static size_t	autostart;
static size_t	autostop;
static size_t	maxval;
static char	*positions;

/* Per-line workers, one per selection mode. */
static int	b_cut(FILE *, const char *);
static int	b_n_cut(FILE *, const char *);
static int	c_cut(FILE *, const char *);
static int	f_cut(FILE *, const char *);

/* Helpers. */
static void	get_list(char *);
static int	is_delim(wchar_t);
static void	needpos(size_t);
static void	usage(void);
74
75 int
76 main(int argc, char *argv[])
77 {
78         FILE *fp;
79         int (*fcn)(FILE *, const char *);
80         int ch, rval;
81         size_t n;
82
83         setlocale(LC_ALL, "");
84
85         fcn = NULL;
86         dchar = '\t';                   /* default delimiter is \t */
87         strcpy(dcharmb, "\t");
88
89         while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1)
90                 switch(ch) {
91                 case 'b':
92                         get_list(optarg);
93                         bflag = 1;
94                         break;
95                 case 'c':
96                         get_list(optarg);
97                         cflag = 1;
98                         break;
99                 case 'd':
100                         n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL);
101                         if (dchar == '\0' || n != strlen(optarg))
102                                 errx(1, "bad delimiter");
103                         strcpy(dcharmb, optarg);
104                         dflag = 1;
105                         break;
106                 case 'f':
107                         get_list(optarg);
108                         fflag = 1;
109                         break;
110                 case 's':
111                         sflag = 1;
112                         break;
113                 case 'n':
114                         nflag = 1;
115                         break;
116                 case 'w':
117                         wflag = 1;
118                         break;
119                 case '?':
120                 default:
121                         usage();
122                 }
123         argc -= optind;
124         argv += optind;
125
126         if (fflag) {
127                 if (bflag || cflag || nflag || (wflag && dflag))
128                         usage();
129         } else if (!(bflag || cflag) || dflag || sflag || wflag)
130                 usage();
131         else if (!bflag && nflag)
132                 usage();
133
134         if (fflag)
135                 fcn = f_cut;
136         else if (cflag)
137                 fcn = MB_CUR_MAX > 1 ? c_cut : b_cut;
138         else if (bflag)
139                 fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut;
140
141         rval = 0;
142         if (*argv)
143                 for (; *argv; ++argv) {
144                         if (strcmp(*argv, "-") == 0)
145                                 rval |= fcn(stdin, "stdin");
146                         else {
147                                 if (!(fp = fopen(*argv, "r"))) {
148                                         warn("%s", *argv);
149                                         rval = 1;
150                                         continue;
151                                 }
152                                 fcn(fp, *argv);
153                                 (void)fclose(fp);
154                         }
155                 }
156         else
157                 rval = fcn(stdin, "stdin");
158         exit(rval);
159 }
160
161 static void
162 get_list(char *list)
163 {
164         size_t setautostart, start, stop;
165         char *pos;
166         char *p;
167
168         /*
169          * set a byte in the positions array to indicate if a field or
170          * column is to be selected; use +1, it's 1-based, not 0-based.
171          * Numbers and number ranges may be overlapping, repeated, and in
172          * any order. We handle "-3-5" although there's no real reason to.
173          */
174         for (; (p = strsep(&list, ", \t")) != NULL;) {
175                 setautostart = start = stop = 0;
176                 if (*p == '-') {
177                         ++p;
178                         setautostart = 1;
179                 }
180                 if (isdigit((unsigned char)*p)) {
181                         start = stop = strtol(p, &p, 10);
182                         if (setautostart && start > autostart)
183                                 autostart = start;
184                 }
185                 if (*p == '-') {
186                         if (isdigit((unsigned char)p[1]))
187                                 stop = strtol(p + 1, &p, 10);
188                         if (*p == '-') {
189                                 ++p;
190                                 if (!autostop || autostop > stop)
191                                         autostop = stop;
192                         }
193                 }
194                 if (*p)
195                         errx(1, "[-bcf] list: illegal list value");
196                 if (!stop || !start)
197                         errx(1, "[-bcf] list: values may not include zero");
198                 if (maxval < stop) {
199                         maxval = stop;
200                         needpos(maxval + 1);
201                 }
202                 for (pos = positions + start; start++ <= stop; *pos++ = 1);
203         }
204
205         /* overlapping ranges */
206         if (autostop && maxval > autostop) {
207                 maxval = autostop;
208                 needpos(maxval + 1);
209         }
210
211         /* set autostart */
212         if (autostart)
213                 memset(positions + 1, '1', autostart);
214 }
215
216 static void
217 needpos(size_t n)
218 {
219         static size_t npos;
220         size_t oldnpos;
221
222         /* Grow the positions array to at least the specified size. */
223         if (n > npos) {
224                 oldnpos = npos;
225                 if (npos == 0)
226                         npos = n;
227                 while (n > npos)
228                         npos *= 2;
229                 if ((positions = realloc(positions, npos)) == NULL)
230                         err(1, "realloc");
231                 memset((char *)positions + oldnpos, 0, npos - oldnpos);
232         }
233 }
234
235 static int
236 b_cut(FILE *fp, const char *fname __unused)
237 {
238         int ch, col;
239         char *pos;
240
241         ch = 0;
242         for (;;) {
243                 pos = positions + 1;
244                 for (col = maxval; col; --col) {
245                         if ((ch = getc(fp)) == EOF)
246                                 return (0);
247                         if (ch == '\n')
248                                 break;
249                         if (*pos++)
250                                 (void)putchar(ch);
251                 }
252                 if (ch != '\n') {
253                         if (autostop)
254                                 while ((ch = getc(fp)) != EOF && ch != '\n')
255                                         (void)putchar(ch);
256                         else
257                                 while ((ch = getc(fp)) != EOF && ch != '\n');
258                 }
259                 (void)putchar('\n');
260         }
261         return (0);
262 }
263
264 /*
265  * Cut based on byte positions, taking care not to split multibyte characters.
266  * Although this function also handles the case where -n is not specified,
267  * b_cut() ought to be much faster.
268  */
269 static int
270 b_n_cut(FILE *fp, const char *fname)
271 {
272         size_t col, i, lbuflen;
273         char *lbuf;
274         int canwrite, clen, warned;
275         mbstate_t mbs;
276
277         memset(&mbs, 0, sizeof(mbs));
278         warned = 0;
279         while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
280                 for (col = 0; lbuflen > 0; col += clen) {
281                         if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) {
282                                 if (!warned) {
283                                         warn("%s", fname);
284                                         warned = 1;
285                                 }
286                                 memset(&mbs, 0, sizeof(mbs));
287                                 clen = 1;
288                         }
289                         if (clen == 0 || *lbuf == '\n')
290                                 break;
291                         if (col < maxval && !positions[1 + col]) {
292                                 /*
293                                  * Print the character if (1) after an initial
294                                  * segment of un-selected bytes, the rest of
295                                  * it is selected, and (2) the last byte is
296                                  * selected.
297                                  */
298                                 i = col;
299                                 while (i < col + clen && i < maxval &&
300                                     !positions[1 + i])
301                                         i++;
302                                 canwrite = i < col + clen;
303                                 for (; i < col + clen && i < maxval; i++)
304                                         canwrite &= positions[1 + i];
305                                 if (canwrite)
306                                         fwrite(lbuf, 1, clen, stdout);
307                         } else {
308                                 /*
309                                  * Print the character if all of it has
310                                  * been selected.
311                                  */
312                                 canwrite = 1;
313                                 for (i = col; i < col + clen; i++)
314                                         if ((i >= maxval && !autostop) ||
315                                             (i < maxval && !positions[1 + i])) {
316                                                 canwrite = 0;
317                                                 break;
318                                         }
319                                 if (canwrite)
320                                         fwrite(lbuf, 1, clen, stdout);
321                         }
322                         lbuf += clen;
323                         lbuflen -= clen;
324                 }
325                 if (lbuflen > 0)
326                         putchar('\n');
327         }
328         return (warned);
329 }
330
331 static int
332 c_cut(FILE *fp, const char *fname)
333 {
334         wint_t ch;
335         int col;
336         char *pos;
337
338         ch = 0;
339         for (;;) {
340                 pos = positions + 1;
341                 for (col = maxval; col; --col) {
342                         if ((ch = getwc(fp)) == WEOF)
343                                 goto out;
344                         if (ch == '\n')
345                                 break;
346                         if (*pos++)
347                                 (void)putwchar(ch);
348                 }
349                 if (ch != '\n') {
350                         if (autostop)
351                                 while ((ch = getwc(fp)) != WEOF && ch != '\n')
352                                         (void)putwchar(ch);
353                         else
354                                 while ((ch = getwc(fp)) != WEOF && ch != '\n');
355                 }
356                 (void)putwchar('\n');
357         }
358 out:
359         if (ferror(fp)) {
360                 warn("%s", fname);
361                 return (1);
362         }
363         return (0);
364 }
365
366 static int
367 is_delim(wchar_t ch)
368 {
369         if (wflag) {
370                 if (ch == ' ' || ch == '\t')
371                         return 1;
372         } else {
373                 if (ch == dchar)
374                         return 1;
375         }
376         return 0;
377 }
378
379 static int
380 f_cut(FILE *fp, const char *fname)
381 {
382         wchar_t ch;
383         int field, i, isdelim;
384         char *pos, *p;
385         int output;
386         char *lbuf, *mlbuf;
387         size_t clen, lbuflen, reallen;
388
389         mlbuf = NULL;
390         while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
391                 reallen = lbuflen;
392                 /* Assert EOL has a newline. */
393                 if (*(lbuf + lbuflen - 1) != '\n') {
394                         /* Can't have > 1 line with no trailing newline. */
395                         mlbuf = malloc(lbuflen + 1);
396                         if (mlbuf == NULL)
397                                 err(1, "malloc");
398                         memcpy(mlbuf, lbuf, lbuflen);
399                         *(mlbuf + lbuflen) = '\n';
400                         lbuf = mlbuf;
401                         reallen++;
402                 }
403                 output = 0;
404                 for (isdelim = 0, p = lbuf;; p += clen) {
405                         clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL);
406                         if (clen == (size_t)-1 || clen == (size_t)-2) {
407                                 warnc(EILSEQ, "%s", fname);
408                                 free(mlbuf);
409                                 return (1);
410                         }
411                         if (clen == 0)
412                                 clen = 1;
413                         /* this should work if newline is delimiter */
414                         if (is_delim(ch))
415                                 isdelim = 1;
416                         if (ch == '\n') {
417                                 if (!isdelim && !sflag)
418                                         (void)fwrite(lbuf, lbuflen, 1, stdout);
419                                 break;
420                         }
421                 }
422                 if (!isdelim)
423                         continue;
424
425                 pos = positions + 1;
426                 for (field = maxval, p = lbuf; field; --field, ++pos) {
427                         if (*pos && output++)
428                                 for (i = 0; dcharmb[i] != '\0'; i++)
429                                         putchar(dcharmb[i]);
430                         for (;;) {
431                                 clen = mbrtowc(&ch, p, lbuf + reallen - p,
432                                     NULL);
433                                 if (clen == (size_t)-1 || clen == (size_t)-2) {
434                                         warnc(EILSEQ, "%s", fname);
435                                         free(mlbuf);
436                                         return (1);
437                                 }
438                                 if (clen == 0)
439                                         clen = 1;
440                                 p += clen;
441                                 if (ch == '\n' || is_delim(ch)) {
442                                         /* compress whitespace */
443                                         if (wflag && ch != '\n')
444                                                 while (is_delim(*p))
445                                                         p++;
446                                         break;
447                                 }
448                                 if (*pos)
449                                         for (i = 0; i < (int)clen; i++)
450                                                 putchar(p[i - clen]);
451                         }
452                         if (ch == '\n')
453                                 break;
454                 }
455                 if (ch != '\n') {
456                         if (autostop) {
457                                 if (output)
458                                         for (i = 0; dcharmb[i] != '\0'; i++)
459                                                 putchar(dcharmb[i]);
460                                 for (; (ch = *p) != '\n'; ++p)
461                                         (void)putchar(ch);
462                         } else
463                                 for (; (ch = *p) != '\n'; ++p);
464                 }
465                 (void)putchar('\n');
466         }
467         free(mlbuf);
468         return (0);
469 }
470
/*
 * Print the three-line synopsis on standard error and exit with
 * status 1.  Does not return.
 */
static void
usage(void)
{
	static const char *const synopsis[] = {
		"usage: cut -b list [-n] [file ...]",
		"       cut -c list [file ...]",
		"       cut -f list [-s] [-w | -d delim] [file ...]",
	};
	size_t k;

	for (k = 0; k < sizeof(synopsis) / sizeof(synopsis[0]); k++)
		(void)fprintf(stderr, "%s\n", synopsis[k]);
	exit(1);
}