/*
 * Source: FreeBSD releng/10.3 repository, lib/libc/regex/grot/split.c
 * (stable/10@296371 copied to releng/10.3 in preparation for 10.3-RC1).
 */
1 #include <sys/cdefs.h>
2 __FBSDID("$FreeBSD$");
3
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
6
7 #include "split.ih"
8
/*
 - split - divide a string into fields, like awk split()
 == int split(char *string, char *fields[], int nfields, char *sep);
 - fields: list is not NULL-terminated
 - nfields: number of entries available in fields[]
 - sep: "" white, "c" single char, "ab" [ab]+
 */
/*
 * The string is split in place: each stored field pointer points into
 * `string', and the separator that ends a stored field is overwritten
 * with '\0'.
 *
 * sep selects one of three modes:
 *   ""     blanks and tabs; leading white space is skipped and trailing
 *          white space does not produce an empty final field
 *   "c"    every occurrence of c ends a field, so adjacent separators
 *          yield empty fields
 *   "ab…"  any run of the listed characters acts as one separator
 *
 * The return value is the total number of fields found, which may exceed
 * nfields.  In that case only the first nfields pointers are stored and
 * the last stored field is left unterminated, i.e. it holds the whole
 * remainder of the string.  An empty string (after white-space skipping
 * in "" mode) yields 0.
 *
 * If nfields <= 0 nothing is stored and the string is not modified; the
 * fields are only counted.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* fn counts free slots down to 0 */
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room at all: p starts field 1, nothing is stored */
			fn = 1;
		}
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = nfields;	/* fn counts free slots down to 0 */
			for (;;) {
				*fp++ = p;
				fn--;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/*
						 * In white-space mode an empty last
						 * field is just trailing blanks; do
						 * not count it.
						 */
						if (trimtrail && **(fp-1) == '\0')
							fn++;
						return(nfields - fn);
					}
				if (fn == 0)
					break;
				*(p-1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room at all: step over field 1 without storing it */
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0')
					return(1);
			fn = 1;
		}
		/* here c is the separator that ended field number fn */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * Trailing blanks made the loop above count one
				 * phantom empty field.  If that was the only
				 * overflow, the last stored field really is the
				 * last one, so strip the blanks from it.
				 */
				if (nfields > 0 && fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
143
144 #ifdef TEST_SPLIT
145
146
/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 *
 * With one further argument after n, the loop only copies str into the
 * buffer n times and never calls split() (presumably a baseline for
 * timing the copy overhead -- confirm before relying on it).
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	/* both repeat modes copy argv[1] into buf; refuse what cannot fit */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "%s: string too long (max %lu bytes)\n",
			argv[0], (unsigned long)(sizeof(buf) - 1));
		exit(1);
	}

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() modifies buf, so start from a fresh copy */
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline if there is one.  A final line
			 * without a newline, or a chunk of an over-long line,
			 * is left intact.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}
183
/*
 * dosplit - split one string with room for NF fields and print the result
 *
 * string is modified in place by split(); seps is passed through as the
 * separator specification.
 */
void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *slots[NF];
	int count = split(string, slots, NF, seps);

	print(count, NF, slots);
}
194
/*
 * print - show the result of a split() call on one line of stdout
 *
 * nf:     field count returned by split()
 * nfp:    number of slots that were available in fields[]
 * fields: the field pointers; only the first min(nf, nfp) are read
 *
 * Output is the count, a tab, then the stored fields in double quotes
 * separated by ", ".  When split() found more fields than were stored,
 * "..." is appended.  The line is always newline-terminated, including
 * when there are no fields to show.
 */
void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < bound) ? ", " : "");
	if (nf > bound)		/* some fields did not fit in fields[] */
		printf("%s...", (bound > 0) ? ", " : "");
	printf("\n");
}
206
#define	RNF	5		/* some table entries know this */

/*
 * Regression table: splitting `str' by `seps' into RNF slots must return
 * `nf', and the first min(nf, RNF) stored fields must equal `fi'.
 * A NULL `str' ends the table.
 */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	/* single separator */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b  c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* the same separator given twice */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty separator string: white-space mode */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def  g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ "  a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL,			NULL,	0,	{ NULL } },
};
275
276 void
277 regress(void)
278 {
279         char buf[512];
280         int n;
281         char *fields[RNF+1];
282         int nf;
283         int i;
284         int printit;
285         char *f;
286
287         for (n = 0; tests[n].str != NULL; n++) {
288                 (void) strcpy(buf, tests[n].str);
289                 fields[RNF] = NULL;
290                 nf = split(buf, fields, RNF, tests[n].seps);
291                 printit = 0;
292                 if (nf != tests[n].nf) {
293                         printf("split `%s' by `%s' gave %d fields, not %d\n",
294                                 tests[n].str, tests[n].seps, nf, tests[n].nf);
295                         printit = 1;
296                 } else if (fields[RNF] != NULL) {
297                         printf("split() went beyond array end\n");
298                         printit = 1;
299                 } else {
300                         for (i = 0; i < nf && i < RNF; i++) {
301                                 f = fields[i];
302                                 if (f == NULL)
303                                         f = "(NULL)";
304                                 if (strcmp(f, tests[n].fi[i]) != 0) {
305                                         printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
306                                                 tests[n].str, tests[n].seps,
307                                                 i, fields[i], tests[n].fi[i]);
308                                         printit = 1;
309                                 }
310                         }
311                 }
312                 if (printit)
313                         print(nf, RNF, fields);
314         }
315 }
316 #endif