]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - lib/libc/regex/grot/split.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / lib / libc / regex / grot / split.c
1 #include <sys/cdefs.h>
2 __FBSDID("$FreeBSD$");
3
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
6
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is split in place: the separator ending each stored field is
 * overwritten with '\0' and a pointer to the start of the field is stored
 * in fields[].  At most nfields pointers are stored.  When there are more
 * fields than that, the last stored field is left unterminated (it holds
 * the rest of the string) and the remaining fields are only counted.
 *
 * sep selects the splitting mode:
 *	""	white space: leading blanks/tabs are skipped, runs of blanks
 *		and tabs separate fields, trailing white space adds no field
 *	"c"	every occurrence of c separates two fields, so adjacent
 *		separators yield empty fields
 *	"ab..."	any run of the listed characters is one separator
 *
 * Returns the total number of fields found, which may exceed nfields;
 * 0 if there is nothing to split.  If nfields <= 0 the fields are only
 * counted: nothing is stored in fields[] and the string is not modified.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			/* fn counts the slots still free in fields[] */
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room to store anything; the first field starts at p */
			fn = 1;
		}
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* an empty last field is only trailing white space */
						if (trimtrail && **(fp-1) == '\0')
							fn++;
						return(nfields - fn);
					}
				if (fn == 0)
					break;
				*(p-1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room to store anything; just find the end of field 1 */
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0')
					return(1);
			fn = 1;
		}
		/* here c is the separator that ended the last field scanned */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;		/* back onto the terminating '\0' */
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;		/* first trailing separator, or the '\0' */
			if (*p != '\0') {
				/*
				 * The empty field after the trailing white
				 * space does not count.  Cut the white space
				 * off only when it ends the last stored field.
				 */
				if (nfields > 0 && fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* scan to the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		/* skip the rest of the separator run */
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
142
143 #ifdef TEST_SPLIT
144
145
/* defined later in this file; declared so main() need not guess their types */
int dosplit(char *string, char *seps);
int regress(void);

/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 * pgm str sep n x	copies str n times without splitting it
 *			(presumably a timing baseline for the form above)
 *
 * Strings longer than the 511 bytes that fit in buf are truncated.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	if (argc > 4)
		for (n = atoi(argv[3]); n > 0; n--) {
			/* bounded copy: argv[1] may be longer than buf */
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
		}
	else if (argc > 3)
		for (n = atoi(argv[3]); n > 0; n--) {
			/* split() writes into its input, so start from a fresh copy */
			(void) snprintf(buf, sizeof(buf), "%s", argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/* stomp the newline, if this line has one */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	return(0);
}
184
/* defined later in this file */
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split string by seps into at most NF fields and print the result
 *
 * The string is modified in place by split().  Returns the field count
 * reported by split(), which may exceed NF.
 */
int
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
	return(nf);
}
196
/*
 * print - show the result of a split() call on stdout, on one line
 *
 * nf is the field count returned by split(), nfp the number of pointers
 * actually available in fields[].  Output is the count, a tab, then up to
 * nfp quoted fields.  A field is followed by ", " whenever split() found
 * more fields after it, so a line ending in ", " means fields were cut off.
 * The line is always newline-terminated, including for nf == 0 and
 * nf > nfp.  Always returns 0.
 */
int
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "");
	printf("\n");
	return(0);
}
210
#define	RNF	5		/* some table entries know this */

/*
 * Regression table: each entry gives an input string, the separator
 * argument, the field count split() is expected to return when given room
 * for RNF fields, and the expected contents of the stored fields.
 * The table ends with an entry whose str is NULL.
 */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator argument */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected stored fields */
} tests[] = {
	/* one separator character */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separator characters */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b  c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separator characters */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separator characters */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* the same separator character given twice */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty separator string */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def  g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ "  a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL,			NULL,	0,	{ NULL } },
};
279
280 regress()
281 {
282         char buf[512];
283         int n;
284         char *fields[RNF+1];
285         int nf;
286         int i;
287         int printit;
288         char *f;
289
290         for (n = 0; tests[n].str != NULL; n++) {
291                 (void) strcpy(buf, tests[n].str);
292                 fields[RNF] = NULL;
293                 nf = split(buf, fields, RNF, tests[n].seps);
294                 printit = 0;
295                 if (nf != tests[n].nf) {
296                         printf("split `%s' by `%s' gave %d fields, not %d\n",
297                                 tests[n].str, tests[n].seps, nf, tests[n].nf);
298                         printit = 1;
299                 } else if (fields[RNF] != NULL) {
300                         printf("split() went beyond array end\n");
301                         printit = 1;
302                 } else {
303                         for (i = 0; i < nf && i < RNF; i++) {
304                                 f = fields[i];
305                                 if (f == NULL)
306                                         f = "(NULL)";
307                                 if (strcmp(f, tests[n].fi[i]) != 0) {
308                                         printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
309                                                 tests[n].str, tests[n].seps,
310                                                 i, fields[i], tests[n].fi[i]);
311                                         printit = 1;
312                                 }
313                         }
314                 }
315                 if (printit)
316                         print(nf, RNF, fields);
317         }
318 }
319 #endif