CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - lib/libc/regex/grot/split.c
MFstable/10 r292144:
[FreeBSD/stable/9.git] / lib / libc / regex / grot / split.c
1 #include <sys/cdefs.h>
2 __FBSDID("$FreeBSD$");
3
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
6
/*
 - split - divide a string into fields, like awk split()
 == int split(char *string, char *fields[], int nfields, char *sep);
 - fields: list is not NULL-terminated
 - nfields: number of entries available in fields[]
 - sep: "" white, "c" single char, "ab" [ab]+
 - string: split in place; a separator that ends a stored field is
 -	overwritten with NUL, and when there are more fields than entries
 -	the last entry keeps the unsplit remainder of the string
 - nfields <= 0: fields[] and string are left untouched, the fields are
 -	only counted
 - returns: number of fields found, which may be larger than nfields
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;
	char *scratch[1];	/* stand-in for fields[] when only counting */
	int counting = (nfields <= 0);

	/*
	 * The one- and two-separator loops below store their first pointer
	 * before they test for overflow, so they need at least one slot.
	 * When the caller offers none, aim them at a private one-entry
	 * vector: the caller's array is never written, nothing is split
	 * (with a single slot every loop overflows before its first NUL
	 * store), and the total count is still returned.
	 */
	if (counting) {
		fp = scratch;
		nfields = 1;
	}

	/* white space */
	if (sepc == '\0') {
		/* leading white space never starts a field */
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		/* fn counts the free entries left in the vector */
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			if (fn == 0)
				break;
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(nfields - fn);
			*(p-1) = '\0';
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		fn = nfields;
		for (;;) {
			*fp++ = p;
			fn--;
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0') {
					/*
					 * In white-space mode an empty last
					 * field is only trailing white space;
					 * do not count it.
					 */
					if (trimtrail && **(fp-1) == '\0')
						fn++;
					return(nfields - fn);
				}
			if (fn == 0)
				break;
			*(p-1) = '\0';
			/* a run of separators counts as one */
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
		}
		/* we have overflowed the fields vector -- just count them */
		fn = nfields;
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/*
				 * Trailing white space was counted as a field.
				 * If it directly follows the last stored field,
				 * cut it off that field -- unless we are only
				 * counting, when the string must stay intact.
				 */
				if (fn == nfields+1 && !counting)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		/* scan to the end of this field */
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		/* skip the rest of the separator run */
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
141
142 #ifdef TEST_SPLIT
143
144
/* Helpers defined later in this file; declared here so main() can call them. */
void dosplit(char *string, char *seps);
void regress(void);

/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 * pgm str sep n x	only copies str n times, without splitting
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	int n;
#	define	MNF	10
	char *fields[MNF];

	/*
	 * Both repeat modes strcpy() argv[1] into buf; refuse a string that
	 * would not fit rather than overrun the buffer.
	 */
	if (argc > 3 && strlen(argv[1]) >= sizeof(buf)) {
		fprintf(stderr, "string too long (limit is %lu characters)\n",
			(unsigned long)(sizeof(buf) - 1));
		exit(2);
	}

	if (argc > 4)
		/* copy only: same loop as below, minus the split() call */
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
		}
	else if (argc > 3)
		/* split() edits its input, so start from a fresh copy each time */
		for (n = atoi(argv[3]); n > 0; n--) {
			(void) strcpy(buf, argv[1]);
			(void) split(buf, fields, MNF, argv[2]);
		}
	else if (argc > 2)
		dosplit(argv[1], argv[2]);
	else if (argc > 1)
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline if there is one.  A line that was
			 * cut short by the buffer, or a final line without a
			 * newline, keeps all of its characters.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	else
		regress();

	exit(0);
}
181
/* print() is defined further down in this file; declare it before use. */
void print(int nf, int nfp, char *fields[]);

/*
 - dosplit - split one string on seps and print the resulting fields
 * At most NF fields are stored; split() edits string in place.
 */
void
dosplit(char *string, char *seps)
{
#	define	NF	5
	char *fields[NF];
	int nf;			/* total fields found, may exceed NF */

	nf = split(string, fields, NF, seps);
	print(nf, NF, fields);
}
192
/*
 - print - show a field count and the stored fields on one line
 * nf: number of fields reported by split(), may exceed nfp
 * nfp: number of entries actually available in fields[]
 * fields: the stored field pointers; only the first min(nf, nfp) are read
 *
 * Output is the count, a tab, the quoted fields separated by ", ", and
 * always a terminating newline -- also when there are no fields at all
 * or when more fields were found than could be stored.
 */
void
print(int nf, int nfp, char *fields[])
{
	int fn;
	int bound;

	bound = (nf > nfp) ? nfp : nf;
	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
	printf("\n");
}
204
#define RNF	5		/* some table entries know this */

/*
 * Regression table: each entry is split into at most RNF fields and the
 * result is checked against nf and fi[].  The list ends at the entry
 * whose str is NULL.
 */
struct {
	char *str;		/* string to split */
	char *seps;		/* separator spec handed to split() */
	int nf;			/* expected return value */
	char *fi[RNF];		/* expected stored fields */
} tests[] = {
	/* one separator character */
	{ "",			" ",	0,	{ "" } },
	{ " ",			" ",	2,	{ "", "" } },
	{ "x",			" ",	1,	{ "x" } },
	{ "xy",			" ",	1,	{ "xy" } },
	{ "x y",		" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",		" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separator characters */
	{ "",			" _",	0,	{ "" } },
	{ " ",			" _",	2,	{ "", "" } },
	{ "x",			" _",	1,	{ "x" } },
	{ "x y",		" _",	2,	{ "x", "y" } },
	{ "ab _ cd",		" _",	2,	{ "ab", "cd" } },
	{ " a_b  c ",		" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separator characters */
	{ "",			" _~",	0,	{ "" } },
	{ " ",			" _~",	2,	{ "", "" } },
	{ "x",			" _~",	1,	{ "x" } },
	{ "x y",		" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",		" _~",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",		" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separator characters */
	{ "",			" _~-",	0,	{ "" } },
	{ " ",			" _~-",	2,	{ "", "" } },
	{ "x",			" _~-",	1,	{ "x" } },
	{ "x y",		" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",		" _~-",	2,	{ "ab", "cd" } },
	{ " a_b  c~",		" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",		" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/* two-character spec in which both characters are a space */
	{ "",			"  ",	0,	{ "" } },
	{ " ",			"  ",	2,	{ "", "" } },
	{ "x",			"  ",	1,	{ "x" } },
	{ "xy",			"  ",	1,	{ "xy" } },
	{ "x y",		"  ",	2,	{ "x", "y" } },
	{ "abc def  g ",	"  ",	4,	{ "abc", "def", "g", "" } },
	{ "  a bcd",		"  ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	"  ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",		"  ",	6,	{ "", "a", "b", "c", "d " } },

	/* empty spec: white-space splitting */
	{ "",			"",	0,	{ "" } },
	{ " ",			"",	0,	{ "" } },
	{ "x",			"",	1,	{ "x" } },
	{ "xy",			"",	1,	{ "xy" } },
	{ "x y",		"",	2,	{ "x", "y" } },
	{ "abc def  g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",		"",	2,	{ "a", "bcd" } },
	{ "  a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",		"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	/* end marker */
	{ NULL,			NULL,	0,	{ NULL } },
};
273
274 void
275 regress(void)
276 {
277         char buf[512];
278         int n;
279         char *fields[RNF+1];
280         int nf;
281         int i;
282         int printit;
283         char *f;
284
285         for (n = 0; tests[n].str != NULL; n++) {
286                 (void) strcpy(buf, tests[n].str);
287                 fields[RNF] = NULL;
288                 nf = split(buf, fields, RNF, tests[n].seps);
289                 printit = 0;
290                 if (nf != tests[n].nf) {
291                         printf("split `%s' by `%s' gave %d fields, not %d\n",
292                                 tests[n].str, tests[n].seps, nf, tests[n].nf);
293                         printit = 1;
294                 } else if (fields[RNF] != NULL) {
295                         printf("split() went beyond array end\n");
296                         printit = 1;
297                 } else {
298                         for (i = 0; i < nf && i < RNF; i++) {
299                                 f = fields[i];
300                                 if (f == NULL)
301                                         f = "(NULL)";
302                                 if (strcmp(f, tests[n].fi[i]) != 0) {
303                                         printf("split `%s' by `%s' field %d is `%s', not `%s'\n",
304                                                 tests[n].str, tests[n].seps,
305                                                 i, fields[i], tests[n].fi[i]);
306                                         printit = 1;
307                                 }
308                         }
309                 }
310                 if (printit)
311                         print(nf, RNF, fields);
312         }
313 }
314 #endif