1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 ****************************************************************/
/* A one-byte array holding only a NUL: usable as an empty string. */
37 char EMPTY[] = { '\0' };
39 bool innew; /* true = infile has not been read by readrec */
/* Size that accompanies the record buffer in adjbuf() calls; starts at RECSIZE. */
42 int recsize = RECSIZE;
/* Compared against the record length before the fields buffer is reallocated; starts at RECSIZE. */
44 int fieldssize = RECSIZE;
46 Cell **fldtab; /* pointers to Cells */
/* Number of bytes currently allocated for inputFS (0 until first allocation). */
47 static size_t len_inputFS = 0;
48 static char *inputFS = NULL; /* FS at time of input, for field splitting */
51 int nfields = MAXFLD; /* last allocated slot for $i */
53 bool donefld; /* true = implies rec broken into fields */
54 bool donerec; /* true = record is valid (no flds have changed) */
56 int lastfld = 0; /* last used field */
57 int argno = 1; /* current input argument number */
58 extern Awkfloat *ARGC;
/*
 * Static Cell initializers carrying the REC and FLD flag sets respectively.
 * NOTE(review): presumably templates copied into freshly allocated cells;
 * the lines that would use them are not visible in this chunk.
 */
60 static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
61 static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
/*
 * recinit: allocate the record buffer (n bytes), the fields buffer
 * (n+1 bytes), the fldtab pointer table (nfields+2 slots, zeroed by
 * calloc) and the Cell for $0. Any allocation failure is FATAL.
 * $0's sval is then pointed at the record buffer, its nval is set from
 * tostring("0"), and cells 1..nfields are created by makefields().
 */
63 void recinit(unsigned int n)
65 if ( (record = (char *) malloc(n)) == NULL
66 || (fields = (char *) malloc(n+1)) == NULL
67 || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
68 || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
69 FATAL("out of space for $0 and fields");
/* NOTE(review): any initialisation of *fldtab[0] before these stores is not visible here. */
72 fldtab[0]->sval = record;
73 fldtab[0]->nval = tostring("0");
74 makefields(1, nfields);
/*
 * makefields: allocate one Cell for each of fldtab[n1] .. fldtab[n2]
 * (both ends inclusive). A failed malloc is FATAL and reports the index.
 * Each cell's nval is set from the decimal text of its index.
 * NOTE(review): the declarations of i and temp, and any initialisation
 * of the new cell's other members, are not visible in this chunk.
 */
77 void makefields(int n1, int n2) /* create $n1..$n2 inclusive */
82 for (i = n1; i <= n2; i++) {
83 fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
84 if (fldtab[i] == NULL)
85 FATAL("out of space in makefields %d", i);
/* format the field number so it can serve as the cell's name */
87 snprintf(temp, sizeof(temp), "%d", i);
88 fldtab[i]->nval = tostring(temp);
/*
 * NOTE(review): the header of the enclosing function is not visible in
 * this chunk. The visible lines walk ARGV[1] .. ARGV[ARGC-1] through
 * getargv(): empty or missing entries get special handling, FILENAME is
 * set from an entry, and var=value entries are applied with setclvar().
 * The branch structure joining these statements is not visible.
 */
97 for (i = 1; i < *ARGC; i++) {
98 p = getargv(i); /* find 1st real filename */
99 if (p == NULL || *p == '\0') { /* deleted or zapped */
104 setsval(lookup("FILENAME", symtab), p);
107 setclvar(p); /* a commandline assignment before filename */
110 infile = stdin; /* no filenames, so use stdin */
115 * POSIX specifies that fields are supposed to be evaluated as if they were
116 * split using the value of FS at the time that the record's value ($0) was
119 * Since field-splitting is done lazily, we save the current value of FS
120 * whenever a new record is read in (implicitly or via getline), or when
121 * a new value is assigned to $0.
/*
 * NOTE(review): the header of the enclosing function is not visible in
 * this chunk. Visible logic: when the current FS string is shorter than
 * the bytes already allocated for inputFS it is copied in place;
 * otherwise inputFS is resized to len + 1 bytes and FS is copied together
 * with its terminator. The FATAL is presumably guarded by a failed
 * realloc; that test is not visible.
 */
126 if ((len = strlen(getsval(fsloc))) < len_inputFS) {
127 strcpy(inputFS, *FS); /* for subsequent field splitting */
/* len + 1 so the terminating NUL fits */
131 len_inputFS = len + 1;
132 inputFS = (char *) realloc(inputFS, len_inputFS);
134 FATAL("field separator %.10s... is too long", *FS);
135 memcpy(inputFS, *FS, len_inputFS);
/* NOTE(review): presumably marks the first call to getrec; its use is not visible in this chunk. */
138 static bool firsttime = true;
/*
 * getrec: fetch the next input record into *pbuf (capacity *pbufsize).
 *
 * Visible flow: loop while command-line arguments remain
 * (argno < *ARGC) or input is stdin. When no file is open, the next
 * argument is fetched with getargv(); empty/deleted entries and
 * var=value entries get special handling, "-" is recognised explicitly,
 * and any other name is opened for reading (failure is FATAL). Opening
 * a file resets the value behind fnrloc to 0. readrec() then reads one
 * record. For a normal record, $0's old string is released if freeable,
 * $0 is pointed at buf, flagged NUM when is_number() accepts it, and
 * the values behind nrloc and fnrloc are incremented.
 *
 * Returns 0 at true end of input, after restoring *pbufsize to the
 * size it had on entry. NOTE(review): the successful return and the
 * test on isrecord are on lines not visible here.
 */
140 int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record */
141 { /* note: cares whether buf == record */
145 int bufsize = *pbufsize, savebufsize = bufsize;
151 DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
152 *RS, *FS, *ARGC, *FILENAME);
160 while (argno < *ARGC || infile == stdin) {
161 DPRINTF("argno=%d, file=|%s|\n", argno, file);
162 if (infile == NULL) { /* have to open a new file */
163 file = getargv(argno);
164 if (file == NULL || *file == '\0') { /* deleted or zapped */
168 if (isclvar(file)) { /* a var=value arg */
174 DPRINTF("opening file %s\n", file);
/* a lone "-" operand is recognised before trying fopen */
175 if (*file == '-' && *(file+1) == '\0')
177 else if ((infile = fopen(file, "r")) == NULL)
178 FATAL("can't open file %s", file);
/* new file: per-file record counter starts over */
179 setfval(fnrloc, 0.0);
181 c = readrec(&buf, &bufsize, infile, innew);
184 if (c != 0 || buf[0] != '\0') { /* normal record */
188 if (freeable(fldtab[0]))
189 xfree(fldtab[0]->sval);
190 fldtab[0]->sval = buf; /* buf == record */
191 fldtab[0]->tval = REC | STR | DONTFREE;
/* a record that parses as a number also carries its numeric value */
192 if (is_number(fldtab[0]->sval, & result)) {
193 fldtab[0]->fval = result;
194 fldtab[0]->tval |= NUM;
197 setfval(nrloc, nrloc->fval+1);
198 setfval(fnrloc, fnrloc->fval+1);
203 /* EOF arrived on this file; set up next */
211 *pbufsize = savebufsize;
212 return 0; /* true end of file */
217 if (infile != NULL && infile != stdin)
/*
 * readrec: read one record from inf into *pbuf (capacity *pbufsize).
 *
 * Visible paths:
 *  - a pattern path: a DFA is built from the RS string with makedfa()
 *    and the input is scanned with fnematch(); around one of the two
 *    calls pfa->initstat is saved and restored. NOTE(review): the
 *    conditions selecting between these calls are not visible.
 *  - a single-character path: sep is the first byte of RS. When that
 *    byte is NUL, leading newlines are skipped and, per the in-line
 *    comment, two newlines in a row are detected.
 * Whenever more room is needed the buffer grows through adjbuf();
 * a failure to grow is FATAL.
 * isrec is true when the buffer is non-empty or inf is not at EOF.
 */
223 int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
226 char *rr, *buf = *pbuf;
227 int bufsize = *pbufsize;
228 char *rs = getsval(rsloc);
233 fa *pfa = makedfa(rs, 1);
235 found = fnematch(pfa, inf, &buf, &bufsize, recsize);
/* keep the DFA's initial state so it can be put back after the scan */
237 int tempstat = pfa->initstat;
239 found = fnematch(pfa, inf, &buf, &bufsize, recsize);
240 pfa->initstat = tempstat;
243 setptr(patbeg, '\0');
245 if ((sep = *rs) == 0) {
247 while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */
253 for (; (c=getc(inf)) != sep && c != EOF; ) {
254 if (rr-buf+1 > bufsize)
255 if (!adjbuf(&buf, &bufsize, 1+rr-buf,
256 recsize, &rr, "readrec 1"))
257 FATAL("input record `%.30s...' too long", buf);
260 if (*rs == sep || c == EOF)
262 if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
264 if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
266 FATAL("input record `%.30s...' too long", buf);
270 if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
271 FATAL("input record `%.30s...' too long", buf);
/* an empty buffer still counts as a record unless EOF has been reached */
276 isrec = *buf || !feof(inf);
277 DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
/*
 * getargv: fetch ARGV[n]. The key used in ARGVtab is the decimal text
 * of n. The visible code tests whether the entry is missing and also
 * calls setsymtab() with an empty string value for that key.
 * NOTE(review): the statements between those two lines and the return
 * statement are not visible in this chunk.
 */
281 char *getargv(int n) /* get ARGV[n] */
285 extern Array *ARGVtab;
287 snprintf(temp, sizeof(temp), "%d", n);
288 if (lookup(temp, ARGVtab) == NULL)
290 x = setsymtab(temp, "", 0.0, STR, ARGVtab);
292 DPRINTF("getargv(%d) returns |%s|\n", n, s);
/*
 * setclvar: apply a command-line "var=value" operand.
 * The scan for '=' has no end-of-string test, so s must contain one.
 * The text at p is passed through qstring() and stored in symtab under
 * the name s as a STR; the stored string is then tested with is_number().
 * NOTE(review): the lines that split s at the '=' and that act on the
 * is_number() result are not visible in this chunk.
 */
296 void setclvar(char *s) /* set var=value from s */
302 for (p=s; *p != '='; p++)
305 p = qstring(p, '\0');
306 q = setsymtab(s, p, 0.0, STR, symtab);
308 if (is_number(q->sval, & result)) {
312 DPRINTF("command line set %s to |%s|\n", s, p);
/*
 * fldbld: split the current record into fields using the saved input
 * field separator (inputFS).
 *
 * Four cases are visible:
 *  - inputFS longer than one character: treated as a regular
 *    expression and delegated to refldbld();
 *  - inputFS beginning with ' ': blanks, tabs and newlines are skipped
 *    and each field runs up to the next such character;
 *  - inputFS empty: one (possibly multibyte) character per field, each
 *    copied with tostring() and flagged FLD | STR;
 *  - any other single character: fields end at that character, at
 *    rtest, or at the end of the string.
 * In the second and fourth cases a field cell points at fr and is
 * flagged DONTFREE. Afterwards cells beyond the last field are cleaned,
 * fields are tested with is_number(), the value behind nfloc is set to
 * lastfld, and donerec is set back to true.
 * NOTE(review): the initialisation of r, fr and n and the loop that
 * advances i are on lines not visible in this chunk.
 */
316 void fldbld(void) /* create fields from current record */
318 /* this relies on having fields[] the same length as $0 */
319 /* the fields are all stored in this one array with \0's */
320 /* possibly with a final trailing \0 not associated with any field */
327 if (!isstr(fldtab[0]))
331 if (n > fieldssize) {
333 if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
334 FATAL("out of space for fields in fldbld %d", n);
338 i = 0; /* number of fields accumulated here */
339 if (inputFS == NULL) /* make sure we have a copy of FS */
341 if (strlen(inputFS) > 1) { /* it's a regular expression */
342 i = refldbld(r, inputFS);
343 } else if ((sep = *inputFS) == ' ') { /* default whitespace */
345 while (*r == ' ' || *r == '\t' || *r == '\n')
352 if (freeable(fldtab[i]))
353 xfree(fldtab[i]->sval);
354 fldtab[i]->sval = fr;
355 fldtab[i]->tval = FLD | STR | DONTFREE;
358 while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
362 } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
363 for (i = 0; *r != '\0'; r += n) {
364 char buf[MB_LEN_MAX + 1];
369 if (freeable(fldtab[i]))
370 xfree(fldtab[i]->sval);
/* n becomes the byte length of the next character, which is also the loop stride */
371 n = mblen(r, MB_LEN_MAX);
376 fldtab[i]->sval = tostring(buf);
377 fldtab[i]->tval = FLD | STR;
380 } else if (*r != 0) { /* if 0, it's a null field */
381 /* subtlecase : if length(FS) == 1 && length(RS > 0)
382 * \n is NOT a field separator (cf awk book 61,84).
383 * this variable is tested in the inner while loop.
385 int rtest = '\n'; /* normal case */
392 if (freeable(fldtab[i]))
393 xfree(fldtab[i]->sval);
394 fldtab[i]->sval = fr;
395 fldtab[i]->tval = FLD | STR | DONTFREE;
396 while (*r != sep && *r != rtest && *r != '\0') /* \n is always a separator */
405 FATAL("record `%.30s...' has too many fields; can't happen", r);
406 cleanfld(i+1, lastfld); /* clean out junk from previous record */
409 for (j = 1; j <= lastfld; j++) {
413 if(is_number(p->sval, & result)) {
418 setfval(nfloc, (Awkfloat) lastfld);
419 donerec = true; /* restore */
/* debugging dump of every field, $0 included */
421 for (j = 0; j <= lastfld; j++) {
423 printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
/*
 * cleanfld: reset field cells n1 .. n2 (both inclusive). Each cell's
 * tval is set back to FLD | STR | DONTFREE.
 * NOTE(review): the statements that release or replace each cell's
 * string are on lines not visible in this chunk.
 */
428 void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */
429 { /* nvals remain intact */
433 for (i = n1; i <= n2; i++) {
438 p->tval = FLD | STR | DONTFREE;
/*
 * newfld: extend the set of fields up to n. Cells lastfld+1 .. n are
 * cleaned and the value behind nfloc is set to n.
 * NOTE(review): any table growth and the update of lastfld are on lines
 * not visible in this chunk.
 */
442 void newfld(int n) /* add field n after end of existing lastfld */
446 cleanfld(lastfld+1, n);
448 setfval(nfloc, (Awkfloat) n);
/*
 * setlastfld: change the number of the last field to n. A negative
 * value is FATAL. Two clean-ups are visible: cells lastfld+1 .. n and
 * cells n+1 .. lastfld.
 * NOTE(review): presumably the first applies when growing and the
 * second when shrinking; the selecting tests are not visible here.
 */
451 void setlastfld(int n) /* set lastfld cleaning fldtab cells if necessary */
454 FATAL("cannot set NF to a negative value");
459 cleanfld(lastfld+1, n);
461 cleanfld(n+1, lastfld);
/*
 * fieldadr: return the Cell for field n. An out-of-range n is FATAL
 * (the range test itself is not visible in this chunk). When n is
 * beyond nfields the table is grown with growfldtab().
 */
466 Cell *fieldadr(int n) /* get nth field */
469 FATAL("trying to access out of range field %d", n);
470 if (n > nfields) /* fields after NF are empty */
471 growfldtab(n); /* but does not increase NF */
/*
 * growfldtab: enlarge fldtab so that it can hold field n.
 * The starting target nf is twice nfields. The byte count s for nf+1
 * pointers is checked by dividing it back: if the result does not
 * reproduce nf the multiplication overflowed, and fldtab is released
 * instead of reallocated. Running out of space is FATAL. The new cells
 * nfields+1 .. nf are then created with makefields().
 * NOTE(review): any adjustment of nf to n and the update of nfields are
 * on lines not visible in this chunk.
 */
475 void growfldtab(int n) /* make new fields up to at least $n */
477 int nf = 2 * nfields;
482 s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? */
483 if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
484 fldtab = (Cell **) realloc(fldtab, s);
485 else /* overflow sizeof int */
486 xfree(fldtab); /* make it null */
488 FATAL("out of space creating %d fields", nf);
489 makefields(nfields+1, nf);
/*
 * refldbld: split rec into fields, using fs as a regular expression.
 *
 * When n exceeds fieldssize a new fields buffer of n+1 bytes is
 * allocated (failure is FATAL). A DFA for fs comes from makedfa();
 * its initstat is saved in tempstat and put back afterwards. For each
 * field the cell's old string is released if freeable and the cell is
 * pointed at fr, flagged FLD | STR | DONTFREE. On a match the text
 * before patbeg is copied to fr, fr advances past it plus one byte,
 * and rec continues after the match (patbeg + patlen).
 * NOTE(review): strncpy does not terminate the copied text; the
 * terminating store is presumably on a line not visible in this chunk.
 */
493 int refldbld(const char *rec, const char *fs) /* build fields from reg expr in FS */
495 /* this relies on having fields[] the same length as $0 */
496 /* the fields are all stored in this one array with \0's */
502 if (n > fieldssize) {
504 if ((fields = (char *) malloc(n+1)) == NULL)
505 FATAL("out of space for fields in refldbld %d", n);
512 pfa = makedfa(fs, 1);
513 DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
514 tempstat = pfa->initstat;
518 if (freeable(fldtab[i]))
519 xfree(fldtab[i]->sval);
520 fldtab[i]->tval = FLD | STR | DONTFREE;
521 fldtab[i]->sval = fr;
522 DPRINTF("refldbld: i=%d\n", i);
523 if (nematch(pfa, rec)) {
524 pfa->initstat = 2; /* horrible coupling to b.c */
525 DPRINTF("match %s (%d chars)\n", patbeg, patlen);
526 strncpy(fr, rec, patbeg-rec);
/* +1 leaves room for the byte that ends this field */
527 fr += patbeg - rec + 1;
529 rec = patbeg + patlen;
531 DPRINTF("no match %s\n", rec);
533 pfa->initstat = tempstat;
/*
 * recbld: rebuild $0 in the record buffer from fields 1 .. NF.
 * Each field's string is appended in turn, and the output separator
 * string (sep) is appended as well; before every append the buffer is
 * grown through adjbuf(), and a failure to grow is FATAL. Finally $0's
 * previous string is released if freeable and $0 is pointed at record,
 * flagged REC | STR | DONTFREE.
 * NOTE(review): the test that decides when the separator is appended
 * and the loop bodies that advance r are not visible in this chunk.
 */
540 void recbld(void) /* create $0 from $1..$NF if necessary */
544 char *sep = getsval(ofsloc);
549 for (i = 1; i <= *NF; i++) {
550 p = getsval(fldtab[i]);
551 if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
552 FATAL("created $0 `%.30s...' too long", record);
553 while ((*r = *p++) != 0)
556 if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
557 FATAL("created $0 `%.30s...' too long", record);
558 for (p = sep; (*r = *p++) != 0; )
562 if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
563 FATAL("built giant record `%.30s...'", record);
565 DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
567 if (freeable(fldtab[0]))
568 xfree(fldtab[0]->sval);
569 fldtab[0]->tval = REC | STR | DONTFREE;
570 fldtab[0]->sval = record;
572 DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
573 DPRINTF("recbld = |%s|\n", record);
/*
 * yyerror: error hook taking a message string s.
 * NOTE(review): the body is not visible in this chunk.
 */
579 void yyerror(const char *s)
/*
 * SYNTAX: print a formatted syntax diagnostic on stderr.
 * Output order: the command name and a colon, the printf-style message
 * built from fmt and its arguments, the source line number, the current
 * function name when curfname is set, and the source file name while
 * compiling and cursource() is non-NULL, then a newline.
 * NOTE(review): the use of the static been_here counter is on lines not
 * visible in this chunk.
 */
584 void SYNTAX(const char *fmt, ...)
586 extern char *cmdname, *curfname;
587 static int been_here = 0;
592 fprintf(stderr, "%s: ", cmdname);
594 vfprintf(stderr, fmt, varg);
596 fprintf(stderr, " at source line %d", lineno);
597 if (curfname != NULL)
598 fprintf(stderr, " in function %s", curfname);
599 if (compile_time == COMPILING && cursource() != NULL)
600 fprintf(stderr, " source file %s", cursource());
601 fprintf(stderr, "\n");
/* Running counts for braces, brackets and parentheses, defined elsewhere. */
606 extern int bracecnt, brackcnt, parencnt;
/*
 * bracecheck: consume the remaining input up to EOF or a NUL, then
 * report any imbalance in each of the three counters via bcheck2().
 * NOTE(review): the loop body and the use of the static beenhere flag
 * are not visible in this chunk.
 */
608 void bracecheck(void)
611 static int beenhere = 0;
615 while ((c = input()) != EOF && c != '\0')
617 bcheck2(bracecnt, '{', '}');
618 bcheck2(brackcnt, '[', ']');
619 bcheck2(parencnt, '(', ')');
/*
 * bcheck2: report on stderr an imbalance n for the bracket pair c1/c2.
 * Four messages exist: a single missing closer, several missing
 * closers (prints n), a single extra closer, and several extra closers
 * (prints -n).
 * NOTE(review): the tests selecting each message are not visible here;
 * presumably n == 1, n > 1, n == -1 and n < -1.
 */
622 void bcheck2(int n, int c1, int c2)
625 fprintf(stderr, "\tmissing %c\n", c2);
627 fprintf(stderr, "\t%d missing %c's\n", n, c2);
629 fprintf(stderr, "\textra %c\n", c2);
631 fprintf(stderr, "\t%d extra %c's\n", -n, c2);
/*
 * FATAL: print the command name, a colon and the printf-style message
 * built from fmt on stderr. When dbg is greater than 1 the in-line
 * comment says a core dump is forced.
 * NOTE(review): the lines that terminate the program are not visible
 * in this chunk.
 */
634 void FATAL(const char *fmt, ...)
636 extern char *cmdname;
640 fprintf(stderr, "%s: ", cmdname);
642 vfprintf(stderr, fmt, varg);
645 if (dbg > 1) /* core dump if serious debugging on */
/*
 * WARNING: print the command name, a colon and the printf-style message
 * built from fmt on stderr.
 * NOTE(review): whatever follows the message is on lines not visible in
 * this chunk.
 */
650 void WARNING(const char *fmt, ...)
652 extern char *cmdname;
656 fprintf(stderr, "%s: ", cmdname);
658 vfprintf(stderr, fmt, varg);
/*
 * NOTE(review): the header of the enclosing function is not visible in
 * this chunk. The visible lines append context to a diagnostic on
 * stderr: unless compile_time is ERROR_PRINTING, the input record
 * number (FNR as an int) and, when FILENAME is not "-", the file name;
 * then a source line number taken either from curnode->lineno or from
 * lineno (the selecting test is not visible); and the source file name
 * while compiling and cursource() is non-NULL.
 */
665 extern Node *curnode;
667 fprintf(stderr, "\n");
668 if (compile_time != ERROR_PRINTING) {
670 fprintf(stderr, " input record number %d", (int) (*FNR));
671 if (strcmp(*FILENAME, "-") != 0)
672 fprintf(stderr, ", file %s", *FILENAME);
673 fprintf(stderr, "\n");
676 fprintf(stderr, " source line number %d", curnode->lineno);
678 fprintf(stderr, " source line number %d", lineno);
679 if (compile_time == COMPILING && cursource() != NULL)
680 fprintf(stderr, " source file %s", cursource());
681 fprintf(stderr, "\n");
/*
 * eprint: show the source text surrounding a compile-time error.
 * A guard tests for three situations: not compiling, an earlier call
 * (been_here), or an empty error buffer (ebuf == ep). The code then
 * scans backwards in ebuf to find the start of the line, prints
 * " context is", scans back from ep-1 over non-blank characters to find
 * the start of the last token, brackets text with " >>> " and " <<< ",
 * and finally reads on through input() until a newline, NUL or EOF.
 * NOTE(review): the guard's action, the initialisation of p and the
 * character output loops are on lines not visible in this chunk.
 */
686 void eprint(void) /* try to print context around error */
690 static int been_here = 0;
691 extern char ebuf[], *ep;
693 if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
698 if (p > ebuf && *p == '\n')
700 for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
704 fprintf(stderr, " context is\n\t");
705 for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
710 fprintf(stderr, " >>> ");
714 fprintf(stderr, " <<< ");
716 while ((c = input()) != '\n' && c != '\0' && c != EOF) {
/*
 * NOTE(review): the enclosing function header and switch statement are
 * not visible in this chunk. Each opening bracket character increments
 * its counter and each closing one decrements it.
 */
727 case '{': bracecnt++; break;
728 case '}': bracecnt--; break;
729 case '[': brackcnt++; break;
730 case ']': brackcnt--; break;
731 case '(': parencnt++; break;
732 case ')': parencnt--; break;
/*
 * errcheck: inspect errno after a math operation named by s and issue
 * a WARNING. One branch reports an argument out of domain; the branch
 * for errno == ERANGE reports a result out of range.
 * NOTE(review): the first branch's test, any adjustment of x or errno,
 * and the return statement are on lines not visible in this chunk.
 */
736 double errcheck(double x, const char *s)
741 WARNING("%s argument out of domain", s);
743 } else if (errno == ERANGE) {
745 WARNING("%s result out of range", s);
/*
 * isclvar: decide whether s has the shape name=something.
 * The first character must be a letter or an underscore; the scan then
 * looks for a character that is neither alphanumeric nor an underscore.
 * The result is nonzero only when the character at s is '=' and s has
 * moved past the saved start os. Characters are cast to uschar before
 * being handed to the ctype functions.
 * NOTE(review): the loop that advances s and the early return are on
 * lines not visible in this chunk.
 */
751 int isclvar(const char *s) /* is s of form var=something ? */
755 if (!isalpha((uschar) *s) && *s != '_')
758 if (!(isalnum((uschar) *s) || *s == '_'))
760 return *s == '=' && s > os;
763 /* strtod is supposed to be a proper test of what's a valid number */
764 /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
765 /* wrong: violates 4.10.1.4 of ansi C standard */
767 /* well, not quite. As of C99, hex floating point is allowed. so this is
768 * a bit of a mess. We work around the mess by checking for a hexadecimal
769 * value and disallowing it. Similarly, we now follow gawk and allow only
770 * +nan, -nan, +inf, and -inf for NaN and infinity values.
774 * This routine now has a more complicated interface, the main point
775 * being to avoid the double conversion of a string to double, and
776 * also to convey out, if requested, the information that the numeric
777 * value was a leading string or is all of the string. The latter bit
778 * is used in getfval().
781 bool is_valid_number(const char *s, bool trailing_stuff_ok,
782 bool *no_trailing, double *result)
791 *no_trailing = false;
797 * This test, while allowed by newer POSIX standards, represents a regression
798 * where hex strings were treated as numbers in nawk the whole time it has been
799 * in FreeBSD (since 2001). The POSIX 2001 through 2004 standards mandated this
800 * behavior and the current standard allows it. Deviate from upstream by restoring
801 * the prior FreeBSD behavior.
804 // no hex floating point, sorry
805 if (s[0] == '0' && tolower(s[1]) == 'x')
809 // allow +nan, -nan, +inf, -inf, any other letter, no
810 if (s[0] == '+' || s[0] == '-') {
811 is_nan = (strncasecmp(s+1, "nan", 3) == 0);
812 is_inf = (strncasecmp(s+1, "inf", 3) == 0);
813 if ((is_nan || is_inf)
814 && (isspace(s[4]) || s[4] == '\0'))
816 else if (! isdigit(s[1]) && s[1] != '.')
819 else if (! isdigit(s[0]) && s[0] != '.')
825 if (ep == s || errno == ERANGE)
828 if (isnan(r) && s[0] == '-' && signbit(r) == 0)
835 * check for trailing stuff
840 if (no_trailing != NULL)
841 *no_trailing = (*ep == '\0');
843 // return true if found the end, or trailing stuff is allowed
844 retval = *ep == '\0' || trailing_stuff_ok;