]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - bin/sh/parser.c
Merge branch 'releng/11.3' into releng-CDN/11.3
[FreeBSD/FreeBSD.git] / bin / sh / parser.c
1 /*-
2  * Copyright (c) 1991, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Kenneth Almquist.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32
33 #ifndef lint
34 #if 0
35 static char sccsid[] = "@(#)parser.c    8.7 (Berkeley) 5/16/95";
36 #endif
37 #endif /* not lint */
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40
41 #include <stdlib.h>
42 #include <unistd.h>
43 #include <stdio.h>
44
45 #include "shell.h"
46 #include "parser.h"
47 #include "nodes.h"
48 #include "expand.h"     /* defines rmescapes() */
49 #include "syntax.h"
50 #include "options.h"
51 #include "input.h"
52 #include "output.h"
53 #include "var.h"
54 #include "error.h"
55 #include "memalloc.h"
56 #include "mystring.h"
57 #include "alias.h"
58 #include "show.h"
59 #include "eval.h"
60 #include "exec.h"       /* to check for special builtins */
61 #ifndef NO_HISTORY
62 #include "myhistedit.h"
63 #endif
64
65 /*
66  * Shell command parser.
67  */
68
/* NOTE(review): PROMPTLEN is not referenced in the visible part of this file. */
#define PROMPTLEN       128

/*
 * Bit flags kept in checkkwd.  They select the extra processing that
 * readtoken() applies to the next token it returns.
 */
#define CHKALIAS        (1 << 0)        /* try alias expansion on an unquoted word */
#define CHKKWD          (1 << 1)        /* match an unquoted word against parsekwd[] */
#define CHKNL           (1 << 2)        /* skip newline tokens, reading here documents */
75
76 /* values returned by readtoken */
77 #include "token.h"
78
79
80
/*
 * One pending here document.  parsefname() appends these to heredoclist and
 * parseheredoc() later reads the body of each queued entry.
 */
struct heredoc {
        /* Following entry of the list; NULL at the tail. */
        struct heredoc *next;
        /* Redirection node; parseheredoc() stores the body in its nhere.doc. */
        union node *here;
        /* String indicating the end of the document's input. */
        char *eofmark;
        /* If set, leading tabs are stripped. */
        int striptabs;
};
87
/*
 * Bookkeeping record for one temporary allocation made while parsing; the
 * records form the singly linked list headed by the parser_temp variable.
 */
struct parser_temp {
        /* Record that was allocated before this one (NULL for the oldest). */
        struct parser_temp *next;
        /* The memory handed out to the caller of parser_temp_alloc(). */
        void *data;
};
92
93
/*
 * Parser state shared by the routines in this file.
 */

/* Here documents whose bodies still have to be read (see parseheredoc()). */
static struct heredoc *heredoclist;
/* If set, prompt the user. */
static int doprompt;
/* True if interactive and at the start of a line. */
static int needprompt;
/* Last token read. */
static int lasttoken;
/* Nonzero: the next xxreadtoken() call returns lasttoken again. */
static int tokpushback;
/* Text of the last word returned by readtoken(). */
static char *wordtext;
/* CHK* flags for the next readtoken() call. */
static int checkkwd;
/* Commands in backquotes belonging to the last word token. */
static struct nodelist *backquotelist;
/* Redirection structure belonging to the last TREDIR token. */
static union node *redirnode;
/* Here-document record that parsefname() fills in for an NHERE redirection. */
static struct heredoc *heredoc;
/* Set if (part of) the last token was quoted. */
static int quoteflag;
/* Line number where the last token started. */
static int startlinno;
/* Line number where the current function started. */
static int funclinno;
/* Most recent record of the parser_temp_alloc() allocation list. */
static struct parser_temp *parser_temp;

/*
 * Distinguished pointer value: the address of heredoclist.
 * NOTE(review): its users are outside the visible part of this file;
 * presumably it stands for "no end-of-file marker" -- confirm.
 */
#define NOEOFMARK ((const char *)&heredoclist)
110
111
112 static union node *list(int);
113 static union node *andor(void);
114 static union node *pipeline(void);
115 static union node *command(void);
116 static union node *simplecmd(union node **, union node *);
117 static union node *makename(void);
118 static union node *makebinary(int type, union node *n1, union node *n2);
119 static void parsefname(void);
120 static void parseheredoc(void);
121 static int peektoken(void);
122 static int readtoken(void);
123 static int xxreadtoken(void);
124 static int readtoken1(int, const char *, const char *, int);
125 static int noexpand(char *);
126 static void consumetoken(int);
127 static void synexpect(int) __dead2;
128 static void synerror(const char *) __dead2;
129 static void setprompt(int);
130 static int pgetc_linecont(void);
131
132
133 static void *
134 parser_temp_alloc(size_t len)
135 {
136         struct parser_temp *t;
137
138         INTOFF;
139         t = ckmalloc(sizeof(*t));
140         t->data = NULL;
141         t->next = parser_temp;
142         parser_temp = t;
143         t->data = ckmalloc(len);
144         INTON;
145         return t->data;
146 }
147
148
149 static void *
150 parser_temp_realloc(void *ptr, size_t len)
151 {
152         struct parser_temp *t;
153
154         INTOFF;
155         t = parser_temp;
156         if (ptr != t->data)
157                 error("bug: parser_temp_realloc misused");
158         t->data = ckrealloc(t->data, len);
159         INTON;
160         return t->data;
161 }
162
163
164 static void
165 parser_temp_free_upto(void *ptr)
166 {
167         struct parser_temp *t;
168         int done = 0;
169
170         INTOFF;
171         while (parser_temp != NULL && !done) {
172                 t = parser_temp;
173                 parser_temp = t->next;
174                 done = t->data == ptr;
175                 ckfree(t->data);
176                 ckfree(t);
177         }
178         INTON;
179         if (!done)
180                 error("bug: parser_temp_free_upto misused");
181 }
182
183
184 static void
185 parser_temp_free_all(void)
186 {
187         struct parser_temp *t;
188
189         INTOFF;
190         while (parser_temp != NULL) {
191                 t = parser_temp;
192                 parser_temp = t->next;
193                 ckfree(t->data);
194                 ckfree(t);
195         }
196         INTON;
197 }
198
199
200 /*
201  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
202  * valid parse tree indicating a blank line.)
203  */
204
205 union node *
206 parsecmd(int interact)
207 {
208         int t;
209
210         /* This assumes the parser is not re-entered,
211          * which could happen if we add command substitution on PS1/PS2.
212          */
213         parser_temp_free_all();
214         heredoclist = NULL;
215
216         tokpushback = 0;
217         checkkwd = 0;
218         doprompt = interact;
219         if (doprompt)
220                 setprompt(1);
221         else
222                 setprompt(0);
223         needprompt = 0;
224         t = readtoken();
225         if (t == TEOF)
226                 return NEOF;
227         if (t == TNL)
228                 return NULL;
229         tokpushback++;
230         return list(1);
231 }
232
233
234 /*
235  * Read and parse words for wordexp.
236  * Returns a list of NARG nodes; NULL if there are no words.
237  */
238 union node *
239 parsewordexp(void)
240 {
241         union node *n, *first = NULL, **pnext;
242         int t;
243
244         /* This assumes the parser is not re-entered,
245          * which could happen if we add command substitution on PS1/PS2.
246          */
247         parser_temp_free_all();
248         heredoclist = NULL;
249
250         tokpushback = 0;
251         checkkwd = 0;
252         doprompt = 0;
253         setprompt(0);
254         needprompt = 0;
255         pnext = &first;
256         while ((t = readtoken()) != TEOF) {
257                 if (t != TWORD)
258                         synexpect(TWORD);
259                 n = makename();
260                 *pnext = n;
261                 pnext = &n->narg.next;
262         }
263         return first;
264 }
265
266
267 static union node *
268 list(int nlflag)
269 {
270         union node *ntop, *n1, *n2, *n3;
271         int tok;
272
273         checkkwd = CHKNL | CHKKWD | CHKALIAS;
274         if (!nlflag && tokendlist[peektoken()])
275                 return NULL;
276         ntop = n1 = NULL;
277         for (;;) {
278                 n2 = andor();
279                 tok = readtoken();
280                 if (tok == TBACKGND) {
281                         if (n2 != NULL && n2->type == NPIPE) {
282                                 n2->npipe.backgnd = 1;
283                         } else if (n2 != NULL && n2->type == NREDIR) {
284                                 n2->type = NBACKGND;
285                         } else {
286                                 n3 = (union node *)stalloc(sizeof (struct nredir));
287                                 n3->type = NBACKGND;
288                                 n3->nredir.n = n2;
289                                 n3->nredir.redirect = NULL;
290                                 n2 = n3;
291                         }
292                 }
293                 if (ntop == NULL)
294                         ntop = n2;
295                 else if (n1 == NULL) {
296                         n1 = makebinary(NSEMI, ntop, n2);
297                         ntop = n1;
298                 }
299                 else {
300                         n3 = makebinary(NSEMI, n1->nbinary.ch2, n2);
301                         n1->nbinary.ch2 = n3;
302                         n1 = n3;
303                 }
304                 switch (tok) {
305                 case TBACKGND:
306                 case TSEMI:
307                         tok = readtoken();
308                         /* FALLTHROUGH */
309                 case TNL:
310                         if (tok == TNL) {
311                                 parseheredoc();
312                                 if (nlflag)
313                                         return ntop;
314                         } else if (tok == TEOF && nlflag) {
315                                 parseheredoc();
316                                 return ntop;
317                         } else {
318                                 tokpushback++;
319                         }
320                         checkkwd = CHKNL | CHKKWD | CHKALIAS;
321                         if (!nlflag && tokendlist[peektoken()])
322                                 return ntop;
323                         break;
324                 case TEOF:
325                         if (heredoclist)
326                                 parseheredoc();
327                         else
328                                 pungetc();              /* push back EOF on input */
329                         return ntop;
330                 default:
331                         if (nlflag)
332                                 synexpect(-1);
333                         tokpushback++;
334                         return ntop;
335                 }
336         }
337 }
338
339
340
341 static union node *
342 andor(void)
343 {
344         union node *n;
345         int t;
346
347         n = pipeline();
348         for (;;) {
349                 if ((t = readtoken()) == TAND) {
350                         t = NAND;
351                 } else if (t == TOR) {
352                         t = NOR;
353                 } else {
354                         tokpushback++;
355                         return n;
356                 }
357                 n = makebinary(t, n, pipeline());
358         }
359 }
360
361
362
363 static union node *
364 pipeline(void)
365 {
366         union node *n1, *n2, *pipenode;
367         struct nodelist *lp, *prev;
368         int negate, t;
369
370         negate = 0;
371         checkkwd = CHKNL | CHKKWD | CHKALIAS;
372         TRACE(("pipeline: entered\n"));
373         while (readtoken() == TNOT)
374                 negate = !negate;
375         tokpushback++;
376         n1 = command();
377         if (readtoken() == TPIPE) {
378                 pipenode = (union node *)stalloc(sizeof (struct npipe));
379                 pipenode->type = NPIPE;
380                 pipenode->npipe.backgnd = 0;
381                 lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
382                 pipenode->npipe.cmdlist = lp;
383                 lp->n = n1;
384                 do {
385                         prev = lp;
386                         lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
387                         checkkwd = CHKNL | CHKKWD | CHKALIAS;
388                         t = readtoken();
389                         tokpushback++;
390                         if (t == TNOT)
391                                 lp->n = pipeline();
392                         else
393                                 lp->n = command();
394                         prev->next = lp;
395                 } while (readtoken() == TPIPE);
396                 lp->next = NULL;
397                 n1 = pipenode;
398         }
399         tokpushback++;
400         if (negate) {
401                 n2 = (union node *)stalloc(sizeof (struct nnot));
402                 n2->type = NNOT;
403                 n2->nnot.com = n1;
404                 return n2;
405         } else
406                 return n1;
407 }
408
409
410
411 static union node *
412 command(void)
413 {
414         union node *n1, *n2;
415         union node *ap, **app;
416         union node *cp, **cpp;
417         union node *redir, **rpp;
418         int t;
419         int is_subshell;
420
421         checkkwd = CHKNL | CHKKWD | CHKALIAS;
422         is_subshell = 0;
423         redir = NULL;
424         n1 = NULL;
425         rpp = &redir;
426
427         /* Check for redirection which may precede command */
428         while (readtoken() == TREDIR) {
429                 *rpp = n2 = redirnode;
430                 rpp = &n2->nfile.next;
431                 parsefname();
432         }
433         tokpushback++;
434
435         switch (readtoken()) {
436         case TIF:
437                 n1 = (union node *)stalloc(sizeof (struct nif));
438                 n1->type = NIF;
439                 if ((n1->nif.test = list(0)) == NULL)
440                         synexpect(-1);
441                 consumetoken(TTHEN);
442                 n1->nif.ifpart = list(0);
443                 n2 = n1;
444                 while (readtoken() == TELIF) {
445                         n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
446                         n2 = n2->nif.elsepart;
447                         n2->type = NIF;
448                         if ((n2->nif.test = list(0)) == NULL)
449                                 synexpect(-1);
450                         consumetoken(TTHEN);
451                         n2->nif.ifpart = list(0);
452                 }
453                 if (lasttoken == TELSE)
454                         n2->nif.elsepart = list(0);
455                 else {
456                         n2->nif.elsepart = NULL;
457                         tokpushback++;
458                 }
459                 consumetoken(TFI);
460                 checkkwd = CHKKWD | CHKALIAS;
461                 break;
462         case TWHILE:
463         case TUNTIL:
464                 t = lasttoken;
465                 if ((n1 = list(0)) == NULL)
466                         synexpect(-1);
467                 consumetoken(TDO);
468                 n1 = makebinary((t == TWHILE)? NWHILE : NUNTIL, n1, list(0));
469                 consumetoken(TDONE);
470                 checkkwd = CHKKWD | CHKALIAS;
471                 break;
472         case TFOR:
473                 if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
474                         synerror("Bad for loop variable");
475                 n1 = (union node *)stalloc(sizeof (struct nfor));
476                 n1->type = NFOR;
477                 n1->nfor.var = wordtext;
478                 while (readtoken() == TNL)
479                         ;
480                 if (lasttoken == TWORD && ! quoteflag && equal(wordtext, "in")) {
481                         app = &ap;
482                         while (readtoken() == TWORD) {
483                                 n2 = makename();
484                                 *app = n2;
485                                 app = &n2->narg.next;
486                         }
487                         *app = NULL;
488                         n1->nfor.args = ap;
489                         if (lasttoken != TNL && lasttoken != TSEMI)
490                                 synexpect(-1);
491                 } else {
492                         static char argvars[5] = {
493                                 CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'
494                         };
495                         n2 = (union node *)stalloc(sizeof (struct narg));
496                         n2->type = NARG;
497                         n2->narg.text = argvars;
498                         n2->narg.backquote = NULL;
499                         n2->narg.next = NULL;
500                         n1->nfor.args = n2;
501                         /*
502                          * Newline or semicolon here is optional (but note
503                          * that the original Bourne shell only allowed NL).
504                          */
505                         if (lasttoken != TNL && lasttoken != TSEMI)
506                                 tokpushback++;
507                 }
508                 checkkwd = CHKNL | CHKKWD | CHKALIAS;
509                 if ((t = readtoken()) == TDO)
510                         t = TDONE;
511                 else if (t == TBEGIN)
512                         t = TEND;
513                 else
514                         synexpect(-1);
515                 n1->nfor.body = list(0);
516                 consumetoken(t);
517                 checkkwd = CHKKWD | CHKALIAS;
518                 break;
519         case TCASE:
520                 n1 = (union node *)stalloc(sizeof (struct ncase));
521                 n1->type = NCASE;
522                 consumetoken(TWORD);
523                 n1->ncase.expr = makename();
524                 while (readtoken() == TNL);
525                 if (lasttoken != TWORD || ! equal(wordtext, "in"))
526                         synerror("expecting \"in\"");
527                 cpp = &n1->ncase.cases;
528                 checkkwd = CHKNL | CHKKWD, readtoken();
529                 while (lasttoken != TESAC) {
530                         *cpp = cp = (union node *)stalloc(sizeof (struct nclist));
531                         cp->type = NCLIST;
532                         app = &cp->nclist.pattern;
533                         if (lasttoken == TLP)
534                                 readtoken();
535                         for (;;) {
536                                 *app = ap = makename();
537                                 checkkwd = CHKNL | CHKKWD;
538                                 if (readtoken() != TPIPE)
539                                         break;
540                                 app = &ap->narg.next;
541                                 readtoken();
542                         }
543                         ap->narg.next = NULL;
544                         if (lasttoken != TRP)
545                                 synexpect(TRP);
546                         cp->nclist.body = list(0);
547
548                         checkkwd = CHKNL | CHKKWD | CHKALIAS;
549                         if ((t = readtoken()) != TESAC) {
550                                 if (t == TENDCASE)
551                                         ;
552                                 else if (t == TFALLTHRU)
553                                         cp->type = NCLISTFALLTHRU;
554                                 else
555                                         synexpect(TENDCASE);
556                                 checkkwd = CHKNL | CHKKWD, readtoken();
557                         }
558                         cpp = &cp->nclist.next;
559                 }
560                 *cpp = NULL;
561                 checkkwd = CHKKWD | CHKALIAS;
562                 break;
563         case TLP:
564                 n1 = (union node *)stalloc(sizeof (struct nredir));
565                 n1->type = NSUBSHELL;
566                 n1->nredir.n = list(0);
567                 n1->nredir.redirect = NULL;
568                 consumetoken(TRP);
569                 checkkwd = CHKKWD | CHKALIAS;
570                 is_subshell = 1;
571                 break;
572         case TBEGIN:
573                 n1 = list(0);
574                 consumetoken(TEND);
575                 checkkwd = CHKKWD | CHKALIAS;
576                 break;
577         /* A simple command must have at least one redirection or word. */
578         case TBACKGND:
579         case TSEMI:
580         case TAND:
581         case TOR:
582         case TPIPE:
583         case TENDCASE:
584         case TFALLTHRU:
585         case TEOF:
586         case TNL:
587         case TRP:
588                 if (!redir)
589                         synexpect(-1);
590         case TWORD:
591                 tokpushback++;
592                 n1 = simplecmd(rpp, redir);
593                 return n1;
594         default:
595                 synexpect(-1);
596         }
597
598         /* Now check for redirection which may follow command */
599         while (readtoken() == TREDIR) {
600                 *rpp = n2 = redirnode;
601                 rpp = &n2->nfile.next;
602                 parsefname();
603         }
604         tokpushback++;
605         *rpp = NULL;
606         if (redir) {
607                 if (!is_subshell) {
608                         n2 = (union node *)stalloc(sizeof (struct nredir));
609                         n2->type = NREDIR;
610                         n2->nredir.n = n1;
611                         n1 = n2;
612                 }
613                 n1->nredir.redirect = redir;
614         }
615
616         return n1;
617 }
618
619
620 static union node *
621 simplecmd(union node **rpp, union node *redir)
622 {
623         union node *args, **app;
624         union node **orig_rpp = rpp;
625         union node *n = NULL;
626         int special;
627         int savecheckkwd;
628
629         /* If we don't have any redirections already, then we must reset */
630         /* rpp to be the address of the local redir variable.  */
631         if (redir == NULL)
632                 rpp = &redir;
633
634         args = NULL;
635         app = &args;
636         /*
637          * We save the incoming value, because we need this for shell
638          * functions.  There can not be a redirect or an argument between
639          * the function name and the open parenthesis.
640          */
641         orig_rpp = rpp;
642
643         savecheckkwd = CHKALIAS;
644
645         for (;;) {
646                 checkkwd = savecheckkwd;
647                 if (readtoken() == TWORD) {
648                         n = makename();
649                         *app = n;
650                         app = &n->narg.next;
651                         if (savecheckkwd != 0 && !isassignment(wordtext))
652                                 savecheckkwd = 0;
653                 } else if (lasttoken == TREDIR) {
654                         *rpp = n = redirnode;
655                         rpp = &n->nfile.next;
656                         parsefname();   /* read name of redirection file */
657                 } else if (lasttoken == TLP && app == &args->narg.next
658                                             && rpp == orig_rpp) {
659                         /* We have a function */
660                         consumetoken(TRP);
661                         funclinno = plinno;
662                         /*
663                          * - Require plain text.
664                          * - Functions with '/' cannot be called.
665                          * - Reject name=().
666                          * - Reject ksh extended glob patterns.
667                          */
668                         if (!noexpand(n->narg.text) || quoteflag ||
669                             strchr(n->narg.text, '/') ||
670                             strchr("!%*+-=?@}~",
671                                 n->narg.text[strlen(n->narg.text) - 1]))
672                                 synerror("Bad function name");
673                         rmescapes(n->narg.text);
674                         if (find_builtin(n->narg.text, &special) >= 0 &&
675                             special)
676                                 synerror("Cannot override a special builtin with a function");
677                         n->type = NDEFUN;
678                         n->narg.next = command();
679                         funclinno = 0;
680                         return n;
681                 } else {
682                         tokpushback++;
683                         break;
684                 }
685         }
686         *app = NULL;
687         *rpp = NULL;
688         n = (union node *)stalloc(sizeof (struct ncmd));
689         n->type = NCMD;
690         n->ncmd.args = args;
691         n->ncmd.redirect = redir;
692         return n;
693 }
694
695 static union node *
696 makename(void)
697 {
698         union node *n;
699
700         n = (union node *)stalloc(sizeof (struct narg));
701         n->type = NARG;
702         n->narg.next = NULL;
703         n->narg.text = wordtext;
704         n->narg.backquote = backquotelist;
705         return n;
706 }
707
708 static union node *
709 makebinary(int type, union node *n1, union node *n2)
710 {
711         union node *n;
712
713         n = (union node *)stalloc(sizeof (struct nbinary));
714         n->type = type;
715         n->nbinary.ch1 = n1;
716         n->nbinary.ch2 = n2;
717         return (n);
718 }
719
720 void
721 forcealias(void)
722 {
723         checkkwd |= CHKALIAS;
724 }
725
726 void
727 fixredir(union node *n, const char *text, int err)
728 {
729         TRACE(("Fix redir %s %d\n", text, err));
730         if (!err)
731                 n->ndup.vname = NULL;
732
733         if (is_digit(text[0]) && text[1] == '\0')
734                 n->ndup.dupfd = digit_val(text[0]);
735         else if (text[0] == '-' && text[1] == '\0')
736                 n->ndup.dupfd = -1;
737         else {
738
739                 if (err)
740                         synerror("Bad fd number");
741                 else
742                         n->ndup.vname = makename();
743         }
744 }
745
746
747 static void
748 parsefname(void)
749 {
750         union node *n = redirnode;
751
752         consumetoken(TWORD);
753         if (n->type == NHERE) {
754                 struct heredoc *here = heredoc;
755                 struct heredoc *p;
756
757                 if (quoteflag == 0)
758                         n->type = NXHERE;
759                 TRACE(("Here document %d\n", n->type));
760                 if (here->striptabs) {
761                         while (*wordtext == '\t')
762                                 wordtext++;
763                 }
764                 if (! noexpand(wordtext))
765                         synerror("Illegal eof marker for << redirection");
766                 rmescapes(wordtext);
767                 here->eofmark = wordtext;
768                 here->next = NULL;
769                 if (heredoclist == NULL)
770                         heredoclist = here;
771                 else {
772                         for (p = heredoclist ; p->next ; p = p->next);
773                         p->next = here;
774                 }
775         } else if (n->type == NTOFD || n->type == NFROMFD) {
776                 fixredir(n, wordtext, 0);
777         } else {
778                 n->nfile.fname = makename();
779         }
780 }
781
782
783 /*
784  * Input any here documents.
785  */
786
787 static void
788 parseheredoc(void)
789 {
790         struct heredoc *here;
791         union node *n;
792
793         while (heredoclist) {
794                 here = heredoclist;
795                 heredoclist = here->next;
796                 if (needprompt) {
797                         setprompt(2);
798                         needprompt = 0;
799                 }
800                 readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
801                                 here->eofmark, here->striptabs);
802                 n = makename();
803                 here->here->nhere.doc = n;
804         }
805 }
806
807 static int
808 peektoken(void)
809 {
810         int t;
811
812         t = readtoken();
813         tokpushback++;
814         return (t);
815 }
816
817 static int
818 readtoken(void)
819 {
820         int t;
821         struct alias *ap;
822 #ifdef DEBUG
823         int alreadyseen = tokpushback;
824 #endif
825
826         top:
827         t = xxreadtoken();
828
829         /*
830          * eat newlines
831          */
832         if (checkkwd & CHKNL) {
833                 while (t == TNL) {
834                         parseheredoc();
835                         t = xxreadtoken();
836                 }
837         }
838
839         /*
840          * check for keywords and aliases
841          */
842         if (t == TWORD && !quoteflag)
843         {
844                 const char * const *pp;
845
846                 if (checkkwd & CHKKWD)
847                         for (pp = parsekwd; *pp; pp++) {
848                                 if (**pp == *wordtext && equal(*pp, wordtext))
849                                 {
850                                         lasttoken = t = pp - parsekwd + KWDOFFSET;
851                                         TRACE(("keyword %s recognized\n", tokname[t]));
852                                         goto out;
853                                 }
854                         }
855                 if (checkkwd & CHKALIAS &&
856                     (ap = lookupalias(wordtext, 1)) != NULL) {
857                         pushstring(ap->val, strlen(ap->val), ap);
858                         goto top;
859                 }
860         }
861 out:
862         if (t != TNOT)
863                 checkkwd = 0;
864
865 #ifdef DEBUG
866         if (!alreadyseen)
867             TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
868         else
869             TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
870 #endif
871         return (t);
872 }
873
874
875 /*
876  * Read the next input token.
877  * If the token is a word, we set backquotelist to the list of cmds in
878  *      backquotes.  We set quoteflag to true if any part of the word was
879  *      quoted.
880  * If the token is TREDIR, then we set redirnode to a structure containing
881  *      the redirection.
882  * In all cases, the variable startlinno is set to the number of the line
883  *      on which the token starts.
884  *
885  * [Change comment:  here documents and internal procedures]
886  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
887  *  word parsing code into a separate routine.  In this case, readtoken
888  *  doesn't need to have any internal procedures, but parseword does.
889  *  We could also make parseoperator in essence the main routine, and
890  *  have parseword (readtoken1?) handle both words and redirection.]
891  */
892
893 #define RETURN(token)   return lasttoken = token
894
895 static int
896 xxreadtoken(void)
897 {
898         int c;
899
900         if (tokpushback) {
901                 tokpushback = 0;
902                 return lasttoken;
903         }
904         if (needprompt) {
905                 setprompt(2);
906                 needprompt = 0;
907         }
908         startlinno = plinno;
909         for (;;) {      /* until token or start of word found */
910                 c = pgetc_macro();
911                 switch (c) {
912                 case ' ': case '\t':
913                         continue;
914                 case '#':
915                         while ((c = pgetc()) != '\n' && c != PEOF);
916                         pungetc();
917                         continue;
918                 case '\\':
919                         if (pgetc() == '\n') {
920                                 startlinno = ++plinno;
921                                 if (doprompt)
922                                         setprompt(2);
923                                 else
924                                         setprompt(0);
925                                 continue;
926                         }
927                         pungetc();
928                         /* FALLTHROUGH */
929                 default:
930                         return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
931                 case '\n':
932                         plinno++;
933                         needprompt = doprompt;
934                         RETURN(TNL);
935                 case PEOF:
936                         RETURN(TEOF);
937                 case '&':
938                         if (pgetc_linecont() == '&')
939                                 RETURN(TAND);
940                         pungetc();
941                         RETURN(TBACKGND);
942                 case '|':
943                         if (pgetc_linecont() == '|')
944                                 RETURN(TOR);
945                         pungetc();
946                         RETURN(TPIPE);
947                 case ';':
948                         c = pgetc_linecont();
949                         if (c == ';')
950                                 RETURN(TENDCASE);
951                         else if (c == '&')
952                                 RETURN(TFALLTHRU);
953                         pungetc();
954                         RETURN(TSEMI);
955                 case '(':
956                         RETURN(TLP);
957                 case ')':
958                         RETURN(TRP);
959                 }
960         }
961 #undef RETURN
962 }
963
964
/* Number of nesting levels readtoken1() keeps in its on-stack state array. */
#define MAXNEST_static 8

/* What kind of construct a nesting level of the word parser belongs to. */
enum tokenstate_category {
	TSTATE_TOP,		/* outermost level of the word */
	TSTATE_VAR_OLD,		/* ${var+-=?}, inherits dquotes */
	TSTATE_VAR_NEW,		/* other ${var...}, own dquote state */
	TSTATE_ARITH		/* arithmetic expansion */
};

/* Per-nesting-level state used while reading a word. */
struct tokenstate {
	const char *syntax;	/* active syntax table (one of *SYNTAX) */
	int parenlevel;		/* levels of parentheses in arithmetic */
	enum tokenstate_category category;
};
978
979
980 /*
981  * Check to see whether we are at the end of the here document.  When this
982  * is called, c is set to the first character of the next input line.  If
983  * we are at the end of the here document, this routine sets the c to PEOF.
984  * The new value of c is returned.
985  */
986
987 static int
988 checkend(int c, const char *eofmark, int striptabs)
989 {
990         if (striptabs) {
991                 while (c == '\t')
992                         c = pgetc();
993         }
994         if (c == *eofmark) {
995                 int c2;
996                 const char *q;
997
998                 for (q = eofmark + 1; c2 = pgetc(), *q != '\0' && c2 == *q; q++)
999                         ;
1000                 if ((c2 == PEOF || c2 == '\n') && *q == '\0') {
1001                         c = PEOF;
1002                         if (c2 == '\n') {
1003                                 plinno++;
1004                                 needprompt = doprompt;
1005                         }
1006                 } else {
1007                         pungetc();
1008                         pushstring(eofmark + 1, q - (eofmark + 1), NULL);
1009                 }
1010         } else if (c == '\n' && *eofmark == '\0') {
1011                 c = PEOF;
1012                 plinno++;
1013                 needprompt = doprompt;
1014         }
1015         return (c);
1016 }
1017
1018
1019 /*
1020  * Parse a redirection operator.  The variable "out" points to a string
1021  * specifying the fd to be redirected.  The variable "c" contains the
1022  * first character of the redirection operator.
1023  */
1024
1025 static void
1026 parseredir(char *out, int c)
1027 {
1028         char fd = *out;
1029         union node *np;
1030
1031         np = (union node *)stalloc(sizeof (struct nfile));
1032         if (c == '>') {
1033                 np->nfile.fd = 1;
1034                 c = pgetc_linecont();
1035                 if (c == '>')
1036                         np->type = NAPPEND;
1037                 else if (c == '&')
1038                         np->type = NTOFD;
1039                 else if (c == '|')
1040                         np->type = NCLOBBER;
1041                 else {
1042                         np->type = NTO;
1043                         pungetc();
1044                 }
1045         } else {        /* c == '<' */
1046                 np->nfile.fd = 0;
1047                 c = pgetc_linecont();
1048                 if (c == '<') {
1049                         if (sizeof (struct nfile) != sizeof (struct nhere)) {
1050                                 np = (union node *)stalloc(sizeof (struct nhere));
1051                                 np->nfile.fd = 0;
1052                         }
1053                         np->type = NHERE;
1054                         heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
1055                         heredoc->here = np;
1056                         if ((c = pgetc_linecont()) == '-') {
1057                                 heredoc->striptabs = 1;
1058                         } else {
1059                                 heredoc->striptabs = 0;
1060                                 pungetc();
1061                         }
1062                 } else if (c == '&')
1063                         np->type = NFROMFD;
1064                 else if (c == '>')
1065                         np->type = NFROMTO;
1066                 else {
1067                         np->type = NFROM;
1068                         pungetc();
1069                 }
1070         }
1071         if (fd != '\0')
1072                 np->nfile.fd = digit_val(fd);
1073         redirnode = np;
1074 }
1075
1076 /*
1077  * Called to parse command substitutions.
1078  */
1079
1080 static char *
1081 parsebackq(char *out, struct nodelist **pbqlist,
1082                 int oldstyle, int dblquote, int quoted)
1083 {
1084         struct nodelist **nlpp;
1085         union node *n;
1086         char *volatile str;
1087         struct jmploc jmploc;
1088         struct jmploc *const savehandler = handler;
1089         size_t savelen;
1090         int saveprompt;
1091         const int bq_startlinno = plinno;
1092         char *volatile ostr = NULL;
1093         struct parsefile *const savetopfile = getcurrentfile();
1094         struct heredoc *const saveheredoclist = heredoclist;
1095         struct heredoc *here;
1096
1097         str = NULL;
1098         if (setjmp(jmploc.loc)) {
1099                 popfilesupto(savetopfile);
1100                 if (str)
1101                         ckfree(str);
1102                 if (ostr)
1103                         ckfree(ostr);
1104                 heredoclist = saveheredoclist;
1105                 handler = savehandler;
1106                 if (exception == EXERROR) {
1107                         startlinno = bq_startlinno;
1108                         synerror("Error in command substitution");
1109                 }
1110                 longjmp(handler->loc, 1);
1111         }
1112         INTOFF;
1113         savelen = out - stackblock();
1114         if (savelen > 0) {
1115                 str = ckmalloc(savelen);
1116                 memcpy(str, stackblock(), savelen);
1117         }
1118         handler = &jmploc;
1119         heredoclist = NULL;
1120         INTON;
1121         if (oldstyle) {
1122                 /* We must read until the closing backquote, giving special
1123                    treatment to some slashes, and then push the string and
1124                    reread it as input, interpreting it normally.  */
1125                 char *oout;
1126                 int c;
1127                 int olen;
1128
1129
1130                 STARTSTACKSTR(oout);
1131                 for (;;) {
1132                         if (needprompt) {
1133                                 setprompt(2);
1134                                 needprompt = 0;
1135                         }
1136                         CHECKSTRSPACE(2, oout);
1137                         c = pgetc_linecont();
1138                         if (c == '`')
1139                                 break;
1140                         switch (c) {
1141                         case '\\':
1142                                 c = pgetc();
1143                                 if (c != '\\' && c != '`' && c != '$'
1144                                     && (!dblquote || c != '"'))
1145                                         USTPUTC('\\', oout);
1146                                 break;
1147
1148                         case '\n':
1149                                 plinno++;
1150                                 needprompt = doprompt;
1151                                 break;
1152
1153                         case PEOF:
1154                                 startlinno = plinno;
1155                                 synerror("EOF in backquote substitution");
1156                                 break;
1157
1158                         default:
1159                                 break;
1160                         }
1161                         USTPUTC(c, oout);
1162                 }
1163                 USTPUTC('\0', oout);
1164                 olen = oout - stackblock();
1165                 INTOFF;
1166                 ostr = ckmalloc(olen);
1167                 memcpy(ostr, stackblock(), olen);
1168                 setinputstring(ostr, 1);
1169                 INTON;
1170         }
1171         nlpp = pbqlist;
1172         while (*nlpp)
1173                 nlpp = &(*nlpp)->next;
1174         *nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1175         (*nlpp)->next = NULL;
1176
1177         if (oldstyle) {
1178                 saveprompt = doprompt;
1179                 doprompt = 0;
1180         }
1181
1182         n = list(0);
1183
1184         if (oldstyle) {
1185                 if (peektoken() != TEOF)
1186                         synexpect(-1);
1187                 doprompt = saveprompt;
1188         } else
1189                 consumetoken(TRP);
1190
1191         (*nlpp)->n = n;
1192         if (oldstyle) {
1193                 /*
1194                  * Start reading from old file again, ignoring any pushed back
1195                  * tokens left from the backquote parsing
1196                  */
1197                 popfile();
1198                 tokpushback = 0;
1199         }
1200         STARTSTACKSTR(out);
1201         CHECKSTRSPACE(savelen + 1, out);
1202         INTOFF;
1203         if (str) {
1204                 memcpy(out, str, savelen);
1205                 STADJUST(savelen, out);
1206                 ckfree(str);
1207                 str = NULL;
1208         }
1209         if (ostr) {
1210                 ckfree(ostr);
1211                 ostr = NULL;
1212         }
1213         here = saveheredoclist;
1214         if (here != NULL) {
1215                 while (here->next != NULL)
1216                         here = here->next;
1217                 here->next = heredoclist;
1218                 heredoclist = saveheredoclist;
1219         }
1220         handler = savehandler;
1221         INTON;
1222         if (quoted)
1223                 USTPUTC(CTLBACKQ | CTLQUOTE, out);
1224         else
1225                 USTPUTC(CTLBACKQ, out);
1226         return out;
1227 }
1228
1229
1230 /*
1231  * Called to parse a backslash escape sequence inside $'...'.
1232  * The backslash has already been read.
1233  */
1234 static char *
1235 readcstyleesc(char *out)
1236 {
1237         int c, vc, i, n;
1238         unsigned int v;
1239
1240         c = pgetc();
1241         switch (c) {
1242         case '\0':
1243                 synerror("Unterminated quoted string");
1244         case '\n':
1245                 plinno++;
1246                 if (doprompt)
1247                         setprompt(2);
1248                 else
1249                         setprompt(0);
1250                 return out;
1251         case '\\':
1252         case '\'':
1253         case '"':
1254                 v = c;
1255                 break;
1256         case 'a': v = '\a'; break;
1257         case 'b': v = '\b'; break;
1258         case 'e': v = '\033'; break;
1259         case 'f': v = '\f'; break;
1260         case 'n': v = '\n'; break;
1261         case 'r': v = '\r'; break;
1262         case 't': v = '\t'; break;
1263         case 'v': v = '\v'; break;
1264         case 'x':
1265                   v = 0;
1266                   for (;;) {
1267                           c = pgetc();
1268                           if (c >= '0' && c <= '9')
1269                                   v = (v << 4) + c - '0';
1270                           else if (c >= 'A' && c <= 'F')
1271                                   v = (v << 4) + c - 'A' + 10;
1272                           else if (c >= 'a' && c <= 'f')
1273                                   v = (v << 4) + c - 'a' + 10;
1274                           else
1275                                   break;
1276                   }
1277                   pungetc();
1278                   break;
1279         case '0': case '1': case '2': case '3':
1280         case '4': case '5': case '6': case '7':
1281                   v = c - '0';
1282                   c = pgetc();
1283                   if (c >= '0' && c <= '7') {
1284                           v <<= 3;
1285                           v += c - '0';
1286                           c = pgetc();
1287                           if (c >= '0' && c <= '7') {
1288                                   v <<= 3;
1289                                   v += c - '0';
1290                           } else
1291                                   pungetc();
1292                   } else
1293                           pungetc();
1294                   break;
1295         case 'c':
1296                   c = pgetc();
1297                   if (c < 0x3f || c > 0x7a || c == 0x60)
1298                           synerror("Bad escape sequence");
1299                   if (c == '\\' && pgetc() != '\\')
1300                           synerror("Bad escape sequence");
1301                   if (c == '?')
1302                           v = 127;
1303                   else
1304                           v = c & 0x1f;
1305                   break;
1306         case 'u':
1307         case 'U':
1308                   n = c == 'U' ? 8 : 4;
1309                   v = 0;
1310                   for (i = 0; i < n; i++) {
1311                           c = pgetc();
1312                           if (c >= '0' && c <= '9')
1313                                   v = (v << 4) + c - '0';
1314                           else if (c >= 'A' && c <= 'F')
1315                                   v = (v << 4) + c - 'A' + 10;
1316                           else if (c >= 'a' && c <= 'f')
1317                                   v = (v << 4) + c - 'a' + 10;
1318                           else
1319                                   synerror("Bad escape sequence");
1320                   }
1321                   if (v == 0 || (v >= 0xd800 && v <= 0xdfff))
1322                           synerror("Bad escape sequence");
1323                   /* We really need iconv here. */
1324                   if (initial_localeisutf8 && v > 127) {
1325                           CHECKSTRSPACE(4, out);
1326                           /*
1327                            * We cannot use wctomb() as the locale may have
1328                            * changed.
1329                            */
1330                           if (v <= 0x7ff) {
1331                                   USTPUTC(0xc0 | v >> 6, out);
1332                                   USTPUTC(0x80 | (v & 0x3f), out);
1333                                   return out;
1334                           } else if (v <= 0xffff) {
1335                                   USTPUTC(0xe0 | v >> 12, out);
1336                                   USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1337                                   USTPUTC(0x80 | (v & 0x3f), out);
1338                                   return out;
1339                           } else if (v <= 0x10ffff) {
1340                                   USTPUTC(0xf0 | v >> 18, out);
1341                                   USTPUTC(0x80 | ((v >> 12) & 0x3f), out);
1342                                   USTPUTC(0x80 | ((v >> 6) & 0x3f), out);
1343                                   USTPUTC(0x80 | (v & 0x3f), out);
1344                                   return out;
1345                           }
1346                   }
1347                   if (v > 127)
1348                           v = '?';
1349                   break;
1350         default:
1351                   synerror("Bad escape sequence");
1352         }
1353         vc = (char)v;
1354         /*
1355          * We can't handle NUL bytes.
1356          * POSIX says we should skip till the closing quote.
1357          */
1358         if (vc == '\0') {
1359                 while ((c = pgetc()) != '\'') {
1360                         if (c == '\\')
1361                                 c = pgetc();
1362                         if (c == PEOF)
1363                                 synerror("Unterminated quoted string");
1364                         if (c == '\n') {
1365                                 plinno++;
1366                                 if (doprompt)
1367                                         setprompt(2);
1368                                 else
1369                                         setprompt(0);
1370                         }
1371                 }
1372                 pungetc();
1373                 return out;
1374         }
1375         if (SQSYNTAX[vc] == CCTL)
1376                 USTPUTC(CTLESC, out);
1377         USTPUTC(vc, out);
1378         return out;
1379 }
1380
1381
1382 /*
1383  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
1384  * is not NULL, read a here document.  In the latter case, eofmark is the
1385  * word which marks the end of the document and striptabs is true if
1386  * leading tabs should be stripped from the document.  The argument firstc
1387  * is the first character of the input token or document.
1388  *
1389  * Because C does not have internal subroutines, I have simulated them
1390  * using goto's to implement the subroutine linkage.  The following macros
1391  * will run code that appears at the end of readtoken1.
1392  */
1393
1394 #define PARSESUB()      {goto parsesub; parsesub_return:;}
1395 #define PARSEARITH()    {goto parsearith; parsearith_return:;}
1396
1397 static int
1398 readtoken1(int firstc, char const *initialsyntax, const char *eofmark,
1399     int striptabs)
1400 {
1401         int c = firstc;
1402         char *out;
1403         int len;
1404         struct nodelist *bqlist;
1405         int quotef;
1406         int newvarnest;
1407         int level;
1408         int synentry;
1409         struct tokenstate state_static[MAXNEST_static];
1410         int maxnest = MAXNEST_static;
1411         struct tokenstate *state = state_static;
1412         int sqiscstyle = 0;
1413
1414         startlinno = plinno;
1415         quotef = 0;
1416         bqlist = NULL;
1417         newvarnest = 0;
1418         level = 0;
1419         state[level].syntax = initialsyntax;
1420         state[level].parenlevel = 0;
1421         state[level].category = TSTATE_TOP;
1422
1423         STARTSTACKSTR(out);
1424         loop: { /* for each line, until end of word */
1425                 if (eofmark && eofmark != NOEOFMARK)
1426                         /* set c to PEOF if at end of here document */
1427                         c = checkend(c, eofmark, striptabs);
1428                 for (;;) {      /* until end of line or end of word */
1429                         CHECKSTRSPACE(4, out);  /* permit 4 calls to USTPUTC */
1430
1431                         synentry = state[level].syntax[c];
1432
1433                         switch(synentry) {
1434                         case CNL:       /* '\n' */
1435                                 if (state[level].syntax == BASESYNTAX)
1436                                         goto endword;   /* exit outer loop */
1437                                 USTPUTC(c, out);
1438                                 plinno++;
1439                                 if (doprompt)
1440                                         setprompt(2);
1441                                 else
1442                                         setprompt(0);
1443                                 c = pgetc();
1444                                 goto loop;              /* continue outer loop */
1445                         case CSBACK:
1446                                 if (sqiscstyle) {
1447                                         out = readcstyleesc(out);
1448                                         break;
1449                                 }
1450                                 /* FALLTHROUGH */
1451                         case CWORD:
1452                                 USTPUTC(c, out);
1453                                 break;
1454                         case CCTL:
1455                                 if (eofmark == NULL || initialsyntax != SQSYNTAX)
1456                                         USTPUTC(CTLESC, out);
1457                                 USTPUTC(c, out);
1458                                 break;
1459                         case CBACK:     /* backslash */
1460                                 c = pgetc();
1461                                 if (c == PEOF) {
1462                                         USTPUTC('\\', out);
1463                                         pungetc();
1464                                 } else if (c == '\n') {
1465                                         plinno++;
1466                                         if (doprompt)
1467                                                 setprompt(2);
1468                                         else
1469                                                 setprompt(0);
1470                                 } else {
1471                                         if (state[level].syntax == DQSYNTAX &&
1472                                             c != '\\' && c != '`' && c != '$' &&
1473                                             (c != '"' || (eofmark != NULL &&
1474                                                 newvarnest == 0)) &&
1475                                             (c != '}' || state[level].category != TSTATE_VAR_OLD))
1476                                                 USTPUTC('\\', out);
1477                                         if ((eofmark == NULL ||
1478                                             newvarnest > 0) &&
1479                                             state[level].syntax == BASESYNTAX)
1480                                                 USTPUTC(CTLQUOTEMARK, out);
1481                                         if (SQSYNTAX[c] == CCTL)
1482                                                 USTPUTC(CTLESC, out);
1483                                         USTPUTC(c, out);
1484                                         if ((eofmark == NULL ||
1485                                             newvarnest > 0) &&
1486                                             state[level].syntax == BASESYNTAX &&
1487                                             state[level].category == TSTATE_VAR_OLD)
1488                                                 USTPUTC(CTLQUOTEEND, out);
1489                                         quotef++;
1490                                 }
1491                                 break;
1492                         case CSQUOTE:
1493                                 USTPUTC(CTLQUOTEMARK, out);
1494                                 state[level].syntax = SQSYNTAX;
1495                                 sqiscstyle = 0;
1496                                 break;
1497                         case CDQUOTE:
1498                                 USTPUTC(CTLQUOTEMARK, out);
1499                                 state[level].syntax = DQSYNTAX;
1500                                 break;
1501                         case CENDQUOTE:
1502                                 if (eofmark != NULL && newvarnest == 0)
1503                                         USTPUTC(c, out);
1504                                 else {
1505                                         if (state[level].category == TSTATE_VAR_OLD)
1506                                                 USTPUTC(CTLQUOTEEND, out);
1507                                         state[level].syntax = BASESYNTAX;
1508                                         quotef++;
1509                                 }
1510                                 break;
1511                         case CVAR:      /* '$' */
1512                                 PARSESUB();             /* parse substitution */
1513                                 break;
1514                         case CENDVAR:   /* '}' */
1515                                 if (level > 0 &&
1516                                     ((state[level].category == TSTATE_VAR_OLD &&
1517                                       state[level].syntax ==
1518                                       state[level - 1].syntax) ||
1519                                     (state[level].category == TSTATE_VAR_NEW &&
1520                                      state[level].syntax == BASESYNTAX))) {
1521                                         if (state[level].category == TSTATE_VAR_NEW)
1522                                                 newvarnest--;
1523                                         level--;
1524                                         USTPUTC(CTLENDVAR, out);
1525                                 } else {
1526                                         USTPUTC(c, out);
1527                                 }
1528                                 break;
1529                         case CLP:       /* '(' in arithmetic */
1530                                 state[level].parenlevel++;
1531                                 USTPUTC(c, out);
1532                                 break;
1533                         case CRP:       /* ')' in arithmetic */
1534                                 if (state[level].parenlevel > 0) {
1535                                         USTPUTC(c, out);
1536                                         --state[level].parenlevel;
1537                                 } else {
1538                                         if (pgetc_linecont() == ')') {
1539                                                 if (level > 0 &&
1540                                                     state[level].category == TSTATE_ARITH) {
1541                                                         level--;
1542                                                         USTPUTC(CTLENDARI, out);
1543                                                 } else
1544                                                         USTPUTC(')', out);
1545                                         } else {
1546                                                 /*
1547                                                  * unbalanced parens
1548                                                  *  (don't 2nd guess - no error)
1549                                                  */
1550                                                 pungetc();
1551                                                 USTPUTC(')', out);
1552                                         }
1553                                 }
1554                                 break;
1555                         case CBQUOTE:   /* '`' */
1556                                 out = parsebackq(out, &bqlist, 1,
1557                                     state[level].syntax == DQSYNTAX &&
1558                                     (eofmark == NULL || newvarnest > 0),
1559                                     state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX);
1560                                 break;
1561                         case CEOF:
1562                                 goto endword;           /* exit outer loop */
1563                         case CIGN:
1564                                 break;
1565                         default:
1566                                 if (level == 0)
1567                                         goto endword;   /* exit outer loop */
1568                                 USTPUTC(c, out);
1569                         }
1570                         c = pgetc_macro();
1571                 }
1572         }
1573 endword:
1574         if (state[level].syntax == ARISYNTAX)
1575                 synerror("Missing '))'");
1576         if (state[level].syntax != BASESYNTAX && eofmark == NULL)
1577                 synerror("Unterminated quoted string");
1578         if (state[level].category == TSTATE_VAR_OLD ||
1579             state[level].category == TSTATE_VAR_NEW) {
1580                 startlinno = plinno;
1581                 synerror("Missing '}'");
1582         }
1583         if (state != state_static)
1584                 parser_temp_free_upto(state);
1585         USTPUTC('\0', out);
1586         len = out - stackblock();
1587         out = stackblock();
1588         if (eofmark == NULL) {
1589                 if ((c == '>' || c == '<')
1590                  && quotef == 0
1591                  && len <= 2
1592                  && (*out == '\0' || is_digit(*out))) {
1593                         parseredir(out, c);
1594                         return lasttoken = TREDIR;
1595                 } else {
1596                         pungetc();
1597                 }
1598         }
1599         quoteflag = quotef;
1600         backquotelist = bqlist;
1601         grabstackblock(len);
1602         wordtext = out;
1603         return lasttoken = TWORD;
1604 /* end of readtoken routine */
1605
1606
1607 /*
1608  * Parse a substitution.  At this point, we have read the dollar sign
1609  * and nothing else.
1610  */
1611
1612 parsesub: {
1613         int subtype;
1614         int typeloc;
1615         int flags;
1616         char *p;
1617         static const char types[] = "}-+?=";
1618         int linno;
1619         int length;
1620         int c1;
1621
1622         c = pgetc_linecont();
1623         if (c == '(') { /* $(command) or $((arith)) */
1624                 if (pgetc_linecont() == '(') {
1625                         PARSEARITH();
1626                 } else {
1627                         pungetc();
1628                         out = parsebackq(out, &bqlist, 0,
1629                             state[level].syntax == DQSYNTAX &&
1630                             (eofmark == NULL || newvarnest > 0),
1631                             state[level].syntax == DQSYNTAX ||
1632                             state[level].syntax == ARISYNTAX);
1633                 }
1634         } else if (c == '{' || is_name(c) || is_special(c)) {
1635                 USTPUTC(CTLVAR, out);
1636                 typeloc = out - stackblock();
1637                 USTPUTC(VSNORMAL, out);
1638                 subtype = VSNORMAL;
1639                 flags = 0;
1640                 if (c == '{') {
1641                         c = pgetc_linecont();
1642                         subtype = 0;
1643                 }
1644 varname:
1645                 if (!is_eof(c) && is_name(c)) {
1646                         length = 0;
1647                         do {
1648                                 STPUTC(c, out);
1649                                 c = pgetc_linecont();
1650                                 length++;
1651                         } while (!is_eof(c) && is_in_name(c));
1652                         if (length == 6 &&
1653                             strncmp(out - length, "LINENO", length) == 0) {
1654                                 /* Replace the variable name with the
1655                                  * current line number. */
1656                                 STADJUST(-6, out);
1657                                 CHECKSTRSPACE(11, out);
1658                                 linno = plinno;
1659                                 if (funclinno != 0)
1660                                         linno -= funclinno - 1;
1661                                 length = snprintf(out, 11, "%d", linno);
1662                                 if (length > 10)
1663                                         length = 10;
1664                                 out += length;
1665                                 flags |= VSLINENO;
1666                         }
1667                 } else if (is_digit(c)) {
1668                         if (subtype != VSNORMAL) {
1669                                 do {
1670                                         STPUTC(c, out);
1671                                         c = pgetc_linecont();
1672                                 } while (is_digit(c));
1673                         } else {
1674                                 USTPUTC(c, out);
1675                                 c = pgetc_linecont();
1676                         }
1677                 } else if (is_special(c)) {
1678                         c1 = c;
1679                         c = pgetc_linecont();
1680                         if (subtype == 0 && c1 == '#') {
1681                                 subtype = VSLENGTH;
1682                                 if (strchr(types, c) == NULL && c != ':' &&
1683                                     c != '#' && c != '%')
1684                                         goto varname;
1685                                 c1 = c;
1686                                 c = pgetc_linecont();
1687                                 if (c1 != '}' && c == '}') {
1688                                         pungetc();
1689                                         c = c1;
1690                                         goto varname;
1691                                 }
1692                                 pungetc();
1693                                 c = c1;
1694                                 c1 = '#';
1695                                 subtype = 0;
1696                         }
1697                         USTPUTC(c1, out);
1698                 } else {
1699                         subtype = VSERROR;
1700                         if (c == '}')
1701                                 pungetc();
1702                         else if (c == '\n' || c == PEOF)
1703                                 synerror("Unexpected end of line in substitution");
1704                         else if (BASESYNTAX[c] != CCTL)
1705                                 USTPUTC(c, out);
1706                 }
1707                 if (subtype == 0) {
1708                         switch (c) {
1709                         case ':':
1710                                 flags |= VSNUL;
1711                                 c = pgetc_linecont();
1712                                 /*FALLTHROUGH*/
1713                         default:
1714                                 p = strchr(types, c);
1715                                 if (p == NULL) {
1716                                         if (c == '\n' || c == PEOF)
1717                                                 synerror("Unexpected end of line in substitution");
1718                                         if (flags == VSNUL)
1719                                                 STPUTC(':', out);
1720                                         if (BASESYNTAX[c] != CCTL)
1721                                                 STPUTC(c, out);
1722                                         subtype = VSERROR;
1723                                 } else
1724                                         subtype = p - types + VSNORMAL;
1725                                 break;
1726                         case '%':
1727                         case '#':
1728                                 {
1729                                         int cc = c;
1730                                         subtype = c == '#' ? VSTRIMLEFT :
1731                                                              VSTRIMRIGHT;
1732                                         c = pgetc_linecont();
1733                                         if (c == cc)
1734                                                 subtype++;
1735                                         else
1736                                                 pungetc();
1737                                         break;
1738                                 }
1739                         }
1740                 } else if (subtype != VSERROR) {
1741                         if (subtype == VSLENGTH && c != '}')
1742                                 subtype = VSERROR;
1743                         pungetc();
1744                 }
1745                 STPUTC('=', out);
1746                 if (state[level].syntax == DQSYNTAX ||
1747                     state[level].syntax == ARISYNTAX)
1748                         flags |= VSQUOTE;
1749                 *(stackblock() + typeloc) = subtype | flags;
1750                 if (subtype != VSNORMAL) {
1751                         if (level + 1 >= maxnest) {
1752                                 maxnest *= 2;
1753                                 if (state == state_static) {
1754                                         state = parser_temp_alloc(
1755                                             maxnest * sizeof(*state));
1756                                         memcpy(state, state_static,
1757                                             MAXNEST_static * sizeof(*state));
1758                                 } else
1759                                         state = parser_temp_realloc(state,
1760                                             maxnest * sizeof(*state));
1761                         }
1762                         level++;
1763                         state[level].parenlevel = 0;
1764                         if (subtype == VSMINUS || subtype == VSPLUS ||
1765                             subtype == VSQUESTION || subtype == VSASSIGN) {
1766                                 /*
1767                                  * For operators that were in the Bourne shell,
1768                                  * inherit the double-quote state.
1769                                  */
1770                                 state[level].syntax = state[level - 1].syntax;
1771                                 state[level].category = TSTATE_VAR_OLD;
1772                         } else {
1773                                 /*
1774                                  * The other operators take a pattern,
1775                                  * so go to BASESYNTAX.
1776                                  * Also, ' and " are now special, even
1777                                  * in here documents.
1778                                  */
1779                                 state[level].syntax = BASESYNTAX;
1780                                 state[level].category = TSTATE_VAR_NEW;
1781                                 newvarnest++;
1782                         }
1783                 }
1784         } else if (c == '\'' && state[level].syntax == BASESYNTAX) {
1785                 /* $'cstylequotes' */
1786                 USTPUTC(CTLQUOTEMARK, out);
1787                 state[level].syntax = SQSYNTAX;
1788                 sqiscstyle = 1;
1789         } else {
1790                 USTPUTC('$', out);
1791                 pungetc();
1792         }
1793         goto parsesub_return;
1794 }
1795
1796
1797 /*
1798  * Parse an arithmetic expansion (indicate start of one and set state)
1799  */
1800 parsearith: {
1801
1802         if (level + 1 >= maxnest) {
1803                 maxnest *= 2;
1804                 if (state == state_static) {
1805                         state = parser_temp_alloc(
1806                             maxnest * sizeof(*state));
1807                         memcpy(state, state_static,
1808                             MAXNEST_static * sizeof(*state));
1809                 } else
1810                         state = parser_temp_realloc(state,
1811                             maxnest * sizeof(*state));
1812         }
1813         level++;
1814         state[level].syntax = ARISYNTAX;
1815         state[level].parenlevel = 0;
1816         state[level].category = TSTATE_ARITH;
1817         USTPUTC(CTLARI, out);
1818         if (state[level - 1].syntax == DQSYNTAX)
1819                 USTPUTC('"',out);
1820         else
1821                 USTPUTC(' ',out);
1822         goto parsearith_return;
1823 }
1824
1825 } /* end of readtoken */
1826
1827
1828 /*
1829  * Returns true if the text contains nothing to expand (no dollar signs
1830  * or backquotes).
1831  */
1832
1833 static int
1834 noexpand(char *text)
1835 {
1836         char *p;
1837         char c;
1838
1839         p = text;
1840         while ((c = *p++) != '\0') {
1841                 if ( c == CTLQUOTEMARK)
1842                         continue;
1843                 if (c == CTLESC)
1844                         p++;
1845                 else if (BASESYNTAX[(int)c] == CCTL)
1846                         return 0;
1847         }
1848         return 1;
1849 }
1850
1851
/*
 * Check whether NAME is a legal variable name: its first character must
 * satisfy is_name() (a letter or underscore) and every later character up
 * to the terminating NUL must satisfy is_in_name() (letters, underscores
 * and digits).
 *
 * Returns 1 for a legal name, 0 otherwise.
 */

int
goodname(const char *name)
{
        const char *cur = name;

        if (!is_name(*cur))
                return 0;
        for (cur++; *cur != '\0'; cur++) {
                if (!is_in_name(*cur))
                        return 0;
        }
        return 1;
}
1871
1872
/*
 * Return 1 if P starts with a variable name that is immediately followed
 * by '=', and 0 otherwise.
 */
int
isassignment(const char *p)
{
        if (!is_name(*p))
                return 0;
        /*
         * Walk the remainder of the name until '=' is found.  There is no
         * explicit end-of-string test: the scan relies on is_in_name()
         * rejecting the terminating NUL.
         */
        for (p++; *p != '='; p++) {
                if (!is_in_name(*p))
                        return 0;
        }
        return 1;
}
1887
1888
/*
 * Read the next token and hand off to synexpect() unless it is the one
 * the caller requires.
 */
static void
consumetoken(int token)
{
        int got = readtoken();

        if (got == token)
                return;
        synexpect(token);
}
1895
1896
1897 /*
1898  * Called when an unexpected token is read during the parse.  The argument
1899  * is the token that is expected, or -1 if more than one type of token can
1900  * occur at this point.
1901  */
1902
1903 static void
1904 synexpect(int token)
1905 {
1906         char msg[64];
1907
1908         if (token >= 0) {
1909                 fmtstr(msg, 64, "%s unexpected (expecting %s)",
1910                         tokname[lasttoken], tokname[token]);
1911         } else {
1912                 fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1913         }
1914         synerror(msg);
1915 }
1916
1917
1918 static void
1919 synerror(const char *msg)
1920 {
1921         if (commandname)
1922                 outfmt(out2, "%s: %d: ", commandname, startlinno);
1923         else if (arg0)
1924                 outfmt(out2, "%s: ", arg0);
1925         outfmt(out2, "Syntax error: %s\n", msg);
1926         error((char *)NULL);
1927 }
1928
1929 static void
1930 setprompt(int which)
1931 {
1932         whichprompt = which;
1933         if (which == 0)
1934                 return;
1935
1936 #ifndef NO_HISTORY
1937         if (!el)
1938 #endif
1939         {
1940                 out2str(getprompt(NULL));
1941                 flushout(out2);
1942         }
1943 }
1944
1945 static int
1946 pgetc_linecont(void)
1947 {
1948         int c;
1949
1950         while ((c = pgetc_macro()) == '\\') {
1951                 c = pgetc();
1952                 if (c == '\n') {
1953                         plinno++;
1954                         if (doprompt)
1955                                 setprompt(2);
1956                         else
1957                                 setprompt(0);
1958                 } else {
1959                         pungetc();
1960                         /* Allow the backslash to be pushed back. */
1961                         pushstring("\\", 1, NULL);
1962                         return (pgetc());
1963                 }
1964         }
1965         return (c);
1966 }
1967
1968 /*
1969  * called by editline -- any expansions to the prompt
1970  *    should be added here.
1971  */
1972 char *
1973 getprompt(void *unused __unused)
1974 {
1975         static char ps[PROMPTLEN];
1976         const char *fmt;
1977         const char *pwd;
1978         int i, trim;
1979         static char internal_error[] = "??";
1980
1981         /*
1982          * Select prompt format.
1983          */
1984         switch (whichprompt) {
1985         case 0:
1986                 fmt = "";
1987                 break;
1988         case 1:
1989                 fmt = ps1val();
1990                 break;
1991         case 2:
1992                 fmt = ps2val();
1993                 break;
1994         default:
1995                 return internal_error;
1996         }
1997
1998         /*
1999          * Format prompt string.
2000          */
2001         for (i = 0; (i < PROMPTLEN - 1) && (*fmt != '\0'); i++, fmt++)
2002                 if (*fmt == '\\')
2003                         switch (*++fmt) {
2004
2005                                 /*
2006                                  * Hostname.
2007                                  *
2008                                  * \h specifies just the local hostname,
2009                                  * \H specifies fully-qualified hostname.
2010                                  */
2011                         case 'h':
2012                         case 'H':
2013                                 ps[i] = '\0';
2014                                 gethostname(&ps[i], PROMPTLEN - i - 1);
2015                                 ps[PROMPTLEN - 1] = '\0';
2016                                 /* Skip to end of hostname. */
2017                                 trim = (*fmt == 'h') ? '.' : '\0';
2018                                 while ((ps[i] != '\0') && (ps[i] != trim))
2019                                         i++;
2020                                 --i;
2021                                 break;
2022
2023                                 /*
2024                                  * Working directory.
2025                                  *
2026                                  * \W specifies just the final component,
2027                                  * \w specifies the entire path.
2028                                  */
2029                         case 'W':
2030                         case 'w':
2031                                 pwd = lookupvar("PWD");
2032                                 if (pwd == NULL || *pwd == '\0')
2033                                         pwd = "?";
2034                                 if (*fmt == 'W' &&
2035                                     *pwd == '/' && pwd[1] != '\0')
2036                                         strlcpy(&ps[i], strrchr(pwd, '/') + 1,
2037                                             PROMPTLEN - i);
2038                                 else
2039                                         strlcpy(&ps[i], pwd, PROMPTLEN - i);
2040                                 /* Skip to end of path. */
2041                                 while (ps[i + 1] != '\0')
2042                                         i++;
2043                                 break;
2044
2045                                 /*
2046                                  * Superuser status.
2047                                  *
2048                                  * '$' for normal users, '#' for root.
2049                                  */
2050                         case '$':
2051                                 ps[i] = (geteuid() != 0) ? '$' : '#';
2052                                 break;
2053
2054                                 /*
2055                                  * A literal \.
2056                                  */
2057                         case '\\':
2058                                 ps[i] = '\\';
2059                                 break;
2060
2061                                 /*
2062                                  * Emit unrecognized formats verbatim.
2063                                  */
2064                         default:
2065                                 ps[i] = '\\';
2066                                 if (i < PROMPTLEN - 2)
2067                                         ps[++i] = *fmt;
2068                                 break;
2069                         }
2070                 else
2071                         ps[i] = *fmt;
2072         ps[i] = '\0';
2073         return (ps);
2074 }
2075
2076
2077 const char *
2078 expandstr(const char *ps)
2079 {
2080         union node n;
2081         struct jmploc jmploc;
2082         struct jmploc *const savehandler = handler;
2083         const int saveprompt = doprompt;
2084         struct parsefile *const savetopfile = getcurrentfile();
2085         struct parser_temp *const saveparser_temp = parser_temp;
2086         const char *result = NULL;
2087
2088         if (!setjmp(jmploc.loc)) {
2089                 handler = &jmploc;
2090                 parser_temp = NULL;
2091                 setinputstring(ps, 1);
2092                 doprompt = 0;
2093                 readtoken1(pgetc(), DQSYNTAX, NOEOFMARK, 0);
2094                 if (backquotelist != NULL)
2095                         error("Command substitution not allowed here");
2096
2097                 n.narg.type = NARG;
2098                 n.narg.next = NULL;
2099                 n.narg.text = wordtext;
2100                 n.narg.backquote = backquotelist;
2101
2102                 expandarg(&n, NULL, 0);
2103                 result = stackblock();
2104                 INTOFF;
2105         }
2106         handler = savehandler;
2107         doprompt = saveprompt;
2108         popfilesupto(savetopfile);
2109         if (parser_temp != saveparser_temp) {
2110                 parser_temp_free_all();
2111                 parser_temp = saveparser_temp;
2112         }
2113         if (result != NULL) {
2114                 INTON;
2115         } else if (exception == EXINT)
2116                 raise(SIGINT);
2117         return result;
2118 }