1 /* $Header: /src/pub/tcsh/sh.glob.c,v 3.62 2004/12/25 21:15:07 christos Exp $ */
3 * sh.glob.c: Regular expression expansion
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 RCSID("$Id: sh.glob.c,v 3.62 2004/12/25 21:15:07 christos Exp $")
43 static int pargsiz, gargsiz;
/*
 * Globbing classification values (G_GLOB and G_CSH are tested as bits of
 * gflg elsewhere in this file), followed by the sizing constants used for
 * the word-vector allocations and the backquote expansion buffer.
 */
48 #define G_NONE 0 /* No globbing needed */
49 #define G_GLOB 1 /* string contains *?[] characters */
50 #define G_CSH 2 /* string contains ~`{ characters */
52 #define GLOBSPACE 100 /* Alloc increment */
53 #define LONGBSIZE 10240 /* Backquote expansion buffer size */
68 * Globbing is now done in two stages. In the first pass we expand
69 * csh globbing idioms ~`{ and then we proceed to do the normal
70 * globbing if needed ?*[
72 * Csh type globbing is handled in globexpand() and the rest is
73 * handled in glob() which is part of the 4.4BSD libc.
/*
 * Prototypes for the file-local (static) helpers.
 * NOTE(review): __P(()) is presumably the project's prototype wrapper for
 * pre-ANSI compilers -- confirm against the project headers.
 */
76 static Char *globtilde __P((Char **, Char *));
77 static Char *handleone __P((Char *, Char **, int));
78 static Char **libglob __P((Char **));
79 static Char **globexpand __P((Char **));
80 static int globbrace __P((Char *, Char *, Char ***));
81 static void expbrace __P((Char ***, Char ***, int));
82 static void pword __P((int));
83 static void psave __P((Char));
84 static void backeval __P((Char *, int));
90 Char gbuf[BUFSIZE], *gstart, *b, *u, *e;
98 for (b = gstart, e = &gbuf[BUFSIZE - 1];
99 *s && *s != '/' && *s != ':' && b < e;
103 if (gethdir(gstart)) {
104 if (adrof(STRnonomatch))
108 stderror(ERR_UNKUSER, short2str(gstart));
110 stderror(ERR_NOHOME);
112 b = &gstart[Strlen(gstart)];
114 slash = gstart[0] == '/' && gstart[1] == '\0';
122 if (slash && gstart[1] == '/')
125 return (Strsave(gstart));
136 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
137 * in stack. PWP: let =foobar pass through (for X windows)
139 if (old[1] == '-' && (old[2] == '\0' || old[2] == '/')) {
144 else if (Isdigit(old[1])) {
147 for (b = &old[2]; Isdigit(*b); b++)
148 dig = dig * 10 + (*b - '0');
149 if (*b != '\0' && *b != '/')
150 /* =<number>foobar */
157 if (!getstakd(new, dig))
160 /* Copy the rest of the string */
161 for (d = &new[Strlen(new)];
162 d < &new[BUFSIZE - 1] && (*d++ = *b++) != '\0';)
174 Char *pm, *pe, *lm, *pl;
177 int size = GLOBSPACE;
179 nv = vl = (Char **) xmalloc((size_t) (sizeof(Char *) * size));
183 /* copy part up to the brace */
184 for (lm = gbuf, p = s; *p != LBRC; *lm++ = *p++)
187 /* check for balanced braces */
188 for (i = 0, pe = ++p; *pe; pe++)
190 /* Ignore everything between [] */
191 for (++pe; *pe != RBRK && *pe != EOS; pe++)
198 else if (*pe == LBRC)
200 else if (*pe == RBRC) {
206 if (i != 0 || *pe == '\0') {
211 for (i = 0, pl = pm = p; pm <= pe; pm++)
214 for (++pm; *pm != RBRK && *pm != EOS; pm++)
238 (void) Strcpy(lm, pl);
239 (void) Strcat(gbuf, pe + 1);
241 *vl++ = Strsave(gbuf);
244 if (vl == &nv[size]) {
246 nv = (Char **) xrealloc((ptr_t) nv,
247 (size_t) (size * sizeof(Char *)));
248 vl = &nv[size - GLOBSPACE];
262 expbrace(nvp, elp, size)
266 Char **vl, **el, **nv, *s;
272 for (el = vl; *el; el++)
275 for (s = *vl; s; s = *++vl) {
279 /* leave {} untouched for find */
280 if (s[0] == '{' && (s[1] == '\0' || (s[1] == '}' && s[2] == '\0')))
282 if ((b = Strchr(s, '{')) != NULL) {
286 if ((len = globbrace(s, b, &bl)) < 0) {
288 stderror(ERR_MISSING, -len);
296 if (&el[len] >= &nv[size]) {
298 l = (int) (&el[len] - &nv[size]);
299 size += GLOBSPACE > l ? GLOBSPACE : l;
302 nv = (Char **) xrealloc((ptr_t) nv,
303 (size_t) (size * sizeof(Char *)));
316 for (bp = el; bp != vp; bp--)
326 for (bp = bl + 1; *bp; *vp++ = *bp++)
342 Char **nv, **vl, **el;
343 int size = GLOBSPACE;
346 nv = vl = (Char **) xmalloc((size_t) (sizeof(Char *) * size));
350 * Step 1: expand backquotes.
352 while ((s = *v++) != '\0') {
353 if (Strchr(s, '`')) {
356 (void) dobackp(s, 0);
357 for (i = 0; i < pargc; i++) {
359 if (vl == &nv[size]) {
361 nv = (Char **) xrealloc((ptr_t) nv,
362 (size_t) (size * sizeof(Char *)));
363 vl = &nv[size - GLOBSPACE];
366 xfree((ptr_t) pargv);
371 if (vl == &nv[size]) {
373 nv = (Char **) xrealloc((ptr_t) nv,
374 (size_t) (size * sizeof(Char *)));
375 vl = &nv[size - GLOBSPACE];
385 * Step 2: expand braces
388 expbrace(&nv, &el, size);
395 for (s = *vl; s; s = *++vl)
397 Char gp[BUFSIZE], *ns;
399 *vl = globtilde(nv, s);
402 if ((ns = globequal(gp, s)) == NULL) {
403 if (!adrof(STRnonomatch)) {
410 /* Expansion succeeded */
421 * Step 4: expand .. if the variable symlinks==expand is set
423 if (symlinks == SYM_EXPAND) {
424 for (s = *vl; s; s = *++vl) {
425 *vl = dnormalize(s, 1);
435 handleone(str, vl, action)
446 setname(short2str(str));
448 stderror(ERR_NAME | ERR_AMBIG);
452 for (t = vlp; (p = *t++) != '\0'; chars++)
455 str = (Char *)xmalloc((size_t)(chars * sizeof(Char)));
456 for (t = vlp, strp = str; (p = *t++) != '\0'; chars++) {
458 *strp++ = *p++ & TRIM;
465 str = Strsave(strip(*vlp));
478 int gflgs = GLOB_QUOTE | GLOB_NOMAGIC | GLOB_ALTNOT;
481 int nonomatch = adrof(STRnonomatch) != 0, magic = 0, match = 0;
491 gflgs |= GLOB_NOCHECK;
494 ptr = short2qstr(*vl);
495 switch (glob(ptr, gflgs, 0, &globv)) {
499 stderror(ERR_NAME | ERR_GLOB);
508 if (globv.gl_flags & GLOB_MAGCHAR) {
509 match |= (globv.gl_matchc != 0);
512 gflgs |= GLOB_APPEND;
515 vl = (globv.gl_pathc == 0 || (magic && !match && !nonomatch)) ?
516 NULL : blk2short(globv.gl_pathv);
527 Char *v[2], **vl, **vo;
530 noglob = adrof(STRnoglob) != 0;
537 return (strip(Strsave(str)));
541 * Expand back-quote, tilde and brace
544 if (noglob || (gflg & G_GLOB) == 0) {
547 return (Strsave(STRNULL));
550 return (handleone(str, vo, action));
558 else if (noglob || (gflg & G_GLOB) == 0)
559 return (strip(Strsave(str)));
564 if ((gflg & G_CSH) && vl != vo)
567 setname(short2str(str));
568 stderror(ERR_NAME | ERR_NOMATCH);
572 return (Strsave(STRNULL));
575 return (handleone(str, vl, action));
592 gargc = blklen(gargv);
596 noglob = adrof(STRnoglob) != 0;
600 * Expand back-quote, tilde and brace
602 vl = vo = globexpand(v);
604 vl = vo = saveblk(v);
606 if (!noglob && (gflg & G_GLOB)) {
614 gargc = vl ? blklen(vl) : 0;
622 gargv = (Char **) xmalloc((size_t) (sizeof(Char *) * gargsiz));
630 void (*f) __P((Char));
634 while ((p = *t++) != '\0')
645 while ((p = *t++) != '\0')
656 while ((p = *t++) != '\0') {
657 if (*p == '~' || *p == '=')
659 else if (*p == '{' &&
660 (p[1] == '\0' || (p[1] == '}' && p[2] == '\0')))
663 * The following line used to be *(c = p++), but hp broke their
664 * optimizer in 9.01, so we break the assignment into two pieces.
665 * The careful reader here will note that *most* compiler workarounds
666 * in tcsh are either for apollo/DomainOS or hpux. Is it a coincidence?
668 while ( *(c = p) != '\0') {
674 * We do want to expand echo `echo '*'`, so we don't
675 * use this piece of code anymore.
677 while (*p && *p != '`')
679 if (*p) /* Quoted chars */
684 if (*p) /* The matching ` */
694 else if (symlinks == SYM_EXPAND &&
695 *p && ISDOTDOT(c) && (c == *(t-1) || *(c-1) == '/') )
702 * Command substitute cp. If literal, then this is a substitution from a
703 * << redirection, and so we should not crunch blanks and tabs, separating
704 * words only at newlines.
712 Char *ep, word[LONGBSIZE];
721 pargv = (Char **) xmalloc((size_t) (sizeof(Char *) * pargsiz));
723 pargcp = pargs = word;
725 pnleft = LONGBSIZE - 4;
727 for (lp = cp; *lp != '`'; lp++) {
736 for (rp = lp; *rp && *rp != '`'; rp++)
743 oops: stderror(ERR_UNMATCHED, '`');
746 backeval(ep, literal);
753 backeval(cp, literal)
759 struct command faket;
762 Char *fakecom[2], ibuf[BUFSIZE];
767 quoted = (literal || (cp[0] & QUOTE)) ? QUOTE : 0;
768 faket.t_dtyp = NODE_COMMAND;
769 faket.t_dflg = F_BACKQ;
773 faket.t_dcom = fakecom;
774 fakecom[0] = STRfakecom1;
778 * We do the psave job to temporarily change the current job so that the
779 * following fork is considered a separate job. This is so that when
780 * backquotes are used in a builtin function that calls glob the "current
781 * job" is not corrupted. We only need one level of pushed jobs as long as
782 * we are sure to fork here.
787 * It would be nicer if we could integrate this redirection more with the
788 * routines in sh.sem.c by doing a fake execute on a builtin function that
792 if (pfork(&faket, -1) == 0) {
794 struct command *volatile t;
796 (void) close(pvec[0]);
797 (void) dmove(pvec[1], 1);
798 (void) dmove(SHDIAG, 2);
802 * Bugfix for nested backquotes by Michael Greim <greim@sbsvax.UUCP>,
803 * posted to comp.bugs.4bsd 12 Sep. 1989.
805 if (pargv) /* mg, 21.dec.88 */
806 blkfree(pargv), pargv = 0, pargsiz = 0;
809 for (arginp = cp; *cp; cp++) {
811 if (is_set(STRcsubstnonl) && (*cp == '\n' || *cp == '\r'))
816 * In the child ``forget'' everything about current aliases or
828 if (paraml.next && paraml.next != &paraml)
831 paraml.next = paraml.prev = &paraml;
832 paraml.word = STRNULL;
837 * For the sake of reset()
841 freesyn(t), t = NULL;
850 xfree((ptr_t) seterr);
858 t = syntax(paraml.next, &paraml, 0);
862 (void) sigignore(SIGTSTP);
865 (void) sigignore(SIGTTIN);
868 (void) sigignore(SIGTTOU);
870 execute(t, -1, NULL, NULL, TRUE);
873 freesyn(t), t = NULL;
877 (void) close(pvec[1]);
891 icnt = read(pvec[0], tmp, tibuf + BUFSIZE - tmp);
892 while (icnt == -1 && errno == EINTR);
903 while (tmp < tibuf + icnt) {
906 len = normal_mbtowc(&ip[i], tmp, tibuf + icnt - tmp);
909 if (!eof && (size_t)(tibuf + icnt - tmp) < MB_CUR_MAX) {
910 break; /* Maybe a partial character */
912 ip[i] = (unsigned char) *tmp | INVALID_BYTE; /* Error */
920 memmove (tibuf, tmp, tibuf + icnt - tmp);
921 tmp = tibuf + (tibuf + icnt - tmp);
933 #endif /* WINNT_NATIVE */
936 * Continue around the loop one more time, so that we can eat
937 * the last newline without terminating this word.
942 if (!quoted && (c == ' ' || c == '\t'))
948 * Unless at end-of-file, we will form a new word here if there were
949 * characters in the word, or in any case when we take text literally.
950 * If we didn't make empty words here when literal was set then we
951 * would lose blank lines.
953 if (c != 0 && (cnt || literal))
958 (void) close(pvec[0]);
968 stderror(ERR_WTOOLONG);
969 *pargcp++ = (Char) c;
977 if (pargc == pargsiz - 1) {
978 pargsiz += GLOBSPACE;
979 pargv = (Char **) xrealloc((ptr_t) pargv,
980 (size_t) (pargsiz * sizeof(Char *)));
983 pargv[pargc++] = Strsave(pargs);
990 Gmatch(string, pattern)
991 Char *string, *pattern;
993 return Gnmatch(string, pattern, NULL);
997 Gnmatch(string, pattern, endstr)
998 Char *string, *pattern, **endstr;
1000 Char **blk, **p, *tstring = string;
1001 int gpol = 1, gres = 0;
1003 if (*pattern == '^') {
1008 blk = (Char **) xmalloc((size_t) (GLOBSPACE * sizeof(Char *)));
1009 blk[0] = Strsave(pattern);
1012 expbrace(&blk, NULL, GLOBSPACE);
1015 /* Exact matches only */
1016 for (p = blk; *p; p++)
1017 gres |= t_pmatch(string, *p, &tstring, 1) == 2 ? 1 : 0;
1019 /* partial matches */
1020 int minc = 0x7fffffff;
1021 for (p = blk; *p; p++)
1022 if (t_pmatch(string, *p, &tstring, 1) != 0) {
1023 int t = (int) (tstring - string);
1025 if (minc == -1 || minc > t)
1028 *endstr = string + minc;
1032 return(gres == gpol);
1036 * Return 2 on exact match,
1037 * Return 1 on substring match.
1038 * Return 0 on no match.
1039 * *estr will point to the end of the longest exact or substring match.
1042 t_pmatch(string, pattern, estr, cs)
1043 Char *string, *pattern, **estr;
1046 NLSChar stringc, patternc, rangec;
1047 int match, negate_range;
1048 Char *oestr, *pestr, *nstring;
1050 for (nstring = string;; string = nstring) {
1051 stringc = *nstring++;
1052 TRIM_AND_EXTEND(nstring, stringc);
1054 * apollo compiler bug: switch (patternc = *pattern++) dies
1056 patternc = *pattern++;
1057 TRIM_AND_EXTEND(pattern, patternc);
1061 return (stringc == '\0' ? 2 : 1);
1069 while (*string) string++;
1077 switch(t_pmatch(string, pattern, estr, cs)) {
1086 abort(); /* Cannot happen */
1089 stringc = *string++;
1092 TRIM_AND_EXTEND(string, stringc);
1106 if ((negate_range = (*pattern == '^')) != 0)
1108 while ((rangec = *pattern++) != '\0') {
1113 TRIM_AND_EXTEND(pattern, rangec);
1114 if (*pattern == '-' && pattern[1] != ']') {
1117 rangec2 = *pattern++;
1118 TRIM_AND_EXTEND(pattern, rangec2);
1119 match = (globcharcoll(stringc, rangec2, 0) <= 0 &&
1120 globcharcoll(rangec, stringc, 0) <= 0);
1123 match = (stringc == rangec);
1126 stderror(ERR_NAME | ERR_MISSING, ']');
1127 if ((!match) && (stringc == '\0'))
1129 if (match == negate_range)
1134 TRIM_AND_EXTEND(pattern, patternc);
1135 if (cs ? patternc != stringc
1136 #if defined (NLS) && defined (SHORT_STRINGS)
1137 : towlower(patternc) != towlower(stringc))
1139 : Tolower(patternc) != Tolower(stringc))
1159 n = (int) ((p - s1) + (q - s2) - 1);
1160 if (++gargc >= gargsiz) {
1161 gargsiz += GLOBSPACE;
1162 gargv = (Char **) xrealloc((ptr_t) gargv,
1163 (size_t) (gargsiz * sizeof(Char *)));
1166 p = gargv[gargc - 1] = (Char *) xmalloc((size_t) (n * sizeof(Char)));
1167 for (q = s1; (*p++ = *q++) != '\0';)
1169 for (p--, q = s2; (*p++ = *q++) != '\0';)
1173 #if defined(FILEC) && defined(TIOCSTI)
1178 if (!a) /* check for NULL */
1183 if (!*a) /* check for NULL */
1184 return (*b ? 1 : 0);
1188 return (int) collate(*a, *b);