1 /* $Header: /src/pub/tcsh/sh.glob.c,v 3.44 2000/01/14 22:57:28 christos Exp $ */
3 * sh.glob.c: Regular expression expansion
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 RCSID("$Id: sh.glob.c,v 3.44 2000/01/14 22:57:28 christos Exp $")
46 static int pargsiz, gargsiz;
51 #define G_NONE 0 /* No globbing needed */
52 #define G_GLOB 1 /* string contains *?[] characters */
53 #define G_CSH 2 /* string contains ~`{ characters */
55 #define GLOBSPACE 100 /* Alloc increment */
56 #define LONGBSIZE 10240 /* Backquote expansion buffer size */
71 * Globbing is now done in two stages. In the first pass we expand the
72 * csh globbing idioms (~ ` {), and then we proceed with the normal
73 * globbing (? * [) if needed.
75 * Csh type globbing is handled in globexpand() and the rest is
76 * handled in glob() which is part of the 4.4BSD libc.
79 static Char *globtilde __P((Char **, Char *));
80 static Char *handleone __P((Char *, Char **, int));
81 static Char **libglob __P((Char **));
82 static Char **globexpand __P((Char **));
83 static int globbrace __P((Char *, Char *, Char ***));
84 static void expbrace __P((Char ***, Char ***, int));
85 static int pmatch __P((Char *, Char *, Char **));
86 static void pword __P((int));
87 static void psave __P((int));
88 static void backeval __P((Char *, bool));
94 Char gbuf[BUFSIZE], *gstart, *b, *u, *e;
102 for (b = gstart, e = &gbuf[BUFSIZE - 1];
103 *s && *s != '/' && *s != ':' && b < e;
107 if (gethdir(gstart)) {
108 if (adrof(STRnonomatch))
112 stderror(ERR_UNKUSER, short2str(gstart));
114 stderror(ERR_NOHOME);
116 b = &gstart[Strlen(gstart)];
118 slash = gstart[0] == '/' && gstart[1] == '\0';
126 if (slash && gstart[1] == '/')
129 return (Strsave(gstart));
140 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
141 * in the directory stack. PWP: let =foobar pass through (for X windows)
143 if (old[1] == '-' && (old[2] == '\0' || old[2] == '/')) {
148 else if (Isdigit(old[1])) {
151 for (b = &old[2]; Isdigit(*b); b++)
152 dig = dig * 10 + (*b - '0');
153 if (*b != '\0' && *b != '/')
154 /* =<number>foobar */
161 if (!getstakd(new, dig))
164 /* Copy the rest of the string */
165 for (d = &new[Strlen(new)];
166 d < &new[BUFSIZE - 1] && (*d++ = *b++) != '\0';)
178 Char *pm, *pe, *lm, *pl;
181 int size = GLOBSPACE;
183 nv = vl = (Char **) xmalloc((size_t) (sizeof(Char *) * size));
187 /* copy part up to the brace */
188 for (lm = gbuf, p = s; *p != LBRC; *lm++ = *p++)
191 /* check for balanced braces */
192 for (i = 0, pe = ++p; *pe; pe++)
194 if (Ismbyte1(*pe) && *(pe + 1) != EOS)
197 #endif /* DSPMBYTE */
199 /* Ignore everything between [] */
200 for (++pe; *pe != RBRK && *pe != EOS; pe++)
202 if (Ismbyte1(*pe) && *(pe + 1) != EOS)
205 #endif /* DSPMBYTE */
212 else if (*pe == LBRC)
214 else if (*pe == RBRC) {
220 if (i != 0 || *pe == '\0') {
225 for (i = 0, pl = pm = p; pm <= pe; pm++)
227 if (Ismbyte1(*pm) && pm + 1 <= pe)
230 #endif /* DSPMBYTE */
233 for (++pm; *pm != RBRK && *pm != EOS; pm++)
235 if (Ismbyte1(*pm) && *(pm + 1) != EOS)
238 #endif /* DSPMBYTE */
262 (void) Strcpy(lm, pl);
263 (void) Strcat(gbuf, pe + 1);
265 *vl++ = Strsave(gbuf);
268 if (vl == &nv[size]) {
270 nv = (Char **) xrealloc((ptr_t) nv,
271 (size_t) (size * sizeof(Char *)));
272 vl = &nv[size - GLOBSPACE];
286 expbrace(nvp, elp, size)
290 Char **vl, **el, **nv, *s;
296 for (el = vl; *el; el++)
299 for (s = *vl; s; s = *++vl) {
303 /* leave {} untouched for find */
304 if (s[0] == '{' && (s[1] == '\0' || (s[1] == '}' && s[2] == '\0')))
306 if ((b = Strchr(s, '{')) != NULL) {
310 #if defined (DSPMBYTE)
311 if (b != s && Ismbyte2(*b) && Ismbyte1(*(b-1))) {
312 /* The "{" is the 2nd byte of a MB character */
315 #endif /* DSPMBYTE */
316 if ((len = globbrace(s, b, &bl)) < 0) {
318 stderror(ERR_MISSING, -len);
326 if (&el[len] >= &nv[size]) {
328 l = (int) (&el[len] - &nv[size]);
329 size += GLOBSPACE > l ? GLOBSPACE : l;
332 nv = (Char **) xrealloc((ptr_t) nv,
333 (size_t) (size * sizeof(Char *)));
346 for (bp = el; bp != vp; bp--)
356 for (bp = bl + 1; *bp; *vp++ = *bp++)
372 Char **nv, **vl, **el;
373 int size = GLOBSPACE;
376 nv = vl = (Char **) xmalloc((size_t) (sizeof(Char *) * size));
380 * Step 1: expand backquotes.
382 while ((s = *v++) != '\0') {
383 if (Strchr(s, '`')) {
386 (void) dobackp(s, 0);
387 for (i = 0; i < pargc; i++) {
389 if (vl == &nv[size]) {
391 nv = (Char **) xrealloc((ptr_t) nv,
392 (size_t) (size * sizeof(Char *)));
393 vl = &nv[size - GLOBSPACE];
396 xfree((ptr_t) pargv);
401 if (vl == &nv[size]) {
403 nv = (Char **) xrealloc((ptr_t) nv,
404 (size_t) (size * sizeof(Char *)));
405 vl = &nv[size - GLOBSPACE];
415 * Step 2: expand braces
418 expbrace(&nv, &el, size);
425 for (s = *vl; s; s = *++vl)
427 Char gp[BUFSIZE], *ns;
429 *vl = globtilde(nv, s);
432 if ((ns = globequal(gp, s)) == NULL) {
433 if (!adrof(STRnonomatch)) {
440 /* Expansion succeeded */
451 * Step 4: expand .. if the variable symlinks==expand is set
453 if ( symlinks == SYM_EXPAND )
454 for (s = *vl; s; s = *++vl) {
455 *vl = dnormalize(s, 1);
464 handleone(str, vl, action)
475 setname(short2str(str));
477 stderror(ERR_NAME | ERR_AMBIG);
481 for (t = vlp; (p = *t++) != '\0'; chars++)
484 str = (Char *)xmalloc((size_t)(chars * sizeof(Char)));
485 for (t = vlp, strp = str; (p = *t++) != '\0'; chars++) {
487 *strp++ = *p++ & TRIM;
494 str = Strsave(strip(*vlp));
507 int gflgs = GLOB_QUOTE | GLOB_NOMAGIC | GLOB_ALTNOT;
510 int nonomatch = adrof(STRnonomatch) != 0, magic = 0, match = 0;
520 gflgs |= GLOB_NOCHECK;
523 ptr = short2qstr(*vl);
524 switch (glob(ptr, gflgs, 0, &globv)) {
528 stderror(ERR_NAME | ERR_GLOB);
537 if (globv.gl_flags & GLOB_MAGCHAR) {
538 match |= (globv.gl_matchc != 0);
541 gflgs |= GLOB_APPEND;
544 vl = (globv.gl_pathc == 0 || (magic && !match && !nonomatch)) ?
545 NULL : blk2short(globv.gl_pathv);
556 Char *v[2], **vl, **vo;
559 noglob = adrof(STRnoglob) != 0;
566 return (strip(Strsave(str)));
570 * Expand back-quote, tilde and brace
573 if (noglob || (gflg & G_GLOB) == 0) {
576 return (Strsave(STRNULL));
579 return (handleone(str, vo, action));
587 else if (noglob || (gflg & G_GLOB) == 0)
588 return (strip(Strsave(str)));
593 if ((gflg & G_CSH) && vl != vo)
596 setname(short2str(str));
597 stderror(ERR_NAME | ERR_NOMATCH);
601 return (Strsave(STRNULL));
604 return (handleone(str, vl, action));
621 gargc = blklen(gargv);
625 noglob = adrof(STRnoglob) != 0;
629 * Expand back-quote, tilde and brace
631 vl = vo = globexpand(v);
633 vl = vo = saveblk(v);
635 if (!noglob && (gflg & G_GLOB)) {
643 gargc = vl ? blklen(vl) : 0;
651 gargv = (Char **) xmalloc((size_t) (sizeof(Char *) * gargsiz));
659 void (*f) __P((int));
663 while ((p = *t++) != '\0')
674 while ((p = *t++) != '\0')
683 register Char *p, *c;
685 while ((p = *t++) != '\0') {
686 if (*p == '~' || *p == '=')
688 else if (*p == '{' &&
689 (p[1] == '\0' || (p[1] == '}' && p[2] == '\0')))
692 * The following line used to be *(c = p++), but hp broke their
693 * optimizer in 9.01, so we break the assignment into two pieces
694 * The careful reader here will note that *most* compiler workarounds
695 * in tcsh are either for apollo/DomainOS or hpux. Is it a coincidence?
697 while ( *(c = p) != '\0') {
703 * We do want to expand echo `echo '*'`, so we don't
704 * use this piece of code anymore.
706 while (*p && *p != '`')
708 if (*p) /* Quoted chars */
713 if (*p) /* The matching ` */
723 else if (symlinks == SYM_EXPAND &&
724 *p && ISDOTDOT(c) && (c == *(t-1) || *(c-1) == '/') )
731 * Command substitute cp. If literal, then this is a substitution from a
732 * << redirection, and so we should not crunch blanks and tabs, separating
733 * words only at newlines.
740 register Char *lp, *rp;
741 Char *ep, word[LONGBSIZE];
750 pargv = (Char **) xmalloc((size_t) (sizeof(Char *) * pargsiz));
752 pargcp = pargs = word;
754 pnleft = LONGBSIZE - 4;
756 #if defined(DSPMBYTE)
757 for (lp = cp;; lp++) {
759 (lp-1 < cp || !Ismbyte2(*lp) || !Ismbyte1(*(lp-1)))) {
763 for (lp = cp; *lp != '`'; lp++) {
764 #endif /* DSPMBYTE */
773 for (rp = lp; *rp && *rp != '`'; rp++)
780 oops: stderror(ERR_UNMATCHED, '`');
783 backeval(ep, literal);
790 backeval(cp, literal)
794 register int icnt, c;
796 struct command faket;
799 Char *fakecom[2], ibuf[BUFSIZE];
804 quoted = (literal || (cp[0] & QUOTE)) ? QUOTE : 0;
805 faket.t_dtyp = NODE_COMMAND;
806 faket.t_dflg = F_BACKQ;
810 faket.t_dcom = fakecom;
811 fakecom[0] = STRfakecom1;
815 * We do the psave job to temporarily change the current job so that the
816 * following fork is considered a separate job. This is so that when
817 * backquotes are used in a builtin function that calls glob the "current
818 * job" is not corrupted. We only need one level of pushed jobs as long as
819 * we are sure to fork here.
824 * It would be nicer if we could integrate this redirection more with the
825 * routines in sh.sem.c by doing a fake execute on a builtin function that
829 if (pfork(&faket, -1) == 0) {
832 (void) close(pvec[0]);
833 (void) dmove(pvec[1], 1);
834 (void) dmove(SHDIAG, 2);
837 * Bugfix for nested backquotes by Michael Greim <greim@sbsvax.UUCP>,
838 * posted to comp.bugs.4bsd 12 Sep. 1989.
840 if (pargv) /* mg, 21.dec.88 */
841 blkfree(pargv), pargv = 0, pargsiz = 0;
848 * In the child ``forget'' everything about current aliases or
859 t = syntax(paraml.next, ¶ml, 0);
863 t->t_dflg |= F_NOFORK;
865 (void) sigignore(SIGTSTP);
868 (void) sigignore(SIGTTIN);
871 (void) sigignore(SIGTTOU);
873 execute(t, -1, NULL, NULL);
877 (void) close(pvec[1]);
889 icnt = read(pvec[0], tibuf, BUFSIZE);
890 while (icnt == -1 && errno == EINTR);
895 for (i = 0; i < icnt; i++)
896 ip[i] = (unsigned char) tibuf[i];
910 * Continue around the loop one more time, so that we can eat
911 * the last newline without terminating this word.
916 if (!quoted && (c == ' ' || c == '\t'))
922 * Unless at end-of-file, we will form a new word here if there were
923 * characters in the word, or in any case when we take text literally.
924 * If we didn't make empty words here when literal was set then we
925 * would lose blank lines.
927 if (c != -1 && (cnt || literal))
931 (void) close(pvec[0]);
941 stderror(ERR_WTOOLONG);
942 *pargcp++ = (Char) c;
950 if (pargc == pargsiz - 1) {
951 pargsiz += GLOBSPACE;
952 pargv = (Char **) xrealloc((ptr_t) pargv,
953 (size_t) (pargsiz * sizeof(Char *)));
955 pargv[pargc++] = Strsave(pargs);
962 Gmatch(string, pattern)
963 Char *string, *pattern;
965 return Gnmatch(string, pattern, NULL);
969 Gnmatch(string, pattern, endstr)
970 Char *string, *pattern, **endstr;
972 Char **blk, **p, *tstring = string;
973 int gpol = 1, gres = 0;
975 if (*pattern == '^') {
980 blk = (Char **) xmalloc((size_t) (GLOBSPACE * sizeof(Char *)));
981 blk[0] = Strsave(pattern);
984 expbrace(&blk, NULL, GLOBSPACE);
987 /* Exact matches only */
988 for (p = blk; *p; p++)
989 gres |= pmatch(string, *p, &tstring) == 2 ? 1 : 0;
991 /* partial matches */
992 int minc = 0x7fffffff;
993 for (p = blk; *p; p++)
994 if (pmatch(string, *p, &tstring) != 0) {
995 int t = (int) (tstring - string);
997 if (minc == -1 || minc > t)
1000 *endstr = string + minc;
1004 return(gres == gpol);
1008 * Return 2 on exact match,
1009 * Return 1 on substring match.
1010 * Return 0 on no match.
1011 * *estr will point to the end of the longest exact or substring match.
1014 pmatch(string, pattern, estr)
1015 register Char *string, *pattern, **estr;
1017 register Char stringc, patternc;
1018 int match, negate_range;
1019 Char rangec, *oestr, *pestr;
1022 stringc = *string & TRIM;
1024 * apollo compiler bug: switch (patternc = *pattern++) dies
1026 patternc = *pattern++;
1030 return (stringc == 0 ? 2 : 1);
1038 while (*string) string++;
1046 switch(pmatch(string, pattern, estr)) {
1055 abort(); /* Cannot happen */
1072 if ((negate_range = (*pattern == '^')) != 0)
1074 while ((rangec = *pattern++) != '\0') {
1079 if (rangec == '-' && *(pattern-2) != '[' && *pattern != ']') {
1080 match = (globcharcoll(stringc, *pattern & TRIM) <= 0 &&
1081 globcharcoll(*(pattern-2) & TRIM, stringc) <= 0);
1085 match = (stringc == (rangec & TRIM));
1088 stderror(ERR_NAME | ERR_MISSING, ']');
1089 if (match == negate_range)
1094 if ((patternc & TRIM) != stringc)
1106 register Char *p, *q;
1113 n = (int) ((p - s1) + (q - s2) - 1);
1114 if (++gargc >= gargsiz) {
1115 gargsiz += GLOBSPACE;
1116 gargv = (Char **) xrealloc((ptr_t) gargv,
1117 (size_t) (gargsiz * sizeof(Char *)));
1120 p = gargv[gargc - 1] = (Char *) xmalloc((size_t) (n * sizeof(Char)));
1121 for (q = s1; (*p++ = *q++) != '\0';)
1123 for (p--, q = s2; (*p++ = *q++) != '\0';)
1130 register Char **a, **b;
1132 if (!a) /* check for NULL */
1137 if (!*a) /* check for NULL */
1138 return (*b ? 1 : 0);
1142 return (int) collate(*a, *b);