2 * sh.glob.c: Regular expression expansion
5 * Copyright (c) 1980, 1991 The Regents of the University of California.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 #define G_NONE 0 /* No globbing needed */
42 #define G_GLOB 1 /* string contains *?[] characters */
43 #define G_CSH 2 /* string contains ~`{ characters */
45 #define GLOBSPACE 100 /* Alloc increment */
55 * Globbing is now done in two stages. In the first pass we expand the
56 * csh globbing idioms ~`{ and then, if needed, we proceed with the
57 * normal globbing of ?*[
59 * Csh type globbing is handled in globexpand() and the rest is
60 * handled in glob() which is part of the 4.4BSD libc.
63 static Char *globtilde (Char *);
64 static Char *handleone (Char *, Char **, int);
65 static Char **libglob (Char **);
66 static Char **globexpand (Char **, int);
67 static int globbrace (const Char *, Char ***);
68 static void expbrace (Char ***, Char ***, int);
69 static void pword (struct blk_buf *, struct Strbuf *);
70 static void backeval (struct blk_buf *, struct Strbuf *, Char *,
75 Char *name, *u, *home, *res;
82 for (s++; *s && *s != '/' && *s != ':'; s++)
85 name = Strnsave(u + 1, s - (u + 1));
86 cleanup_push(name, xfree);
89 if (adrof(STRnonomatch)) {
94 stderror(ERR_UNKUSER, short2str(name));
99 if (home[0] == '/' && home[1] == '\0' && s[0] == '/')
102 res = Strspl(home, s);
108 /* Returns a newly allocated string, old or NULL */
117 * kfk - 17 Jan 1984 - stack hack allows user to get at arbitrary dir names
118 * in stack. PWP: let =foobar pass through (for X windows)
120 if (old[1] == '-' && (old[2] == '\0' || old[2] == '/')) {
122 const Char *olddir = varval (STRowd);
124 if (olddir && *olddir &&
125 !dcwd->di_next->di_name && !dcwd->di_prev->di_name)
126 return Strspl(olddir, &old[2]);
130 else if (Isdigit(old[1])) {
133 for (b = &old[2]; Isdigit(*b); b++)
134 dig = dig * 10 + (*b - '0');
135 if (*b != '\0' && *b != '/')
136 /* =<number>foobar */
146 return Strspl(dir, b);
150 globbrace(const Char *s, Char ***bl)
152 struct Strbuf gbuf = Strbuf_INIT;
153 struct blk_buf bb = BLK_BUF_INIT;
155 const Char *p, *pm, *pe, *pl;
158 /* copy part up to the brace */
159 for (p = s; *p != LBRC; p++)
163 /* check for balanced braces */
164 for (i = 0, pe = ++p; *pe; pe++)
166 /* Ignore everything between [] */
167 for (++pe; *pe != RBRK && *pe != EOS; pe++)
172 else if (*pe == LBRC)
174 else if (*pe == RBRC) {
180 if (i != 0 || *pe == '\0')
183 Strbuf_appendn(&gbuf, s, prefix_len);
185 for (i = 0, pl = pm = p; pm <= pe; pm++)
188 for (++pm; *pm != RBRK && *pm != EOS; pm++)
209 gbuf.len = prefix_len;
210 Strbuf_appendn(&gbuf, pl, pm - pl);
211 Strbuf_append(&gbuf, pe + 1);
212 Strbuf_terminate(&gbuf);
213 bb_append(&bb, Strsave(gbuf.s));
220 *bl = bb_finish(&bb);
227 expbrace(Char ***nvp, Char ***elp, int size)
229 Char **vl, **el, **nv, *s;
235 el = vl + blklen(vl);
237 for (s = *vl; s; s = *++vl) {
240 /* leave {} untouched for find */
241 if (s[0] == '{' && (s[1] == '\0' || (s[1] == '}' && s[2] == '\0')))
243 if (Strchr(s, '{') != NULL) {
247 if ((len = globbrace(s, &bl)) < 0)
248 stderror(ERR_MISSING, -len);
255 if (&el[len] >= &nv[size]) {
257 l = &el[len] - &nv[size];
258 size += GLOBSPACE > l ? GLOBSPACE : l;
261 nv = xrealloc(nv, size * sizeof(Char *));
262 *nvp = nv; /* To keep cleanups working */
275 for (bp = el; bp != vp; bp--)
285 for (bp = bl + 1; *bp; *vp++ = *bp++)
296 globexpand(Char **v, int noglob)
299 Char ***fnv, **vl, **el;
300 int size = GLOBSPACE;
303 fnv = xmalloc(sizeof(Char ***));
304 *fnv = vl = xmalloc(sizeof(Char *) * size);
306 cleanup_push(fnv, blk_indirect_cleanup);
309 * Step 1: expand backquotes.
311 while ((s = *v++) != NULL) {
312 if (Strchr(s, '`')) {
316 expanded = dobackp(s, 0);
317 for (i = 0; expanded[i] != NULL; i++) {
319 if (vl == &(*fnv)[size]) {
321 *fnv = xrealloc(*fnv, size * sizeof(Char *));
322 vl = &(*fnv)[size - GLOBSPACE];
329 if (vl == &(*fnv)[size]) {
331 *fnv = xrealloc(*fnv, size * sizeof(Char *));
332 vl = &(*fnv)[size - GLOBSPACE];
342 * Step 2: expand braces
345 expbrace(fnv, &el, size);
352 for (s = *vl; s; s = *++vl)
359 if ((ns = globequal(s)) == NULL) {
360 if (!adrof(STRnonomatch))
361 stderror(ERR_DEEP); /* Error */
364 /* Expansion succeeded */
375 * Step 4: expand .. if the variable symlinks==expand is set
377 if (symlinks == SYM_EXPAND) {
378 for (s = *vl; s; s = *++vl) {
379 *vl = dnormalize(s, 1);
393 handleone(Char *str, Char **vl, int action)
400 setname(short2str(str));
402 stderror(ERR_NAME | ERR_AMBIG);
406 for (t = vl; (p = *t++) != NULL; chars++)
408 str = xmalloc(chars * sizeof(Char));
409 for (t = vl, strp = str; (p = *t++) != NULL; chars++) {
411 *strp++ = *p++ & TRIM;
418 str = Strsave(strip(*vl));
430 int gflgs = GLOB_QUOTE | GLOB_NOMAGIC | GLOB_ALTNOT;
433 int nonomatch = adrof(STRnonomatch) != 0, magic = 0, match = 0;
435 if (adrof(STRglobdot))
438 if (adrof(STRglobstar))
449 gflgs |= GLOB_NOCHECK;
452 ptr = short2qstr(*vl);
453 switch (glob(ptr, gflgs, 0, &globv)) {
457 stderror(ERR_NAME | ERR_GLOB);
466 if (globv.gl_flags & GLOB_MAGCHAR) {
467 match |= (globv.gl_matchc != 0);
470 gflgs |= GLOB_APPEND;
473 vl = (globv.gl_pathc == 0 || (magic && !match && !nonomatch)) ?
474 NULL : blk2short(globv.gl_pathv);
480 globone(Char *str, int action)
482 Char *v[2], **vl, **vo;
485 noglob = adrof(STRnoglob) != 0;
490 return (strip(Strsave(str)));
494 * Expand back-quote, tilde and brace
496 vo = globexpand(v, noglob);
497 if (noglob || (gflg & G_GLOB) == 0) {
501 cleanup_push(vo, blk_cleanup);
503 else if (noglob || (gflg & G_GLOB) == 0)
504 return (strip(Strsave(str)));
516 setname(short2str(str));
517 stderror(ERR_NAME | ERR_NOMATCH);
520 if (vl && vl[0] == NULL) {
523 return (Strsave(STRNULL));
526 return (handleone(str, vl, action));
536 globall(Char **v, int gflg)
544 noglob = adrof(STRnoglob) != 0;
548 * Expand back-quote, tilde and brace
550 vl = vo = globexpand(v, noglob);
552 vl = vo = saveblk(v);
554 if (!noglob && (gflg & G_GLOB)) {
555 cleanup_push(vo, blk_cleanup);
568 glob_all_or_error(Char **v)
574 v = globall(v, gflag);
576 stderror(ERR_NAME | ERR_NOMATCH);
585 rscan(Char **t, void (*f) (Char))
589 while ((p = *t++) != NULL)
599 while ((p = *t++) != NULL)
601 #if INVALID_BYTE != 0
602 if ((*p & INVALID_BYTE) != INVALID_BYTE) /* *p < INVALID_BYTE */
616 while ((p = *t++) != NULL) {
617 if (*p == '~' || *p == '=')
619 else if (*p == '{' &&
620 (p[1] == '\0' || (p[1] == '}' && p[2] == '\0')))
627 * We do want to expand echo `echo '*'`, so we don't
628 * use this piece of code anymore.
631 while (*p && *p != '`')
633 if (*p) /* Quoted chars */
638 if (!*p) /* The matching ` */
646 else if (symlinks == SYM_EXPAND &&
647 p[1] && ISDOTDOT(p) && (p == *(t-1) || *(p-1) == '/') )
656 * Command substitute cp. If literal, then this is a substitution from a
657 * << redirection, and so we should not crunch blanks and tabs, separating
658 * words only at newlines.
661 dobackp(Char *cp, int literal)
663 struct Strbuf word = Strbuf_INIT;
664 struct blk_buf bb = BLK_BUF_INIT;
667 cleanup_push(&bb, bb_cleanup);
668 cleanup_push(&word, Strbuf_cleanup);
670 for (lp = cp; *lp != '\0' && *lp != '`'; lp++)
672 Strbuf_appendn(&word, cp, lp - cp);
676 for (rp = lp; *rp && *rp != '`'; rp++)
685 stderror(ERR_UNMATCHED, '`');
687 ep = Strnsave(lp, rp - lp);
688 cleanup_push(ep, xfree);
689 backeval(&bb, &word, ep, literal);
697 return bb_finish(&bb);
702 backeval(struct blk_buf *bb, struct Strbuf *word, Char *cp, int literal)
706 struct command faket;
709 Char *fakecom[2], ibuf[BUFSIZE];
714 for (ip = cp; (*ip & QUOTE) != 0; ip++)
716 quoted = *ip == '\0';
719 faket.t_dtyp = NODE_COMMAND;
720 faket.t_dflg = F_BACKQ;
724 faket.t_dcom = fakecom;
725 fakecom[0] = STRfakecom1;
729 * We do the psave job to temporarily change the current job so that the
730 * following fork is considered a separate job. This is so that when
731 * backquotes are used in a builtin function that calls glob the "current
732 * job" is not corrupted. We only need one level of pushed jobs as long as
733 * we are sure to fork here.
736 cleanup_push(&faket, psavejob_cleanup); /* faket is only a marker */
739 * It would be nicer if we could integrate this redirection more with the
740 * routines in sh.sem.c by doing a fake execute on a builtin function that
744 cleanup_push(&pvec[0], open_cleanup);
745 cleanup_push(&pvec[1], open_cleanup);
746 if (pfork(&faket, -1) == 0) {
752 (void) dmove(pvec[1], 1);
753 (void) dmove(SHDIAG, 2);
757 for (arginp = cp; *cp; cp++) {
759 if (is_set(STRcsubstnonl) && (*cp == '\n' || *cp == '\r'))
764 * In the child ``forget'' everything about current aliases or
772 omark = cleanup_push_mark();
775 struct wordent paraml1;
784 cleanup_pop_mark(omark);
794 (void) lex(&paraml1);
795 cleanup_push(&paraml1, lex_cleanup);
799 t = syntax(paraml1.next, &paraml1, 0);
800 cleanup_push(t, syntax_cleanup);
801 /* The F_BACKQ flag must be set so the job output is correct if
802 * printexitvalue is set. If it's not set, the job output
803 * will have "Exit N" appended where N is the exit status. */
805 t->t_dflg = F_BACKQ|F_NOFORK;
809 signal(SIGTSTP, SIG_IGN);
812 signal(SIGTTIN, SIG_IGN);
815 signal(SIGTTOU, SIG_IGN);
817 execute(t, -1, NULL, NULL, TRUE);
819 cleanup_until(&paraml1);
822 cleanup_until(&pvec[1]);
831 icnt = wide_read(pvec[0], ibuf, BUFSIZE, 0);
841 #if defined(WINNT_NATIVE) || defined(__CYGWIN__)
844 #endif /* WINNT_NATIVE || __CYGWIN__ */
847 * Continue around the loop one more time, so that we can eat
848 * the last newline without terminating this word.
853 if (!quoted && (c == ' ' || c == '\t'))
856 if (c == '\\' || quoted)
858 Strbuf_append1(word, c);
861 * Unless at end-of-file, we will form a new word here if there were
862 * characters in the word, or in any case when we take text literally.
863 * If we didn't make empty words here when literal was set then we
864 * would lose blank lines.
866 if (c != 0 && (cnt || literal))
871 cleanup_until(&pvec[0]);
873 cleanup_until(&faket); /* psavejob_cleanup(); */
877 pword(struct blk_buf *bb, struct Strbuf *word)
881 s = Strbuf_finish(word);
887 Gmatch(const Char *string, const Char *pattern)
889 return Gnmatch(string, pattern, NULL);
893 Gnmatch(const Char *string, const Char *pattern, const Char **endstr)
896 const Char *tstring = string;
897 int gpol = 1, gres = 0;
899 if (*pattern == '^') {
904 fblk = xmalloc(sizeof(Char ***));
905 *fblk = xmalloc(GLOBSPACE * sizeof(Char *));
906 (*fblk)[0] = Strsave(pattern);
909 cleanup_push(fblk, blk_indirect_cleanup);
910 expbrace(fblk, NULL, GLOBSPACE);
913 /* Exact matches only */
914 for (p = *fblk; *p; p++)
915 gres |= t_pmatch(string, *p, &tstring, 1) == 2 ? 1 : 0;
919 /* partial matches */
920 end = Strend(string);
921 for (p = *fblk; *p; p++)
922 if (t_pmatch(string, *p, &tstring, 1) != 0) {
931 return(gres == gpol);
935 * Return 2 on exact match.
936 * Return 1 on substring match.
937 * Return 0 on no match.
938 * *estr will point to the end of the longest exact or substring match.
941 t_pmatch(const Char *string, const Char *pattern, const Char **estr, int cs)
943 Char stringc, patternc, rangec;
944 int match, negate_range;
945 const Char *pestr, *nstring;
947 for (nstring = string;; string = nstring) {
948 stringc = *nstring++ & TRIM;
949 patternc = *pattern++ & TRIM;
953 return (stringc == '\0' ? 2 : 1);
960 *estr = Strend(string);
966 switch(t_pmatch(string, pattern, estr, cs)) {
970 pestr = *estr;/*FIXME: does not guarantee longest match */
975 abort(); /* Cannot happen */
977 stringc = *string++ & TRIM;
991 if ((negate_range = (*pattern == '^')) != 0)
993 while ((rangec = *pattern++ & TRIM) != '\0') {
998 if (*pattern == '-' && pattern[1] != ']') {
1001 rangec2 = *pattern++ & TRIM;
1002 match = (globcharcoll(stringc, rangec2, 0) <= 0 &&
1003 globcharcoll(rangec, stringc, 0) <= 0);
1006 match = (stringc == rangec);
1009 stderror(ERR_NAME | ERR_MISSING, ']');
1010 if ((!match) && (stringc == '\0'))
1012 if (match == negate_range)
1016 if (cs ? patternc != stringc
1017 : Tolower(patternc) != Tolower(stringc))