2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
28 __FBSDID("$FreeBSD$");
37 #include "archive_pathmatch.h"
40 * Check whether a character 'c' is matched by a list specification [...]:
41 * * Leading '!' or '^' negates the class.
42 * * <char>-<char> is a range of characters
43 * * \<char> removes any special meaning for <char>
45 * Some interesting boundary cases:
46 * a-d-e is one range (a-d) followed by two single characters - and e.
47 * \a-\d is same as a-d
48 * a\-d is three single characters: a, d, -
49 * Trailing - is not special (so [a-] is two characters a and -).
50 * Initial - is not special ([a-] is same as [-a] is same as [\\-a])
51 * This function never sees a trailing \.
/*
 * pm_list: test character 'c' against the body of a [...] class.
 *
 * 'start' is the first character of the class body and 'end' is treated
 * as one past its last character (a '-' found at end - 1 counts as
 * trailing).  'flags' is accepted but explicitly unused.
 *
 * The locals 'match' and 'nomatch' start out as 1 and 0; according to
 * the comment at the negation test, a leading '!' or '^' is meant to
 * make a non-match the successful outcome.
 *
 * NOTE(review): the embedded line numbers in this listing skip, so the
 * scanning loop, the declaration of rangeEnd and the return statements
 * are not visible here -- confirm against the complete file.
 */
56 pm_list(const char *start, const char *end, const char c, int flags)
58 const char *p = start;
59 char rangeStart = '\0', nextRangeStart;
60 int match = 1, nomatch = 0;
62 /* This will be used soon... */
63 (void)flags; /* UNUSED */
65 /* If this is a negated class, return success for nomatch. */
/* NOTE(review): *p is read before the p < end bound is tested; putting
 * the bound test first would be the safer operand order. */
66 if ((*p == '!' || *p == '^') && p < end) {
/* Reset per character; only a literal character (see below) sets it. */
73 nextRangeStart = '\0';
76 /* Trailing or initial '-' is not special. */
77 if ((rangeStart == '\0') || (p == end - 1)) {
/* NOTE(review): rangeStart and c are plain char, whose signedness is
 * implementation-defined, so ranges that involve bytes above 0x7F may
 * compare differently between platforms -- confirm intended behavior. */
84 if ((rangeStart <= c) && (c <= rangeEnd))
94 nextRangeStart = *p; /* Possible start of range. */
/* Carry the candidate range start over to the next character. */
96 rangeStart = nextRangeStart;
/*
 * pm_list_w: wide-character counterpart of pm_list.  Tests 'c' against
 * the body of a [...] class delimited by 'start' and 'end', where 'end'
 * is treated as one past the last class character (a '-' found at
 * end - 1 counts as trailing).  'flags' is accepted but explicitly
 * unused.
 *
 * NOTE(review): the embedded line numbers in this listing skip, so the
 * scanning loop and the return statements are not visible here --
 * confirm against the complete file.
 */
103 pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
105 const wchar_t *p = start;
106 wchar_t rangeStart = L'\0', nextRangeStart;
107 int match = 1, nomatch = 0;
109 /* This will be used soon... */
110 (void)flags; /* UNUSED */
112 /* If this is a negated class, return success for nomatch. */
/* NOTE(review): *p is read before the p < end bound is tested; putting
 * the bound test first would be the safer operand order. */
113 if ((*p == L'!' || *p == L'^') && p < end) {
/* Reset per character; only a literal character (see below) sets it. */
120 nextRangeStart = L'\0';
123 /* Trailing or initial '-' is not special. */
124 if ((rangeStart == L'\0') || (p == end - 1)) {
/* The character after '-' is the upper bound of the range. */
128 wchar_t rangeEnd = *++p;
/* NOTE(review): the statement guarded by this backslash test is missing
 * from the listing; presumably it steps to the escaped character. */
129 if (rangeEnd == L'\\')
131 if ((rangeStart <= c) && (c <= rangeEnd))
141 nextRangeStart = *p; /* Possible start of range. */
/* Carry the candidate range start over to the next character. */
143 rangeStart = nextRangeStart;
150 * If s is pointing to "./", ".//", "./././" or the like, skip it.
/*
 * pm_slashskip: step over redundant leading path syntax in 's'.
 *
 * The two visible alternatives of the condition cover a "./" pair and
 * a "." that ends the string.
 *
 * NOTE(review): the first alternative of the condition, the statement
 * it controls and the return statement are missing from this listing;
 * presumably a plain '/' is skipped as well and the advanced pointer is
 * returned -- confirm against the complete file.
 */
153 pm_slashskip(const char *s) {
155 || (s[0] == '.' && s[1] == '/')
156 || (s[0] == '.' && s[1] == '\0'))
/*
 * pm_slashskip_w: wide-character counterpart of pm_slashskip.  Returns
 * a pointer into 's' (const wchar_t *, per the declaration below).
 *
 * The two visible alternatives of the condition cover a "./" pair and
 * a "." that ends the string.
 *
 * NOTE(review): the first alternative of the condition, the statement
 * it controls and the return statement are missing from this listing;
 * presumably a plain '/' is skipped as well -- confirm against the
 * complete file.
 */
161 static const wchar_t *
162 pm_slashskip_w(const wchar_t *s) {
164 || (s[0] == L'.' && s[1] == L'/')
165 || (s[0] == L'.' && s[1] == L'\0'))
/*
 * pm: match pattern 'p' against string 's' under 'flags'.
 *
 * Behavior that is visible in this listing:
 *  - a leading "./" on either argument is stepped over with
 *    pm_slashskip();
 *  - PATHMATCH_NO_ANCHOR_END is consulted at the end of the pattern,
 *    after a '/' and for a final '$';
 *  - a [...] class is located by scanning for ']' (a backslash is
 *    checked for while scanning) and then tested with pm_list();
 *  - a '/' in the pattern requires '/' or end of string in 's'.
 *
 * NOTE(review): the embedded line numbers skip, so the main loop, the
 * per-character dispatch and most return statements are missing from
 * this listing -- confirm against the complete file.
 */
171 pm(const char *p, const char *s, int flags)
176 * Ignore leading './', './/', '././', etc.
178 if (s[0] == '.' && s[1] == '/')
179 s = pm_slashskip(s + 1);
180 if (p[0] == '.' && p[1] == '/')
181 p = pm_slashskip(p + 1);
187 if (flags & PATHMATCH_NO_ANCHOR_END)
189 /* "dir" == "dir/" == "dir/." */
194 /* ? always succeeds, unless we hit end of 's' */
199 /* "*" == "**" == "***" ... */
202 /* Trailing '*' always succeeds. */
/* NOTE(review): this call goes through archive_pathmatch, which is not
 * defined in this listing (presumably declared in archive_pathmatch.h
 * and mapped to the entry point of this file) -- confirm. */
206 if (archive_pathmatch(p, s, flags))
213 * Find the end of the [...] character class,
214 * ignoring \] that might occur within the class.
217 while (*end != '\0' && *end != ']') {
/* A backslash only counts as an escape when a character follows it. */
218 if (*end == '\\' && end[1] != '\0')
223 /* We found [...], try to match it. */
224 if (!pm_list(p + 1, end, *s, flags))
226 p = end; /* Jump to trailing ']' char. */
229 /* No final ']', so just match '['. */
234 /* Trailing '\\' matches itself. */
245 if (*s != '/' && *s != '\0')
247 /* Note: pattern "/\./" won't match "/";
248 * pm_slashskip() correctly stops at backslash. */
251 if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
253 --p; /* Counteract the increment below. */
257 /* '$' is special only at end of pattern and only
258 * if PATHMATCH_NO_ANCHOR_END is specified. */
259 if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
260 /* "dir" == "dir/" == "dir/." */
261 return (*pm_slashskip(s) == '\0');
263 /* Otherwise, '$' is not special. */
/*
 * pm_w: wide-character counterpart of pm.  Matches pattern 'p' against
 * string 's' under 'flags'.
 *
 * Behavior that is visible in this listing:
 *  - a leading "./" on either argument is stepped over with
 *    pm_slashskip_w();
 *  - PATHMATCH_NO_ANCHOR_END is consulted at the end of the pattern,
 *    after a '/' and for a final '$';
 *  - a [...] class is located by scanning for ']' (a backslash is
 *    checked for while scanning) and then tested with pm_list_w();
 *  - a '/' in the pattern requires '/' or end of string in 's'.
 *
 * NOTE(review): the embedded line numbers skip, so the main loop, the
 * per-character dispatch and most return statements are missing from
 * this listing -- confirm against the complete file.
 */
276 pm_w(const wchar_t *p, const wchar_t *s, int flags)
281 * Ignore leading './', './/', '././', etc.
283 if (s[0] == L'.' && s[1] == L'/')
284 s = pm_slashskip_w(s + 1);
285 if (p[0] == L'.' && p[1] == L'/')
286 p = pm_slashskip_w(p + 1);
292 if (flags & PATHMATCH_NO_ANCHOR_END)
294 /* "dir" == "dir/" == "dir/." */
295 s = pm_slashskip_w(s);
297 return (*s == L'\0');
299 /* ? always succeeds, unless we hit end of 's' */
304 /* "*" == "**" == "***" ... */
307 /* Trailing '*' always succeeds. */
/* NOTE(review): this call goes through archive_pathmatch_w, which is
 * not defined in this listing (presumably declared in
 * archive_pathmatch.h and mapped to the wide entry point of this
 * file) -- confirm. */
311 if (archive_pathmatch_w(p, s, flags))
318 * Find the end of the [...] character class,
319 * ignoring \] that might occur within the class.
322 while (*end != L'\0' && *end != L']') {
/* A backslash only counts as an escape when a character follows it. */
323 if (*end == L'\\' && end[1] != L'\0')
328 /* We found [...], try to match it. */
329 if (!pm_list_w(p + 1, end, *s, flags))
331 p = end; /* Jump to trailing ']' char. */
334 /* No final ']', so just match '['. */
339 /* Trailing '\\' matches itself. */
350 if (*s != L'/' && *s != L'\0')
352 /* Note: pattern "/\./" won't match "/";
353 * pm_slashskip_w() correctly stops at backslash. */
354 p = pm_slashskip_w(p);
355 s = pm_slashskip_w(s);
356 if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END))
358 --p; /* Counteract the increment below. */
362 /* '$' is special only at end of pattern and only
363 * if PATHMATCH_NO_ANCHOR_END is specified. */
364 if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
365 /* "dir" == "dir/" == "dir/." */
366 return (*pm_slashskip_w(s) == L'\0');
368 /* Otherwise, '$' is not special. */
380 /* Main entry point. */
/*
 * __archive_pathmatch: match pattern 'p' against path 's'.
 *
 * An empty or NULL pattern matches only an empty or NULL string.
 * PATHMATCH_NO_ANCHOR_START is cleared from 'flags' in the leading '^'
 * handling, and when it is still set the path is walked from one '/' to
 * the next with strchr().  Otherwise the result is that of pm() on the
 * whole string.
 *
 * NOTE(review): the embedded line numbers skip, so several statements
 * (including the bodies of the visible conditionals and the loop) are
 * missing from this listing -- confirm against the complete file.
 */
382 __archive_pathmatch(const char *p, const char *s, int flags)
384 /* Empty pattern only matches the empty string. */
385 if (p == NULL || *p == '\0')
386 return (s == NULL || *s == '\0');
/* NOTE(review): s == NULL is tolerated above, but with a non-empty
 * pattern the tests below read *s and no NULL guard is visible in this
 * listing -- this looks like a NULL dereference; confirm and guard. */
388 /* Leading '^' anchors the start of the pattern. */
391 flags &= ~PATHMATCH_NO_ANCHOR_START;
394 if (*p == '/' && *s != '/')
397 /* Certain patterns and file names anchor implicitly. */
/* NOTE(review): *p == '/' is tested twice, so the third operand is
 * redundant as written.  The comment above mentions file names, so a
 * test on *s may have been intended -- confirm before changing. */
398 if (*p == '*' || *p == '/' || *p == '/') {
403 return (pm(p, s, flags));
406 /* If start is unanchored, try to match start of each path element. */
407 if (flags & PATHMATCH_NO_ANCHOR_START) {
/* strchr() returns NULL once no '/' remains, which ends the loop.
 * NOTE(review): the loop body is missing from this listing; it must
 * step past the '/' that was found or strchr() would return the same
 * position again -- confirm. */
408 for ( ; s != NULL; s = strchr(s, '/')) {
417 /* Default: Match from beginning. */
418 return (pm(p, s, flags));
/*
 * __archive_pathmatch_w: wide-character counterpart of
 * __archive_pathmatch.  Matches pattern 'p' against path 's'.
 *
 * An empty or NULL pattern matches only an empty or NULL string.
 * PATHMATCH_NO_ANCHOR_START is cleared from 'flags' in the leading '^'
 * handling, and when it is still set the path is walked from one '/' to
 * the next with wcschr(), trying pm_w() at each stop.  Otherwise the
 * result is that of pm_w() on the whole string.
 *
 * NOTE(review): the embedded line numbers skip, so several statements
 * (including the bodies of the visible conditionals) are missing from
 * this listing -- confirm against the complete file.
 */
422 __archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags)
424 /* Empty pattern only matches the empty string. */
425 if (p == NULL || *p == L'\0')
426 return (s == NULL || *s == L'\0');
/* NOTE(review): s == NULL is tolerated above, but with a non-empty
 * pattern the tests below read *s and no NULL guard is visible in this
 * listing -- this looks like a NULL dereference; confirm and guard. */
428 /* Leading '^' anchors the start of the pattern. */
431 flags &= ~PATHMATCH_NO_ANCHOR_START;
434 if (*p == L'/' && *s != L'/')
437 /* Certain patterns and file names anchor implicitly. */
/* NOTE(review): *p == L'/' is tested twice, so the third operand is
 * redundant as written.  The comment above mentions file names, so a
 * test on *s may have been intended -- confirm before changing. */
438 if (*p == L'*' || *p == L'/' || *p == L'/') {
443 return (pm_w(p, s, flags));
446 /* If start is unanchored, try to match start of each path element. */
447 if (flags & PATHMATCH_NO_ANCHOR_START) {
/* wcschr() returns NULL once no '/' remains, which ends the loop.
 * NOTE(review): the statements between the loop header and the pm_w()
 * call are missing from this listing; they must step past the '/' that
 * was found or wcschr() would return the same position again. */
448 for ( ; s != NULL; s = wcschr(s, L'/')) {
451 if (pm_w(p, s, flags))
457 /* Default: Match from beginning. */
458 return (pm_w(p, s, flags));