2 * dirent_uri.c: a library to manipulate URIs and directory entries.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
33 #include "svn_private_config.h"
34 #include "svn_string.h"
35 #include "svn_dirent_uri.h"
37 #include "svn_ctype.h"
39 #include "dirent_uri.h"
40 #include "private/svn_fspath.h"
41 #include "private/svn_cert.h"
43 /* The canonical empty path. Can this be changed? Well, change the empty
44 test below and the path library will work, not so sure about the fs/wc
46 #define SVN_EMPTY_PATH ""
48 /* TRUE if s is the canonical empty path, FALSE otherwise */
49 #define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')
51 /* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can
52 this be changed? Well, the path library will work, not so sure about
54 #define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
56 /* This check must match the check on top of dirent_uri-tests.c and
58 #if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__)
59 #define SVN_USE_DOS_PATHS
62 /* Path type definition. Used only by internal functions. */
63 typedef enum path_type_t {
70 /**** Forward declarations *****/
73 relpath_is_canonical(const char *relpath);
76 /**** Internal implementation functions *****/
78 /* Return an internal-style new path based on PATH, allocated in POOL.
80 * "Internal-style" means that separators are all '/'.
83 internal_style(const char *path, apr_pool_t *pool)
85 #if '/' != SVN_PATH_LOCAL_SEPARATOR
87 char *p = apr_pstrdup(pool, path);
90 /* Convert all local-style separators to the canonical ones. */
91 for (; *p != '\0'; ++p)
92 if (*p == SVN_PATH_LOCAL_SEPARATOR)
100 /* Locale insensitive tolower() for converting parts of dirents and urls
101 while canonicalizing */
103 canonicalize_to_lower(char c)
105 if (c < 'A' || c > 'Z')
108 return (char)(c - 'A' + 'a');
111 /* Locale insensitive toupper() for converting parts of dirents and urls
112 while canonicalizing */
114 canonicalize_to_upper(char c)
116 if (c < 'a' || c > 'z')
119 return (char)(c - 'a' + 'A');
122 /* Calculates the length of the dirent absolute or non absolute root in
123 DIRENT, return 0 if dirent is not rooted */
125 dirent_root_length(const char *dirent, apr_size_t len)
127 #ifdef SVN_USE_DOS_PATHS
128 if (len >= 2 && dirent[1] == ':' &&
129 ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
130 (dirent[0] >= 'a' && dirent[0] <= 'z')))
132 return (len > 2 && dirent[2] == '/') ? 3 : 2;
135 if (len > 2 && dirent[0] == '/' && dirent[1] == '/')
139 while (i < len && dirent[i] != '/')
143 return len; /* Cygwin drive alias, invalid path on WIN32 */
147 while (i < len && dirent[i] != '/')
152 #endif /* SVN_USE_DOS_PATHS */
153 if (len >= 1 && dirent[0] == '/')
160 /* Return the length of substring necessary to encompass the entire
161 * previous dirent segment in DIRENT, which should be a LEN byte string.
163 * A trailing slash will not be included in the returned length except
164 * in the case in which DIRENT is absolute and there are no more
168 dirent_previous_segment(const char *dirent,
175 while (len > 0 && dirent[len] != '/'
176 #ifdef SVN_USE_DOS_PATHS
177 && (dirent[len] != ':' || len != 1)
178 #endif /* SVN_USE_DOS_PATHS */
182 /* check if the remaining segment including trailing '/' is a root dirent */
183 if (dirent_root_length(dirent, len+1) == len + 1)
189 /* Calculates the length occupied by the schema defined root of URI */
191 uri_schema_root_length(const char *uri, apr_size_t len)
195 for (i = 0; i < len; i++)
199 if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/')
201 /* We have an absolute uri */
202 if (i == 5 && strncmp("file", uri, 4) == 0)
203 return 7; /* file:// */
206 for (i += 2; i < len; i++)
210 return len; /* Only a hostname is found */
221 /* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has
222 a non absolute root. (E.g. '/' or 'F:' on Windows) */
224 dirent_is_rooted(const char *dirent)
229 /* Root on all systems */
230 if (dirent[0] == '/')
233 /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
234 where 'H' is any letter. */
235 #ifdef SVN_USE_DOS_PATHS
236 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
237 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
240 #endif /* SVN_USE_DOS_PATHS */
245 /* Return the length of substring necessary to encompass the entire
246 * previous relpath segment in RELPATH, which should be a LEN byte string.
248 * A trailing slash will not be included in the returned length.
251 relpath_previous_segment(const char *relpath,
258 while (len > 0 && relpath[len] != '/')
264 /* Return the length of substring necessary to encompass the entire
265 * previous uri segment in URI, which should be a LEN byte string.
267 * A trailing slash will not be included in the returned length except
268 * in the case in which URI is absolute and there are no more
272 uri_previous_segment(const char *uri,
275 apr_size_t root_length;
280 root_length = uri_schema_root_length(uri, len);
283 while (len > root_length && uri[i] != '/')
286 if (i == 0 && len > 1 && *uri == '/')
292 /* Return the canonicalized version of PATH, of type TYPE, allocated in
296 canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
301 apr_size_t schemelen = 0;
302 apr_size_t canon_segments = 0;
303 svn_boolean_t url = FALSE;
304 char *schema_data = NULL;
306 /* "" is already canonical, so just return it; note that later code
307 depends on path not being zero-length. */
308 if (SVN_PATH_IS_EMPTY(path))
310 assert(type != type_uri);
314 dst = canon = apr_pcalloc(pool, strlen(path) + 1);
316 /* If this is supposed to be a URI, it should start with
317 "scheme://". We'll copy the scheme, host name, etc. to DST and
320 if (type == type_uri)
324 while (*src && (*src != '/') && (*src != ':'))
327 if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
333 /* Found a scheme, convert to lowercase and copy to dst. */
337 *(dst++) = canonicalize_to_lower((*src++));
346 /* This might be the hostname */
348 while (*src && (*src != '/') && (*src != '@'))
353 /* Copy the username & password. */
354 seglen = src - seg + 1;
355 memcpy(dst, seg, seglen);
362 /* Found a hostname, convert to lowercase and copy to dst. */
365 *(dst++) = *(src++); /* Copy '[' */
368 || (*src >= '0' && (*src <= '9'))
369 || (*src >= 'a' && (*src <= 'f'))
370 || (*src >= 'A' && (*src <= 'F')))
372 *(dst++) = canonicalize_to_lower((*src++));
376 *(dst++) = *(src++); /* Copy ']' */
379 while (*src && (*src != '/') && (*src != ':'))
380 *(dst++) = canonicalize_to_lower((*src++));
384 /* We probably have a port number: Is it a default portnumber
385 which doesn't belong in a canonical url? */
386 if (src[1] == '8' && src[2] == '0'
387 && (src[3]== '/'|| !src[3])
388 && !strncmp(canon, "http:", 5))
392 else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
393 && (src[4]== '/'|| !src[4])
394 && !strncmp(canon, "https:", 6))
398 else if (src[1] == '3' && src[2] == '6'
399 && src[3] == '9' && src[4] == '0'
400 && (src[5]== '/'|| !src[5])
401 && !strncmp(canon, "svn:", 4))
405 else if (src[1] == '/' || !src[1])
410 while (*src && (*src != '/'))
411 *(dst++) = canonicalize_to_lower((*src++));
414 /* Copy trailing slash, or null-terminator. */
417 /* Move src and dst forward only if we are not
418 * at null-terminator yet. */
430 /* Copy to DST any separator or drive letter that must come before the
431 first regular path segment. */
432 if (! url && type != type_relpath)
435 /* If this is an absolute path, then just copy over the initial
436 separator character. */
441 #ifdef SVN_USE_DOS_PATHS
442 /* On Windows permit two leading separator characters which means an
444 if ((type == type_dirent) && *src == '/')
446 #endif /* SVN_USE_DOS_PATHS */
448 #ifdef SVN_USE_DOS_PATHS
449 /* On Windows the first segment can be a drive letter, which we normalize
451 else if (type == type_dirent &&
452 ((*src >= 'a' && *src <= 'z') ||
453 (*src >= 'A' && *src <= 'Z')) &&
456 *(dst++) = canonicalize_to_upper(*(src++));
457 /* Leave the ':' to be processed as (or as part of) a path segment
458 by the following code block, so we need not care whether it has
461 #endif /* SVN_USE_DOS_PATHS */
466 /* Parse each segment, finding the closing '/' (which might look
467 like '%2F' for URIs). */
468 const char *next = src;
469 apr_size_t slash_len = 0;
473 && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
474 canonicalize_to_upper(next[2]) == 'F')))
479 /* Record how long our "slash" is. */
482 else if (type == type_uri && next[0] == '%')
488 || (seglen == 1 && src[0] == '.')
489 || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
490 && canonicalize_to_upper(src[2]) == 'E'))
492 /* Empty or noop segment, so do nothing. (For URIs, '%2E'
493 is equivalent to '.'). */
495 #ifdef SVN_USE_DOS_PATHS
496 /* If this is the first path segment of a file:// URI and it contains a
497 windows drive letter, convert the drive letter to upper case. */
498 else if (url && canon_segments == 1 && seglen >= 2 &&
499 (strncmp(canon, "file:", 5) == 0) &&
500 src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
502 *(dst++) = canonicalize_to_upper(src[0]);
504 if (seglen > 2) /* drive relative path */
506 memcpy(dst, src + 2, seglen - 2);
514 #endif /* SVN_USE_DOS_PATHS */
517 /* An actual segment, append it to the destination path */
518 memcpy(dst, src, seglen);
525 /* Skip over trailing slash to the next segment. */
526 src = next + slash_len;
529 /* Remove the trailing slash if there was at least one
530 * canonical segment and the last segment ends with a slash.
532 * But keep in mind that, for URLs, the scheme counts as a
533 * canonical segment -- so if path is ONLY a scheme (such
534 * as "https://") we should NOT remove the trailing slash. */
535 if ((canon_segments > 0 && *(dst - 1) == '/')
536 && ! (url && path[schemelen] == '\0'))
543 #ifdef SVN_USE_DOS_PATHS
544 /* Skip leading double slashes when there are fewer than 2
545 * canon segments. UNC paths *MUST* have two segments. */
546 if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
548 if (canon_segments < 2)
552 /* Now we're sure this is a valid UNC path, convert the server name
553 (the first path segment) to lowercase as Windows treats it as case
555 Note: normally the share name is treated as case insensitive too,
556 but it seems to be possible to configure Samba to treat those as
557 case sensitive, so better leave that alone. */
558 for (dst = canon + 2; *dst && *dst != '/'; dst++)
559 *dst = canonicalize_to_lower(*dst);
562 #endif /* SVN_USE_DOS_PATHS */
564 /* Check the normalization of characters in a uri */
577 if (!svn_ctype_isxdigit(*(src+1)) ||
578 !svn_ctype_isxdigit(*(src+2)))
584 if (!svn_uri__char_validity[(unsigned char)*src])
593 apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);
595 dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
596 memcpy(dst, canon, pre_schema_size);
599 dst += pre_schema_size;
614 if (!svn_ctype_isxdigit(*(src+1)) ||
615 !svn_ctype_isxdigit(*(src+2)))
626 digitz[0] = *(++src);
627 digitz[1] = *(++src);
630 val = (int)strtol(digitz, NULL, 16);
632 if (svn_uri__char_validity[(unsigned char)val])
633 *(dst++) = (char)val;
637 *(dst++) = canonicalize_to_upper(digitz[0]);
638 *(dst++) = canonicalize_to_upper(digitz[1]);
643 if (!svn_uri__char_validity[(unsigned char)*src])
645 apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
660 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
661 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
662 * PATH1 and PATH2 are regular paths.
664 * If the two paths do not share a common ancestor, return 0.
666 * New strings are allocated in POOL.
669 get_longest_ancestor_length(path_type_t types,
674 apr_size_t path1_len, path2_len;
676 apr_size_t last_dirsep = 0;
677 #ifdef SVN_USE_DOS_PATHS
678 svn_boolean_t unc = FALSE;
681 path1_len = strlen(path1);
682 path2_len = strlen(path2);
684 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
687 while (path1[i] == path2[i])
689 /* Keep track of the last directory separator we hit. */
695 /* If we get to the end of either path, break out. */
696 if ((i == path1_len) || (i == path2_len))
700 /* two special cases:
701 1. '/' is the longest common ancestor of '/' and '/foo' */
702 if (i == 1 && path1[0] == '/' && path2[0] == '/')
704 /* 2. '' is the longest common ancestor of any non-matching
705 * strings 'foo' and 'bar' */
706 if (types == type_dirent && i == 0)
709 /* Handle some windows specific cases */
710 #ifdef SVN_USE_DOS_PATHS
711 if (types == type_dirent)
713 /* don't count the '//' from UNC paths */
714 if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
721 if (i == 3 && path1[2] == '/' && path1[1] == ':')
724 /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
725 * Note that this assertion triggers only if the code above has
726 * been broken. The code below relies on this assertion, because
727 * it uses [i - 1] as index. */
731 if ((path1[i - 1] == ':' && path2[i] == '/') ||
732 (path2[i - 1] == ':' && path1[i] == '/'))
735 if (path1[i - 1] == ':' || path2[i - 1] == ':')
738 #endif /* SVN_USE_DOS_PATHS */
740 /* last_dirsep is now the offset of the last directory separator we
741 crossed before reaching a non-matching byte. i is the offset of
742 that non-matching byte, and is guaranteed to be <= the length of
743 whichever path is shorter.
744 If one of the paths is the common part return that. */
745 if (((i == path1_len) && (path2[i] == '/'))
746 || ((i == path2_len) && (path1[i] == '/'))
747 || ((i == path1_len) && (i == path2_len)))
751 /* Nothing in common but the root folder '/' or 'X:/' for Windows
753 #ifdef SVN_USE_DOS_PATHS
756 /* X:/foo and X:/bar returns X:/ */
757 if ((types == type_dirent) &&
758 last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
759 && path2[1] == ':' && path2[2] == '/')
761 #endif /* SVN_USE_DOS_PATHS */
762 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
764 #ifdef SVN_USE_DOS_PATHS
772 /* Determine whether PATH2 is a child of PATH1.
774 * PATH2 is a child of PATH1 if
775 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
777 * 2) PATH2 has n components, PATH1 has x < n components,
778 * and PATH1 matches PATH2 in all its x components.
779 * Components are separated by a slash, '/'.
781 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
782 * PATH1 and PATH2 are regular paths.
784 * If PATH2 is not a child of PATH1, return NULL.
786 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
787 * of the child part of PATH2 in POOL and return a pointer to the
788 * newly allocated child part.
790 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
791 * pointing to the child part of PATH2.
794 is_child(path_type_t type, const char *path1, const char *path2,
799 /* Allow "" and "foo" or "H:foo" to be parent/child */
800 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
802 if (SVN_PATH_IS_EMPTY(path2)) /* "" not a child */
805 /* check if this is an absolute path */
806 if ((type == type_uri) ||
807 (type == type_dirent && dirent_is_rooted(path2)))
810 /* everything else is child */
811 return pool ? apr_pstrdup(pool, path2) : path2;
814 /* Reach the end of at least one of the paths. How should we handle
815 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
816 appear to arise in the current Subversion code, it's not clear to me
817 if they should be parent/child or not. */
818 /* Hmmm... aren't paths assumed to be canonical in this function?
819 * How can "foo///bar" even happen if the paths are canonical? */
820 for (i = 0; path1[i] && path2[i]; i++)
821 if (path1[i] != path2[i])
824 /* FIXME: This comment does not really match
825 * the checks made in the code it refers to: */
826 /* There are two cases that are parent/child
828 .../foo path2[i] == '/'
833 Other root paths (like X:/) fall under the former case:
835 X:/foo path2[i] != '/'
837 Check for '//' to avoid matching '/' and '//srv'.
839 if (path1[i] == '\0' && path2[i])
841 if (path1[i - 1] == '/'
842 #ifdef SVN_USE_DOS_PATHS
843 || ((type == type_dirent) && path1[i - 1] == ':')
856 return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
858 else if (path2[i] == '/')
864 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
873 /* Otherwise, path2 isn't a child. */
878 /**** Public API functions ****/
881 svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
883 return svn_dirent_canonicalize(internal_style(dirent, pool), pool);
887 svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
889 /* Internally, Subversion represents the current directory with the
890 empty string. But users like to see "." . */
891 if (SVN_PATH_IS_EMPTY(dirent))
894 #if '/' != SVN_PATH_LOCAL_SEPARATOR
896 char *p = apr_pstrdup(pool, dirent);
899 /* Convert all canonical separators to the local-style ones. */
900 for (; *p != '\0'; ++p)
902 *p = SVN_PATH_LOCAL_SEPARATOR;
910 svn_relpath__internal_style(const char *relpath,
913 return svn_relpath_canonicalize(internal_style(relpath, pool), pool);
917 /* We decided against using apr_filepath_root here because of the negative
918 performance impact (creating a pool and converting strings). */
920 svn_dirent_is_root(const char *dirent, apr_size_t len)
922 #ifdef SVN_USE_DOS_PATHS
923 /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
924 are also root directories */
925 if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) &&
926 (dirent[1] == ':') &&
927 ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
928 (dirent[0] >= 'a' && dirent[0] <= 'z')))
931 /* On Windows and Cygwin //server/share is a root directory,
932 and on Cygwin //drive is a drive alias */
933 if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
934 && dirent[len - 1] != '/')
938 for (i = len; i >= 2; i--)
940 if (dirent[i] == '/')
948 return (segments <= 1);
950 return (segments == 1); /* //drive is invalid on plain Windows */
955 /* directory is root if it's equal to '/' */
956 if (len == 1 && dirent[0] == '/')
963 svn_uri_is_root(const char *uri, apr_size_t len)
965 assert(svn_uri_is_canonical(uri, NULL));
966 return (len == uri_schema_root_length(uri, len));
969 char *svn_dirent_join(const char *base,
970 const char *component,
973 apr_size_t blen = strlen(base);
974 apr_size_t clen = strlen(component);
978 assert(svn_dirent_is_canonical(base, pool));
979 assert(svn_dirent_is_canonical(component, pool));
981 /* If the component is absolute, then return it. */
982 if (svn_dirent_is_absolute(component))
983 return apr_pmemdup(pool, component, clen + 1);
985 /* If either is empty return the other */
986 if (SVN_PATH_IS_EMPTY(base))
987 return apr_pmemdup(pool, component, clen + 1);
988 if (SVN_PATH_IS_EMPTY(component))
989 return apr_pmemdup(pool, base, blen + 1);
991 #ifdef SVN_USE_DOS_PATHS
992 if (component[0] == '/')
994 /* '/' is drive relative on Windows, not absolute like on Posix */
995 if (dirent_is_rooted(base))
997 /* Join component without '/' to root-of(base) */
998 blen = dirent_root_length(base, blen);
1002 if (blen == 2 && base[1] == ':') /* "C:" case */
1004 char *root = apr_pmemdup(pool, base, 3);
1005 root[2] = '/'; /* We don't need the final '\0' */
1012 return apr_pstrndup(pool, base, blen);
1015 return apr_pmemdup(pool, component, clen + 1);
1017 else if (dirent_is_rooted(component))
1018 return apr_pmemdup(pool, component, clen + 1);
1019 #endif /* SVN_USE_DOS_PATHS */
1021 /* if last character of base is already a separator, don't add a '/' */
1023 if (base[blen - 1] == '/'
1024 #ifdef SVN_USE_DOS_PATHS
1025 || base[blen - 1] == ':'
1030 /* Construct the new, combined dirent. */
1031 dirent = apr_palloc(pool, blen + add_separator + clen + 1);
1032 memcpy(dirent, base, blen);
1035 memcpy(dirent + blen + add_separator, component, clen + 1);
/* Join BASE with the components of the NULL-terminated variadic argument
 * list into one dirent allocated in POOL.
 *
 * As far as the visible lines show: the first pass over the arguments
 * computes the total length and caches up to MAX_SAVED_LENGTHS component
 * lengths in saved_lengths[], and the second pass copies the components
 * into the allocated buffer.  Empty components are tested for in both
 * passes.  A component for which dirent_is_rooted() is true replaces the
 * base: the base becomes the empty string, or, on SVN_USE_DOS_PATHS
 * builds when the component is rooted but not absolute, the result of
 * svn_dirent_join(base, component).
 *
 * NOTE(review): this listing is missing lines (declarations, braces and
 * some statements), so the summary above is limited to what is visible
 * here and should be confirmed against the complete source. */
1040 char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
1042 #define MAX_SAVED_LENGTHS 10
1043 apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
1044 apr_size_t total_len;
1054 total_len = strlen(base);
1056 assert(svn_dirent_is_canonical(base, pool));
1058 /* if last character of base is already a separator, don't add a '/' */
1061 || base[total_len - 1] == '/'
1062 #ifdef SVN_USE_DOS_PATHS
1063 || base[total_len - 1] == ':'
1068 saved_lengths[0] = total_len;
1070 /* Compute the length of the resulting string. */
1074 while ((s = va_arg(va, const char *)) != NULL)
1078 assert(svn_dirent_is_canonical(s, pool));
1080 if (SVN_PATH_IS_EMPTY(s))
/* NOTE(review): possible off-by-one.  The test below uses the value of
 * nargs before the increment, but the store uses the value after it.  When
 * nargs equals MAX_SAVED_LENGTHS - 1 on entry, the write lands on
 * saved_lengths[MAX_SAVED_LENGTHS], one element past the end of the array.
 * The second loop only reads indexes below MAX_SAVED_LENGTHS, so testing
 * the incremented value here (++nargs < MAX_SAVED_LENGTHS) would be
 * consistent with the read side.  Confirm against the complete source. */
1083 if (nargs++ < MAX_SAVED_LENGTHS)
1084 saved_lengths[nargs] = len;
1086 if (dirent_is_rooted(s))
1091 #ifdef SVN_USE_DOS_PATHS
1092 if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */
1094 /* Set new base and skip the current argument */
1095 base = s = svn_dirent_join(base, s, pool);
1097 saved_lengths[0] = total_len = len = strlen(s);
1100 #endif /* SVN_USE_DOS_PATHS */
1102 base = ""; /* Don't add base */
1103 saved_lengths[0] = 0;
1107 if (s[len - 1] == '/'
1108 #ifdef SVN_USE_DOS_PATHS
1109 || s[len - 1] == ':'
1114 else if (nargs <= base_arg + 1)
1116 total_len += add_separator + len;
1120 total_len += 1 + len;
1125 /* base == "/" and no further components. just return that. */
1126 if (add_separator == 0 && total_len == 1)
1127 return apr_pmemdup(pool, "/", 2);
1129 /* we got the total size. allocate it, with room for a NULL character. */
1130 dirent = p = apr_palloc(pool, total_len + 1);
1132 /* if we aren't supposed to skip forward to an absolute component, and if
1133 this is not an empty base that we are skipping, then copy the base
1135 if (! SVN_PATH_IS_EMPTY(base))
1137 memcpy(p, base, len = saved_lengths[0]);
1143 while ((s = va_arg(va, const char *)) != NULL)
1145 if (SVN_PATH_IS_EMPTY(s))
1148 if (++nargs < base_arg)
1151 if (nargs < MAX_SAVED_LENGTHS)
1152 len = saved_lengths[nargs];
1156 /* insert a separator if we aren't copying in the first component
1157 (which can happen when base_arg is set). also, don't put in a slash
1158 if the prior character is a slash (occurs when prior component
1161 ( ! (nargs - 1 <= base_arg) || add_separator))
1164 /* copy the new component and advance the pointer */
1171 assert((apr_size_t)(p - dirent) == total_len);
1177 svn_relpath_join(const char *base,
1178 const char *component,
1181 apr_size_t blen = strlen(base);
1182 apr_size_t clen = strlen(component);
1185 assert(relpath_is_canonical(base));
1186 assert(relpath_is_canonical(component));
1188 /* If either is empty return the other */
1190 return apr_pmemdup(pool, component, clen + 1);
1192 return apr_pmemdup(pool, base, blen + 1);
1194 path = apr_palloc(pool, blen + 1 + clen + 1);
1195 memcpy(path, base, blen);
1197 memcpy(path + blen + 1, component, clen + 1);
1203 svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
1205 apr_size_t len = strlen(dirent);
1207 assert(svn_dirent_is_canonical(dirent, pool));
1209 if (len == dirent_root_length(dirent, len))
1210 return apr_pstrmemdup(pool, dirent, len);
1212 return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
1216 svn_dirent_basename(const char *dirent, apr_pool_t *pool)
1218 apr_size_t len = strlen(dirent);
1221 assert(!pool || svn_dirent_is_canonical(dirent, pool));
1223 if (svn_dirent_is_root(dirent, len))
1228 while (start > 0 && dirent[start - 1] != '/'
1229 #ifdef SVN_USE_DOS_PATHS
1230 && dirent[start - 1] != ':'
1237 return apr_pstrmemdup(pool, dirent + start, len - start);
1239 return dirent + start;
1243 svn_dirent_split(const char **dirpath,
1244 const char **base_name,
1248 assert(dirpath != base_name);
1251 *dirpath = svn_dirent_dirname(dirent, pool);
1254 *base_name = svn_dirent_basename(dirent, pool);
1258 svn_relpath_dirname(const char *relpath,
1261 apr_size_t len = strlen(relpath);
1263 assert(relpath_is_canonical(relpath));
1265 return apr_pstrmemdup(pool, relpath,
1266 relpath_previous_segment(relpath, len));
1270 svn_relpath_basename(const char *relpath,
1273 apr_size_t len = strlen(relpath);
1276 assert(relpath_is_canonical(relpath));
1279 while (start > 0 && relpath[start - 1] != '/')
1283 return apr_pstrmemdup(pool, relpath + start, len - start);
1285 return relpath + start;
1289 svn_relpath_split(const char **dirpath,
1290 const char **base_name,
1291 const char *relpath,
1294 assert(dirpath != base_name);
1297 *dirpath = svn_relpath_dirname(relpath, pool);
1300 *base_name = svn_relpath_basename(relpath, pool);
1304 svn_relpath_prefix(const char *relpath,
1306 apr_pool_t *result_pool)
1309 assert(relpath_is_canonical(relpath));
1311 if (max_components <= 0)
1314 for (end = relpath; *end; end++)
1318 if (!--max_components)
1323 return apr_pstrmemdup(result_pool, relpath, end-relpath);
1327 svn_uri_dirname(const char *uri, apr_pool_t *pool)
1329 apr_size_t len = strlen(uri);
1331 assert(svn_uri_is_canonical(uri, pool));
1333 if (svn_uri_is_root(uri, len))
1334 return apr_pstrmemdup(pool, uri, len);
1336 return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
1340 svn_uri_basename(const char *uri, apr_pool_t *pool)
1342 apr_size_t len = strlen(uri);
1345 assert(svn_uri_is_canonical(uri, NULL));
1347 if (svn_uri_is_root(uri, len))
1351 while (start > 0 && uri[start - 1] != '/')
1354 return svn_path_uri_decode(uri + start, pool);
1358 svn_uri_split(const char **dirpath,
1359 const char **base_name,
1363 assert(dirpath != base_name);
1366 *dirpath = svn_uri_dirname(uri, pool);
1369 *base_name = svn_uri_basename(uri, pool);
1373 svn_dirent_get_longest_ancestor(const char *dirent1,
1374 const char *dirent2,
1377 return apr_pstrndup(pool, dirent1,
1378 get_longest_ancestor_length(type_dirent, dirent1,
1383 svn_relpath_get_longest_ancestor(const char *relpath1,
1384 const char *relpath2,
1387 assert(relpath_is_canonical(relpath1));
1388 assert(relpath_is_canonical(relpath2));
1390 return apr_pstrndup(pool, relpath1,
1391 get_longest_ancestor_length(type_relpath, relpath1,
1396 svn_uri_get_longest_ancestor(const char *uri1,
1400 apr_size_t uri_ancestor_len;
1403 assert(svn_uri_is_canonical(uri1, NULL));
1404 assert(svn_uri_is_canonical(uri2, NULL));
1409 /* No shared protocol => no common prefix */
1410 if (uri1[i] != uri2[i])
1411 return apr_pmemdup(pool, SVN_EMPTY_PATH,
1412 sizeof(SVN_EMPTY_PATH));
1417 /* They're both URLs, so EOS can't come before ':' */
1418 assert((uri1[i] != '\0') && (uri2[i] != '\0'));
1423 i += 3; /* Advance past '://' */
1425 uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
1428 if (uri_ancestor_len == 0 ||
1429 (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
1430 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
1432 return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
1436 svn_dirent_is_child(const char *parent_dirent,
1437 const char *child_dirent,
1440 return is_child(type_dirent, parent_dirent, child_dirent, pool);
1444 svn_dirent_skip_ancestor(const char *parent_dirent,
1445 const char *child_dirent)
1447 apr_size_t len = strlen(parent_dirent);
1448 apr_size_t root_len;
1450 if (0 != strncmp(parent_dirent, child_dirent, len))
1451 return NULL; /* parent_dirent is no ancestor of child_dirent */
1453 if (child_dirent[len] == 0)
1454 return ""; /* parent_dirent == child_dirent */
1456 /* Child == parent + more-characters */
1458 root_len = dirent_root_length(child_dirent, strlen(child_dirent));
1460 /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */
1463 /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters.
1464 * It must be one of the following forms.
1466 * rlen parent child bad? rlen=len? c[len]=/?
1475 * 2 "a:b" "a:b/foo" *
1476 * 3 "a:/" "a:/foo" *
1477 * 3 "a:/b" "a:/bad" !
1478 * 3 "a:/b" "a:/b/foo" *
1479 * 5 "//s/s" "//s/s/foo" * *
1480 * 5 "//s/s/b" "//s/s/bad" !
1481 * 5 "//s/s/b" "//s/s/b/foo" *
1484 if (child_dirent[len] == '/')
1485 /* "parent|child" is one of:
1486 * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */
1487 return child_dirent + len + 1;
1489 if (root_len == len)
1490 /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */
1491 return child_dirent + len;
1497 svn_relpath_skip_ancestor(const char *parent_relpath,
1498 const char *child_relpath)
1500 apr_size_t len = strlen(parent_relpath);
1502 assert(relpath_is_canonical(parent_relpath));
1503 assert(relpath_is_canonical(child_relpath));
1506 return child_relpath;
1508 if (0 != strncmp(parent_relpath, child_relpath, len))
1509 return NULL; /* parent_relpath is no ancestor of child_relpath */
1511 if (child_relpath[len] == 0)
1512 return ""; /* parent_relpath == child_relpath */
1514 if (child_relpath[len] == '/')
1515 return child_relpath + len + 1;
1523 uri_skip_ancestor(const char *parent_uri,
1524 const char *child_uri)
1526 apr_size_t len = strlen(parent_uri);
1528 assert(svn_uri_is_canonical(parent_uri, NULL));
1529 assert(svn_uri_is_canonical(child_uri, NULL));
1531 if (0 != strncmp(parent_uri, child_uri, len))
1532 return NULL; /* parent_uri is no ancestor of child_uri */
1534 if (child_uri[len] == 0)
1535 return ""; /* parent_uri == child_uri */
1537 if (child_uri[len] == '/')
1538 return child_uri + len + 1;
1544 svn_uri_skip_ancestor(const char *parent_uri,
1545 const char *child_uri,
1546 apr_pool_t *result_pool)
1548 const char *result = uri_skip_ancestor(parent_uri, child_uri);
1550 return result ? svn_path_uri_decode(result, result_pool) : NULL;
1554 svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent)
1556 return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL;
1560 svn_uri__is_ancestor(const char *parent_uri, const char *child_uri)
1562 return uri_skip_ancestor(parent_uri, child_uri) != NULL;
1567 svn_dirent_is_absolute(const char *dirent)
1572 /* dirent is absolute if it starts with '/' on non-Windows platforms
1573 or with '//' on Windows platforms */
1574 if (dirent[0] == '/'
1575 #ifdef SVN_USE_DOS_PATHS
1576 && dirent[1] == '/' /* Single '/' depends on current drive */
1581 /* On Windows, dirent is also absolute when it starts with 'H:/'
1582 where 'H' is any letter. */
1583 #ifdef SVN_USE_DOS_PATHS
1584 if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) &&
1585 (dirent[1] == ':') && (dirent[2] == '/'))
1587 #endif /* SVN_USE_DOS_PATHS */
/* Set *PABSOLUTE to an absolute, canonical form of the dirent RELATIVE.

   RELATIVE must not be a URL (asserted).  It is converted from UTF-8 with
   svn_path_cstring_from_utf8(), merged by apr_filepath_merge() with the
   APR_FILEPATH_NOTRELATIVE flag, converted back to UTF-8 and finally run
   through svn_dirent_canonicalize().

   On the APR error path, a RELATIVE that is already absolute, canonical
   and free of backpaths is simply duplicated into *PABSOLUTE; otherwise an
   SVN_ERR_BAD_FILENAME error wrapping the APR status is returned. */
1593 svn_dirent_get_absolute(const char **pabsolute,
1594 const char *relative,
1598 apr_status_t apr_err;
1599 const char *path_apr;
1601 SVN_ERR_ASSERT(! svn_path_is_url(relative));
1603 /* Merge the current working directory with the relative dirent. */
1604 SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1606 apr_err = apr_filepath_merge(&buffer, NULL,
1608 APR_FILEPATH_NOTRELATIVE,
1612 /* In some cases when the passed path or its ancestor(s) do not exist
1613 or no longer exist apr returns an error.
1615 In many of these cases we would like to return a path anyway, when the
1616 passed path was already a safe absolute path. So check for that now to
1619 svn_dirent_is_absolute() doesn't perform the necessary checks to see
1620 if the path doesn't need post processing to be in the canonical absolute
1624 if (svn_dirent_is_absolute(relative)
1625 && svn_dirent_is_canonical(relative, pool)
1626 && !svn_path_is_backpath_present(relative))
1628 *pabsolute = apr_pstrdup(pool, relative);
1629 return SVN_NO_ERROR;
1632 return svn_error_createf(SVN_ERR_BAD_FILENAME,
1633 svn_error_create(apr_err, NULL, NULL),
1634 _("Couldn't determine absolute path of '%s'"),
1635 svn_dirent_local_style(relative, pool));
1638 SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1639 *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1640 return SVN_NO_ERROR;
/* Return the canonical form of URI: a thin wrapper that calls
   canonicalize() with type_uri and POOL. */
1644 svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1646 return canonicalize(type_uri, uri, pool);
/* Return the canonical form of RELPATH: a thin wrapper that calls
   canonicalize() with type_relpath and POOL. */
1650 svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool)
1652 return canonicalize(type_relpath, relpath, pool);
/* Return the canonical form of DIRENT, computed by canonicalize() with
   type_dirent and POOL.  Under SVN_USE_DOS_PATHS a drive-root input
   (drive letter, colon, slash) gets special treatment: a new 4-byte
   string is allocated in POOL and its first character is the upper-cased
   drive letter, so that the trailing slash is kept. */
1656 svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1658 const char *dst = canonicalize(type_dirent, dirent, pool);
1660 #ifdef SVN_USE_DOS_PATHS
1661 /* Handle a specific case on Windows where path == "X:/". Here we have to
1662 append the final '/', as svn_path_canonicalize will chop this off. */
1663 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1664 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1665 dirent[1] == ':' && dirent[2] == '/' &&
1668 char *dst_slash = apr_pcalloc(pool, 4);
1669 dst_slash[0] = canonicalize_to_upper(dirent[0]);
1672 dst_slash[3] = '\0';
1676 #endif /* SVN_USE_DOS_PATHS */
/* Tell whether DIRENT is already in canonical form.

   Under SVN_USE_DOS_PATHS, UNC paths are checked the slow way, by
   comparing DIRENT with svn_dirent_canonicalize(DIRENT) allocated in
   SCRATCH_POOL, and a drive name is only accepted in upper case.
   Whatever remains at PTR is handed to relpath_is_canonical(). */
1682 svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool)
1684 const char *ptr = dirent;
1688 #ifdef SVN_USE_DOS_PATHS
1689 /* Check for UNC paths */
1692 /* TODO: Scan hostname and sharename and fall back to part code */
1694 /* ### Fall back to old implementation */
1695 return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool))
1698 #endif /* SVN_USE_DOS_PATHS */
1700 #ifdef SVN_USE_DOS_PATHS
1701 else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) &&
1704 /* The only canonical drive names are "A:"..."Z:", no lower case */
1705 if (*ptr < 'A' || *ptr > 'Z')
1713 #endif /* SVN_USE_DOS_PATHS */
1715 return relpath_is_canonical(ptr);
/* Tell whether RELPATH is a canonical relative path.  The visible tests
   look for: a leading "." component, a trailing slash or trailing "."
   component, a "." component in the middle, and two slashes in a row. */
1718 static svn_boolean_t
1719 relpath_is_canonical(const char *relpath)
1721 const char *dot_pos, *ptr = relpath;
1723 unsigned pattern = 0;
1725 /* RELPATH is canonical if it has:
1727 * - no start and closing '/'
1731 /* invalid beginnings */
1735 if (ptr[0] == '.' && (ptr[1] == '/' || ptr[1] == '\0'))
1738 /* valid special cases */
1743 /* invalid endings */
1744 if (ptr[len-1] == '/' || (ptr[len-1] == '.' && ptr[len-2] == '/'))
1747 /* '.' are rare. So, search for them globally. There will often be no
1748 * more than one hit. Also note that we already checked for invalid
1749 * starts and endings, i.e. we only need to check for "/./"
1751 for (dot_pos = memchr(ptr, '.', len);
1753 dot_pos = strchr(dot_pos+1, '.'))
1754 if (dot_pos > ptr && dot_pos[-1] == '/' && dot_pos[1] == '/')
1757 /* Now validate the rest of the path. */
1758 for (i = 0; i < len - 1; ++i)
/* PATTERN keeps the previous byte in bits 8-15 and the current byte in
   bits 0-7, so 0x101 times the slash character means two slashes in a
   row. */
1760 pattern = ((pattern & 0xff) << 8) + (unsigned char)ptr[i];
1761 if (pattern == 0x101 * (unsigned char)('/'))
/* Public entry point: tell whether RELPATH is canonical by delegating to
   the file-local relpath_is_canonical(). */
1769 svn_relpath_is_canonical(const char *relpath)
1771 return relpath_is_canonical(relpath);
/* Tell whether URI is in canonical form.  The function walks URI from
   left to right with PTR and examines, in this order:

   - that URI is a URL at all (svn_path_is_url) and that its scheme is
     followed by a colon and two slashes and has no upper-case letters;
   - the authority part: an optional user part up to an at-sign, a host
     name without upper-case letters, and an optional port.  An empty
     port is rejected, and a port equal to the well-known default of the
     scheme (80 for http, 443 for https, 3690 for svn) is singled out;
   - under SVN_USE_DOS_PATHS, the drive letter of a file URL;
   - the path: no "." segment, no empty segment, no trailing slash;
   - percent-escapes: only upper-case hex digits, and only for bytes
     that svn_uri__char_validity does not mark as valid; conversely,
     bytes not marked valid must not appear unescaped. */
1775 svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool)
1777 const char *ptr = uri, *seg = uri;
1778 const char *schema_data = NULL;
1780 /* URI is canonical if it has:
1781 * - lowercase URL scheme
1782 * - lowercase URL hostname
1786 * - uppercase hex-encoded pair digits ("%AB", not "%ab")
1792 if (! svn_path_is_url(uri))
1795 /* Skip the scheme. */
1796 while (*ptr && (*ptr != '/') && (*ptr != ':'))
1799 /* No scheme? No good. */
1800 if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/'))
1803 /* Found a scheme, check that it's all lowercase. */
1807 if (*ptr >= 'A' && *ptr <= 'Z')
1814 /* Scheme only? That works. */
1818 /* This might be the hostname */
1820 while (*ptr && (*ptr != '/') && (*ptr != '@'))
1826 /* Found a hostname, check that it's all lowercase. */
1833 || (*ptr >= '0' && *ptr <= '9')
1834 || (*ptr >= 'a' && *ptr <= 'f'))
1844 while (*ptr && *ptr != '/' && *ptr != ':')
1846 if (*ptr >= 'A' && *ptr <= 'Z')
1851 /* Found a portnumber */
1854 apr_int64_t port = 0;
1859 while (*ptr >= '0' && *ptr <= '9')
1861 port = 10 * port + (*ptr - '0');
1865 if (ptr == schema_data && (*ptr == '/' || *ptr == '\0'))
1866 return FALSE; /* Fail on "http://host:" */
1868 if (port == 80 && strncmp(uri, "http:", 5) == 0)
1870 else if (port == 443 && strncmp(uri, "https:", 6) == 0)
1872 else if (port == 3690 && strncmp(uri, "svn:", 4) == 0)
1875 while (*ptr && *ptr != '/')
1876 ++ptr; /* Allow "http://host:stuff" */
1881 #ifdef SVN_USE_DOS_PATHS
1882 if (schema_data && *ptr == '/')
1884 /* If this is a file url, ptr now points to the third '/' in
1885 file:///C:/path. Check that if we have such a URL the drive
1886 letter is in uppercase. */
1887 if (strncmp(uri, "file:", 5) == 0 &&
1888 ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
1892 #endif /* SVN_USE_DOS_PATHS */
1894 /* Now validate the rest of the URI. */
1896 while (*ptr && (*ptr != '/'))
1900 apr_size_t seglen = ptr - seg;
1902 if (seglen == 1 && *seg == '.')
1903 return FALSE; /* /./ */
1905 if (*ptr == '/' && *(ptr+1) == '/')
1906 return FALSE; /* // */
1908 if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
1909 return FALSE; /* foo/ */
1918 while (*ptr && (*ptr != '/'))
1931 /* Can't use svn_ctype_isxdigit() because lower case letters are
1932 not in our canonical format */
1933 if (((*(ptr+1) < '0' || *(ptr+1) > '9'))
1934 && (*(ptr+1) < 'A' || *(ptr+1) > 'F'))
1936 else if (((*(ptr+2) < '0' || *(ptr+2) > '9'))
1937 && (*(ptr+2) < 'A' || *(ptr+2) > 'F'))
1940 digitz[0] = *(++ptr);
1941 digitz[1] = *(++ptr);
1943 val = (int)strtol(digitz, NULL, 16);
1945 if (svn_uri__char_validity[val])
1946 return FALSE; /* Should not have been escaped */
1948 else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
1949 return FALSE; /* Character should have been escaped */
/* Condense TARGETS, an array of const char * dirents, into a common
   ancestor and, optionally, a list of targets relative to that ancestor.

   Every target is made absolute with svn_dirent_get_absolute().
   *PCOMMON receives the longest common ancestor of all of them
   (svn_dirent_get_longest_ancestor), duplicated into RESULT_POOL.

   If PCONDENSED_TARGETS is non-NULL it receives an array allocated in
   RESULT_POOL holding the targets with the common prefix stripped.  When
   REMOVE_REDUNDANCIES is set, targets that are children of other targets,
   or that equal *PCOMMON, are marked in REMOVED[] first and left out.

   Special cases: with no targets *PCONDENSED_TARGETS is set to NULL; with
   exactly one target it is an empty array.  The intermediate arrays live
   in SCRATCH_POOL. */
1957 svn_dirent_condense_targets(const char **pcommon,
1958 apr_array_header_t **pcondensed_targets,
1959 const apr_array_header_t *targets,
1960 svn_boolean_t remove_redundancies,
1961 apr_pool_t *result_pool,
1962 apr_pool_t *scratch_pool)
1964 int i, num_condensed = targets->nelts;
1965 svn_boolean_t *removed;
1966 apr_array_header_t *abs_targets;
1968 /* Early exit when there's no data to work on. */
1969 if (targets->nelts <= 0)
1972 if (pcondensed_targets)
1973 *pcondensed_targets = NULL;
1974 return SVN_NO_ERROR;
1977 /* Get the absolute path of the first target. */
1978 SVN_ERR(svn_dirent_get_absolute(pcommon,
1979 APR_ARRAY_IDX(targets, 0, const char *),
1982 /* Early exit when there's only one dirent to work on. */
1983 if (targets->nelts == 1)
1985 *pcommon = apr_pstrdup(result_pool, *pcommon);
1986 if (pcondensed_targets)
1987 *pcondensed_targets = apr_array_make(result_pool, 0,
1988 sizeof(const char *));
1989 return SVN_NO_ERROR;
1992 /* Copy the targets array, but with absolute dirents instead of
1993 relative. Also, find the pcommon argument by finding what is
1994 common in all of the absolute dirents. NOTE: This is not as
1995 efficient as it could be. The calculation of the basedir could
1996 be done in the loop below, which would save some calls to
1997 svn_dirent_get_longest_ancestor. I decided to do it this way
1998 because I thought it would be simpler, since this way, we don't
1999 even do the loop if we don't need to condense the targets. */
/* REMOVED[] runs parallel to ABS_TARGETS, one flag per target. */
2001 removed = apr_pcalloc(scratch_pool, (targets->nelts *
2002 sizeof(svn_boolean_t)));
2003 abs_targets = apr_array_make(scratch_pool, targets->nelts,
2004 sizeof(const char *));
2006 APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon;
2008 for (i = 1; i < targets->nelts; ++i)
2010 const char *rel = APR_ARRAY_IDX(targets, i, const char *);
2011 const char *absolute;
2012 SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool));
2013 APR_ARRAY_PUSH(abs_targets, const char *) = absolute;
2014 *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute,
2018 *pcommon = apr_pstrdup(result_pool, *pcommon);
2020 if (pcondensed_targets != NULL)
2024 if (remove_redundancies)
2026 /* Find the common part of each pair of targets. If
2027 common part is equal to one of the dirents, the other
2028 is a child of it, and can be removed. If a target is
2029 equal to *pcommon, it can also be removed. */
2031 /* First pass: when one non-removed target is a child of
2032 another non-removed target, remove the child. */
2033 for (i = 0; i < abs_targets->nelts; ++i)
2040 for (j = i + 1; j < abs_targets->nelts; ++j)
2042 const char *abs_targets_i;
2043 const char *abs_targets_j;
2044 const char *ancestor;
2049 abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *);
2050 abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *);
2052 ancestor = svn_dirent_get_longest_ancestor
2053 (abs_targets_i, abs_targets_j, scratch_pool);
2055 if (*ancestor == '\0')
2058 if (strcmp(ancestor, abs_targets_i) == 0)
2063 else if (strcmp(ancestor, abs_targets_j) == 0)
2071 /* Second pass: when a target is the same as *pcommon,
2072 remove the target. */
2073 for (i = 0; i < abs_targets->nelts; ++i)
2075 const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i,
2078 if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i]))
2086 /* Now create the return array, and copy the non-removed items */
2087 basedir_len = strlen(*pcommon);
2088 *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2089 sizeof(const char *));
2091 for (i = 0; i < abs_targets->nelts; ++i)
2093 const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *);
2095 /* Skip this if it's been removed. */
2099 /* If a common prefix was found, condensed_targets are given
2100 relative to that prefix. */
2101 if (basedir_len > 0)
2103 /* Only advance our pointer past a dirent separator if
2104 REL_ITEM isn't the same as *PCOMMON.
2106 If *PCOMMON is a root dirent, basedir_len will already
2107 include the closing '/', so never advance the pointer
2110 rel_item += basedir_len;
2112 ! svn_dirent_is_root(*pcommon, basedir_len))
2116 APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2117 = apr_pstrdup(result_pool, rel_item);
2121 return SVN_NO_ERROR;
/* URI counterpart of svn_dirent_condense_targets(): condense TARGETS, an
   array of const char * URIs, into a common ancestor and, optionally, a
   list of targets relative to that ancestor.

   Every target is canonicalized with svn_uri_canonicalize().  *PCOMMON
   receives the longest common ancestor (svn_uri_get_longest_ancestor),
   duplicated into RESULT_POOL; the search is skipped once the ancestor
   has become the empty string.

   If PCONDENSED_TARGETS is non-NULL it receives an array allocated in
   RESULT_POOL holding the targets with the common prefix stripped and the
   remainder URI-decoded by svn_path_uri_decode().  When
   REMOVE_REDUNDANCIES is set, targets that are children of other targets,
   or that equal *PCOMMON, are marked in REMOVED[] first and left out.

   Special cases: with no targets *PCONDENSED_TARGETS is set to NULL; with
   exactly one target it is an empty array. */
2125 svn_uri_condense_targets(const char **pcommon,
2126 apr_array_header_t **pcondensed_targets,
2127 const apr_array_header_t *targets,
2128 svn_boolean_t remove_redundancies,
2129 apr_pool_t *result_pool,
2130 apr_pool_t *scratch_pool)
2132 int i, num_condensed = targets->nelts;
2133 apr_array_header_t *uri_targets;
2134 svn_boolean_t *removed;
2136 /* Early exit when there's no data to work on. */
2137 if (targets->nelts <= 0)
2140 if (pcondensed_targets)
2141 *pcondensed_targets = NULL;
2142 return SVN_NO_ERROR;
2145 *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *),
2148 /* Early exit when there's only one uri to work on. */
2149 if (targets->nelts == 1)
2151 *pcommon = apr_pstrdup(result_pool, *pcommon);
2152 if (pcondensed_targets)
2153 *pcondensed_targets = apr_array_make(result_pool, 0,
2154 sizeof(const char *));
2155 return SVN_NO_ERROR;
2158 /* Find the pcommon argument by finding what is common in all of the
2159 uris. NOTE: This is not as efficient as it could be. The calculation
2160 of the basedir could be done in the loop below, which would
2161 save some calls to svn_uri_get_longest_ancestor. I decided to do it
2162 this way because I thought it would be simpler, since this way, we don't
2163 even do the loop if we don't need to condense the targets. */
2165 removed = apr_pcalloc(scratch_pool, (targets->nelts *
2166 sizeof(svn_boolean_t)));
2167 uri_targets = apr_array_make(scratch_pool, targets->nelts,
2168 sizeof(const char *));
2170 APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon;
2172 for (i = 1; i < targets->nelts; ++i)
2174 const char *uri = svn_uri_canonicalize(
2175 APR_ARRAY_IDX(targets, i, const char *),
2177 APR_ARRAY_PUSH(uri_targets, const char *) = uri;
2179 /* If the commonmost ancestor so far is empty, there's no point
2180 in continuing to search for a common ancestor at all. But
2181 we'll keep looping for the sake of canonicalizing the
2182 targets, I suppose. */
2183 if (**pcommon != '\0')
2184 *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri,
2188 *pcommon = apr_pstrdup(result_pool, *pcommon);
2190 if (pcondensed_targets != NULL)
2194 if (remove_redundancies)
2196 /* Find the common part of each pair of targets. If
2197 common part is equal to one of the uris, the other
2198 is a child of it, and can be removed. If a target is
2199 equal to *pcommon, it can also be removed. */
2201 /* First pass: when one non-removed target is a child of
2202 another non-removed target, remove the child. */
2203 for (i = 0; i < uri_targets->nelts; ++i)
2210 for (j = i + 1; j < uri_targets->nelts; ++j)
2214 const char *ancestor;
2219 uri_i = APR_ARRAY_IDX(uri_targets, i, const char *);
2220 uri_j = APR_ARRAY_IDX(uri_targets, j, const char *);
2222 ancestor = svn_uri_get_longest_ancestor(uri_i,
2226 if (*ancestor == '\0')
2229 if (strcmp(ancestor, uri_i) == 0)
2234 else if (strcmp(ancestor, uri_j) == 0)
2242 /* Second pass: when a target is the same as *pcommon,
2243 remove the target. */
2244 for (i = 0; i < uri_targets->nelts; ++i)
2246 const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i,
2249 if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i]))
2257 /* Now create the return array, and copy the non-removed items */
2258 basedir_len = strlen(*pcommon);
2259 *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2260 sizeof(const char *));
2262 for (i = 0; i < uri_targets->nelts; ++i)
2264 const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *);
2266 /* Skip this if it's been removed. */
2270 /* If a common prefix was found, condensed_targets are given
2271 relative to that prefix. */
2272 if (basedir_len > 0)
2274 /* Only advance our pointer past a dirent separator if
2275 REL_ITEM isn't the same as *PCOMMON.
2277 If *PCOMMON is a root dirent, basedir_len will already
2278 include the closing '/', so never advance the pointer
2281 rel_item += basedir_len;
2282 if ((rel_item[0] == '/') ||
2283 (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len)))
2289 APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2290 = svn_path_uri_decode(rel_item, result_pool);
2294 return SVN_NO_ERROR;
/* Test whether a path merged onto BASE_PATH stays under that root.

   *UNDER_ROOT starts out FALSE and *RESULT_PATH starts out NULL.  The
   merge is done by apr_filepath_merge() with the
   APR_FILEPATH_NOTABOVEROOT and APR_FILEPATH_SECUREROOTTEST flags:
   - APR_SUCCESS: *RESULT_PATH becomes the merged path, canonicalized by
     svn_dirent_canonicalize() in RESULT_POOL;
   - APR_EABOVEROOT: *UNDER_ROOT is FALSE and no error is returned;
   - any other status is returned wrapped by svn_error_wrap_apr(). */
2298 svn_dirent_is_under_root(svn_boolean_t *under_root,
2299 const char **result_path,
2300 const char *base_path,
2302 apr_pool_t *result_pool)
2304 apr_status_t status;
2307 *under_root = FALSE;
2309 *result_path = NULL;
2311 status = apr_filepath_merge(&full_path,
2314 APR_FILEPATH_NOTABOVEROOT
2315 | APR_FILEPATH_SECUREROOTTEST,
2318 if (status == APR_SUCCESS)
2321 *result_path = svn_dirent_canonicalize(full_path, result_pool);
2323 return SVN_NO_ERROR;
2325 else if (status == APR_EABOVEROOT)
2327 *under_root = FALSE;
2328 return SVN_NO_ERROR;
2331 return svn_error_wrap_apr(status, NULL);
/* Convert the canonical file URL named URL into a local dirent, stored
   in *DIRENT and allocated in POOL.

   URL must be canonical (asserted) and must start with the file scheme
   prefix, otherwise SVN_ERR_RA_ILLEGAL_URL is returned.  The host name is
   the text between that prefix and the next slash; it is URI-decoded, and
   an empty host name or "localhost" counts as no host name at all.

   Under SVN_USE_DOS_PATHS the decoded path may start with a drive letter
   followed by a colon or a vertical bar; the drive letter is upper-cased
   and a bare drive becomes a drive root.  A remaining host name produces
   a UNC path, and a URL consisting of a host name only is rejected.

   On other platforms a remaining host name is rejected with
   SVN_ERR_RA_ILLEGAL_URL and *DIRENT is simply the URI-decoded path. */
2335 svn_uri_get_dirent_from_file_url(const char **dirent,
2339 const char *hostname, *path;
2341 SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool));
2343 /* Verify that the URL is well-formed (loosely) */
2345 /* First, check for the "file://" prefix. */
2346 if (strncmp(url, "file://", 7) != 0)
2347 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2348 _("Local URL '%s' does not contain 'file://' "
2351 /* Find the HOSTNAME portion and the PATH portion of the URL. The host
2352 name is between the "file://" prefix and the next occurrence of '/'. We
2353 are considering everything from that '/' until the end of the URL to be
2354 the absolute path portion of the URL.
2355 If we got just "file://", treat it the same as "file:///". */
2357 path = strchr(hostname, '/');
2359 hostname = apr_pstrmemdup(pool, hostname, path - hostname);
2363 /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */
2364 if (*hostname == '\0')
2368 hostname = svn_path_uri_decode(hostname, pool);
2369 if (strcmp(hostname, "localhost") == 0)
2373 /* Duplicate the URL, starting at the top of the path.
2374 At the same time, we URI-decode the path. */
2375 #ifdef SVN_USE_DOS_PATHS
2376 /* On Windows, we'll typically have to skip the leading / if the
2377 path starts with a drive letter. Like most Web browsers, we
2378 support two variants of this scheme:
2383 Note that, at least on WinNT and above, file:////./X:/path will
2384 also work, so we must make sure the transformation doesn't break
2385 that, and file:///path (that looks within the current drive
2386 only) should also keep working.
2387 If we got a non-empty hostname other than localhost, we convert this
2388 into an UNC path. In this case, we obviously don't strip the slash
2389 even if the path looks like it starts with a drive letter.
2392 static const char valid_drive_letters[] =
2393 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2394 /* Casting away const! */
2395 char *dup_path = (char *)svn_path_uri_decode(path, pool);
2397 /* This check assumes ':' and '|' are already decoded! */
2398 if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1])
2399 && (dup_path[2] == ':' || dup_path[2] == '|'))
2401 /* Skip the leading slash. */
2404 if (dup_path[1] == '|')
2407 if (dup_path[2] == '/' || dup_path[2] == '\\' || dup_path[2] == '\0')
2409 /* Dirents have upper case drive letters in their canonical form */
2410 dup_path[0] = canonicalize_to_upper(dup_path[0]);
2412 if (dup_path[2] == '\0')
2414 /* A valid dirent for the driveroot must be like "C:/" instead of
2415 just "C:" or svn_dirent_join() will use the current directory
2416 on the drive instead */
2417 char *new_path = apr_pcalloc(pool, 4);
2418 new_path[0] = dup_path[0];
2422 dup_path = new_path;
2425 dup_path[2] = '/'; /* Ensure not relative for '\' after drive! */
2430 if (dup_path[0] == '/' && dup_path[1] == '\0')
2431 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2432 _("Local URL '%s' contains only a hostname, "
2435 /* We still know that the path starts with a slash. */
2436 *dirent = apr_pstrcat(pool, "//", hostname, dup_path, SVN_VA_NULL);
2441 #else /* !SVN_USE_DOS_PATHS */
2442 /* Currently, the only hostnames we are allowing on non-Win32 platforms
2443 are the empty string and 'localhost'. */
2445 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2446 _("Local URL '%s' contains unsupported hostname"),
2449 *dirent = svn_path_uri_decode(path, pool);
2450 #endif /* SVN_USE_DOS_PATHS */
2451 return SVN_NO_ERROR;
/* Build a file URL for DIRENT and store it in *URL, allocated in POOL.

   DIRENT must be canonical (checked with assert).  It is made absolute by
   svn_dirent_get_absolute() and URI-encoded by svn_path_uri_encode().

   Without SVN_USE_DOS_PATHS the encoded path is appended to the file
   scheme prefix, except that the root directory contributes nothing.
   With SVN_USE_DOS_PATHS a path starting with a slash is expected to be
   a UNC path and is appended to "file:"; any other path is appended to
   the prefix with three slashes, and the last character of the result is
   then tested for a trailing slash. */
2455 svn_uri_get_file_url_from_dirent(const char **url,
2459 assert(svn_dirent_is_canonical(dirent, pool));
2461 SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool));
2463 dirent = svn_path_uri_encode(dirent, pool);
2465 #ifndef SVN_USE_DOS_PATHS
2466 if (dirent[0] == '/' && dirent[1] == '\0')
2467 dirent = NULL; /* "file://" is the canonical form of "file:///" */
2469 *url = apr_pstrcat(pool, "file://", dirent, SVN_VA_NULL);
2471 if (dirent[0] == '/')
2473 /* Handle UNC paths //server/share -> file://server/share */
2474 assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */
2476 *url = apr_pstrcat(pool, "file:", dirent, SVN_VA_NULL);
2480 char *uri = apr_pstrcat(pool, "file:///", dirent, SVN_VA_NULL);
2481 apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent);
2483 /* "C:/" is a canonical dirent on Windows,
2484 but "file:///C:/" is not a canonical uri */
2485 if (uri[len-1] == '/')
2492 return SVN_NO_ERROR;
2497 /* -------------- The fspath API (see private/svn_fspath.h) -------------- */
/* An fspath is canonical when it starts with a slash and everything
   after that slash is a canonical relpath. */
2500 svn_fspath__is_canonical(const char *fspath)
2502 return fspath[0] == '/' && relpath_is_canonical(fspath + 1);
/* Return the canonical form of FSPATH.  The root fspath (a lone slash) is
   tested for first; otherwise the result is a slash followed by
   svn_relpath_canonicalize(FSPATH), concatenated in POOL. */
2507 svn_fspath__canonicalize(const char *fspath,
2510 if ((fspath[0] == '/') && (fspath[1] == '\0'))
2513 return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool),
/* Tell whether the first LEN bytes of FSPATH are the root fspath, i.e.
   LEN is 1 and the single character is a slash. */
2519 svn_fspath__is_root(const char *fspath, apr_size_t len)
2521 /* directory is root if it's equal to '/' */
2522 return (len == 1 && fspath[0] == '/');
/* Return what svn_relpath_skip_ancestor() returns for PARENT_FSPATH and
   CHILD_FSPATH once their leading slashes are skipped.  Both arguments
   must be canonical fspaths (checked with assert). */
2527 svn_fspath__skip_ancestor(const char *parent_fspath,
2528 const char *child_fspath)
2530 assert(svn_fspath__is_canonical(parent_fspath));
2531 assert(svn_fspath__is_canonical(child_fspath));
2533 return svn_relpath_skip_ancestor(parent_fspath + 1, child_fspath + 1);
/* Return the directory part of the canonical fspath FSPATH, allocated in
   POOL.  The root fspath is its own dirname (a copy is returned); for any
   other path the result is a slash followed by svn_relpath_dirname() of
   the part after the leading slash. */
2538 svn_fspath__dirname(const char *fspath,
2541 assert(svn_fspath__is_canonical(fspath));
2543 if (fspath[0] == '/' && fspath[1] == '\0')
2544 return apr_pstrdup(pool, fspath);
2546 return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool),
/* Compute the last component of the canonical fspath FSPATH by calling
   svn_relpath_basename() on the part after the leading slash, using POOL.
   The result is asserted to contain no slash. */
2552 svn_fspath__basename(const char *fspath,
2556 assert(svn_fspath__is_canonical(fspath));
2558 result = svn_relpath_basename(fspath + 1, pool);
2560 assert(strchr(result, '/') == NULL);
/* Split FSPATH into its directory part and its last component by calling
   svn_fspath__dirname() and svn_fspath__basename(); both results are
   allocated in RESULT_POOL and stored in *DIRPATH and *BASE_NAME.
   DIRPATH and BASE_NAME must not be the same pointer (asserted). */
2565 svn_fspath__split(const char **dirpath,
2566 const char **base_name,
2568 apr_pool_t *result_pool)
2570 assert(dirpath != base_name);
2573 *dirpath = svn_fspath__dirname(fspath, result_pool);
2576 *base_name = svn_fspath__basename(fspath, result_pool);
/* Join the canonical fspath FSPATH with the canonical relpath RELPATH
   (both asserted) into a new string allocated in RESULT_POOL:
   - an empty RELPATH yields a copy of FSPATH;
   - a one-character FSPATH (the root) yields a slash plus RELPATH, which
     avoids a doubled slash;
   - otherwise the two are joined with a single slash.
   The result is asserted to be a canonical fspath again. */
2580 svn_fspath__join(const char *fspath,
2581 const char *relpath,
2582 apr_pool_t *result_pool)
2585 assert(svn_fspath__is_canonical(fspath));
2586 assert(svn_relpath_is_canonical(relpath));
2588 if (relpath[0] == '\0')
2589 result = apr_pstrdup(result_pool, fspath);
2590 else if (fspath[1] == '\0')
2591 result = apr_pstrcat(result_pool, "/", relpath, SVN_VA_NULL);
2593 result = apr_pstrcat(result_pool, fspath, "/", relpath, SVN_VA_NULL);
2595 assert(svn_fspath__is_canonical(result));
/* Compute the longest common ancestor of the canonical fspaths FSPATH1
   and FSPATH2 (both asserted): a slash followed by
   svn_relpath_get_longest_ancestor() of the parts after the leading
   slashes, concatenated in RESULT_POOL.  The result is asserted to be a
   canonical fspath. */
2600 svn_fspath__get_longest_ancestor(const char *fspath1,
2601 const char *fspath2,
2602 apr_pool_t *result_pool)
2605 assert(svn_fspath__is_canonical(fspath1));
2606 assert(svn_fspath__is_canonical(fspath2));
2608 result = apr_pstrcat(result_pool, "/",
2609 svn_relpath_get_longest_ancestor(fspath1 + 1,
2614 assert(svn_fspath__is_canonical(result));
2621 /* -------------- The urlpath API (see private/svn_fspath.h) ------------- */
/* Canonicalize URI, which may be a full URL or a server-relative path.
   A URL (svn_path_is_url) goes through svn_uri_canonicalize(); anything
   else goes through svn_fspath__canonicalize() and is then URI-decoded
   and re-encoded so that the percent-escapes end up normalized.  All
   allocations use POOL. */
2624 svn_urlpath__canonicalize(const char *uri,
2627 if (svn_path_is_url(uri))
2629 uri = svn_uri_canonicalize(uri, pool);
2633 uri = svn_fspath__canonicalize(uri, pool);
2634 /* Do a little dance to normalize hex encoding. */
2635 uri = svn_path_uri_decode(uri, pool);
2636 uri = svn_path_uri_encode(uri, pool);
2642 /* -------------- The cert API (see private/svn_cert.h) ------------- */
/* Compare the DNS name PATTERN against HOSTNAME, byte by byte and without
   regard to ASCII case (both sides go through canonicalize_to_lower).

   PATTERN may start with a wildcard label, an asterisk followed by a dot.
   In that case the whole first label of HOSTNAME, up to its first dot, is
   consumed by the wildcard, and a HOSTNAME whose first label is empty is
   treated specially.  After the main loop a single trailing dot on
   HOSTNAME is tolerated.  Finally both strings must have been consumed
   completely; otherwise the names do not match. */
2645 svn_cert__match_dns_identity(svn_string_t *pattern, svn_string_t *hostname)
2647 apr_size_t pattern_pos = 0, hostname_pos = 0;
2649 /* support leading wildcards that are the only character in the
2650 * left-most label. */
2651 if (pattern->len >= 2 &&
2652 pattern->data[pattern_pos] == '*' &&
2653 pattern->data[pattern_pos + 1] == '.')
2655 while (hostname_pos < hostname->len &&
2656 hostname->data[hostname_pos] != '.')
2660 /* Assume that the wildcard must match something. Rule 2 says
2661 * that *.example.com should not match example.com. If the wildcard
2662 * ends up not matching anything then it matches .example.com which
2663 * seems to be essentially the same as just example.com */
2664 if (hostname_pos == 0)
2670 while (pattern_pos < pattern->len && hostname_pos < hostname->len)
2672 char pattern_c = pattern->data[pattern_pos];
2673 char hostname_c = hostname->data[hostname_pos];
2675 /* fold case as described in RFC 4343.
2676 * Note: We actually convert to lowercase, since our URI
2677 * canonicalization code converts to lowercase and generally
2678 * most certs are issued with lowercase DNS names, meaning
2679 * this avoids the fold operation in most cases. The RFC
2680 * suggests the opposite transformation, but doesn't require
2681 * any specific implementation in any case. It is critical
2682 * that this folding be locale independent so you can't use
2684 pattern_c = canonicalize_to_lower(pattern_c);
2685 hostname_c = canonicalize_to_lower(hostname_c);
2687 if (pattern_c != hostname_c)
2694 /* characters match so skip both */
2700 /* ignore a trailing period on the hostname since this has no effect on the
2701 * security of the matching. See the following for the long explanation as
2703 * https://bugzilla.mozilla.org/show_bug.cgi?id=134402#c28
2705 if (pattern_pos == pattern->len &&
2706 hostname_pos == hostname->len - 1 &&
2707 hostname->data[hostname_pos] == '.')
2710 if (pattern_pos != pattern->len || hostname_pos != hostname->len)
2712 /* end didn't match */