2 * dirent_uri.c: a library to manipulate URIs and directory entries.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
33 #include "svn_private_config.h"
34 #include "svn_string.h"
35 #include "svn_dirent_uri.h"
37 #include "svn_ctype.h"
39 #include "dirent_uri.h"
40 #include "private/svn_fspath.h"
41 #include "private/svn_cert.h"
43 /* The canonical empty path. Can this be changed? Well, change the empty
44 test below and the path library will work, not so sure about the fs/wc
46 #define SVN_EMPTY_PATH ""
48 /* TRUE if s is the canonical empty path, FALSE otherwise */
49 #define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')
51 /* TRUE if s,n is the platform's empty path ("."), FALSE otherwise. Can
52 this be changed? Well, the path library will work, not so sure about
54 #define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
56 /* This check must match the check on top of dirent_uri-tests.c and
58 #if defined(WIN32) || defined(__CYGWIN__) || defined(__OS2__)
59 #define SVN_USE_DOS_PATHS
62 /* Path type definition. Used only by internal functions. */
63 typedef enum path_type_t {
70 /**** Forward declarations *****/
73 relpath_is_canonical(const char *relpath);
76 /**** Internal implementation functions *****/
78 /* Return an internal-style new path based on PATH, allocated in POOL.
80 * "Internal-style" means that separators are all '/'.
83 internal_style(const char *path, apr_pool_t *pool)
85 #if '/' != SVN_PATH_LOCAL_SEPARATOR
87 char *p = apr_pstrdup(pool, path);
90 /* Convert all local-style separators to the canonical ones. */
91 for (; *p != '\0'; ++p)
92 if (*p == SVN_PATH_LOCAL_SEPARATOR)
/* Locale insensitive tolower() for converting parts of dirents and urls
   while canonicalizing.  Only 'A'..'Z' are mapped to 'a'..'z'; any other
   value of C is returned unchanged. */
static char
canonicalize_to_lower(char c)
{
  if (c < 'A' || c > 'Z')
    return c;
  else
    return (char)(c - 'A' + 'a');
}
/* Locale insensitive toupper() for converting parts of dirents and urls
   while canonicalizing.  Only 'a'..'z' are mapped to 'A'..'Z'; any other
   value of C is returned unchanged. */
static char
canonicalize_to_upper(char c)
{
  if (c < 'a' || c > 'z')
    return c;
  else
    return (char)(c - 'a' + 'A');
}
122 /* Calculates the length of the dirent absolute or non absolute root in
123 DIRENT, return 0 if dirent is not rooted */
125 dirent_root_length(const char *dirent, apr_size_t len)
127 #ifdef SVN_USE_DOS_PATHS
128 if (len >= 2 && dirent[1] == ':' &&
129 ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
130 (dirent[0] >= 'a' && dirent[0] <= 'z')))
132 return (len > 2 && dirent[2] == '/') ? 3 : 2;
135 if (len > 2 && dirent[0] == '/' && dirent[1] == '/')
139 while (i < len && dirent[i] != '/')
143 return len; /* Cygwin drive alias, invalid path on WIN32 */
147 while (i < len && dirent[i] != '/')
152 #endif /* SVN_USE_DOS_PATHS */
153 if (len >= 1 && dirent[0] == '/')
160 /* Return the length of substring necessary to encompass the entire
161 * previous dirent segment in DIRENT, which should be a LEN byte string.
163 * A trailing slash will not be included in the returned length except
164 * in the case in which DIRENT is absolute and there are no more
168 dirent_previous_segment(const char *dirent,
175 while (len > 0 && dirent[len] != '/'
176 #ifdef SVN_USE_DOS_PATHS
177 && (dirent[len] != ':' || len != 1)
178 #endif /* SVN_USE_DOS_PATHS */
182 /* check if the remaining segment including trailing '/' is a root dirent */
183 if (dirent_root_length(dirent, len+1) == len + 1)
189 /* Calculates the length occupied by the schema defined root of URI */
191 uri_schema_root_length(const char *uri, apr_size_t len)
195 for (i = 0; i < len; i++)
199 if (i > 0 && uri[i-1] == ':' && i < len-1 && uri[i+1] == '/')
201 /* We have an absolute uri */
202 if (i == 5 && strncmp("file", uri, 4) == 0)
203 return 7; /* file:// */
206 for (i += 2; i < len; i++)
210 return len; /* Only a hostname is found */
221 /* Returns TRUE if svn_dirent_is_absolute(dirent) or when dirent has
222 a non absolute root. (E.g. '/' or 'F:' on Windows) */
224 dirent_is_rooted(const char *dirent)
229 /* Root on all systems */
230 if (dirent[0] == '/')
233 /* On Windows, dirent is also absolute when it starts with 'H:' or 'H:/'
234 where 'H' is any letter. */
235 #ifdef SVN_USE_DOS_PATHS
236 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
237 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
240 #endif /* SVN_USE_DOS_PATHS */
245 /* Return the length of substring necessary to encompass the entire
246 * previous relpath segment in RELPATH, which should be a LEN byte string.
248 * A trailing slash will not be included in the returned length.
251 relpath_previous_segment(const char *relpath,
258 while (len > 0 && relpath[len] != '/')
264 /* Return the length of substring necessary to encompass the entire
265 * previous uri segment in URI, which should be a LEN byte string.
267 * A trailing slash will not be included in the returned length except
268 * in the case in which URI is absolute and there are no more
272 uri_previous_segment(const char *uri,
275 apr_size_t root_length;
280 root_length = uri_schema_root_length(uri, len);
283 while (len > root_length && uri[i] != '/')
286 if (i == 0 && len > 1 && *uri == '/')
292 /* Return the canonicalized version of PATH, of type TYPE, allocated in
296 canonicalize(path_type_t type, const char *path, apr_pool_t *pool)
301 apr_size_t schemelen = 0;
302 apr_size_t canon_segments = 0;
303 svn_boolean_t url = FALSE;
304 char *schema_data = NULL;
306 /* "" is already canonical, so just return it; note that later code
307 depends on path not being zero-length. */
308 if (SVN_PATH_IS_EMPTY(path))
310 assert(type != type_uri);
314 dst = canon = apr_pcalloc(pool, strlen(path) + 1);
316 /* If this is supposed to be an URI, it should start with
317 "scheme://". We'll copy the scheme, host name, etc. to DST and
320 if (type == type_uri)
324 while (*src && (*src != '/') && (*src != ':'))
327 if (*src == ':' && *(src+1) == '/' && *(src+2) == '/')
333 /* Found a scheme, convert to lowercase and copy to dst. */
337 *(dst++) = canonicalize_to_lower((*src++));
346 /* This might be the hostname */
348 while (*src && (*src != '/') && (*src != '@'))
353 /* Copy the username & password. */
354 seglen = src - seg + 1;
355 memcpy(dst, seg, seglen);
362 /* Found a hostname, convert to lowercase and copy to dst. */
365 *(dst++) = *(src++); /* Copy '[' */
368 || (*src >= '0' && (*src <= '9'))
369 || (*src >= 'a' && (*src <= 'f'))
370 || (*src >= 'A' && (*src <= 'F')))
372 *(dst++) = canonicalize_to_lower((*src++));
376 *(dst++) = *(src++); /* Copy ']' */
379 while (*src && (*src != '/') && (*src != ':'))
380 *(dst++) = canonicalize_to_lower((*src++));
384 /* We probably have a port number: Is it a default portnumber
385 which doesn't belong in a canonical url? */
386 if (src[1] == '8' && src[2] == '0'
387 && (src[3]== '/'|| !src[3])
388 && !strncmp(canon, "http:", 5))
392 else if (src[1] == '4' && src[2] == '4' && src[3] == '3'
393 && (src[4]== '/'|| !src[4])
394 && !strncmp(canon, "https:", 6))
398 else if (src[1] == '3' && src[2] == '6'
399 && src[3] == '9' && src[4] == '0'
400 && (src[5]== '/'|| !src[5])
401 && !strncmp(canon, "svn:", 4))
405 else if (src[1] == '/' || !src[1])
410 while (*src && (*src != '/'))
411 *(dst++) = canonicalize_to_lower((*src++));
414 /* Copy trailing slash, or null-terminator. */
417 /* Move src and dst forward only if we are not
418 * at null-terminator yet. */
430 /* Copy to DST any separator or drive letter that must come before the
431 first regular path segment. */
432 if (! url && type != type_relpath)
435 /* If this is an absolute path, then just copy over the initial
436 separator character. */
441 #ifdef SVN_USE_DOS_PATHS
442 /* On Windows permit two leading separator characters which means an
444 if ((type == type_dirent) && *src == '/')
446 #endif /* SVN_USE_DOS_PATHS */
448 #ifdef SVN_USE_DOS_PATHS
449 /* On Windows the first segment can be a drive letter, which we normalize
451 else if (type == type_dirent &&
452 ((*src >= 'a' && *src <= 'z') ||
453 (*src >= 'A' && *src <= 'Z')) &&
456 *(dst++) = canonicalize_to_upper(*(src++));
457 /* Leave the ':' to be processed as (or as part of) a path segment
458 by the following code block, so we need not care whether it has
461 #endif /* SVN_USE_DOS_PATHS */
466 /* Parse each segment, finding the closing '/' (which might look
467 like '%2F' for URIs). */
468 const char *next = src;
469 apr_size_t slash_len = 0;
473 && (! (type == type_uri && next[0] == '%' && next[1] == '2' &&
474 canonicalize_to_upper(next[2]) == 'F')))
479 /* Record how long our "slash" is. */
482 else if (type == type_uri && next[0] == '%')
488 || (seglen == 1 && src[0] == '.')
489 || (type == type_uri && seglen == 3 && src[0] == '%' && src[1] == '2'
490 && canonicalize_to_upper(src[2]) == 'E'))
492 /* Empty or noop segment, so do nothing. (For URIs, '%2E'
493 is equivalent to '.'). */
495 #ifdef SVN_USE_DOS_PATHS
496 /* If this is the first path segment of a file:// URI and it contains a
497 windows drive letter, convert the drive letter to upper case. */
498 else if (url && canon_segments == 1 && seglen == 2 &&
499 (strncmp(canon, "file:", 5) == 0) &&
500 src[0] >= 'a' && src[0] <= 'z' && src[1] == ':')
502 *(dst++) = canonicalize_to_upper(src[0]);
508 #endif /* SVN_USE_DOS_PATHS */
511 /* An actual segment, append it to the destination path */
512 memcpy(dst, src, seglen);
519 /* Skip over trailing slash to the next segment. */
520 src = next + slash_len;
523 /* Remove the trailing slash if there was at least one
524 * canonical segment and the last segment ends with a slash.
526 * But keep in mind that, for URLs, the scheme counts as a
527 * canonical segment -- so if path is ONLY a scheme (such
528 * as "https://") we should NOT remove the trailing slash. */
529 if ((canon_segments > 0 && *(dst - 1) == '/')
530 && ! (url && path[schemelen] == '\0'))
537 #ifdef SVN_USE_DOS_PATHS
538 /* Skip leading double slashes when there are less than 2
539 * canon segments. UNC paths *MUST* have two segments. */
540 if ((type == type_dirent) && canon[0] == '/' && canon[1] == '/')
542 if (canon_segments < 2)
546 /* Now we're sure this is a valid UNC path, convert the server name
547 (the first path segment) to lowercase as Windows treats it as case
549 Note: normally the share name is treated as case insensitive too,
550 but it seems to be possible to configure Samba to treat those as
551 case sensitive, so better leave that alone. */
552 for (dst = canon + 2; *dst && *dst != '/'; dst++)
553 *dst = canonicalize_to_lower(*dst);
556 #endif /* SVN_USE_DOS_PATHS */
558 /* Check the normalization of characters in a uri */
571 if (!svn_ctype_isxdigit(*(src+1)) ||
572 !svn_ctype_isxdigit(*(src+2)))
578 if (!svn_uri__char_validity[(unsigned char)*src])
587 apr_size_t pre_schema_size = (apr_size_t)(schema_data - canon);
589 dst = apr_palloc(pool, (apr_size_t)(src - canon) + need_extra + 1);
590 memcpy(dst, canon, pre_schema_size);
593 dst += pre_schema_size;
608 if (!svn_ctype_isxdigit(*(src+1)) ||
609 !svn_ctype_isxdigit(*(src+2)))
620 digitz[0] = *(++src);
621 digitz[1] = *(++src);
624 val = (int)strtol(digitz, NULL, 16);
626 if (svn_uri__char_validity[(unsigned char)val])
627 *(dst++) = (char)val;
631 *(dst++) = canonicalize_to_upper(digitz[0]);
632 *(dst++) = canonicalize_to_upper(digitz[1]);
637 if (!svn_uri__char_validity[(unsigned char)*src])
639 apr_snprintf(dst, 4, "%%%02X", (unsigned char)*src);
654 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
655 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
656 * PATH1 and PATH2 are regular paths.
658 * If the two paths do not share a common ancestor, return 0.
660 * New strings are allocated in POOL.
663 get_longest_ancestor_length(path_type_t types,
668 apr_size_t path1_len, path2_len;
670 apr_size_t last_dirsep = 0;
671 #ifdef SVN_USE_DOS_PATHS
672 svn_boolean_t unc = FALSE;
675 path1_len = strlen(path1);
676 path2_len = strlen(path2);
678 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
681 while (path1[i] == path2[i])
683 /* Keep track of the last directory separator we hit. */
689 /* If we get to the end of either path, break out. */
690 if ((i == path1_len) || (i == path2_len))
694 /* two special cases:
695 1. '/' is the longest common ancestor of '/' and '/foo' */
696 if (i == 1 && path1[0] == '/' && path2[0] == '/')
698 /* 2. '' is the longest common ancestor of any non-matching
699 * strings 'foo' and 'bar' */
700 if (types == type_dirent && i == 0)
703 /* Handle some windows specific cases */
704 #ifdef SVN_USE_DOS_PATHS
705 if (types == type_dirent)
707 /* don't count the '//' from UNC paths */
708 if (last_dirsep == 1 && path1[0] == '/' && path1[1] == '/')
715 if (i == 3 && path1[2] == '/' && path1[1] == ':')
718 /* Cannot use SVN_ERR_ASSERT here, so we'll have to crash, sorry.
719 * Note that this assertion triggers only if the code above has
720 * been broken. The code below relies on this assertion, because
721 * it uses [i - 1] as index. */
725 if ((path1[i - 1] == ':' && path2[i] == '/') ||
726 (path2[i - 1] == ':' && path1[i] == '/'))
729 if (path1[i - 1] == ':' || path2[i - 1] == ':')
732 #endif /* SVN_USE_DOS_PATHS */
734 /* last_dirsep is now the offset of the last directory separator we
735 crossed before reaching a non-matching byte. i is the offset of
736 that non-matching byte, and is guaranteed to be <= the length of
737 whichever path is shorter.
738 If one of the paths is the common part return that. */
739 if (((i == path1_len) && (path2[i] == '/'))
740 || ((i == path2_len) && (path1[i] == '/'))
741 || ((i == path1_len) && (i == path2_len)))
745 /* Nothing in common but the root folder '/' or 'X:/' for Windows
747 #ifdef SVN_USE_DOS_PATHS
750 /* X:/foo and X:/bar returns X:/ */
751 if ((types == type_dirent) &&
752 last_dirsep == 2 && path1[1] == ':' && path1[2] == '/'
753 && path2[1] == ':' && path2[2] == '/')
755 #endif /* SVN_USE_DOS_PATHS */
756 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
758 #ifdef SVN_USE_DOS_PATHS
766 /* Determine whether PATH2 is a child of PATH1.
768 * PATH2 is a child of PATH1 if
769 * 1) PATH1 is empty, and PATH2 is not empty and not an absolute path.
771 * 2) PATH2 is has n components, PATH1 has x < n components,
772 * and PATH1 matches PATH2 in all its x components.
773 * Components are separated by a slash, '/'.
775 * Pass type_uri for TYPE if PATH1 and PATH2 are URIs, and type_dirent if
776 * PATH1 and PATH2 are regular paths.
778 * If PATH2 is not a child of PATH1, return NULL.
780 * If PATH2 is a child of PATH1, and POOL is not NULL, allocate a copy
781 * of the child part of PATH2 in POOL and return a pointer to the
782 * newly allocated child part.
784 * If PATH2 is a child of PATH1, and POOL is NULL, return a pointer
785 * pointing to the child part of PATH2.
788 is_child(path_type_t type, const char *path1, const char *path2,
793 /* Allow "" and "foo" or "H:foo" to be parent/child */
794 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
796 if (SVN_PATH_IS_EMPTY(path2)) /* "" not a child */
799 /* check if this is an absolute path */
800 if ((type == type_uri) ||
801 (type == type_dirent && dirent_is_rooted(path2)))
804 /* everything else is child */
805 return pool ? apr_pstrdup(pool, path2) : path2;
808 /* Reach the end of at least one of the paths. How should we handle
809 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
810 appear to arise in the current Subversion code, it's not clear to me
811 if they should be parent/child or not. */
812 /* Hmmm... aren't paths assumed to be canonical in this function?
813 * How can "foo///bar" even happen if the paths are canonical? */
814 for (i = 0; path1[i] && path2[i]; i++)
815 if (path1[i] != path2[i])
818 /* FIXME: This comment does not really match
819 * the checks made in the code it refers to: */
820 /* There are two cases that are parent/child
822 .../foo path2[i] == '/'
827 Other root paths (like X:/) fall under the former case:
829 X:/foo path2[i] != '/'
831 Check for '//' to avoid matching '/' and '//srv'.
833 if (path1[i] == '\0' && path2[i])
835 if (path1[i - 1] == '/'
836 #ifdef SVN_USE_DOS_PATHS
837 || ((type == type_dirent) && path1[i - 1] == ':')
850 return pool ? apr_pstrdup(pool, path2 + i) : path2 + i;
852 else if (path2[i] == '/')
858 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
867 /* Otherwise, path2 isn't a child. */
872 /**** Public API functions ****/
875 svn_dirent_internal_style(const char *dirent, apr_pool_t *pool)
877 return svn_dirent_canonicalize(internal_style(dirent, pool), pool);
881 svn_dirent_local_style(const char *dirent, apr_pool_t *pool)
883 /* Internally, Subversion represents the current directory with the
884 empty string. But users like to see "." . */
885 if (SVN_PATH_IS_EMPTY(dirent))
888 #if '/' != SVN_PATH_LOCAL_SEPARATOR
890 char *p = apr_pstrdup(pool, dirent);
893 /* Convert all canonical separators to the local-style ones. */
894 for (; *p != '\0'; ++p)
896 *p = SVN_PATH_LOCAL_SEPARATOR;
904 svn_relpath__internal_style(const char *relpath,
907 return svn_relpath_canonicalize(internal_style(relpath, pool), pool);
911 /* We decided against using apr_filepath_root here because of the negative
912 performance impact (creating a pool and converting strings ). */
914 svn_dirent_is_root(const char *dirent, apr_size_t len)
916 #ifdef SVN_USE_DOS_PATHS
917 /* On Windows and Cygwin, 'H:' or 'H:/' (where 'H' is any letter)
918 are also root directories */
919 if ((len == 2 || ((len == 3) && (dirent[2] == '/'))) &&
920 (dirent[1] == ':') &&
921 ((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
922 (dirent[0] >= 'a' && dirent[0] <= 'z')))
925 /* On Windows and Cygwin //server/share is a root directory,
926 and on Cygwin //drive is a drive alias */
927 if (len >= 2 && dirent[0] == '/' && dirent[1] == '/'
928 && dirent[len - 1] != '/')
932 for (i = len; i >= 2; i--)
934 if (dirent[i] == '/')
942 return (segments <= 1);
944 return (segments == 1); /* //drive is invalid on plain Windows */
949 /* directory is root if it's equal to '/' */
950 if (len == 1 && dirent[0] == '/')
957 svn_uri_is_root(const char *uri, apr_size_t len)
959 assert(svn_uri_is_canonical(uri, NULL));
960 return (len == uri_schema_root_length(uri, len));
963 char *svn_dirent_join(const char *base,
964 const char *component,
967 apr_size_t blen = strlen(base);
968 apr_size_t clen = strlen(component);
972 assert(svn_dirent_is_canonical(base, pool));
973 assert(svn_dirent_is_canonical(component, pool));
975 /* If the component is absolute, then return it. */
976 if (svn_dirent_is_absolute(component))
977 return apr_pmemdup(pool, component, clen + 1);
979 /* If either is empty return the other */
980 if (SVN_PATH_IS_EMPTY(base))
981 return apr_pmemdup(pool, component, clen + 1);
982 if (SVN_PATH_IS_EMPTY(component))
983 return apr_pmemdup(pool, base, blen + 1);
985 #ifdef SVN_USE_DOS_PATHS
986 if (component[0] == '/')
988 /* '/' is drive relative on Windows, not absolute like on Posix */
989 if (dirent_is_rooted(base))
991 /* Join component without '/' to root-of(base) */
992 blen = dirent_root_length(base, blen);
996 if (blen == 2 && base[1] == ':') /* "C:" case */
998 char *root = apr_pmemdup(pool, base, 3);
999 root[2] = '/'; /* We don't need the final '\0' */
1006 return apr_pstrndup(pool, base, blen);
1009 return apr_pmemdup(pool, component, clen + 1);
1011 else if (dirent_is_rooted(component))
1012 return apr_pmemdup(pool, component, clen + 1);
1013 #endif /* SVN_USE_DOS_PATHS */
1015 /* if last character of base is already a separator, don't add a '/' */
1017 if (base[blen - 1] == '/'
1018 #ifdef SVN_USE_DOS_PATHS
1019 || base[blen - 1] == ':'
1024 /* Construct the new, combined dirent. */
1025 dirent = apr_palloc(pool, blen + add_separator + clen + 1);
1026 memcpy(dirent, base, blen);
1029 memcpy(dirent + blen + add_separator, component, clen + 1);
1034 char *svn_dirent_join_many(apr_pool_t *pool, const char *base, ...)
1036 #define MAX_SAVED_LENGTHS 10
1037 apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
1038 apr_size_t total_len;
1048 total_len = strlen(base);
1050 assert(svn_dirent_is_canonical(base, pool));
1052 /* if last character of base is already a separator, don't add a '/' */
1055 || base[total_len - 1] == '/'
1056 #ifdef SVN_USE_DOS_PATHS
1057 || base[total_len - 1] == ':'
1062 saved_lengths[0] = total_len;
1064 /* Compute the length of the resulting string. */
1068 while ((s = va_arg(va, const char *)) != NULL)
1072 assert(svn_dirent_is_canonical(s, pool));
1074 if (SVN_PATH_IS_EMPTY(s))
1077 if (nargs++ < MAX_SAVED_LENGTHS)
1078 saved_lengths[nargs] = len;
1080 if (dirent_is_rooted(s))
1085 #ifdef SVN_USE_DOS_PATHS
1086 if (!svn_dirent_is_absolute(s)) /* Handle non absolute roots */
1088 /* Set new base and skip the current argument */
1089 base = s = svn_dirent_join(base, s, pool);
1091 saved_lengths[0] = total_len = len = strlen(s);
1094 #endif /* SVN_USE_DOS_PATHS */
1096 base = ""; /* Don't add base */
1097 saved_lengths[0] = 0;
1101 if (s[len - 1] == '/'
1102 #ifdef SVN_USE_DOS_PATHS
1103 || s[len - 1] == ':'
1108 else if (nargs <= base_arg + 1)
1110 total_len += add_separator + len;
1114 total_len += 1 + len;
1119 /* base == "/" and no further components. just return that. */
1120 if (add_separator == 0 && total_len == 1)
1121 return apr_pmemdup(pool, "/", 2);
1123 /* we got the total size. allocate it, with room for a NULL character. */
1124 dirent = p = apr_palloc(pool, total_len + 1);
1126 /* if we aren't supposed to skip forward to an absolute component, and if
1127 this is not an empty base that we are skipping, then copy the base
1129 if (! SVN_PATH_IS_EMPTY(base))
1131 memcpy(p, base, len = saved_lengths[0]);
1137 while ((s = va_arg(va, const char *)) != NULL)
1139 if (SVN_PATH_IS_EMPTY(s))
1142 if (++nargs < base_arg)
1145 if (nargs < MAX_SAVED_LENGTHS)
1146 len = saved_lengths[nargs];
1150 /* insert a separator if we aren't copying in the first component
1151 (which can happen when base_arg is set). also, don't put in a slash
1152 if the prior character is a slash (occurs when prior component
1155 ( ! (nargs - 1 <= base_arg) || add_separator))
1158 /* copy the new component and advance the pointer */
1165 assert((apr_size_t)(p - dirent) == total_len);
1171 svn_relpath_join(const char *base,
1172 const char *component,
1175 apr_size_t blen = strlen(base);
1176 apr_size_t clen = strlen(component);
1179 assert(relpath_is_canonical(base));
1180 assert(relpath_is_canonical(component));
1182 /* If either is empty return the other */
1184 return apr_pmemdup(pool, component, clen + 1);
1186 return apr_pmemdup(pool, base, blen + 1);
1188 path = apr_palloc(pool, blen + 1 + clen + 1);
1189 memcpy(path, base, blen);
1191 memcpy(path + blen + 1, component, clen + 1);
1197 svn_dirent_dirname(const char *dirent, apr_pool_t *pool)
1199 apr_size_t len = strlen(dirent);
1201 assert(svn_dirent_is_canonical(dirent, pool));
1203 if (len == dirent_root_length(dirent, len))
1204 return apr_pstrmemdup(pool, dirent, len);
1206 return apr_pstrmemdup(pool, dirent, dirent_previous_segment(dirent, len));
1210 svn_dirent_basename(const char *dirent, apr_pool_t *pool)
1212 apr_size_t len = strlen(dirent);
1215 assert(!pool || svn_dirent_is_canonical(dirent, pool));
1217 if (svn_dirent_is_root(dirent, len))
1222 while (start > 0 && dirent[start - 1] != '/'
1223 #ifdef SVN_USE_DOS_PATHS
1224 && dirent[start - 1] != ':'
1231 return apr_pstrmemdup(pool, dirent + start, len - start);
1233 return dirent + start;
1237 svn_dirent_split(const char **dirpath,
1238 const char **base_name,
1242 assert(dirpath != base_name);
1245 *dirpath = svn_dirent_dirname(dirent, pool);
1248 *base_name = svn_dirent_basename(dirent, pool);
1252 svn_relpath_dirname(const char *relpath,
1255 apr_size_t len = strlen(relpath);
1257 assert(relpath_is_canonical(relpath));
1259 return apr_pstrmemdup(pool, relpath,
1260 relpath_previous_segment(relpath, len));
1264 svn_relpath_basename(const char *relpath,
1267 apr_size_t len = strlen(relpath);
1270 assert(relpath_is_canonical(relpath));
1273 while (start > 0 && relpath[start - 1] != '/')
1277 return apr_pstrmemdup(pool, relpath + start, len - start);
1279 return relpath + start;
1283 svn_relpath_split(const char **dirpath,
1284 const char **base_name,
1285 const char *relpath,
1288 assert(dirpath != base_name);
1291 *dirpath = svn_relpath_dirname(relpath, pool);
1294 *base_name = svn_relpath_basename(relpath, pool);
1298 svn_relpath_prefix(const char *relpath,
1300 apr_pool_t *result_pool)
1303 assert(relpath_is_canonical(relpath));
1305 if (max_components <= 0)
1308 for (end = relpath; *end; end++)
1312 if (!--max_components)
1317 return apr_pstrmemdup(result_pool, relpath, end-relpath);
1321 svn_uri_dirname(const char *uri, apr_pool_t *pool)
1323 apr_size_t len = strlen(uri);
1325 assert(svn_uri_is_canonical(uri, pool));
1327 if (svn_uri_is_root(uri, len))
1328 return apr_pstrmemdup(pool, uri, len);
1330 return apr_pstrmemdup(pool, uri, uri_previous_segment(uri, len));
1334 svn_uri_basename(const char *uri, apr_pool_t *pool)
1336 apr_size_t len = strlen(uri);
1339 assert(svn_uri_is_canonical(uri, NULL));
1341 if (svn_uri_is_root(uri, len))
1345 while (start > 0 && uri[start - 1] != '/')
1348 return svn_path_uri_decode(uri + start, pool);
1352 svn_uri_split(const char **dirpath,
1353 const char **base_name,
1357 assert(dirpath != base_name);
1360 *dirpath = svn_uri_dirname(uri, pool);
1363 *base_name = svn_uri_basename(uri, pool);
1367 svn_dirent_get_longest_ancestor(const char *dirent1,
1368 const char *dirent2,
1371 return apr_pstrndup(pool, dirent1,
1372 get_longest_ancestor_length(type_dirent, dirent1,
1377 svn_relpath_get_longest_ancestor(const char *relpath1,
1378 const char *relpath2,
1381 assert(relpath_is_canonical(relpath1));
1382 assert(relpath_is_canonical(relpath2));
1384 return apr_pstrndup(pool, relpath1,
1385 get_longest_ancestor_length(type_relpath, relpath1,
1390 svn_uri_get_longest_ancestor(const char *uri1,
1394 apr_size_t uri_ancestor_len;
1397 assert(svn_uri_is_canonical(uri1, NULL));
1398 assert(svn_uri_is_canonical(uri2, NULL));
1403 /* No shared protocol => no common prefix */
1404 if (uri1[i] != uri2[i])
1405 return apr_pmemdup(pool, SVN_EMPTY_PATH,
1406 sizeof(SVN_EMPTY_PATH));
1411 /* They're both URLs, so EOS can't come before ':' */
1412 assert((uri1[i] != '\0') && (uri2[i] != '\0'));
1417 i += 3; /* Advance past '://' */
1419 uri_ancestor_len = get_longest_ancestor_length(type_uri, uri1 + i,
1422 if (uri_ancestor_len == 0 ||
1423 (uri_ancestor_len == 1 && (uri1 + i)[0] == '/'))
1424 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
1426 return apr_pstrndup(pool, uri1, uri_ancestor_len + i);
1430 svn_dirent_is_child(const char *parent_dirent,
1431 const char *child_dirent,
1434 return is_child(type_dirent, parent_dirent, child_dirent, pool);
1438 svn_dirent_skip_ancestor(const char *parent_dirent,
1439 const char *child_dirent)
1441 apr_size_t len = strlen(parent_dirent);
1442 apr_size_t root_len;
1444 if (0 != strncmp(parent_dirent, child_dirent, len))
1445 return NULL; /* parent_dirent is no ancestor of child_dirent */
1447 if (child_dirent[len] == 0)
1448 return ""; /* parent_dirent == child_dirent */
1450 /* Child == parent + more-characters */
1452 root_len = dirent_root_length(child_dirent, strlen(child_dirent));
1454 /* Different root, e.g. ("" "/...") or ("//z" "//z/share") */
1457 /* Now, child == [root-of-parent] + [rest-of-parent] + more-characters.
1458 * It must be one of the following forms.
1460 * rlen parent child bad? rlen=len? c[len]=/?
1469 * 2 "a:b" "a:b/foo" *
1470 * 3 "a:/" "a:/foo" *
1471 * 3 "a:/b" "a:/bad" !
1472 * 3 "a:/b" "a:/b/foo" *
1473 * 5 "//s/s" "//s/s/foo" * *
1474 * 5 "//s/s/b" "//s/s/bad" !
1475 * 5 "//s/s/b" "//s/s/b/foo" *
1478 if (child_dirent[len] == '/')
1479 /* "parent|child" is one of:
1480 * "[a:]b|/foo" "[a:]/b|/foo" "//s/s|/foo" "//s/s/b|/foo" */
1481 return child_dirent + len + 1;
1483 if (root_len == len)
1484 /* "parent|child" is "|foo" "/|foo" "a:|foo" "a:/|foo" "//s/s|/foo" */
1485 return child_dirent + len;
1491 svn_relpath_skip_ancestor(const char *parent_relpath,
1492 const char *child_relpath)
1494 apr_size_t len = strlen(parent_relpath);
1496 assert(relpath_is_canonical(parent_relpath));
1497 assert(relpath_is_canonical(child_relpath));
1500 return child_relpath;
1502 if (0 != strncmp(parent_relpath, child_relpath, len))
1503 return NULL; /* parent_relpath is no ancestor of child_relpath */
1505 if (child_relpath[len] == 0)
1506 return ""; /* parent_relpath == child_relpath */
1508 if (child_relpath[len] == '/')
1509 return child_relpath + len + 1;
1517 uri_skip_ancestor(const char *parent_uri,
1518 const char *child_uri)
1520 apr_size_t len = strlen(parent_uri);
1522 assert(svn_uri_is_canonical(parent_uri, NULL));
1523 assert(svn_uri_is_canonical(child_uri, NULL));
1525 if (0 != strncmp(parent_uri, child_uri, len))
1526 return NULL; /* parent_uri is no ancestor of child_uri */
1528 if (child_uri[len] == 0)
1529 return ""; /* parent_uri == child_uri */
1531 if (child_uri[len] == '/')
1532 return child_uri + len + 1;
1538 svn_uri_skip_ancestor(const char *parent_uri,
1539 const char *child_uri,
1540 apr_pool_t *result_pool)
1542 const char *result = uri_skip_ancestor(parent_uri, child_uri);
1544 return result ? svn_path_uri_decode(result, result_pool) : NULL;
1548 svn_dirent_is_ancestor(const char *parent_dirent, const char *child_dirent)
1550 return svn_dirent_skip_ancestor(parent_dirent, child_dirent) != NULL;
1554 svn_uri__is_ancestor(const char *parent_uri, const char *child_uri)
1556 return uri_skip_ancestor(parent_uri, child_uri) != NULL;
1561 svn_dirent_is_absolute(const char *dirent)
1566 /* dirent is absolute if it starts with '/' on non-Windows platforms
1567 or with '//' on Windows platforms */
1568 if (dirent[0] == '/'
1569 #ifdef SVN_USE_DOS_PATHS
1570 && dirent[1] == '/' /* Single '/' depends on current drive */
1575 /* On Windows, dirent is also absolute when it starts with 'H:/'
1576 where 'H' is any letter. */
1577 #ifdef SVN_USE_DOS_PATHS
1578 if (((dirent[0] >= 'A' && dirent[0] <= 'Z')) &&
1579 (dirent[1] == ':') && (dirent[2] == '/'))
1581 #endif /* SVN_USE_DOS_PATHS */
/* Set *PABSOLUTE to an absolute, canonical form of the dirent RELATIVE.

   RELATIVE must not be a URL (asserted).  It is converted from UTF-8 and
   handed to apr_filepath_merge(); the merged path is then converted back
   to UTF-8 and canonicalized with svn_dirent_canonicalize() in POOL.

   For the case where APR cannot produce a merged path, RELATIVE itself is
   duplicated into POOL and returned if it is already absolute, canonical
   and free of backpath components; otherwise an SVN_ERR_BAD_FILENAME
   error wrapping the APR status is returned. */
1587 svn_dirent_get_absolute(const char **pabsolute,
1588 const char *relative,
1592 apr_status_t apr_err;
1593 const char *path_apr;
1595 SVN_ERR_ASSERT(! svn_path_is_url(relative));
1597 /* Merge the current working directory with the relative dirent. */
1598 SVN_ERR(svn_path_cstring_from_utf8(&path_apr, relative, pool));
1600 apr_err = apr_filepath_merge(&buffer, NULL,
1602 APR_FILEPATH_NOTRELATIVE,
1606 /* In some cases when the passed path or its ancestor(s) do not exist
1607 or no longer exist apr returns an error.
1609 In many of these cases we would like to return a path anyway, when the
1610 passed path was already a safe absolute path. So check for that now to
1613 svn_dirent_is_absolute() doesn't perform the necessary checks to see
1614 if the path doesn't need post processing to be in the canonical absolute
1618 if (svn_dirent_is_absolute(relative)
1619 && svn_dirent_is_canonical(relative, pool)
1620 && !svn_path_is_backpath_present(relative))
1622 *pabsolute = apr_pstrdup(pool, relative);
1623 return SVN_NO_ERROR;
1626 return svn_error_createf(SVN_ERR_BAD_FILENAME,
1627 svn_error_create(apr_err, NULL, NULL),
1628 _("Couldn't determine absolute path of '%s'"),
1629 svn_dirent_local_style(relative, pool));
1632 SVN_ERR(svn_path_cstring_to_utf8(pabsolute, buffer, pool));
1633 *pabsolute = svn_dirent_canonicalize(*pabsolute, pool);
1634 return SVN_NO_ERROR;
/* Return the canonical form of URI: a thin wrapper that runs the shared
   canonicalize() routine in type_uri mode, passing POOL through. */
1638 svn_uri_canonicalize(const char *uri, apr_pool_t *pool)
1640 return canonicalize(type_uri, uri, pool);
/* Return the canonical form of RELPATH: a thin wrapper that runs the
   shared canonicalize() routine in type_relpath mode, passing POOL
   through. */
1644 svn_relpath_canonicalize(const char *relpath, apr_pool_t *pool)
1646 return canonicalize(type_relpath, relpath, pool);
/* Return the canonical form of DIRENT, computed by canonicalize() in
   type_dirent mode with POOL.

   Under SVN_USE_DOS_PATHS the drive-root case described below is handled
   separately: a 4-byte, NUL-terminated buffer is allocated from POOL whose
   first character is the drive letter passed through
   canonicalize_to_upper(). */
1650 svn_dirent_canonicalize(const char *dirent, apr_pool_t *pool)
1652 const char *dst = canonicalize(type_dirent, dirent, pool);
1654 #ifdef SVN_USE_DOS_PATHS
1655 /* Handle a specific case on Windows where path == "X:/". Here we have to
1656 append the final '/', as svn_path_canonicalize will chop this off. */
1657 if (((dirent[0] >= 'A' && dirent[0] <= 'Z') ||
1658 (dirent[0] >= 'a' && dirent[0] <= 'z')) &&
1659 dirent[1] == ':' && dirent[2] == '/' &&
1662 char *dst_slash = apr_pcalloc(pool, 4);
1663 dst_slash[0] = canonicalize_to_upper(dirent[0]);
1666 dst_slash[3] = '\0';
1670 #endif /* SVN_USE_DOS_PATHS */
/* Report whether DIRENT is already in canonical form.

   Under SVN_USE_DOS_PATHS two prefixes get special treatment: UNC paths
   are checked by comparing DIRENT with the output of
   svn_dirent_canonicalize() (allocated in SCRATCH_POOL), and a leading
   drive letter is tested for being upper case.  The final verdict for the
   remaining input comes from relpath_is_canonical(PTR). */
1676 svn_dirent_is_canonical(const char *dirent, apr_pool_t *scratch_pool)
1678 const char *ptr = dirent;
1682 #ifdef SVN_USE_DOS_PATHS
1683 /* Check for UNC paths */
1686 /* TODO: Scan hostname and sharename and fall back to part code */
1688 /* ### Fall back to old implementation */
1689 return (strcmp(dirent, svn_dirent_canonicalize(dirent, scratch_pool))
1692 #endif /* SVN_USE_DOS_PATHS */
1694 #ifdef SVN_USE_DOS_PATHS
1695 else if (((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) &&
1698 /* The only canonical drive names are "A:"..."Z:", no lower case */
1699 if (*ptr < 'A' || *ptr > 'Z')
1707 #endif /* SVN_USE_DOS_PATHS */
1709 return relpath_is_canonical(ptr);
/* Shared canonical-form check for a relative path RELPATH.

   The tests in the body look, in order, for: a leading "." component
   (RELPATH is "." or starts with "./"), a trailing '/' or trailing "/.",
   an interior "/./" (found by scanning for '.' characters), and finally a
   doubled slash, detected with a rolling two-byte pattern. */
1712 static svn_boolean_t
1713 relpath_is_canonical(const char *relpath)
1715 const char *dot_pos, *ptr = relpath;
1717 unsigned pattern = 0;
1719 /* RELPATH is canonical if it has:
1721 * - no start and closing '/'
1725 /* invalid beginnings */
1729 if (ptr[0] == '.' && (ptr[1] == '/' || ptr[1] == '\0'))
1732 /* valid special cases */
1737 /* invalid endings */
1738 if (ptr[len-1] == '/' || (ptr[len-1] == '.' && ptr[len-2] == '/'))
1741 /* '.' are rare. So, search for them globally. There will often be no
1742 * more than one hit. Also note that we already checked for invalid
1743 * starts and endings, i.e. we only need to check for "/./"
1745 for (dot_pos = memchr(ptr, '.', len);
1747 dot_pos = strchr(dot_pos+1, '.'))
1748 if (dot_pos > ptr && dot_pos[-1] == '/' && dot_pos[1] == '/')
1751 /* Now validate the rest of the path. */
/* PATTERN keeps the two most recently read bytes (the older one in bits
   8-15, the newer one in bits 0-7), so the value 0x101 * '/' is the byte
   pair of two consecutive slashes. */
1752 for (i = 0; i < len - 1; ++i)
1754 pattern = ((pattern & 0xff) << 8) + (unsigned char)ptr[i];
1755 if (pattern == 0x101 * (unsigned char)('/'))
/* Public entry point for the relpath canonical-form check: forwards
   RELPATH unchanged to the file-local relpath_is_canonical(). */
1763 svn_relpath_is_canonical(const char *relpath)
1765 return relpath_is_canonical(relpath);
/* Report whether URI is already in canonical form.

   The body walks URI from left to right and tests, in this order:
     - that URI is a URL at all (svn_path_is_url());
     - the scheme, which must be followed by "://" and is scanned for
       upper-case letters;
     - an optional part up to '@', then the hostname, which is scanned for
       upper-case letters (a run of ':', digits and 'a'-'f' is scanned
       separately);
     - an optional port: decimal digits only, not empty, nothing but '/'
       or the end of the string after it, and compared against the
       scheme defaults 80 (http), 443 (https) and 3690 (svn);
     - under SVN_USE_DOS_PATHS, the drive letter of a "file:" URL;
     - every path segment: a "." segment, a doubled slash and a trailing
       '/' each make the URI non-canonical;
     - every '%' escape: both hex digits must be 0-9 or A-F, and the
       decoded byte must not be one that svn_uri__char_validity[] marks
       as valid unescaped; conversely every unescaped character other
       than '/' must be marked valid in that table. */
1769 svn_uri_is_canonical(const char *uri, apr_pool_t *scratch_pool)
1771 const char *ptr = uri, *seg = uri;
1772 const char *schema_data = NULL;
1774 /* URI is canonical if it has:
1775 * - lowercase URL scheme
1776 * - lowercase URL hostname
1780 * - uppercase hex-encoded pair digits ("%AB", not "%ab")
1786 if (! svn_path_is_url(uri))
1789 /* Skip the scheme. */
1790 while (*ptr && (*ptr != '/') && (*ptr != ':'))
1793 /* No scheme? No good. */
1794 if (! (*ptr == ':' && *(ptr+1) == '/' && *(ptr+2) == '/'))
1797 /* Found a scheme, check that it's all lowercase. */
1801 if (*ptr >= 'A' && *ptr <= 'Z')
1808 /* Scheme only? That works. */
1812 /* This might be the hostname */
1814 while (*ptr && (*ptr != '/') && (*ptr != '@'))
1820 /* Found a hostname, check that it's all lowercase. */
1827 || (*ptr >= '0' && *ptr <= '9')
1828 || (*ptr >= 'a' && *ptr <= 'f'))
1838 while (*ptr && *ptr != '/' && *ptr != ':')
1840 if (*ptr >= 'A' && *ptr <= 'Z')
1845 /* Found a portnumber */
1848 apr_int64_t port = 0;
1853 while (*ptr >= '0' && *ptr <= '9')
1855 port = 10 * port + (*ptr - '0');
1859 if (ptr == schema_data)
1860 return FALSE; /* Fail on "http://host:" */
1862 if (*ptr && *ptr != '/')
1863 return FALSE; /* Not a port number */
1865 if (port == 80 && strncmp(uri, "http:", 5) == 0)
1867 else if (port == 443 && strncmp(uri, "https:", 6) == 0)
1869 else if (port == 3690 && strncmp(uri, "svn:", 4) == 0)
1875 #ifdef SVN_USE_DOS_PATHS
1876 if (schema_data && *ptr == '/')
1878 /* If this is a file url, ptr now points to the third '/' in
1879 file:///C:/path. Check that if we have such a URL the drive
1880 letter is in uppercase. */
1881 if (strncmp(uri, "file:", 5) == 0 &&
1882 ! (*(ptr+1) >= 'A' && *(ptr+1) <= 'Z') &&
1886 #endif /* SVN_USE_DOS_PATHS */
1888 /* Now validate the rest of the URI. */
1890 while (*ptr && (*ptr != '/'))
1894 apr_size_t seglen = ptr - seg;
1896 if (seglen == 1 && *seg == '.')
1897 return FALSE; /* /./ */
1899 if (*ptr == '/' && *(ptr+1) == '/')
1900 return FALSE; /* // */
1902 if (! *ptr && *(ptr - 1) == '/' && ptr - 1 != uri)
1903 return FALSE; /* foo/ */
1912 while (*ptr && (*ptr != '/'))
1925 /* Can't use svn_ctype_isxdigit() because lower case letters are
1926 not in our canonical format */
1927 if (((*(ptr+1) < '0' || *(ptr+1) > '9'))
1928 && (*(ptr+1) < 'A' || *(ptr+1) > 'F'))
1930 else if (((*(ptr+2) < '0' || *(ptr+2) > '9'))
1931 && (*(ptr+2) < 'A' || *(ptr+2) > 'F'))
1934 digitz[0] = *(++ptr);
1935 digitz[1] = *(++ptr);
1937 val = (int)strtol(digitz, NULL, 16);
1939 if (svn_uri__char_validity[val])
1940 return FALSE; /* Should not have been escaped */
1942 else if (*ptr != '/' && !svn_uri__char_validity[(unsigned char)*ptr])
1943 return FALSE; /* Character should have been escaped */
/* Condense TARGETS, an array of const char * dirents, into a common
   ancestor plus paths expressed relative to that ancestor.

   Every target is first made absolute with svn_dirent_get_absolute().
   *PCOMMON receives the longest common ancestor of those absolute paths,
   duplicated into RESULT_POOL.

   PCONDENSED_TARGETS may be NULL.  When it is not:
     - for an empty TARGETS it is set to NULL;
     - for exactly one target it is set to an empty array;
     - otherwise it is set to an array in RESULT_POOL holding copies of the
       absolute targets with the *PCOMMON prefix skipped.  When
       REMOVE_REDUNDANCIES is set, targets that are children of another
       target, and targets equal to *PCOMMON, are flagged in REMOVED and
       left out.

   The REMOVED flags and the array of absolute targets are allocated in
   SCRATCH_POOL. */
1951 svn_dirent_condense_targets(const char **pcommon,
1952 apr_array_header_t **pcondensed_targets,
1953 const apr_array_header_t *targets,
1954 svn_boolean_t remove_redundancies,
1955 apr_pool_t *result_pool,
1956 apr_pool_t *scratch_pool)
1958 int i, num_condensed = targets->nelts;
1959 svn_boolean_t *removed;
1960 apr_array_header_t *abs_targets;
1962 /* Early exit when there's no data to work on. */
1963 if (targets->nelts <= 0)
1966 if (pcondensed_targets)
1967 *pcondensed_targets = NULL;
1968 return SVN_NO_ERROR;
1971 /* Get the absolute path of the first target. */
1972 SVN_ERR(svn_dirent_get_absolute(pcommon,
1973 APR_ARRAY_IDX(targets, 0, const char *),
1976 /* Early exit when there's only one dirent to work on. */
1977 if (targets->nelts == 1)
1979 *pcommon = apr_pstrdup(result_pool, *pcommon);
1980 if (pcondensed_targets)
1981 *pcondensed_targets = apr_array_make(result_pool, 0,
1982 sizeof(const char *));
1983 return SVN_NO_ERROR;
1986 /* Copy the targets array, but with absolute dirents instead of
1987 relative. Also, find the pcommon argument by finding what is
1988 common in all of the absolute dirents. NOTE: This is not as
1989 efficient as it could be. The calculation of the basedir could
1990 be done in the loop below, which would save some calls to
1991 svn_dirent_get_longest_ancestor. I decided to do it this way
1992 because I thought it would be simpler, since this way, we don't
1993 even do the loop if we don't need to condense the targets. */
1995 removed = apr_pcalloc(scratch_pool, (targets->nelts *
1996 sizeof(svn_boolean_t)));
1997 abs_targets = apr_array_make(scratch_pool, targets->nelts,
1998 sizeof(const char *));
2000 APR_ARRAY_PUSH(abs_targets, const char *) = *pcommon;
2002 for (i = 1; i < targets->nelts; ++i)
2004 const char *rel = APR_ARRAY_IDX(targets, i, const char *);
2005 const char *absolute;
2006 SVN_ERR(svn_dirent_get_absolute(&absolute, rel, scratch_pool));
2007 APR_ARRAY_PUSH(abs_targets, const char *) = absolute;
2008 *pcommon = svn_dirent_get_longest_ancestor(*pcommon, absolute,
2012 *pcommon = apr_pstrdup(result_pool, *pcommon);
2014 if (pcondensed_targets != NULL)
2018 if (remove_redundancies)
2020 /* Find the common part of each pair of targets. If
2021 common part is equal to one of the dirents, the other
2022 is a child of it, and can be removed. If a target is
2023 equal to *pcommon, it can also be removed. */
2025 /* First pass: when one non-removed target is a child of
2026 another non-removed target, remove the child. */
2027 for (i = 0; i < abs_targets->nelts; ++i)
2034 for (j = i + 1; j < abs_targets->nelts; ++j)
2036 const char *abs_targets_i;
2037 const char *abs_targets_j;
2038 const char *ancestor;
2043 abs_targets_i = APR_ARRAY_IDX(abs_targets, i, const char *);
2044 abs_targets_j = APR_ARRAY_IDX(abs_targets, j, const char *);
2046 ancestor = svn_dirent_get_longest_ancestor
2047 (abs_targets_i, abs_targets_j, scratch_pool);
2049 if (*ancestor == '\0')
2052 if (strcmp(ancestor, abs_targets_i) == 0)
2057 else if (strcmp(ancestor, abs_targets_j) == 0)
2065 /* Second pass: when a target is the same as *pcommon,
2066 remove the target. */
2067 for (i = 0; i < abs_targets->nelts; ++i)
2069 const char *abs_targets_i = APR_ARRAY_IDX(abs_targets, i,
2072 if ((strcmp(abs_targets_i, *pcommon) == 0) && (! removed[i]))
2080 /* Now create the return array, and copy the non-removed items */
2081 basedir_len = strlen(*pcommon);
2082 *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2083 sizeof(const char *));
2085 for (i = 0; i < abs_targets->nelts; ++i)
2087 const char *rel_item = APR_ARRAY_IDX(abs_targets, i, const char *);
2089 /* Skip this if it's been removed. */
2093 /* If a common prefix was found, condensed_targets are given
2094 relative to that prefix. */
2095 if (basedir_len > 0)
2097 /* Only advance our pointer past a dirent separator if
2098 REL_ITEM isn't the same as *PCOMMON.
2100 If *PCOMMON is a root dirent, basedir_len will already
2101 include the closing '/', so never advance the pointer
2104 rel_item += basedir_len;
2106 ! svn_dirent_is_root(*pcommon, basedir_len))
2110 APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2111 = apr_pstrdup(result_pool, rel_item);
2115 return SVN_NO_ERROR;
/* Condense TARGETS, an array of const char * URIs, into a common ancestor
   plus paths expressed relative to that ancestor.

   Every target is first canonicalized with svn_uri_canonicalize().
   *PCOMMON receives the longest common ancestor of the canonical URIs
   (svn_uri_get_longest_ancestor() is no longer consulted once the running
   ancestor has become empty), duplicated into RESULT_POOL.

   PCONDENSED_TARGETS may be NULL.  When it is not:
     - for an empty TARGETS it is set to NULL;
     - for exactly one target it is set to an empty array;
     - otherwise it is set to an array in RESULT_POOL holding the targets
       with the *PCOMMON prefix skipped and the remainder URI-decoded via
       svn_path_uri_decode().  When REMOVE_REDUNDANCIES is set, targets
       that are children of another target, and targets equal to *PCOMMON,
       are flagged in REMOVED and left out.

   The REMOVED flags and the array of canonical targets are allocated in
   SCRATCH_POOL. */
2119 svn_uri_condense_targets(const char **pcommon,
2120 apr_array_header_t **pcondensed_targets,
2121 const apr_array_header_t *targets,
2122 svn_boolean_t remove_redundancies,
2123 apr_pool_t *result_pool,
2124 apr_pool_t *scratch_pool)
2126 int i, num_condensed = targets->nelts;
2127 apr_array_header_t *uri_targets;
2128 svn_boolean_t *removed;
2130 /* Early exit when there's no data to work on. */
2131 if (targets->nelts <= 0)
2134 if (pcondensed_targets)
2135 *pcondensed_targets = NULL;
2136 return SVN_NO_ERROR;
2139 *pcommon = svn_uri_canonicalize(APR_ARRAY_IDX(targets, 0, const char *),
2142 /* Early exit when there's only one uri to work on. */
2143 if (targets->nelts == 1)
2145 *pcommon = apr_pstrdup(result_pool, *pcommon);
2146 if (pcondensed_targets)
2147 *pcondensed_targets = apr_array_make(result_pool, 0,
2148 sizeof(const char *));
2149 return SVN_NO_ERROR;
2152 /* Find the pcommon argument by finding what is common in all of the
2153 uris. NOTE: This is not as efficient as it could be. The calculation
2154 of the basedir could be done in the loop below, which would
2155 save some calls to svn_uri_get_longest_ancestor. I decided to do it
2156 this way because I thought it would be simpler, since this way, we don't
2157 even do the loop if we don't need to condense the targets. */
2159 removed = apr_pcalloc(scratch_pool, (targets->nelts *
2160 sizeof(svn_boolean_t)));
2161 uri_targets = apr_array_make(scratch_pool, targets->nelts,
2162 sizeof(const char *));
2164 APR_ARRAY_PUSH(uri_targets, const char *) = *pcommon;
2166 for (i = 1; i < targets->nelts; ++i)
2168 const char *uri = svn_uri_canonicalize(
2169 APR_ARRAY_IDX(targets, i, const char *),
2171 APR_ARRAY_PUSH(uri_targets, const char *) = uri;
2173 /* If the commonmost ancestor so far is empty, there's no point
2174 in continuing to search for a common ancestor at all. But
2175 we'll keep looping for the sake of canonicalizing the
2176 targets, I suppose. */
2177 if (**pcommon != '\0')
2178 *pcommon = svn_uri_get_longest_ancestor(*pcommon, uri,
2182 *pcommon = apr_pstrdup(result_pool, *pcommon);
2184 if (pcondensed_targets != NULL)
2188 if (remove_redundancies)
2190 /* Find the common part of each pair of targets. If
2191 common part is equal to one of the dirents, the other
2192 is a child of it, and can be removed. If a target is
2193 equal to *pcommon, it can also be removed. */
2195 /* First pass: when one non-removed target is a child of
2196 another non-removed target, remove the child. */
2197 for (i = 0; i < uri_targets->nelts; ++i)
2204 for (j = i + 1; j < uri_targets->nelts; ++j)
2208 const char *ancestor;
2213 uri_i = APR_ARRAY_IDX(uri_targets, i, const char *);
2214 uri_j = APR_ARRAY_IDX(uri_targets, j, const char *);
2216 ancestor = svn_uri_get_longest_ancestor(uri_i,
2220 if (*ancestor == '\0')
2223 if (strcmp(ancestor, uri_i) == 0)
2228 else if (strcmp(ancestor, uri_j) == 0)
2236 /* Second pass: when a target is the same as *pcommon,
2237 remove the target. */
2238 for (i = 0; i < uri_targets->nelts; ++i)
2240 const char *uri_targets_i = APR_ARRAY_IDX(uri_targets, i,
2243 if ((strcmp(uri_targets_i, *pcommon) == 0) && (! removed[i]))
2251 /* Now create the return array, and copy the non-removed items */
2252 basedir_len = strlen(*pcommon);
2253 *pcondensed_targets = apr_array_make(result_pool, num_condensed,
2254 sizeof(const char *));
2256 for (i = 0; i < uri_targets->nelts; ++i)
2258 const char *rel_item = APR_ARRAY_IDX(uri_targets, i, const char *);
2260 /* Skip this if it's been removed. */
2264 /* If a common prefix was found, condensed_targets are given
2265 relative to that prefix. */
2266 if (basedir_len > 0)
2268 /* Only advance our pointer past a dirent separator if
2269 REL_ITEM isn't the same as *PCOMMON.
2271 If *PCOMMON is a root dirent, basedir_len will already
2272 include the closing '/', so never advance the pointer
2275 rel_item += basedir_len;
2276 if ((rel_item[0] == '/') ||
2277 (rel_item[0] && !svn_uri_is_root(*pcommon, basedir_len)))
2283 APR_ARRAY_PUSH(*pcondensed_targets, const char *)
2284 = svn_path_uri_decode(rel_item, result_pool);
2288 return SVN_NO_ERROR;
/* Test whether a path merged onto BASE_PATH stays beneath BASE_PATH.

   The merge is done by apr_filepath_merge() with the flags
   APR_FILEPATH_NOTABOVEROOT | APR_FILEPATH_SECUREROOTTEST.  *UNDER_ROOT
   starts out FALSE and *RESULT_PATH starts out NULL.

   Outcomes by APR status:
     - APR_SUCCESS: the merged path is canonicalized into RESULT_POOL for
       *RESULT_PATH and SVN_NO_ERROR is returned;
     - APR_EABOVEROOT: *UNDER_ROOT is FALSE and SVN_NO_ERROR is returned;
     - anything else: the status is returned wrapped by
       svn_error_wrap_apr(). */
2292 svn_dirent_is_under_root(svn_boolean_t *under_root,
2293 const char **result_path,
2294 const char *base_path,
2296 apr_pool_t *result_pool)
2298 apr_status_t status;
2301 *under_root = FALSE;
2303 *result_path = NULL;
2305 status = apr_filepath_merge(&full_path,
2308 APR_FILEPATH_NOTABOVEROOT
2309 | APR_FILEPATH_SECUREROOTTEST,
2312 if (status == APR_SUCCESS)
2315 *result_path = svn_dirent_canonicalize(full_path, result_pool);
2317 return SVN_NO_ERROR;
2319 else if (status == APR_EABOVEROOT)
2321 *under_root = FALSE;
2322 return SVN_NO_ERROR;
2325 return svn_error_wrap_apr(status, NULL);
/* Convert the file URL given to this function into a local dirent stored
   in *DIRENT, with all allocations made in POOL.

   The URL must be canonical (asserted) and must start with "file://",
   otherwise SVN_ERR_RA_ILLEGAL_URL is returned.  The URL is split into a
   hostname (the text between "file://" and the next '/') and a path (the
   rest); both are URI-decoded.

   With SVN_USE_DOS_PATHS, a path that looks like "/X:" or "/X|" and has no
   hostname is turned into a drive-letter dirent, while a hostname yields a
   UNC dirent of the form "//" + hostname + path; a URL consisting of a
   hostname and nothing but "/" is rejected with SVN_ERR_RA_ILLEGAL_URL.
   Without SVN_USE_DOS_PATHS, an unsupported hostname is rejected with
   SVN_ERR_RA_ILLEGAL_URL and *DIRENT is the URI-decoded path. */
2329 svn_uri_get_dirent_from_file_url(const char **dirent,
2333 const char *hostname, *path;
2335 SVN_ERR_ASSERT(svn_uri_is_canonical(url, pool));
2337 /* Verify that the URL is well-formed (loosely) */
2339 /* First, check for the "file://" prefix. */
2340 if (strncmp(url, "file://", 7) != 0)
2341 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2342 _("Local URL '%s' does not contain 'file://' "
2345 /* Find the HOSTNAME portion and the PATH portion of the URL. The host
2346 name is between the "file://" prefix and the next occurrence of '/'. We
2347 are considering everything from that '/' until the end of the URL to be
2348 the absolute path portion of the URL.
2349 If we got just "file://", treat it the same as "file:///". */
2351 path = strchr(hostname, '/');
2353 hostname = apr_pstrmemdup(pool, hostname, path - hostname);
2357 /* URI-decode HOSTNAME, and set it to NULL if it is "" or "localhost". */
2358 if (*hostname == '\0')
2362 hostname = svn_path_uri_decode(hostname, pool);
2363 if (strcmp(hostname, "localhost") == 0)
2367 /* Duplicate the URL, starting at the top of the path.
2368 At the same time, we URI-decode the path. */
2369 #ifdef SVN_USE_DOS_PATHS
2370 /* On Windows, we'll typically have to skip the leading / if the
2371 path starts with a drive letter. Like most Web browsers, we
2372 support two variants of this scheme:
2377 Note that, at least on WinNT and above, file:////./X:/path will
2378 also work, so we must make sure the transformation doesn't break
2379 that, and file:///path (that looks within the current drive
2380 only) should also keep working.
2381 If we got a non-empty hostname other than localhost, we convert this
2382 into an UNC path. In this case, we obviously don't strip the slash
2383 even if the path looks like it starts with a drive letter.
2386 static const char valid_drive_letters[] =
2387 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2388 /* Casting away const! */
2389 char *dup_path = (char *)svn_path_uri_decode(path, pool);
2391 /* This check assumes ':' and '|' are already decoded! */
2392 if (!hostname && dup_path[1] && strchr(valid_drive_letters, dup_path[1])
2393 && (dup_path[2] == ':' || dup_path[2] == '|'))
2395 /* Skip the leading slash. */
2398 if (dup_path[1] == '|')
2401 if (dup_path[2] == '/' || dup_path[2] == '\0')
2403 if (dup_path[2] == '\0')
2405 /* A valid dirent for the driveroot must be like "C:/" instead of
2406 just "C:" or svn_dirent_join() will use the current directory
2407 on the drive instead */
2408 char *new_path = apr_pcalloc(pool, 4);
2409 new_path[0] = dup_path[0];
2413 dup_path = new_path;
2419 if (dup_path[0] == '/' && dup_path[1] == '\0')
2420 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2421 _("Local URL '%s' contains only a hostname, "
2424 /* We still know that the path starts with a slash. */
2425 *dirent = apr_pstrcat(pool, "//", hostname, dup_path, SVN_VA_NULL);
2430 #else /* !SVN_USE_DOS_PATHS */
2431 /* Currently, the only hostnames we are allowing on non-Win32 platforms
2432 are the empty string and 'localhost'. */
2434 return svn_error_createf(SVN_ERR_RA_ILLEGAL_URL, NULL,
2435 _("Local URL '%s' contains unsupported hostname"),
2438 *dirent = svn_path_uri_decode(path, pool);
2439 #endif /* SVN_USE_DOS_PATHS */
2440 return SVN_NO_ERROR;
/* Build a "file:" URL for DIRENT and store it in *URL, allocated in POOL.

   DIRENT must be canonical (asserted).  It is made absolute with
   svn_dirent_get_absolute() and URI-encoded with svn_path_uri_encode()
   before the URL is assembled.

   Without SVN_USE_DOS_PATHS the URL is "file://" followed by the encoded
   dirent, except that the root dirent "/" yields plain "file://".  With
   SVN_USE_DOS_PATHS a dirent starting with '/' is expected to be a UNC
   path and yields "file:" followed by the dirent; any other dirent yields
   "file:///" followed by the dirent, with a trailing '/' on the result
   being checked for because "file:///C:/" is not a canonical URI. */
2444 svn_uri_get_file_url_from_dirent(const char **url,
2448 assert(svn_dirent_is_canonical(dirent, pool));
2450 SVN_ERR(svn_dirent_get_absolute(&dirent, dirent, pool));
2452 dirent = svn_path_uri_encode(dirent, pool);
2454 #ifndef SVN_USE_DOS_PATHS
2455 if (dirent[0] == '/' && dirent[1] == '\0')
2456 dirent = NULL; /* "file://" is the canonical form of "file:///" */
2458 *url = apr_pstrcat(pool, "file://", dirent, SVN_VA_NULL);
2460 if (dirent[0] == '/')
2462 /* Handle UNC paths //server/share -> file://server/share */
2463 assert(dirent[1] == '/'); /* Expect UNC, not non-absolute */
2465 *url = apr_pstrcat(pool, "file:", dirent, SVN_VA_NULL);
2469 char *uri = apr_pstrcat(pool, "file:///", dirent, SVN_VA_NULL);
2470 apr_size_t len = 8 /* strlen("file:///") */ + strlen(dirent);
2472 /* "C:/" is a canonical dirent on Windows,
2473 but "file:///C:/" is not a canonical uri */
2474 if (uri[len-1] == '/')
2481 return SVN_NO_ERROR;
2486 /* -------------- The fspath API (see private/svn_fspath.h) -------------- */
/* An fspath is canonical when it starts with '/' and the text after that
   slash passes relpath_is_canonical(). */
2489 svn_fspath__is_canonical(const char *fspath)
2491 return fspath[0] == '/' && relpath_is_canonical(fspath + 1);
/* Canonicalize FSPATH.  The bare root "/" is tested for first; for other
   input the result is "/" concatenated (in POOL) with the output of
   svn_relpath_canonicalize() applied to FSPATH. */
2496 svn_fspath__canonicalize(const char *fspath,
2499 if ((fspath[0] == '/') && (fspath[1] == '\0'))
2502 return apr_pstrcat(pool, "/", svn_relpath_canonicalize(fspath, pool),
/* Report whether FSPATH is the root: true only when LEN is 1 and the
   first character of FSPATH is '/'. */
2508 svn_fspath__is_root(const char *fspath, apr_size_t len)
2510 /* directory is root if it's equal to '/' */
2511 return (len == 1 && fspath[0] == '/');
/* fspath flavour of the skip-ancestor operation.  Both arguments must be
   canonical fspaths (asserted); the leading '/' of each is stepped over
   and the work is delegated to svn_relpath_skip_ancestor(), whose result
   is returned unchanged. */
2516 svn_fspath__skip_ancestor(const char *parent_fspath,
2517 const char *child_fspath)
2519 assert(svn_fspath__is_canonical(parent_fspath));
2520 assert(svn_fspath__is_canonical(child_fspath));
2522 return svn_relpath_skip_ancestor(parent_fspath + 1, child_fspath + 1);
/* Return the directory part of the canonical fspath FSPATH (asserted),
   allocated in POOL.  The root "/" is returned as a copy of itself; any
   other path yields "/" followed by svn_relpath_dirname() of the text
   after the leading slash. */
2527 svn_fspath__dirname(const char *fspath,
2530 assert(svn_fspath__is_canonical(fspath));
2532 if (fspath[0] == '/' && fspath[1] == '\0')
2533 return apr_pstrdup(pool, fspath);
2535 return apr_pstrcat(pool, "/", svn_relpath_dirname(fspath + 1, pool),
/* Compute the last component of the canonical fspath FSPATH (asserted) by
   handing the text after the leading '/' to svn_relpath_basename() with
   POOL.  The computed name is asserted to contain no '/'. */
2541 svn_fspath__basename(const char *fspath,
2545 assert(svn_fspath__is_canonical(fspath));
2547 result = svn_relpath_basename(fspath + 1, pool);
2549 assert(strchr(result, '/') == NULL);
/* Split an fspath into its directory and final component: *DIRPATH is
   filled from svn_fspath__dirname() and *BASE_NAME from
   svn_fspath__basename(), both allocated in RESULT_POOL.  DIRPATH and
   BASE_NAME must be distinct pointers (asserted). */
2554 svn_fspath__split(const char **dirpath,
2555 const char **base_name,
2557 apr_pool_t *result_pool)
2559 assert(dirpath != base_name);
2562 *dirpath = svn_fspath__dirname(fspath, result_pool);
2565 *base_name = svn_fspath__basename(fspath, result_pool);
/* Join the canonical fspath FSPATH and the canonical relpath RELPATH (both
   asserted) into a new fspath allocated in RESULT_POOL.

   Three cases: an empty RELPATH yields a copy of FSPATH; a one-character
   FSPATH (the root) yields "/" + RELPATH, avoiding a doubled slash;
   otherwise the result is FSPATH + "/" + RELPATH.  The joined path is
   asserted to be canonical. */
2569 svn_fspath__join(const char *fspath,
2570 const char *relpath,
2571 apr_pool_t *result_pool)
2574 assert(svn_fspath__is_canonical(fspath));
2575 assert(svn_relpath_is_canonical(relpath));
2577 if (relpath[0] == '\0')
2578 result = apr_pstrdup(result_pool, fspath);
2579 else if (fspath[1] == '\0')
2580 result = apr_pstrcat(result_pool, "/", relpath, SVN_VA_NULL);
2582 result = apr_pstrcat(result_pool, fspath, "/", relpath, SVN_VA_NULL);
2584 assert(svn_fspath__is_canonical(result));
/* Compute the longest common ancestor of the canonical fspaths FSPATH1 and
   FSPATH2 (both asserted).  The leading '/' of each input is stepped over,
   svn_relpath_get_longest_ancestor() does the comparison, and "/" is
   prepended to its answer in RESULT_POOL.  The computed path is asserted
   to be a canonical fspath. */
2589 svn_fspath__get_longest_ancestor(const char *fspath1,
2590 const char *fspath2,
2591 apr_pool_t *result_pool)
2594 assert(svn_fspath__is_canonical(fspath1));
2595 assert(svn_fspath__is_canonical(fspath2));
2597 result = apr_pstrcat(result_pool, "/",
2598 svn_relpath_get_longest_ancestor(fspath1 + 1,
2603 assert(svn_fspath__is_canonical(result));
2610 /* -------------- The urlpath API (see private/svn_fspath.h) ------------- */
/* Canonicalize URI, which may be either a full URL or a URL path.

   Input recognised by svn_path_is_url() goes through
   svn_uri_canonicalize().  Other input is canonicalized as an fspath and
   then URI-decoded and re-encoded so that its hex escapes come out in one
   consistent form.  All allocations are made in POOL. */
2613 svn_urlpath__canonicalize(const char *uri,
2616 if (svn_path_is_url(uri))
2618 uri = svn_uri_canonicalize(uri, pool);
2622 uri = svn_fspath__canonicalize(uri, pool);
2623 /* Do a little dance to normalize hex encoding. */
2624 uri = svn_path_uri_decode(uri, pool);
2625 uri = svn_path_uri_encode(uri, pool);
2631 /* -------------- The cert API (see private/svn_cert.h) ------------- */
/* Match HOSTNAME against PATTERN, a DNS name taken from a certificate.

   A PATTERN that begins with "*." is treated as a wildcard: the scan of
   HOSTNAME first moves past every character up to its first '.', and that
   skipped label is tested for being non-empty.  From there on, PATTERN and
   HOSTNAME are compared character by character after mapping both sides
   through canonicalize_to_lower().  A single trailing '.' left over on
   HOSTNAME once PATTERN is exhausted is tolerated; beyond that, the two
   strings are tested for having been consumed completely. */
2634 svn_cert__match_dns_identity(svn_string_t *pattern, svn_string_t *hostname)
2636 apr_size_t pattern_pos = 0, hostname_pos = 0;
2638 /* support a leading wildcard only when '*' is the sole character of the
2639 * left-most label. */
2640 if (pattern->len >= 2 &&
2641 pattern->data[pattern_pos] == '*' &&
2642 pattern->data[pattern_pos + 1] == '.')
2644 while (hostname_pos < hostname->len &&
2645 hostname->data[hostname_pos] != '.')
2649 /* Assume that the wildcard must match something. Rule 2 says
2650 * that *.example.com should not match example.com. If the wildcard
2651 * ends up not matching anything then it matches .example.com which
2652 * seems to be essentially the same as just example.com */
2653 if (hostname_pos == 0)
2659 while (pattern_pos < pattern->len && hostname_pos < hostname->len)
2661 char pattern_c = pattern->data[pattern_pos];
2662 char hostname_c = hostname->data[hostname_pos];
2664 /* fold case as described in RFC 4343.
2665 * Note: We actually convert to lowercase, since our URI
2666 * canonicalization code converts to lowercase and generally
2667 * most certs are issued with lowercase DNS names, meaning
2668 * this avoids the fold operation in most cases. The RFC
2669 * suggests the opposite transformation, but doesn't require
2670 * any specific implementation in any case. It is critical
2671 * that this folding be locale independent so you can't use
2673 pattern_c = canonicalize_to_lower(pattern_c);
2674 hostname_c = canonicalize_to_lower(hostname_c);
2676 if (pattern_c != hostname_c)
2683 /* characters match so skip both */
2689 /* ignore a trailing period on the hostname since this has no effect on the
2690 * security of the matching. See the following for the long explanation as
2692 * https://bugzilla.mozilla.org/show_bug.cgi?id=134402#c28
2694 if (pattern_pos == pattern->len &&
2695 hostname_pos == hostname->len - 1 &&
2696 hostname->data[hostname_pos] == '.')
2699 if (pattern_pos != pattern->len || hostname_pos != hostname->len)
2701 /* end didn't match */