2 * paths.c: a path manipulation library using svn_stringbuf_t
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
29 #include <apr_file_info.h>
33 #include "svn_string.h"
34 #include "svn_dirent_uri.h"
36 #include "svn_private_config.h" /* for SVN_PATH_LOCAL_SEPARATOR */
38 #include "svn_io.h" /* for svn_io_stat() */
39 #include "svn_ctype.h"
41 #include "dirent_uri.h"
/* The canonical spelling of the empty path.  The emptiness test below is
   written against this spelling, so the two have to change together. */
#define SVN_EMPTY_PATH ""

/* Non-zero if the string S is the canonical empty path, i.e. its first
   byte is the terminating NUL. */
#define SVN_PATH_IS_EMPTY(s) ((s)[0] == '\0')

/* Non-zero if the N-byte string S is the platform's empty path ("."). */
#define SVN_PATH_IS_PLATFORM_EMPTY(s,n) ((n) == 1 && (s)[0] == '.')
61 /* This function is an approximation of svn_path_is_canonical.
62 * It is supposed to be used in functions that do not have access
63 * to a pool, but still want to assert that a path is canonical.
65 * PATH with length LEN is assumed to be canonical if it isn't
66 * the platform's empty path (see definition of SVN_PATH_IS_PLATFORM_EMPTY),
67 * and does not contain "/./", and any one of the following
68 * conditions is also met:
70 * 1. PATH has zero length
71 * 2. PATH is the root directory (what exactly a root directory is
72 * depends on the platform)
73 * 3. PATH is not a root directory and does not end with '/'
75 * If possible, please use svn_path_is_canonical instead.
78 is_canonical(const char *path,
81 return (! SVN_PATH_IS_PLATFORM_EMPTY(path, len)
82 && strstr(path, "/./") == NULL
84 || (len == 1 && path[0] == '/')
85 || (path[len-1] != '/')
86 #if defined(WIN32) || defined(__CYGWIN__)
87 || svn_dirent_is_root(path, len)
94 /* functionality of svn_path_is_canonical but without the deprecation */
96 svn_path_is_canonical_internal(const char *path, apr_pool_t *pool)
98 return svn_uri_is_canonical(path, pool) ||
99 svn_dirent_is_canonical(path, pool) ||
100 svn_relpath_is_canonical(path);
104 svn_path_is_canonical(const char *path, apr_pool_t *pool)
106 return svn_path_is_canonical_internal(path, pool);
109 /* functionality of svn_path_join but without the deprecation */
111 svn_path_join_internal(const char *base,
112 const char *component,
115 apr_size_t blen = strlen(base);
116 apr_size_t clen = strlen(component);
119 assert(svn_path_is_canonical_internal(base, pool));
120 assert(svn_path_is_canonical_internal(component, pool));
122 /* If the component is absolute, then return it. */
123 if (*component == '/')
124 return apr_pmemdup(pool, component, clen + 1);
126 /* If either is empty return the other */
127 if (SVN_PATH_IS_EMPTY(base))
128 return apr_pmemdup(pool, component, clen + 1);
129 if (SVN_PATH_IS_EMPTY(component))
130 return apr_pmemdup(pool, base, blen + 1);
132 if (blen == 1 && base[0] == '/')
133 blen = 0; /* Ignore base, just return separator + component */
135 /* Construct the new, combined path. */
136 path = apr_palloc(pool, blen + 1 + clen + 1);
137 memcpy(path, base, blen);
139 memcpy(path + blen + 1, component, clen + 1);
144 char *svn_path_join(const char *base,
145 const char *component,
148 return svn_path_join_internal(base, component, pool);
151 char *svn_path_join_many(apr_pool_t *pool, const char *base, ...)
153 #define MAX_SAVED_LENGTHS 10
154 apr_size_t saved_lengths[MAX_SAVED_LENGTHS];
155 apr_size_t total_len;
162 svn_boolean_t base_is_empty = FALSE, base_is_root = FALSE;
165 total_len = strlen(base);
167 assert(svn_path_is_canonical_internal(base, pool));
169 if (total_len == 1 && *base == '/')
171 else if (SVN_PATH_IS_EMPTY(base))
173 total_len = sizeof(SVN_EMPTY_PATH) - 1;
174 base_is_empty = TRUE;
177 saved_lengths[0] = total_len;
179 /* Compute the length of the resulting string. */
183 while ((s = va_arg(va, const char *)) != NULL)
187 assert(svn_path_is_canonical_internal(s, pool));
189 if (SVN_PATH_IS_EMPTY(s))
192 if (nargs++ < MAX_SAVED_LENGTHS)
193 saved_lengths[nargs] = len;
197 /* an absolute path. skip all components to this point and reset
201 base_is_root = len == 1;
202 base_is_empty = FALSE;
204 else if (nargs == base_arg
205 || (nargs == base_arg + 1 && base_is_root)
208 /* if we have skipped everything up to this arg, then the base
209 and all prior components are empty. just set the length to
210 this component; do not add a separator. If the base is empty
211 we can now ignore it. */
214 base_is_empty = FALSE;
221 total_len += 1 + len;
226 /* base == "/" and no further components. just return that. */
227 if (base_is_root && total_len == 1)
228 return apr_pmemdup(pool, "/", 2);
230 /* we got the total size. allocate it, with room for a NULL character. */
231 path = p = apr_palloc(pool, total_len + 1);
233 /* if we aren't supposed to skip forward to an absolute component, and if
234 this is not an empty base that we are skipping, then copy the base
236 if (base_arg == 0 && ! (SVN_PATH_IS_EMPTY(base) && ! base_is_empty))
238 if (SVN_PATH_IS_EMPTY(base))
239 memcpy(p, SVN_EMPTY_PATH, len = saved_lengths[0]);
241 memcpy(p, base, len = saved_lengths[0]);
247 while ((s = va_arg(va, const char *)) != NULL)
249 if (SVN_PATH_IS_EMPTY(s))
252 if (++nargs < base_arg)
255 if (nargs < MAX_SAVED_LENGTHS)
256 len = saved_lengths[nargs];
260 /* insert a separator if we aren't copying in the first component
261 (which can happen when base_arg is set). also, don't put in a slash
262 if the prior character is a slash (occurs when prior component
264 if (p != path && p[-1] != '/')
267 /* copy the new component and advance the pointer */
274 assert((apr_size_t)(p - path) == total_len);
282 svn_path_component_count(const char *path)
284 apr_size_t count = 0;
286 assert(is_canonical(path, strlen(path)));
297 while (*path && *path != '/')
308 /* Return the length of substring necessary to encompass the entire
309 * previous path segment in PATH, which should be a LEN byte string.
311 * A trailing slash will not be included in the returned length except
312 * in the case in which PATH is absolute and there are no more
316 previous_segment(const char *path,
322 while (len > 0 && path[--len] != '/')
325 if (len == 0 && path[0] == '/')
333 svn_path_add_component(svn_stringbuf_t *path,
334 const char *component)
336 apr_size_t len = strlen(component);
338 assert(is_canonical(path->data, path->len));
339 assert(is_canonical(component, strlen(component)));
341 /* Append a dir separator, but only if this path is neither empty
342 nor consists of a single dir separator already. */
343 if ((! SVN_PATH_IS_EMPTY(path->data))
344 && (! ((path->len == 1) && (*(path->data) == '/'))))
347 svn_stringbuf_appendbytes(path, &dirsep, sizeof(dirsep));
350 svn_stringbuf_appendbytes(path, component, len);
355 svn_path_remove_component(svn_stringbuf_t *path)
357 assert(is_canonical(path->data, path->len));
359 path->len = previous_segment(path->data, path->len);
360 path->data[path->len] = '\0';
365 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n)
369 svn_path_remove_component(path);
376 svn_path_dirname(const char *path, apr_pool_t *pool)
378 apr_size_t len = strlen(path);
380 assert(svn_path_is_canonical_internal(path, pool));
382 return apr_pstrmemdup(pool, path, previous_segment(path, len));
387 svn_path_basename(const char *path, apr_pool_t *pool)
389 apr_size_t len = strlen(path);
392 assert(svn_path_is_canonical_internal(path, pool));
394 if (len == 1 && path[0] == '/')
399 while (start > 0 && path[start - 1] != '/')
403 return apr_pstrmemdup(pool, path + start, len - start);
/* Return 1 if PATH is the canonical empty path, 0 otherwise.
   PATH must pass is_canonical() (asserted). */
int
svn_path_is_empty(const char *path)
{
  assert(is_canonical(path, strlen(path)));

  return SVN_PATH_IS_EMPTY(path) ? 1 : 0;
}
418 svn_path_compare_paths(const char *path1,
421 apr_size_t path1_len = strlen(path1);
422 apr_size_t path2_len = strlen(path2);
423 apr_size_t min_len = ((path1_len < path2_len) ? path1_len : path2_len);
426 assert(is_canonical(path1, path1_len));
427 assert(is_canonical(path2, path2_len));
429 /* Skip past common prefix. */
430 while (i < min_len && path1[i] == path2[i])
433 /* Are the paths exactly the same? */
434 if ((path1_len == path2_len) && (i >= min_len))
437 /* Children of paths are greater than their parents, but less than
438 greater siblings of their parents. */
439 if ((path1[i] == '/') && (path2[i] == 0))
441 if ((path2[i] == '/') && (path1[i] == 0))
448 /* Common prefix was skipped above, next character is compared to
449 determine order. We need to use an unsigned comparison, though,
450 so a "next character" of NULL (0x00) sorts numerically
452 return (unsigned char)(path1[i]) < (unsigned char)(path2[i]) ? -1 : 1;
455 /* Return the string length of the longest common ancestor of PATH1 and PATH2.
457 * This function handles everything except the URL-handling logic
458 * of svn_path_get_longest_ancestor, and assumes that PATH1 and
459 * PATH2 are *not* URLs.
461 * If the two paths do not share a common ancestor, return 0.
463 * New strings are allocated in POOL.
466 get_path_ancestor_length(const char *path1,
470 apr_size_t path1_len, path2_len;
472 apr_size_t last_dirsep = 0;
474 path1_len = strlen(path1);
475 path2_len = strlen(path2);
477 if (SVN_PATH_IS_EMPTY(path1) || SVN_PATH_IS_EMPTY(path2))
480 while (path1[i] == path2[i])
482 /* Keep track of the last directory separator we hit. */
488 /* If we get to the end of either path, break out. */
489 if ((i == path1_len) || (i == path2_len))
493 /* two special cases:
494 1. '/' is the longest common ancestor of '/' and '/foo'
495 2. '/' is the longest common ancestor of '/rif' and '/raf' */
496 if (i == 1 && path1[0] == '/' && path2[0] == '/')
499 /* last_dirsep is now the offset of the last directory separator we
500 crossed before reaching a non-matching byte. i is the offset of
501 that non-matching byte. */
502 if (((i == path1_len) && (path2[i] == '/'))
503 || ((i == path2_len) && (path1[i] == '/'))
504 || ((i == path1_len) && (i == path2_len)))
507 if (last_dirsep == 0 && path1[0] == '/' && path2[0] == '/')
514 svn_path_get_longest_ancestor(const char *path1,
518 svn_boolean_t path1_is_url = svn_path_is_url(path1);
519 svn_boolean_t path2_is_url = svn_path_is_url(path2);
521 /* Are we messing with URLs? If we have a mix of URLs and non-URLs,
522 there's nothing common between them. */
523 if (path1_is_url && path2_is_url)
525 return svn_uri_get_longest_ancestor(path1, path2, pool);
527 else if ((! path1_is_url) && (! path2_is_url))
529 return apr_pstrndup(pool, path1,
530 get_path_ancestor_length(path1, path2, pool));
534 /* A URL and a non-URL => no common prefix */
535 return apr_pmemdup(pool, SVN_EMPTY_PATH, sizeof(SVN_EMPTY_PATH));
540 svn_path_is_child(const char *path1,
546 /* assert (is_canonical (path1, strlen (path1))); ### Expensive strlen */
547 /* assert (is_canonical (path2, strlen (path2))); ### Expensive strlen */
549 /* Allow "" and "foo" to be parent/child */
550 if (SVN_PATH_IS_EMPTY(path1)) /* "" is the parent */
552 if (SVN_PATH_IS_EMPTY(path2) /* "" not a child */
553 || path2[0] == '/') /* "/foo" not a child */
556 /* everything else is child */
557 return pool ? apr_pstrdup(pool, path2) : path2;
560 /* Reach the end of at least one of the paths. How should we handle
561 things like path1:"foo///bar" and path2:"foo/bar/baz"? It doesn't
562 appear to arise in the current Subversion code, it's not clear to me
563 if they should be parent/child or not. */
564 for (i = 0; path1[i] && path2[i]; i++)
565 if (path1[i] != path2[i])
568 /* There are two cases that are parent/child
570 .../foo path2[i] == '/'
575 if (path1[i] == '\0' && path2[i])
578 return pool ? apr_pstrdup(pool, path2 + i + 1) : path2 + i + 1;
579 else if (i == 1 && path1[0] == '/')
580 return pool ? apr_pstrdup(pool, path2 + 1) : path2 + 1;
583 /* Otherwise, path2 isn't a child. */
589 svn_path_is_ancestor(const char *path1, const char *path2)
591 apr_size_t path1_len = strlen(path1);
593 /* If path1 is empty and path2 is not absoulte, then path1 is an ancestor. */
594 if (SVN_PATH_IS_EMPTY(path1))
595 return *path2 != '/';
597 /* If path1 is a prefix of path2, then:
598 - If path1 ends in a path separator,
599 - If the paths are of the same length
601 - path2 starts a new path component after the common prefix,
602 then path1 is an ancestor. */
603 if (strncmp(path1, path2, path1_len) == 0)
604 return path1[path1_len - 1] == '/'
605 || (path2[path1_len] == '/' || path2[path1_len] == '\0');
612 svn_path_decompose(const char *path,
617 apr_array_header_t *components =
618 apr_array_make(pool, 1, sizeof(const char *));
620 assert(svn_path_is_canonical_internal(path, pool));
622 if (SVN_PATH_IS_EMPTY(path))
623 return components; /* ### Should we return a "" component? */
625 /* If PATH is absolute, store the '/' as the first component. */
631 APR_ARRAY_PUSH(components, const char *)
632 = apr_pstrmemdup(pool, &dirsep, sizeof(dirsep));
636 if (path[i] == '\0') /* path is a single '/' */
642 if ((path[i] == '/') || (path[i] == '\0'))
644 if (SVN_PATH_IS_PLATFORM_EMPTY(path + oldi, i - oldi))
645 APR_ARRAY_PUSH(components, const char *) = SVN_EMPTY_PATH;
647 APR_ARRAY_PUSH(components, const char *)
648 = apr_pstrmemdup(pool, path + oldi, i - oldi);
651 oldi = i; /* skipping past the dirsep */
663 svn_path_compose(const apr_array_header_t *components,
666 apr_size_t *lengths = apr_palloc(pool, components->nelts*sizeof(*lengths));
667 apr_size_t max_length = components->nelts;
672 /* Get the length of each component so a total length can be
674 for (i = 0; i < components->nelts; ++i)
676 apr_size_t l = strlen(APR_ARRAY_IDX(components, i, const char *));
681 path = apr_palloc(pool, max_length + 1);
684 for (i = 0; i < components->nelts; ++i)
686 /* Append a '/' to the path. Handle the case with an absolute
687 path where a '/' appears in the first component. Only append
688 a '/' if the component is the second component that does not
689 follow a "/" first component; or it is the third or later
692 (i == 1 && strcmp("/", APR_ARRAY_IDX(components,
694 const char *)) != 0))
699 memcpy(p, APR_ARRAY_IDX(components, i, const char *), lengths[i]);
710 svn_path_is_single_path_component(const char *name)
712 assert(is_canonical(name, strlen(name)));
714 /* Can't be empty or `..' */
715 if (SVN_PATH_IS_EMPTY(name)
716 || (name[0] == '.' && name[1] == '.' && name[2] == '\0'))
719 /* Slashes are bad, m'kay... */
720 if (strchr(name, '/') != NULL)
729 svn_path_is_dotpath_present(const char *path)
733 /* The empty string does not have a dotpath */
737 /* Handle "." or a leading "./" */
738 if (path[0] == '.' && (path[1] == '\0' || path[1] == '/'))
741 /* Paths of length 1 (at this point) have no dotpath present. */
745 /* If any segment is "/./", then a dotpath is present. */
746 if (strstr(path, "/./") != NULL)
749 /* Does the path end in "/." ? */
751 return path[len - 2] == '/' && path[len - 1] == '.';
755 svn_path_is_backpath_present(const char *path)
759 /* 0 and 1-length paths do not have a backpath */
760 if (path[0] == '\0' || path[1] == '\0')
763 /* Handle ".." or a leading "../" */
764 if (path[0] == '.' && path[1] == '.' && (path[2] == '\0' || path[2] == '/'))
767 /* Paths of length 2 (at this point) have no backpath present. */
771 /* If any segment is "..", then a backpath is present. */
772 if (strstr(path, "/../") != NULL)
775 /* Does the path end in "/.." ? */
777 return path[len - 3] == '/' && path[len - 2] == '.' && path[len - 1] == '.';
/* Treat PATH as a candidate URI.  If it starts with a non-empty scheme
   that contains no '/' and is followed by "://", return a pointer to the
   byte right after "://"; otherwise return NULL.  The result points into
   PATH; nothing is allocated. */
static const char *
skip_uri_scheme(const char *path)
{
  const char *cur = path;

  /* The scheme runs up to the first ':' and may not contain a '/'. */
  while (*cur != '\0' && *cur != ':')
    {
      if (*cur == '/')
        return NULL;
      ++cur;
    }

  if (cur > path && cur[0] == ':' && cur[1] == '/' && cur[2] == '/')
    return cur + 3;

  return NULL;
}
805 svn_path_is_url(const char *path)
807 /* ### This function is reaaaaaaaaaaaaaally stupid right now.
808 We're just going to look for:
810 (scheme)://(optional_stuff)
812 Where (scheme) has no ':' or '/' characters.
814 Someday it might be nice to have an actual URI parser here.
816 return skip_uri_scheme(path) != NULL;
/* BNF for the characters of a URI path component ("pchar"):

      pchar       = unreserved | escaped |
                    ":" | "@" | "&" | "=" | "+" | "$" | ","
      unreserved  = alphanum | mark
      mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

   "escaped" does not apply to what users put into their paths, so the
   effective set is:

      alphanum | mark | ":" | "@" | "&" | "=" | "+" | "$" | ","

   The table is indexed by byte value; 1 marks a byte that may appear
   unescaped, 0 one that may not.  Each row covers 16 consecutive byte
   values, starting at the value shown on the left.
*/
const char svn_uri__char_validity[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,

  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
  /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,

  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

  /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
861 svn_path_is_uri_safe(const char *path)
865 /* Skip the URI scheme. */
866 path = skip_uri_scheme(path);
868 /* No scheme? Get outta here. */
872 /* Skip to the first slash that's after the URI scheme. */
873 path = strchr(path, '/');
875 /* If there's no first slash, then there's only a host portion;
876 therefore there couldn't be any uri-unsafe characters after the
877 host... so return true. */
881 for (i = 0; path[i]; i++)
883 /* Allow '%XX' (where each X is a hex digit) */
886 if (svn_ctype_isxdigit(path[i + 1]) &&
887 svn_ctype_isxdigit(path[i + 2]))
894 else if (! svn_uri__char_validity[((unsigned char)path[i])])
904 /* URI-encode each character c in PATH for which TABLE[c] is 0.
905 If no encoding was needed, return PATH, else return a new string allocated
908 uri_escape(const char *path, const char table[], apr_pool_t *pool)
910 svn_stringbuf_t *retstr;
911 apr_size_t i, copied = 0;
914 retstr = svn_stringbuf_create_ensure(strlen(path), pool);
915 for (i = 0; path[i]; i++)
917 c = (unsigned char)path[i];
921 /* If we got here, we're looking at a character that isn't
922 supported by the (or at least, our) URI encoding scheme. We
923 need to escape this character. */
925 /* First things first, copy all the good stuff that we haven't
926 yet copied into our output buffer. */
928 svn_stringbuf_appendbytes(retstr, path + copied,
931 /* Now, write in our escaped character, consisting of the
932 '%' and two digits. We cast the C to unsigned char here because
933 the 'X' format character will be tempted to treat it as an unsigned
934 int...which causes problem when messing with 0x80-0xFF chars.
935 We also need space for a null as apr_snprintf will write one. */
936 svn_stringbuf_ensure(retstr, retstr->len + 4);
937 apr_snprintf(retstr->data + retstr->len, 4, "%%%02X", (unsigned char)c);
940 /* Finally, update our copy counter. */
944 /* If we didn't encode anything, we don't need to duplicate the string. */
945 if (retstr->len == 0)
948 /* Anything left to copy? */
950 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
952 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
960 svn_path_uri_encode(const char *path, apr_pool_t *pool)
964 ret = uri_escape(path, svn_uri__char_validity, pool);
966 /* Our interface guarantees a copy. */
968 return apr_pstrdup(pool, path);
/* Escape table handed to uri_escape() by svn_path_uri_from_iri(): a 0
   entry means the byte gets escaped.  Byte values 0x00-0x7F are left
   alone, 0x80-0xFF are escaped.  Each row covers 16 consecutive byte
   values, starting at the value shown on the left. */
static const char iri_escape_chars[256] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
995 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool)
997 return uri_escape(iri, iri_escape_chars, pool);
/* Escape table handed to uri_escape() by svn_path_uri_autoescape(): a 0
   entry means the byte gets escaped.  The bytes escaped are space, '"',
   '<', '>', '\', '^', '`', '{', '|' and '}'; everything else is left
   alone.  Each row covers 16 consecutive byte values, starting at the
   value shown on the left. */
static const char uri_autoescape_chars[256] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,

  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
  /* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,

  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

  /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
1026 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool)
1028 return uri_escape(uri, uri_autoescape_chars, pool);
1032 svn_path_uri_decode(const char *path, apr_pool_t *pool)
1034 svn_stringbuf_t *retstr;
1036 svn_boolean_t query_start = FALSE;
1038 /* avoid repeated realloc */
1039 retstr = svn_stringbuf_create_ensure(strlen(path) + 1, pool);
1042 for (i = 0; path[i]; i++)
1048 /* Mark the start of the query string, if it exists. */
1051 else if (c == '+' && query_start)
1053 /* Only do this if we are into the query string.
1054 * RFC 2396, section 3.3 */
1057 else if (c == '%' && svn_ctype_isxdigit(path[i + 1])
1058 && svn_ctype_isxdigit(path[i+2]))
1061 digitz[0] = path[++i];
1062 digitz[1] = path[++i];
1064 c = (char)(strtol(digitz, NULL, 16));
1067 retstr->data[retstr->len++] = c;
1070 /* Null-terminate this bad-boy. */
1071 retstr->data[retstr->len] = 0;
1073 return retstr->data;
1078 svn_path_url_add_component2(const char *url,
1079 const char *component,
1082 /* = svn_path_uri_encode() but without always copying */
1083 component = uri_escape(component, svn_uri__char_validity, pool);
1085 return svn_path_join_internal(url, component, pool);
1089 svn_path_get_absolute(const char **pabsolute,
1090 const char *relative,
1093 if (svn_path_is_url(relative))
1095 *pabsolute = apr_pstrdup(pool, relative);
1096 return SVN_NO_ERROR;
1099 return svn_dirent_get_absolute(pabsolute, relative, pool);
1103 #if !defined(WIN32) && !defined(DARWIN)
1104 /** Get APR's internal path encoding. */
1105 static svn_error_t *
1106 get_path_encoding(svn_boolean_t *path_is_utf8, apr_pool_t *pool)
1108 apr_status_t apr_err;
1111 apr_err = apr_filepath_encoding(&encoding_style, pool);
1113 return svn_error_wrap_apr(apr_err,
1114 _("Can't determine the native path encoding"));
1116 /* ### What to do about APR_FILEPATH_ENCODING_UNKNOWN?
1117 Well, for now we'll just punt to the svn_utf_ functions;
1118 those will at least do the ASCII-subset check. */
1119 *path_is_utf8 = (encoding_style == APR_FILEPATH_ENCODING_UTF8);
1120 return SVN_NO_ERROR;
1126 svn_path_cstring_from_utf8(const char **path_apr,
1127 const char *path_utf8,
1130 #if !defined(WIN32) && !defined(DARWIN)
1131 svn_boolean_t path_is_utf8;
1132 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1136 *path_apr = apr_pstrdup(pool, path_utf8);
1137 return SVN_NO_ERROR;
1139 #if !defined(WIN32) && !defined(DARWIN)
1141 return svn_utf_cstring_from_utf8(path_apr, path_utf8, pool);
1147 svn_path_cstring_to_utf8(const char **path_utf8,
1148 const char *path_apr,
1151 #if !defined(WIN32) && !defined(DARWIN)
1152 svn_boolean_t path_is_utf8;
1153 SVN_ERR(get_path_encoding(&path_is_utf8, pool));
1157 *path_utf8 = apr_pstrdup(pool, path_apr);
1158 return SVN_NO_ERROR;
1160 #if !defined(WIN32) && !defined(DARWIN)
1162 return svn_utf_cstring_to_utf8(path_utf8, path_apr, pool);
1168 svn_path_illegal_path_escape(const char *path, apr_pool_t *pool)
1170 svn_stringbuf_t *retstr;
1171 apr_size_t i, copied = 0;
1174 /* At least one control character:
1175 strlen - 1 (control) + \ + N + N + N + null . */
1176 retstr = svn_stringbuf_create_ensure(strlen(path) + 4, pool);
1177 for (i = 0; path[i]; i++)
1179 c = (unsigned char)path[i];
1180 if (! svn_ctype_iscntrl(c))
1183 /* If we got here, we're looking at a character that isn't
1184 supported by the (or at least, our) URI encoding scheme. We
1185 need to escape this character. */
1187 /* First things first, copy all the good stuff that we haven't
1188 yet copied into our output buffer. */
1190 svn_stringbuf_appendbytes(retstr, path + copied,
1193 /* Make sure buffer is big enough for '\' 'N' 'N' 'N' (and NUL) */
1194 svn_stringbuf_ensure(retstr, retstr->len + 5);
1195 /*### The backslash separator doesn't work too great with Windows,
1196 but it's what we'll use for consistency with invalid utf8
1197 formatting (until someone has a better idea) */
1198 apr_snprintf(retstr->data + retstr->len, 5, "\\%03o", (unsigned char)c);
1201 /* Finally, update our copy counter. */
1205 /* If we didn't encode anything, we don't need to duplicate the string. */
1206 if (retstr->len == 0)
1209 /* Anything left to copy? */
1211 svn_stringbuf_appendbytes(retstr, path + copied, i - copied);
1213 /* retstr is null-terminated either by apr_snprintf or the svn_stringbuf
1216 return retstr->data;
1220 svn_path_check_valid(const char *path, apr_pool_t *pool)
1224 for (c = path; *c; c++)
1226 if (svn_ctype_iscntrl(*c))
1228 return svn_error_createf(SVN_ERR_FS_PATH_SYNTAX, NULL,
1229 _("Invalid control character '0x%02x' in path '%s'"),
1231 svn_path_illegal_path_escape(svn_dirent_local_style(path, pool),
1236 return SVN_NO_ERROR;
1240 svn_path_splitext(const char **path_root,
1241 const char **path_ext,
1245 const char *last_dot, *last_slash;
1247 /* Easy out -- why do all the work when there's no way to report it? */
1248 if (! (path_root || path_ext))
1251 /* Do we even have a period in this thing? And if so, is there
1252 anything after it? We look for the "rightmost" period in the
1254 last_dot = strrchr(path, '.');
1255 if (last_dot && (*(last_dot + 1) != '\0'))
1257 /* If we have a period, we need to make sure it occurs in the
1258 final path component -- that there's no path separator
1259 between the last period and the end of the PATH -- otherwise,
1260 it doesn't count. Also, we want to make sure that our period
1261 isn't the first character of the last component. */
1262 last_slash = strrchr(path, '/');
1263 if ((last_slash && (last_dot > (last_slash + 1)))
1264 || ((! last_slash) && (last_dot > path)))
1267 *path_root = apr_pstrmemdup(pool, path,
1268 (last_dot - path + 1) * sizeof(*path));
1270 *path_ext = apr_pstrdup(pool, last_dot + 1);
1274 /* If we get here, we never found a suitable separator character, so
1275 there's no split. */
1277 *path_root = apr_pstrdup(pool, path);
1283 /* Repository relative URLs (^/). */
1286 svn_path_is_repos_relative_url(const char *path)
1288 return (0 == strncmp("^/", path, 2));
1292 svn_path_resolve_repos_relative_url(const char **absolute_url,
1293 const char *relative_url,
1294 const char *repos_root_url,
1297 if (! svn_path_is_repos_relative_url(relative_url))
1298 return svn_error_createf(SVN_ERR_BAD_URL, NULL,
1299 _("Improper relative URL '%s'"),
1302 /* No assumptions are made about the canonicalization of the input
1303 * arguments, it is presumed that the output will be canonicalized after
1304 * this function, which will remove any duplicate path separator.
1306 *absolute_url = apr_pstrcat(pool, repos_root_url, relative_url + 1,
1309 return SVN_NO_ERROR;