3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 * @file svn_dirent_uri.h
24 * @brief A library to manipulate URIs, relative paths and directory entries.
26 * This library makes a clear distinction between several path formats:
28 * - a dirent is a path on (local) disc or a UNC path (Windows) in
29 * either relative or absolute format.
31 * "/foo/bar", "X:/temp", "//server/share", "A:/" (Windows only), ""
35 * - a uri, for our purposes, is a percent-encoded, absolute path
36 * (URI) that starts with a schema definition. In practice, these
37 * tend to look like URLs, but never carry query strings.
39 * "http://server", "file:///path/to/repos",
40 * "svn+ssh://user@host:123/My%20Stuff/file.doc"
42 * "file", "dir/file", "A:/dir", "/My%20Stuff/file.doc", ""
44 * - a relative path (relpath) is an unrooted path that can be joined
45 * to any other relative path, uri or dirent. A relative path is
46 * never rooted/prefixed by a '/'.
48 * "file", "dir/file", "dir/subdir/../file", ""
50 * "/file", "http://server/file"
52 * This distinction is needed because on Windows we have to handle some
53 * dirents and URIs differently. Since it's not possible to determine from
54 * the path string if it's a dirent or a URI, it's up to the API user to
55 * make this choice. See also issue #2028.
57 * All incoming and outgoing paths are non-NULL unless otherwise documented.
59 * All of these functions expect paths passed into them to be in canonical
62 * - @c svn_dirent_canonicalize()
63 * - @c svn_dirent_is_canonical()
64 * - @c svn_dirent_internal_style()
65 * - @c svn_relpath_canonicalize()
66 * - @c svn_relpath_is_canonical()
67 * - @c svn_relpath__internal_style()
68 * - @c svn_uri_canonicalize()
69 * - @c svn_uri_is_canonical()
71 * The Subversion codebase also recognizes some other classes of path:
73 * - A Subversion filesystem path (fspath) -- otherwise known as a
74 * path within a repository -- is a path relative to the root of
75 * the repository filesystem, that starts with a slash ("/"). The
76 * rules for a fspath are the same as for a relpath except for the
77 * leading '/'. A fspath never ends with '/' except when the whole
78 * path is just '/'. The fspath API is private (see
79 * private/svn_fspath.h).
81 * - A URL path (urlpath) is just the path part of a URL (the part
82 * that follows the schema, username, hostname, and port). These
83 * are also like relpaths, except that they have a leading slash
84 * (like fspaths) and are URI-encoded. The urlpath API is also
85 * private (see private/svn_fspath.h).
87 * "/svn/repos/trunk/README",
88 * "/svn/repos/!svn/bc/45/file%20with%20spaces.txt"
90 * So, which path API is appropriate for your use-case?
92 * - If your path refers to a local file, directory, symlink, etc. of
93 * the sort that you can examine and operate on with other software
94 * on your computer, it's a dirent.
96 * - If your path is a full URL -- with a schema, hostname (maybe),
97 * and path portion -- it's a uri.
99 * - If your path is relative, and is somewhat ambiguous unless it's
100 * joined to some other more explicit (possibly absolute) base
101 * (such as a dirent or URL), it's a relpath.
103 * - If your path is the virtual path of a versioned object inside a
104 * Subversion repository, it could be one of two different types of
105 * paths. We'd prefer to use relpaths (relative to the root
106 * directory of the virtual repository filesystem) for that stuff,
107 * but some legacy code uses fspaths. You'll need to figure out if
108 * your code expects repository paths to have a leading '/' or not.
109 * If so, they are fspaths; otherwise they are relpaths.
111 * - If your path refers only to the path part of a URL -- as if
112 * someone hacked off the initial schema and hostname portion --
113 * it's a urlpath. To date, the ra_dav modules are the only ones
114 * within Subversion that make use of urlpaths, and this is because
115 * WebDAV makes heavy use of that form of path specification.
117 * When translating between local paths (dirents) and uris, code should
118 * always go via the relative path format, perhaps by truncating a
119 * parent portion from a path with svn_*_skip_ancestor(), or by
120 * converting portions to basenames and then joining to existing
123 * SECURITY WARNING: If a path that is received from an untrusted
124 * source -- such as from the network -- is converted to a dirent it
125 * should be tested with svn_dirent_is_under_root() before you can
126 * assume the path to be a safe local path.
128 * MEMORY ALLOCATION: A function documented as allocating the result
129 * in a pool may instead return a static string such as "." or "". If
130 * the result is equal to an input, it will duplicate the input.
133 #ifndef SVN_DIRENT_URI_H
134 #define SVN_DIRENT_URI_H
137 #include <apr_pools.h>
138 #include <apr_tables.h>
140 #include "svn_types.h"
144 #endif /* __cplusplus */
147 /** Convert @a dirent from the local style to the canonical internal style.
148 * "Local style" means native path separators and "." for the empty path.
150 * Allocate the result in @a result_pool.
155 svn_dirent_internal_style(const char *dirent,
156 apr_pool_t *result_pool);
158 /** Convert @a dirent from the internal style to the local style.
159 * "Local style" means native path separators and "." for the empty path.
160 * If the input is not canonical, the output may not be canonical.
162 * Allocate the result in @a result_pool.
167 svn_dirent_local_style(const char *dirent,
168 apr_pool_t *result_pool);
170 /** Convert @a relpath from the local style to the canonical internal style.
171 * "Local style" means native path separators and "." for the empty path.
173 * Allocate the result in @a result_pool.
178 svn_relpath__internal_style(const char *relpath,
179 apr_pool_t *result_pool);
182 /** Join a base dirent (@a base) with a component (@a component).
184 * If either @a base or @a component is the empty string, then the other
185 * argument will be copied and returned. If both are the empty string then
186 * the empty string is returned.
188 * If the @a component is an absolute dirent, then it is copied and returned.
189 * The platform-specific rules for joining paths are used to join the components.
191 * This function is NOT appropriate for native (local) file
192 * dirents. Only for "internal" canonicalized dirents, since it uses '/'
195 * Allocate the result in @a result_pool.
200 svn_dirent_join(const char *base,
201 const char *component,
202 apr_pool_t *result_pool);
204 /** Join multiple components onto a @a base dirent. The components are
205 * terminated by a @c SVN_VA_NULL.
207 * If any component is the empty string, it will be ignored.
209 * If any component is an absolute dirent, then it resets the base and
210 * further components will be appended to it.
212 * See svn_dirent_join() for further notes about joining dirents.
214 * Allocate the result in @a result_pool.
219 svn_dirent_join_many(apr_pool_t *result_pool,
221 ...) SVN_NEEDS_SENTINEL_NULL;
223 /** Join a base relpath (@a base) with a component (@a component).
224 * @a component need not be a single component.
226 * If either @a base or @a component is the empty path, then the other
227 * argument will be copied and returned. If both are the empty path the
228 * empty path is returned.
230 * Allocate the result in @a result_pool.
235 svn_relpath_join(const char *base,
236 const char *component,
237 apr_pool_t *result_pool);
239 /** Gets the name of the specified canonicalized @a dirent as it is known
240 * within its parent directory. If the @a dirent is root, return "". The
241 * returned value will not have slashes in it.
243 * Example: svn_dirent_basename("/foo/bar") -> "bar"
245 * If @a result_pool is NULL, return a pointer to the basename in @a dirent,
246 * otherwise allocate the result in @a result_pool.
248 * @note If an empty string is passed, then an empty string will be returned.
253 svn_dirent_basename(const char *dirent,
254 apr_pool_t *result_pool);
256 /** Get the dirname of the specified canonicalized @a dirent, defined as
257 * the dirent with its basename removed.
259 * If @a dirent is root ("/", "X:/", "//server/share/") or "", it is returned
262 * Allocate the result in @a result_pool.
267 svn_dirent_dirname(const char *dirent,
268 apr_pool_t *result_pool);
270 /** Divide the canonicalized @a dirent into @a *dirpath and @a *base_name.
272 * If @a dirpath or @a base_name is NULL, then don't set that one.
274 * Either @a dirpath or @a base_name may be @a dirent's own address, but they
275 * may not both be the same address, or the results are undefined.
277 * If @a dirent has two or more components, the separator between @a dirpath
278 * and @a base_name is not included in either of the new names.
281 * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre>
282 * - <pre>"/bar" ==> "/" and "bar"</pre>
283 * - <pre>"/" ==> "/" and ""</pre>
284 * - <pre>"bar" ==> "" and "bar"</pre>
285 * - <pre>"" ==> "" and ""</pre>
286 * Windows: - <pre>"X:/" ==> "X:/" and ""</pre>
287 * - <pre>"X:/foo" ==> "X:/" and "foo"</pre>
288 * - <pre>"X:foo" ==> "X:" and "foo"</pre>
289 * Posix: - <pre>"X:foo" ==> "" and "X:foo"</pre>
291 * Allocate the results in @a result_pool.
296 svn_dirent_split(const char **dirpath,
297 const char **base_name,
299 apr_pool_t *result_pool);
301 /** Divide the canonicalized @a relpath into @a *dirpath and @a *base_name.
303 * If @a dirpath or @a base_name is NULL, then don't set that one.
305 * Either @a dirpath or @a base_name may be @a relpath's own address, but
306 * they may not both be the same address, or the results are undefined.
308 * If @a relpath has two or more components, the separator between @a dirpath
309 * and @a base_name is not included in either of the new names.
312 * - <pre>"foo/bar/baz" ==> "foo/bar" and "baz"</pre>
313 * - <pre>"bar" ==> "" and "bar"</pre>
314 * - <pre>"" ==> "" and ""</pre>
316 * Allocate the results in @a result_pool.
321 svn_relpath_split(const char **dirpath,
322 const char **base_name,
324 apr_pool_t *result_pool);
326 /** Get the basename of the specified canonicalized @a relpath. The
327 * basename is defined as the last component of the relpath. If the @a
328 * relpath has only one component then that is returned. The returned
329 * value will have no slashes in it.
331 * Example: svn_relpath_basename("trunk/foo/bar") -> "bar"
333 * If @a result_pool is NULL, return a pointer to the basename in @a relpath,
334 * otherwise allocate the result in @a result_pool.
336 * @note If an empty string is passed, then an empty string will be returned.
341 svn_relpath_basename(const char *relpath,
342 apr_pool_t *result_pool);
344 /** Get the dirname of the specified canonicalized @a relpath, defined as
345 * the relpath with its basename removed.
347 * If @a relpath is empty, "" is returned.
349 * Allocate the result in @a result_pool.
354 svn_relpath_dirname(const char *relpath,
355 apr_pool_t *result_pool);
357 /** Return a maximum of @a max_components components of @a relpath. This is
358 * an efficient way of calling svn_relpath_dirname() multiple times until only
359 * a specific number of components is left.
361 * Allocate the result in @a result_pool (or return a static string when @a max_components is 0).
366 svn_relpath_prefix(const char *relpath,
368 apr_pool_t *result_pool);
371 /** Divide the canonicalized @a uri into a uri @a *dirpath and a
372 * (URI-decoded) relpath @a *base_name.
374 * If @a dirpath or @a base_name is NULL, then don't set that one.
376 * Either @a dirpath or @a base_name may be @a uri's own address, but they
377 * may not both be the same address, or the results are undefined.
379 * If @a uri has two or more components, the separator between @a dirpath
380 * and @a base_name is not included in either of the new names.
383 * - <pre>"http://server/foo/bar" ==> "http://server/foo" and "bar"</pre>
385 * Allocate the result in @a result_pool.
390 svn_uri_split(const char **dirpath,
391 const char **base_name,
393 apr_pool_t *result_pool);
395 /** Get the (URI-decoded) basename of the specified canonicalized @a
396 * uri. The basename is defined as the last component of the uri. If
397 * the @a uri is root, return "". The returned value will have no
400 * Example: svn_uri_basename("http://server/foo/bar") -> "bar"
402 * Allocate the result in @a result_pool.
407 svn_uri_basename(const char *uri,
408 apr_pool_t *result_pool);
410 /** Get the dirname of the specified canonicalized @a uri, defined as
411 * the uri with its basename removed.
413 * If @a uri is root (e.g. "http://server"), it is returned
416 * Allocate the result in @a result_pool.
421 svn_uri_dirname(const char *uri,
422 apr_pool_t *result_pool);
424 /** Return TRUE if @a dirent is considered absolute on the platform at
425 * hand. E.g. '/foo' on Posix platforms or 'X:/foo', '//server/share/foo'
431 svn_dirent_is_absolute(const char *dirent);
433 /** Return TRUE if @a dirent is considered a root directory on the platform
437 * On Windows: '/', 'X:/', '//server/share', 'X:'
439 * Note that on Windows '/' and 'X:' are roots, but paths starting with these
440 * roots are not absolute.
445 svn_dirent_is_root(const char *dirent,
448 /** Return TRUE if @a uri is a root URL (e.g., "http://server").
453 svn_uri_is_root(const char *uri,
456 /** Return a new dirent like @a dirent, but transformed such that some types
457 * of dirent specification redundancies are removed.
460 * - collapsing redundant "/./" elements
461 * - removing multiple adjacent separator characters
462 * - removing trailing separator characters
463 * - converting the server name of a UNC path to lower case (on Windows)
464 * - converting a drive letter to upper case (on Windows)
466 * and possibly other semantically inoperative transformations.
468 * Allocate the result in @a result_pool.
473 svn_dirent_canonicalize(const char *dirent,
474 apr_pool_t *result_pool);
477 /** Return a new relpath like @a relpath, but transformed such that some types
478 * of relpath specification redundancies are removed.
481 * - collapsing redundant "/./" elements
482 * - removing multiple adjacent separator characters
483 * - removing trailing separator characters
485 * and possibly other semantically inoperative transformations.
487 * Allocate the result in @a result_pool.
492 svn_relpath_canonicalize(const char *relpath,
493 apr_pool_t *result_pool);
496 /** Return a new uri like @a uri, but transformed such that some types
497 * of uri specification redundancies are removed.
500 * - collapsing redundant "/./" elements
501 * - removing multiple adjacent separator characters
502 * - removing trailing separator characters
503 * - normalizing the escaping of the path component by unescaping
504 * characters that don't need escaping and escaping characters that do
505 * need escaping but weren't
506 * - removing the port number if it is the default port number (80 for
507 * http, 443 for https, 3690 for svn)
509 * and possibly other semantically inoperative transformations.
511 * Allocate the result in @a result_pool.
516 svn_uri_canonicalize(const char *uri,
517 apr_pool_t *result_pool);
519 /** Return @c TRUE iff @a dirent is canonical.
521 * Use @a scratch_pool for temporary allocations.
523 * @note The test for canonicalization is currently defined as
524 * "looks exactly the same as @c svn_dirent_canonicalize() would make
527 * @see svn_dirent_canonicalize()
531 svn_dirent_is_canonical(const char *dirent,
532 apr_pool_t *scratch_pool);
534 /** Return @c TRUE iff @a relpath is canonical.
536 * @see svn_relpath_canonicalize()
540 svn_relpath_is_canonical(const char *relpath);
542 /** Return @c TRUE iff @a uri is canonical.
544 * Use @a scratch_pool for temporary allocations.
546 * @see svn_uri_canonicalize()
550 svn_uri_is_canonical(const char *uri,
551 apr_pool_t *scratch_pool);
553 /** Return the longest common dirent shared by two canonicalized dirents,
554 * @a dirent1 and @a dirent2. If there's no common ancestor, return the
557 * Allocate the result in @a result_pool.
562 svn_dirent_get_longest_ancestor(const char *dirent1,
564 apr_pool_t *result_pool);
566 /** Return the longest common path shared by two relative paths,
567 * @a relpath1 and @a relpath2. If there's no common ancestor, return the
570 * Allocate the result in @a result_pool.
575 svn_relpath_get_longest_ancestor(const char *relpath1,
576 const char *relpath2,
577 apr_pool_t *result_pool);
579 /** Return the longest common path shared by two canonicalized uris,
580 * @a uri1 and @a uri2. If there's no common ancestor, return the
581 * empty path. In order for two URLs to have a common ancestor, they
582 * must (a) have the same protocol (since two URLs with the same path
583 * but different protocols may point at completely different
584 * resources), and (b) share a common ancestor in their path
585 * component, i.e. 'protocol://' is not a sufficient ancestor.
587 * Allocate the result in @a result_pool.
592 svn_uri_get_longest_ancestor(const char *uri1,
594 apr_pool_t *result_pool);
596 /** Convert @a relative canonicalized dirent to an absolute dirent and
597 * return the results in @a *pabsolute.
598 * Raise SVN_ERR_BAD_FILENAME if the absolute dirent cannot be determined.
600 * Allocate the result in @a result_pool.
605 svn_dirent_get_absolute(const char **pabsolute,
606 const char *relative,
607 apr_pool_t *result_pool);
609 /** Similar to svn_dirent_skip_ancestor(), except that if @a child_dirent is
610 * the same as @a parent_dirent, it is not considered a child, so the result
611 * is @c NULL; an empty string is never returned.
613 * If @a result_pool is NULL, return a pointer into @a child_dirent, otherwise
614 * allocate the result in @a result_pool.
616 * ### TODO: Deprecate, as the semantics are trivially
617 * obtainable from *_skip_ancestor().
622 svn_dirent_is_child(const char *parent_dirent,
623 const char *child_dirent,
624 apr_pool_t *result_pool);
626 /** Return TRUE if @a parent_dirent is an ancestor of @a child_dirent or
627 * the dirents are equal, and FALSE otherwise.
629 * ### TODO: Deprecate, as the semantics are trivially
630 * obtainable from *_skip_ancestor().
635 svn_dirent_is_ancestor(const char *parent_dirent,
636 const char *child_dirent);
638 /** Return TRUE if @a parent_uri is an ancestor of @a child_uri or
639 * the uris are equal, and FALSE otherwise.
642 svn_uri__is_ancestor(const char *parent_uri,
643 const char *child_uri);
646 /** Return the relative path part of @a child_dirent that is below
647 * @a parent_dirent, or just "" if @a parent_dirent is equal to
648 * @a child_dirent. If @a child_dirent is not below or equal to
649 * @a parent_dirent, return NULL.
651 * If one of @a parent_dirent and @a child_dirent is absolute and
652 * the other relative, return NULL.
657 svn_dirent_skip_ancestor(const char *parent_dirent,
658 const char *child_dirent);
660 /** Return the relative path part of @a child_relpath that is below
661 * @a parent_relpath, or just "" if @a parent_relpath is equal to
662 * @a child_relpath. If @a child_relpath is not below @a parent_relpath,
668 svn_relpath_skip_ancestor(const char *parent_relpath,
669 const char *child_relpath);
671 /** Return the URI-decoded relative path of @a child_uri that is below
672 * @a parent_uri, or just "" if @a parent_uri is equal to @a child_uri. If
673 * @a child_uri is not below @a parent_uri, return NULL.
675 * Allocate the result in @a result_pool.
680 svn_uri_skip_ancestor(const char *parent_uri,
681 const char *child_uri,
682 apr_pool_t *result_pool);
684 /** Find the common prefix of the canonicalized dirents in @a targets
685 * (an array of <tt>const char *</tt>'s), and remove redundant dirents if @a
686 * remove_redundancies is TRUE.
688 * - Set @a *pcommon to the absolute dirent of the dirent common to
689 * all of the targets. If the targets have no common prefix (e.g.
690 * "C:/file" and "D:/file" on Windows), set @a *pcommon to the empty
693 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
694 * to an array of targets relative to @a *pcommon, and if
695 * @a remove_redundancies is TRUE, omit any dirents that are
696 * descendants of another dirent in @a targets. If *pcommon
697 * is empty, @a *pcondensed_targets will contain absolute dirents;
698 * redundancies can still be removed. If @a pcondensed_targets is NULL,
701 * Else if there is exactly one target, then
703 * - Set @a *pcommon to that target, and
705 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
706 * to an array containing zero elements. Else if
707 * @a pcondensed_targets is NULL, leave it alone.
709 * If there are no items in @a targets, set @a *pcommon and (if
710 * applicable) @a *pcondensed_targets to @c NULL.
712 * Allocate the results in @a result_pool. Use @a scratch_pool for
713 * temporary allocations.
718 svn_dirent_condense_targets(const char **pcommon,
719 apr_array_header_t **pcondensed_targets,
720 const apr_array_header_t *targets,
721 svn_boolean_t remove_redundancies,
722 apr_pool_t *result_pool,
723 apr_pool_t *scratch_pool);
725 /** Find the common prefix of the canonicalized uris in @a targets
726 * (an array of <tt>const char *</tt>'s), and remove redundant uris if @a
727 * remove_redundancies is TRUE.
729 * - Set @a *pcommon to the common base uri of all of the targets.
730 * If the targets have no common prefix (e.g. "http://srv1/file"
731 * and "http://srv2/file"), set @a *pcommon to the empty
734 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
735 * to an array of URI-decoded targets relative to @a *pcommon, and
736 * if @a remove_redundancies is TRUE, omit any uris that are
737 * descendants of another uri in @a targets. If *pcommon is
738 * empty, @a *pcondensed_targets will contain absolute uris;
739 * redundancies can still be removed. If @a pcondensed_targets is
740 * NULL, leave it alone.
742 * Else if there is exactly one target, then
744 * - Set @a *pcommon to that target, and
746 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
747 * to an array containing zero elements. Else if
748 * @a pcondensed_targets is NULL, leave it alone.
750 * If there are no items in @a targets, set @a *pcommon and (if
751 * applicable) @a *pcondensed_targets to @c NULL.
753 * Allocate the results in @a result_pool. Use @a scratch_pool for
754 * temporary allocations.
759 svn_uri_condense_targets(const char **pcommon,
760 apr_array_header_t **pcondensed_targets,
761 const apr_array_header_t *targets,
762 svn_boolean_t remove_redundancies,
763 apr_pool_t *result_pool,
764 apr_pool_t *scratch_pool);
766 /** Join @a path onto @a base_path, checking that @a path does not attempt
767 * to traverse above @a base_path. If @a path or any ".." component within
768 * it resolves to a path above @a base_path, or if @a path is an absolute
769 * path, then set @a *under_root to @c FALSE. Otherwise, set @a *under_root
770 * to @c TRUE and, if @a result_path is not @c NULL, set @a *result_path to
771 * the resulting path.
773 * @a path need not be canonical. @a base_path must be canonical and
774 * @a *result_path will be canonical.
776 * Allocate the result in @a result_pool.
778 * @note Use of this function is strongly encouraged. Do not roll your own.
779 * (http://cve.mitre.org/cgi-bin/cvename.cgi?name=2007-3846)
784 svn_dirent_is_under_root(svn_boolean_t *under_root,
785 const char **result_path,
786 const char *base_path,
788 apr_pool_t *result_pool);
790 /** Set @a *dirent to the path corresponding to the file:// URL @a url, using
791 * the platform-specific file:// rules.
793 * Allocate the result in @a result_pool.
798 svn_uri_get_dirent_from_file_url(const char **dirent,
800 apr_pool_t *result_pool);
802 /** Set @a *url to a file:// URL, corresponding to @a dirent using the
803 * platform-specific dirent and file:// rules.
805 * Allocate the result in @a result_pool.
810 svn_uri_get_file_url_from_dirent(const char **url,
812 apr_pool_t *result_pool);
816 #endif /* __cplusplus */
818 #endif /* SVN_DIRENT_URI_H */