3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
20 * ====================================================================
23 * @file svn_dirent_uri.h
24 * @brief A library to manipulate URIs, relative paths and directory entries.
26 * This library makes a clear distinction between several path formats:
28 * - a dirent is a path on (local) disc or a UNC path (Windows) in
29 * either relative or absolute format.
31 * Examples: "/foo/bar", "X:/temp", "//server/share", "A:/" (Windows only), ""
35 * - a uri, for our purposes, is a percent-encoded, absolute path
36 * (URI) that starts with a scheme definition. In practice, these
37 * tend to look like URLs, but never carry query strings.
39 * Examples: "http://server", "file:///path/to/repos",
40 * "svn+ssh://user@host:123/My%20Stuff/file.doc"
42 * But not: "file", "dir/file", "A:/dir", "/My%20Stuff/file.doc", ""
44 * - a relative path (relpath) is an unrooted path that can be joined
45 * to any other relative path, uri or dirent. A relative path is
46 * never rooted/prefixed by a '/'.
48 * Examples: "file", "dir/file", "dir/subdir/../file", ""
50 * But not: "/file", "http://server/file"
52 * This distinction is needed because on Windows we have to handle some
53 * dirents and URIs differently. Since it's not possible to determine from
54 * the path string if it's a dirent or a URI, it's up to the API user to
55 * make this choice. See also issue #2028.
57 * All incoming and outgoing paths are non-NULL unless otherwise documented.
59 * All of these functions expect paths passed into them to be in canonical form, except:
62 * - @c svn_dirent_canonicalize()
63 * - @c svn_dirent_is_canonical()
64 * - @c svn_dirent_internal_style()
65 * - @c svn_relpath_canonicalize()
66 * - @c svn_relpath_is_canonical()
67 * - @c svn_relpath__internal_style()
68 * - @c svn_uri_canonicalize()
69 * - @c svn_uri_is_canonical()
71 * The Subversion codebase also recognizes some other classes of path:
73 * - A Subversion filesystem path (fspath) -- otherwise known as a
74 * path within a repository -- is a path relative to the root of
75 * the repository filesystem, that starts with a slash ("/"). The
76 * rules for a fspath are the same as for a relpath except for the
77 * leading '/'. A fspath never ends with '/' except when the whole
78 * path is just '/'. The fspath API is private (see
79 * private/svn_fspath.h).
81 * - A URL path (urlpath) is just the path part of a URL (the part
82 * that follows the scheme, username, hostname, and port). These
83 * are also like relpaths, except that they have a leading slash
84 * (like fspaths) and are URI-encoded. The urlpath API is also
85 * private (see private/svn_fspath.h).
87 * Examples: "/svn/repos/trunk/README",
88 * "/svn/repos/!svn/bc/45/file%20with%20spaces.txt"
90 * So, which path API is appropriate for your use-case?
92 * - If your path refers to a local file, directory, symlink, etc. of
93 * the sort that you can examine and operate on with other software
94 * on your computer, it's a dirent.
96 * - If your path is a full URL -- with a scheme, hostname (maybe),
97 * and path portion -- it's a uri.
99 * - If your path is relative, and is somewhat ambiguous unless it's
100 * joined to some other more explicit (possibly absolute) base
101 * (such as a dirent or URL), it's a relpath.
103 * - If your path is the virtual path of a versioned object inside a
104 * Subversion repository, it could be one of two different types of
105 * paths. We'd prefer to use relpaths (relative to the root
106 * directory of the virtual repository filesystem) for that stuff,
107 * but some legacy code uses fspaths. You'll need to figure out if
108 * your code expects repository paths to have a leading '/' or not.
109 * If so, they are fspaths; otherwise they are relpaths.
111 * - If your path refers only to the path part of URL -- as if
112 * someone hacked off the initial scheme and hostname portion --
113 * it's a urlpath. To date, the ra_dav modules are the only ones
114 * within Subversion that make use of urlpaths, and this is because
115 * WebDAV makes heavy use of that form of path specification.
117 * When translating between local paths (dirents) and uris, code should
118 * always go via the relative path format, perhaps by truncating a
119 * parent portion from a path with svn_*_skip_ancestor(), or by
120 * converting portions to basenames and then joining to existing paths.
123 * SECURITY WARNING: If a path that is received from an untrusted
124 * source -- such as from the network -- is converted to a dirent it
125 * should be tested with svn_dirent_is_under_root() before you can
126 * assume the path to be a safe local path.
128 * MEMORY ALLOCATION: A function documented as allocating the result
129 * in a pool may instead return a static string such as "." or "". If
130 * the result is equal to an input, it will duplicate the input.
133 #ifndef SVN_DIRENT_URI_H
134 #define SVN_DIRENT_URI_H
137 #include <apr_pools.h>
138 #include <apr_tables.h>
140 #include "svn_types.h"
144 #endif /* __cplusplus */
147 /** Convert @a dirent from the local style to the canonical internal style.
148 * "Local style" means native path separators and "." for the empty path.
150 * Allocate the result in @a result_pool.
155 svn_dirent_internal_style(const char *dirent,
156 apr_pool_t *result_pool);
158 /** Convert @a dirent from the internal style to the local style.
159 * "Local style" means native path separators and "." for the empty path.
160 * If the input is not canonical, the output may not be canonical.
162 * Allocate the result in @a result_pool.
167 svn_dirent_local_style(const char *dirent,
168 apr_pool_t *result_pool);
170 /** Convert @a relpath from the local style to the canonical internal style.
171 * "Local style" means native path separators and "." for the empty path.
173 * Allocate the result in @a result_pool.
178 svn_relpath__internal_style(const char *relpath,
179 apr_pool_t *result_pool);
182 /** Join a base dirent (@a base) with a component (@a component).
184 * If either @a base or @a component is the empty string, then the other
185 * argument will be copied and returned. If both are the empty string then
186 * the empty string is returned.
188 * If the @a component is an absolute dirent, then it is copied and returned.
189 * The platform-specific rules for joining paths are used to join the components.
191 * This function is NOT appropriate for native (local) file
192 * dirents. It is only for "internal" canonicalized dirents, since it uses '/' as the separator.
195 * Allocate the result in @a result_pool.
200 svn_dirent_join(const char *base,
201 const char *component,
202 apr_pool_t *result_pool);
204 /** Join multiple components onto a @a base dirent. The components are
205 * terminated by a @c NULL.
207 * If any component is the empty string, it will be ignored.
209 * If any component is an absolute dirent, then it resets the base and
210 * further components will be appended to it.
212 * See svn_dirent_join() for further notes about joining dirents.
214 * Allocate the result in @a result_pool.
219 svn_dirent_join_many(apr_pool_t *result_pool,
223 /** Join a base relpath (@a base) with a component (@a component).
224 * @a component need not be a single component.
226 * If either @a base or @a component is the empty path, then the other
227 * argument will be copied and returned. If both are the empty path, the
228 * empty path is returned.
230 * Allocate the result in @a result_pool.
235 svn_relpath_join(const char *base,
236 const char *component,
237 apr_pool_t *result_pool);
239 /** Gets the name of the specified canonicalized @a dirent as it is known
240 * within its parent directory. If the @a dirent is root, return "". The
241 * returned value will not have slashes in it.
243 * Example: svn_dirent_basename("/foo/bar") -> "bar"
245 * If @a result_pool is NULL, return a pointer to the basename in @a dirent,
246 * otherwise allocate the result in @a result_pool.
248 * @note If an empty string is passed, then an empty string will be returned.
253 svn_dirent_basename(const char *dirent,
254 apr_pool_t *result_pool);
256 /** Get the dirname of the specified canonicalized @a dirent, defined as
257 * the dirent with its basename removed.
259 * If @a dirent is root ("/", "X:/", "//server/share/") or "", it is returned unchanged.
262 * Allocate the result in @a result_pool.
267 svn_dirent_dirname(const char *dirent,
268 apr_pool_t *result_pool);
270 /** Divide the canonicalized @a dirent into @a *dirpath and @a *base_name.
272 * If @a dirpath or @a base_name is NULL, then don't set that one.
274 * Either @a dirpath or @a base_name may be @a dirent's own address, but they
275 * may not both be the same address, or the results are undefined.
277 * If @a dirent has two or more components, the separator between @a dirpath
278 * and @a base_name is not included in either of the new names.
281 * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre>
282 * - <pre>"/bar" ==> "/" and "bar"</pre>
283 * - <pre>"/" ==> "/" and ""</pre>
284 * - <pre>"bar" ==> "" and "bar"</pre>
285 * - <pre>"" ==> "" and ""</pre>
286 * Windows: - <pre>"X:/" ==> "X:/" and ""</pre>
287 * - <pre>"X:/foo" ==> "X:/" and "foo"</pre>
288 * - <pre>"X:foo" ==> "X:" and "foo"</pre>
289 * Posix: - <pre>"X:foo" ==> "" and "X:foo"</pre>
291 * Allocate the results in @a result_pool.
296 svn_dirent_split(const char **dirpath,
297 const char **base_name,
299 apr_pool_t *result_pool);
301 /** Divide the canonicalized @a relpath into @a *dirpath and @a *base_name.
303 * If @a dirpath or @a base_name is NULL, then don't set that one.
305 * Either @a dirpath or @a base_name may be @a relpath's own address, but
306 * they may not both be the same address, or the results are undefined.
308 * If @a relpath has two or more components, the separator between @a dirpath
309 * and @a base_name is not included in either of the new names.
312 * - <pre>"foo/bar/baz" ==> "foo/bar" and "baz"</pre>
313 * - <pre>"bar" ==> "" and "bar"</pre>
314 * - <pre>"" ==> "" and ""</pre>
316 * Allocate the results in @a result_pool.
321 svn_relpath_split(const char **dirpath,
322 const char **base_name,
324 apr_pool_t *result_pool);
326 /** Get the basename of the specified canonicalized @a relpath. The
327 * basename is defined as the last component of the relpath. If the @a
328 * relpath has only one component then that is returned. The returned
329 * value will have no slashes in it.
331 * Example: svn_relpath_basename("trunk/foo/bar") -> "bar"
333 * If @a result_pool is NULL, return a pointer to the basename in @a relpath,
334 * otherwise allocate the result in @a result_pool.
336 * @note If an empty string is passed, then an empty string will be returned.
341 svn_relpath_basename(const char *relpath,
342 apr_pool_t *result_pool);
344 /** Get the dirname of the specified canonicalized @a relpath, defined as
345 * the relpath with its basename removed.
347 * If @a relpath is empty, "" is returned.
349 * Allocate the result in @a result_pool.
354 svn_relpath_dirname(const char *relpath,
355 apr_pool_t *result_pool);
358 /** Divide the canonicalized @a uri into a uri @a *dirpath and a
359 * (URI-decoded) relpath @a *base_name.
361 * If @a dirpath or @a base_name is NULL, then don't set that one.
363 * Either @a dirpath or @a base_name may be @a uri's own address, but they
364 * may not both be the same address, or the results are undefined.
366 * If @a uri has two or more components, the separator between @a dirpath
367 * and @a base_name is not included in either of the new names.
370 * - <pre>"http://server/foo/bar" ==> "http://server/foo" and "bar"</pre>
372 * Allocate the results in @a result_pool.
377 svn_uri_split(const char **dirpath,
378 const char **base_name,
380 apr_pool_t *result_pool);
382 /** Get the (URI-decoded) basename of the specified canonicalized @a
383 * uri. The basename is defined as the last component of the uri. If
384 * the @a uri is root, return "". The returned value will have no slashes in it.
387 * Example: svn_uri_basename("http://server/foo/bar") -> "bar"
389 * Allocate the result in @a result_pool.
394 svn_uri_basename(const char *uri,
395 apr_pool_t *result_pool);
397 /** Get the dirname of the specified canonicalized @a uri, defined as
398 * the uri with its basename removed.
400 * If @a uri is root (e.g. "http://server"), it is returned unchanged.
403 * Allocate the result in @a result_pool.
408 svn_uri_dirname(const char *uri,
409 apr_pool_t *result_pool);
411 /** Return TRUE if @a dirent is considered absolute on the platform at
412 * hand. E.g. '/foo' on Posix platforms or 'X:/foo', '//server/share/foo' on Windows.
418 svn_dirent_is_absolute(const char *dirent);
420 /** Return TRUE if @a dirent is considered a root directory on the platform at hand.
424 * On Windows: '/', 'X:/', '//server/share', 'X:'
426 * Note that on Windows '/' and 'X:' are roots, but paths starting with this
427 * root are not absolute.
432 svn_dirent_is_root(const char *dirent,
435 /** Return TRUE if @a uri is a root URL (e.g., "http://server").
440 svn_uri_is_root(const char *uri,
443 /** Return a new dirent like @a dirent, but transformed such that some types
444 * of dirent specification redundancies are removed. This involves:
447 * - collapsing redundant "/./" elements
448 * - removing multiple adjacent separator characters
449 * - removing trailing separator characters
450 * - converting the server name of a UNC path to lower case (on Windows)
451 * - converting a drive letter to upper case (on Windows)
453 * and possibly other semantically inoperative transformations.
455 * Allocate the result in @a result_pool.
460 svn_dirent_canonicalize(const char *dirent,
461 apr_pool_t *result_pool);
464 /** Return a new relpath like @a relpath, but transformed such that some types
465 * of relpath specification redundancies are removed. This involves:
468 * - collapsing redundant "/./" elements
469 * - removing multiple adjacent separator characters
470 * - removing trailing separator characters
472 * and possibly other semantically inoperative transformations.
474 * Allocate the result in @a result_pool.
479 svn_relpath_canonicalize(const char *relpath,
480 apr_pool_t *result_pool);
483 /** Return a new uri like @a uri, but transformed such that some types
484 * of uri specification redundancies are removed. This involves:
487 * - collapsing redundant "/./" elements
488 * - removing multiple adjacent separator characters
489 * - removing trailing separator characters
490 * - normalizing the escaping of the path component by unescaping
491 * characters that don't need escaping and escaping characters that do
492 * need escaping but weren't
493 * - removing the port number if it is the default port number (80 for
494 * http, 443 for https, 3690 for svn)
496 * and possibly other semantically inoperative transformations.
498 * Allocate the result in @a result_pool.
503 svn_uri_canonicalize(const char *uri,
504 apr_pool_t *result_pool);
506 /** Return @c TRUE iff @a dirent is canonical.
508 * Use @a scratch_pool for temporary allocations.
510 * @note The test for canonicalization is currently defined as
511 * "looks exactly the same as @c svn_dirent_canonicalize() would make it look".
514 * @see svn_dirent_canonicalize()
518 svn_dirent_is_canonical(const char *dirent,
519 apr_pool_t *scratch_pool);
521 /** Return @c TRUE iff @a relpath is canonical.
523 * @see svn_relpath_canonicalize()
527 svn_relpath_is_canonical(const char *relpath);
529 /** Return @c TRUE iff @a uri is canonical.
531 * Use @a scratch_pool for temporary allocations.
533 * @see svn_uri_canonicalize()
537 svn_uri_is_canonical(const char *uri,
538 apr_pool_t *scratch_pool);
540 /** Return the longest common dirent shared by two canonicalized dirents,
541 * @a dirent1 and @a dirent2. If there's no common ancestor, return the empty path.
544 * Allocate the result in @a result_pool.
549 svn_dirent_get_longest_ancestor(const char *dirent1,
551 apr_pool_t *result_pool);
553 /** Return the longest common path shared by two relative paths,
554 * @a relpath1 and @a relpath2. If there's no common ancestor, return the empty path.
557 * Allocate the result in @a result_pool.
562 svn_relpath_get_longest_ancestor(const char *relpath1,
563 const char *relpath2,
564 apr_pool_t *result_pool);
566 /** Return the longest common path shared by two canonicalized uris,
567 * @a uri1 and @a uri2. If there's no common ancestor, return the
568 * empty path. In order for two URLs to have a common ancestor, they
569 * must (a) have the same protocol (since two URLs with the same path
570 * but different protocols may point at completely different
571 * resources), and (b) share a common ancestor in their path
572 * component, i.e. 'protocol://' is not a sufficient ancestor.
574 * Allocate the result in @a result_pool.
579 svn_uri_get_longest_ancestor(const char *uri1,
581 apr_pool_t *result_pool);
583 /** Convert @a relative canonicalized dirent to an absolute dirent and
584 * return the results in @a *pabsolute.
585 * Raise SVN_ERR_BAD_FILENAME if the absolute dirent cannot be determined.
587 * Allocate the result in @a result_pool.
592 svn_dirent_get_absolute(const char **pabsolute,
593 const char *relative,
594 apr_pool_t *result_pool);
596 /** Similar to svn_dirent_skip_ancestor(), except that if @a child_dirent is
597 * the same as @a parent_dirent, it is not considered a child, so the result
598 * is @c NULL; an empty string is never returned.
600 * If @a result_pool is NULL, return a pointer into @a child_dirent, otherwise
601 * allocate the result in @a result_pool.
603 * ### TODO: Deprecate, as the semantics are trivially
604 * obtainable from *_skip_ancestor().
609 svn_dirent_is_child(const char *parent_dirent,
610 const char *child_dirent,
611 apr_pool_t *result_pool);
613 /** Return TRUE if @a parent_dirent is an ancestor of @a child_dirent or
614 * the dirents are equal, and FALSE otherwise.
616 * ### TODO: Deprecate, as the semantics are trivially
617 * obtainable from *_skip_ancestor().
622 svn_dirent_is_ancestor(const char *parent_dirent,
623 const char *child_dirent);
625 /** Return TRUE if @a parent_uri is an ancestor of @a child_uri or
626 * the uris are equal, and FALSE otherwise.
629 svn_uri__is_ancestor(const char *parent_uri,
630 const char *child_uri);
633 /** Return the relative path part of @a child_dirent that is below
634 * @a parent_dirent, or just "" if @a parent_dirent is equal to
635 * @a child_dirent. If @a child_dirent is not below or equal to
636 * @a parent_dirent, return NULL.
638 * If one of @a parent_dirent and @a child_dirent is absolute and
639 * the other relative, return NULL.
644 svn_dirent_skip_ancestor(const char *parent_dirent,
645 const char *child_dirent);
647 /** Return the relative path part of @a child_relpath that is below
648 * @a parent_relpath, or just "" if @a parent_relpath is equal to
649 * @a child_relpath. If @a child_relpath is not below or equal to
650 * @a parent_relpath, return NULL.
655 svn_relpath_skip_ancestor(const char *parent_relpath,
656 const char *child_relpath);
658 /** Return the URI-decoded relative path of @a child_uri that is below
659 * @a parent_uri, or just "" if @a parent_uri is equal to @a child_uri. If
660 * @a child_uri is not below or equal to @a parent_uri, return NULL.
662 * Allocate the result in @a result_pool.
667 svn_uri_skip_ancestor(const char *parent_uri,
668 const char *child_uri,
669 apr_pool_t *result_pool);
671 /** Find the common prefix of the canonicalized dirents in @a targets
672 * (an array of <tt>const char *</tt>'s), and remove redundant dirents if @a
673 * remove_redundancies is TRUE. If there are multiple targets:
675 * - Set @a *pcommon to the absolute dirent of the dirent common to
676 * all of the targets. If the targets have no common prefix (e.g.
677 * "C:/file" and "D:/file" on Windows), set @a *pcommon to the empty string.
680 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
681 * to an array of targets relative to @a *pcommon, and if
682 * @a remove_redundancies is TRUE, omit any dirents that are
683 * descendants of another dirent in @a targets. If *pcommon
684 * is empty, @a *pcondensed_targets will contain absolute dirents;
685 * redundancies can still be removed. If @a pcondensed_targets is NULL, leave it alone.
688 * Else if there is exactly one target, then
690 * - Set @a *pcommon to that target, and
692 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
693 * to an array containing zero elements. Else if
694 * @a pcondensed_targets is NULL, leave it alone.
696 * If there are no items in @a targets, set @a *pcommon and (if
697 * applicable) @a *pcondensed_targets to @c NULL.
699 * Allocate the results in @a result_pool. Use @a scratch_pool for
700 * temporary allocations.
705 svn_dirent_condense_targets(const char **pcommon,
706 apr_array_header_t **pcondensed_targets,
707 const apr_array_header_t *targets,
708 svn_boolean_t remove_redundancies,
709 apr_pool_t *result_pool,
710 apr_pool_t *scratch_pool);
712 /** Find the common prefix of the canonicalized uris in @a targets
713 * (an array of <tt>const char *</tt>'s), and remove redundant uris if @a
714 * remove_redundancies is TRUE. If there are multiple targets:
716 * - Set @a *pcommon to the common base uri of all of the targets.
717 * If the targets have no common prefix (e.g. "http://srv1/file"
718 * and "http://srv2/file"), set @a *pcommon to the empty string.
721 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
722 * to an array of URI-decoded targets relative to @a *pcommon, and
723 * if @a remove_redundancies is TRUE, omit any uris that are
724 * descendants of another uri in @a targets. If *pcommon is
725 * empty, @a *pcondensed_targets will contain absolute uris;
726 * redundancies can still be removed. If @a pcondensed_targets is
727 * NULL, leave it alone.
729 * Else if there is exactly one target, then
731 * - Set @a *pcommon to that target, and
733 * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
734 * to an array containing zero elements. Else if
735 * @a pcondensed_targets is NULL, leave it alone.
737 * If there are no items in @a targets, set @a *pcommon and (if
738 * applicable) @a *pcondensed_targets to @c NULL.
740 * Allocate the results in @a result_pool. Use @a scratch_pool for
741 * temporary allocations.
746 svn_uri_condense_targets(const char **pcommon,
747 apr_array_header_t **pcondensed_targets,
748 const apr_array_header_t *targets,
749 svn_boolean_t remove_redundancies,
750 apr_pool_t *result_pool,
751 apr_pool_t *scratch_pool);
753 /** Join @a path onto @a base_path, checking that @a path does not attempt
754 * to traverse above @a base_path. If @a path or any ".." component within
755 * it resolves to a path above @a base_path, or if @a path is an absolute
756 * path, then set @a *under_root to @c FALSE. Otherwise, set @a *under_root
757 * to @c TRUE and, if @a result_path is not @c NULL, set @a *result_path to
758 * the resulting path.
760 * @a path need not be canonical. @a base_path must be canonical and
761 * @a *result_path will be canonical.
763 * Allocate the result in @a result_pool.
765 * @note Use of this function is strongly encouraged. Do not roll your own.
766 * (http://cve.mitre.org/cgi-bin/cvename.cgi?name=2007-3846)
771 svn_dirent_is_under_root(svn_boolean_t *under_root,
772 const char **result_path,
773 const char *base_path,
775 apr_pool_t *result_pool);
777 /** Set @a *dirent to the path corresponding to the file:// URL @a url, using
778 * the platform-specific file:// rules.
780 * Allocate the result in @a result_pool.
785 svn_uri_get_dirent_from_file_url(const char **dirent,
787 apr_pool_t *result_pool);
789 /** Set @a *url to a file:// URL, corresponding to @a dirent using the
790 * platform-specific dirent and file:// rules.
792 * Allocate the result in @a result_pool.
797 svn_uri_get_file_url_from_dirent(const char **url,
799 apr_pool_t *result_pool);
803 #endif /* __cplusplus */
805 #endif /* SVN_DIRENT_URI_H */