2 * utf.c: UTF-8 conversion routines
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
30 #include <apr_strings.h>
32 #include <apr_xlate.h>
33 #include <apr_atomic.h>
36 #include "svn_string.h"
37 #include "svn_error.h"
38 #include "svn_pools.h"
39 #include "svn_ctype.h"
41 #include "svn_private_config.h"
42 #include "win32_xlate.h"
44 #include "private/svn_utf_private.h"
45 #include "private/svn_dep_compat.h"
46 #include "private/svn_string_private.h"
47 #include "private/svn_mutex.h"
51 /* Use these static strings to maximize performance on standard conversions.
52 * Any strings on other locations are still valid, however.
54 static const char *SVN_UTF_NTOU_XLATE_HANDLE = "svn-utf-ntou-xlate-handle";
55 static const char *SVN_UTF_UTON_XLATE_HANDLE = "svn-utf-uton-xlate-handle";
57 static const char *SVN_APR_UTF8_CHARSET = "UTF-8";
59 static svn_mutex__t *xlate_handle_mutex = NULL;
60 static svn_boolean_t assume_native_charset_is_utf8 = FALSE;
62 /* The xlate handle cache is a global hash table with linked lists of xlate
63 * handles. In multi-threaded environments, a thread "borrows" an xlate
64 * handle from the cache during a translation and puts it back afterwards.
65 * This avoids holding a global lock for all translations.
66 * If there is no handle for a particular key when needed, a new
67 * handle is created and put in the cache after use.
68 * This means that there will be at most N handles open for a key, where N
69 * is the number of simultaneous handles in use for that key. */
71 typedef struct xlate_handle_node_t {
73 /* FALSE if the handle is not valid, since its pool is being
76 /* The name of a char encoding or APR_LOCALE_CHARSET. */
77 const char *frompage, *topage;
78 struct xlate_handle_node_t *next;
79 } xlate_handle_node_t;
81 /* This maps const char * userdata_key strings to xlate_handle_node_t **
82 handles to the first entry in the linked list of xlate handles. We don't
83 store the pointer to the list head directly in the hash table, since we
84 remove/insert entries at the head in the list in the code below, and
85 we can't use apr_hash_set() in each character translation because that
86 function allocates memory in each call where the value is non-NULL.
87 Since these allocations take place in a global pool, this would be a
89 static apr_hash_t *xlate_handle_hash = NULL;
91 /* "1st level cache" to standard conversion maps. We may access these
92 * using atomic xchange ops, i.e. without further thread synchronization.
93 * If the respective item is NULL, fallback to hash lookup.
95 static void * volatile xlat_ntou_static_handle = NULL;
96 static void * volatile xlat_uton_static_handle = NULL;
98 /* Clean up the xlate handle cache. */
100 xlate_cleanup(void *arg)
102 /* We set the cache variables to NULL so that translation works in other
103 cleanup functions, even if it isn't cached then. */
104 xlate_handle_hash = NULL;
106 /* ensure no stale objects get accessed */
107 xlat_ntou_static_handle = NULL;
108 xlat_uton_static_handle = NULL;
113 /* Set the handle of ARG to NULL. */
115 xlate_handle_node_cleanup(void *arg)
117 xlate_handle_node_t *node = arg;
124 svn_utf_initialize2(svn_boolean_t assume_native_utf8,
127 if (!xlate_handle_hash)
129 /* We create our own subpool, which we protect with the mutex.
130 We can't use the pool passed to us by the caller, since we will
131 use it for xlate handle allocations, possibly in multiple threads,
132 and pool allocation is not thread-safe. */
133 apr_pool_t *subpool = svn_pool_create(pool);
135 svn_error_t *err = svn_mutex__init(&mutex, TRUE, subpool);
138 svn_error_clear(err);
142 xlate_handle_mutex = mutex;
143 xlate_handle_hash = apr_hash_make(subpool);
145 apr_pool_cleanup_register(subpool, NULL, xlate_cleanup,
146 apr_pool_cleanup_null);
149 if (!assume_native_charset_is_utf8)
150 assume_native_charset_is_utf8 = assume_native_utf8;
153 /* Return a unique string key based on TOPAGE and FROMPAGE. TOPAGE and
154 * FROMPAGE can be any valid arguments of the same name to
155 * apr_xlate_open(). Allocate the returned string in POOL. */
157 get_xlate_key(const char *topage,
158 const char *frompage,
161 /* In the cases of SVN_APR_LOCALE_CHARSET and SVN_APR_DEFAULT_CHARSET
162 * topage/frompage is really an int, not a valid string. So generate a
163 * unique key accordingly. */
164 if (frompage == SVN_APR_LOCALE_CHARSET)
165 frompage = "APR_LOCALE_CHARSET";
166 else if (frompage == SVN_APR_DEFAULT_CHARSET)
167 frompage = "APR_DEFAULT_CHARSET";
169 if (topage == SVN_APR_LOCALE_CHARSET)
170 topage = "APR_LOCALE_CHARSET";
171 else if (topage == SVN_APR_DEFAULT_CHARSET)
172 topage = "APR_DEFAULT_CHARSET";
174 return apr_pstrcat(pool, "svn-utf-", frompage, "to", topage,
175 "-xlate-handle", (char *)NULL);
178 /* Atomically replace the content in *MEM with NEW_VALUE and return
179 * the previous content of *MEM. If atomicity cannot be guaranteed,
180 * *MEM will not be modified and NEW_VALUE is simply returned to
183 static APR_INLINE void*
184 atomic_swap(void * volatile * mem, void *new_value)
187 #if APR_VERSION_AT_LEAST(1,3,0)
188 /* Cast is necessary because of APR bug:
189 https://issues.apache.org/bugzilla/show_bug.cgi?id=50731 */
190 return apr_atomic_xchgptr((volatile void **)mem, new_value);
192 /* old APRs don't support atomic swaps. Simply return the
193 * input to the caller for further processing. */
197 /* no threads - no sync. necessary */
198 void *old_value = (void*)*mem;
204 /* Set *RET to a newly created handle node for converting from FROMPAGE
205 to TOPAGE. If apr_xlate_open() returns APR_EINVAL or APR_ENOTIMPL, set
206 (*RET)->handle to NULL. If it fails for any other reason, return the error.
207 Allocate *RET and its xlate handle in POOL. */
209 xlate_alloc_handle(xlate_handle_node_t **ret,
210 const char *topage, const char *frompage,
213 apr_status_t apr_err;
216 /* The error handling doesn't support the following cases, since we don't
217 use them currently. Catch this here. */
218 SVN_ERR_ASSERT(frompage != SVN_APR_DEFAULT_CHARSET
219 && topage != SVN_APR_DEFAULT_CHARSET
220 && (frompage != SVN_APR_LOCALE_CHARSET
221 || topage != SVN_APR_LOCALE_CHARSET));
223 /* Try to create a handle. */
225 apr_err = svn_subr__win32_xlate_open((win32_xlate_t **)&handle, topage,
228 apr_err = apr_xlate_open(&handle, topage, frompage, pool);
231 if (APR_STATUS_IS_EINVAL(apr_err) || APR_STATUS_IS_ENOTIMPL(apr_err))
233 else if (apr_err != APR_SUCCESS)
236 char apr_strerr[512];
238 /* Can't use svn_error_wrap_apr here because it calls functions in
239 this file, leading to infinite recursion. */
240 if (frompage == SVN_APR_LOCALE_CHARSET)
241 errstr = apr_psprintf(pool,
242 _("Can't create a character converter from "
243 "native encoding to '%s'"), topage);
244 else if (topage == SVN_APR_LOCALE_CHARSET)
245 errstr = apr_psprintf(pool,
246 _("Can't create a character converter from "
247 "'%s' to native encoding"), frompage);
249 errstr = apr_psprintf(pool,
250 _("Can't create a character converter from "
251 "'%s' to '%s'"), frompage, topage);
253 /* Just put the error on the stack, since svn_error_create duplicates it
254 later. APR_STRERR will be in the local encoding, not in UTF-8, though.
256 svn_strerror(apr_err, apr_strerr, sizeof(apr_strerr));
257 return svn_error_create(apr_err,
258 svn_error_create(apr_err, NULL, apr_strerr),
262 /* Allocate and initialize the node. */
263 *ret = apr_palloc(pool, sizeof(xlate_handle_node_t));
264 (*ret)->handle = handle;
265 (*ret)->valid = TRUE;
266 (*ret)->frompage = ((frompage != SVN_APR_LOCALE_CHARSET)
267 ? apr_pstrdup(pool, frompage) : frompage);
268 (*ret)->topage = ((topage != SVN_APR_LOCALE_CHARSET)
269 ? apr_pstrdup(pool, topage) : topage);
272 /* If we are called from inside a pool cleanup handler, the just created
273 xlate handle will be closed when that handler returns by a newly
274 registered cleanup handler, however, the handle is still cached by us.
275 To prevent this, we register a cleanup handler that will reset the valid
276 flag of our node, so we don't use an invalid handle. */
278 apr_pool_cleanup_register(pool, *ret, xlate_handle_node_cleanup,
279 apr_pool_cleanup_null);
284 /* Extend xlate_alloc_handle by using USERDATA_KEY as a key in our
285 global hash map, if available.
287 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
288 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
289 in the pool of xlate_handle_hash.
291 Note: this function is not thread-safe. Call get_xlate_handle_node
294 get_xlate_handle_node_internal(xlate_handle_node_t **ret,
295 const char *topage, const char *frompage,
296 const char *userdata_key, apr_pool_t *pool)
298 /* If we already have a handle, just return it. */
299 if (userdata_key && xlate_handle_hash)
301 xlate_handle_node_t *old_node = NULL;
303 /* 2nd level: hash lookup */
304 xlate_handle_node_t **old_node_p = svn_hash_gets(xlate_handle_hash,
307 old_node = *old_node_p;
310 /* Ensure that the handle is still valid. */
313 /* Remove from the list. */
314 *old_node_p = old_node->next;
315 old_node->next = NULL;
322 /* Note that we still have the mutex locked (if it is initialized), so we
323 can use the global pool for creating the new xlate handle. */
325 /* Use the correct pool for creating the handle. */
326 pool = apr_hash_pool_get(xlate_handle_hash);
328 return xlate_alloc_handle(ret, topage, frompage, pool);
331 /* Set *RET to a handle node for converting from FROMPAGE to TOPAGE,
332 creating the handle node if it doesn't exist in USERDATA_KEY.
333 If a node is not cached and apr_xlate_open() returns APR_EINVAL or
334 APR_ENOTIMPL, set (*RET)->handle to NULL. If it fails for any other
335 reason, return the error.
337 Allocate *RET and its xlate handle in POOL if svn_utf_initialize()
338 hasn't been called or USERDATA_KEY is NULL. Else, allocate them
339 in the pool of xlate_handle_hash. */
341 get_xlate_handle_node(xlate_handle_node_t **ret,
342 const char *topage, const char *frompage,
343 const char *userdata_key, apr_pool_t *pool)
345 xlate_handle_node_t *old_node = NULL;
347 /* If we already have a handle, just return it. */
350 if (xlate_handle_hash)
352 /* 1st level: global, static items */
353 if (userdata_key == SVN_UTF_NTOU_XLATE_HANDLE)
354 old_node = atomic_swap(&xlat_ntou_static_handle, NULL);
355 else if (userdata_key == SVN_UTF_UTON_XLATE_HANDLE)
356 old_node = atomic_swap(&xlat_uton_static_handle, NULL);
358 if (old_node && old_node->valid)
367 /* We fall back on a per-pool cache instead. */
368 apr_pool_userdata_get(&p, userdata_key, pool);
370 /* Ensure that the handle is still valid. */
371 if (old_node && old_node->valid)
377 return xlate_alloc_handle(ret, topage, frompage, pool);
381 SVN_MUTEX__WITH_LOCK(xlate_handle_mutex,
382 get_xlate_handle_node_internal(ret,
391 /* Put back NODE into the xlate handle cache for use by other calls.
393 Note: this function is not thread-safe. Call put_xlate_handle_node
396 put_xlate_handle_node_internal(xlate_handle_node_t *node,
397 const char *userdata_key)
399 xlate_handle_node_t **node_p = svn_hash_gets(xlate_handle_hash, userdata_key);
402 userdata_key = apr_pstrdup(apr_hash_pool_get(xlate_handle_hash),
404 node_p = apr_palloc(apr_hash_pool_get(xlate_handle_hash),
407 svn_hash_sets(xlate_handle_hash, userdata_key, node_p);
409 node->next = *node_p;
415 /* Put back NODE into the xlate handle cache for use by other calls.
416 If there is no global cache, store the handle in POOL.
417 Ignore errors related to locking/unlocking the mutex. */
419 put_xlate_handle_node(xlate_handle_node_t *node,
420 const char *userdata_key,
423 assert(node->next == NULL);
427 /* push previous global node to the hash */
428 if (xlate_handle_hash)
430 /* 1st level: global, static items */
431 if (userdata_key == SVN_UTF_NTOU_XLATE_HANDLE)
432 node = atomic_swap(&xlat_ntou_static_handle, node);
433 else if (userdata_key == SVN_UTF_UTON_XLATE_HANDLE)
434 node = atomic_swap(&xlat_uton_static_handle, node);
438 SVN_MUTEX__WITH_LOCK(xlate_handle_mutex,
439 put_xlate_handle_node_internal(node,
444 /* Store it in the per-pool cache. */
445 apr_pool_userdata_set(node, userdata_key, apr_pool_cleanup_null, pool);
451 /* Return the apr_xlate handle for converting native characters to UTF-8. */
453 get_ntou_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
455 return get_xlate_handle_node(ret, SVN_APR_UTF8_CHARSET,
456 assume_native_charset_is_utf8
457 ? SVN_APR_UTF8_CHARSET
458 : SVN_APR_LOCALE_CHARSET,
459 SVN_UTF_NTOU_XLATE_HANDLE, pool);
463 /* Return the apr_xlate handle for converting UTF-8 to native characters.
464 Create one if it doesn't exist. If unable to find a handle, or
465 unable to create one because apr_xlate_open returned APR_EINVAL, then
466 set *RET to null and return SVN_NO_ERROR; if fail for some other
467 reason, return error. */
469 get_uton_xlate_handle_node(xlate_handle_node_t **ret, apr_pool_t *pool)
471 return get_xlate_handle_node(ret,
472 assume_native_charset_is_utf8
473 ? SVN_APR_UTF8_CHARSET
474 : SVN_APR_LOCALE_CHARSET,
475 SVN_APR_UTF8_CHARSET,
476 SVN_UTF_UTON_XLATE_HANDLE, pool);
480 /* Copy LEN bytes of SRC, converting non-ASCII and zero bytes to ?\nnn
481 sequences, allocating the result in POOL. */
483 fuzzy_escape(const char *src, apr_size_t len, apr_pool_t *pool)
485 const char *src_orig = src, *src_end = src + len;
486 apr_size_t new_len = 0;
488 const char *new_orig;
490 /* First count how big a dest string we'll need. */
491 while (src < src_end)
493 if (! svn_ctype_isascii(*src) || *src == '\0')
494 new_len += 5; /* 5 slots, for "?\XXX" */
496 new_len += 1; /* one slot for the 7-bit char */
501 /* Allocate that amount, plus one slot for '\0' character. */
502 new = apr_palloc(pool, new_len + 1);
506 /* And fill it up. */
507 while (src_orig < src_end)
509 if (! svn_ctype_isascii(*src_orig) || src_orig == '\0')
511 /* This is the same format as svn_xml_fuzzy_escape uses, but that
512 function escapes different characters. Please keep in sync!
513 ### If we add another fuzzy escape somewhere, we should abstract
514 ### this out to a common function. */
515 apr_snprintf(new, 6, "?\\%03u", (unsigned char) *src_orig);
532 /* Convert SRC_LENGTH bytes of SRC_DATA in NODE->handle, store the result
533 in *DEST, which is allocated in POOL. */
535 convert_to_stringbuf(xlate_handle_node_t *node,
536 const char *src_data,
537 apr_size_t src_length,
538 svn_stringbuf_t **dest,
542 apr_status_t apr_err;
544 apr_err = svn_subr__win32_xlate_to_stringbuf((win32_xlate_t *) node->handle,
545 src_data, src_length,
548 apr_size_t buflen = src_length * 2;
549 apr_status_t apr_err;
550 apr_size_t srclen = src_length;
551 apr_size_t destlen = buflen;
553 /* Initialize *DEST to an empty stringbuf.
554 A 1:2 ratio of input bytes to output bytes (as assigned above)
555 should be enough for most translations, and if it turns out not
556 to be enough, we'll grow the buffer again, sizing it based on a
557 1:3 ratio of the remainder of the string. */
558 *dest = svn_stringbuf_create_ensure(buflen + 1, pool);
560 /* Not only does it not make sense to convert an empty string, but
561 apr-iconv is quite unreasonable about not allowing that. */
567 /* Set up state variables for xlate. */
568 destlen = buflen - (*dest)->len;
570 /* Attempt the conversion. */
571 apr_err = apr_xlate_conv_buffer(node->handle,
572 src_data + (src_length - srclen),
574 (*dest)->data + (*dest)->len,
577 /* Now, update the *DEST->len to track the amount of output data
578 churned out so far from this loop. */
579 (*dest)->len += ((buflen - (*dest)->len) - destlen);
580 buflen += srclen * 3; /* 3 is middle ground, 2 wasn't enough
581 for all characters in the buffer, 4 is
582 maximum character size (currently) */
585 } while (apr_err == APR_SUCCESS && srclen != 0);
588 /* If we exited the loop with an error, return the error. */
594 /* Can't use svn_error_wrap_apr here because it calls functions in
595 this file, leading to infinite recursion. */
596 if (node->frompage == SVN_APR_LOCALE_CHARSET)
597 errstr = apr_psprintf
598 (pool, _("Can't convert string from native encoding to '%s':"),
600 else if (node->topage == SVN_APR_LOCALE_CHARSET)
601 errstr = apr_psprintf
602 (pool, _("Can't convert string from '%s' to native encoding:"),
605 errstr = apr_psprintf
606 (pool, _("Can't convert string from '%s' to '%s':"),
607 node->frompage, node->topage);
609 err = svn_error_create(apr_err, NULL, fuzzy_escape(src_data,
611 return svn_error_create(apr_err, err, errstr);
613 /* Else, exited due to success. Trim the result buffer down to the
615 (*dest)->data[(*dest)->len] = '\0';
621 /* Return APR_EINVAL if the first LEN bytes of DATA contain anything
622 other than seven-bit, non-control (except for whitespace) ASCII
623 characters, finding the error pool from POOL. Otherwise, return
626 check_non_ascii(const char *data, apr_size_t len, apr_pool_t *pool)
628 const char *data_start = data;
630 for (; len > 0; --len, data++)
632 if ((! svn_ctype_isascii(*data))
633 || ((! svn_ctype_isspace(*data))
634 && svn_ctype_iscntrl(*data)))
636 /* Show the printable part of the data, followed by the
637 decimal code of the questionable character. Because if a
638 user ever gets this error, she's going to have to spend
639 time tracking down the non-ASCII data, so we want to help
640 as much as possible. And yes, we just call the unsafe
641 data "non-ASCII", even though the actual constraint is
642 somewhat more complex than that. */
644 if (data - data_start)
646 const char *error_data
647 = apr_pstrndup(pool, data_start, (data - data_start));
649 return svn_error_createf
651 _("Safe data '%s' was followed by non-ASCII byte %d: "
652 "unable to convert to/from UTF-8"),
653 error_data, *((const unsigned char *) data));
657 return svn_error_createf
659 _("Non-ASCII character (code %d) detected, "
660 "and unable to convert to/from UTF-8"),
661 *((const unsigned char *) data));
669 /* Construct an error with code APR_EINVAL and with a suitable message
670 * to describe the invalid UTF-8 sequence DATA of length LEN (which
671 * may have embedded NULLs). We can't simply print the data, almost
672 * by definition we don't really know how it is encoded.
675 invalid_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
677 const char *last = svn_utf__last_valid(data, len);
678 const char *valid_txt = "", *invalid_txt = "";
680 size_t valid, invalid;
682 /* We will display at most 24 valid octets (this may split a leading
683 multi-byte character) as that should fit on one 80 character line. */
687 for (i = 0; i < valid; ++i)
688 valid_txt = apr_pstrcat(pool, valid_txt,
689 apr_psprintf(pool, " %02x",
690 (unsigned char)last[i-valid]),
693 /* 4 invalid octets will guarantee that the faulty octet is displayed */
694 invalid = data + len - last;
697 for (i = 0; i < invalid; ++i)
698 invalid_txt = apr_pstrcat(pool, invalid_txt,
699 apr_psprintf(pool, " %02x",
700 (unsigned char)last[i]),
703 return svn_error_createf(APR_EINVAL, NULL,
704 _("Valid UTF-8 data\n(hex:%s)\n"
705 "followed by invalid UTF-8 sequence\n(hex:%s)"),
706 valid_txt, invalid_txt);
709 /* Verify that the sequence DATA of length LEN is valid UTF-8.
710 If it is not, return an error with code APR_EINVAL. */
712 check_utf8(const char *data, apr_size_t len, apr_pool_t *pool)
714 if (! svn_utf__is_valid(data, len))
715 return invalid_utf8(data, len, pool);
719 /* Verify that the NULL terminated sequence DATA is valid UTF-8.
720 If it is not, return an error with code APR_EINVAL. */
722 check_cstring_utf8(const char *data, apr_pool_t *pool)
725 if (! svn_utf__cstring_is_valid(data))
726 return invalid_utf8(data, strlen(data), pool);
732 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
733 const svn_stringbuf_t *src,
736 xlate_handle_node_t *node;
739 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
743 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
745 err = check_utf8((*dest)->data, (*dest)->len, pool);
749 err = check_non_ascii(src->data, src->len, pool);
751 *dest = svn_stringbuf_dup(src, pool);
754 return svn_error_compose_create(err,
755 put_xlate_handle_node
757 SVN_UTF_NTOU_XLATE_HANDLE,
763 svn_utf_string_to_utf8(const svn_string_t **dest,
764 const svn_string_t *src,
767 svn_stringbuf_t *destbuf;
768 xlate_handle_node_t *node;
771 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
775 err = convert_to_stringbuf(node, src->data, src->len, &destbuf, pool);
777 err = check_utf8(destbuf->data, destbuf->len, pool);
779 *dest = svn_stringbuf__morph_into_string(destbuf);
783 err = check_non_ascii(src->data, src->len, pool);
785 *dest = svn_string_dup(src, pool);
788 return svn_error_compose_create(err,
789 put_xlate_handle_node
791 SVN_UTF_NTOU_XLATE_HANDLE,
796 /* Common implementation for svn_utf_cstring_to_utf8,
797 svn_utf_cstring_to_utf8_ex, svn_utf_cstring_from_utf8 and
798 svn_utf_cstring_from_utf8_ex. Convert SRC to DEST using NODE->handle as
799 the translator and allocating from POOL. */
801 convert_cstring(const char **dest,
803 xlate_handle_node_t *node,
808 svn_stringbuf_t *destbuf;
809 SVN_ERR(convert_to_stringbuf(node, src, strlen(src),
811 *dest = destbuf->data;
815 apr_size_t len = strlen(src);
816 SVN_ERR(check_non_ascii(src, len, pool));
817 *dest = apr_pstrmemdup(pool, src, len);
824 svn_utf_cstring_to_utf8(const char **dest,
828 xlate_handle_node_t *node;
831 SVN_ERR(get_ntou_xlate_handle_node(&node, pool));
832 err = convert_cstring(dest, src, node, pool);
833 SVN_ERR(svn_error_compose_create(err,
834 put_xlate_handle_node
836 SVN_UTF_NTOU_XLATE_HANDLE,
838 return check_cstring_utf8(*dest, pool);
843 svn_utf_cstring_to_utf8_ex2(const char **dest,
845 const char *frompage,
848 xlate_handle_node_t *node;
850 const char *convset_key = get_xlate_key(SVN_APR_UTF8_CHARSET, frompage,
853 SVN_ERR(get_xlate_handle_node(&node, SVN_APR_UTF8_CHARSET, frompage,
855 err = convert_cstring(dest, src, node, pool);
856 SVN_ERR(svn_error_compose_create(err,
857 put_xlate_handle_node
859 SVN_UTF_NTOU_XLATE_HANDLE,
862 return check_cstring_utf8(*dest, pool);
867 svn_utf_cstring_to_utf8_ex(const char **dest,
869 const char *frompage,
870 const char *convset_key,
873 return svn_utf_cstring_to_utf8_ex2(dest, src, frompage, pool);
878 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
879 const svn_stringbuf_t *src,
882 xlate_handle_node_t *node;
885 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
889 err = check_utf8(src->data, src->len, pool);
891 err = convert_to_stringbuf(node, src->data, src->len, dest, pool);
895 err = check_non_ascii(src->data, src->len, pool);
897 *dest = svn_stringbuf_dup(src, pool);
900 err = svn_error_compose_create(
902 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));
909 svn_utf_string_from_utf8(const svn_string_t **dest,
910 const svn_string_t *src,
913 svn_stringbuf_t *dbuf;
914 xlate_handle_node_t *node;
917 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
921 err = check_utf8(src->data, src->len, pool);
923 err = convert_to_stringbuf(node, src->data, src->len,
926 *dest = svn_stringbuf__morph_into_string(dbuf);
930 err = check_non_ascii(src->data, src->len, pool);
932 *dest = svn_string_dup(src, pool);
935 err = svn_error_compose_create(
937 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));
944 svn_utf_cstring_from_utf8(const char **dest,
948 xlate_handle_node_t *node;
951 SVN_ERR(check_cstring_utf8(src, pool));
953 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
954 err = convert_cstring(dest, src, node, pool);
955 err = svn_error_compose_create(
957 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));
964 svn_utf_cstring_from_utf8_ex2(const char **dest,
969 xlate_handle_node_t *node;
971 const char *convset_key = get_xlate_key(topage, SVN_APR_UTF8_CHARSET,
974 SVN_ERR(check_cstring_utf8(src, pool));
976 SVN_ERR(get_xlate_handle_node(&node, topage, SVN_APR_UTF8_CHARSET,
978 err = convert_cstring(dest, src, node, pool);
979 err = svn_error_compose_create(
981 put_xlate_handle_node(node, convset_key, pool));
988 svn_utf_cstring_from_utf8_ex(const char **dest,
991 const char *convset_key,
994 return svn_utf_cstring_from_utf8_ex2(dest, src, topage, pool);
999 svn_utf__cstring_from_utf8_fuzzy(const char *src,
1001 svn_error_t *(*convert_from_utf8)
1002 (const char **, const char *, apr_pool_t *))
1004 const char *escaped, *converted;
1007 escaped = fuzzy_escape(src, strlen(src), pool);
1009 /* Okay, now we have a *new* UTF-8 string, one that's guaranteed to
1010 contain only 7-bit bytes :-). Recode to native... */
1011 err = convert_from_utf8(((const char **) &converted), escaped, pool);
1015 svn_error_clear(err);
1021 /* ### Check the client locale, maybe we can avoid that second
1022 * conversion! See Ulrich Drepper's patch at
1023 * http://subversion.tigris.org/issues/show_bug.cgi?id=807.
1029 svn_utf_cstring_from_utf8_fuzzy(const char *src,
1032 return svn_utf__cstring_from_utf8_fuzzy(src, pool,
1033 svn_utf_cstring_from_utf8);
1038 svn_utf_cstring_from_utf8_stringbuf(const char **dest,
1039 const svn_stringbuf_t *src,
1042 svn_stringbuf_t *destbuf;
1044 SVN_ERR(svn_utf_stringbuf_from_utf8(&destbuf, src, pool));
1045 *dest = destbuf->data;
1047 return SVN_NO_ERROR;
1052 svn_utf_cstring_from_utf8_string(const char **dest,
1053 const svn_string_t *src,
1056 svn_stringbuf_t *dbuf;
1057 xlate_handle_node_t *node;
1060 SVN_ERR(get_uton_xlate_handle_node(&node, pool));
1064 err = check_utf8(src->data, src->len, pool);
1066 err = convert_to_stringbuf(node, src->data, src->len,
1073 err = check_non_ascii(src->data, src->len, pool);
1075 *dest = apr_pstrmemdup(pool, src->data, src->len);
1078 err = svn_error_compose_create(
1080 put_xlate_handle_node(node, SVN_UTF_UTON_XLATE_HANDLE, pool));