2 * xml.c: xml helper code shared among the Subversion libraries.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
29 #include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */
31 #include "svn_pools.h"
33 #include "svn_error.h"
34 #include "svn_ctype.h"
36 #include "private/svn_utf_private.h"
37 #include "private/svn_subr_private.h"
39 #ifdef SVN_HAVE_OLD_EXPAT
/* XML_VERSION_AT_LEAST(major, minor, patch) evaluates to true when the
   version given by XML_MAJOR_VERSION, XML_MINOR_VERSION and
   XML_MICRO_VERSION is major.minor.patch or newer.  The comparison is
   lexicographic: a smaller major wins outright, then a smaller minor,
   and finally patch <= micro.  The macro is only defined here when no
   definition is already in effect. */
45 #ifndef XML_VERSION_AT_LEAST
46 #define XML_VERSION_AT_LEAST(major,minor,patch) \
47 (((major) < XML_MAJOR_VERSION) \
48 || ((major) == XML_MAJOR_VERSION && (minor) < XML_MINOR_VERSION) \
49 || ((major) == XML_MAJOR_VERSION && (minor) == XML_MINOR_VERSION && \
50 (patch) <= XML_MICRO_VERSION))
51 #endif /* XML_VERSION_AT_LEAST */
54 #error Expat is unusable -- it has been compiled for wide characters
/* Return the XML library version this file was compiled against, as a
   dotted MAJOR.MINOR.MICRO string assembled at compile time from the
   XML_MAJOR_VERSION, XML_MINOR_VERSION and XML_MICRO_VERSION macros.
   The result points at a static const array, so it stays valid for the
   life of the program and must not be modified or freed. */
58 svn_xml__compiled_version(void)
60 static const char xml_version_str[] = APR_STRINGIFY(XML_MAJOR_VERSION)
61 "." APR_STRINGIFY(XML_MINOR_VERSION)
62 "." APR_STRINGIFY(XML_MICRO_VERSION);
64 return xml_version_str;
/* Report the version of the Expat library in use at run time, starting
   from the string returned by XML_ExpatVersion(). */
68 svn_xml__runtime_version(void)
70 const char *expat_version = XML_ExpatVersion();
/* Recognise version strings that begin with the six-character
   prefix expat_ (strncmp() returns 0 on a match). */
72 if (!strncmp(expat_version, "expat_", 6))
/* State shared between the public svn_xml_* parser API and the Expat
   callback shims in this file.  It groups the Expat parser, the three
   Subversion callbacks (start element, end element, character data),
   the caller-supplied baton handed to those callbacks, an error slot
   that is set when parsing must be abandoned, and the pool the object
   was allocated in. */
79 /* The private internals for a parser object. */
80 struct svn_xml_parser_t
82 /** the expat parser */
85 /** the SVN callbacks to call from the Expat callbacks */
86 svn_xml_start_elem start_handler;
87 svn_xml_end_elem end_handler;
88 svn_xml_char_data data_handler;
90 /** the user's baton for private data */
93 /** if non-@c NULL, an error happened while parsing */
96 /** where this object is allocated, so we can free it easily */
102 /*** XML character validation ***/
/* Check whether the LEN bytes starting at DATA are safe to place in
   XML.  The buffer is first validated with svn_utf__is_valid(); then
   every byte is inspected and control characters other than tab,
   linefeed, carriage return and DEL are singled out.
   NOTE(review): the return statements are not among the visible lines;
   presumably a failed check yields FALSE and a clean scan TRUE. */
105 svn_xml_is_xml_safe(const char *data, apr_size_t len)
107 const char *end = data + len;
/* Reject input that is not valid UTF-8 before looking at single bytes. */
110 if (! svn_utf__is_valid(data, len))
/* Walk the buffer one byte at a time. */
113 for (p = data; p < end; p++)
/* Read the byte as unsigned char before classifying it. */
115 unsigned char c = *p;
117 if (svn_ctype_iscntrl(c))
/* These four control characters are the tolerated exceptions. */
119 if ((c != SVN_CTYPE_ASCII_TAB)
120 && (c != SVN_CTYPE_ASCII_LINEFEED)
121 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
122 && (c != SVN_CTYPE_ASCII_DELETE))
133 /*** XML escaping. ***/
137 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
138 * in POOL, else append to the existing stringbuf there.
141 xml_escape_cdata(svn_stringbuf_t **outstr,
146 const char *end = data + len;
147 const char *p = data, *q;
150 *outstr = svn_stringbuf_create_empty(pool);
154 /* Find a character which needs to be quoted and append bytes up
155 to that point. Strictly speaking, '>' only needs to be
156 quoted if it follows "]]", but it's easier to quote it all
159 So, why are we escaping '\r' here? Well, according to the
160 XML spec, '\r\n' gets converted to '\n' during XML parsing.
161 Also, any '\r' not followed by '\n' is converted to '\n'. By
162 golly, if we say we want to escape a '\r', we want to make
163 sure it remains a '\r'! */
165 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
167 svn_stringbuf_appendbytes(*outstr, p, q - p);
169 /* We may already be a winner. */
173 /* Append the entity reference for the character. */
175 svn_stringbuf_appendcstr(*outstr, "&");
177 svn_stringbuf_appendcstr(*outstr, "<");
179 svn_stringbuf_appendcstr(*outstr, ">");
181 svn_stringbuf_appendcstr(*outstr, " ");
187 /* Essentially the same as xml_escape_cdata, with the addition of
188 whitespace and quote characters. */
190 xml_escape_attr(svn_stringbuf_t **outstr,
195 const char *end = data + len;
196 const char *p = data, *q;
199 *outstr = svn_stringbuf_create_ensure(len, pool);
203 /* Find a character which needs to be quoted and append bytes up
206 while (q < end && *q != '&' && *q != '<' && *q != '>'
207 && *q != '"' && *q != '\'' && *q != '\r'
208 && *q != '\n' && *q != '\t')
210 svn_stringbuf_appendbytes(*outstr, p, q - p);
212 /* We may already be a winner. */
216 /* Append the entity reference for the character. */
218 svn_stringbuf_appendcstr(*outstr, "&");
220 svn_stringbuf_appendcstr(*outstr, "<");
222 svn_stringbuf_appendcstr(*outstr, ">");
224 svn_stringbuf_appendcstr(*outstr, """);
226 svn_stringbuf_appendcstr(*outstr, "'");
228 svn_stringbuf_appendcstr(*outstr, " ");
230 svn_stringbuf_appendcstr(*outstr, " ");
232 svn_stringbuf_appendcstr(*outstr, "	");
/* Escape the contents of the stringbuf STRING for use as XML character
   data.  Thin wrapper that passes STRING's data pointer and length,
   together with OUTSTR and POOL, to xml_escape_cdata(). */
240 svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
241 const svn_stringbuf_t *string,
244 xml_escape_cdata(outstr, string->data, string->len, pool);
/* Escape the contents of the counted string STRING for use as XML
   character data.  Thin wrapper that passes STRING's data pointer and
   length, together with OUTSTR and POOL, to xml_escape_cdata(). */
249 svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
250 const svn_string_t *string,
253 xml_escape_cdata(outstr, string->data, string->len, pool);
/* Escape the C string STRING for use as XML character data by handing
   it to xml_escape_cdata().  The length is taken with strlen(), so
   STRING must be a non-NULL, NUL-terminated string. */
258 svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
262 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
/* Escape the contents of the stringbuf STRING for use inside an XML
   attribute value.  Thin wrapper that passes STRING's data pointer and
   length, together with OUTSTR and POOL, to xml_escape_attr(). */
267 svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
268 const svn_stringbuf_t *string,
271 xml_escape_attr(outstr, string->data, string->len, pool);
/* Escape the contents of the counted string STRING for use inside an
   XML attribute value.  Thin wrapper that passes STRING's data pointer
   and length, together with OUTSTR and POOL, to xml_escape_attr(). */
276 svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
277 const svn_string_t *string,
280 xml_escape_attr(outstr, string->data, string->len, pool);
/* Escape the C string STRING for use inside an XML attribute value by
   handing it to xml_escape_attr().  The length is taken with strlen(),
   so STRING must be a non-NULL, NUL-terminated string. */
285 svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
289 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
/* Produce a version of the NUL-terminated STRING in which unsafe
   control characters are made visible.  A character is unsafe here
   when svn_ctype_iscntrl() accepts it and it is not newline, carriage
   return or tab.  Each unsafe character is replaced by a question
   mark, a backslash and a zero-padded three-digit decimal number
   (format ?\%03u); all other bytes are copied through.  When nothing
   is unsafe the original STRING is returned; otherwise the text is
   accumulated in a stringbuf allocated in POOL.
   NOTE(review): the final return statement is not among the visible
   lines; presumably it hands back the data of that stringbuf. */
294 svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
296 const char *end = string + strlen(string);
297 const char *p = string, *q;
298 svn_stringbuf_t *outstr;
299 char escaped_char[6]; /* ? \ u u u \0 */
/* First pass: look for any unsafe character at all, so that clean
   input can be returned without allocating. */
301 for (q = p; q < end; q++)
303 if (svn_ctype_iscntrl(*q)
304 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
308 /* Return original string if no unsafe characters found. */
/* Second pass: build the escaped copy chunk by chunk. */
312 outstr = svn_stringbuf_create_empty(pool);
317 /* Traverse till either unsafe character or eos. */
319 && ((! svn_ctype_iscntrl(*q))
320 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
323 /* copy chunk before marker */
324 svn_stringbuf_appendbytes(outstr, p, q - p);
329 /* Append an escaped version of the unsafe character.
331 ### This format was chosen for consistency with
332 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions
333 ### should probably share code, even though they escape
334 ### different characters.
336 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
338 svn_stringbuf_appendcstr(outstr, escaped_char);
347 /*** Map from the Expat callback types to the SVN XML types. ***/
/* Expat start-element callback.  USERDATA is the svn_xml_parser_t that
   svn_xml_make_parser() registered with XML_SetUserData().  The element
   NAME and its attribute array ATTS are forwarded, together with the
   user baton, to the Subversion start handler.  This shim is only
   installed when a start handler was supplied, which is why the
   function pointer is not checked here.  When XML_VERSION_AT_LEAST
   (1, 95, 8) holds, the parser is also stopped (non-resumable) if the
   error slot on the parser object has been set. */
349 static void expat_start_handler(void *userData,
350 const XML_Char *name,
351 const XML_Char **atts)
353 svn_xml_parser_t *svn_parser = userData;
355 (*svn_parser->start_handler)(svn_parser->baton, name, atts);
357 #if XML_VERSION_AT_LEAST(1, 95, 8)
358 /* Stop XML parsing if svn_xml_signal_bailout() was called.
359 We cannot do this in svn_xml_signal_bailout() because Expat
360 documentation states that XML_StopParser() must be called only from
362 if (svn_parser->error)
363 (void) XML_StopParser(svn_parser->parser, 0 /* resumable */);
/* Expat end-element callback.  USERDATA is the svn_xml_parser_t that
   svn_xml_make_parser() registered with XML_SetUserData().  The element
   NAME is forwarded, together with the user baton, to the Subversion
   end handler.  This shim is only installed when an end handler was
   supplied, which is why the function pointer is not checked here.
   When XML_VERSION_AT_LEAST(1, 95, 8) holds, the parser is also stopped
   (non-resumable) if the error slot on the parser object has been
   set. */
367 static void expat_end_handler(void *userData, const XML_Char *name)
369 svn_xml_parser_t *svn_parser = userData;
371 (*svn_parser->end_handler)(svn_parser->baton, name);
373 #if XML_VERSION_AT_LEAST(1, 95, 8)
374 /* Stop XML parsing if svn_xml_signal_bailout() was called.
375 We cannot do this in svn_xml_signal_bailout() because Expat
376 documentation states that XML_StopParser() must be called only from
378 if (svn_parser->error)
379 (void) XML_StopParser(svn_parser->parser, 0 /* resumable */);
/* Expat character-data callback.  USERDATA is the svn_xml_parser_t that
   svn_xml_make_parser() registered with XML_SetUserData().  The LEN
   characters at S are forwarded, together with the user baton, to the
   Subversion data handler; LEN is converted from int to apr_size_t on
   the way.  This shim is only installed when a data handler was
   supplied, which is why the function pointer is not checked here.
   When XML_VERSION_AT_LEAST(1, 95, 8) holds, the parser is also stopped
   (non-resumable) if the error slot on the parser object has been
   set. */
383 static void expat_data_handler(void *userData, const XML_Char *s, int len)
385 svn_xml_parser_t *svn_parser = userData;
387 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
389 #if XML_VERSION_AT_LEAST(1, 95, 8)
390 /* Stop XML parsing if svn_xml_signal_bailout() was called.
391 We cannot do this in svn_xml_signal_bailout() because Expat
392 documentation states that XML_StopParser() must be called only from
394 if (svn_parser->error)
395 (void) XML_StopParser(svn_parser->parser, 0 /* resumable */);
399 #if XML_VERSION_AT_LEAST(1, 95, 8)
/* Expat entity-declaration callback.  USERDATA is the svn_xml_parser_t
   registered with XML_SetUserData().  None of the details of the
   declaration are inspected: as soon as any entity declaration is
   reported, the parser is stopped with XML_StopParser() in
   non-resumable mode. */
400 static void expat_entity_declaration(void *userData,
401 const XML_Char *entityName,
402 int is_parameter_entity,
403 const XML_Char *value,
405 const XML_Char *base,
406 const XML_Char *systemId,
407 const XML_Char *publicId,
408 const XML_Char *notationName)
410 svn_xml_parser_t *svn_parser = userData;
412 /* Stop the parser if an entity declaration is hit. */
413 XML_StopParser(svn_parser->parser, 0 /* resumable */);
416 /* A noop default_handler. */
/* Takes the usual Expat default-handler arguments: the user data
   pointer and a buffer S of LEN characters.  svn_xml_make_parser()
   passes this function to XML_SetDefaultHandler(). */
417 static void expat_default_handler(void *userData, const XML_Char *s, int len)
422 /*** Making a parser. ***/
/* Pool cleanup callback for a parser object.  DATA is the
   svn_xml_parser_t.  If its Expat parser is still present it is
   released with XML_ParserFree() and the field is reset to NULL, so
   running the cleanup again does not free the parser a second time.
   The svn_xml_parser_t itself is not freed here. */
424 static apr_status_t parser_cleanup(void *data)
426 svn_xml_parser_t *svn_parser = data;
428 /* Free Expat parser. */
429 if (svn_parser->parser)
431 XML_ParserFree(svn_parser->parser);
432 svn_parser->parser = NULL;
/* Create a parser object in POOL that drives an Expat parser and
   relays its events to the given Subversion callbacks.

   BATON is stored and later passed as the first argument of every
   callback.  START_HANDLER, END_HANDLER and DATA_HANDLER may each be
   NULL; the matching Expat shim is installed only for the non-NULL
   ones.  The object is allocated with apr_pcalloc(), so fields that
   are not assigned below (such as the error slot) start out zeroed.
   A cleanup is registered on POOL so that the Expat parser is released
   when the pool is cleaned up, unless svn_xml_free_parser() has already
   run it.

   NOTE(review): the result of XML_ParserCreate() is used without a
   NULL check in the visible lines. */
438 svn_xml_make_parser(void *baton,
439 svn_xml_start_elem start_handler,
440 svn_xml_end_elem end_handler,
441 svn_xml_char_data data_handler,
444 svn_xml_parser_t *svn_parser;
445 XML_Parser parser = XML_ParserCreate(NULL);
/* Install a shim only where the caller actually supplied a handler,
   so the shims can call through the pointers unconditionally. */
447 XML_SetElementHandler(parser,
448 start_handler ? expat_start_handler : NULL,
449 end_handler ? expat_end_handler : NULL);
450 XML_SetCharacterDataHandler(parser,
451 data_handler ? expat_data_handler : NULL);
453 #if XML_VERSION_AT_LEAST(1, 95, 8)
454 XML_SetEntityDeclHandler(parser, expat_entity_declaration);
456 XML_SetDefaultHandler(parser, expat_default_handler);
459 svn_parser = apr_pcalloc(pool, sizeof(*svn_parser));
461 svn_parser->parser = parser;
462 svn_parser->start_handler = start_handler;
463 svn_parser->end_handler = end_handler;
464 svn_parser->data_handler = data_handler;
465 svn_parser->baton = baton;
466 svn_parser->pool = pool;
468 /* store our parser info as the UserData in the Expat parser */
469 XML_SetUserData(parser, svn_parser);
471 /* Register pool cleanup handler to free Expat XML parser on cleanup,
472 if svn_xml_free_parser() was not called explicitly. */
473 apr_pool_cleanup_register(svn_parser->pool, svn_parser,
474 parser_cleanup, apr_pool_cleanup_null);
/* Release the Expat parser owned by SVN_PARSER right away, by running
   the parser_cleanup callback through apr_pool_cleanup_run() on the
   pool the parser object was created in.  Only the Expat parser is
   released; SVN_PARSER itself stays allocated in its pool. */
482 svn_xml_free_parser(svn_xml_parser_t *svn_parser)
484 apr_pool_cleanup_run(svn_parser->pool, svn_parser, parser_cleanup);
/* Feed LEN bytes at BUF to the Expat parser held by SVN_PARSER.
   IS_FINAL is passed straight through to XML_Parse().

   Two failure paths are visible.  If a callback recorded an error on
   the parser object, the Expat parser is freed and that error is
   returned.  If Expat itself reports a problem, an
   SVN_ERR_XML_MALFORMED error carrying the Expat message and the
   current line number is created and the Expat parser is freed.

   NOTE(review): LEN is narrowed from apr_size_t to int for XML_Parse(),
   so a buffer larger than INT_MAX would not be passed intact. */
491 svn_xml_parse(svn_xml_parser_t *svn_parser,
494 svn_boolean_t is_final)
499 /* Parse some xml data */
500 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
502 /* Did an error occur somewhere *inside* the expat callbacks? */
503 if (svn_parser->error)
505 /* Kill all parsers and return the error */
506 svn_xml_free_parser(svn_parser);
/* Reading the error slot after the call above is fine: freeing only
   releases the Expat parser, not the svn_xml_parser_t object. */
507 return svn_parser->error;
510 /* If expat choked internally, return its error. */
513 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
514 long line = XML_GetCurrentLineNumber(svn_parser->parser);
/* Build the error before freeing, while the Expat parser can still
   be asked for its error code. */
516 err = svn_error_createf
517 (SVN_ERR_XML_MALFORMED, NULL,
518 _("Malformed XML: %s at line %ld"),
519 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
521 /* Kill all parsers and return the expat error */
522 svn_xml_free_parser(svn_parser);
/* Ask SVN_PARSER to abandon the parse in progress because of ERROR.
   All Expat handlers installed on the parser (element, character data
   and, when XML_VERSION_AT_LEAST(1, 95, 8) holds, entity declaration)
   are removed so that no further callbacks are delivered, and ERROR is
   stored in the error slot of the parser object.  svn_xml_parse()
   checks that slot after XML_Parse() returns, and the Expat shims check
   it to stop the parser. */
531 void svn_xml_signal_bailout(svn_error_t *error,
532 svn_xml_parser_t *svn_parser)
534 /* This will cause the current XML_Parse() call to finish quickly! */
535 XML_SetElementHandler(svn_parser->parser, NULL, NULL);
536 XML_SetCharacterDataHandler(svn_parser->parser, NULL);
537 #if XML_VERSION_AT_LEAST(1, 95, 8)
538 XML_SetEntityDeclHandler(svn_parser->parser, NULL);
540 /* Once outside of XML_Parse(), the existence of this field will
541 cause svn_delta_parse()'s main read-loop to return error. */
542 svn_parser->error = error;
552 /*** Attribute walking. ***/
/* Search the attribute array ATTS for an attribute called NAME.
   ATTS holds alternating name and value strings and ends at the first
   NULL name; ATTS itself may be NULL, in which case nothing is
   searched.  Names are compared exactly with strcmp().
   NOTE(review): the return statements are not among the visible
   lines; presumably the value following a matching name is returned,
   and NULL when no name matches. */
555 svn_xml_get_attr_value(const char *name, const char *const *atts)
557 while (atts && (*atts))
559 if (strcmp(atts[0], name) == 0)
/* Step over the value to reach the next attribute name. */
562 atts += 2; /* continue looping */
565 /* Else no such attribute name seen. */
571 /*** Printing XML ***/
/* Append an XML declaration to *STR.  The visible statements create an
   empty stringbuf in POOL for *STR, write the opening of the
   declaration with version 1.0, format ENCODING into an encoding
   attribute, and close the declaration followed by a newline.
   NOTE(review): the conditions guarding the creation of *STR and the
   encoding attribute are not among the visible lines; presumably they
   apply only when *STR is NULL and when ENCODING is non-NULL.
   NOTE(review): ENCODING is inserted between the quotes as given,
   without any escaping. */
574 svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
579 *str = svn_stringbuf_create_empty(pool);
580 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
/* The parameter is reused to hold the formatted attribute text. */
583 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
584 svn_stringbuf_appendcstr(*str, encoding);
586 svn_stringbuf_appendcstr(*str, "?>\n");
591 /*** Creating attribute hashes. ***/
593 /* Combine an existing attribute list ATTS with a HASH that itself
594 represents an attribute list. Iff PRESERVE is true, then no value
595 already in HASH will be changed, else values from ATTS will
596 override previous values in HASH. */
/* ATTS is walked as alternating name and value pointers until a NULL
   name is reached.  Each name, and each non-NULL value, is duplicated
   into POOL before being stored, so the hash does not point into ATTS.
   A NULL value is passed to apr_hash_set() as NULL.
   NOTE(review): per the APR documentation a NULL value removes the key
   from the hash; confirm that this is the intended effect. */
598 amalgamate(const char **atts,
600 svn_boolean_t preserve,
606 for (key = *atts; key; key = *(++atts))
/* The value is the array slot right after the name. */
608 const char *val = *(++atts);
611 /* kff todo: should we also insist that val be non-null here?
614 keylen = strlen(key);
615 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
618 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
619 val ? apr_pstrdup(pool, val) : NULL);
/* Build a hash allocated in POOL from the variable argument list AP.
   AP is read as alternating key and value C strings and ends at the
   first NULL key.  The key and value pointers are handed to
   svn_hash_sets() as they are; unlike amalgamate(), nothing is
   duplicated into POOL here, so the strings must outlive the hash. */
625 svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
627 apr_hash_t *ht = apr_hash_make(pool);
630 while ((key = va_arg(ap, char *)) != NULL)
/* Every key is expected to be followed by its value. */
632 const char *val = va_arg(ap, const char *);
633 svn_hash_sets(ht, key, val);
/* Create a new hash in POOL and fill it from the attribute array ATTS
   by way of amalgamate().  Because the hash starts out empty, the
   preserve flag passed to amalgamate() makes no difference. */
641 svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
643 apr_hash_t *ht = apr_hash_make(pool);
644 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */
/* Merge the attribute array ATTS into the existing hash HT, letting
   values from ATTS replace values already present.  Implemented as
   amalgamate() with the preserve flag off. */
650 svn_xml_hash_atts_overlaying(const char **atts,
654 amalgamate(atts, ht, 0, pool);
/* Merge the attribute array ATTS into the existing hash HT, keeping
   any value that HT already holds for a name.  Implemented as
   amalgamate() with the preserve flag on. */
659 svn_xml_hash_atts_preserving(const char **atts,
663 amalgamate(atts, ht, 1, pool);
668 /*** Making XML tags. ***/
/* Append an opening tag for TAGNAME to *STR, with one attribute per
   entry of the hash ATTRIBUTES.

   Each attribute is written on a new line as name, equals sign and a
   double-quoted value.  Values are escaped with
   svn_xml_escape_attr_cstring(); TAGNAME and the attribute names are
   appended as given.  Attributes appear in hash iteration order.
   STYLE controls the ending: svn_xml_self_closing adds a slash before
   the closing bracket, and every style except svn_xml_protect_pcdata
   adds a trailing newline.

   NOTE(review): the condition guarding the creation of *STR is not
   among the visible lines; presumably it applies when *STR is NULL. */
672 svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
674 enum svn_xml_open_tag_style style,
676 apr_hash_t *attributes)
678 apr_hash_index_t *hi;
/* Rough initial capacity: the tag name, a few bytes of punctuation and
   a guess of 30 bytes per attribute. */
679 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
682 *str = svn_stringbuf_create_ensure(est_size, pool);
684 svn_stringbuf_appendcstr(*str, "<");
685 svn_stringbuf_appendcstr(*str, tagname);
687 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
/* The key length is not needed, hence the NULL. */
692 apr_hash_this(hi, &key, NULL, &val);
695 svn_stringbuf_appendcstr(*str, "\n ");
696 svn_stringbuf_appendcstr(*str, key);
697 svn_stringbuf_appendcstr(*str, "=\"");
698 svn_xml_escape_attr_cstring(str, val, pool);
699 svn_stringbuf_appendcstr(*str, "\"");
702 if (style == svn_xml_self_closing)
703 svn_stringbuf_appendcstr(*str, "/");
704 svn_stringbuf_appendcstr(*str, ">");
705 if (style != svn_xml_protect_pcdata)
706 svn_stringbuf_appendcstr(*str, "\n");
/* va_list form of svn_xml_make_open_tag().  The attribute names and
   values in AP are collected into a temporary hash, which is then
   passed to svn_xml_make_open_tag_hash() to produce the tag in *STR. */
711 svn_xml_make_open_tag_v(svn_stringbuf_t **str,
713 enum svn_xml_open_tag_style style,
/* The hash lives in a subpool so that it can be discarded as soon as
   the tag has been written; the tag itself is built using POOL. */
717 apr_pool_t *subpool = svn_pool_create(pool);
718 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
720 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
721 svn_pool_destroy(subpool);
/* Variadic front end for building an opening tag.  The arguments that
   follow TAGNAME are gathered into a va_list and the work is delegated
   to svn_xml_make_open_tag_v(), which reads them as alternating
   attribute names and values ending at a NULL name. */
727 svn_xml_make_open_tag(svn_stringbuf_t **str,
729 enum svn_xml_open_tag_style style,
735 va_start(ap, tagname);
736 svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
/* Append a closing tag for TAGNAME, followed by a newline, to *STR.
   TAGNAME is appended as given, without escaping.
   NOTE(review): the condition guarding the creation of *STR is not
   among the visible lines; presumably a new stringbuf is created in
   POOL only when *STR is NULL. */
741 void svn_xml_make_close_tag(svn_stringbuf_t **str,
746 *str = svn_stringbuf_create_empty(pool);
748 svn_stringbuf_appendcstr(*str, "</");
749 svn_stringbuf_appendcstr(*str, tagname);
750 svn_stringbuf_appendcstr(*str, ">\n");