2 * xml.c: xml helper code shared among the Subversion libraries.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
29 #include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */
31 #include "svn_pools.h"
33 #include "svn_error.h"
34 #include "svn_ctype.h"
36 #include "private/svn_utf_private.h"
37 #include "private/svn_subr_private.h"
39 #ifdef SVN_HAVE_OLD_EXPAT
46 #error Expat is unusable -- it has been compiled for wide characters
/* XML_VERSION_AT_LEAST(major, minor, patch) evaluates to non-zero when
   the Expat headers being compiled against (XML_MAJOR_VERSION,
   XML_MINOR_VERSION, XML_MICRO_VERSION) describe version
   major.minor.patch or any later version.  It is only defined here
   when nothing earlier has already defined a macro of that name. */
49 #ifndef XML_VERSION_AT_LEAST
50 #define XML_VERSION_AT_LEAST(major,minor,patch) \
51 (((major) < XML_MAJOR_VERSION) \
52 || ((major) == XML_MAJOR_VERSION && (minor) < XML_MINOR_VERSION) \
53 || ((major) == XML_MAJOR_VERSION && (minor) == XML_MINOR_VERSION && \
54 (patch) <= XML_MICRO_VERSION))
55 #endif /* XML_VERSION_AT_LEAST */
/* Return the version of Expat this file was compiled against, as a
   "MAJOR.MINOR.MICRO" string.  The string is assembled at compile time
   from XML_MAJOR_VERSION, XML_MINOR_VERSION and XML_MICRO_VERSION and
   lives in a function-local static array, so the caller must not free
   or modify it.
   NOTE(review): the return-type line and the braces of this function
   are not visible in this listing. */
58 svn_xml__compiled_version(void)
60 static const char xml_version_str[] = APR_STRINGIFY(XML_MAJOR_VERSION)
61 "." APR_STRINGIFY(XML_MINOR_VERSION)
62 "." APR_STRINGIFY(XML_MICRO_VERSION);
64 return xml_version_str;
/* Report the version of the Expat library in use at run time, starting
   from the string returned by XML_ExpatVersion().  That string is
   tested for a leading "expat_" prefix (6 characters).
   NOTE(review): the statement acting on that test and the return
   statement are not visible in this listing -- presumably the prefix is
   skipped so that only the version number is returned; confirm. */
68 svn_xml__runtime_version(void)
70 const char *expat_version = XML_ExpatVersion();
72 if (!strncmp(expat_version, "expat_", 6))
79 /* The private internals for a parser object: the underlying Expat
   parser, the SVN-level callbacks and baton supplied to
   svn_xml_make_parser(), a pending error slot, and the pool the object
   itself lives in.
   NOTE(review): several field declarations (parser, baton, error,
   pool) are used elsewhere in this file but their lines are not
   visible in this listing. */
80 struct svn_xml_parser_t
82 /** the expat parser */
85 /** the SVN callbacks to call from the Expat callbacks */
86 svn_xml_start_elem start_handler;
87 svn_xml_end_elem end_handler;
88 svn_xml_char_data data_handler;
90 /** the user's baton for private data */
93 /** if non-@c NULL, an error happened while parsing; stored by
    svn_xml_signal_bailout() and examined by svn_xml_parse() */
96 /** where this object is allocated, so we can free it easily;
    destroyed by svn_xml_free_parser() */
102 /*** XML character validation ***/
/* Check whether the LEN bytes starting at DATA are safe to place in
   XML.  The data is first validated with svn_utf__is_valid(); then
   every byte is inspected, and a byte classified as a control
   character by svn_ctype_iscntrl() is tolerated only if it is TAB,
   LINEFEED, CARRIAGE RETURN or DELETE.
   NOTE(review): the return statements are not visible in this
   listing -- presumably FALSE on the first violation and TRUE
   otherwise; confirm. */
105 svn_xml_is_xml_safe(const char *data, apr_size_t len)
107 const char *end = data + len;
110 if (! svn_utf__is_valid(data, len))
113 for (p = data; p < end; p++)
115 unsigned char c = *p;
117 if (svn_ctype_iscntrl(c))
119 if ((c != SVN_CTYPE_ASCII_TAB)
120 && (c != SVN_CTYPE_ASCII_LINEFEED)
121 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
122 && (c != SVN_CTYPE_ASCII_DELETE))
133 /*** XML escaping. ***/
137 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
138 * in POOL, else append to the existing stringbuf there.
141 xml_escape_cdata(svn_stringbuf_t **outstr,
146 const char *end = data + len;
147 const char *p = data, *q;
150 *outstr = svn_stringbuf_create_empty(pool);
154 /* Find a character which needs to be quoted and append bytes up
155 to that point. Strictly speaking, '>' only needs to be
156 quoted if it follows "]]", but it's easier to quote it all
159 So, why are we escaping '\r' here? Well, according to the
160 XML spec, '\r\n' gets converted to '\n' during XML parsing.
161 Also, any '\r' not followed by '\n' is converted to '\n'. By
162 golly, if we say we want to escape a '\r', we want to make
163 sure it remains a '\r'! */
165 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
167 svn_stringbuf_appendbytes(*outstr, p, q - p);
169 /* We may already be a winner. */
173 /* Append the entity reference for the character. */
175 svn_stringbuf_appendcstr(*outstr, "&");
177 svn_stringbuf_appendcstr(*outstr, "<");
179 svn_stringbuf_appendcstr(*outstr, ">");
181 svn_stringbuf_appendcstr(*outstr, " ");
187 /* Essentially the same as xml_escape_cdata, with the addition of
188 whitespace and quote characters. */
190 xml_escape_attr(svn_stringbuf_t **outstr,
195 const char *end = data + len;
196 const char *p = data, *q;
199 *outstr = svn_stringbuf_create_ensure(len, pool);
203 /* Find a character which needs to be quoted and append bytes up
206 while (q < end && *q != '&' && *q != '<' && *q != '>'
207 && *q != '"' && *q != '\'' && *q != '\r'
208 && *q != '\n' && *q != '\t')
210 svn_stringbuf_appendbytes(*outstr, p, q - p);
212 /* We may already be a winner. */
216 /* Append the entity reference for the character. */
218 svn_stringbuf_appendcstr(*outstr, "&");
220 svn_stringbuf_appendcstr(*outstr, "<");
222 svn_stringbuf_appendcstr(*outstr, ">");
224 svn_stringbuf_appendcstr(*outstr, """);
226 svn_stringbuf_appendcstr(*outstr, "'");
228 svn_stringbuf_appendcstr(*outstr, " ");
230 svn_stringbuf_appendcstr(*outstr, " ");
232 svn_stringbuf_appendcstr(*outstr, "	");
/* Escape the contents of the stringbuf STRING as XML character data.
   Thin wrapper: forwards STRING's data pointer and length to
   xml_escape_cdata() together with OUTSTR and POOL, so the output
   handling of *OUTSTR is whatever that helper does. */
240 svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
241 const svn_stringbuf_t *string,
244 xml_escape_cdata(outstr, string->data, string->len, pool);
/* Escape the contents of the counted string STRING as XML character
   data.  Thin wrapper: forwards STRING's data pointer and length to
   xml_escape_cdata() together with OUTSTR and POOL. */
249 svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
250 const svn_string_t *string,
253 xml_escape_cdata(outstr, string->data, string->len, pool);
/* Escape the C string STRING as XML character data.  STRING must be
   NUL-terminated because its length is measured with strlen() before
   being handed to xml_escape_cdata() together with OUTSTR and POOL. */
258 svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
262 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
/* Escape the contents of the stringbuf STRING for use as an XML
   attribute value.  Thin wrapper: forwards STRING's data pointer and
   length to xml_escape_attr() together with OUTSTR and POOL. */
267 svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
268 const svn_stringbuf_t *string,
271 xml_escape_attr(outstr, string->data, string->len, pool);
/* Escape the contents of the counted string STRING for use as an XML
   attribute value.  Thin wrapper: forwards STRING's data pointer and
   length to xml_escape_attr() together with OUTSTR and POOL. */
276 svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
277 const svn_string_t *string,
280 xml_escape_attr(outstr, string->data, string->len, pool);
/* Escape the C string STRING for use as an XML attribute value.
   STRING must be NUL-terminated because its length is measured with
   strlen() before being handed to xml_escape_attr() together with
   OUTSTR and POOL. */
285 svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
289 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
/* Produce a version of the NUL-terminated STRING in which "unsafe"
   characters are made visible.  A character is unsafe when
   svn_ctype_iscntrl() classifies it as a control character and it is
   not '\n', '\r' or '\t'.

   A first pass scans for any unsafe character; per the comment below,
   the original string is returned when none is found.  Otherwise a
   stringbuf is created in POOL and filled by alternately copying runs
   of safe bytes and appending an escape formatted by apr_snprintf()
   with the format "?\\%03u" into the 6-byte buffer ESCAPED_CHAR
   ('?', '\', three digits, NUL).

   NOTE(review): several lines of this function (loop exits, the value
   passed to apr_snprintf(), the return statements) are not visible in
   this listing; presumably the escaped value is the unsafe byte as an
   unsigned number and the stringbuf's data is returned -- confirm. */
294 svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
296 const char *end = string + strlen(string);
297 const char *p = string, *q;
298 svn_stringbuf_t *outstr;
299 char escaped_char[6]; /* ? \ u u u \0 */
301 for (q = p; q < end; q++)
303 if (svn_ctype_iscntrl(*q)
304 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
308 /* Return original string if no unsafe characters found. */
312 outstr = svn_stringbuf_create_empty(pool);
317 /* Traverse till either unsafe character or eos. */
319 && ((! svn_ctype_iscntrl(*q))
320 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
323 /* copy chunk before marker */
324 svn_stringbuf_appendbytes(outstr, p, q - p);
329 /* Append an escaped version of the unsafe character.
331 ### This format was chosen for consistency with
332 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions
333 ### should probably share code, even though they escape
334 ### different characters.
336 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
338 svn_stringbuf_appendcstr(outstr, escaped_char);
347 /*** Map from the Expat callback types to the SVN XML types. ***/
/* Expat start-element callback.  USERDATA is the svn_xml_parser_t that
   svn_xml_make_parser() registered with XML_SetUserData(); the element
   NAME and attribute array ATTS are passed straight on to the user's
   start_handler along with the user's baton.  Only installed when the
   user supplied a start handler, so the function pointer is expected
   to be non-NULL here. */
349 static void expat_start_handler(void *userData,
350 const XML_Char *name,
351 const XML_Char **atts)
353 svn_xml_parser_t *svn_parser = userData;
355 (*svn_parser->start_handler)(svn_parser->baton, name, atts);
/* Expat end-element callback.  USERDATA is the svn_xml_parser_t
   registered with XML_SetUserData(); the element NAME is passed on to
   the user's end_handler along with the user's baton. */
358 static void expat_end_handler(void *userData, const XML_Char *name)
360 svn_xml_parser_t *svn_parser = userData;
362 (*svn_parser->end_handler)(svn_parser->baton, name);
/* Expat character-data callback.  USERDATA is the svn_xml_parser_t
   registered with XML_SetUserData(); the LEN bytes at S are passed on
   to the user's data_handler along with the user's baton.  Expat
   reports the length as an int, which is converted to apr_size_t for
   the SVN callback. */
365 static void expat_data_handler(void *userData, const XML_Char *s, int len)
367 svn_xml_parser_t *svn_parser = userData;
369 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
/* Expat entity-declaration callback, compiled only when Expat is at
   least version 1.95.8.  None of the declaration details are examined:
   as soon as any entity declaration is reported, parsing is stopped
   with XML_StopParser() in non-resumable mode.
   NOTE(review): presumably this exists to refuse documents that define
   entities (e.g. entity-expansion attacks) -- confirm the intent.  The
   closing #endif for the #if below is not visible in this listing. */
372 #if XML_VERSION_AT_LEAST(1, 95, 8)
373 static void expat_entity_declaration(void *userData,
374 const XML_Char *entityName,
375 int is_parameter_entity,
376 const XML_Char *value,
378 const XML_Char *base,
379 const XML_Char *systemId,
380 const XML_Char *publicId,
381 const XML_Char *notationName)
383 svn_xml_parser_t *svn_parser = userData;
385 /* Stop the parser if an entity declaration is hit. */
386 XML_StopParser(svn_parser->parser, 0 /* resumable */);
389 /* A noop default_handler.  It is the function handed to
   XML_SetDefaultHandler() in svn_xml_make_parser().
   NOTE(review): the function body is not visible in this listing. */
390 static void expat_default_handler(void *userData, const XML_Char *s, int len)
395 /*** Making a parser. ***/
/* Build a new SVN XML parser around a freshly created Expat parser.

   BATON is stored and later passed to every user callback.
   START_HANDLER, END_HANDLER and DATA_HANDLER are the user's callbacks;
   for each one that is non-NULL the matching expat_*_handler trampoline
   is installed in Expat, otherwise Expat gets NULL for that event.

   The svn_xml_parser_t is zero-allocated in a new subpool of POOL, which
   is remembered in the object so svn_xml_free_parser() can destroy it.
   The object is registered as Expat's user data so the trampolines can
   find it.

   NOTE(review): the result of XML_ParserCreate() is used without a
   visible NULL check.  The final parameter line(s), some preprocessor
   lines around the entity/default handler setup, and the return
   statement are not visible in this listing; presumably the new
   svn_xml_parser_t is returned -- confirm. */
398 svn_xml_make_parser(void *baton,
399 svn_xml_start_elem start_handler,
400 svn_xml_end_elem end_handler,
401 svn_xml_char_data data_handler,
404 svn_xml_parser_t *svn_parser;
407 XML_Parser parser = XML_ParserCreate(NULL);
409 XML_SetElementHandler(parser,
410 start_handler ? expat_start_handler : NULL,
411 end_handler ? expat_end_handler : NULL);
412 XML_SetCharacterDataHandler(parser,
413 data_handler ? expat_data_handler : NULL);
415 #if XML_VERSION_AT_LEAST(1, 95, 8)
416 XML_SetEntityDeclHandler(parser, expat_entity_declaration);
418 XML_SetDefaultHandler(parser, expat_default_handler);
421 /* ### we probably don't want this pool; or at least we should pass it
422 ### to the callbacks and clear it periodically. */
423 subpool = svn_pool_create(pool);
425 svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser));
427 svn_parser->parser = parser;
428 svn_parser->start_handler = start_handler;
429 svn_parser->end_handler = end_handler;
430 svn_parser->data_handler = data_handler;
431 svn_parser->baton = baton;
432 svn_parser->pool = subpool;
434 /* store our parser info as the UserData in the Expat parser */
435 XML_SetUserData(parser, svn_parser);
/* Release everything owned by SVN_PARSER: first the Expat parser, then
   the parser's own pool.  Because the svn_xml_parser_t itself was
   allocated in that pool (see svn_xml_make_parser()), SVN_PARSER must
   not be used after this call. */
443 svn_xml_free_parser(svn_xml_parser_t *svn_parser)
445 /* Free the expat parser */
446 XML_ParserFree(svn_parser->parser);
448 /* Free the subversion parser */
449 svn_pool_destroy(svn_parser->pool);
/* Push LEN bytes of XML from BUF through SVN_PARSER's Expat parser;
   IS_FINAL is passed to XML_Parse() to mark the last chunk.

   Two failure sources are handled:
   - Expat itself rejects the input: an SVN_ERR_XML_MALFORMED error is
     built from Expat's error string and current line number.
   - A callback recorded an error in SVN_PARSER->error (see
     svn_xml_signal_bailout()): that error is picked up.
   In BOTH cases svn_xml_free_parser() is called on SVN_PARSER before
   the error is handed back, so after a failure the caller must neither
   use nor free the parser again.

   NOTE(review): LEN is narrowed to int for XML_Parse(); a chunk larger
   than INT_MAX would be truncated or go negative -- confirm callers
   feed bounded chunks.  The BUF/LEN parameter lines, the test on
   SUCCESS and the return statements are not visible in this listing. */
456 svn_xml_parse(svn_xml_parser_t *svn_parser,
459 svn_boolean_t is_final)
464 /* Parse some xml data */
465 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
467 /* If expat choked internally, return its error. */
470 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
471 long line = XML_GetCurrentLineNumber(svn_parser->parser);
473 err = svn_error_createf
474 (SVN_ERR_XML_MALFORMED, NULL,
475 _("Malformed XML: %s at line %ld"),
476 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
478 /* Kill all parsers and return the expat error */
479 svn_xml_free_parser(svn_parser);
483 /* Did an error occur somewhere *inside* the expat callbacks? */
484 if (svn_parser->error)
486 err = svn_parser->error;
487 svn_xml_free_parser(svn_parser);
/* Called from inside a user callback to abort the parse with ERROR.
   All element, character-data and (on Expat >= 1.95.8) entity
   declaration handlers are removed so Expat makes no further callbacks
   for the rest of the current XML_Parse() call, and ERROR is stored in
   SVN_PARSER->error, where svn_xml_parse() looks for it once
   XML_Parse() has returned.
   NOTE(review): a previously stored error, if any, is overwritten
   without being cleared. */
496 void svn_xml_signal_bailout(svn_error_t *error,
497 svn_xml_parser_t *svn_parser)
499 /* This will cause the current XML_Parse() call to finish quickly! */
500 XML_SetElementHandler(svn_parser->parser, NULL, NULL);
501 XML_SetCharacterDataHandler(svn_parser->parser, NULL);
502 #if XML_VERSION_AT_LEAST(1, 95, 8)
503 XML_SetEntityDeclHandler(svn_parser->parser, NULL);
506 /* Once outside of XML_Parse(), the existence of this field will
507 cause svn_delta_parse()'s main read-loop to return error. */
508 svn_parser->error = error;
518 /*** Attribute walking. ***/
/* Search the attribute array ATTS for the attribute called NAME.
   ATTS is walked two entries at a time (name, value, name, value, ...)
   until a NULL name is reached; names are compared with strcmp(), so
   the match is exact and case-sensitive.  A NULL ATTS is accepted and
   treated as containing no attributes.
   NOTE(review): the return statements are not visible in this
   listing -- presumably the value following the matching name is
   returned, and NULL when there is no match; confirm. */
521 svn_xml_get_attr_value(const char *name, const char *const *atts)
523 while (atts && (*atts))
525 if (strcmp(atts[0], name) == 0)
528 atts += 2; /* continue looping */
531 /* Else no such attribute name seen. */
537 /*** Printing XML ***/
/* Write an XML declaration into *STR: the text
   <?xml version="1.0", then an encoding="..." pseudo-attribute built
   from ENCODING with apr_psprintf() in POOL, then the closing ?> and a
   newline.  ENCODING is inserted verbatim, without escaping.
   NOTE(review): the remaining parameter line and the conditions
   guarding the creation of *STR and the use of ENCODING are not
   visible in this listing; presumably *STR is only created when NULL
   and the encoding part is skipped when ENCODING is NULL -- confirm. */
540 svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
545 *str = svn_stringbuf_create_empty(pool);
546 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
549 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
550 svn_stringbuf_appendcstr(*str, encoding);
552 svn_stringbuf_appendcstr(*str, "?>\n");
557 /*** Creating attribute hashes. ***/
559 /* Combine an existing attribute list ATTS with a HASH that itself
560 represents an attribute list. Iff PRESERVE is true, then no value
561 already in HASH will be changed, else values from ATTS will
562 override previous values in HASH.

   ATTS is consumed as alternating key/value pointers and the walk
   stops at the first NULL key.  Keys and non-NULL values are
   duplicated into POOL before being stored, so the hash does not
   depend on the lifetime of ATTS.  A NULL value is passed to
   apr_hash_set() as NULL (per the APR documentation that removes the
   key rather than storing it -- confirm this is intended).
   NOTE(review): some lines of this function are not visible in this
   listing, including the action taken when PRESERVE finds an existing
   key. */
564 amalgamate(const char **atts,
566 svn_boolean_t preserve,
572 for (key = *atts; key; key = *(++atts))
574 const char *val = *(++atts);
577 /* kff todo: should we also insist that val be non-null here?
580 keylen = strlen(key);
581 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
584 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
585 val ? apr_pstrdup(pool, val) : NULL);
/* Build an attribute hash in POOL from the variadic argument list AP.
   AP is read as a sequence of (char *key, const char *val) pairs and
   reading stops at the first NULL key; every key must therefore be
   followed by a value argument.  Each pair is stored with
   svn_hash_sets().
   NOTE(review): presumably the key and value pointers are stored
   as-is, without copying, so they must outlive the hash -- confirm.
   The return statement is not visible in this listing. */
591 svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
593 apr_hash_t *ht = apr_hash_make(pool);
596 while ((key = va_arg(ap, char *)) != NULL)
598 const char *val = va_arg(ap, const char *);
599 svn_hash_sets(ht, key, val);
/* Create a new hash in POOL and fill it from the attribute list ATTS
   via amalgamate().  Since the hash starts out empty there is nothing
   to preserve or override, which is why the PRESERVE argument does not
   matter here.
   NOTE(review): the return statement is not visible in this listing. */
607 svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
609 apr_hash_t *ht = apr_hash_make(pool);
610 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */
/* Merge the attribute list ATTS into the existing hash HT using
   amalgamate() with PRESERVE = 0, i.e. values from ATTS replace any
   values already present in HT under the same key. */
616 svn_xml_hash_atts_overlaying(const char **atts,
620 amalgamate(atts, ht, 0, pool);
/* Merge the attribute list ATTS into the existing hash HT using
   amalgamate() with PRESERVE = 1, i.e. values already present in HT
   are kept and only keys missing from HT are added from ATTS. */
625 svn_xml_hash_atts_preserving(const char **atts,
629 amalgamate(atts, ht, 1, pool);
634 /*** Making XML tags. ***/
/* Append an opening tag for TAGNAME to *STR, with one attribute per
   entry of the hash ATTRIBUTES.

   The initial buffer size is only an estimate: the tag name length
   plus 4, plus 30 bytes per attribute.  Each attribute is written on
   its own line as KEY="VALUE"; the value is escaped with
   svn_xml_escape_attr_cstring(), while the key (like TAGNAME) is
   appended verbatim and must already be a valid XML name.  The hash
   iterator is allocated in POOL, and attributes appear in hash
   iteration order.

   STYLE selects the ending: svn_xml_self_closing adds "/" before ">",
   and a trailing newline is written unless STYLE is
   svn_xml_protect_pcdata.

   NOTE(review): some parameter lines, local declarations and the
   condition guarding the creation of *STR are not visible in this
   listing; presumably *STR is only created when it is NULL -- confirm. */
638 svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
640 enum svn_xml_open_tag_style style,
642 apr_hash_t *attributes)
644 apr_hash_index_t *hi;
645 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
648 *str = svn_stringbuf_create_ensure(est_size, pool);
650 svn_stringbuf_appendcstr(*str, "<");
651 svn_stringbuf_appendcstr(*str, tagname);
653 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
658 apr_hash_this(hi, &key, NULL, &val);
661 svn_stringbuf_appendcstr(*str, "\n ");
662 svn_stringbuf_appendcstr(*str, key);
663 svn_stringbuf_appendcstr(*str, "=\"");
664 svn_xml_escape_attr_cstring(str, val, pool);
665 svn_stringbuf_appendcstr(*str, "\"");
668 if (style == svn_xml_self_closing)
669 svn_stringbuf_appendcstr(*str, "/");
670 svn_stringbuf_appendcstr(*str, ">");
671 if (style != svn_xml_protect_pcdata)
672 svn_stringbuf_appendcstr(*str, "\n");
/* va_list flavour of svn_xml_make_open_tag().  The attribute pairs in
   AP are collected into a temporary hash that lives in a private
   subpool of POOL; the tag is then written by
   svn_xml_make_open_tag_hash() using POOL itself for the output, and
   the subpool (with the temporary hash) is destroyed before returning.
   NOTE(review): the TAGNAME and AP parameter lines are not visible in
   this listing. */
677 svn_xml_make_open_tag_v(svn_stringbuf_t **str,
679 enum svn_xml_open_tag_style style,
683 apr_pool_t *subpool = svn_pool_create(pool);
684 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
686 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
687 svn_pool_destroy(subpool);
/* Variadic front end for writing an opening tag: the arguments that
   follow TAGNAME are gathered into a va_list and forwarded, together
   with STR, POOL, STYLE and TAGNAME, to svn_xml_make_open_tag_v().
   Because that function reads key/value pairs until a NULL key, the
   variadic arguments must be terminated by a NULL.
   NOTE(review): the matching va_end() call and some parameter lines
   are not visible in this listing. */
693 svn_xml_make_open_tag(svn_stringbuf_t **str,
695 enum svn_xml_open_tag_style style,
701 va_start(ap, tagname);
702 svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
/* Append a closing tag for TAGNAME to *STR, followed by a newline.
   TAGNAME is written verbatim between the opening and closing
   delimiters.
   NOTE(review): the POOL/TAGNAME parameter lines and the condition
   guarding the creation of *STR are not visible in this listing;
   presumably a new stringbuf is created in POOL only when *STR is
   NULL -- confirm. */
707 void svn_xml_make_close_tag(svn_stringbuf_t **str,
712 *str = svn_stringbuf_create_empty(pool);
714 svn_stringbuf_appendcstr(*str, "</");
715 svn_stringbuf_appendcstr(*str, tagname);
716 svn_stringbuf_appendcstr(*str, ">\n");