/*
 * xml.c:  xml helper code shared among the Subversion libraries.
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
29 #include "svn_private_config.h" /* for SVN_HAVE_OLD_EXPAT */
31 #include "svn_pools.h"
33 #include "svn_error.h"
34 #include "svn_ctype.h"
36 #include "private/svn_utf_private.h"
37 #include "private/svn_subr_private.h"
39 #ifdef SVN_HAVE_OLD_EXPAT
46 #error Expat is unusable -- it has been compiled for wide characters
50 svn_xml__compiled_version(void)
52 static const char xml_version_str[] = APR_STRINGIFY(XML_MAJOR_VERSION)
53 "." APR_STRINGIFY(XML_MINOR_VERSION)
54 "." APR_STRINGIFY(XML_MICRO_VERSION);
56 return xml_version_str;
/* Return the version string reported by XML_ExpatVersion(), with a
   leading "expat_" tag skipped when it is present. */
const char *
svn_xml__runtime_version(void)
{
  static const char tag[] = "expat_";
  const char *version = XML_ExpatVersion();

  /* Step over the tag so that only the bare version remains. */
  if (strncmp(version, tag, sizeof(tag) - 1) == 0)
    version += sizeof(tag) - 1;

  return version;
}
71 /* The private internals for a parser object. */
72 struct svn_xml_parser_t
74 /** the expat parser */
77 /** the SVN callbacks to call from the Expat callbacks */
78 svn_xml_start_elem start_handler;
79 svn_xml_end_elem end_handler;
80 svn_xml_char_data data_handler;
82 /** the user's baton for private data */
85 /** if non-@c NULL, an error happened while parsing */
88 /** where this object is allocated, so we can free it easily */
/*** XML character validation ***/
97 svn_xml_is_xml_safe(const char *data, apr_size_t len)
99 const char *end = data + len;
102 if (! svn_utf__is_valid(data, len))
105 for (p = data; p < end; p++)
107 unsigned char c = *p;
109 if (svn_ctype_iscntrl(c))
111 if ((c != SVN_CTYPE_ASCII_TAB)
112 && (c != SVN_CTYPE_ASCII_LINEFEED)
113 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
114 && (c != SVN_CTYPE_ASCII_DELETE))
/*** XML escaping. ***/
129 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
130 * in POOL, else append to the existing stringbuf there.
133 xml_escape_cdata(svn_stringbuf_t **outstr,
138 const char *end = data + len;
139 const char *p = data, *q;
142 *outstr = svn_stringbuf_create_empty(pool);
146 /* Find a character which needs to be quoted and append bytes up
147 to that point. Strictly speaking, '>' only needs to be
148 quoted if it follows "]]", but it's easier to quote it all
151 So, why are we escaping '\r' here? Well, according to the
152 XML spec, '\r\n' gets converted to '\n' during XML parsing.
153 Also, any '\r' not followed by '\n' is converted to '\n'. By
154 golly, if we say we want to escape a '\r', we want to make
155 sure it remains a '\r'! */
157 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
159 svn_stringbuf_appendbytes(*outstr, p, q - p);
161 /* We may already be a winner. */
165 /* Append the entity reference for the character. */
167 svn_stringbuf_appendcstr(*outstr, "&");
169 svn_stringbuf_appendcstr(*outstr, "<");
171 svn_stringbuf_appendcstr(*outstr, ">");
173 svn_stringbuf_appendcstr(*outstr, " ");
179 /* Essentially the same as xml_escape_cdata, with the addition of
180 whitespace and quote characters. */
182 xml_escape_attr(svn_stringbuf_t **outstr,
187 const char *end = data + len;
188 const char *p = data, *q;
191 *outstr = svn_stringbuf_create_ensure(len, pool);
195 /* Find a character which needs to be quoted and append bytes up
198 while (q < end && *q != '&' && *q != '<' && *q != '>'
199 && *q != '"' && *q != '\'' && *q != '\r'
200 && *q != '\n' && *q != '\t')
202 svn_stringbuf_appendbytes(*outstr, p, q - p);
204 /* We may already be a winner. */
208 /* Append the entity reference for the character. */
210 svn_stringbuf_appendcstr(*outstr, "&");
212 svn_stringbuf_appendcstr(*outstr, "<");
214 svn_stringbuf_appendcstr(*outstr, ">");
216 svn_stringbuf_appendcstr(*outstr, """);
218 svn_stringbuf_appendcstr(*outstr, "'");
220 svn_stringbuf_appendcstr(*outstr, " ");
222 svn_stringbuf_appendcstr(*outstr, " ");
224 svn_stringbuf_appendcstr(*outstr, "	");
232 svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
233 const svn_stringbuf_t *string,
236 xml_escape_cdata(outstr, string->data, string->len, pool);
241 svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
242 const svn_string_t *string,
245 xml_escape_cdata(outstr, string->data, string->len, pool);
250 svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
254 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
259 svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
260 const svn_stringbuf_t *string,
263 xml_escape_attr(outstr, string->data, string->len, pool);
268 svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
269 const svn_string_t *string,
272 xml_escape_attr(outstr, string->data, string->len, pool);
277 svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
281 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
286 svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
288 const char *end = string + strlen(string);
289 const char *p = string, *q;
290 svn_stringbuf_t *outstr;
291 char escaped_char[6]; /* ? \ u u u \0 */
293 for (q = p; q < end; q++)
295 if (svn_ctype_iscntrl(*q)
296 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
300 /* Return original string if no unsafe characters found. */
304 outstr = svn_stringbuf_create_empty(pool);
309 /* Traverse till either unsafe character or eos. */
311 && ((! svn_ctype_iscntrl(*q))
312 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
315 /* copy chunk before marker */
316 svn_stringbuf_appendbytes(outstr, p, q - p);
321 /* Append an escaped version of the unsafe character.
323 ### This format was chosen for consistency with
324 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions
325 ### should probably share code, even though they escape
326 ### different characters.
328 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
330 svn_stringbuf_appendcstr(outstr, escaped_char);
/*** Map from the Expat callback types to the SVN XML types. ***/
341 static void expat_start_handler(void *userData,
342 const XML_Char *name,
343 const XML_Char **atts)
345 svn_xml_parser_t *svn_parser = userData;
347 (*svn_parser->start_handler)(svn_parser->baton, name, atts);
350 static void expat_end_handler(void *userData, const XML_Char *name)
352 svn_xml_parser_t *svn_parser = userData;
354 (*svn_parser->end_handler)(svn_parser->baton, name);
357 static void expat_data_handler(void *userData, const XML_Char *s, int len)
359 svn_xml_parser_t *svn_parser = userData;
361 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
/*** Making a parser. ***/
368 svn_xml_make_parser(void *baton,
369 svn_xml_start_elem start_handler,
370 svn_xml_end_elem end_handler,
371 svn_xml_char_data data_handler,
374 svn_xml_parser_t *svn_parser;
377 XML_Parser parser = XML_ParserCreate(NULL);
379 XML_SetElementHandler(parser,
380 start_handler ? expat_start_handler : NULL,
381 end_handler ? expat_end_handler : NULL);
382 XML_SetCharacterDataHandler(parser,
383 data_handler ? expat_data_handler : NULL);
385 /* ### we probably don't want this pool; or at least we should pass it
386 ### to the callbacks and clear it periodically. */
387 subpool = svn_pool_create(pool);
389 svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser));
391 svn_parser->parser = parser;
392 svn_parser->start_handler = start_handler;
393 svn_parser->end_handler = end_handler;
394 svn_parser->data_handler = data_handler;
395 svn_parser->baton = baton;
396 svn_parser->pool = subpool;
398 /* store our parser info as the UserData in the Expat parser */
399 XML_SetUserData(parser, svn_parser);
407 svn_xml_free_parser(svn_xml_parser_t *svn_parser)
409 /* Free the expat parser */
410 XML_ParserFree(svn_parser->parser);
412 /* Free the subversion parser */
413 svn_pool_destroy(svn_parser->pool);
420 svn_xml_parse(svn_xml_parser_t *svn_parser,
423 svn_boolean_t is_final)
428 /* Parse some xml data */
429 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
431 /* If expat choked internally, return its error. */
434 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
435 long line = XML_GetCurrentLineNumber(svn_parser->parser);
437 err = svn_error_createf
438 (SVN_ERR_XML_MALFORMED, NULL,
439 _("Malformed XML: %s at line %ld"),
440 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
442 /* Kill all parsers and return the expat error */
443 svn_xml_free_parser(svn_parser);
447 /* Did an error occur somewhere *inside* the expat callbacks? */
448 if (svn_parser->error)
450 err = svn_parser->error;
451 svn_xml_free_parser(svn_parser);
460 void svn_xml_signal_bailout(svn_error_t *error,
461 svn_xml_parser_t *svn_parser)
463 /* This will cause the current XML_Parse() call to finish quickly! */
464 XML_SetElementHandler(svn_parser->parser, NULL, NULL);
465 XML_SetCharacterDataHandler(svn_parser->parser, NULL);
467 /* Once outside of XML_Parse(), the existence of this field will
468 cause svn_delta_parse()'s main read-loop to return error. */
469 svn_parser->error = error;
/*** Attribute walking. ***/
/* Return the value paired with NAME in ATTS, or NULL if there is none.

   ATTS is a NULL-terminated array of alternating attribute names and
   values.  NULL is also returned when NAME or ATTS is NULL.  The
   returned pointer is the one stored in ATTS; nothing is copied. */
const char *
svn_xml_get_attr_value(const char *name, const char *const *atts)
{
  const char *const *pair;

  /* strcmp() must never see a NULL name, and a NULL array is empty. */
  if (name == NULL || atts == NULL)
    return NULL;

  /* Step through the array one name/value pair at a time. */
  for (pair = atts; pair[0] != NULL; pair += 2)
    {
      if (strcmp(pair[0], name) == 0)
        return pair[1];
    }

  /* Else no such attribute name seen. */
  return NULL;
}
/*** Printing XML ***/
501 svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
506 *str = svn_stringbuf_create_empty(pool);
507 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
510 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
511 svn_stringbuf_appendcstr(*str, encoding);
513 svn_stringbuf_appendcstr(*str, "?>\n");
/*** Creating attribute hashes. ***/
520 /* Combine an existing attribute list ATTS with a HASH that itself
521 represents an attribute list. Iff PRESERVE is true, then no value
522 already in HASH will be changed, else values from ATTS will
523 override previous values in HASH. */
525 amalgamate(const char **atts,
527 svn_boolean_t preserve,
533 for (key = *atts; key; key = *(++atts))
535 const char *val = *(++atts);
538 /* kff todo: should we also insist that val be non-null here?
541 keylen = strlen(key);
542 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
545 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
546 val ? apr_pstrdup(pool, val) : NULL);
552 svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
554 apr_hash_t *ht = apr_hash_make(pool);
557 while ((key = va_arg(ap, char *)) != NULL)
559 const char *val = va_arg(ap, const char *);
560 svn_hash_sets(ht, key, val);
568 svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
570 apr_hash_t *ht = apr_hash_make(pool);
571 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */
577 svn_xml_hash_atts_overlaying(const char **atts,
581 amalgamate(atts, ht, 0, pool);
586 svn_xml_hash_atts_preserving(const char **atts,
590 amalgamate(atts, ht, 1, pool);
/*** Making XML tags. ***/
599 svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
601 enum svn_xml_open_tag_style style,
603 apr_hash_t *attributes)
605 apr_hash_index_t *hi;
606 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
609 *str = svn_stringbuf_create_ensure(est_size, pool);
611 svn_stringbuf_appendcstr(*str, "<");
612 svn_stringbuf_appendcstr(*str, tagname);
614 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
619 apr_hash_this(hi, &key, NULL, &val);
622 svn_stringbuf_appendcstr(*str, "\n ");
623 svn_stringbuf_appendcstr(*str, key);
624 svn_stringbuf_appendcstr(*str, "=\"");
625 svn_xml_escape_attr_cstring(str, val, pool);
626 svn_stringbuf_appendcstr(*str, "\"");
629 if (style == svn_xml_self_closing)
630 svn_stringbuf_appendcstr(*str, "/");
631 svn_stringbuf_appendcstr(*str, ">");
632 if (style != svn_xml_protect_pcdata)
633 svn_stringbuf_appendcstr(*str, "\n");
638 svn_xml_make_open_tag_v(svn_stringbuf_t **str,
640 enum svn_xml_open_tag_style style,
644 apr_pool_t *subpool = svn_pool_create(pool);
645 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
647 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
648 svn_pool_destroy(subpool);
654 svn_xml_make_open_tag(svn_stringbuf_t **str,
656 enum svn_xml_open_tag_style style,
662 va_start(ap, tagname);
663 svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
668 void svn_xml_make_close_tag(svn_stringbuf_t **str,
673 *str = svn_stringbuf_create_empty(pool);
675 svn_stringbuf_appendcstr(*str, "</");
676 svn_stringbuf_appendcstr(*str, tagname);
677 svn_stringbuf_appendcstr(*str, ">\n");