2 * xml.c: xml helper code shared among the Subversion libraries.
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
#include <string.h>

#include "svn_private_config.h"         /* for SVN_HAVE_OLD_EXPAT */
#include "svn_hash.h"
#include "svn_pools.h"
#include "svn_xml.h"
#include "svn_error.h"
#include "svn_ctype.h"

#include "private/svn_utf_private.h"

#ifdef SVN_HAVE_OLD_EXPAT
#include <xmlparse.h>
#else
#include <expat.h>
#endif

#ifdef XML_UNICODE
#error Expat is unusable -- it has been compiled for wide characters
#endif
48 /* The private internals for a parser object. */
49 struct svn_xml_parser_t
51 /** the expat parser */
54 /** the SVN callbacks to call from the Expat callbacks */
55 svn_xml_start_elem start_handler;
56 svn_xml_end_elem end_handler;
57 svn_xml_char_data data_handler;
59 /** the user's baton for private data */
62 /** if non-@c NULL, an error happened while parsing */
65 /** where this object is allocated, so we can free it easily */
71 /*** XML character validation ***/
74 svn_xml_is_xml_safe(const char *data, apr_size_t len)
76 const char *end = data + len;
79 if (! svn_utf__is_valid(data, len))
82 for (p = data; p < end; p++)
86 if (svn_ctype_iscntrl(c))
88 if ((c != SVN_CTYPE_ASCII_TAB)
89 && (c != SVN_CTYPE_ASCII_LINEFEED)
90 && (c != SVN_CTYPE_ASCII_CARRIAGERETURN)
91 && (c != SVN_CTYPE_ASCII_DELETE))
102 /*** XML escaping. ***/
106 * If *OUTSTR is @c NULL, set *OUTSTR to a new stringbuf allocated
107 * in POOL, else append to the existing stringbuf there.
110 xml_escape_cdata(svn_stringbuf_t **outstr,
115 const char *end = data + len;
116 const char *p = data, *q;
119 *outstr = svn_stringbuf_create_empty(pool);
123 /* Find a character which needs to be quoted and append bytes up
124 to that point. Strictly speaking, '>' only needs to be
125 quoted if it follows "]]", but it's easier to quote it all
128 So, why are we escaping '\r' here? Well, according to the
129 XML spec, '\r\n' gets converted to '\n' during XML parsing.
130 Also, any '\r' not followed by '\n' is converted to '\n'. By
131 golly, if we say we want to escape a '\r', we want to make
132 sure it remains a '\r'! */
134 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
136 svn_stringbuf_appendbytes(*outstr, p, q - p);
138 /* We may already be a winner. */
142 /* Append the entity reference for the character. */
144 svn_stringbuf_appendcstr(*outstr, "&");
146 svn_stringbuf_appendcstr(*outstr, "<");
148 svn_stringbuf_appendcstr(*outstr, ">");
150 svn_stringbuf_appendcstr(*outstr, " ");
156 /* Essentially the same as xml_escape_cdata, with the addition of
157 whitespace and quote characters. */
159 xml_escape_attr(svn_stringbuf_t **outstr,
164 const char *end = data + len;
165 const char *p = data, *q;
168 *outstr = svn_stringbuf_create_ensure(len, pool);
172 /* Find a character which needs to be quoted and append bytes up
175 while (q < end && *q != '&' && *q != '<' && *q != '>'
176 && *q != '"' && *q != '\'' && *q != '\r'
177 && *q != '\n' && *q != '\t')
179 svn_stringbuf_appendbytes(*outstr, p, q - p);
181 /* We may already be a winner. */
185 /* Append the entity reference for the character. */
187 svn_stringbuf_appendcstr(*outstr, "&");
189 svn_stringbuf_appendcstr(*outstr, "<");
191 svn_stringbuf_appendcstr(*outstr, ">");
193 svn_stringbuf_appendcstr(*outstr, """);
195 svn_stringbuf_appendcstr(*outstr, "'");
197 svn_stringbuf_appendcstr(*outstr, " ");
199 svn_stringbuf_appendcstr(*outstr, " ");
201 svn_stringbuf_appendcstr(*outstr, "	");
209 svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
210 const svn_stringbuf_t *string,
213 xml_escape_cdata(outstr, string->data, string->len, pool);
218 svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
219 const svn_string_t *string,
222 xml_escape_cdata(outstr, string->data, string->len, pool);
227 svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
231 xml_escape_cdata(outstr, string, (apr_size_t) strlen(string), pool);
236 svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
237 const svn_stringbuf_t *string,
240 xml_escape_attr(outstr, string->data, string->len, pool);
245 svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
246 const svn_string_t *string,
249 xml_escape_attr(outstr, string->data, string->len, pool);
254 svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
258 xml_escape_attr(outstr, string, (apr_size_t) strlen(string), pool);
263 svn_xml_fuzzy_escape(const char *string, apr_pool_t *pool)
265 const char *end = string + strlen(string);
266 const char *p = string, *q;
267 svn_stringbuf_t *outstr;
268 char escaped_char[6]; /* ? \ u u u \0 */
270 for (q = p; q < end; q++)
272 if (svn_ctype_iscntrl(*q)
273 && ! ((*q == '\n') || (*q == '\r') || (*q == '\t')))
277 /* Return original string if no unsafe characters found. */
281 outstr = svn_stringbuf_create_empty(pool);
286 /* Traverse till either unsafe character or eos. */
288 && ((! svn_ctype_iscntrl(*q))
289 || (*q == '\n') || (*q == '\r') || (*q == '\t')))
292 /* copy chunk before marker */
293 svn_stringbuf_appendbytes(outstr, p, q - p);
298 /* Append an escaped version of the unsafe character.
300 ### This format was chosen for consistency with
301 ### svn_utf__cstring_from_utf8_fuzzy(). The two functions
302 ### should probably share code, even though they escape
303 ### different characters.
305 apr_snprintf(escaped_char, sizeof(escaped_char), "?\\%03u",
307 svn_stringbuf_appendcstr(outstr, escaped_char);
316 /*** Map from the Expat callback types to the SVN XML types. ***/
318 static void expat_start_handler(void *userData,
319 const XML_Char *name,
320 const XML_Char **atts)
322 svn_xml_parser_t *svn_parser = userData;
324 (*svn_parser->start_handler)(svn_parser->baton, name, atts);
327 static void expat_end_handler(void *userData, const XML_Char *name)
329 svn_xml_parser_t *svn_parser = userData;
331 (*svn_parser->end_handler)(svn_parser->baton, name);
334 static void expat_data_handler(void *userData, const XML_Char *s, int len)
336 svn_xml_parser_t *svn_parser = userData;
338 (*svn_parser->data_handler)(svn_parser->baton, s, (apr_size_t)len);
342 /*** Making a parser. ***/
345 svn_xml_make_parser(void *baton,
346 svn_xml_start_elem start_handler,
347 svn_xml_end_elem end_handler,
348 svn_xml_char_data data_handler,
351 svn_xml_parser_t *svn_parser;
354 XML_Parser parser = XML_ParserCreate(NULL);
356 XML_SetElementHandler(parser,
357 start_handler ? expat_start_handler : NULL,
358 end_handler ? expat_end_handler : NULL);
359 XML_SetCharacterDataHandler(parser,
360 data_handler ? expat_data_handler : NULL);
362 /* ### we probably don't want this pool; or at least we should pass it
363 ### to the callbacks and clear it periodically. */
364 subpool = svn_pool_create(pool);
366 svn_parser = apr_pcalloc(subpool, sizeof(*svn_parser));
368 svn_parser->parser = parser;
369 svn_parser->start_handler = start_handler;
370 svn_parser->end_handler = end_handler;
371 svn_parser->data_handler = data_handler;
372 svn_parser->baton = baton;
373 svn_parser->pool = subpool;
375 /* store our parser info as the UserData in the Expat parser */
376 XML_SetUserData(parser, svn_parser);
384 svn_xml_free_parser(svn_xml_parser_t *svn_parser)
386 /* Free the expat parser */
387 XML_ParserFree(svn_parser->parser);
389 /* Free the subversion parser */
390 svn_pool_destroy(svn_parser->pool);
397 svn_xml_parse(svn_xml_parser_t *svn_parser,
400 svn_boolean_t is_final)
405 /* Parse some xml data */
406 success = XML_Parse(svn_parser->parser, buf, (int) len, is_final);
408 /* If expat choked internally, return its error. */
411 /* Line num is "int" in Expat v1, "long" in v2; hide the difference. */
412 long line = XML_GetCurrentLineNumber(svn_parser->parser);
414 err = svn_error_createf
415 (SVN_ERR_XML_MALFORMED, NULL,
416 _("Malformed XML: %s at line %ld"),
417 XML_ErrorString(XML_GetErrorCode(svn_parser->parser)), line);
419 /* Kill all parsers and return the expat error */
420 svn_xml_free_parser(svn_parser);
424 /* Did an error occur somewhere *inside* the expat callbacks? */
425 if (svn_parser->error)
427 err = svn_parser->error;
428 svn_xml_free_parser(svn_parser);
437 void svn_xml_signal_bailout(svn_error_t *error,
438 svn_xml_parser_t *svn_parser)
440 /* This will cause the current XML_Parse() call to finish quickly! */
441 XML_SetElementHandler(svn_parser->parser, NULL, NULL);
442 XML_SetCharacterDataHandler(svn_parser->parser, NULL);
444 /* Once outside of XML_Parse(), the existence of this field will
445 cause svn_delta_parse()'s main read-loop to return error. */
446 svn_parser->error = error;
456 /*** Attribute walking. ***/
/* Look up NAME in ATTS, a NULL-terminated array of alternating
   attribute names and values.  Return the value that follows the first
   matching name, or NULL if ATTS is NULL or holds no such name. */
const char *
svn_xml_get_attr_value(const char *name, const char *const *atts)
{
  while (atts && (*atts))
    {
      if (strcmp(atts[0], name) == 0)
        return atts[1];
      else
        atts += 2; /* continue looping */
    }

  /* Else no such attribute name seen. */
  return NULL;
}
475 /*** Printing XML ***/
478 svn_xml_make_header2(svn_stringbuf_t **str, const char *encoding,
483 *str = svn_stringbuf_create_empty(pool);
484 svn_stringbuf_appendcstr(*str, "<?xml version=\"1.0\"");
487 encoding = apr_psprintf(pool, " encoding=\"%s\"", encoding);
488 svn_stringbuf_appendcstr(*str, encoding);
490 svn_stringbuf_appendcstr(*str, "?>\n");
495 /*** Creating attribute hashes. ***/
497 /* Combine an existing attribute list ATTS with a HASH that itself
498 represents an attribute list. Iff PRESERVE is true, then no value
499 already in HASH will be changed, else values from ATTS will
500 override previous values in HASH. */
502 amalgamate(const char **atts,
504 svn_boolean_t preserve,
510 for (key = *atts; key; key = *(++atts))
512 const char *val = *(++atts);
515 /* kff todo: should we also insist that val be non-null here?
518 keylen = strlen(key);
519 if (preserve && ((apr_hash_get(ht, key, keylen)) != NULL))
522 apr_hash_set(ht, apr_pstrndup(pool, key, keylen), keylen,
523 val ? apr_pstrdup(pool, val) : NULL);
529 svn_xml_ap_to_hash(va_list ap, apr_pool_t *pool)
531 apr_hash_t *ht = apr_hash_make(pool);
534 while ((key = va_arg(ap, char *)) != NULL)
536 const char *val = va_arg(ap, const char *);
537 svn_hash_sets(ht, key, val);
545 svn_xml_make_att_hash(const char **atts, apr_pool_t *pool)
547 apr_hash_t *ht = apr_hash_make(pool);
548 amalgamate(atts, ht, 0, pool); /* third arg irrelevant in this case */
554 svn_xml_hash_atts_overlaying(const char **atts,
558 amalgamate(atts, ht, 0, pool);
563 svn_xml_hash_atts_preserving(const char **atts,
567 amalgamate(atts, ht, 1, pool);
572 /*** Making XML tags. ***/
576 svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
578 enum svn_xml_open_tag_style style,
580 apr_hash_t *attributes)
582 apr_hash_index_t *hi;
583 apr_size_t est_size = strlen(tagname) + 4 + apr_hash_count(attributes) * 30;
586 *str = svn_stringbuf_create_ensure(est_size, pool);
588 svn_stringbuf_appendcstr(*str, "<");
589 svn_stringbuf_appendcstr(*str, tagname);
591 for (hi = apr_hash_first(pool, attributes); hi; hi = apr_hash_next(hi))
596 apr_hash_this(hi, &key, NULL, &val);
599 svn_stringbuf_appendcstr(*str, "\n ");
600 svn_stringbuf_appendcstr(*str, key);
601 svn_stringbuf_appendcstr(*str, "=\"");
602 svn_xml_escape_attr_cstring(str, val, pool);
603 svn_stringbuf_appendcstr(*str, "\"");
606 if (style == svn_xml_self_closing)
607 svn_stringbuf_appendcstr(*str, "/");
608 svn_stringbuf_appendcstr(*str, ">");
609 if (style != svn_xml_protect_pcdata)
610 svn_stringbuf_appendcstr(*str, "\n");
615 svn_xml_make_open_tag_v(svn_stringbuf_t **str,
617 enum svn_xml_open_tag_style style,
621 apr_pool_t *subpool = svn_pool_create(pool);
622 apr_hash_t *ht = svn_xml_ap_to_hash(ap, subpool);
624 svn_xml_make_open_tag_hash(str, pool, style, tagname, ht);
625 svn_pool_destroy(subpool);
631 svn_xml_make_open_tag(svn_stringbuf_t **str,
633 enum svn_xml_open_tag_style style,
639 va_start(ap, tagname);
640 svn_xml_make_open_tag_v(str, pool, style, tagname, ap);
645 void svn_xml_make_close_tag(svn_stringbuf_t **str,
650 *str = svn_stringbuf_create_empty(pool);
652 svn_stringbuf_appendcstr(*str, "</");
653 svn_stringbuf_appendcstr(*str, tagname);
654 svn_stringbuf_appendcstr(*str, ">\n");