2 * xml.c : standard XML parsing routines for ra_serf
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
30 #include "svn_pools.h"
34 #include "../libsvn_ra/ra_loader.h"
35 #include "svn_config.h"
36 #include "svn_delta.h"
39 #include "svn_private_config.h"
40 #include "private/svn_string_private.h"
45 struct svn_ra_serf__xml_context_t {
46 /* Current state information. */
47 svn_ra_serf__xml_estate_t *current;
49 /* If WAITING.NAMESPACE != NULL, wait for NAMESPACE:NAME element to be
50 closed before looking for transitions from CURRENT->STATE. */
51 svn_ra_serf__dav_props_t waiting;
53 /* The transition table. */
54 const svn_ra_serf__xml_transition_t *ttable;
56 /* The callback information. */
57 svn_ra_serf__xml_opened_t opened_cb;
58 svn_ra_serf__xml_closed_t closed_cb;
59 svn_ra_serf__xml_cdata_t cdata_cb;
62 /* Linked list of free states. */
63 svn_ra_serf__xml_estate_t *free_states;
66 /* Used to verify we are not re-entering a callback, specifically to
67 ensure SCRATCH_POOL is not cleared while an outer callback is
69 svn_boolean_t within_callback;
70 #define START_CALLBACK(xmlctx) \
72 svn_ra_serf__xml_context_t *xmlctx__tmp = (xmlctx); \
73 SVN_ERR_ASSERT(!xmlctx__tmp->within_callback); \
74 xmlctx__tmp->within_callback = TRUE; \
76 #define END_CALLBACK(xmlctx) ((xmlctx)->within_callback = FALSE)
78 #define START_CALLBACK(xmlctx) /* empty */
79 #define END_CALLBACK(xmlctx) /* empty */
80 #endif /* SVN_DEBUG */
82 apr_pool_t *scratch_pool;
86 struct svn_ra_serf__xml_estate_t {
87 /* The current state value. */
90 /* The xml tag that opened this state. Waiting for the tag to close. */
91 svn_ra_serf__dav_props_t tag;
93 /* Should the CLOSED_CB function be called for custom processing when
94 this tag is closed? */
95 svn_boolean_t custom_close;
97 /* A pool may be constructed for this state. */
98 apr_pool_t *state_pool;
100 /* The namespaces extent for this state/element. This will start with
101 the parent's NS_LIST, and we will push new namespaces into our
102 local list. The parent will be unaffected by our locally-scoped data. */
103 svn_ra_serf__ns_t *ns_list;
105 /* Any collected attribute values. char * -> svn_string_t *. May be NULL
106 if no attributes have been collected. */
109 /* Any collected cdata. May be NULL if no cdata is being collected. */
110 svn_stringbuf_t *cdata;
112 /* Previous/outer state. */
113 svn_ra_serf__xml_estate_t *prev;
119 define_namespaces(svn_ra_serf__ns_t **ns_list,
120 const char *const *attrs,
121 apr_pool_t *(*get_pool)(void *baton),
124 const char *const *tmp_attrs = attrs;
126 for (tmp_attrs = attrs; *tmp_attrs != NULL; tmp_attrs += 2)
128 if (strncmp(*tmp_attrs, "xmlns", 5) == 0)
130 const svn_ra_serf__ns_t *cur_ns;
131 svn_boolean_t found = FALSE;
134 /* The empty prefix, or a named-prefix. */
135 if (tmp_attrs[0][5] == ':')
136 prefix = &tmp_attrs[0][6];
140 /* Have we already defined this ns previously? */
141 for (cur_ns = *ns_list; cur_ns; cur_ns = cur_ns->next)
143 if (strcmp(cur_ns->namespace, prefix) == 0)
153 svn_ra_serf__ns_t *new_ns;
156 pool = get_pool(baton);
159 new_ns = apr_palloc(pool, sizeof(*new_ns));
160 new_ns->namespace = apr_pstrdup(pool, prefix);
161 new_ns->url = apr_pstrdup(pool, tmp_attrs[1]);
163 /* Push into the front of NS_LIST. Parent states will point
164 to later in the chain, so will be unaffected by
165 shadowing/other namespaces pushed onto NS_LIST. */
166 new_ns->next = *ns_list;
175 svn_ra_serf__define_ns(svn_ra_serf__ns_t **ns_list,
176 const char *const *attrs,
177 apr_pool_t *result_pool)
179 define_namespaces(ns_list, attrs, NULL /* get_pool */, result_pool);
184 * Look up NAME in the NS_LIST list for previously declared namespace
185 * definitions and return a DAV_PROPS_T-tuple that has values.
188 svn_ra_serf__expand_ns(svn_ra_serf__dav_props_t *returned_prop_name,
189 const svn_ra_serf__ns_t *ns_list,
194 colon = strchr(name, ':');
197 const svn_ra_serf__ns_t *ns;
199 for (ns = ns_list; ns; ns = ns->next)
201 if (strncmp(ns->namespace, name, colon - name) == 0)
203 returned_prop_name->namespace = ns->url;
204 returned_prop_name->name = colon + 1;
211 const svn_ra_serf__ns_t *ns;
213 for (ns = ns_list; ns; ns = ns->next)
215 if (! ns->namespace[0])
217 returned_prop_name->namespace = ns->url;
218 returned_prop_name->name = name;
224 /* If the prefix is not found, then the name is NOT within a
226 returned_prop_name->namespace = "";
227 returned_prop_name->name = name;
231 #define XML_HEADER "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
234 svn_ra_serf__add_xml_header_buckets(serf_bucket_t *agg_bucket,
235 serf_bucket_alloc_t *bkt_alloc)
239 tmp = SERF_BUCKET_SIMPLE_STRING_LEN(XML_HEADER, sizeof(XML_HEADER) - 1,
241 serf_bucket_aggregate_append(agg_bucket, tmp);
245 svn_ra_serf__add_open_tag_buckets(serf_bucket_t *agg_bucket,
246 serf_bucket_alloc_t *bkt_alloc,
247 const char *tag, ...)
253 tmp = SERF_BUCKET_SIMPLE_STRING_LEN("<", 1, bkt_alloc);
254 serf_bucket_aggregate_append(agg_bucket, tmp);
256 tmp = SERF_BUCKET_SIMPLE_STRING(tag, bkt_alloc);
257 serf_bucket_aggregate_append(agg_bucket, tmp);
260 while ((key = va_arg(ap, char *)) != NULL)
262 const char *val = va_arg(ap, const char *);
265 tmp = SERF_BUCKET_SIMPLE_STRING_LEN(" ", 1, bkt_alloc);
266 serf_bucket_aggregate_append(agg_bucket, tmp);
268 tmp = SERF_BUCKET_SIMPLE_STRING(key, bkt_alloc);
269 serf_bucket_aggregate_append(agg_bucket, tmp);
271 tmp = SERF_BUCKET_SIMPLE_STRING_LEN("=\"", 2, bkt_alloc);
272 serf_bucket_aggregate_append(agg_bucket, tmp);
274 tmp = SERF_BUCKET_SIMPLE_STRING(val, bkt_alloc);
275 serf_bucket_aggregate_append(agg_bucket, tmp);
277 tmp = SERF_BUCKET_SIMPLE_STRING_LEN("\"", 1, bkt_alloc);
278 serf_bucket_aggregate_append(agg_bucket, tmp);
283 tmp = SERF_BUCKET_SIMPLE_STRING_LEN(">", 1, bkt_alloc);
284 serf_bucket_aggregate_append(agg_bucket, tmp);
288 svn_ra_serf__add_close_tag_buckets(serf_bucket_t *agg_bucket,
289 serf_bucket_alloc_t *bkt_alloc,
294 tmp = SERF_BUCKET_SIMPLE_STRING_LEN("</", 2, bkt_alloc);
295 serf_bucket_aggregate_append(agg_bucket, tmp);
297 tmp = SERF_BUCKET_SIMPLE_STRING(tag, bkt_alloc);
298 serf_bucket_aggregate_append(agg_bucket, tmp);
300 tmp = SERF_BUCKET_SIMPLE_STRING_LEN(">", 1, bkt_alloc);
301 serf_bucket_aggregate_append(agg_bucket, tmp);
305 svn_ra_serf__add_cdata_len_buckets(serf_bucket_t *agg_bucket,
306 serf_bucket_alloc_t *bkt_alloc,
307 const char *data, apr_size_t len)
309 const char *end = data + len;
310 const char *p = data, *q;
311 serf_bucket_t *tmp_bkt;
315 /* Find a character which needs to be quoted and append bytes up
316 to that point. Strictly speaking, '>' only needs to be
317 quoted if it follows "]]", but it's easier to quote it all
320 So, why are we escaping '\r' here? Well, according to the
321 XML spec, '\r\n' gets converted to '\n' during XML parsing.
322 Also, any '\r' not followed by '\n' is converted to '\n'. By
323 golly, if we say we want to escape a '\r', we want to make
324 sure it remains a '\r'! */
326 while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
330 tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN(p, q - p, bkt_alloc);
331 serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
333 /* We may already be a winner. */
337 /* Append the entity reference for the character. */
340 tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN("&", sizeof("&") - 1,
342 serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
346 tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN("<", sizeof("<") - 1,
348 serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
352 tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN(">", sizeof(">") - 1,
354 serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
358 tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN(" ", sizeof(" ") - 1,
360 serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
367 void svn_ra_serf__add_tag_buckets(serf_bucket_t *agg_bucket, const char *tag,
369 serf_bucket_alloc_t *bkt_alloc)
371 svn_ra_serf__add_open_tag_buckets(agg_bucket, bkt_alloc, tag, NULL);
375 svn_ra_serf__add_cdata_len_buckets(agg_bucket, bkt_alloc,
376 value, strlen(value));
379 svn_ra_serf__add_close_tag_buckets(agg_bucket, bkt_alloc, tag);
383 svn_ra_serf__xml_push_state(svn_ra_serf__xml_parser_t *parser,
386 svn_ra_serf__xml_state_t *new_state;
388 if (!parser->free_state)
390 new_state = apr_palloc(parser->pool, sizeof(*new_state));
391 new_state->pool = svn_pool_create(parser->pool);
395 new_state = parser->free_state;
396 parser->free_state = parser->free_state->prev;
398 svn_pool_clear(new_state->pool);
403 new_state->private = parser->state->private;
404 new_state->ns_list = parser->state->ns_list;
408 new_state->private = NULL;
409 new_state->ns_list = NULL;
412 new_state->current_state = state;
414 /* Add it to the state chain. */
415 new_state->prev = parser->state;
416 parser->state = new_state;
419 void svn_ra_serf__xml_pop_state(svn_ra_serf__xml_parser_t *parser)
421 svn_ra_serf__xml_state_t *cur_state;
423 cur_state = parser->state;
424 parser->state = cur_state->prev;
425 cur_state->prev = parser->free_state;
426 parser->free_state = cur_state;
430 /* Return a pool for XES to use for self-alloc (and other specifics). */
432 xes_pool(const svn_ra_serf__xml_estate_t *xes)
434 /* Move up through parent states looking for one with a pool. This
435 will always terminate since the initial state has a pool. */
436 while (xes->state_pool == NULL)
438 return xes->state_pool;
443 ensure_pool(svn_ra_serf__xml_estate_t *xes)
445 if (xes->state_pool == NULL)
446 xes->state_pool = svn_pool_create(xes_pool(xes));
450 /* This callback is used by define_namespaces() to wait until a pool is
451 required before constructing it. */
453 lazy_create_pool(void *baton)
455 svn_ra_serf__xml_estate_t *xes = baton;
458 return xes->state_pool;
462 svn_ra_serf__xml_context_destroy(
463 svn_ra_serf__xml_context_t *xmlctx)
465 svn_pool_destroy(xmlctx->scratch_pool);
468 svn_ra_serf__xml_context_t *
469 svn_ra_serf__xml_context_create(
470 const svn_ra_serf__xml_transition_t *ttable,
471 svn_ra_serf__xml_opened_t opened_cb,
472 svn_ra_serf__xml_closed_t closed_cb,
473 svn_ra_serf__xml_cdata_t cdata_cb,
475 apr_pool_t *result_pool)
477 svn_ra_serf__xml_context_t *xmlctx;
478 svn_ra_serf__xml_estate_t *xes;
480 xmlctx = apr_pcalloc(result_pool, sizeof(*xmlctx));
481 xmlctx->ttable = ttable;
482 xmlctx->opened_cb = opened_cb;
483 xmlctx->closed_cb = closed_cb;
484 xmlctx->cdata_cb = cdata_cb;
485 xmlctx->baton = baton;
486 xmlctx->scratch_pool = svn_pool_create(result_pool);
488 xes = apr_pcalloc(result_pool, sizeof(*xes));
489 /* XES->STATE == 0 */
491 /* Child states may use this pool to allocate themselves. If a child
492 needs to collect information, then it will construct a subpool and
493 will use that to allocate itself and its collected data. */
494 xes->state_pool = result_pool;
496 xmlctx->current = xes;
503 svn_ra_serf__xml_gather_since(svn_ra_serf__xml_estate_t *xes,
510 pool = xes->state_pool;
512 data = apr_hash_make(pool);
514 for (; xes != NULL; xes = xes->prev)
516 if (xes->attrs != NULL)
518 apr_hash_index_t *hi;
520 for (hi = apr_hash_first(pool, xes->attrs); hi;
521 hi = apr_hash_next(hi))
527 /* Parent name/value lifetimes are at least as long as POOL. */
528 apr_hash_this(hi, &key, &klen, &val);
529 apr_hash_set(data, key, klen, val);
533 if (xes->state == stop_state)
542 svn_ra_serf__xml_note(svn_ra_serf__xml_estate_t *xes,
547 svn_ra_serf__xml_estate_t *scan;
549 for (scan = xes; scan != NULL && scan->state != state; scan = scan->prev)
552 SVN_ERR_ASSERT_NO_RETURN(scan != NULL);
554 /* Make sure the target state has a pool. */
557 /* ... and attribute storage. */
558 if (scan->attrs == NULL)
559 scan->attrs = apr_hash_make(scan->state_pool);
561 /* In all likelihood, NAME is a string constant. But we can't really
562 be sure. And it isn't like we're storing a billion of these into
564 svn_hash_sets(scan->attrs,
565 apr_pstrdup(scan->state_pool, name),
566 apr_pstrdup(scan->state_pool, value));
571 svn_ra_serf__xml_state_pool(svn_ra_serf__xml_estate_t *xes)
573 /* If they asked for a pool, then ensure that we have one to provide. */
576 return xes->state_pool;
581 svn_ra_serf__xml_cb_start(svn_ra_serf__xml_context_t *xmlctx,
582 const char *raw_name,
583 const char *const *attrs)
585 svn_ra_serf__xml_estate_t *current = xmlctx->current;
586 svn_ra_serf__dav_props_t elemname;
587 const svn_ra_serf__xml_transition_t *scan;
588 apr_pool_t *new_pool;
589 svn_ra_serf__xml_estate_t *new_xes;
591 /* If we're waiting for an element to close, then just ignore all
592 other element-opens. */
593 if (xmlctx->waiting.namespace != NULL)
596 /* Look for xmlns: attributes. Lazily create the state pool if any
598 define_namespaces(¤t->ns_list, attrs, lazy_create_pool, current);
600 svn_ra_serf__expand_ns(&elemname, current->ns_list, raw_name);
602 for (scan = xmlctx->ttable; scan->ns != NULL; ++scan)
604 if (scan->from_state != current->state)
607 /* Wildcard tag match. */
608 if (*scan->name == '*')
611 /* Found a specific transition. */
612 if (strcmp(elemname.name, scan->name) == 0
613 && strcmp(elemname.namespace, scan->ns) == 0)
616 if (scan->ns == NULL)
618 if (current->state == 0)
620 return svn_error_createf(
621 SVN_ERR_RA_DAV_MALFORMED_DATA, NULL,
622 _("XML Parsing failed: Unexpected root element '%s'"),
626 xmlctx->waiting = elemname;
631 /* We should not be told to collect cdata if the closed_cb will not
633 SVN_ERR_ASSERT(!scan->collect_cdata || scan->custom_close);
635 /* Found a transition. Make it happen. */
637 /* ### todo. push state */
639 /* ### how to use free states? */
640 /* This state should be allocated in the extent pool. If we will be
641 collecting information for this state, then construct a subpool.
643 ### potentially optimize away the subpool if none of the
644 ### attributes are present. subpools are cheap, tho... */
645 new_pool = xes_pool(current);
646 if (scan->collect_cdata || scan->collect_attrs[0])
648 new_pool = svn_pool_create(new_pool);
650 /* Prep the new state. */
651 new_xes = apr_pcalloc(new_pool, sizeof(*new_xes));
652 new_xes->state_pool = new_pool;
654 /* If we're supposed to collect cdata, then set up a buffer for
655 this. The existence of this buffer will instruct our cdata
656 callback to collect the cdata. */
657 if (scan->collect_cdata)
658 new_xes->cdata = svn_stringbuf_create_empty(new_pool);
660 if (scan->collect_attrs[0] != NULL)
662 const char *const *saveattr = &scan->collect_attrs[0];
664 new_xes->attrs = apr_hash_make(new_pool);
665 for (; *saveattr != NULL; ++saveattr)
670 if (**saveattr == '?')
672 name = *saveattr + 1;
673 value = svn_xml_get_attr_value(name, attrs);
678 value = svn_xml_get_attr_value(name, attrs);
680 return svn_error_createf(SVN_ERR_XML_ATTRIB_NOT_FOUND,
682 _("Missing XML attribute: '%s'"),
687 svn_hash_sets(new_xes->attrs, name,
688 apr_pstrdup(new_pool, value));
694 /* Prep the new state. */
695 new_xes = apr_pcalloc(new_pool, sizeof(*new_xes));
696 /* STATE_POOL remains NULL. */
699 /* Some basic copies to set up the new estate. */
700 new_xes->state = scan->to_state;
701 new_xes->tag.name = apr_pstrdup(new_pool, elemname.name);
702 new_xes->tag.namespace = apr_pstrdup(new_pool, elemname.namespace);
703 new_xes->custom_close = scan->custom_close;
705 /* Start with the parent's namespace set. */
706 new_xes->ns_list = current->ns_list;
708 /* The new state is prepared. Make it current. */
709 new_xes->prev = current;
710 xmlctx->current = new_xes;
712 if (xmlctx->opened_cb)
714 START_CALLBACK(xmlctx);
715 SVN_ERR(xmlctx->opened_cb(new_xes, xmlctx->baton,
716 new_xes->state, &new_xes->tag,
717 xmlctx->scratch_pool));
718 END_CALLBACK(xmlctx);
719 svn_pool_clear(xmlctx->scratch_pool);
727 svn_ra_serf__xml_cb_end(svn_ra_serf__xml_context_t *xmlctx,
728 const char *raw_name)
730 svn_ra_serf__xml_estate_t *xes = xmlctx->current;
731 svn_ra_serf__dav_props_t elemname;
733 svn_ra_serf__expand_ns(&elemname, xes->ns_list, raw_name);
735 if (xmlctx->waiting.namespace != NULL)
737 /* If this element is not the closer, then keep waiting... */
738 if (strcmp(elemname.name, xmlctx->waiting.name) != 0
739 || strcmp(elemname.namespace, xmlctx->waiting.namespace) != 0)
742 /* Found it. Stop waiting, and go back for more. */
743 xmlctx->waiting.namespace = NULL;
747 /* We should be looking at the same tag that opened the current state.
749 Unknown elements are simply skipped, so we wouldn't reach this check.
751 Known elements push a new state for a given tag. Some other elemname
752 would imply closing an ancestor tag (where did ours go?) or a spurious
754 if (strcmp(elemname.name, xes->tag.name) != 0
755 || strcmp(elemname.namespace, xes->tag.namespace) != 0)
756 return svn_error_create(SVN_ERR_XML_MALFORMED, NULL,
757 _("The response contains invalid XML"));
759 if (xes->custom_close)
761 const svn_string_t *cdata;
765 cdata = svn_stringbuf__morph_into_string(xes->cdata);
767 /* We might toss the pool holding this structure, but it could also
768 be within a parent pool. In any case, for safety's sake, disable
769 the stringbuf against future Badness. */
770 xes->cdata->pool = NULL;
776 START_CALLBACK(xmlctx);
777 SVN_ERR(xmlctx->closed_cb(xes, xmlctx->baton, xes->state,
779 xmlctx->scratch_pool));
780 END_CALLBACK(xmlctx);
781 svn_pool_clear(xmlctx->scratch_pool);
785 xmlctx->current = xes->prev;
787 /* ### not everything should go on the free state list. XES may go
788 ### away with the state pool. */
789 xes->prev = xmlctx->free_states;
790 xmlctx->free_states = xes;
792 /* If there is a STATE_POOL, then toss it. This will get rid of as much
793 memory as possible. Potentially the XES (if we didn't create a pool
794 right away, then XES may be in a parent pool). */
796 svn_pool_destroy(xes->state_pool);
803 svn_ra_serf__xml_cb_cdata(svn_ra_serf__xml_context_t *xmlctx,
807 /* If we are waiting for a closing tag, then we are uninterested in
808 the cdata. Just return. */
809 if (xmlctx->waiting.namespace != NULL)
812 /* If the current state is collecting cdata, then copy the cdata. */
813 if (xmlctx->current->cdata != NULL)
815 svn_stringbuf_appendbytes(xmlctx->current->cdata, data, len);
817 /* ... else if a CDATA_CB has been supplied, then invoke it for
819 else if (xmlctx->cdata_cb != NULL)
821 START_CALLBACK(xmlctx);
822 SVN_ERR(xmlctx->cdata_cb(xmlctx->current,
824 xmlctx->current->state,
826 xmlctx->scratch_pool));
827 END_CALLBACK(xmlctx);
828 svn_pool_clear(xmlctx->scratch_pool);