4 * Base XmlParser Class.
5 * Requires the expat.so/.dll, usually enabled by default.
6 * Used by HtmlParser and RssParser.
10 * TODO: Convert more perl Html::Element style to our XmlElement style
11 * Needed additions to XmlElement:
12 * Html::Element::parent() <=> XmlElement::parent
13 * Html::Element::attr() <=> XmlElement::getAttr()
14 * Html::Element::tag <=> XmlElement::_tag
15 * Html::Element::content_list() <=> ->getContent() ??? or ->_children[]
16 * all_external_attr_names() <=>
19 * The HtmlParser object set by xml_parse() doesn't keep its parameters,
20 * esp. $this->root is lost. So we have to copy this into a global.
24 * This file is part of PhpWiki.
26 * PhpWiki is free software; you can redistribute it and/or modify
27 * it under the terms of the GNU General Public License as published by
28 * the Free Software Foundation; either version 2 of the License, or
29 * (at your option) any later version.
31 * PhpWiki is distributed in the hope that it will be useful,
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 * GNU General Public License for more details.
36 * You should have received a copy of the GNU General Public License along
37 * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
38 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
42 * class XmlParser - Parse into a tree of XmlElement nodes.
45 * Inside the handlers no globals are transported, only class vars.
46 * When leaving the handler class all class vars are destroyed, so we
47 * have to copy the root to a global.
// Parser state shared by the handler methods:
//   $_parser  - handle returned by xml_parser_create()
//   $root     - first element node created by tag_open()
//   $current  - node that currently receives children / character data
// NOTE(review): in the visible code ->previous and ->parent are only ever
// assigned on element nodes, never on the parser object itself, and
// tag_open() writes $this->_tag, which is not declared here - confirm
// against the complete file.
53 public $_parser, $root, $current, $previous, $parent;
// Constructor (named after the class, i.e. PHP 4 style - assuming the
// enclosing class is XmlParser as the header comment says; its declaration
// is not visible in this view).
//
// Creates the expat parser handle, forces UTF-8 as the target encoding and
// registers this object's tag_open(), tag_close() and cdata() methods as the
// element and character-data handlers.
//
// $encoding  Source encoding handed to xml_parser_create().
//
// NOTE(review): two consecutive assignments to $this->_parser are visible
// below; the lines that select between them are missing from this view -
// presumably an if/else on $encoding. Confirm against the complete file.
55 function XmlParser($encoding = '')
58 $this->_parser = xml_parser_create($encoding);
60 $this->_parser = xml_parser_create();
62 xml_parser_set_option($this->_parser, XML_OPTION_TARGET_ENCODING, 'UTF-8');
64 //This unfortunately does not work
65 //xml_set_object($this->_parser, &$this);
// Handlers are given as array(object, method) callbacks instead.
67 xml_set_element_handler($this->_parser,
68 array(&$this, 'tag_open'),
69 array(&$this, 'tag_close'));
70 xml_set_character_data_handler($this->_parser,
71 array(&$this, 'cdata'));
72 //xml_set_element_handler($this->_parser, "tag_open", "tag_close");
73 //xml_set_character_data_handler($this->_parser, "cdata");
75 // Hack: workaround php OO bug
// Drop any tree left in the global by an earlier parser instance.
76 unset($GLOBALS['xml_parser_root']);
// Teardown code: frees the expat handle and disposes of the globally
// published element tree.
// NOTE(review): the method header for these statements is not visible in
// this view; presumably a destructor - confirm.
81 global $xml_parser_root, $xml_parser_current;
// Release the parser handle only if one was actually created.
83 if (!empty($this->_parser)) xml_parser_free($this->_parser);
84 unset($this->_parser);
86 if (isset($xml_parser_root)) {
87 $xml_parser_root->_destruct();
// NOTE(review): unset() on a name imported with "global" only removes the
// local alias; the $GLOBALS['xml_parser_root'] entry itself stays set.
// The same applies to $xml_parser_current below. Confirm whether
// unset($GLOBALS[...]) was intended.
88 unset($xml_parser_root); // nested parsing forbidden!
90 unset($xml_parser_current);
// Start-element handler registered with xml_set_element_handler().
//
// Builds an XmlElement for the lowercased tag name, copies the attributes
// onto it with lowercased keys, appends it to the content of the current
// node (if any) and makes it the new current node. The first node created
// also becomes the root and is published in $GLOBALS['xml_parser_root'].
//
// $parser  Parser handle (not used in the visible lines).
// $name    Element name as reported by the parser.
// $attrs   Either an array of name => value pairs, or a string of
//          space-separated name=value pairs.
93 function tag_open($parser, $name, $attrs = '')
95 $this->_tag = strtolower($name);
96 $node = new XmlElement($this->_tag);
97 if (is_string($attrs) and !empty($attrs)) {
98 // lowercase attr names
// NOTE(review): splitting on single spaces breaks quoted values that
// themselves contain a space.
99 foreach (explode(' ', $attrs) as $pair) {
100 if (strstr($pair, "=")) {
// NOTE(review): explode() has no limit here, so a value containing a
// further '=' is cut off at that character.
101 list($key, $val) = explode('=', $pair);
102 $key = strtolower(trim($key));
// Every single and double quote character is removed from the value.
103 $val = str_replace(array('"', "'"), '', trim($val));
104 $node->_attr[$key] = $val;
// Pair without '=': the attribute name is stored as its own value.
// (The branch line preceding this statement is not visible in this view -
// presumably an else.)
106 $key = str_replace(array('"', "'"), '', strtolower(trim($pair)));
107 $node->_attr[$key] = $key;
110 } elseif (!empty($attrs) and is_array($attrs)) {
111 foreach ($attrs as $key => $val) {
112 $key = strtolower(trim($key));
113 $val = str_replace(array('"', "'"), '', trim($val));
114 $node->_attr[$key] = $val;
// Link the new node underneath the node that is currently open.
117 if (!is_null($this->current)) {
118 $this->current->_content[] =& $node; // copy or ref?
119 $node->previous =& $this->current; // ref to parallel prev
121 $this->current =& $node; // ref
122 if (empty($this->root)) {
123 $this->root =& $node; // ref for === test below
124 $GLOBALS['xml_parser_root'] =& $this->root; // copy
// End-element handler registered with xml_set_element_handler().
//
// $parser, $name and $attrs are not used in the visible lines.
//
// NOTE(review): the first statement stores the current node in its own
// ->parent property and the second rebinds $this->current to that property,
// so as written $this->current appears to keep pointing at the same node
// instead of moving up the tree. tag_open() records the enclosing node in
// ->previous. Confirm the intended behaviour before changing anything.
// NOTE(review): there is no visible guard for $this->current being unset.
128 function tag_close($parser, $name, $attrs = '')
130 $this->current->parent = $this->current; // copy!
131 $this->current =& $this->current->parent; // ref!
132 //unset($this->current);
// Character-data handler registered with xml_set_character_data_handler().
//
// Appends the text chunk to the content list of the current node. A warning
// is raised for text that arrives while no node is current.
//
// $parser  Parser handle (not used in the visible lines).
// $data    Text chunk delivered by the parser.
135 function cdata($parser, $data)
137 if (isset($this->current)) {
138 $this->current->_content[] = $data;
// NOTE(review): the branch line preceding the warning is not visible in
// this view; presumably this is the else case of the isset() test above.
140 trigger_error(sprintf("unparsed content outside tags: %s", $data), E_USER_WARNING);
// Re-publish the root in the global whenever the root itself received data.
142 if ($this->current === $this->root) { // workaround php OO bug: ref => copy
143 $GLOBALS['xml_parser_root'] =& $this->root; // copy!
144 //$this->root = $this->current; // copy?
// Feeds a chunk of XML to the expat parser.
//
// When xml_parse() returns a falsy value, an error is raised that carries
// the parser's error string and the current line number.
//
// $content   XML text to parse.
// $is_final  Passed through to xml_parse() as its third argument.
//
// NOTE(review): the closing arguments of the trigger_error() call (for
// example the error level) are not visible in this view.
148 function parse($content, $is_final = true)
150 xml_parse($this->_parser, $content, $is_final) or
151 trigger_error(sprintf("XML error: %s at line %d",
152 xml_error_string(xml_get_error_code($this->_parser)),
153 xml_get_current_line_number($this->_parser)),
// Fetches a document from a file or URL and hands it to parse().
//
// When the allow_url_fopen configuration value is set, the resource is
// opened with fopen() and read in 4096-byte chunks; otherwise the helper
// url_get_contents() (defined outside this view) is used.
//
// $file   Path or URL to read.
// $debug  Not used in the visible lines.
//
// NOTE(review): the lines that accumulate $content, close the handle, return
// after an error and separate the two branches are not visible in this
// view - confirm against the complete file.
157 function parse_url($file, $debug = false)
// NOTE(review): get_cfg_var() reads the configuration-file value, which can
// differ from a setting changed at runtime - confirm this is intended.
159 if (get_cfg_var('allow_url_fopen')) {
160 if (!($fp = fopen("$file", "r"))) {
161 trigger_error("Error parse url $file");
// The loop ends on the first falsy chunk returned by fread().
165 while ($data = fread($fp, 4096)) {
169 $this->parse($content);
171 // other url_fopen workarounds: curl, socket (http 80 only)
172 $data = url_get_contents($file);
174 trigger_error("Error parse url $file");
186 // c-hanging-comment-ender-p: nil
187 // indent-tabs-mode: nil