2 rcs_id('$Id: XmlParser.php,v 1.1 2004-05-24 17:31:31 rurban Exp $');
4 * Base XmlParser Class.
5 * Requires the expat.so/.dll, usually enabled by default.
6 * Used by HtmlParser and RssParser.
10 * TODO: Convert more perl Html::Element style to our XmlElement style
11 * Needed additions to XmlElement:
12 * Html::Element::parent() <=> XmlElement::parent
13 * Html::Element::attr() <=> XmlElement::getAttr()
14 * Html::Element::tag <=> XmlElement::_tag
15 * Html::Element::content_list() <=> ->getContent() ??? or ->_children[]
16 * all_external_attr_names() <=>
19 * The HtmlParser object set by xml_parse() doesn't keep its parameters,
20 * esp. $this->root is lost. So we have to copy this into a global.
24 This file is part of PhpWiki.
26 PhpWiki is free software; you can redistribute it and/or modify
27 it under the terms of the GNU General Public License as published by
28 the Free Software Foundation; either version 2 of the License, or
29 (at your option) any later version.
31 PhpWiki is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 GNU General Public License for more details.
36 You should have received a copy of the GNU General Public License
37 along with PhpWiki; if not, write to the Free Software
38 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
42 * class XmlParser - Parse into a tree of XmlElement nodes.
45 * inside the handlers no globals are transported, only class vars.
46 * when leaving the handler class all class vars are destroyed, so we
47 * have to copy the root to a global.
// Parser state: $_parser holds the handle returned by xml_parser_create(),
// $root is the first element opened during a parse, and $current is the
// element most recently opened by tag_open().
52 var $_parser, $root, $current;
/**
 * Create the underlying expat parser and register this object's handlers.
 *
 * Also clears any $GLOBALS['xml_parser_root'] left over from an earlier
 * parse, since the tree root is published through that global.
 *
 * @param string $encoding Optional encoding handed to xml_parser_create()
 *                         (e.g. "ISO-8859-1"). When empty, the parser is
 *                         created with the extension's default.
 */
function XmlParser($encoding = '') { // "ISO-8859-1"
    if ($encoding) {
        $this->_parser = xml_parser_create($encoding);
    } else {
        $this->_parser = xml_parser_create();
    }

    // Fix: configure the parser created above and read the global named
    // 'charset'. The previous code passed the undefined local $xml_parser
    // and indexed $GLOBALS with the undefined $charset, so the target
    // encoding was never applied.
    if (!empty($GLOBALS['charset'])) {
        xml_parser_set_option($this->_parser,
                              XML_OPTION_TARGET_ENCODING,
                              $GLOBALS['charset']);
    }

    // The callbacks are bound to this instance by reference so that the
    // handlers update this object's $root / $current.
    xml_set_element_handler($this->_parser,
                            array(&$this, 'tag_open'),
                            array(&$this, 'tag_close'));
    xml_set_character_data_handler($this->_parser,
                                   array(&$this, 'cdata'));

    // Hack: workaround php OO bug
    unset($GLOBALS['xml_parser_root']);
}
/**
 * Release the expat parser and tear down the published parse tree.
 *
 * Frees $this->_parser when one is set, then, if the global
 * $xml_parser_root is set, calls __destruct() on it.
 *
 * NOTE(review): unset() on a name imported with `global` only removes this
 * function's local alias; the entries in $GLOBALS stay in place. Confirm
 * whether callers rely on the root still being reachable afterwards before
 * changing this.
 */
function __destruct() {
    global $xml_parser_root, $xml_parser_current;

    if (!empty($this->_parser)) {
        xml_parser_free($this->_parser);
    }
    unset($this->_parser);

    if (isset($xml_parser_root)) {
        $xml_parser_root->__destruct();
        unset($xml_parser_root); // nested parsing forbidden!
    }

    unset($xml_parser_current);
}
/**
 * Start-element handler: build an XmlElement for the tag and link it into
 * the tree.
 *
 * The new node is appended to the content of $this->current (if any) and
 * then becomes $this->current. The first node ever opened also becomes
 * $this->root and is published as $GLOBALS['xml_parser_root'].
 *
 * @param mixed        $parser Parser handle supplied by the XML extension (unused).
 * @param string       $name   Tag name; stored lowercased in $this->_tag.
 * @param array|string $attrs  Attributes, either as a name => value array
 *                             or as a space-separated string of
 *                             `name=value` / bare `name` tokens. Names are
 *                             lowercased and all quote characters are
 *                             removed from values.
 */
function tag_open($parser, $name, $attrs='') {
    $this->_tag = strtolower($name);
    $node = new XmlElement($this->_tag);

    if (is_string($attrs) and !empty($attrs)) {
        // lowercase attr names
        // explode() replaces the regex-based split(), which is deprecated
        // since PHP 5.3 and removed in PHP 7.0.
        foreach (explode(' ', $attrs) as $pair) {
            if (strstr($pair, "=")) {
                // Limit 2: keep any further '=' inside the value instead of
                // silently dropping everything after the second one.
                list($key, $val) = explode('=', $pair, 2);
                $key = strtolower(trim($key));
                $val = str_replace(array('"', "'"), '', trim($val));
                $node->_attr[$key] = $val;
            } else {
                // Bare attribute: it is stored with its own name as value.
                $key = str_replace(array('"', "'"), '', strtolower(trim($pair)));
                $node->_attr[$key] = $key;
            }
        }
    } elseif (!empty($attrs) and is_array($attrs)) {
        foreach ($attrs as $key => $val) {
            $key = strtolower(trim($key));
            $val = str_replace(array('"', "'"), '', trim($val));
            $node->_attr[$key] = $val;
        }
    }

    if (!is_null($this->current)) {
        $this->current->_content[] =& $node; // copy or ref?
        $node->parent =& $this->current;     // ref
    }
    $this->current =& $node;                 // ref
    if (empty($this->root)) {
        $this->root =& $node;                // ref for === test in cdata()
        $GLOBALS['xml_parser_root'] =& $this->root;
    }
}
/**
 * End-element handler registered with the XML parser.
 *
 * Currently a no-op: $this->current is left untouched when an element
 * closes. Earlier attempts to track the parent here were disabled.
 *
 * @param mixed  $parser Parser handle supplied by the XML extension (unused).
 * @param string $name   Name of the closing tag (unused).
 * @param mixed  $attrs  Unused.
 */
function tag_close($parser, $name, $attrs='') {
    // Nothing to do.
}
/**
 * Character-data handler: attach a text chunk to the current element.
 *
 * When no element is open, the text is reported with an E_USER_WARNING
 * instead of being stored. After that, if $this->current and $this->root
 * are identical, the global root reference is re-published.
 *
 * @param mixed  $parser Parser handle supplied by the XML extension (unused).
 * @param string $data   Text found between tags.
 */
function cdata($parser, $data) {
    if (!isset($this->current)) {
        trigger_error(sprintf("unparsed content outside tags: %s",$data), E_USER_WARNING);
    } else {
        $this->current->_content[] = $data;
    }

    // workaround php OO bug: ref => copy
    if ($this->current === $this->root) {
        $GLOBALS['xml_parser_root'] =& $this->root;
    }
}
// Feed XML text to the parser held in $this->_parser.
//   $content   XML text (a whole document or one chunk of it).
//   $is_final  Passed straight to xml_parse(); true marks the last chunk.
// When xml_parse() returns a falsy value, an "XML error: <message> at line
// <n>" error is raised through trigger_error(), built from the parser's
// current error code and line number.
// NOTE(review): the remaining trigger_error() argument(s) and the end of the
// function are not visible in this excerpt - confirm the error level.
137 function parse($content, $is_final = true) {
138 xml_parse($this->_parser, $content, $is_final) or
139 trigger_error(sprintf("XML error: %s at line %d",
140 xml_error_string(xml_get_error_code($this->_parser)),
141 xml_get_current_line_number($this->_parser)),
// Fetch the document at $file and run it through the XML parser.
//   $file   Location of the XML document; handed to fopen() or, in the
//           HttpClient path, split into host/port/path/query.
//   $debug  When true, sets $client->debug on the HttpClient.
// NOTE(review): several lines of this function are not visible in this
// excerpt (branch boundaries, the failure handling after get(), and what is
// done with $data at the end) - the comments below describe only what is shown.
145 function parse_url($file, $debug=false) {
// Direct stream access is used when the allow_url_fopen ini setting is on.
146 if (ini_get('allow_url_fopen')) { //FIXME: get_cfg_var
// A failed fopen() terminates the whole script via die().
147 $fp = fopen("$file","r") or die("Error reading XML file, $file");
// Read in 4096-byte chunks; feof() tells xml_parse() whether this is the last one.
// NOTE(review): the loop also stops on any chunk PHP treats as falsy (e.g. "0").
148 while ($data = fread($fp, 4096)) {
149 xml_parse($this->_parser, $data, feof($fp)) or
150 trigger_error(sprintf("XML error: %s at line %d",
151 xml_error_string(xml_get_error_code($this->_parser)),
152 xml_get_current_line_number($this->_parser)),
157 // other url_fopen workarounds: curl, socket (http 80 only)
158 require_once("lib/HttpClient.php");
// This is PHP's global parse_url() function, not this method (no $this->).
159 $bits = parse_url($file);
160 $host = $bits['host'];
// Port defaults to 80 and path to '/' when the URL does not carry them.
161 $port = isset($bits['port']) ? $bits['port'] : 80;
162 $path = isset($bits['path']) ? $bits['path'] : '/';
163 if (isset($bits['query'])) {
164 $path .= '?'.$bits['query'];
166 $client = new HttpClient($host, $port);
167 $client->use_gzip = false;
168 if ($debug) $client->debug = true;
169 if (!$client->get($path)) {
172 $data = $client->getContent();
// Nothing was retrieved: return without parsing.
174 if (empty($data)) return;
180 // $Log: not supported by cvs2svn $
182 // 2004-04-09 16:30:50 rurban:
183 // added fsockopen allow_url_fopen = Off workaround
190 // c-hanging-comment-ender-p: nil
191 // indent-tabs-mode: nil