]> CyberLeo.Net >> Repos - SourceForge/phpwiki.git/blob - lib/RssParser.php
fix for RSS feeds without detailed <item> tags:
[SourceForge/phpwiki.git] / lib / RssParser.php
1 <?php // -*-php-*-
2 rcs_id('$Id: RssParser.php,v 1.11 2005-04-10 10:24:58 rurban Exp $');
3 /**
4  * Simple RSSParser Class
5  * Based on Duncan Gough RSSParser class
6  * Copyleft Arnaud Fontaine
7  * Licence : GPL
8  * See lib/plugin/RssFeed.php and lib/XmlParser.php
9  *
10  * The myth of RSS compatibility:
11  *   http://diveintomark.org/archives/2004/02/04/incompatible-rss
12  */
13
14 /*
15  This file is part of PhpWiki.
16
17  PhpWiki is free software; you can redistribute it and/or modify
18  it under the terms of the GNU General Public License as published by
19  the Free Software Foundation; either version 2 of the License, or
20  (at your option) any later version.
21
22  PhpWiki is distributed in the hope that it will be useful,
23  but WITHOUT ANY WARRANTY; without even the implied warranty of
24  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25  GNU General Public License for more details.
26
27  You should have received a copy of the GNU General Public License
28  along with PhpWiki; if not, write to the Free Software
29  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30  */
31
32 /**
33  * 2004-04-09 16:30:50 rurban: 
34  *   added fsockopen allow_url_fopen = Off workaround
35  * 2004-04-12 20:04:12 rurban: 
36  *   fixes for IMAGE element (sf.net)
37  * 2005-04-10 11:17:35 rurban
38  *   certain RSS feeds don't contain <item> tags to describe the list of <items>
39  *     http://ws.audioscrobbler.com/rdf/ for example
40  */
41
42 require_once('lib/XmlParser.php');
43
/**
 * Event-driven RSS/RDF feed parser.
 *
 * The three callbacks tag_open(), tag_close() and cdata() are presumably
 * registered as expat handlers by the XmlParser base class (see
 * lib/XmlParser.php); tag names are compared in upper case, so case folding
 * is assumed to be enabled there.
 *
 * After parse() has been called with $is_final = true:
 *   - $this->items   is a list of arrays with the keys "title",
 *                    "description" and "link" (entries taken from an
 *                    <items> list only carry "link" and an empty "title"),
 *   - $this->channel is an array with the keys "title", "description",
 *                    "link", "date" and "divers".
 */
class RSSParser 
extends XmlParser {

    /** Channel title, built from TITLE text found outside of items. */
    var $title = "";
    /** Channel link, taken from LINK text found outside of items. */
    var $link  = "";
    /** Channel description, built from DESCRIPTION text outside of items. */
    var $description = "";
    /** True while inside an ITEM or IMAGE element. */
    var $inside_item = false;
    /** True while inside an ITEMS element (the RDF table of contents). */
    var $list_items = false;
    /** Fields of the element currently being collected, keyed by tag name. */
    var $item  = array();
    /** Collected feed entries, see the class comment for the layout. */
    var $items;
    /** Channel description array, filled when CHANNEL is closed. */
    var $channel;
    /** Text of all channel-level tags that are not handled explicitly. */
    var $divers = "";
    /** Channel date, built from DC:DATE text found outside of items. */
    var $date = "";
    /**
     * True while $items still holds link-only placeholders taken from an
     * <items> list which have not yet been replaced by detailed <item>s.
     */
    var $_items_from_list = false;

    /**
     * Start-element callback.
     *
     * Remembers the tag name and attributes in the globals $current_tag and
     * $current_attrs (read by cdata()) and tracks whether we are inside an
     * ITEM/IMAGE or an ITEMS element.  RDF:LI entries of an ITEMS list are
     * recorded right here, so they are found even when no character data
     * follows the element.
     *
     * @param resource $parser the XML parser handle (unused)
     * @param string   $name   tag name
     * @param mixed    $attrs  attribute array of the tag
     */
    function tag_open($parser, $name, $attrs=''){
        global $current_tag, $current_attrs;

        $current_tag = $name;
        $current_attrs = $attrs;
        if ($name == "ITEM" || $name == "IMAGE") {
            // IMAGE is handled like an item so that its TITLE/LINK children
            // do not end up in the channel fields; it is dropped on close.
            $this->inside_item = true;
        } elseif ($name == "ITEMS") {
            $this->list_items = true;
        } elseif ($name == "RDF:LI" && $this->list_items && !$this->inside_item) {
            $this->_addListedItem($attrs);
        }
    }

    /**
     * End-element callback.
     *
     * ITEM:    appends the collected item to $items.
     * IMAGE:   discards the collected fields.
     * CHANNEL: stores the collected channel fields in $channel.
     * ITEMS:   leaves list mode.
     * $items and $channel are additionally published by reference in
     * $GLOBALS so that parse() can recover them (see parse()).
     *
     * @param resource $parser  the XML parser handle (unused)
     * @param string   $tagName tag name
     * @param mixed    $attrs   unused
     */
    function tag_close($parser, $tagName, $attrs=''){
        if ($tagName == "ITEM") {
            if (empty($this->items)) {
                $this->items = array(); 
                $GLOBALS['rss_parser_items'] =& $this->items;
            } elseif ($this->_items_from_list
                      && !empty($this->items[0]['link'])
                      && $this->items[0]['title'] == '') {
                // override the initial <items> list with detailed <item>'s.
                // The flag makes sure this happens only once: without it a
                // first detailed item lacking a title would look like a
                // placeholder and every further item would wipe the list.
                $this->items = array();
                $GLOBALS['rss_parser_items'] =& $this->items;
                $this->_items_from_list = false;
            }
            // An item is not required to contain all three children.
            $this->items[] = array("title"       => $this->_itemField('TITLE'),
                                   "description" => $this->_itemField('DESCRIPTION'),
                                   "link"        => $this->_itemField('LINK'));
            $this->_resetCurrentItem();
            $this->inside_item = false;
        } elseif ($tagName == "IMAGE") {
            $this->_resetCurrentItem();
            $this->inside_item = false;
        } elseif ($tagName == "CHANNEL") {
            $this->channel = array("title" => $this->title,
                                   "description" => $this->description,
                                   "link" => $this->link,
                                   "date" => $this->date,
                                   "divers" => $this->divers);
            $GLOBALS['rss_parser_channel'] =& $this->channel;
            $this->title       = "";
            $this->description = "";
            $this->link        = "";
            $this->divers      = "";
            $this->date        = "";
        } elseif ($tagName == "ITEMS") {
            $GLOBALS['rss_parser_items'] =& $this->items;
            $this->_resetCurrentItem();
            $this->list_items = false;
        }
    }

    /**
     * Character-data callback.
     *
     * May be called several times per element. The text is attributed to
     * the most recently opened tag ($current_tag), which is why
     * whitespace-only chunks are ignored where a value gets assigned.
     *
     * @param resource $parser the XML parser handle (unused)
     * @param string   $data   chunk of character data
     */
    function cdata($parser, $data){
        global $current_tag;

        $text = trim($data);
        if ($this->inside_item) {
            // isset() rather than empty(): a collected value of "0" must
            // not be thrown away when the next chunk arrives.
            if (!isset($this->item[$current_tag]))
                $this->item[$current_tag] = '';
            if ($current_tag == 'LINK') {
                // The last non-blank chunk wins.
                if ($text !== '')
                    $this->item[$current_tag] = $text;
            } else {
                $this->item[$current_tag] .= $text;
            }
        } elseif ($this->list_items) {
            // RDF:LI entries are recorded in tag_open(). Text inside
            // <items> is ignored and must not leak into the channel fields.
        } else {
            switch ($current_tag) {
            case "TITLE":
                if ($text !== '')
                    $this->title .= " " . $text;
                break;
            case "DESCRIPTION":
                if ($text !== '')
                    $this->description .= $text;
                break;
            case "LINK":
                if ($text !== '')
                    $this->link = $text;
                break;
            case "DC:DATE":
                if ($text !== '')
                    $this->date .= " " . $text;
                // Without this break the date was also added to $divers.
                break;
            default:
                if ($text !== '')
                    $this->divers .= " " . $current_tag."/".$data;
                break;
            }
        }
    }
    
    /**
     * Feed a chunk of the document to the XML parser.
     *
     * A parse error is reported as E_USER_WARNING; it does not abort.
     *
     * @param string $content  (part of) the XML document
     * @param bool   $is_final true for the last (or only) chunk
     */
    function parse($content, $is_final = true) {
        if (!xml_parse($this->_parser, $content, $is_final)) {
            trigger_error(sprintf("XML error: %s at line %d", 
                                  xml_error_string(xml_get_error_code($this->_parser)), 
                                  xml_get_current_line_number($this->_parser)),
                          E_USER_WARNING);
        }
        //OO workaround: parser object loses its params. we have to store them in globals
        if ($is_final) {
            if (empty($this->items) && isset($GLOBALS['rss_parser_items']))
                $this->items = $GLOBALS['rss_parser_items'];
            if (empty($this->channel) && isset($GLOBALS['rss_parser_channel']))
                $this->channel = $GLOBALS['rss_parser_channel'];
            unset($GLOBALS['rss_parser_items']);
            unset($GLOBALS['rss_parser_channel']);
        }
    }

    /**
     * Record one RDF:LI entry of an <items> list as a link-only item.
     * An entry pointing to the same resource as the previous one is skipped.
     *
     * @param mixed $attrs attributes of the RDF:LI tag
     * @access private
     */
    function _addListedItem($attrs) {
        $resource = (is_array($attrs) && isset($attrs['RDF:RESOURCE']))
            ? $attrs['RDF:RESOURCE'] : null;
        $last_link = null;
        // $items is still unset for the first entry; never index it blindly.
        if (!empty($this->items)) {
            $last = end($this->items);
            if (isset($last['link']))
                $last_link = $last['link'];
        }
        if ($last_link != $resource) {
            $this->items[] = array('link' => $resource,
                                   'title' => '');
            $this->_items_from_list = true;
        }
    }

    /**
     * Return a field of the item being collected, '' if it was not seen.
     *
     * @param string $key upper-case tag name
     * @return string
     * @access private
     */
    function _itemField($key) {
        return isset($this->item[$key]) ? $this->item[$key] : '';
    }

    /**
     * Clear the fields of the item being collected.
     *
     * @access private
     */
    function _resetCurrentItem() {
        $this->item = array("TITLE"       => "",
                            "DESCRIPTION" => "",
                            "LINK"        => "");
    }
}
177
178 // $Log: not supported by cvs2svn $
179 // Revision 1.10  2005/01/22 11:45:09  rurban
180 // docs
181 //
182 // Revision 1.9  2004/06/08 21:12:02  rurban
183 // is_final fix for incremental parsing
184 //
185 // Revision 1.8  2004/06/08 21:03:20  rurban
186 // updated RssParser for XmlParser quirks (store parser object params in globals)
187 //
188 // Revision 1.7  2004/05/24 17:31:31  rurban
189 // new XmlParser and HtmlParser, RssParser based on that.
190 //
191 // Revision 1.6  2004/05/18 16:18:36  rurban
192 // AutoSplit at subpage seperators
193 // RssFeed stability fix for empty feeds or broken connections
194 //
195 // Revision 1.5  2004/04/26 20:44:34  rurban
196 // locking table specific for better databases
197 //
198 // Revision 1.4  2004/04/18 01:11:51  rurban
199 // more numeric pagename fixes.
200 // fixed action=upload with merge conflict warnings.
201 // charset changed from constant to global (dynamic utf-8 switching)
202 //
203
204 // For emacs users
205 // Local Variables:
206 // mode: php
207 // tab-width: 8
208 // c-basic-offset: 4
209 // c-hanging-comment-ender-p: nil
210 // indent-tabs-mode: nil
211 // End:
212 ?>