4 * Copyright 2005 Wincor Nixdorf International GmbH
6 * This file is part of PhpWiki.
8 * PhpWiki is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * PhpWiki is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * HtmlConverter: Convert HTML tags to wiki markup as far as possible
25 * and eliminate all other HTML markup, so the output can be
26 * copied and pasted into a wiki page.
27 * Credit to an unknown programmer, who has provided the first
28 * version 0.01 on http://www.gpgstudy.com/striphtml.phps
29 * Usage: <<HtmlConverter >>
30 * Author: HendrikScheider <hendrik.scheider@wincor-nixdorf.com>
/**
 * Plugin that shows an upload form for an HTML file and, once a file has
 * been uploaded, converts its markup to wiki markup as far as possible.
 *
 * The converted text is presented in a text area so that it can be copied
 * and pasted into a wiki page.
 */
class WikiPlugin_HtmlConverter extends WikiPlugin
{
    /**
     * @return string The plugin name.
     */
    function getName()
    {
        return "HtmlConverter";
    }

    /**
     * @return string Translated one-line description of the plugin.
     */
    function getDescription()
    {
        return _("Convert HTML markup into wiki markup.");
    }

    /**
     * @return array The plugin takes no arguments yet, so the list is empty.
     */
    function getDefaultArguments()
    {
        return array();
    }

    /**
     * Build the upload form and, if a file was uploaded with this request,
     * append the conversion result (or an error message) below it.
     *
     * @param mixed  $dbi      Not used by this plugin.
     * @param string $argstr   Not used; the plugin has no arguments yet.
     * @param object $request  Supplies the form's post URL and the uploaded file.
     * @param string $basepage Not used by this plugin.
     * @return object HTML container holding the form followed by the message area.
     */
    function run($dbi, $argstr, &$request, $basepage)
    {
        $form = HTML::form(array('action' => $request->getPostURL(),
                                 'enctype' => 'multipart/form-data',
                                 'method' => 'post'));
        $contents = HTML::div(array('class' => 'wikiaction'));
        $contents->pushContent(HTML::input(array('type' => 'hidden',
                                                 'name' => 'MAX_FILE_SIZE',
                                                 'value' => MAX_UPLOAD_SIZE)));
        $contents->pushContent(HTML::input(array('name' => 'userfile',
                                                 'type' => 'file',
                                                 'size' => '50')));
        $contents->pushContent(HTML::raw(" "));
        $contents->pushContent(HTML::input(array('value' => _("Convert"),
                                                 'type' => 'submit')));
        $form->pushContent($contents);

        $message = HTML();
        $userfile = $request->getUploadedFile('userfile');
        if ($userfile) {
            // basename() drops any client-side directory part of the name.
            $userfile_name = basename($userfile->getName());
            $userfile_tmpname = $userfile->getTmpName();

            if (!preg_match("/(\.html|\.htm)$/i", $userfile_name)) {
                $message->pushContent(_("Only files with extension HTML are allowed"), HTML::br(), HTML::br());
            } else {
                // Keep the file name out of the msgid: a msgid containing the
                // interpolated name can never match a catalogue entry.
                $message->pushContent(sprintf(_("Processed %s"), $userfile_name), HTML::br(), HTML::br());
                $message->pushContent(_("Copy the output below and paste it into your Wiki page."), HTML::br());
                $message->pushContent($this->_process($userfile_tmpname));
            }
        } else {
            $message->pushContent(HTML::br(), HTML::br());
        }

        $result = HTML();
        $result->pushContent($form);
        $result->pushContent($message);
        return $result;
    }

    /**
     * Convert anchors of the form <a href="URL" ...>text</a> to "[ text | URL ]".
     *
     * Works in two steps: the opening tag is first replaced by "{{URL}}",
     * then "{{URL}}text</a>" is rewritten as a wiki link. An anchor whose
     * content does not start with plain text keeps its "{{URL}}" marker.
     *
     * @param string $file Document text, modified in place.
     */
    function _processA(&$file)
    {
        // Characters accepted inside the quoted href value. The backslash is
        // listed explicitly because the POSIX bracket expression this was
        // ported from treated its "\(" and "\)" as three literal characters.
        $url = '[-/.a-zA-Z0-9_~#@%$?&=:\x80-\xff\\\\()[:space:]]+';

        $file = preg_replace(
            '!<a[[:space:]]+href[[:space:]]*=[[:space:]]*"(' . $url . ')"[^>]*>!i',
            '{{\1}}',
            $file
        );
        $file = preg_replace(
            '!\{\{(' . $url . ')\}\}([^<]+)</a>!i',
            '[ \2 | \1 ]',
            $file
        );
    }

    /**
     * Convert <img src="URL" ...> tags to "[Upload:URL]" on a paragraph of
     * their own. Only tags whose first attribute is src are recognised.
     *
     * @param string $file Document text, modified in place.
     */
    function _processIMG(&$file)
    {
        // Case-insensitive, like the anchor and heading conversions, so that
        // <IMG SRC="..."> is converted too.
        $img_regexp = '!<img\s+src\s*=\s*"([-/.a-zA-Z0-9_~#@%$?&=:\x80-\xff()\s]+)"[^>]*>!i';

        $file = preg_replace($img_regexp, "\n\n[Upload:\\1]", $file);
    }

    /**
     * Replace <ul>...</ul> blocks by a <p> marker and indent their <li> tags
     * with one tab per list level.
     *
     * @param string $file Document text, modified in place.
     */
    function _processUL(&$file)
    {
        // Put every <li> tag on a new line so it can be indented correctly,
        // and strip the white space (including new-lines) that follows it.
        $file = str_replace("<li", "\n<li", $file);
        $file = preg_replace("/<li>\s*/", "<li>", $file);

        // The greedy first group makes the pattern pick the LAST <ul> in the
        // text and the ungreedy second group stops at the first </ul> after
        // it, so each pass handles a list with no unprocessed list inside.
        // Items of nested lists are indented again when the enclosing list
        // is handled, which yields one tab per level.
        $enclosing_regexp = "_(.*)<ul\s?[^>]*>((?U).*)</ul>(.*)_is";
        $indent_tag = "<li";
        $embedded_fragment_array = array();
        while (preg_match($enclosing_regexp, $file, $embedded_fragment_array)) {
            $indented = str_replace($indent_tag, "\t" . $indent_tag, $embedded_fragment_array[2]);
            // Put the text together again with the indented part in the
            // middle. A <p> replaces the removed <ul> tags so that a paragraph
            // break is generated for it at the end of _process().
            $file = $embedded_fragment_array[1] . "<p>" . $indented . $embedded_fragment_array[3];
        }
    }

    /**
     * Read an HTML file and convert it to wiki markup.
     *
     * @param string $file_name Path of the file to read.
     * @return object HTML container holding a text area with the converted
     *                text, or an error message if the file cannot be read.
     */
    function _process($file_name)
    {
        $result = HTML();

        $file = file_get_contents($file_name);
        if ($file === false) {
            $result->pushContent(_("The uploaded file could not be read."));
            return $result;
        }

        // NOTE(review): entities are decoded before any tag handling, so
        // escaped markup such as &lt;b&gt; is afterwards treated as a real
        // tag and stripped. Left unchanged because the anchor conversion
        // relies on &amp; inside href values already being decoded.
        $file = html_entity_decode($file);

        $this->_processA($file);
        $this->_processIMG($file);
        $this->_processUL($file);

        $file = str_replace("\r\n", "\n", $file);

        // Tag-to-markup conversions, applied in this order. The \b after the
        // tag name keeps e.g. <link ...> from being taken for <li ...>.
        $conversions = array(
            '#<h1\b[^>]*>#i' => "\n\n!!!!",
            '#<h2\b[^>]*>#i' => "\n\n!!!",
            '#<h3\b[^>]*>#i' => "\n\n!!",
            '#<h4\b[^>]*>#i' => "\n\n!",
            '#<h5\b[^>]*>#i' => "\n\n__",
            '#</h1>#i' => "\n\n",
            '#</h2>#i' => "\n\n",
            '#</h3>#i' => "\n\n",
            '#</h4>#i' => "\n\n",
            '#</h5>#i' => "__\n\n",
            '#<hr\b[^>]*>#i' => "\n----\n",
            '#<li\b[^>]*>#i' => "* ",
        );
        $file = preg_replace(array_keys($conversions), array_values($conversions), $file);

        // Strip all tags, except for <pre>, which is supported by the wiki,
        // and <p>, which is converted after blank lines have been compressed.
        $file = strip_tags($file, "<pre><p>");
        // Strip </p> end tags together with trailing white space.
        $file = preg_replace("_</p>\s*_i", "", $file);

        // Get rid of all blank lines.
        $file = preg_replace("/\n\s*\n/", "\n", $file);

        // Finally add paragraphs only where defined, by inserting double
        // new-lines. Only <p> or <p[space]...> is matched, not <pre>. The tag
        // is replaced together with surrounding white space and at most one
        // new-line before and after it.
        $file = preg_replace("_\n?[^\S\n]*<p(\s[^>]*|)>[^\S\n]*\n?_i", "\n\n", $file);

        // Strip attributes from <pre> tags and add a new-line before them.
        $file = preg_replace("_<pre(\s[^>]*|)>_iU", "\n<pre>", $file);

        $outputArea = HTML::textarea(array('rows' => '30', 'cols' => '80'));
        // The converted document is user content, not a message id, so it
        // must not be passed through gettext.
        $outputArea->pushContent($file);
        $result->pushContent($outputArea);
        return $result;
    }
}