lib/plugin/HtmlConverter.php

   1 <?php // -*-php-*-
   2 rcs_id('$Id$');
   3 /*
   4  * Copyright 2005 Wincor Nixdorf International GmbH
   5  *
   6  * This file is part of PhpWiki.
   7  *
   8  * PhpWiki is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * PhpWiki is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with PhpWiki; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21  */
  22
  23 /**
  24  * HtmlConverter:  Convert HTML tags to Wiki markup as far as possible
  25  *          and eliminate all other HTML markup, so the output can be
  26  *          copied and pasted into a wiki page.
  27  *          Credit to an unknown programmer, who has provided the first
  28  *          version 0.01 on http://www.gpgstudy.com/striphtml.phps
  29  * Usage:   <?plugin HtmlConverter ?>
  30  * Author:  HendrikScheider <hendrik.scheider@wincor-nixdorf.com>
  31  */
  32
  class WikiPlugin_HtmlConverter extends WikiPlugin
  {

      /**
       * @return string The plugin name.
       */
      function getName () {
          return "HtmlConverter";
      }

      /**
       * @return string Localized one-line description of the plugin.
       */
      function getDescription () {
          return _("Convert HTML markup into wiki markup.");
      }

      /**
       * @return string What is left of the RCS "Revision" keyword once the
       *                keyword characters themselves have been stripped.
       */
      function getVersion() {
          return preg_replace("/[Revision: $]/", '',
                              "\$Revision$");
      }

      /**
       * @return array Empty: the plugin does not accept arguments yet.
       */
      function getDefaultArguments() {
          return array();
      }

      /**
       * Render the upload form and, when an HTML file was uploaded with the
       * request, the converted wiki markup below it.
       *
       * @param mixed  $dbi      Not used by this plugin.
       * @param string $argstr   Not used: the plugin has no arguments yet.
       * @param object $request  Supplies the form post URL and the uploaded file.
       * @param mixed  $basepage Not used by this plugin.
       * @return object HTML container holding the form followed by the message area.
       */
      function run($dbi, $argstr, &$request, $basepage) {

          $form = HTML::form(array('action'  => $request->getPostURL(),
                                   'enctype' => 'multipart/form-data',
                                   'method'  => 'post'));
          $contents = HTML::div(array('class' => 'wikiaction'));
          $contents->pushContent(HTML::input(array('type'  => 'hidden',
                                                   'name'  => 'MAX_FILE_SIZE',
                                                   'value' => MAX_UPLOAD_SIZE)));
          $contents->pushContent(HTML::input(array('name' => 'userfile',
                                                   'type' => 'file',
                                                   'size' => '50')));
          $contents->pushContent(HTML::raw(" "));
          $contents->pushContent(HTML::input(array('value' => _("Convert"),
                                                   'type'  => 'submit')));
          $form->pushContent($contents);

          $message = HTML();
          $userfile = $request->getUploadedFile('userfile');
          if ($userfile) {
              // Only the base name is shown and checked, never a client-side path.
              $userfile_name = basename($userfile->getName());
              $userfile_tmpname = $userfile->getTmpName();

              if (!preg_match('/\.html?$/i', $userfile_name)) {
                  $message->pushContent(_("Only files with extension HTML are allowed"),
                                        HTML::br(), HTML::br());
              } else {
                  // Keep the file name out of the msgid; an interpolated msgid
                  // can never be found in a translation catalog.
                  $message->pushContent(sprintf(_("Processed %s"), $userfile_name),
                                        HTML::br(), HTML::br());
                  $message->pushContent(_("Copy the output below and paste it into your Wiki page."),
                                        HTML::br());
                  $message->pushContent($this->_process($userfile_tmpname));
              }
          } else {
              $message->pushContent(HTML::br(), HTML::br());
          }

          $result = HTML();
          $result->pushContent($form);
          $result->pushContent($message);
          return $result;
      }

      /**
       * preg_replace() wrapper that never loses the text being converted.
       *
       * preg_replace() returns null when the PCRE engine fails (for instance
       * when the backtrack limit is hit on a large document); in that case the
       * subject is returned unchanged.
       *
       * @param mixed  $pattern     Pattern or array of patterns.
       * @param mixed  $replacement Replacement or array of replacements.
       * @param string $subject     Text to work on.
       * @return string Converted text, or $subject when the replacement failed.
       */
      function _safeReplace($pattern, $replacement, $subject) {
          $replaced = preg_replace($pattern, $replacement, $subject);
          return ($replaced === null) ? $subject : $replaced;
      }

      /**
       * Convert <a href="url">text</a> into the wiki link "[ text | url ]".
       *
       * Only anchors whose first attribute is a double-quoted href are
       * recognized, and the link text must not contain further tags.
       *
       * @param string $file Document text, modified in place.
       */
      function _processA(&$file) {
          // Characters accepted in a link target. The backslash is part of the
          // set because the former POSIX bracket expression admitted it too.
          $url = '[-/.a-zA-Z0-9_~#@%$?&=:\x80-\xff\\\\()[:space:]]+';

          // Step 1: replace the opening tag by a {{url}} marker.
          $file = $this->_safeReplace('!<a\s+href\s*=\s*"(' . $url . ')"[^>]*>!i',
                                      '{{\1}}', $file);

          // Step 2: combine the marker, the link text and the closing tag.
          $file = $this->_safeReplace('!\{\{(' . $url . ')\}\}([^<]+)</a>!i',
                                      '[ \2 | \1 ]', $file);
      }

      /**
       * Convert <img src="url" ...> into "[Upload:url]" on a paragraph of its own.
       *
       * Only images whose first attribute is a double-quoted src are recognized.
       *
       * @param string $file Document text, modified in place.
       */
      function _processIMG(&$file) {
          $img_regexp = '!<img\s+src\s*=\s*"([-/.a-zA-Z0-9_~#@%$?&=:\x80-\xff()\s]+)"[^>]*>!i';

          $file = $this->_safeReplace($img_regexp, "\n\n[Upload:\\1]", $file);
      }

      /**
       * Indent the items of (possibly nested) <ul> lists with one tab per
       * nesting level and replace each list by a <p> marker.
       *
       * @param string $file Document text, modified in place.
       */
      function _processUL(&$file) {
          // Put every <li> tag on a new line so it can be indented, and strip
          // the white space (including new-lines) following a plain <li>.
          // The word boundary keeps <link> tags from being taken for items.
          $file = $this->_safeReplace('!<li\b!i', "\n<li", $file);
          $file = $this->_safeReplace('!<li>\s*!i', "<li>", $file);

          // The greedy head selects the last <ul> in the text and the lazy
          // middle stops at the first </ul> after it, so nested lists are
          // handled before the lists enclosing them.
          $enclosing_regexp = '!(.*)<ul\b[^>]*>(.*?)</ul>(.*)!is';
          $parts = array();
          while (preg_match($enclosing_regexp, $file, $parts)) {
              $indented = $this->_safeReplace('!<li\b!i', "\t<li", $parts[2]);
              // String the text together again with the indented part in the
              // middle. A <p> takes the place of the removed <ul> tags so that
              // a paragraph break is generated at the end of the conversion.
              $file = $parts[1] . "<p>" . $indented . $parts[3];
          }
      }

      /**
       * Convert an HTML file to wiki markup and wrap the result in a textarea.
       *
       * @param string $file_name Path of the HTML file to read.
       * @return object HTML container holding the textarea with the converted
       *                text, or an error message when the file cannot be read.
       */
      function _process($file_name) {
          $result = HTML();

          $file = file_get_contents($file_name);
          if ($file === false) {
              $result->pushContent(_("The uploaded file could not be read."), HTML::br());
              return $result;
          }

          // Entities are decoded first so that "&amp;" inside link targets is
          // matched by the URL patterns below.
          $file = html_entity_decode($file);

          $this->_processA($file);
          $this->_processIMG($file);
          $this->_processUL($file);

          $file = str_replace("\r\n", "\n", $file);

          // Headings, rules and list items. preg_replace() is used because the
          // ereg functions no longer exist as of PHP 7.
          $tag_map = array(
              '!<h1\b[^>]*>!i' => "\n\n!!!!",
              '!<h2\b[^>]*>!i' => "\n\n!!!",
              '!<h3\b[^>]*>!i' => "\n\n!!",
              '!<h4\b[^>]*>!i' => "\n\n!",
              '!<h5\b[^>]*>!i' => "\n\n__",
              '!</h[1-4]>!i'   => "\n\n",
              '!</h5>!i'       => "__\n\n",
              '!<hr\b[^>]*>!i' => "\n----\n",
              // \b keeps <link ...> from being converted into a bullet.
              '!<li\b[^>]*>!i' => "* ",
          );
          $file = $this->_safeReplace(array_keys($tag_map), array_values($tag_map), $file);

          // Strip all tags, except for <pre>, which is supported by the wiki,
          // and <p>, which is converted after the blank lines are removed.
          $file = strip_tags($file, "<pre><p>");
          // Strip </p> end tags with trailing white space.
          $file = $this->_safeReplace("_</p>\s*_i", "", $file);

          // Get rid of all blank lines.
          $file = $this->_safeReplace("/\n\s*\n/", "\n", $file);

          // Finally add paragraphs only where defined, by inserting double
          // new-lines. Only <p> or <p[space]...> is caught, not <pre>. The <p>
          // tag is replaced together with the white space around it and at
          // most one new-line on either side.
          $file = $this->_safeReplace("_\n?[^\S\n]*<p(\s[^>]*|)>[^\S\n]*\n?_i", "\n\n", $file);

          // Strip attributes from <pre> tags and add a new-line before them.
          $file = $this->_safeReplace("_<pre(\s[^>]*|)>_iU", "\n<pre>", $file);

          $outputArea = HTML::textarea(array('rows' => '30', 'cols' => '80'));
          // The converted document is user content, not a message id, so it
          // is not passed through gettext.
          $outputArea->pushContent($file);
          $result->pushContent($outputArea);
          return $result;
      }
  }
 197 ?>