lib/plugin/HtmlConverter.php

   1 <?php
   2
   3 /*
   4  * Copyright 2005 Wincor Nixdorf International GmbH
   5  *
   6  * This file is part of PhpWiki.
   7  *
   8  * PhpWiki is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * PhpWiki is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License along
  19  * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
  20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22
  23 /**
 * HtmlConverter:  Convert HTML tags to wiki markup as far as possible
  25  *          and eliminate all other HTML markup, so the output can be
  26  *          copied and pasted into a wiki page.
  27  *          Credit to an unknown programmer, who has provided the first
  28  *          version 0.01 on http://www.gpgstudy.com/striphtml.phps
  29  * Usage:   <<HtmlConverter >>
  30  * Author:  HendrikScheider <hendrik.scheider@wincor-nixdorf.com>
  31  */
  32
  33 class WikiPlugin_HtmlConverter extends WikiPlugin
  34 {
  35     function getDescription()
  36     {
  37         return _("Convert HTML markup into wiki markup.");
  38     }
  39
  40     function getDefaultArguments()
  41     {
  42         return array();
  43     }
  44
  45     /**
  46      * @param WikiDB $dbi
  47      * @param string $argstr
  48      * @param WikiRequest $request
  49      * @param string $basepage
  50      * @return mixed
  51      */
  52     function run($dbi, $argstr, &$request, $basepage)
  53     {
  54         $form = HTML::form(array('action' => $request->getPostURL(),
  55             'enctype' => 'multipart/form-data',
  56             'method' => 'post'));
  57         $contents = HTML::div(array('class' => 'wikiaction'));
  58         $contents->pushContent(HTML::input(array('type' => 'hidden',
  59             'name' => 'MAX_FILE_SIZE',
  60             'value' => MAX_UPLOAD_SIZE)));
  61         $contents->pushContent(HTML::input(array('name' => 'userfile',
  62             'type' => 'file')));
  63         $contents->pushContent(HTML::raw(" "));
  64         $contents->pushContent(HTML::input(array('value' => _("Convert"),
  65             'type' => 'submit')));
  66         $form->pushContent($contents);
  67
  68         $message = HTML();
  69         $userfile = $request->getUploadedFile('userfile');
  70         if ($userfile) {
  71             $userfile_name = $userfile->getName();
  72             $userfile_name = basename($userfile_name);
  73             $userfile_tmpname = $userfile->getTmpName();
  74
  75             if (!preg_match("/(\.html|\.htm)$/i", $userfile_name)) {
  76                 $message->pushContent(_("Only files with extension HTML are allowed"), HTML::br(), HTML::br());
  77             } else {
  78                 $message->pushContent(_("Processed $userfile_name"), HTML::br(), HTML::br());
  79                 $message->pushContent(_("Copy the output below and paste it into your Wiki page."), HTML::br());
  80                 $message->pushContent($this->process($userfile_tmpname));
  81             }
  82         } else {
  83             $message->pushContent(HTML::br(), HTML::br());
  84         }
  85
  86         $result = HTML();
  87         $result->pushContent($form);
  88         $result->pushContent($message);
  89         return $result;
  90     }
  91
  92     private function processA(&$file)
  93     {
  94
  95         $file = eregi_replace(
  96             "<a([[:space:]]+)href([[:space:]]*)=([[:space:]]*)\"([-/.a-zA-Z0-9_~#@%$?&=:\200-\377\(\)[:space:]]+)\"([^>]*)>", "{{\\4}}", $file);
  97
  98         $file = eregi_replace("{{([-/a-zA-Z0-9._~#@%$?&=:\200-\377\(\)[:space:]]+)}}([^<]+)</a>", "[ \\2 | \\1 ]", $file);
  99     }
 100
 101     private function processIMG(&$file)
 102     {
 103
 104         $img_regexp = "_<img\s+src\s*=\s*\"([-/.a-zA-Z0-9\_~#@%$?&=:\200-\377\(\)\s]+)\"[^>]*>_";
 105
 106         $file = preg_replace($img_regexp, "\n\n[Upload:\\1]", $file);
 107     }
 108
 109     private function processUL(&$file)
 110     {
 111
 112         // put any <li>-Tag in a new line to indent correctly and strip trailing white space (including new-lines)
 113         $file = str_replace("<li", "\n<li", $file);
 114         $file = preg_replace("/<li>\s*/", "<li>", $file);
 115
 116         $enclosing_regexp = "_(.*)<ul\s?[^>]*>((?U).*)</ul>(.*)_is";
 117         $indent_tag = "<li";
 118         $embedded_fragment_array = array();
 119         $found = preg_match($enclosing_regexp, $file, $embedded_fragment_array);
 120         while ($found) {
 121             $indented = str_replace($indent_tag, "\t" . $indent_tag, $embedded_fragment_array[2]);
 122             // string the file together again with the indented part in the middle.
 123             // a <p> is inserted instead of the erased <ul> tags to have a paragraph generated at the end of the script
 124             $file = $embedded_fragment_array[1] . "<p>" . $indented . $embedded_fragment_array[3];
 125             $found = preg_match($enclosing_regexp, $file, $embedded_fragment_array);
 126         }
 127     }
 128
 129     private function process($file_name)
 130     {
 131         $result = HTML();
 132         $file = file_get_contents($file_name);
 133         $file = html_entity_decode($file);
 134
 135         $this->processA($file);
 136         $this->processIMG($file);
 137         $this->processUL($file);
 138
 139         $file = str_replace("\r\n", "\n", $file);
 140
 141         $file = eregi_replace("<h1[[:space:]]?[^>]*>", "\n\n!!!!", $file);
 142
 143         $file = eregi_replace("<h2[[:space:]]?[^>]*>", "\n\n!!!", $file);
 144
 145         $file = eregi_replace("<h3[[:space:]]?[^>]*>", "\n\n!!", $file);
 146
 147         $file = eregi_replace("<h4[[:space:]]?[^>]*>", "\n\n!", $file);
 148
 149         $file = eregi_replace("<h5[[:space:]]?[^>]*>", "\n\n__", $file);
 150
 151         $file = eregi_replace("</h1>", "\n\n", $file);
 152
 153         $file = eregi_replace("</h2>", "\n\n", $file);
 154
 155         $file = eregi_replace("</h3>", "\n\n", $file);
 156
 157         $file = eregi_replace("</h4>", "\n\n", $file);
 158
 159         $file = eregi_replace("</h5>", "__\n\n", $file);
 160
 161         $file = eregi_replace("<hr[[:space:]]?[^>]*>", "\n----\n", $file);
 162
 163         $file = eregi_replace("<li[[:space:]]?[^>]*>", "* ", $file);
 164
 165         // strip all tags, except for <pre>, which is supported by wiki
 166         // and <p>'s which will be converted after compression.
 167         $file = strip_tags($file, "<pre><p>");
 168         // strip </p> end tags with trailing white space
 169         $file = preg_replace("_</p>\s*_i", "", $file);
 170
 171         // get rid of all blank lines
 172         $file = preg_replace("/\n\s*\n/", "\n", $file);
 173
 174         // finally only add paragraphs where defined by inserting double new-lines
 175         // be sure to only catch <p> or <p[space]...> and not <pre>!
 176         // Actually <p> tags with all white space and one new-line before
 177         // and after around are replaced
 178         $file = preg_replace("_\n?[^\S\n]*<p(\s[^>]*|)>[^\S\n]*\n?_i", "\n\n", $file);
 179
 180         // strip attributes from <pre>-Tags and add a new-line before
 181         $file = preg_replace("_<pre(\s[^>]*|)>_iU", "\n<pre>", $file);
 182
 183         $outputArea = HTML::textarea(array('rows' => '30', 'cols' => '80'));
 184
 185         $outputArea->pushContent(_($file));
 186         $result->pushContent($outputArea);
 187         return $result;
 188     }
 189 }