lib/transform.php

   1 <?php rcs_id('$Id: transform.php,v 1.8.2.1 2001-03-02 03:48:47 dairiki Exp $');
   2    // expects $pagehash and $html to be set
   3
   // Replace every match of $pattern in $str with a numbered placeholder token.
   //
   // Each match is appended to $orig (in order of appearance) and replaced by
   // the token  $FieldSeparator . $FieldSeparator . N . $FieldSeparator,  where
   // N starts at the incoming value of $ntokens and is incremented per match.
   //
   //   $str      text to scan
   //   $pattern  PCRE fragment WITHOUT delimiters; it is embedded in a
   //             '/'-delimited expression, so it must not contain an
   //             unescaped '/'
   //   $orig     (by reference) receives the matched strings
   //   $ntokens  (by reference) next free token number, advanced on return
   //
   // Returns the tokenized string.
   function tokenize($str, $pattern, &$orig, &$ntokens) {
      global $FieldSeparator;

      $new = '';
      // The scan always restarts at the beginning of what is left of $str,
      // so anchors and \b in $pattern see the remainder, not the full line.
      while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
         if ($matches[0] === '') {
            // A zero-length match consumes nothing: $str would never get
            // shorter and this loop would never end.  Stop tokenizing.
            break;
         }
         $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
         $new .= $matches[1] . $linktoken;   // text before the match + token
         $orig[] = $matches[2];              // the matched text itself
         $str = substr($str, strlen($matches[0]));
      }
      // Whatever is left contains no (further) match.
      $new .= $str;
      return $new;
   }
  19
  20
  21    // Prepare replacements for references [\d+]
  22    for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
  23       if (! empty($pagehash['refs'][$i])) {
  24          if (preg_match("/($InlineImages)$/i", $pagehash['refs'][$i])) {
  25             // embed images
  26             $embedded[$i] = LinkImage($pagehash['refs'][$i]);
  27          } else {
  28             // ordinary link
  29             $embedded[$i] = LinkURL($pagehash['refs'][$i], "[$i]");
  30          }
  31       }
  32    }
  33
  34
  35    // only call these once, for efficiency
  36    $quick_search_box  = RenderQuickSearch();
  37    $full_search_box   = RenderFullSearch();
  38    $most_popular_list = RenderMostPopular();
  39
  40
  41    // Loop over all lines of the page and apply transformation rules
  42    $numlines = count($pagehash["content"]);
  43
  44    for ($index = 0; $index < $numlines; $index++) {
  45       unset($tokens);
  46       unset($replacements);
  47       $ntokens = 0;
  48       $replacements = array();
  49
  50       $tmpline = $pagehash['content'][$index];
  51
  52       if (!strlen($tmpline) || $tmpline == "\r") {
  53          // this is a blank line, send <p>
  54          $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
  55          continue;
  56       }
  57
  58 /* If your web server is not accessble to the general public, you may
  59 allow this code below, which allows embedded HTML. If just anyone can reach
  60 your web server it is highly advised that you do not allow this.
  61
  62       elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
  63          // HTML mode
  64          $html .= SetHTMLOutputMode("", ZERO_LEVEL, 0);
  65          $html .= $matches[2];
  66          continue;
  67       }
  68 */
  69
  70
  71       //////////////////////////////////////////////////////////
  72       // New linking scheme: links are in brackets. This will
  73       // emulate typical HTML linking as well as Wiki linking.
  74
  75       // First need to protect [[.
  76       $oldn = $ntokens;
  77       $tmpline = tokenize($tmpline, '\[\[', $replacements, $ntokens);
  78       while ($oldn < $ntokens)
  79          $replacements[$oldn++] = '[';
  80
  81       // Now process the [\d+] links which are numeric references
  82       $oldn = $ntokens;
  83       $tmpline = tokenize($tmpline, '\[\s*\d+\s*\]', $replacements, $ntokens);
  84       while ($oldn < $ntokens) {
  85          $num = (int) substr($replacements[$oldn], 1);
  86          if (! empty($embedded[$num]))
  87             $replacements[$oldn] = $embedded[$num];
  88          $oldn++;
  89       }
  90
  91       // match anything else between brackets
  92       $oldn = $ntokens;
  93       $tmpline = tokenize($tmpline, '\[.+?\]', $replacements, $ntokens);
  94       while ($oldn < $ntokens) {
  95         $link = ParseAndLink($replacements[$oldn]);
  96         $replacements[$oldn] = $link['link'];
  97         $oldn++;
  98       }
  99
 100       //////////////////////////////////////////////////////////
 101       // replace all URL's with tokens, so we don't confuse them
 102       // with Wiki words later. Wiki words in URL's break things.
 103       // URLs preceeded by a '!' are not linked
 104
 105       $tmpline = tokenize($tmpline, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $replacements, $ntokens);
 106       while ($oldn < $ntokens) {
 107         if($replacements[$oldn][0] == '!')
 108            $replacements[$oldn] = substr($replacements[$oldn], 1);
 109         else
 110            $replacements[$oldn] = LinkURL($replacements[$oldn]);
 111         $oldn++;
 112       }
 113
 114       //////////////////////////////////////////////////////////
 115       // Link Wiki words
 116       // Wikiwords preceeded by a '!' are not linked
 117
 118       $oldn = $ntokens;
 119       $tmpline = tokenize($tmpline, "!?$WikiNameRegexp", $replacements, $ntokens);
 120       while ($oldn < $ntokens) {
 121         $old = $replacements[$oldn];
 122         if ($old[0] == '!') {
 123           $replacements[$oldn] = substr($old,1);
 124         } elseif (IsWikiPage($dbi, $old)) {
 125           $replacements[$oldn] = LinkExistingWikiWord($old);
 126         } else {
 127           $replacements[$oldn] = LinkUnknownWikiWord($old);
 128         }
 129         $oldn++;
 130       }
 131
 132
 133       //////////////////////////////////////////////////////////
 134       // escape HTML metachars
 135       $tmpline = str_replace('&', '&amp;', $tmpline);
 136       $tmpline = str_replace('>', '&gt;', $tmpline);
 137       $tmpline = str_replace('<', '&lt;', $tmpline);
 138
 139
 140       // %%% are linebreaks
 141       $tmpline = str_replace('%%%', '<br>', $tmpline);
 142
 143       // bold italics (old way)
 144       $tmpline = preg_replace("|(''''')(.*?)(''''')|",
 145                               "<strong><em>\\2</em></strong>", $tmpline);
 146
 147       // bold (old way)
 148       $tmpline = preg_replace("|(''')(.*?)(''')|",
 149                               "<strong>\\2</strong>", $tmpline);
 150
 151       // bold
 152       $tmpline = preg_replace("|(__)(.*?)(__)|",
 153                               "<strong>\\2</strong>", $tmpline);
 154
 155       // italics
 156       $tmpline = preg_replace("|('')(.*?)('')|",
 157                               "<em>\\2</em>", $tmpline);
 158
 159
 160       //////////////////////////////////////////////////////////
 161       // unordered, ordered, and dictionary list  (using TAB)
 162
 163       if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
 164          // this is a dictionary list (<dl>) item
 165          $numtabs = strlen($matches[1]);
 166          $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
 167          $tmpline = '';
 168          if(trim($matches[2]))
 169             $tmpline = '<dt>' . $matches[2];
 170          $tmpline .= '<dd>' . $matches[4];
 171
 172       } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
 173          // this is part of a list (<ul>, <ol>)
 174          $numtabs = strlen($matches[1]);
 175          if ($matches[2] == '*') {
 176             $listtag = 'ul';
 177          } else {
 178             $listtag = 'ol'; // a rather tacit assumption. oh well.
 179          }
 180          $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
 181          $html .= SetHTMLOutputMode($listtag, NESTED_LEVEL, $numtabs);
 182          $html .= '<li>';
 183
 184
 185       //////////////////////////////////////////////////////////
 186       // tabless markup for unordered, ordered, and dictionary lists
 187       // ul/ol list types can be mixed, so we only look at the last
 188       // character. Changes e.g. from "**#*" to "###*" go unnoticed.
 189       // and wouldn't make a difference to the HTML layout anyway.
 190
 191       // unordered lists <UL>: "*"
 192       } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
 193          // this is part of an unordered list
 194          $numtabs = strlen($matches[1]);
 195          $tmpline = preg_replace("/^([#*]*\*)/", '', $tmpline);
 196          $html .= SetHTMLOutputMode('ul', NESTED_LEVEL, $numtabs);
 197          $html .= '<li>';
 198
 199       // ordered lists <OL>: "#"
 200       } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
 201          // this is part of an ordered list
 202          $numtabs = strlen($matches[1]);
 203          $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
 204          $html .= SetHTMLOutputMode('ol', NESTED_LEVEL, $numtabs);
 205          $html .= '<li>';
 206
 207       // definition lists <DL>: ";text:text"
 208       } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
 209          // this is a dictionary list item
 210          $numtabs = strlen($matches[1]);
 211          $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
 212          $tmpline = '';
 213          if(trim($matches[2]))
 214             $tmpline = '<dt>' . $matches[2];
 215          $tmpline .= '<dd>' . $matches[3];
 216
 217
 218       //////////////////////////////////////////////////////////
 219       // remaining modes: preformatted text, headings, normal text
 220
 221       } elseif (preg_match("/^\s+/", $tmpline)) {
 222          // this is preformatted text, i.e. <pre>
 223          $html .= SetHTMLOutputMode('pre', ZERO_LEVEL, 0);
 224
 225       } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
 226          // lines starting with !,!!,!!! are headings
 227          if($whichheading[1] == '!') $heading = 'h3';
 228          elseif($whichheading[1] == '!!') $heading = 'h2';
 229          elseif($whichheading[1] == '!!!') $heading = 'h1';
 230          $tmpline = preg_replace("/^!+/", '', $tmpline);
 231          $html .= SetHTMLOutputMode($heading, ZERO_LEVEL, 0);
 232
 233       } elseif (preg_match('/^-{4,}\s*(.*?)\s*$/', $tmpline, $matches)) {
 234          // four or more dashes to <hr>
 235          // <hr> can not be contained in a
 236          $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0) . "<hr>\n";
 237          if ( ($tmpline = $matches[1]) != '' ) {
 238             $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
 239          }
 240       } else {
 241          // it's ordinary output if nothing else
 242          $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
 243       }
 244
 245       // These are still problems as far as generating correct HTML is
 246       // concerned.  Paragraph (<p>) elements are not allowed to contain
 247       // other block-level elements (like <form>s).
 248       $tmpline = str_replace('%%Search%%', $quick_search_box, $tmpline);
 249       $tmpline = str_replace('%%Fullsearch%%', $full_search_box, $tmpline);
 250       $tmpline = str_replace('%%Mostpopular%%', $most_popular_list, $tmpline);
 251       if(defined('WIKI_ADMIN') && strstr($tmpline, '%%ADMIN-'))
 252          $tmpline = ParseAdminTokens($tmpline);
 253
 254
 255       ///////////////////////////////////////////////////////
 256       // Replace tokens
 257
 258       for ($i = 0; $i < $ntokens; $i++)
 259           $tmpline = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $replacements[$i], $tmpline);
 260
 261
 262       $html .= $tmpline . "\n";
 263    }
 264
 265    $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
 266 ?>