lib/transform.php

   1 <?php rcs_id('$Id: transform.php,v 1.8.2.5 2005-01-07 14:23:05 rurban Exp $');
   2    // expects $pagehash and $html to be set
   3
   /**
    * Replace every match of $pattern in $str with a placeholder token.
    *
    * A token is built as <FS><FS><number><FS>, where <FS> is the global
    * $FieldSeparator and <number> starts at $ntokens and counts upwards.
    * The matched text is stored in $orig under that same number, so the
    * caller can later swap each token for its final replacement.
    *
    * @param string $str      Text to scan.
    * @param string $pattern  PCRE fragment without delimiters. It is embedded
    *                         in a "/"-delimited expression, so a literal "/"
    *                         has to be escaped by the caller.
    * @param array  $orig     (by reference) Receives the matched strings,
    *                         keyed by token number.
    * @param int    $ntokens  (by reference) Next free token number;
    *                         incremented once per match.
    * @return string $str with every match replaced by its token.
    */
   function tokenize($str, $pattern, &$orig, &$ntokens) {
      global $FieldSeparator;

      $new = '';
      while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
         if ($matches[0] === '') {
            // A zero-length match at the start of $str consumes nothing;
            // looping on would never terminate, so stop tokenizing here.
            break;
         }
         $new .= $matches[1] . $FieldSeparator . $FieldSeparator . $ntokens . $FieldSeparator;
         // Store under the token number itself, so token N always maps to
         // $orig[N] even if $orig and $ntokens were not in step on entry.
         $orig[$ntokens] = $matches[2];
         $ntokens++;
         $str = substr($str, strlen($matches[0]));
      }
      // Whatever is left contains no further match.
      return $new . $str;
   }
  19
  20
  // Prepare replacements for references [\d+]
  //
  // Builds $embedded, mapping a reference number (1 .. NUM_LINKS) to the
  // markup returned by LinkImage() or LinkURL() for that reference.
  // Numbers without a non-empty entry in $pagehash['refs'] get no entry.
  //
  // Start from an empty array so entries left over from an earlier run, or
  // otherwise present in the global scope, cannot leak into this page.
  $embedded = array();
  for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
     if (empty($pagehash['refs'][$i])) {
        continue;
     }

     if (preg_match("/($InlineImages)$/i", $pagehash['refs'][$i])) {
        // reference ends in one of the $InlineImages alternatives: embed images
        $embedded[$i] = LinkImage($pagehash['refs'][$i]);
     } else {
        // ordinary link, labelled with its reference number
        $embedded[$i] = LinkURL($pagehash['refs'][$i], "[$i]");
     }
  }
  33
  34
  // Loop over all lines of the page and apply transformation rules.
  //
  // Every line of $pagehash['content'] goes through the same pipeline and
  // the result is appended to $html:
  //   1. bracket links, URLs and WikiWords are swapped for placeholder
  //      tokens (see tokenize()) so that later rules cannot mangle them;
  //   2. HTML metacharacters in the remaining text are escaped;
  //   3. inline markup (%%%, bold, italics) is converted;
  //   4. the block type (list, preformatted, heading, rule, paragraph) is
  //      detected and passed to SetHTMLOutputMode(), whose return value is
  //      appended to $html;
  //   5. the placeholder tokens are replaced by their final markup.
  $numlines = count($pagehash["content"]);

  for ($index = 0; $index < $numlines; $index++) {
     // NOTE(review): $tokens is not used anywhere in the visible code; the
     // unset is kept in case included code relies on it -- confirm.
     unset($tokens);
     $ntokens = 0;            // next free token number for this line
     $replacements = array(); // token number => final markup

     $tmpline = $pagehash['content'][$index];

     if (!strlen($tmpline) || $tmpline == "\r") {
        // this is a blank line: reset the output mode and move on
        $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
        continue;
     }

/* If your web server is not accessible to the general public, you may
allow this code below, which allows embedded HTML. If just anyone can reach
your web server it is highly advised that you do not allow this.

      elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
         // HTML mode
         $html .= SetHTMLOutputMode("", ZERO_LEVEL, 0);
         $html .= $matches[2];
         continue;
      }
*/


     //////////////////////////////////////////////////////////
     // New linking scheme: links are in brackets. This will
     // emulate typical HTML linking as well as Wiki linking.

     // First need to protect [[ ; it is put back as a single '['.
     $oldn = $ntokens;
     $tmpline = tokenize($tmpline, '\[\[', $replacements, $ntokens);
     while ($oldn < $ntokens) {
        $replacements[$oldn++] = '[';
     }

     // Now process the [\d+] links which are numeric references
     $oldn = $ntokens;
     $tmpline = tokenize($tmpline, '\[\s*\d+\s*\]', $replacements, $ntokens);
     while ($oldn < $ntokens) {
        // skip the leading '[' and read the reference number
        $num = (int) substr($replacements[$oldn], 1);
        // references without prepared markup keep their literal text
        if (! empty($embedded[$num])) {
           $replacements[$oldn] = $embedded[$num];
        }
        $oldn++;
     }

     // match anything else between brackets
     $oldn = $ntokens;
     $tmpline = tokenize($tmpline, '\[.+?\]', $replacements, $ntokens);
     while ($oldn < $ntokens) {
        $link = ParseAndLink($replacements[$oldn]);
        $replacements[$oldn] = $link['link'];
        $oldn++;
     }

     //////////////////////////////////////////////////////////
     // replace all URLs with tokens, so we don't confuse them
     // with Wiki words later. Wiki words in URLs break things.
     // URLs preceded by a '!' are not linked

     // Set the start index explicitly, like every other pass, instead of
     // relying on the value left behind by the previous loop.
     $oldn = $ntokens;
     $tmpline = tokenize($tmpline, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $replacements, $ntokens);
     while ($oldn < $ntokens) {
        if ($replacements[$oldn][0] == '!') {
           // Unlinked URL: drop the '!'. Tokens are re-inserted after the
           // line has been HTML-escaped, so escape '&' here the same way
           // (the pattern above already excludes '<' and '>').
           $replacements[$oldn] = str_replace('&', '&amp;', substr($replacements[$oldn], 1));
        } else {
           $replacements[$oldn] = LinkURL($replacements[$oldn]);
        }
        $oldn++;
     }

     //////////////////////////////////////////////////////////
     // Link Wiki words
     // Wikiwords preceded by a '!' are not linked

     $oldn = $ntokens;
     $tmpline = tokenize($tmpline, "!?$WikiNameRegexp", $replacements, $ntokens);
     while ($oldn < $ntokens) {
        $old = $replacements[$oldn];
        if ($old[0] == '!') {
           $replacements[$oldn] = substr($old, 1);
        } elseif (IsWikiPage($dbi, $old)) {
           $replacements[$oldn] = LinkExistingWikiWord($old);
        } else {
           $replacements[$oldn] = LinkUnknownWikiWord($old);
        }
        $oldn++;
     }


     //////////////////////////////////////////////////////////
     // escape HTML metachars ('&' first, so the entities produced
     // for '>' and '<' are not escaped a second time)
     $tmpline = str_replace('&', '&amp;', $tmpline);
     $tmpline = str_replace('>', '&gt;', $tmpline);
     $tmpline = str_replace('<', '&lt;', $tmpline);


     // %%% are linebreaks
     $tmpline = str_replace('%%%', '<br />', $tmpline);

     // bold italics (old way); must run before the shorter quote runs below
     $tmpline = preg_replace("|(''''')(.*?)(''''')|",
                             "<strong><em>\\2</em></strong>", $tmpline);

     // bold (old way)
     $tmpline = preg_replace("|(''')(.*?)(''')|",
                             "<strong>\\2</strong>", $tmpline);

     // bold
     $tmpline = preg_replace("|(__)(.*?)(__)|",
                             "<strong>\\2</strong>", $tmpline);

     // italics
     $tmpline = preg_replace("|('')(.*?)('')|",
                             "<em>\\2</em>", $tmpline);


     //////////////////////////////////////////////////////////
     // unordered, ordered, and dictionary list  (using TAB)

     if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
        // this is a dictionary list (<dl>) item; nesting depth = number of tabs
        $numtabs = strlen($matches[1]);
        $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
        $tmpline = '';
        // compare with '' so that a term that is literally "0" is kept
        if (trim($matches[2]) != '')
           $tmpline = '<dt>' . $matches[2];
        $tmpline .= '<dd>' . $matches[4];

     } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
        // this is part of a list (<ul>, <ol>)
        $numtabs = strlen($matches[1]);
        if ($matches[2] == '*') {
           $listtag = 'ul';
        } else {
           $listtag = 'ol'; // a rather tacit assumption. oh well.
        }
        $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
        $html .= SetHTMLOutputMode($listtag, NESTED_LEVEL, $numtabs);
        $html .= '<li>';


     //////////////////////////////////////////////////////////
     // tabless markup for unordered, ordered, and dictionary lists
     // ul/ol list types can be mixed, so we only look at the last
     // character. Changes e.g. from "**#*" to "###*" go unnoticed.
     // and wouldn't make a difference to the HTML layout anyway.

     // unordered lists <UL>: "*"
     } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
        // this is part of an unordered list; depth = length of the marker run
        $numtabs = strlen($matches[1]);
        $tmpline = preg_replace("/^([#*]*\*)/", '', $tmpline);
        $html .= SetHTMLOutputMode('ul', NESTED_LEVEL, $numtabs);
        $html .= '<li>';

     // ordered lists <OL>: "#"
     } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
        // this is part of an ordered list
        $numtabs = strlen($matches[1]);
        $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
        $html .= SetHTMLOutputMode('ol', NESTED_LEVEL, $numtabs);
        $html .= '<li>';

     // definition lists <DL>: ";text:text"
     } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
        // this is a dictionary list item; depth = number of leading ';'
        $numtabs = strlen($matches[1]);
        $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
        $tmpline = '';
        // compare with '' so that a term that is literally "0" is kept
        if (trim($matches[2]) != '')
           $tmpline = '<dt>' . $matches[2];
        $tmpline .= '<dd>' . $matches[3];


     //////////////////////////////////////////////////////////
     // remaining modes: preformatted text, headings, normal text

     } elseif (preg_match("/^\s+/", $tmpline)) {
        // this is preformatted text, i.e. <pre>
        $html .= SetHTMLOutputMode('pre', ZERO_LEVEL, 0);

     } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
        // lines starting with !,!!,!!! are headings (more '!' = bigger)
        if ($whichheading[1] == '!') $heading = 'h3';
        elseif ($whichheading[1] == '!!') $heading = 'h2';
        elseif ($whichheading[1] == '!!!') $heading = 'h1';
        $tmpline = preg_replace("/^!+/", '', $tmpline);
        $html .= SetHTMLOutputMode($heading, ZERO_LEVEL, 0);

     } elseif (preg_match('/^-{4,}\s*(.*?)\s*$/', $tmpline, $matches)) {
        // four or more dashes to <hr>
        // The output mode is reset before the rule is written (presumably
        // because <hr> must not sit inside the open element -- TODO confirm).
        $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0) . "<hr>\n";
        // any text following the dashes continues as a paragraph
        if ( ($tmpline = $matches[1]) != '' ) {
           $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
        }
     } else {
        // it's ordinary output if nothing else
        $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
     }

     // These are still problems as far as generating correct HTML is
     // concerned.  Paragraph (<p>) elements are not allowed to contain
     // other block-level elements (like <form>s).
     // Each renderer is only called when its marker is actually present.
     if (strstr($tmpline, '%%Search%%'))
        $tmpline = str_replace('%%Search%%', RenderQuickSearch(), $tmpline);
     if (strstr($tmpline, '%%Fullsearch%%'))
        $tmpline = str_replace('%%Fullsearch%%', RenderFullSearch(), $tmpline);
     if (strstr($tmpline, '%%Mostpopular%%'))
        $tmpline = str_replace('%%Mostpopular%%', RenderMostPopular(), $tmpline);
     if (defined('WIKI_ADMIN') && strstr($tmpline, '%%ADMIN-'))
        $tmpline = ParseAdminTokens($tmpline);


     ///////////////////////////////////////////////////////
     // Replace tokens with the markup prepared for them above

     for ($i = 0; $i < $ntokens; $i++) {
        $tmpline = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $replacements[$i], $tmpline);
     }


     $html .= $tmpline . "\n";
  }

  // Reset the output mode once more after the last line.
  $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
 263 ?>