lib/transform.php

   1 <?php rcs_id('$Id: transform.php,v 1.8 2001-01-04 18:34:15 ahollosi Exp $');
   2    // expects $pagehash and $html to be set
   3
   /**
    * Replace every match of $pattern in $str with a numbered placeholder token.
    *
    * Each matched text is appended to $orig, and the match is replaced by the
    * token  FS FS <number> FS  (FS being the global $FieldSeparator), where
    * <number> is the value of $ntokens at that moment; $ntokens is then
    * incremented.
    *
    * The search is restarted on the remainder of the string after each match,
    * so anchors and assertions such as \b are evaluated against the remaining
    * text only.
    *
    * @param string $str      Text to scan.
    * @param string $pattern  PCRE fragment without delimiters. It is embedded
    *                         between '/' delimiters, so it must not contain an
    *                         unescaped '/'.
    * @param array  $orig     (by reference) Receives the matched strings.
    * @param int    $ntokens  (by reference) Next token number; advanced by one
    *                         per match.
    * @return string          The tokenized string.
    */
   function tokenize($str, $pattern, &$orig, &$ntokens) {
      global $FieldSeparator;

      $new = '';
      while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
         // A zero-length match consumes nothing, so $str would never get
         // shorter and the loop would never end. Stop scanning instead.
         if ($matches[0] === '')
            break;

         $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
         $new .= $matches[1] . $linktoken;
         $orig[] = $matches[2];

         // Continue behind the match. The cast covers PHP versions in which
         // substr() yields false instead of '' at the end of the string.
         $str = (string) substr($str, strlen($matches[0]));
      }

      // Whatever is left contains no further match.
      $new .= $str;
      return $new;
   }
  19
  20
  21    // Prepare replacements for references [\d+]
  22    for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
  23       if (! empty($pagehash['refs'][$i])) {
  24          if (preg_match("/($InlineImages)$/i", $pagehash['refs'][$i])) {
  25             // embed images
  26             $embedded[$i] = LinkImage($pagehash['refs'][$i]);
  27          } else {
  28             // ordinary link
  29             $embedded[$i] = LinkURL($pagehash['refs'][$i], "[$i]");
  30          }
  31       }
  32    }
  33
  34
  35    // only call these once, for efficiency
  36    $quick_search_box  = RenderQuickSearch();
  37    $full_search_box   = RenderFullSearch();
  38    $most_popular_list = RenderMostPopular();
  39
  40
  41    // Loop over all lines of the page and apply transformation rules
  42    $numlines = count($pagehash["content"]);
  43
  44    for ($index = 0; $index < $numlines; $index++) {
  45       unset($tokens);
  46       unset($replacements);
  47       $ntokens = 0;
  48       $replacements = array();
  49
  50       $tmpline = $pagehash['content'][$index];
  51
  52       if (!strlen($tmpline) || $tmpline == "\r") {
  53          // this is a blank line, send <p>
  54          $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
  55          continue;
  56       }
  57
  58 /* If your web server is not accessble to the general public, you may
  59 allow this code below, which allows embedded HTML. If just anyone can reach
  60 your web server it is highly advised that you do not allow this.
  61
  62       elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
  63          // HTML mode
  64          $html .= SetHTMLOutputMode("", ZERO_LEVEL, 0);
  65          $html .= $matches[2];
  66          continue;
  67       }
  68 */
  69
  70
  71       //////////////////////////////////////////////////////////
  72       // New linking scheme: links are in brackets. This will
  73       // emulate typical HTML linking as well as Wiki linking.
  74
  75       // First need to protect [[.
  76       $oldn = $ntokens;
  77       $tmpline = tokenize($tmpline, '\[\[', $replacements, $ntokens);
  78       while ($oldn < $ntokens)
  79          $replacements[$oldn++] = '[';
  80
  81       // Now process the [\d+] links which are numeric references
  82       $oldn = $ntokens;
  83       $tmpline = tokenize($tmpline, '\[\s*\d+\s*\]', $replacements, $ntokens);
  84       while ($oldn < $ntokens) {
  85          $num = (int) substr($replacements[$oldn], 1);
  86          if (! empty($embedded[$num]))
  87             $replacements[$oldn] = $embedded[$num];
  88          $oldn++;
  89       }
  90
  91       // match anything else between brackets
  92       $oldn = $ntokens;
  93       $tmpline = tokenize($tmpline, '\[.+?\]', $replacements, $ntokens);
  94       while ($oldn < $ntokens) {
  95         $link = ParseAndLink($replacements[$oldn]);
  96         $replacements[$oldn] = $link['link'];
  97         $oldn++;
  98       }
  99
 100       //////////////////////////////////////////////////////////
 101       // replace all URL's with tokens, so we don't confuse them
 102       // with Wiki words later. Wiki words in URL's break things.
 103       // URLs preceeded by a '!' are not linked
 104
 105       $tmpline = tokenize($tmpline, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $replacements, $ntokens);
 106       while ($oldn < $ntokens) {
 107         if($replacements[$oldn][0] == '!')
 108            $replacements[$oldn] = substr($replacements[$oldn], 1);
 109         else
 110            $replacements[$oldn] = LinkURL($replacements[$oldn]);
 111         $oldn++;
 112       }
 113
 114       //////////////////////////////////////////////////////////
 115       // Link Wiki words
 116       // Wikiwords preceeded by a '!' are not linked
 117
 118       $oldn = $ntokens;
 119       $tmpline = tokenize($tmpline, "!?$WikiNameRegexp", $replacements, $ntokens);
 120       while ($oldn < $ntokens) {
 121         $old = $replacements[$oldn];
 122         if ($old[0] == '!') {
 123           $replacements[$oldn] = substr($old,1);
 124         } elseif (IsWikiPage($dbi, $old)) {
 125           $replacements[$oldn] = LinkExistingWikiWord($old);
 126         } else {
 127           $replacements[$oldn] = LinkUnknownWikiWord($old);
 128         }
 129         $oldn++;
 130       }
 131
 132
 133       //////////////////////////////////////////////////////////
 134       // escape HTML metachars
 135       $tmpline = str_replace('&', '&amp;', $tmpline);
 136       $tmpline = str_replace('>', '&gt;', $tmpline);
 137       $tmpline = str_replace('<', '&lt;', $tmpline);
 138
 139       // four or more dashes to <hr>
 140       $tmpline = ereg_replace("^-{4,}", '<hr>', $tmpline);
 141
 142       // %%% are linebreaks
 143       $tmpline = str_replace('%%%', '<br>', $tmpline);
 144
 145       // bold italics (old way)
 146       $tmpline = preg_replace("|(''''')(.*?)(''''')|",
 147                               "<strong><em>\\2</em></strong>", $tmpline);
 148
 149       // bold (old way)
 150       $tmpline = preg_replace("|(''')(.*?)(''')|",
 151                               "<strong>\\2</strong>", $tmpline);
 152
 153       // bold
 154       $tmpline = preg_replace("|(__)(.*?)(__)|",
 155                               "<strong>\\2</strong>", $tmpline);
 156
 157       // italics
 158       $tmpline = preg_replace("|('')(.*?)('')|",
 159                               "<em>\\2</em>", $tmpline);
 160
 161
 162       //////////////////////////////////////////////////////////
 163       // unordered, ordered, and dictionary list  (using TAB)
 164
 165       if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
 166          // this is a dictionary list (<dl>) item
 167          $numtabs = strlen($matches[1]);
 168          $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
 169          $tmpline = '';
 170          if(trim($matches[2]))
 171             $tmpline = '<dt>' . $matches[2];
 172          $tmpline .= '<dd>' . $matches[4];
 173
 174       } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
 175          // this is part of a list (<ul>, <ol>)
 176          $numtabs = strlen($matches[1]);
 177          if ($matches[2] == '*') {
 178             $listtag = 'ul';
 179          } else {
 180             $listtag = 'ol'; // a rather tacit assumption. oh well.
 181          }
 182          $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
 183          $html .= SetHTMLOutputMode($listtag, NESTED_LEVEL, $numtabs);
 184          $html .= '<li>';
 185
 186
 187       //////////////////////////////////////////////////////////
 188       // tabless markup for unordered, ordered, and dictionary lists
 189       // ul/ol list types can be mixed, so we only look at the last
 190       // character. Changes e.g. from "**#*" to "###*" go unnoticed.
 191       // and wouldn't make a difference to the HTML layout anyway.
 192
 193       // unordered lists <UL>: "*"
 194       } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
 195          // this is part of an unordered list
 196          $numtabs = strlen($matches[1]);
 197          $tmpline = preg_replace("/^([#*]*\*)/", '', $tmpline);
 198          $html .= SetHTMLOutputMode('ul', NESTED_LEVEL, $numtabs);
 199          $html .= '<li>';
 200
 201       // ordered lists <OL>: "#"
 202       } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
 203          // this is part of an ordered list
 204          $numtabs = strlen($matches[1]);
 205          $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
 206          $html .= SetHTMLOutputMode('ol', NESTED_LEVEL, $numtabs);
 207          $html .= '<li>';
 208
 209       // definition lists <DL>: ";text:text"
 210       } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
 211          // this is a dictionary list item
 212          $numtabs = strlen($matches[1]);
 213          $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
 214          $tmpline = '';
 215          if(trim($matches[2]))
 216             $tmpline = '<dt>' . $matches[2];
 217          $tmpline .= '<dd>' . $matches[3];
 218
 219
 220       //////////////////////////////////////////////////////////
 221       // remaining modes: preformatted text, headings, normal text
 222
 223       } elseif (preg_match("/^\s+/", $tmpline)) {
 224          // this is preformatted text, i.e. <pre>
 225          $html .= SetHTMLOutputMode('pre', ZERO_LEVEL, 0);
 226
 227       } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
 228          // lines starting with !,!!,!!! are headings
 229          if($whichheading[1] == '!') $heading = 'h3';
 230          elseif($whichheading[1] == '!!') $heading = 'h2';
 231          elseif($whichheading[1] == '!!!') $heading = 'h1';
 232          $tmpline = preg_replace("/^!+/", '', $tmpline);
 233          $html .= SetHTMLOutputMode($heading, ZERO_LEVEL, 0);
 234
 235       } else {
 236          // it's ordinary output if nothing else
 237          $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
 238       }
 239
 240       $tmpline = str_replace('%%Search%%', $quick_search_box, $tmpline);
 241       $tmpline = str_replace('%%Fullsearch%%', $full_search_box, $tmpline);
 242       $tmpline = str_replace('%%Mostpopular%%', $most_popular_list, $tmpline);
 243       if(defined('WIKI_ADMIN') && strstr($tmpline, '%%ADMIN-'))
 244          $tmpline = ParseAdminTokens($tmpline);
 245
 246
 247       ///////////////////////////////////////////////////////
 248       // Replace tokens
 249
 250       for ($i = 0; $i < $ntokens; $i++)
 251           $tmpline = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $replacements[$i], $tmpline);
 252
 253
 254       $html .= $tmpline . "\n";
 255    }
 256
 257    $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
 258 ?>