lib/transform.php

   1 <!-- $Id: transform.php,v 1.7 2000-11-08 15:40:00 ahollosi Exp $ -->
   2 <?php
   3    // expects $pagehash and $html to be set
   4
   5
   // Replace every match of $pattern in $str with a placeholder token.
   //
   // A placeholder has the form
   //    $FieldSeparator . $FieldSeparator . <token number> . $FieldSeparator
   // where <token number> is the value of $ntokens at the time of the
   // match. The text that was replaced is appended to $orig, so callers
   // can rewrite those entries and substitute them back into the line.
   //
   // $str      text to scan
   // $pattern  PCRE fragment without delimiters; it is embedded in a
   //           "/.../" regexp, so a literal "/" must already be escaped
   // $orig     (by reference) receives the replaced strings, in match order
   // $ntokens  (by reference) running token counter, incremented per match
   //
   // Returns $str with every match replaced by its placeholder.
   function tokenize($str, $pattern, &$orig, &$ntokens) {
      global $FieldSeparator;

      $new = "";
      while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
         // A pattern that matches the empty string at the very start of
         // $str consumes nothing; looping again would never terminate.
         if (!strlen($matches[0]))
            break;

         $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
         // keep the text in front of the match, then the placeholder
         $new .= $matches[1] . $linktoken;
         $orig[] = $matches[2];
         // continue scanning behind the match
         $str = substr($str, strlen($matches[0]));
      }
      // whatever is left contains no further match
      $new .= $str;
      return $new;
   }
  21
  22
  23    // Prepare replacements for references [\d+]
  24    for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
  25       if (! empty($pagehash['refs'][$i])) {
  26          if (preg_match("/($InlineImages)$/i", $pagehash['refs'][$i])) {
  27             // embed images
  28             $embedded[$i] = LinkImage($pagehash['refs'][$i]);
  29          } else {
  30             // ordinary link
  31             $embedded[$i] = "<a href=\"" . $pagehash['refs'][$i] . "\">[$i]</a>";
  32          }
  33       }
  34    }
  35
  36    $numlines = count($pagehash["content"]);
  37
  38    // only call these once, for efficiency
  39    $quick_search_box  = RenderQuickSearch();
  40    $full_search_box   = RenderFullSearch();
  41    $most_popular_list = RenderMostPopular();
  42
  43
  44    // Loop over all lines of the page and apply transformation rules
  45    for ($index = 0; $index < $numlines; $index++) {
  46       unset($tokens);
  47       unset($replacements);
  48       $ntokens = 0;
  49       $replacements = array();
  50
  51       $tmpline = $pagehash["content"][$index];
  52
  53       if (!strlen($tmpline) || $tmpline == "\r") {
  54          // this is a blank line, send <p>
  55          $html .= SetHTMLOutputMode("p", ZERO_DEPTH, 0);
  56          continue;
  57       }
  58
  59 /* If your web server is not accessble to the general public, you may
  60 allow this code below, which allows embedded HTML. If just anyone can reach
  61 your web server it is highly advised that you do not allow this.
  62
  63       elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
  64          // HTML mode
  65          $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
  66          $html .= $matches[2];
  67          continue;
  68       }
  69 */
  70
  71
  72       //////////////////////////////////////////////////////////
  73       // New linking scheme: links are in brackets. This will
  74       // emulate typical HTML linking as well as Wiki linking.
  75
  76       // First need to protect [[.
  77       $oldn = $ntokens;
  78       $tmpline = tokenize($tmpline, "\[\[", $replacements, $ntokens);
  79       while ($oldn < $ntokens)
  80          $replacements[$oldn++] = "[";
  81
  82       // Now process the [\d+] links which are numeric references
  83       $oldn = $ntokens;
  84       $tmpline = tokenize($tmpline, "\[\s*\d+\s*\]", $replacements ,$ntokens);
  85       while ($oldn < $ntokens) {
  86          $num = (int)substr($replacements[$oldn], 1);
  87          if (! empty($embedded[$num]))
  88             $replacements[$oldn] = $embedded[$num];
  89          $oldn++;
  90       }
  91
  92       // match anything else between brackets
  93       $oldn = $ntokens;
  94       $tmpline = tokenize($tmpline, "\[.+?\]", $replacements, $ntokens);
  95       while ($oldn < $ntokens) {
  96         $link = ParseAndLink($replacements[$oldn]);
  97         $replacements[$oldn] = $link['link'];
  98         $oldn++;
  99       }
 100
 101       //////////////////////////////////////////////////////////
 102       // replace all URL's with tokens, so we don't confuse them
 103       // with Wiki words later. Wiki words in URL's break things.
 104
 105       $tmpline = tokenize($tmpline, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $replacements, $ntokens);
 106       while ($oldn < $ntokens) {
 107         if($replacements[$oldn][0] == '!')
 108            $replacements[$oldn] = substr($replacements[$oldn], 1);
 109         else
 110            $replacements[$oldn] = LinkURL($replacements[$oldn]);
 111         $oldn++;
 112       }
 113
 114       //////////////////////////////////////////////////////////
 115       // Link Wiki words
 116       // Wikiwords preceeded by a '!' are not linked
 117
 118       $oldn = $ntokens;
 119       $tmpline = tokenize($tmpline, "!?$WikiNameRegexp", $replacements, $ntokens);
 120       while ($oldn < $ntokens) {
 121         $old = $replacements[$oldn];
 122         if ($old[0] == '!') {
 123           $replacements[$oldn] = substr($old,1);
 124         } elseif (IsWikiPage($dbi, $old)) {
 125           $replacements[$oldn] = LinkExistingWikiWord($old);
 126         } else {
 127           $replacements[$oldn] = LinkUnknownWikiWord($old);
 128         }
 129         $oldn++;
 130       }
 131
 132       // escape HTML metachars
 133       $tmpline = str_replace("&", "&amp;", $tmpline);
 134       $tmpline = str_replace(">", "&gt;", $tmpline);
 135       $tmpline = str_replace("<", "&lt;", $tmpline);
 136
 137       // four or more dashes to <hr>
 138       $tmpline = ereg_replace("^-{4,}", "<hr>", $tmpline);
 139
 140       // %%% are linebreaks
 141       $tmpline = str_replace("%%%", "<br>", $tmpline);
 142
 143       // bold italics
 144       $tmpline = preg_replace("|(''''')(.*?)(''''')|",
 145                               "<strong><em>\\2</em></strong>", $tmpline);
 146
 147       // bold
 148       $tmpline = preg_replace("|(''')(.*?)(''')|",
 149                               "<strong>\\2</strong>", $tmpline);
 150
 151       // bold
 152       $tmpline = preg_replace("|(__)(.*?)(__)|",
 153                               "<strong>\\2</strong>", $tmpline);
 154
 155       // italics
 156       $tmpline = preg_replace("|('')(.*?)('')|",
 157                               "<em>\\2</em>", $tmpline);
 158
 159
 160       // HTML modes: pre, unordered/ordered lists, term/def  (using TAB)
 161       if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
 162          // this is a dictionary list item
 163          $numtabs = strlen($matches[1]);
 164          $html .= SetHTMLOutputMode("dl", SINGLE_DEPTH, $numtabs);
 165          $tmpline = '';
 166          if(trim($matches[2]))
 167             $tmpline = "<dt>" . $matches[2];
 168          $tmpline .= "<dd>" . $matches[4];
 169
 170       } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
 171          // this is part of a list
 172          $numtabs = strlen($matches[1]);
 173          if ($matches[2] == "*") {
 174             $listtag = "ul";
 175          } else {
 176             $listtag = "ol"; // a rather tacit assumption. oh well.
 177          }
 178          $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
 179          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 180          $html .= "<li>";
 181
 182       // tabless markup for unordered and ordered lists
 183       // list types can be mixed, so we only look at the last
 184       // character. Changes e.g. from "**#*" to "###*" go unnoticed.
 185       // and wouldn't make a difference to the HTML layout anyway.
 186
 187       // unordered lists <UL>: "*"
 188       } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
 189          // this is part of an unordered list
 190          $numtabs = strlen($matches[1]);
 191          $listtag = "ul";
 192
 193          $tmpline = preg_replace("/^([#*]*\*)/", "", $tmpline);
 194          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 195          $html .= "<li>";
 196
 197       // ordered lists <OL>: "#"
 198       } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
 199          // this is part of an ordered list
 200          $numtabs = strlen($matches[1]);
 201          $listtag = "ol";
 202
 203          $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
 204          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 205          $html .= "<li>";
 206
 207       // definition lists <DL>: ";text:text"
 208       } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
 209          // this is a dictionary list item
 210          $numtabs = strlen($matches[1]);
 211          $html .= SetHTMLOutputMode("dl", SINGLE_DEPTH, $numtabs);
 212          $tmpline = '';
 213          if(trim($matches[2]))
 214             $tmpline = "<dt>" . $matches[2];
 215          $tmpline .= "<dd>" . $matches[3];
 216
 217
 218       } elseif (preg_match("/^\s+/", $tmpline)) {
 219          // this is preformatted text, i.e. <pre>
 220          $html .= SetHTMLOutputMode("pre", ZERO_DEPTH, 0);
 221
 222       } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
 223          // lines starting with !,!!,!!! are headings
 224          if($whichheading[1] == '!') $heading = "h3";
 225          elseif($whichheading[1] == '!!') $heading = "h2";
 226          elseif($whichheading[1] == '!!!') $heading = "h1";
 227          $tmpline = preg_replace("/^!+/", "", $tmpline);
 228          $html .= SetHTMLOutputMode($heading, ZERO_DEPTH, 0);
 229
 230       } else {
 231          // it's ordinary output if nothing else
 232          $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
 233       }
 234
 235       $tmpline = str_replace("%%Search%%", $quick_search_box, $tmpline);
 236       $tmpline = str_replace("%%Fullsearch%%", $full_search_box, $tmpline);
 237       $tmpline = str_replace("%%Mostpopular%%", $most_popular_list, $tmpline);
 238       if(defined('WIKI_ADMIN') && strstr($tmpline, "%%ADMIN-"))
 239          $tmpline = ParseAdminTokens($tmpline);
 240
 241       ///////////////////////////////////////////////////////
 242       // Replace tokens
 243
 244       for ($i = 0; $i < $ntokens; $i++)
 245           $tmpline = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $replacements[$i], $tmpline);
 246
 247
 248       $html .= "$tmpline\n";
 249    }
 250
 251    $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
 252 ?>