lib/transform.php

   1 <!-- $Id: transform.php,v 1.4 2000-10-24 10:32:37 ahollosi Exp $ -->
   2 <?php
   3    // expects $pagehash and $html to be set
   4
   5    // Set up inline links and images
   6    for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
   7       if (! empty($pagehash['refs'][$i])) {
   8          if (preg_match("/png$/i", $pagehash['refs'][$i])) {
   9             // embed PNG images
  10             $embedded[$i] = "<img src='" . $pagehash['refs'][$i] . "'>";
  11          } else {
  12             // ordinary embedded link
  13             $embedded[$i] = "<a href='" . $pagehash['refs'][$i] . "'>[$i]</a>";
  14          }
  15       }
  16    }
  17
  18    $numlines = count($pagehash["content"]);
  19
  20    // only call these once, for efficiency
  21    $quick_search_box  = RenderQuickSearch();
  22    $full_search_box   = RenderFullSearch();
  23    $most_popular_list = RenderMostPopular();
  24
  25
  26
  27    // Loop over all lines of the page and apply transformation rules
  28    for ($index = 0; $index < $numlines; $index++) {
  29       unset($tokens);
  30       unset($replacements);
  31       $ntokens = 0;
  32
  33       $tmpline = $pagehash["content"][$index];
  34
  35       if (!strlen($tmpline) || $tmpline == "\r") {
  36          // this is a blank line, send <p>
  37          $html .= SetHTMLOutputMode("p", ZERO_DEPTH, 0);
  38          continue;
  39       }
  40
  41 /* If your web server is not accessble to the general public, you may
  42 allow this code below, which allows embedded HTML. If just anyone can reach
  43 your web server it is highly advised that you do not allow this.
  44
  45       elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
  46          // HTML mode
  47          $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
  48          $html .= $matches[2];
  49          continue;
  50       }
  51 */
  52
  53
  54       //////////////////////////////////////////////////////////
  55       // New linking scheme: links are in brackets. This will
  56       // emulate typical HTML linking as well as Wiki linking.
  57
  58       // match anything between brackets except only numbers
  59       // trying:
  60       $numBracketLinks = preg_match_all("/\[.+?\]/", $tmpline, $brktlinks);
  61       /* On 12 Jul,2000 Jeff <dairiki@dairiki.org> adds:
  62        *
  63        * Simple sorting doesnt work, since (in ASCII) '[' comes between
  64        * the upper- and lower-case characters.
  65        *
  66        * Using sort "[[Link] [Link]" will come out wrong, using
  67        * rsort "[[link] [link]" will come out wrong.
  68        * (An appropriate usort would work.)
  69        *
  70        * I've added a look-behind assertion to the preg_replace which,
  71        * I think, fixes the problem.  I only hope that all PHP versions
  72        * support look-behind assertions....
  73       // sort instead of rsort or "[[link] [link]" will be rendered wrong.
  74       sort($brktlinks[0]);
  75       reset($brktlinks[0]);
  76        */
  77
  78       for ($i = 0; $i < $numBracketLinks; $i++) {
  79          $brktlink = preg_quote($brktlinks[0][$i]);
  80          $linktoken = $FieldSeparator . $FieldSeparator . ++$ntokens . $FieldSeparator;
  81          /* PS:
  82           * If you're wondering about the double $FieldSeparator,
  83           * consider what happens to (the admittedly sick):
  84           *   "[Link1] [Link2]1[Link3]"
  85           *
  86           * Answer: without the double field separator, it gets
  87           *  tokenized to "%1% %2%1%3%" (using % to represent $FieldSeparator),
  88           *  which will get munged as soon as '%1%' is substituted with it's
  89           *  final value.
  90           */
  91          $tmpline = preg_replace("|(?<!\[)$brktlink|",
  92                                  $linktoken,
  93                                  $tmpline);
  94
  95          $tokens[] = $linktoken;
  96          $link = ParseAndLink($brktlinks[0][$i]);
  97          $replacements[] = $link['link'];
  98       }
  99
 100       //////////////////////////////////////////////////////////
 101       // replace all URL's with tokens, so we don't confuse them
 102       // with Wiki words later. Wiki words in URL's break things.
 103
 104       $hasURLs = preg_match_all("/\b($AllowedProtocols):[^\s\<\>\[\]\"'\(\)]*[^\s\<\>\[\]\"'\(\)\,\.\?]/", $tmpline, $urls);
 105
 106       // have to sort, otherwise errors creep in when the domain appears
 107       // in two consecutive URL's on the same line, but the second is
 108       // longer e.g. http://c2.com followed by http://c2.com/wiki
 109       rsort($urls[0]);
 110       reset($urls[0]);
 111
 112       for ($i = 0; $i < $hasURLs; $i++) {
 113          $inplaceURL = preg_quote($urls[0][$i]);
 114          $URLtoken = $FieldSeparator . $FieldSeparator . ++$ntokens . $FieldSeparator;
 115          $tmpline = preg_replace("|$inplaceURL|",
 116                                  $URLtoken,
 117                                  $tmpline);
 118
 119          $tokens[] = $URLtoken;
 120          $replacements[] = LinkURL($urls[0][$i]);
 121       }
 122
 123       // escape HTML metachars
 124       $tmpline = ereg_replace("[&]", "&amp;", $tmpline);
 125       $tmpline = ereg_replace("[>]", "&gt;", $tmpline);
 126       $tmpline = ereg_replace("[<]", "&lt;", $tmpline);
 127
 128       // four or more dashes to <hr>
 129       $tmpline = ereg_replace("^-{4,}", "<hr>", $tmpline);
 130
 131
 132       // %%% are linebreaks
 133       $tmpline = str_replace("%%%", "<br>", $tmpline);
 134
 135       // bold italics
 136       $tmpline = preg_replace("|(''''')(.*?)(''''')|",
 137                               "<strong><em>\\2</em></strong>",
 138                               $tmpline);
 139
 140       // bold
 141       $tmpline = preg_replace("|(''')(.*?)(''')|",
 142                               "<strong>\\2</strong>",
 143                               $tmpline);
 144
 145       // bold
 146       $tmpline = preg_replace("|(__)(.*?)(__)|",
 147                               "<strong>\\2</strong>",
 148                               $tmpline);
 149
 150       // italics
 151       $tmpline = preg_replace("|('')(.*?)('')|",
 152                               "<em>\\2</em>",
 153                               $tmpline);
 154
 155       // Link Wiki words
 156       // Wikiwords preceeded by a '!' are not linked
 157       if (preg_match_all("#!?\b(([A-Z][a-z]+){2,})\b#",
 158                          $tmpline, $link)) {
 159          // uniq the list of matches
 160          unset($hash);
 161          for ($i = 0; $link[0][$i]; $i++) {
 162             if(strstr($link[0][$i], '!'))       // hashval sports a value
 163                $hashval = "0000:".$link[0][$i]; // in front that guarantees
 164             else                                // correct sorting
 165                $hashval = sprintf("%04d:%s", 9876-strlen($link[0][$i])
 166                                           , $link[0][$i]);
 167             $hash[$hashval] = 1;
 168          }
 169
 170          // all '!WikiName' entries are sorted first
 171          ksort($hash);
 172          while (list($realfile, $val) = each($hash)) {
 173             $realfile = substr($realfile, 5);   // get rid of sort value
 174             $token = $FieldSeparator . $FieldSeparator . ++$ntokens . $FieldSeparator;
 175             $tmpline = str_replace($realfile, $token, $tmpline);
 176
 177             $tokens[] = $token;
 178             if (strstr($realfile, '!')) {
 179                $replacements[] = substr($realfile, 1);
 180             }
 181             elseif (IsWikiPage($dbi, $realfile)) {
 182                $replacements[] = LinkExistingWikiWord($realfile);
 183             } else {
 184                $replacements[] = LinkUnknownWikiWord($realfile);
 185             }
 186          }
 187       }
 188
 189       ///////////////////////////////////////////////////////
 190       // Replace tokens
 191       for ($i = 0; $i < $ntokens; $i++)
 192           $tmpline = str_replace($tokens[$i], $replacements[$i], $tmpline);
 193
 194
 195       // match and replace all user-defined links ([1], [2], [3]...)
 196       preg_match_all("|\[(\d+)\]|", $tmpline, $match);
 197       if (count($match[0])) {
 198          for ($k = 0; $k < count($match[0]); $k++) {
 199             if (! empty($embedded[$match[1][$k]])) {
 200                $linkpattern = preg_quote($match[0][$k]);
 201                $tmpline = preg_replace("|$linkpattern|",
 202                                        $embedded[$match[1][$k]],
 203                                        $tmpline);
 204             }
 205          }
 206       }
 207
 208       // HTML modes: pre, unordered/ordered lists, term/def  (using TAB)
 209       if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
 210          // this is a dictionary list item
 211          $numtabs = strlen($matches[1]);
 212          $html .= SetHTMLOutputMode("dl", SINGLE_DEPTH, $numtabs);
 213          $tmpline = '';
 214          if(trim($matches[2]))
 215             $tmpline = "<dt>" . $matches[2];
 216          $tmpline .= "<dd>" . $matches[4];
 217
 218       } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
 219          // this is part of a list
 220          $numtabs = strlen($matches[1]);
 221          if ($matches[2] == "*") {
 222             $listtag = "ul";
 223          } else {
 224             $listtag = "ol"; // a rather tacit assumption. oh well.
 225          }
 226          $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
 227          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 228          $html .= "<li>";
 229
 230       // tabless markup for unordered and ordered lists
 231       // list types can be mixed, so we only look at the last
 232       // character. Changes e.g. from "**#*" to "###*" go unnoticed.
 233       // and wouldn't make a difference to the HTML layout anyway.
 234
 235       // unordered lists <UL>: "*"
 236       } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
 237          // this is part of an unordered list
 238          $numtabs = strlen($matches[1]);
 239          $listtag = "ul";
 240
 241          $tmpline = preg_replace("/^([#*]*\*)/", "", $tmpline);
 242          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 243          $html .= "<li>";
 244
 245       // ordered lists <OL>: "#"
 246       } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
 247          // this is part of an ordered list
 248          $numtabs = strlen($matches[1]);
 249          $listtag = "ol";
 250
 251          $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
 252          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 253          $html .= "<li>";
 254
 255       // definition lists <DL>: ";text:text"
 256       } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
 257          // this is a dictionary list item
 258          $numtabs = strlen($matches[1]);
 259          $html .= SetHTMLOutputMode("dl", SINGLE_DEPTH, $numtabs);
 260          $tmpline = '';
 261          if(trim($matches[2]))
 262             $tmpline = "<dt>" . $matches[2];
 263          $tmpline .= "<dd>" . $matches[3];
 264
 265
 266       } elseif (preg_match("/^\s+/", $tmpline)) {
 267          // this is preformatted text, i.e. <pre>
 268          $html .= SetHTMLOutputMode("pre", ZERO_DEPTH, 0);
 269
 270       } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
 271          // lines starting with !,!!,!!! are headings
 272          if($whichheading[1] == '!') $heading = "h3";
 273          elseif($whichheading[1] == '!!') $heading = "h2";
 274          elseif($whichheading[1] == '!!!') $heading = "h1";
 275          $tmpline = preg_replace("/^!+/", "", $tmpline);
 276          $html .= SetHTMLOutputMode($heading, ZERO_DEPTH, 0);
 277
 278       } else {
 279          // it's ordinary output if nothing else
 280          $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
 281       }
 282
 283       $tmpline = str_replace("%%Search%%", $quick_search_box, $tmpline);
 284       $tmpline = str_replace("%%Fullsearch%%", $full_search_box, $tmpline);
 285       $tmpline = str_replace("%%Mostpopular%%", $most_popular_list, $tmpline);
 286
 287       $html .= "$tmpline\n";
 288    }
 289
 290
 291    $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
 292 ?>