wiki_transform.php3

   1 <!-- $Id: wiki_transform.php3,v 1.13 2000-07-12 18:47:53 dairiki Exp $ -->
   2 <?
   3    // expects $pagehash and $html to be set
   4
   5    // Build $embedded[N], the HTML later substituted for each "[N]" reference.
   6    // Refs are escaped (quotes included) so that a stored ref cannot break
   7    // out of the single-quoted attribute it is placed in below.
   8    $embedded = array();  // start clean: never inherit entries set before this file ran
   9    for ($i = 1; $i <= NUM_LINKS; $i++) {
  10       if (empty($pagehash['refs'][$i])) continue;
  11       $embedded[$i] = htmlspecialchars($pagehash['refs'][$i], ENT_QUOTES);
  12       // refs ending in "png" become inline images, all others numbered links
  13       $embedded[$i] = preg_match("/png$/i", $pagehash['refs'][$i])
  14                     ? "<img src='" . $embedded[$i] . "'>"
  15                     : "<a href='" . $embedded[$i] . "'>[$i]</a>";
  16    }
  17
  18    $numlines = count($pagehash["content"]);
  19    // Main pass: turn each line of $pagehash["content"] into HTML appended to $html.
  20    // Loop over all lines of the page and apply transformation rules
  21    for ($index = 0; $index < $numlines; $index++) {
  22       unset($tokens);
  23       unset($replacements);
  24       $ntokens = 0;
  25       // $tokens[k] is a placeholder put into $tmpline; $replacements[k] is the HTML swapped back in later
  26       $tmpline = $pagehash["content"][$index];
  27
  28       if (!strlen($tmpline) || $tmpline == "\r") {
  29          // this is a blank line, send <p>
  30          $html .= SetHTMLOutputMode("p", ZERO_DEPTH, 0);
  31          continue;
  32       }
  33
  34 /* If your web server is not accessible to the general public, you may
  35 enable the code below, which allows embedded HTML. If just anyone can reach
  36 your web server it is highly advised that you do not allow this.
  37
  38       elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
  39          // HTML mode
  40          $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
  41          $html .= $matches[2];
  42          continue;
  43       }
  44 */
  45
  46
  47       //////////////////////////////////////////////////////////
  48       // New linking scheme: links are in brackets. This will
  49       // emulate typical HTML linking as well as Wiki linking.
  50
  51       // collect every bracketed span on the line. NOTE(review): the pattern also matches
  52       // all-digit spans such as [1]; how those are treated depends on ParseAndLink - confirm
  53       $numBracketLinks = preg_match_all("/\[.+?\]/", $tmpline, $brktlinks);
  54       /* On 12 Jul,2000 Jeff <dairiki@dairiki.org> adds:
  55        *
  56        * Simple sorting doesn't work, since (in ASCII) '[' comes between
  57        * the upper- and lower-case characters.
  58        *
  59        * Using sort "[[Link] [Link]" will come out wrong, using
  60        * rsort "[[link] [link]" will come out wrong.
  61        * (An appropriate usort would work.)
  62        *
  63        * I've added a look-behind assertion to the preg_replace which,
  64        * I think, fixes the problem.  I only hope that all PHP versions
  65        * support look-behind assertions....
  66       // sort instead of rsort or "[[link] [link]" will be rendered wrong.
  67       sort($brktlinks[0]);
  68       reset($brktlinks[0]);
  69        */
  70
  71       for ($i = 0; $i < $numBracketLinks; $i++) {
  72          $brktlink = preg_quote($brktlinks[0][$i]);
  73          $linktoken = $FieldSeparator . $FieldSeparator . ++$ntokens . $FieldSeparator;
  74          /* PS:
  75           * If you're wondering about the double $FieldSeparator,
  76           * consider what happens to (the admittedly sick):
  77           *   "[Link1] [Link2]1[Link3]"
  78           *
  79           * Answer: without the double field separator, it gets
  80           *  tokenized to "%1% %2%1%3%" (using % to represent $FieldSeparator),
  81           *  which will get munged as soon as '%1%' is substituted with its
  82           *  final value.
  83           */
  84          $tmpline = preg_replace("|(?<!\[)$brktlink|",
  85                                  $linktoken,
  86                                  $tmpline);
  87
  88          $tokens[] = $linktoken;
  89          $replacements[] = ParseAndLink($brktlinks[0][$i]);
  90       }
  91
  92       //////////////////////////////////////////////////////////
  93       // replace all URLs with tokens, so we don't confuse them
  94       // with Wiki words later. Wiki words in URLs break things.
  95
  96       $hasURLs = preg_match_all("/\b($AllowedProtocols):[^\s\<\>\[\]\"'\(\)]*[^\s\<\>\[\]\"'\(\)\,\.\?]/", $tmpline, $urls);
  97
  98       // have to sort, otherwise errors creep in when the domain appears
  99       // in two consecutive URLs on the same line, but the second is
 100       // longer e.g. http://c2.com followed by http://c2.com/wiki
 101       rsort($urls[0]);
 102       reset($urls[0]);
 103
 104       for ($i = 0; $i < $hasURLs; $i++) {
 105          $inplaceURL = preg_quote($urls[0][$i]);
 106          $URLtoken = $FieldSeparator . $FieldSeparator . ++$ntokens . $FieldSeparator;
 107          $tmpline = preg_replace("|$inplaceURL|",
 108                                  $URLtoken,
 109                                  $tmpline);
 110
 111          $tokens[] = $URLtoken;
 112          $replacements[] = LinkURL($urls[0][$i]);
 113       }
 114       // NOTE(review): ereg_replace() no longer exists as of PHP 7; port before running on modern PHP
 115       // escape HTML metachars
 116       $tmpline = ereg_replace("[&]", "&amp;", $tmpline);
 117       $tmpline = ereg_replace("[>]", "&gt;", $tmpline);
 118       $tmpline = ereg_replace("[<]", "&lt;", $tmpline);
 119
 120       // four or more dashes to <hr>
 121       $tmpline = ereg_replace("^-{4,}", "<hr>", $tmpline);
 122
 123
 124       // %%% are linebreaks
 125       $tmpline = str_replace("%%%", "<br>", $tmpline);
 126
 127       // bold italics
 128       $tmpline = preg_replace("|(''''')(.*?)(''''')|",
 129                               "<strong><em>\\2</em></strong>",
 130                               $tmpline);
 131
 132       // bold
 133       $tmpline = preg_replace("|(''')(.*?)(''')|",
 134                               "<strong>\\2</strong>",
 135                               $tmpline);
 136
 137       // bold (alternate __text__ markup)
 138       $tmpline = preg_replace("|(__)(.*?)(__)|",
 139                               "<strong>\\2</strong>",
 140                               $tmpline);
 141
 142       // italics
 143       $tmpline = preg_replace("|('')(.*?)('')|",
 144                               "<em>\\2</em>",
 145                               $tmpline);
 146
 147       // Link Wiki words
 148       // Wikiwords preceded by a '!' are not linked
 149       if (preg_match_all("#!?\b(([A-Z][a-z]+){2,})\b#",
 150                          $tmpline, $link)) {
 151          // uniq the list of matches
 152          unset($hash);
 153          for ($i = 0; $link[0][$i]; $i++) {
 154             // $realfile = $link[0][$i];
 155             $hash[$link[0][$i]]++;
 156          }
 157          // NOTE(review): str_replace() below would also hit a WikiWord inside a longer one - confirm
 158          // '!WikiName' keys sort first ('!' precedes letters), so they are tokenized before the bare name
 159          ksort($hash);
 160          while (list($realfile, $val) = each($hash)) {   // NOTE(review): each() is gone in PHP 8
 161             $token = $FieldSeparator . $FieldSeparator . ++$ntokens . $FieldSeparator;
 162             $tmpline = str_replace($realfile, $token, $tmpline);
 163             $tokens[] = $token;
 164             if (strstr($realfile, '!')) {
 165                $replacements[] = substr($realfile, 1);
 166             }
 167             elseif (IsWikiPage($dbi, $realfile)) {
 168                $replacements[] = LinkExistingWikiWord($realfile);
 169             } else {
 170                $replacements[] = LinkUnknownWikiWord($realfile);
 171             }
 172          }
 173       }
 174
 175       ///////////////////////////////////////////////////////
 176       // Replace tokens
 177       for ($i = 0; $i < $ntokens; $i++)
 178           $tmpline = str_replace($tokens[$i], $replacements[$i], $tmpline);
 179
 180
 181       // match and replace all user-defined links ([1], [2], [3]...)
 182       preg_match_all("|\[(\d+)\]|", $tmpline, $match);
 183       if (count($match[0])) {
 184          for ($k = 0; $k < count($match[0]); $k++) {
 185             if (! empty($embedded[$match[1][$k]])) {
 186                $linkpattern = preg_quote($match[0][$k]);
 187                $tmpline = preg_replace("|$linkpattern|",
 188                                        $embedded[$match[1][$k]],
 189                                        $tmpline);
 190             }
 191          }
 192       }
 193
 194       // HTML modes: pre, unordered/ordered lists, term/def
 195       if (preg_match("/(^\t)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
 196          // this is a dictionary list item
 197          $html .= SetHTMLOutputMode("dl", SINGLE_DEPTH, 1);
 198          $tmpline = "<dt>" . $matches[2] . "<dd>" . $matches[4];
 199
 200       // oops, the \d needed to be \d+, thanks alister@minotaur.nu
 201       } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
 202          // this is part of a list
 203          $numtabs = strlen($matches[1]);
 204          if ($matches[2] == "*") {
 205             $listtag = "ul";
 206          } else {
 207             $listtag = "ol"; // a rather tacit assumption. oh well.
 208          }
 209          $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
 210          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 211          $html .= "<li>";
 212
 213       // tabless markup for unordered and ordered lists
 214
 215       // first, unordered lists: one or more asterisks at the
 216       // start of a line indicate a <UL> block
 217
 218       } elseif (preg_match("/^([*]+)/", $tmpline, $matches)) {
 219          // this is part of an unordered list
 220          $numtabs = strlen($matches[1]);
 221          $listtag = "ul";
 222
 223          $tmpline = preg_replace("/^([*]+)/", "", $tmpline);
 224          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 225          $html .= "<li>";
 226
 227       // second, ordered lists <OL>
 228       } elseif (preg_match("/^([#]+)/", $tmpline, $matches)) {
 229          // this is part of an ordered list
 230          $numtabs = strlen($matches[1]);
 231          $listtag = "ol";
 232
 233          $tmpline = preg_replace("/^([#]+)/", "", $tmpline);
 234          $html .= SetHTMLOutputMode($listtag, SINGLE_DEPTH, $numtabs);
 235          $html .= "<li>";
 236
 237
 238       } elseif (preg_match("/^\s+/", $tmpline)) {
 239          // this is preformatted text, i.e. <pre>
 240          $html .= SetHTMLOutputMode("pre", ZERO_DEPTH, 0);
 241
 242       } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
 243          // lines starting with !,!!,!!! are headings
 244          if($whichheading[1] == '!') $heading = "h3";
 245          elseif($whichheading[1] == '!!') $heading = "h2";
 246          elseif($whichheading[1] == '!!!') $heading = "h1";
 247          $tmpline = preg_replace("/^!+/", "", $tmpline);
 248          $html .= SetHTMLOutputMode($heading, ZERO_DEPTH, 0);
 249
 250       } else {
 251          // it's ordinary output if nothing else
 252          $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
 253       }
 254       // swap the %%...%% markers for whatever the Render* helpers return
 255       $tmpline = str_replace("%%Search%%", RenderQuickSearch(), $tmpline);
 256       $tmpline = str_replace("%%Fullsearch%%", RenderFullSearch(), $tmpline);
 257       $tmpline = str_replace("%%Mostpopular%%", RenderMostPopular(), $tmpline);
 258
 259       $html .= "$tmpline";
 260    }
 261
 262    // final call with an empty mode; presumably closes whatever block is still open - confirm
 263    $html .= SetHTMLOutputMode("", ZERO_DEPTH, 0);
 264 ?>