lib/InlineParser.php

   1 <?php
   2
   3 /* Copyright (C) 2002 Geoffrey T. Dairiki <dairiki@dairiki.org>
   4  * Copyright (C) 2004-2010 Reini Urban
   5  * Copyright (C) 2008-2010 Marc-Etienne Vargenau, Alcatel-Lucent
   6  *
   7  * This file is part of PhpWiki.
   8  *
   9  * PhpWiki is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * PhpWiki is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License along
  20  * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
  21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  22  */
  23 /**
  24  * This is the code which deals with the inline part of the
  25  * wiki-markup.
  26  *
  27  * @package Markup
  28  * @author Geoffrey T. Dairiki, Reini Urban
  29  */
  30
  31 /**
  32  * This is the character used in wiki markup to escape characters with
  33  * special meaning.
  34  */
  35 define('ESCAPE_CHAR', '~');
  36
  37 require_once 'lib/CachedMarkup.php';
  38 require_once(dirname(__FILE__) . '/stdlib.php');
  39
  40 function WikiEscape($text)
  41 {
  42     return str_replace('#', ESCAPE_CHAR . '#', $text);
  43 }
  44
  45 function UnWikiEscape($text)
  46 {
  47     return preg_replace('/' . ESCAPE_CHAR . '(.)/', '\1', $text);
  48 }
  49
/**
 * Return type from RegexpSet::match and RegexpSet::nextMatch.
 *
 * A plain value holder: RegexpSet::_match() creates an instance and fills in
 * all four fields after a successful match.
 *
 * @see RegexpSet
 */
class RegexpSet_match
{
    /**
     * The text leading up to the match.
     */
    public $prematch;
    /**
     * The matched text.
     */
    public $match;
    /**
     * The text following the matched text.
     */
    public $postmatch;
    /**
     * Index of the regular expression which matched.
     */
    public $regexp_ind;
}
  74
  75 /**
  76  * A set of regular expressions.
  77  *
  78  * This class is probably only useful for InlineTransformer.
  79  */
  80 class RegexpSet
  81 {
  82     /**
  83      * @param array $regexps A list of regular expressions.  The
  84      * regular expressions should not include any sub-pattern groups
  85      * "(...)".  (Anonymous groups, like "(?:...)", as well as
  86      * look-ahead and look-behind assertions are okay.)
  87      */
  88     function __construct($regexps)
  89     {
  90         assert($regexps);
  91         $this->_regexps = array_unique($regexps);
  92         if (!defined('_INLINE_OPTIMIZATION')) define('_INLINE_OPTIMIZATION', 0);
  93     }
  94
  95     /**
  96      * Search text for the next matching regexp from the Regexp Set.
  97      *
  98      * @param string $text The text to search.
  99      *
 100      * @return RegexpSet_match A RegexpSet_match object, or false if no match.
 101      */
 102     function match($text)
 103     {
 104         return $this->_match($text, $this->_regexps, '*?');
 105     }
 106
 107     /**
 108      * Search for next matching regexp.
 109      *
 110      * Here, 'next' has two meanings:
 111      *
 112      * Match the next regexp(s) in the set, at the same position as the last match.
 113      *
 114      * If that fails, match the whole RegexpSet, starting after the position of the
 115      * previous match.
 116      *
 117      * @param string $text Text to search.
 118      *
 119      * @param RegexpSet_match $prevMatch A RegexpSet_match object.
 120      * $prevMatch should be a match object obtained by a previous
 121      * match upon the same value of $text.
 122      *
 123      * @return RegexpSet_match A RegexpSet_match object, or false if no match.
 124      */
 125     function nextMatch($text, $prevMatch)
 126     {
 127         // Try to find match at same position.
 128         $pos = strlen($prevMatch->prematch);
 129         $regexps = array_slice($this->_regexps, $prevMatch->regexp_ind + 1);
 130         if ($regexps) {
 131             $repeat = sprintf('{%d}', $pos);
 132             if (($match = $this->_match($text, $regexps, $repeat))) {
 133                 $match->regexp_ind += $prevMatch->regexp_ind + 1;
 134                 return $match;
 135             }
 136
 137         }
 138
 139         // Failed.  Look for match after current position.
 140         $repeat = sprintf('{%d,}?', $pos + 1);
 141         return $this->_match($text, $this->_regexps, $repeat);
 142     }
 143
 144     // Syntax: http://www.pcre.org/pcre.txt
 145     //   x - EXTENDED, ignore whitespace
 146     //   s - DOTALL
 147     //   A - ANCHORED
 148     //   S - STUDY
 149     function _match($text, $regexps, $repeat)
 150     {
 151         // If one of the regexps is an empty string, php will crash here:
 152         // sf.net: Fatal error: Allowed memory size of 8388608 bytes exhausted
 153         //         (tried to allocate 634 bytes)
 154         if (_INLINE_OPTIMIZATION) { // disabled, wrong
 155             // So we try to minize memory usage, by looping explicitly,
 156             // and storing only those regexp which actually match.
 157             // There may be more than one, so we have to find the longest,
 158             // and match inside until the shortest is empty.
 159             $matched = array();
 160             $matched_ind = array();
 161             for ($i = 0; $i < count($regexps); $i++) {
 162                 if (!trim($regexps[$i])) {
 163                     trigger_error("empty regexp $i", E_USER_WARNING);
 164                     continue;
 165                 }
 166                 $pat = "/ ( . $repeat ) ( " . $regexps[$i] . " ) /x";
 167                 if (preg_match($pat, $text, $_m)) {
 168                     $m = $_m; // FIXME: prematch, postmatch is wrong
 169                     $matched[] = $regexps[$i];
 170                     $matched_ind[] = $i;
 171                     $regexp_ind = $i;
 172                 }
 173             }
 174             // To overcome ANCHORED:
 175             // We could sort by longest match and iterate over these.
 176             if (empty($matched)) return false;
 177         }
 178         $match = new RegexpSet_match;
 179
 180         // Optimization: if the matches are only "$" and another, then omit "$"
 181         if (!_INLINE_OPTIMIZATION or count($matched) > 2) {
 182             assert(!empty($repeat));
 183             assert(!empty($regexps));
 184             // We could do much better, if we would know the matching markup for the
 185             // longest regexp match:
 186             $hugepat = "/ ( . $repeat ) ( (" . join(')|(', $regexps) . ") ) /Asx";
 187             // Proposed premature optimization 1:
 188             //$hugepat= "/ ( . $repeat ) ( (" . join(')|(', array_values($matched)) . ") ) /Asx";
 189             if (!preg_match($hugepat, $text, $m)) {
 190                 return false;
 191             }
 192             // Proposed premature optimization 1:
 193             //$match->regexp_ind = $matched_ind[count($m) - 4];
 194             $match->regexp_ind = count($m) - 4;
 195         } else {
 196             $match->regexp_ind = $regexp_ind;
 197         }
 198
 199         $match->postmatch = substr($text, strlen($m[0]));
 200         $match->prematch = $m[1];
 201         $match->match = $m[2];
 202
 203         /* DEBUGGING */
 204         if (DEBUG & _DEBUG_PARSER) {
 205             static $_already_dumped = 0;
 206             if (!$_already_dumped) {
 207                 var_dump($regexps);
 208                 if (_INLINE_OPTIMIZATION)
 209                     var_dump($matched);
 210                 var_dump($matched_ind);
 211             }
 212             $_already_dumped = 1;
 213             PrintXML(HTML::dl(HTML::dt("input"),
 214                 HTML::dd(HTML::pre($text)),
 215                 HTML::dt("regexp"),
 216                 HTML::dd(HTML::pre($match->regexp_ind, ":", $regexps[$match->regexp_ind])),
 217                 HTML::dt("prematch"),
 218                 HTML::dd(HTML::pre($match->prematch)),
 219                 HTML::dt("match"),
 220                 HTML::dd(HTML::pre($match->match)),
 221                 HTML::dt("postmatch"),
 222                 HTML::dd(HTML::pre($match->postmatch))
 223             ));
 224         }
 225         return $match;
 226     }
 227 }
 228
 229 /**
 230  * A simple markup rule (i.e. terminal token).
 231  *
 232  * These are defined by a regexp.
 233  *
 234  * When a match is found for the regexp, the matching text is replaced.
 235  * The replacement content is obtained by calling the SimpleMarkup::markup method.
 236  */
 237 class SimpleMarkup
 238 {
 239     public $_match_regexp;
 240
 241     /** Get regexp.
 242      *
 243      * @return string Regexp which matches this token.
 244      */
 245     function getMatchRegexp()
 246     {
 247         return $this->_match_regexp;
 248     }
 249
 250     /** Markup matching text.
 251      *
 252      * @param string $match The text which matched the regexp
 253      * (obtained from getMatchRegexp).
 254      *
 255      * @return mixed The expansion of the matched text.
 256      */
 257     function markup($match /*, $body */)
 258     {
 259         trigger_error("pure virtual", E_USER_ERROR);
 260     }
 261 }
 262
 263 /**
 264  * A balanced markup rule.
 265  *
 266  * These are defined by a start regexp, and an end regexp.
 267  */
 268 class BalancedMarkup
 269 {
 270     public $_start_regexp;
 271
 272     /** Get the starting regexp for this rule.
 273      *
 274      * @return string The starting regexp.
 275      */
 276     function getStartRegexp()
 277     {
 278         return $this->_start_regexp;
 279     }
 280
 281     /** Get the ending regexp for this rule.
 282      *
 283      * @param string $match The text which matched the starting regexp.
 284      *
 285      * @return string The ending regexp.
 286      */
 287     function getEndRegexp($match)
 288     {
 289         return $this->_end_regexp;
 290     }
 291
 292     /** Get expansion for matching input.
 293      *
 294      * @param string $match The text which matched the starting regexp.
 295      *
 296      * @param mixed $body Transformed text found between the starting
 297      * and ending regexps.
 298      *
 299      * @return mixed The expansion of the matched text.
 300      */
 301     function markup($match, $body)
 302     {
 303         trigger_error("pure virtual", E_USER_ERROR);
 304     }
 305 }
 306
 307 class Markup_escape extends SimpleMarkup
 308 {
 309     function getMatchRegexp()
 310     {
 311         return ESCAPE_CHAR . '(?: [[:alnum:]]+ | .)';
 312     }
 313
 314     function markup($match)
 315     {
 316         assert(strlen($match) >= 2);
 317         return substr($match, 1);
 318     }
 319 }
 320
 321 /**
 322  * [image.jpg size=50% border=5], [image.jpg size=50x30]
 323  * Support for the following attributes: see stdlib.php:LinkImage()
 324  *   size=<percent>%, size=<width>x<height>
 325  *   border=n, align=\w+, hspace=n, vspace=n
 326  *   width=n, height=n
 327  *   title, lang, id, alt
 328  */
 329 function isImageLink($link)
 330 {
 331     if (!$link) return false;
 332     assert(defined('INLINE_IMAGES'));
 333     return preg_match("/\\.(" . INLINE_IMAGES . ")$/i", $link)
 334         or preg_match("/\\.(" . INLINE_IMAGES . ")\s+(size|border|align|hspace|vspace|type|data|width|height|title|lang|id|alt)=/i", $link);
 335 }
 336
 337 function LinkBracketLink($bracketlink)
 338 {
 339
 340     // $bracketlink will start and end with brackets; in between will
 341     // be either a page name, a URL or both separated by a pipe.
 342
 343     $wikicreolesyntax = false;
 344
 345     if (string_starts_with($bracketlink, "[[") or string_starts_with($bracketlink, "#[[")) {
 346         $wikicreolesyntax = true;
 347         $bracketlink = str_replace("[[", "[", $bracketlink);
 348         $bracketlink = str_replace("]]", "]", $bracketlink);
 349     }
 350
 351     // Strip brackets and leading space
 352     // bug#1904088  Some brackets links on 2 lines cause the parser to crash
 353     preg_match('/(\#?) \[\s* (?: (.*?) \s* (?<!' . ESCAPE_CHAR . ')(\|) )? \s* (.+?) \s*\]/x',
 354         str_replace("\n", " ", $bracketlink), $matches);
 355     if (count($matches) < 4) {
 356         // "[ personal\ninformation manager | PhpWiki:PersonalWiki ]"
 357         trigger_error(_("Invalid [] syntax ignored") . _(": ") . $bracketlink, E_USER_WARNING);
 358         return new Cached_Link;
 359     }
 360     list (, $hash, $label, $bar, $rawlink) = $matches;
 361
 362     if ($wikicreolesyntax and $label) {
 363         $temp = $label;
 364         $label = $rawlink;
 365         $rawlink = $temp;
 366     }
 367
 368     // Mediawiki compatibility: allow "Image:" and "File:"
 369     // as synonyms of "Upload:"
 370     // Allow "upload:", "image:" and "file:" also
 371     // Remove spaces before and after ":", if any
 372     if (string_starts_with($rawlink, "Upload")) {
 373         $rawlink = preg_replace("/^Upload\\s*:\\s*/", "Upload:", $rawlink);
 374     } elseif (string_starts_with($rawlink, "upload")) {
 375         $rawlink = preg_replace("/^upload\\s*:\\s*/", "Upload:", $rawlink);
 376     } elseif (string_starts_with($rawlink, "Image")) {
 377         $rawlink = preg_replace("/^Image\\s*:\\s*/", "Upload:", $rawlink);
 378     } elseif (string_starts_with($rawlink, "image")) {
 379         $rawlink = preg_replace("/^image\\s*:\\s*/", "Upload:", $rawlink);
 380     } elseif (string_starts_with($rawlink, "File")) {
 381         $rawlink = preg_replace("/^File\\s*:\\s*/", "Upload:", $rawlink);
 382     } elseif (string_starts_with($rawlink, "file")) {
 383         $rawlink = preg_replace("/^file\\s*:\\s*/", "Upload:", $rawlink);
 384     }
 385
 386     $label = UnWikiEscape($label);
 387     /*
 388      * Check if the user has typed a explicit URL. This solves the
 389      * problem where the URLs have a ~ character, which would be stripped away.
 390      *   "[http:/server/~name/]" will work as expected
 391      *   "http:/server/~name/"   will NOT work as expected, will remove the ~
 392      */
 393     if (string_starts_with($rawlink, "http://")
 394         or string_starts_with($rawlink, "https://")
 395     ) {
 396         $link = $rawlink;
 397         // Mozilla Browser URI Obfuscation Weakness 2004-06-14
 398         //   http://www.securityfocus.com/bid/10532/
 399         //   goodurl+"%2F%20%20%20."+badurl
 400         if (preg_match("/%2F(%20)+\./i", $rawlink)) {
 401             $rawlink = preg_replace("/%2F(%20)+\./i", "%2F.", $rawlink);
 402         }
 403     } else {
 404         // Check page name lenght
 405         if (!string_starts_with($rawlink, "Upload:")) {
 406             if (strlen($rawlink) > MAX_PAGENAME_LENGTH) {
 407                 return HTML::span(array('class' => 'error'),
 408                     _('Page name too long'));
 409             }
 410         }
 411         // Check illegal characters in page names: <>[]{}|"
 412         if (preg_match("/[<\[\{\|\"\}\]>]/", $rawlink, $matches) > 0) {
 413             return HTML::span(array('class' => 'error'),
 414                 sprintf(_("Illegal character “%s” in page name."),
 415                     $matches[0]));
 416         }
 417         $link = UnWikiEscape($rawlink);
 418     }
 419
 420     /* Relatives links by Joel Schaubert.
 421      * Recognize [../bla] or [/bla] as relative links, without needing http://
 422      * but [ /link ] only if SUBPAGE_SEPERATOR is not "/".
 423      * Normally /Page links to the subpage /Page.
 424      */
 425     if (SUBPAGE_SEPARATOR == '/') {
 426         if (preg_match('/^\.\.\//', $link)) {
 427             return new Cached_ExternalLink($link, $label);
 428         }
 429     } elseif (preg_match('/^(\.\.\/|\/)/', $link)) {
 430         return new Cached_ExternalLink($link, $label);
 431     }
 432
 433     // Handle "[[SandBox|{{image.jpg}}]]" and "[[SandBox|{{image.jpg|alt text}}]]"
 434     if (string_starts_with($label, "{{")) {
 435         $imgurl = substr($label, 2, -2); // Remove "{{" and "}}"
 436         $pipe = strpos($imgurl, '|');
 437         if ($pipe === false) {
 438             $label = LinkImage(getUploadDataPath() . $imgurl, $link);
 439         } else {
 440             list($img, $alt) = explode("|", $imgurl);
 441             $label = LinkImage(getUploadDataPath() . $img, $alt);
 442         }
 443     } else
 444
 445         // [label|link]
 446         // If label looks like a url to an image or object, we want an image link.
 447         if (isImageLink($label)) {
 448             $imgurl = $label;
 449             $intermap = getInterwikiMap();
 450             if (preg_match("/^" . $intermap->getRegexp() . ":/", $label)) {
 451                 $imgurl = $intermap->link($label);
 452                 $imgurl = $imgurl->getAttr('href');
 453             } elseif (!preg_match("#^(" . ALLOWED_PROTOCOLS . "):#", $imgurl)) {
 454                 // local theme linkname like 'images/next.gif'.
 455                 global $WikiTheme;
 456                 $imgurl = $WikiTheme->getImageURL($imgurl);
 457             }
 458             // for objects (non-images) the link is taken as alt tag,
 459             // which is in return taken as alternative img
 460             $label = LinkImage($imgurl, $link);
 461         }
 462
 463     if ($hash) {
 464         // It's an anchor, not a link...
 465         $id = MangleXmlIdentifier($link);
 466         return HTML::a(array('id' => $id), $bar ? $label : $link);
 467     }
 468
 469     if (preg_match("#^(" . ALLOWED_PROTOCOLS . "):#", $link)) {
 470         // if it's an image, embed it; otherwise, it's a regular link
 471         if (isImageLink($link) and empty($label)) // patch #1348996 by Robert Litwiniec
 472             return LinkImage($link, $label);
 473         else
 474             return new Cached_ExternalLink($link, $label);
 475     } elseif (substr($link, 0, 8) == 'phpwiki:')
 476         return new Cached_PhpwikiURL($link, $label); /* Semantic relations and attributes.
 477      * Relation and attribute names must be word chars only, no space.
 478      * Links and Attributes may contain everything. word, nums, units, space, groupsep, numsep, ...
 479      */
 480     elseif (preg_match("/^ (\w+) (:[:=]) (.*) $/x", $link) and !isImageLink($link))
 481         return new Cached_SemanticLink($link, $label); /* Do not store the link */
 482     elseif (substr($link, 0, 1) == ':')
 483         return new Cached_WikiLink($link, $label); /*
 484      * Inline images in Interwiki urls's:
 485      * [File:my_image.gif] inlines the image,
 486      * File:my_image.gif shows a plain inter-wiki link,
 487      * [what a pic|File:my_image.gif] shows a named inter-wiki link to the gif
 488      * [File:my_image.gif|what a pic] shows an inlined image linked to the page "what a pic"
 489      *
 490      * Note that for simplicity we will accept embedded object tags (non-images)
 491      * here also, and separate them later in LinkImage()
 492      */
 493     elseif (strstr($link, ':')
 494         and ($intermap = getInterwikiMap())
 495             and preg_match("/^" . $intermap->getRegexp() . ":/", $link)
 496     ) {
 497         // trigger_error("label: $label link: $link", E_USER_WARNING);
 498         if (empty($label) and isImageLink($link)) {
 499             // if without label => inlined image [File:xx.gif]
 500             $imgurl = $intermap->link($link);
 501             return LinkImage($imgurl->getAttr('href'));
 502         }
 503         return new Cached_InterwikiLink($link, $label);
 504     } else {
 505         // Split anchor off end of pagename.
 506         if (preg_match('/\A(.*)(?<!' . ESCAPE_CHAR . ')#(.*?)\Z/', $rawlink, $m)) {
 507             list(, $rawlink, $anchor) = $m;
 508             $pagename = UnWikiEscape($rawlink);
 509             $anchor = UnWikiEscape($anchor);
 510             if (!$label)
 511                 $label = $link;
 512         } else {
 513             $pagename = $link;
 514             $anchor = false;
 515         }
 516         return new Cached_WikiLink($pagename, $label, $anchor);
 517     }
 518 }
 519
 520 class Markup_wikicreolebracketlink extends SimpleMarkup
 521 {
 522     public $_match_regexp = "\\#? \\[\\[ .*? [^]\\s] .*? \\]\\]";
 523
 524     function markup($match)
 525     {
 526         $link = LinkBracketLink($match);
 527         assert($link->isInlineElement());
 528         return $link;
 529     }
 530 }
 531
 532 class Markup_bracketlink extends SimpleMarkup
 533 {
 534     public $_match_regexp = "\\#? \\[ .*? [^]\\s] .*? \\]";
 535
 536     function markup($match)
 537     {
 538         $link = LinkBracketLink($match);
 539         assert($link->isInlineElement());
 540         return $link;
 541     }
 542 }
 543
 544 class Markup_spellcheck extends SimpleMarkup
 545 {
 546     function __construct()
 547     {
 548         $this->suggestions = $GLOBALS['request']->getArg('suggestions');
 549     }
 550
 551     function getMatchRegexp()
 552     {
 553         if (empty($this->suggestions))
 554             return "(?# false )";
 555         $words = array_keys($this->suggestions);
 556         return "(?<= \W ) (?:" . join('|', $words) . ") (?= \W )";
 557     }
 558
 559     function markup($match)
 560     {
 561         if (empty($this->suggestions) or empty($this->suggestions[$match]))
 562             return $match;
 563         return new Cached_SpellCheck(UnWikiEscape($match), $this->suggestions[$match]);
 564     }
 565 }
 566
 567 class Markup_searchhighlight extends SimpleMarkup
 568 {
 569     function __construct()
 570     {
 571         $result = $GLOBALS['request']->_searchhighlight;
 572         require_once 'lib/TextSearchQuery.php';
 573         $query = new TextSearchQuery($result['query']);
 574         $this->hilight_re = $query->getHighlightRegexp();
 575         $this->engine = $result['engine'];
 576     }
 577
 578     function getMatchRegexp()
 579     {
 580         return $this->hilight_re;
 581     }
 582
 583     function markup($match)
 584     {
 585         return new Cached_SearchHighlight(UnWikiEscape($match), $this->engine);
 586     }
 587 }
 588
 589 class Markup_url extends SimpleMarkup
 590 {
 591     function getMatchRegexp()
 592     {
 593         return "(?<![[:alnum:]]) (?:" . ALLOWED_PROTOCOLS . ") : [^\s<>\"']+ (?<![ ,.?; \] \) ])";
 594     }
 595
 596     function markup($match)
 597     {
 598         return new Cached_ExternalLink(UnWikiEscape($match));
 599     }
 600 }
 601
 602 class Markup_interwiki extends SimpleMarkup
 603 {
 604     function getMatchRegexp()
 605     {
 606         $map = getInterwikiMap();
 607         return "(?<! [[:alnum:]])" . $map->getRegexp() . ": [^:=]\S+ (?<![ ,.?;! \] \) \" \' ])";
 608     }
 609
 610     function markup($match)
 611     {
 612         return new Cached_InterwikiLink(UnWikiEscape($match));
 613     }
 614 }
 615
 616 class Markup_semanticlink extends SimpleMarkup
 617 {
 618     // No units separated by space allowed here
 619     // For :: (relations) only words, no comma,
 620     // but for := (attributes) comma and dots are allowed. Units with groupsep.
 621     // Ending dots or comma are not part of the link.
 622     public $_match_regexp = "(?: \w+:=\S+(?<![\.,]))|(?: \w+::[\w\.]+(?<!\.))";
 623
 624     function markup($match)
 625     {
 626         return new Cached_SemanticLink(UnWikiEscape($match));
 627     }
 628 }
 629
 630 class Markup_wikiword extends SimpleMarkup
 631 {
 632     function getMatchRegexp()
 633     {
 634         global $WikiNameRegexp;
 635         if (!trim($WikiNameRegexp)) return " " . WIKI_NAME_REGEXP;
 636         return " $WikiNameRegexp";
 637     }
 638
 639     function markup($match)
 640     {
 641         if (!$match) return false;
 642         if ($this->_isWikiUserPage($match))
 643             return new Cached_UserLink($match); //$this->_UserLink($match);
 644         else
 645             return new Cached_WikiLink($match);
 646     }
 647
 648     // FIXME: there's probably a more useful place to put these two functions
 649     function _isWikiUserPage($page)
 650     {
 651         global $request;
 652         $dbi = $request->getDbh();
 653         $page_handle = $dbi->getPage($page);
 654         if ($page_handle and $page_handle->get('pref'))
 655             return true;
 656         else
 657             return false;
 658     }
 659
 660     function _UserLink($PageName)
 661     {
 662         $link = HTML::a(array('href' => $PageName));
 663         $link->pushContent(PossiblyGlueIconToText('wikiuser', $PageName));
 664         $link->setAttr('class', 'wikiuser');
 665         return $link;
 666     }
 667 }
 668
 669 class Markup_linebreak extends SimpleMarkup
 670 {
 671     public $_match_regexp = "(?: (?<! %) %%% (?! %) | \\\\\\\\ | <\s*(?:br|BR)\s*> | <\s*(?:br|BR)\s*\/\s*> )";
 672
 673     function markup($match)
 674     {
 675         return HTML::br();
 676     }
 677 }
 678
 679 class Markup_wikicreole_italics extends BalancedMarkup
 680 {
 681     public $_start_regexp = "\\/\\/";
 682
 683     function getEndRegexp($match)
 684     {
 685         return "\\/\\/";
 686     }
 687
 688     function markup($match, $body)
 689     {
 690         $tag = 'em';
 691         return new HtmlElement($tag, $body);
 692     }
 693 }
 694
 695 class Markup_wikicreole_bold extends BalancedMarkup
 696 {
 697     public $_start_regexp = "\\*\\*";
 698
 699     function getEndRegexp($match)
 700     {
 701         return "\\*\\*";
 702     }
 703
 704     function markup($match, $body)
 705     {
 706         $tag = 'strong';
 707         return new HtmlElement($tag, $body);
 708     }
 709 }
 710
 711 class Markup_wikicreole_monospace extends BalancedMarkup
 712 {
 713     public $_start_regexp = "\\#\\#";
 714
 715     function getEndRegexp($match)
 716     {
 717         return "\\#\\#";
 718     }
 719
 720     function markup($match, $body)
 721     {
 722         return new HtmlElement('span', array('class' => 'tt'), $body);
 723     }
 724 }
 725
 726 class Markup_wikicreole_underline extends BalancedMarkup
 727 {
 728     public $_start_regexp = "\\_\\_";
 729
 730     function getEndRegexp($match)
 731     {
 732         return "\\_\\_";
 733     }
 734
 735     function markup($match, $body)
 736     {
 737         $tag = 'u';
 738         return new HtmlElement($tag, $body);
 739     }
 740 }
 741
 742 class Markup_wikicreole_superscript extends BalancedMarkup
 743 {
 744     public $_start_regexp = "\\^\\^";
 745
 746     function getEndRegexp($match)
 747     {
 748         return "\\^\\^";
 749     }
 750
 751     function markup($match, $body)
 752     {
 753         $tag = 'sup';
 754         return new HtmlElement($tag, $body);
 755     }
 756 }
 757
 758 class Markup_wikicreole_subscript extends BalancedMarkup
 759 {
 760     public $_start_regexp = ",,";
 761
 762     function getEndRegexp($match)
 763     {
 764         return $match;
 765     }
 766
 767     function markup($match, $body)
 768     {
 769         $tag = 'sub';
 770         return new HtmlElement($tag, $body);
 771     }
 772 }
 773
 774 class Markup_old_emphasis extends BalancedMarkup
 775 {
 776     public $_start_regexp = "''";
 777
 778     function getEndRegexp($match)
 779     {
 780         return $match;
 781     }
 782
 783     function markup($match, $body)
 784     {
 785         $tag = 'em';
 786         return new HtmlElement($tag, $body);
 787     }
 788 }
 789
 790 class Markup_nestled_emphasis extends BalancedMarkup
 791 {
 792     function getStartRegexp()
 793     {
 794         static $start_regexp = false;
 795
 796         if (!$start_regexp) {
 797             // The three possible delimiters
 798             // (none of which can be followed by itself.)
 799             $i = "_ (?! _)";
 800             $b = "\\* (?! \\*)";
 801             $tt = "= (?! =)";
 802
 803             $any = "(?: ${i}|${b}|${tt})"; // any of the three.
 804
 805             // Any of [_*=] is okay if preceded by space or one of [-"'/:]
 806             $start[] = "(?<= \\s|^|[-\"'\\/:]) ${any}";
 807
 808             // _ or * is okay after = as long as not immediately followed by =
 809             $start[] = "(?<= =) (?: ${i}|${b}) (?! =)";
 810             // etc...
 811             $start[] = "(?<= _) (?: ${b}|${tt}) (?! _)";
 812             $start[] = "(?<= \\*) (?: ${i}|${tt}) (?! \\*)";
 813
 814             // any delimiter okay after an opening brace ( [{<(] )
 815             // as long as it's not immediately followed by the matching closing
 816             // brace.
 817             $start[] = "(?<= { ) ${any} (?! } )";
 818             $start[] = "(?<= < ) ${any} (?! > )";
 819             $start[] = "(?<= \\( ) ${any} (?! \\) )";
 820
 821             $start = "(?:" . join('|', $start) . ")";
 822
 823             // Any of the above must be immediately followed by non-whitespace.
 824             $start_regexp = $start . "(?= \S)";
 825         }
 826
 827         return $start_regexp;
 828     }
 829
 830     function getEndRegexp($match)
 831     {
 832         $chr = preg_quote($match);
 833         return "(?<= \S | ^ ) (?<! $chr) $chr (?! $chr) (?= \s | [-)}>\"'\\/:.,;!? _*=] | $)";
 834     }
 835
 836     function markup($match, $body)
 837     {
 838         switch ($match) {
 839             case '*':
 840                 return new HtmlElement('b', $body);
 841             case '=':
 842                 return new HtmlElement('span', array('class' => 'tt'), $body);
 843             case '_':
 844                 return new HtmlElement('i', $body);
 845         }
 846     }
 847 }
 848
 849 class Markup_html_emphasis extends BalancedMarkup
 850 {
 851     public $_start_regexp =
 852         "<(?: b|big|i|small|tt|em|strong|cite|code|dfn|kbd|samp|s|strike|del|var|sup|sub )>";
 853
 854     function getEndRegexp($match)
 855     {
 856         return "<\\/" . substr($match, 1);
 857     }
 858
 859     function markup($match, $body)
 860     {
 861         $tag = substr($match, 1, -1);
 862         if (($tag == 'big') || ($tag == 'strike') || ($tag == 'tt')) {
 863             return new HtmlElement('span', array('class' => $tag), $body);
 864         }
 865         return new HtmlElement($tag, $body);
 866     }
 867 }
 868
 869 class Markup_html_divspan extends BalancedMarkup
 870 {
 871     public $_start_regexp =
 872         "<(?: div|span )(?: \s[^>]*)?>";
 873
 874     function getEndRegexp($match)
 875     {
 876         if (substr($match, 1, 4) == 'span')
 877             $tag = 'span';
 878         else
 879             $tag = 'div';
 880         return "<\\/" . $tag . '>';
 881     }
 882
 883     function markup($match, $body)
 884     {
 885         if (substr($match, 1, 4) == 'span')
 886             $tag = 'span';
 887         else
 888             $tag = 'div';
 889         $rest = substr($match, 1 + strlen($tag), -1);
 890         if (!empty($rest)) {
 891             $args = parse_attributes($rest);
 892         } else {
 893             $args = array();
 894         }
 895         return new HtmlElement($tag, $args, $body);
 896     }
 897 }
 898
 899 class Markup_html_abbr extends BalancedMarkup
 900 {
 901     //rurban: abbr|acronym need an optional title tag.
 902     //sf.net bug #728595
 903     public $_start_regexp = "<(?: abbr|acronym )(?: [^>]*)?>";
 904
 905     function getEndRegexp($match)
 906     {
 907         if (substr($match, 1, 4) == 'abbr')
 908             $tag = 'abbr';
 909         else
 910             $tag = 'acronym';
 911         return "<\\/" . $tag . '>';
 912     }
 913
 914     function markup($match, $body)
 915     {
 916         // 'acronym' is deprecated in HTML 5, replace by 'abbr'
 917         $tag = 'abbr';
 918         $rest = substr($match, 1 + strlen($tag), -1);
 919         $attrs = parse_attributes($rest);
 920         // Remove attributes other than title and lang
 921         $allowedargs = array();
 922         foreach ($attrs as $key => $value) {
 923             if (in_array($key, array("title", "lang"))) {
 924                 $allowedargs[$key] = $value;
 925             }
 926         }
 927         return new HtmlElement($tag, $allowedargs, $body);
 928     }
 929 }
 930
 931 /** ENABLE_MARKUP_COLOR
 932  *  See http://www.pmwiki.org/wiki/PmWiki/WikiStyles and
 933  *      http://www.flexwiki.com/default.aspx/FlexWiki/FormattingRules.html
 934  */
 935 class Markup_color extends BalancedMarkup
 936 {
 937     // %color=blue% blue text %% and back to normal
 938     public $_start_regexp = "%color=(?: [^%]*)%";
 939     public $_end_regexp = "%%";
 940
 941     function markup($match, $body)
 942     {
 943         $color = strtolower(substr($match, 7, -1));
 944
 945         $morecolors = array('beige' => '#f5f5dc',
 946             'brown' => '#a52a2a',
 947             'chocolate' => '#d2691e',
 948             'cyan' => '#00ffff',
 949             'gold' => '#ffd700',
 950             'ivory' => '#fffff0',
 951             'indigo' => '#4b0082',
 952             'magenta' => '#ff00ff',
 953             'orange' => '#ffa500',
 954             'pink' => '#ffc0cb',
 955             'salmon' => '#fa8072',
 956             'snow' => '#fffafa',
 957             'turquoise' => '#40e0d0',
 958             'violet' => '#ee82ee',
 959         );
 960
 961         if (isset($morecolors[$color])) {
 962             $color = $morecolors[$color];
 963         }
 964
 965         // HTML 4 defines the following 16 colors
 966         if (in_array($color, array('aqua', 'black', 'blue', 'fuchsia',
 967             'gray', 'green', 'lime', 'maroon',
 968             'navy', 'olive', 'purple', 'red',
 969             'silver', 'teal', 'white', 'yellow'))
 970             or ((substr($color, 0, 1) == '#')
 971                 and ((strlen($color) == 4) or (strlen($color) == 7))
 972                     and (strspn(substr($color, 1), '0123456789abcdef') == strlen($color) - 1))
 973         ) {
 974             return new HtmlElement('span', array('style' => "color: $color"), $body);
 975         } else {
 976             return new HtmlElement('span', array('class' => 'error'),
 977                 sprintf(_("unknown color %s ignored"), substr($match, 7, -1)));
 978         }
 979     }
 980 }
 981
 982 // Wikicreole placeholder
 983 // <<<placeholder>>>
 984 class Markup_placeholder extends SimpleMarkup
 985 {
 986     public $_match_regexp = '<<<.*?>>>';
 987
 988     function markup($match)
 989     {
 990         return HTML::span($match);
 991     }
 992 }
 993
 994 // Single-line HTML comment
 995 // <!-- This is a comment -->
 996 class Markup_html_comment extends SimpleMarkup
 997 {
 998     public $_match_regexp = '<!--.*?-->';
 999
1000     function markup($match)
1001     {
1002         return HTML::raw('');
1003     }
1004 }
1005
/**
 * Special version for single-line plugins formatting,
 * like: '<small>< ?plugin PopularNearby ? ></small>'
 */
class Markup_plugin extends SimpleMarkup
{
    public $_match_regexp = '<\?plugin(?:-form)?\s[^\n]+?\?>';

    /**
     * @param string $match The complete plugin processing instruction.
     * @return Cached_PluginInvocation Invocation built from the match as-is.
     */
    function markup($match)
    {
        $invocation = new Cached_PluginInvocation($match);
        return $invocation;
    }
}
1017
/**
 * Special version for single-line Wikicreole plugins formatting:
 * <<PluginName args>> is turned into the classic
 * < ?plugin PluginName args ? > processing instruction.
 */
class Markup_plugin_wikicreole extends SimpleMarkup
{
    public $_match_regexp = '<<[^\n]+?>>';

    /**
     * @param string $match The matched text including "<<" and ">>".
     * @return Cached_PluginInvocation
     */
    function markup($match)
    {
        // Only the two outer delimiters are converted. A global
        // str_replace() would also rewrite any "<<" that appears inside
        // the plugin arguments and corrupt the invocation.
        $inner = substr($match, 2, -2);
        return new Cached_PluginInvocation("<?plugin " . $inner . " ?>");
    }
}
1030
/**
 * Special version for plugins in xml syntax, mediawiki-style
 * <name arg=value>body</name> or <name /> => < ? plugin pluginname arg=value body ? >
 * PLUGIN_MARKUP_MAP = "html:RawHtml dot:GraphViz toc:CreateToc amath:AsciiMath richtable:RichTable include:IncludePage tex:TexToPng"
 *
 * NOTE(review): as a balanced markup this still needs a closing
 * </name> after the opening tag, so the self-closing <name /> form is
 * matched by the start pattern but has no end of its own. The
 * end-to-end behaviour has not been verified.
 */
class Markup_xml_plugin extends BalancedMarkup
{
    /**
     * Build (once per request) the start pattern from the tag names that
     * are the keys of the global $PLUGIN_MARKUP_MAP.
     *
     * @return string The start pattern, or '' when the map is empty.
     */
    function getStartRegexp()
    {
        global $PLUGIN_MARKUP_MAP;
        static $_start_regexp;
        if ($_start_regexp) return $_start_regexp;
        if (empty($PLUGIN_MARKUP_MAP)) return '';
        //"<(?: html|search|extsearch|dot|toc|math|richtable|include|tex )(?: \s[^>]*)>"
        $_start_regexp = "<(?: " . join('|', array_keys($PLUGIN_MARKUP_MAP)) . " )(?: \s[^>]*|\\/ )>";
        return $_start_regexp;
    }

    /**
     * Build the pattern for the closing tag that pairs with $match.
     *
     * Only the tag name goes into the pattern. Embedding the complete
     * opening tag (attributes and angle brackets included) produces a
     * pattern that can never match a closing tag.
     *
     * @param string $match The matched opening tag including attributes.
     * @return string Pattern such as "<\/html>".
     */
    function getEndRegexp($match)
    {
        list($name) = $this->_splitTag($match);
        return "<\\/" . preg_quote($name, '/') . '>';
    }

    /**
     * Turn the tag into an invocation of the plugin mapped to its name.
     *
     * @param string $match The matched opening tag including attributes.
     * @param mixed $body Parsed content between the tags; it is
     *                    interpolated into a string, so it presumably
     *                    has to be string-convertible -- TODO confirm.
     * @return Cached_PluginInvocation|string Empty string (after a
     *         warning) when no plugin is mapped to the tag name.
     */
    function markup($match, $body)
    {
        global $PLUGIN_MARKUP_MAP;
        list($name, $vars) = $this->_splitTag($match);
        if (!isset($PLUGIN_MARKUP_MAP[$name])) {
            trigger_error("No plugin for $name $vars defined.", E_USER_WARNING);
            return "";
        }
        $plugin = $PLUGIN_MARKUP_MAP[$name];
        return new Cached_PluginInvocation("<" . "?plugin $plugin $vars $body ?" . ">");
    }

    /**
     * Split an opening tag into its name and its raw attribute text.
     *
     * "<html a=1 b=2>" gives array('html', 'a=1 b=2'); "<toc />" gives
     * array('toc', '').
     *
     * @param string $match The matched opening tag.
     * @return array Two strings: tag name and attribute text. Both are
     *               empty when $match does not look like a tag.
     */
    function _splitTag($match)
    {
        if (!preg_match('/^<([^\s\/>]+)\s*(.*?)\s*\/?>\z/s', $match, $m)) {
            return array('', '');
        }
        return array($m[1], $m[2]);
    }
}
1071
/**
 *  Mediawiki <nowiki>
 *  <nowiki>...</nowiki>
 *
 *  The enclosed text is shown literally: no wiki markup is applied and
 *  HTML special characters are escaped.
 */
class Markup_nowiki extends SimpleMarkup
{
    public $_match_regexp = '<nowiki>.*?<\/nowiki>';

    /**
     * @param string $match The matched text including both nowiki tags.
     * @return mixed Raw content holding the escaped inner text.
     */
    function markup($match)
    {
        // Remove <nowiki> (8 bytes) and </nowiki> (9 bytes)
        $literal = substr($match, 8, -9);
        // The text comes straight from the page source and is handed to
        // HTML::raw(); escape it first so that author-supplied tags or
        // scripts are displayed instead of being injected into the page.
        return HTML::raw(htmlspecialchars($literal));
    }
}
1086
/**
 *  Wikicreole preformatted
 *  {{{
 *  }}}
 *
 *  Inline form: the enclosed text is rendered in a <span class="tt">.
 */
class Markup_wikicreole_preformatted extends SimpleMarkup
{
    public $_match_regexp = '\{\{\{.*?\}\}\}';

    /**
     * @param string $match The matched text including "{{{" and "}}}".
     * @return HtmlElement
     */
    function markup($match)
    {
        // Remove {{{ and }}}
        $inner = substr($match, 3, -3);
        $attrs = array('class' => 'tt');
        return new HtmlElement('span', $attrs, $inner);
    }
}
1102
/** ENABLE_MARKUP_TEMPLATE
 *  Template syntax similar to Mediawiki
 *  {{template}}
 * => < ? plugin Template page=template ? >
 *  {{template|var1=value1|var2=value|...}}
 * => < ? plugin Template page=template var=value ... ? >
 *
 * The {{...}} syntax is also used for:
 *  - Wikicreole images
 *  - videos
 *  - predefined icons
 */
class Markup_template_plugin extends SimpleMarkup
{
    // patch #1732793: allow \n, mult. {{ }} in one line, and single letters
    public $_match_regexp = '\{\{.*?\}\}';

    /**
     * Decide what a {{...}} construct stands for and build its output.
     *
     * Checks, in this order: predefined icon, image, video, and finally
     * a Template plugin invocation.
     *
     * @param string $match The matched text including "{{" and "}}".
     * @return mixed Result of LinkImage() for icons and images, otherwise
     *               a Cached_PluginInvocation.
     */
    function markup($match)
    {
        $page = trim(substr($match, 2, -2));

        // Check for predefined icons.
        $predefinedicons = array(":)" => "ic_smile.png",
            ":(" => "ic_sad.png",
            ":P" => "ic_tongue.png",
            ":D" => "ic_biggrin.png",
            ";)" => "ic_wink.png",
            "(y)" => "ic_handyes.png",
            "(n)" => "ic_handno.png",
            "(i)" => "ic_info.png",
            "(/)" => "ic_check.png",
            "(x)" => "ic_cross.png",
            "(!)" => "ic_danger.png",
            "(+)" => "ic_plus.png",
            "(-)" => "ic_minus.png",
            "(?)" => "ic_help.png",
            "(on)" => "ic_lighton.png",
            "(off)" => "ic_lightoff.png",
            "(*)" => "ic_yellowstar.png",
            "(*r)" => "ic_redstar.png",
            "(*g)" => "ic_greenstar.png",
            "(*b)" => "ic_bluestar.png",
            "(*y)" => "ic_yellowstar.png",
        );
        if (isset($predefinedicons[$page])) {
            $icon = $predefinedicons[$page];
            return LinkImage(DATA_PATH . "/themes/default/images/$icon", $page);
        }

        // Split "name|alt text" at the first pipe; without a pipe the
        // whole text is the name.
        $bar = strpos($page, "|");
        if ($bar === false) {
            $imagename = $page;
            $alt = "";
        } else {
            $imagename = substr($page, 0, $bar);
            $alt = ltrim(substr($page, $bar), "|");
        }

        // It's not a Mediawiki template, it's a Wikicreole image
        if (is_image($imagename)) {
            $is_remote = (strpos($imagename, "http://") === 0)
                || (strpos($imagename, "https://") === 0);
            if ($is_remote) {
                return LinkImage($imagename, $alt);
            }
            if ($imagename[0] == '/') {
                return LinkImage(DATA_PATH . '/' . $imagename, $alt);
            }
            return LinkImage(getUploadDataPath() . $imagename, $alt);
        }

        // It's a video
        if (is_video($imagename)) {
            return new Cached_PluginInvocation(
                '<' . '?plugin Video file="' . $imagename . '" ?' . '>');
        }

        // From here on it is a template. Drop newlines, then encode the
        // text because an argument value might contain a double quote (").
        $page = htmlspecialchars(str_replace("\n", "", $page));

        // "name|a=1|b" becomes page "name" with arguments a="1" "b".
        $vars = '';
        if (preg_match('/^(\S+?)\|(.*)$/', $page, $_m)) {
            $page = $_m[1];
            $vars = '"' . str_replace('|', '" "', $_m[2]) . '"';
            $vars = preg_replace('/"(\S+)=([^"]*)"/', '\\1="\\2"', $vars);
        }

        // page may contain a version number
        // {{foo?version=5}}
        // in that case, output is "page=foo rev=5"
        if (strpos($page, "?") !== false) {
            $page = str_replace("?version=", "\" rev=\"", $page);
        }

        $s = '<' . '?plugin Template page="' . $page . '"'
            . ($vars ? ' ' . $vars : '')
            . ' ?' . '>';
        return new Cached_PluginInvocation($s);
    }
}
1208
1209 // "..." => "&#133;"  browser specific display (not cached?)
1210 // Support some HTML::Entities: (C) for copy, --- for mdash, -- for ndash
1211 // TODO: "--" => "&emdash;" browser specific display (not cached?)
1212
/**
 * Replaces a few plain-text sequences and named entities by the raw
 * HTML entity they stand for.
 *
 * NOTE(review): this markup type is commented out of the default
 * markup lists in InlineTransformer and NowikiTransformer.
 */
class Markup_html_entities extends SimpleMarkup
{
    /** @var array Map from the matched source text to its raw HTML entity. */
    public $_entities = array();

    /** @var string Non-capturing alternation of all keys of $_entities. */
    public $_match_regexp;

    function __construct()
    {
        $this->_entities = array('...' => '&#133;',
            '--' => '&ndash;',
            '---' => '&mdash;',
            '(C)' => '&copy;',
            '&copy;' => '&copy;',
            '&trade;' => '&trade;',
        );

        // A reverse string sort puts a longer key before any key that is
        // its prefix, so "---" is tried before "--" and can still match.
        $sources = array_keys($this->_entities);
        rsort($sources, SORT_STRING);

        // The group must be non-capturing "(?:". The former "(:" opened
        // a capturing group that demanded a literal ":" before each
        // sequence.
        $this->_match_regexp =
            '(?: ' .
                join('|', array_map('preg_quote', $sources)) .
                ' )';
    }

    /**
     * @param string $match One of the keys of $_entities.
     * @return mixed Raw content holding the mapped entity.
     */
    function markup($match)
    {
        return HTML::Raw($this->_entities[$match]);
    }
}
1237
/**
 * Decimal numeric character references (an ampersand, "#", 2 to 5
 * digits and a semicolon) are passed through as raw HTML.
 */
class Markup_isonumchars extends SimpleMarkup
{
    public $_match_regexp = '\&\#\d{2,5};';

    /**
     * @param string $match The matched character reference.
     * @return mixed Raw content holding the reference unchanged.
     */
    function markup($match)
    {
        $reference = $match;
        return HTML::Raw($reference);
    }
}
1247
/**
 * Hexadecimal numeric character references are passed through as raw
 * HTML.
 */
class Markup_isohexchars extends SimpleMarkup
{
    // hexnums, like &#x00A4; <=> &curren;
    public $_match_regexp = '\&\#x[0-9a-fA-F]{2,4};';

    /**
     * @param string $match The matched character reference.
     * @return mixed Raw content holding the reference unchanged.
     */
    function markup($match)
    {
        $reference = $match;
        return HTML::Raw($reference);
    }
}
1258
1259 // FIXME: Do away with magic phpwiki forms.  (Maybe phpwiki: links too?)
1260
/**
 * Parses inline wiki markup.
 *
 * Holds an ordered list of markup handlers and the pattern that starts
 * each of them. parse() repeatedly looks for the next match among those
 * patterns and lets the matching handler produce the output.
 */
class InlineTransformer
{
    /** @var array Pattern strings; index i belongs to $_markup[i]. */
    public $_regexps = array();
    /** @var array Markup handler objects; index i belongs to $_regexps[i]. */
    public $_markup = array();

    /**
     * @param array|false $markup_types Names of the markup types to use
     *        (the part after "Markup_" in the class name). When false,
     *        the default set is used, adjusted by configuration constants
     *        and the current request.
     */
    function __construct($markup_types = false)
    {
        global $request;
        // We need to extend the inline parsers by certain actions, like SearchHighlight,
        // SpellCheck and maybe CreateToc.
        if (!$markup_types) {
            $non_default = false;
            $markup_types = array
            ('escape', 'wikicreolebracketlink', 'bracketlink', 'url',
                'html_comment', 'placeholder',
                'interwiki', 'semanticlink', 'wikiword', 'linebreak',
                'wikicreole_superscript',
                'wikicreole_subscript',
                'wikicreole_italics', 'wikicreole_bold',
                'wikicreole_monospace',
                'wikicreole_underline',
                'old_emphasis', 'nestled_emphasis',
                'html_emphasis', 'html_abbr', 'plugin', 'plugin_wikicreole',
                'isonumchars', 'isohexchars', /*'html_entities'*/
            );
            if (DISABLE_MARKUP_WIKIWORD)
                $markup_types = array_remove($markup_types, 'wikiword');

            // Both extra types go directly after 'url'. Its position is
            // looked up instead of hard-coded: a fixed index of 2 hits
            // 'bracketlink', which dropped that type and listed 'url'
            // twice.
            $action = $request->getArg('action');
            if ($action == 'SpellCheck' and $request->getArg('suggestions')) {
                $markup_types = $this->_insertTypeAfter($markup_types, 'url', 'spellcheck');
            }
            if (isset($request->_searchhighlight)) {
                $markup_types = $this->_insertTypeAfter($markup_types, 'url', 'searchhighlight');
            }
        } else {
            $non_default = true;
        }
        foreach ($markup_types as $mtype) {
            $class = "Markup_$mtype";
            $this->_addMarkup(new $class);
        }
        $this->_addMarkup(new Markup_nowiki);
        if (ENABLE_MARKUP_DIVSPAN and !$non_default)
            $this->_addMarkup(new Markup_html_divspan);
        if (ENABLE_MARKUP_COLOR and !$non_default)
            $this->_addMarkup(new Markup_color);
        // Markup_wikicreole_preformatted must be before Markup_template_plugin
        $this->_addMarkup(new Markup_wikicreole_preformatted);
        if (ENABLE_MARKUP_TEMPLATE and !$non_default)
            $this->_addMarkup(new Markup_template_plugin);
        // This does not work yet
        if (PLUGIN_MARKUP_MAP and !$non_default)
            $this->_addMarkup(new Markup_xml_plugin);
    }

    /**
     * PHP 4 style constructor name, kept as a plain method for code that
     * calls it explicitly. It is declared after __construct() so that
     * __construct() stays the constructor.
     *
     * @deprecated Use __construct().
     * @param array|false $markup_types See __construct().
     */
    function InlineTransformer($markup_types = false)
    {
        self::__construct($markup_types);
    }

    /**
     * Return the list of type names with $new placed directly after
     * $anchor, or appended when $anchor is not in the list.
     *
     * @param array $types List of markup type names.
     * @param string $anchor Name after which to insert.
     * @param string $new Name to insert.
     * @return array The re-indexed list including $new.
     */
    function _insertTypeAfter($types, $anchor, $new)
    {
        // Re-index first so the found position is a valid splice offset
        // even if an earlier removal left gaps in the keys.
        $types = array_values($types);
        $pos = array_search($anchor, $types, true);
        if ($pos === false) {
            $types[] = $new;
        } else {
            array_splice($types, $pos + 1, 0, array($new));
        }
        return $types;
    }

    /**
     * Register a markup handler together with the pattern that starts it.
     *
     * @param object $markup A SimpleMarkup (uses getMatchRegexp()) or any
     *        other handler (uses getStartRegexp()).
     */
    function _addMarkup($markup)
    {
        if (isa($markup, 'SimpleMarkup'))
            $regexp = $markup->getMatchRegexp();
        else
            $regexp = $markup->getStartRegexp();

        // $_markup is indexed by number, so a duplicate has to be looked
        // for among the registered patterns, not as a key of $_markup.
        assert(!in_array($regexp, $this->_regexps, true));
        assert(strlen(trim($regexp)) > 0);
        $this->_regexps[] = $regexp;
        $this->_markup[] = $markup;
    }

    /**
     * Parse $text up to the first match of the innermost end pattern.
     *
     * @param string $text Input text. On success it is replaced by the
     *        text that follows the end-pattern match.
     * @param array $end_regexps Stack of end patterns, innermost first.
     * @return XmlContent|false The parsed content, or false when not even
     *         the end pattern matches.
     */
    function parse(&$text, $end_regexps = array('$'))
    {
        $regexps = $this->_regexps;

        // $end_re takes precedence: "favor reduce over shift"
        array_unshift($regexps, $end_regexps[0]);
        $regexps = new RegexpSet($regexps);

        $input = $text;
        $output = new XmlContent;

        $match = $regexps->match($input);

        while ($match) {
            if ($match->regexp_ind == 0) {
                // No start pattern found before end pattern.
                // We're all done!
                if (isset($markup) and is_object($markup)
                    and isa($markup, 'Markup_plugin')
                ) {
                    // The last handler tried was an inline plugin; mark
                    // the most recently pushed content item as tight.
                    $current =& $output->_content[count($output->_content) - 1];
                    $current->setTightness(true, true);
                }
                $output->pushContent($match->prematch);
                $text = $match->postmatch;
                return $output;
            }

            // Index 0 is the end pattern, so handlers are shifted by one.
            $markup = $this->_markup[$match->regexp_ind - 1];
            $body = $this->_parse_markup_body($markup, $match->match,
                $match->postmatch, $end_regexps);
            if (!$body) {
                // Couldn't match balanced expression.
                // Ignore and look for next matching start regexp.
                $match = $regexps->nextMatch($input, $match);
                continue;
            }

            // Matched markup.  Eat input, push output.
            // FIXME: combine adjacent strings.
            if (isa($markup, 'SimpleMarkup'))
                $current = $markup->markup($match->match);
            else
                $current = $markup->markup($match->match, $body);
            // For balanced markup the body parse has already advanced
            // $match->postmatch (it is passed by reference) past the end
            // pattern.
            $input = $match->postmatch;
            if (isset($markup) and is_object($markup)
                and isa($markup, 'Markup_plugin')
            ) {
                $current->setTightness(true, true);
            }
            $output->pushContent($match->prematch, $current);

            $match = $regexps->match($input);
        }

        // No pattern matched, not even the end pattern.
        // Parse fails.
        return false;
    }

    /**
     * Parse the body that belongs to a matched start pattern.
     *
     * @param object $markup The handler whose start pattern matched.
     * @param string $match The text matched by the start pattern.
     * @param string $text Text after the match; advanced past the end
     *        pattern when a balanced body is parsed successfully.
     * @param array $end_regexps Stack of enclosing end patterns.
     * @return XmlContent|bool true for SimpleMarkup (there is no body),
     *         the parsed body for balanced markup, false on failure.
     */
    function _parse_markup_body($markup, $match, &$text, $end_regexps)
    {
        if (isa($markup, 'SimpleMarkup')) {
            return true; // Done. SimpleMarkup is simple.
        }

        if (!is_object($markup)) {
           return false; // Some error: Should assert
        }
        array_unshift($end_regexps, $markup->getEndRegexp($match));

        // Optimization: if no end pattern in text, we know the
        // parse will fail.  This is an important optimization,
        // e.g. when text is "*lots *of *start *delims *with
        // *no *matching *end *delims".
        $ends_pat = "/(?:" . join(").*(?:", $end_regexps) . ")/xs";
        if (!@preg_match($ends_pat, $text)) { // Add "@" to avoid warning with "{{(*y)}}"
            return false;
        }
        return $this->parse($text, $end_regexps);
    }
}
1414
/**
 * Inline transformer restricted to the link-producing markup types.
 */
class LinkTransformer extends InlineTransformer
{
    function __construct()
    {
        $link_types = array(
            'escape', 'wikicreolebracketlink', 'bracketlink', 'url',
            'semanticlink', 'interwiki', 'wikiword',
        );
        parent::__construct($link_types);
    }
}
1424
/**
 * Inline transformer restricted to line breaks, literal HTML markup,
 * plugins and numeric character references.
 */
class NowikiTransformer extends InlineTransformer
{
    function __construct()
    {
        $nowiki_types = array(
            'linebreak',
            'html_emphasis', 'html_abbr', 'plugin', 'plugin_wikicreole',
            'isonumchars', 'isohexchars', /*'html_entities',*/
        );
        parent::__construct($nowiki_types);
    }
}
1436
/**
 * Parse inline wiki markup with the default InlineTransformer.
 *
 * The transformer is created once and reused, except that it is rebuilt
 * on every call while the request action is 'SpellCheck'.
 *
 * @param string $text Wiki text to parse.
 * @param mixed $basepage When set, the result is wrapped in a
 *        CacheableMarkup created with this value.
 * @return mixed The parse result, or a CacheableMarkup wrapping it.
 */
function TransformInline($text, $basepage = false)
{
    static $transformer;

    $action = $GLOBALS['request']->getArg('action');
    if (!$transformer or $action == 'SpellCheck') {
        $transformer = new InlineTransformer;
    }

    $parsed = $transformer->parse($text);
    return $basepage ? new CacheableMarkup($parsed, $basepage) : $parsed;
}
1450
/**
 * Parse only the link markup in $text, using a LinkTransformer that is
 * created on first use and then reused.
 *
 * @param string $text Wiki text to parse.
 * @param mixed $basepage When set, the result is wrapped in a
 *        CacheableMarkup created with this value.
 * @return mixed The parse result, or a CacheableMarkup wrapping it.
 */
function TransformLinks($text, $basepage = false)
{
    static $transformer;

    if (!$transformer) {
        $transformer = new LinkTransformer;
    }

    $parsed = $transformer->parse($text);
    return $basepage ? new CacheableMarkup($parsed, $basepage) : $parsed;
}
1464
/**
 * Transform only html markup and entities, using a NowikiTransformer
 * that is created on first use and then reused.
 *
 * @param string $text Wiki text to parse.
 * @param mixed $basepage When set, the result is wrapped in a
 *        CacheableMarkup created with this value.
 * @return mixed The parse result, or a CacheableMarkup wrapping it.
 */
function TransformInlineNowiki($text, $basepage = false)
{
    static $transformer;

    if (!$transformer) {
        $transformer = new NowikiTransformer;
    }

    $parsed = $transformer->parse($text);
    return $basepage ? new CacheableMarkup($parsed, $basepage) : $parsed;
}
1480
1481 // Local Variables:
1482 // mode: php
1483 // tab-width: 8
1484 // c-basic-offset: 4
1485 // c-hanging-comment-ender-p: nil
1486 // indent-tabs-mode: nil
1487 // End: