lib/stdlib.php

   1 <?php //rcs_id('$Id: stdlib.php,v 1.137 2003-02-18 23:13:40 dairiki Exp $');
   2
   3 /*
   4   Standard functions for Wiki functionality
   5     WikiURL($pagename, $args, $get_abs_url)
   6     IconForLink($protocol_or_url)
   7     LinkURL($url, $linktext)
   8     LinkImage($url, $alt)
   9
  10     MakeWikiForm ($pagename, $args, $class, $button_text)
  11     SplitQueryArgs ($query_args)
  12     LinkPhpwikiURL($url, $text)
  13     LinkBracketLink($bracketlink)
  14     ExtractWikiPageLinks($content)
  15     ConvertOldMarkup($content)
  16
  17     class Stack { push($item), pop(), cnt(), top() }
  18
  19     split_pagename ($page)
  20     NoSuchRevision ($request, $page, $version)
  21     TimezoneOffset ($time, $no_colon)
  22     Iso8601DateTime ($time)
  23     Rfc2822DateTime ($time)
  24     CTime ($time)
  25     __printf ($fmt)
  26     __sprintf ($fmt)
  27     __vsprintf ($fmt, $args)
  28     better_srand($seed = '')
  29     count_all($arg)
  30     isSubPage($pagename)
  31     subPageSlice($pagename, $pos)
  32     explodePageList($input, $perm = false)
  33
  34   function: LinkInterWikiLink($link, $linktext)
  35   moved to: lib/interwiki.php
  36   function: linkExistingWikiWord($wikiword, $linktext, $version)
  37   moved to: lib/Theme.php
  38   function: LinkUnknownWikiWord($wikiword, $linktext)
  39   moved to: lib/Theme.php
  40   function: UpdateRecentChanges($dbi, $pagename, $isnewpage)
  41   gone see: lib/plugin/RecentChanges.php
  42 */
  43
  44 /**
  45  * This is the character used in wiki markup to escape characters with
  46  * special meaning.
  47  */
  48 define('ESCAPE_CHAR', '~');
  49
  50 /**
  51  * Convert string to a valid XML identifier.
  52  *
  53  * XML 1.0 identifiers are of the form: [A-Za-z][A-Za-z0-9:_.-]*
  54  *
  55  * We would like to have, e.g. named anchors within wiki pages
  56  * names like "Table of Contents" --- clearly not a valid XML
  57  * fragment identifier.
  58  *
  59  * This function implements a one-to-one map from {any string}
  60  * to {valid XML identifiers}.
  61  *
  62  * It does this by
  63  * converting all bytes not in [A-Za-z0-9:_-],
  64  * and any leading byte not in [A-Za-z] to 'xbb.',
  65  * where 'bb' is the hexadecimal representation of the
  66  * character.
  67  *
  68  * As a special case, the empty string is converted to 'empty.'
  69  *
  70  * @param string $str
  71  * @return string
  72  */
  73 function MangleXmlIdentifier($str)
  74 {
  75     if (!$str)
  76         return 'empty.';
  77
  78     return preg_replace('/[^-_:A-Za-z0-9]|(?<=^)[^A-Za-z]/e',
  79                         "'x' . sprintf('%02x', ord('\\0')) . '.'",
  80                         $str);
  81 }
  82
  83
  84 /**
  85  * Generates a valid URL for a given Wiki pagename.
  86  * @param mixed $pagename If a string this will be the name of the Wiki page to link to.
  87  *                        If a WikiDB_Page object function will extract the name to link to.
  88  *                        If a WikiDB_PageRevision object function will extract the name to link to.
  89  * @param array $args
  90  * @param boolean $get_abs_url Default value is false.
  91  * @return string The absolute URL to the page passed as $pagename.
  92  */
  93 function WikiURL($pagename, $args = '', $get_abs_url = false) {
  94     $anchor = false;
  95
  96     if (is_object($pagename)) {
  97         if (isa($pagename, 'WikiDB_Page')) {
  98             $pagename = $pagename->getName();
  99         }
 100         elseif (isa($pagename, 'WikiDB_PageRevision')) {
 101             $page = $pagename->getPage();
 102             $args['version'] = $pagename->getVersion();
 103             $pagename = $page->getName();
 104         }
 105         elseif (isa($pagename, 'WikiPageName')) {
 106             $anchor = $pagename->anchor;
 107             $pagename = $pagename->name;
 108         }
 109     }
 110
 111     if (is_array($args)) {
 112         $enc_args = array();
 113         foreach  ($args as $key => $val) {
 114             if (!is_array($val)) // ugly hack for getURLtoSelf() which also takes POST vars
 115               $enc_args[] = urlencode($key) . '=' . urlencode($val);
 116         }
 117         $args = join('&', $enc_args);
 118     }
 119
 120     if (USE_PATH_INFO) {
 121         $url = $get_abs_url ? SERVER_URL . VIRTUAL_PATH . "/" : "";
 122         $url .= preg_replace('/%2f/i', '/', rawurlencode($pagename));
 123         if ($args)
 124             $url .= "?$args";
 125     }
 126     else {
 127         $url = $get_abs_url ? SERVER_URL . SCRIPT_NAME : basename(SCRIPT_NAME);
 128         $url .= "?pagename=" . rawurlencode($pagename);
 129         if ($args)
 130             $url .= "&$args";
 131     }
 132     if ($anchor)
 133         $url .= "#" . MangleXmlIdentifier($anchor);
 134     return $url;
 135 }
 136
 137 /**
 138  * Generates icon in front of links.
 139  *
 140  * @param string $protocol_or_url URL or protocol to determine which icon to use.
 141  *
 142  * @return HtmlElement HtmlElement object that contains data to create img link to
 143  * icon for use with url or protocol passed to the function. False if no img to be
 144  * displayed.
 145  */
 146 function IconForLink($protocol_or_url) {
 147     global $Theme;
 148     if ($filename_suffix = false) {
 149         // display apache style icon for file type instead of protocol icon
 150         // - archive: unix:gz,bz2,tgz,tar,z; mac:dmg,dmgz,bin,img,cpt,sit; pc:zip;
 151         // - document: html, htm, text, txt, rtf, pdf, doc
 152         // - non-inlined image: jpg,jpeg,png,gif,tiff,tif,swf,pict,psd,eps,ps
 153         // - audio: mp3,mp2,aiff,aif,au
 154         // - multimedia: mpeg,mpg,mov,qt
 155     } else {
 156         list ($proto) = explode(':', $protocol_or_url, 2);
 157         $src = $Theme->getLinkIconURL($proto);
 158         if ($src)
 159             return HTML::img(array('src' => $src, 'alt' => $proto, 'class' => 'linkicon', 'border' => 0));
 160         else
 161             return false;
 162     }
 163 }
 164
 165 /**
 166  * Glue icon in front of text.
 167  *
 168  * @param string $protocol_or_url Protocol or URL.  Used to determine the
 169  * proper icon.
 170  * @param string $text The text.
 171  * @return XmlContent.
 172  */
 173 function PossiblyGlueIconToText($proto_or_url, $text) {
 174     $icon = IconForLink($proto_or_url);
 175     if ($icon) {
 176         preg_match('/^\s*(\S*)(.*?)\s*$/', $text, $m);
 177         list (, $first_word, $tail) = $m;
 178         $text = HTML::span(array('style' => 'white-space: nowrap'),
 179                            $icon, $first_word);
 180         if ($tail)
 181             $text = HTML($text, $tail);
 182     }
 183     return $text;
 184 }
 185
 186 /**
 187  * Determines if the url passed to function is safe, by detecting if the characters
 188  * '<', '>', or '"' are present.
 189  *
 190  * @param string $url URL to check for unsafe characters.
 191  * @return boolean True if same, false else.
 192  */
 193 function IsSafeURL($url) {
 194     return !ereg('[<>"]', $url);
 195 }
 196
 197 /**
 198  * Generates an HtmlElement object to store data for a link.
 199  *
 200  * @param string $url URL that the link will point to.
 201  * @param string $linktext Text to be displayed as link.
 202  * @return HtmlElement HtmlElement object that contains data to construct an html link.
 203  */
 204 function LinkURL($url, $linktext = '') {
 205     // FIXME: Is this needed (or sufficient?)
 206     if(! IsSafeURL($url)) {
 207         $link = HTML::strong(HTML::u(array('class' => 'baduri'),
 208                                      _("BAD URL -- remove all of <, >, \"")));
 209     }
 210     else {
 211         if (!$linktext)
 212             $linktext = preg_replace("/mailto:/A", "", $url);
 213
 214         $link = HTML::a(array('href' => $url),
 215                         PossiblyGlueIconToText($url, $linktext));
 216
 217     }
 218     $link->setAttr('class', $linktext ? 'namedurl' : 'rawurl');
 219     return $link;
 220 }
 221
 222
 223 function LinkImage($url, $alt = false) {
 224     // FIXME: Is this needed (or sufficient?)
 225     if(! IsSafeURL($url)) {
 226         $link = HTML::strong(HTML::u(array('class' => 'baduri'),
 227                                      _("BAD URL -- remove all of <, >, \"")));
 228     }
 229     else {
 230         if (empty($alt))
 231             $alt = $url;
 232         $link = HTML::img(array('src' => $url, 'alt' => $alt));
 233     }
 234     $link->setAttr('class', 'inlineimage');
 235     return $link;
 236 }
 237
 238
 239
 240 class Stack {
 241     var $items = array();
 242     var $size = 0;
 243
 244     function push($item) {
 245         $this->items[$this->size] = $item;
 246         $this->size++;
 247         return true;
 248     }
 249
 250     function pop() {
 251         if ($this->size == 0) {
 252             return false; // stack is empty
 253         }
 254         $this->size--;
 255         return $this->items[$this->size];
 256     }
 257
 258     function cnt() {
 259         return $this->size;
 260     }
 261
 262     function top() {
 263         if($this->size)
 264             return $this->items[$this->size - 1];
 265         else
 266             return '';
 267     }
 268
 269 }
 270 // end class definition
 271
 272
 273 function MakeWikiForm ($pagename, $args, $class, $button_text = '') {
 274     // HACK: so as to not completely break old PhpWikiAdministration pages.
 275     trigger_error("MagicPhpWikiURL forms are no longer supported.  "
 276                   . "Use the WikiFormPlugin instead.", E_USER_NOTICE);
 277
 278     global $request;
 279     $loader = new WikiPluginLoader;
 280     @$action = (string)$args['action'];
 281     return $loader->expandPI("<?plugin WikiForm action=$action ?>", $request);
 282 }
 283
 284 function SplitQueryArgs ($query_args = '')
 285 {
 286     $split_args = split('&', $query_args);
 287     $args = array();
 288     while (list($key, $val) = each($split_args))
 289         if (preg_match('/^ ([^=]+) =? (.*) /x', $val, $m))
 290             $args[$m[1]] = $m[2];
 291     return $args;
 292 }
 293
 294 function LinkPhpwikiURL($url, $text = '') {
 295     $args = array();
 296
 297     if (!preg_match('/^ phpwiki: ([^?]*) [?]? (.*) $/x', $url, $m)) {
 298         return HTML::strong(array('class' => 'rawurl'),
 299                             HTML::u(array('class' => 'baduri'),
 300                                     _("BAD phpwiki: URL")));
 301     }
 302
 303     if ($m[1])
 304         $pagename = urldecode($m[1]);
 305     $qargs = $m[2];
 306
 307     if (empty($pagename) &&
 308         preg_match('/^(diff|edit|links|info)=([^&]+)$/', $qargs, $m)) {
 309         // Convert old style links (to not break diff links in
 310         // RecentChanges).
 311         $pagename = urldecode($m[2]);
 312         $args = array("action" => $m[1]);
 313     }
 314     else {
 315         $args = SplitQueryArgs($qargs);
 316     }
 317
 318     if (empty($pagename))
 319         $pagename = $GLOBALS['request']->getArg('pagename');
 320
 321     if (isset($args['action']) && $args['action'] == 'browse')
 322         unset($args['action']);
 323
 324     /*FIXME:
 325       if (empty($args['action']))
 326       $class = 'wikilink';
 327       else if (is_safe_action($args['action']))
 328       $class = 'wikiaction';
 329     */
 330     if (empty($args['action']) || is_safe_action($args['action']))
 331         $class = 'wikiaction';
 332     else {
 333         // Don't allow administrative links on unlocked pages.
 334         $page = $GLOBALS['request']->getPage();
 335         if (!$page->get('locked'))
 336             return HTML::span(array('class' => 'wikiunsafe'),
 337                               HTML::u(_("Lock page to enable link")));
 338         $class = 'wikiadmin';
 339     }
 340
 341     // FIXME: ug, don't like this
 342     if (preg_match('/=\d*\(/', $qargs))
 343         return MakeWikiForm($pagename, $args, $class, $text);
 344     if (!$text)
 345         $text = HTML::span(array('class' => 'rawurl'), $url);
 346
 347     return HTML::a(array('href'  => WikiURL($pagename, $args),
 348                          'class' => $class),
 349                    $text);
 350 }
 351
 352 /**
 353  * A class to assist in parsing wiki pagenames.
 354  *
 355  * Now with subpages and anchors, parsing and passing around
 356  * pagenames is more complicated.  This should help.
 357  */
 358 class WikiPagename
 359 {
 360     /** Short name for page.
 361      *
 362      * This is the value of $name passed to the constructor.
 363      */
 364     var $shortName;
 365
 366     /** The full page name.
 367      *
 368      * This is the full name of the page (without anchor).
 369      */
 370     var $name;
 371
 372     /** The anchor.
 373      *
 374      * This is the referenced anchor within the page, or the empty string.
 375      */
 376     var $anchor;
 377
 378     /** Constructor
 379      *
 380      * @param string $name Page name.
 381      * This can be a relative subpage name (like '/SubPage'), and can also
 382      * include an anchor (e.g. 'SandBox#anchorname' or just '#anchor').
 383      *
 384      * If you want to include the character '#' within the page name,
 385      * you can escape it with ~.  (The escape character doesn't work for '/').
 386      */
 387     function WikiPageName($name, $basename) {
 388         $this->shortName = $this->unescape($name);
 389
 390         if ($name[0] == SUBPAGE_SEPARATOR or $name[0] == '#')
 391             $name = $this->_pagename($basename) . $name;
 392
 393         $split = preg_split("/\s*(?<!" . ESCAPE_CHAR . ")#\s*/", $name, 2);
 394         if (count($split) > 1)
 395             list ($name, $anchor) = $split;
 396         else
 397             $anchor = '';
 398
 399         $this->name = $this->unescape($name);
 400         $this->anchor = $this->unescape($anchor);
 401     }
 402
 403     function escape($page) {
 404         return str_replace('#', ESCAPE_CHAR . '#', $page);
 405     }
 406
 407     function unescape($page) {
 408         return preg_replace('/' . ESCAPE_CHAR . '(.)/', '\1', $page);
 409     }
 410
 411     function _pagename($page) {
 412         if (isa($page, 'WikiDB_Page'))
 413             return $page->getName();
 414         elseif (isa($page, 'WikiDB_PageRevision'))
 415             return $page->getPageName();
 416         elseif (isa($page, 'WikiPageName'))
 417             return $page->name;
 418         assert(is_string($page));
 419         return $page;
 420     }
 421 }
 422
 423 function LinkBracketLink($bracketlink) {
 424     global $request, $AllowedProtocols, $InlineImages;
 425
 426     include_once("lib/interwiki.php");
 427     $intermap = InterWikiMap::GetMap($request);
 428
 429     // $bracketlink will start and end with brackets; in between will
 430     // be either a page name, a URL or both separated by a pipe.
 431
 432     // strip brackets and leading space
 433     preg_match('/(\#?) \[\s* (?: (.+?) \s* (?<!' . ESCAPE_CHAR . ')(\|) )? \s* (.+?) \s*\]/x',
 434                $bracketlink, $matches);
 435     list (, $hash, $label, $bar, $link) = $matches;
 436
 437     $wikipage = new WikiPageName($link, $request->getPage());
 438     $label = WikiPageName::unescape($label);
 439     $link = WikiPageName::unescape($link);
 440
 441     // if label looks like a url to an image, we want an image link.
 442     if (preg_match("/\\.($InlineImages)$/i", $label)) {
 443         $imgurl = $label;
 444         if (! preg_match("#^($AllowedProtocols):#", $imgurl)) {
 445             // linkname like 'images/next.gif'.
 446             global $Theme;
 447             $imgurl = $Theme->getImageURL($linkname);
 448         }
 449         $label = LinkImage($imgurl, $link);
 450     }
 451
 452     if ($hash) {
 453         // It's an anchor, not a link...
 454         $id = MangleXmlIdentifier($link);
 455         return HTML::a(array('name' => $id, 'id' => $id),
 456                        $bar ? $label : $link);
 457     }
 458
 459     $dbi = $request->getDbh();
 460     if ($dbi->isWikiPage($wikipage->name)) {
 461         return WikiLink($wikipage, 'known', $label);
 462     }
 463     elseif (preg_match("#^($AllowedProtocols):#", $link)) {
 464         // if it's an image, embed it; otherwise, it's a regular link
 465         if (preg_match("/\\.($InlineImages)$/i", $link))
 466             // no image link, just the src. see [img|link] above
 467             return LinkImage($link, $label);
 468         else
 469             return LinkURL($link, $label);
 470     }
 471     elseif (preg_match("/^phpwiki:/", $link))
 472         return LinkPhpwikiURL($link, $label);
 473     elseif (preg_match("/^" . $intermap->getRegexp() . ":/", $link))
 474         return $intermap->link($link, $label);
 475     else {
 476         return WikiLink($wikipage, 'unknown', $label);
 477     }
 478 }
 479
 480 /**
 481  * Extract internal links from wiki page.
 482  *
 483  * @param mixed $content The raw wiki-text, either as
 484  * an array of lines or as one big string.
 485  *
 486  * @return array List of the names of pages linked to.
 487  */
 488 function ExtractWikiPageLinks($content) {
 489     list ($wikilinks,) = ExtractLinks($content);
 490     return $wikilinks;
 491 }
 492
 493 /**
 494  * Extract external links from a wiki page.
 495  *
 496  * @param mixed $content The raw wiki-text, either as
 497  * an array of lines or as one big string.
 498  *
 499  * @return array List of the names of pages linked to.
 500  */
 501 function ExtractExternalLinks($content) {
 502     list (, $urls) = ExtractLinks($content);
 503     return $urls;
 504 }
 505
 506 /**
 507  * Extract links from wiki page.
 508  *
 509  * FIXME: this should be done by the transform code.
 510  *
 511  * @param mixed $content The raw wiki-text, either as
 512  * an array of lines or as one big string.
 513  *
 514  * @return array List of two arrays.  The first contains
 515  * the internal links (names of pages linked to), the second
 516  * contains external URLs linked to.
 517  */
 518 function ExtractLinks($content) {
 519     include_once('lib/interwiki.php');
 520     global $request, $WikiNameRegexp, $AllowedProtocols;
 521
 522     if (is_string($content))
 523         $content = explode("\n", $content);
 524
 525     $wikilinks = array();
 526     $urls = array();
 527
 528     foreach ($content as $line) {
 529         // remove plugin code
 530         $line = preg_replace('/<\?plugin\s+\w.*?\?>/', '', $line);
 531         // remove escaped '['
 532         $line = str_replace('[[', ' ', $line);
 533         // remove footnotes
 534         $line = preg_replace('/\[\d+\]/', ' ', $line);
 535
 536         // bracket links (only type wiki-* is of interest)
 537         $numBracketLinks = preg_match_all("/\[\s*([^\]|]+\|)?\s*(\S.*?)\s*\]/",
 538                                           $line, $brktlinks);
 539         for ($i = 0; $i < $numBracketLinks; $i++) {
 540             $link = LinkBracketLink($brktlinks[0][$i]);
 541             $class = $link->getAttr('class');
 542             if (preg_match('/^(named-)?wiki(unknown)?$/', $class)) {
 543                 if ($brktlinks[2][$i][0] == SUBPAGE_SEPARATOR) {
 544                     $wikilinks[$request->getArg('pagename') . $brktlinks[2][$i]] = 1;
 545                 } else {
 546                     $wikilinks[$brktlinks[2][$i]] = 1;
 547                 }
 548             }
 549             elseif (preg_match('/^(namedurl|rawurl|(named-)?interwiki)$/', $class)) {
 550                 $urls[$brktlinks[2][$i]] = 1;
 551             }
 552             $line = str_replace($brktlinks[0][$i], '', $line);
 553         }
 554
 555         // Raw URLs
 556         preg_match_all("/!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]/",
 557                        $line, $link);
 558         foreach ($link[0] as $url) {
 559             if ($url[0] <> '!') {
 560                 $urls[$url] = 1;
 561             }
 562             $line = str_replace($url, '', $line);
 563         }
 564
 565         // Interwiki links
 566         $map = InterWikiMap::GetMap($request);
 567         $regexp = pcre_fix_posix_classes("!?(?<![[:alnum:]])")
 568             . $map->getRegexp() . ":[^\\s.,;?()]+";
 569         preg_match_all("/$regexp/", $line, $link);
 570         foreach ($link[0] as $interlink) {
 571             if ($interlink[0] <> '!') {
 572                 $link = $map->link($interlink);
 573                 $urls[$link->getAttr('href')] = 1;
 574             }
 575             $line = str_replace($interlink, '', $line);
 576         }
 577
 578         // BumpyText old-style wiki links
 579         if (preg_match_all("/!?$WikiNameRegexp/", $line, $link)) {
 580             for ($i = 0; isset($link[0][$i]); $i++) {
 581                 if($link[0][$i][0] <> '!') {
 582                     if ($link[0][$i][0] == SUBPAGE_SEPARATOR) {
 583                         $wikilinks[$request->getArg('pagename') . $link[0][$i]] = 1;
 584                     } else {
 585                         $wikilinks[$link[0][$i]] = 1;
 586                     }
 587                 }
 588             }
 589         }
 590     }
 591     return array(array_keys($wikilinks), array_keys($urls));
 592 }
 593
 594
  595 /**
  596  * Convert old-style page markup to new-style markup.
  597  *
  598  * @param string $text Old-style wiki markup.
  599  *
  600  * @param string $markup_type
  601  * One of: <dl>
  602  * <dt><code>"block"</code>  <dd>Convert all markup (this is the default).
  603  * <dt><code>"inline"</code> <dd>Convert only inline markup.
  604  * <dt><code>"links"</code>  <dd>Convert only link markup.
  605  * </dl>
  606  *
  607  * @return string New-style wiki markup.
  608  *
  609  * @bugs Footnotes don't work quite as before (especially if there are
  610  *   multiple references to the same footnote).  But close enough,
  611  *   probably, for now....
  612  */
  613 function ConvertOldMarkup ($text, $markup_type = "block") {
  614
  615     static $subs; // per markup type: array(patterns, replacements); built on the first call
  616     static $block_re; // regexp which finds the next block-level construct
  617
  618     if (empty($subs)) { // first call: build the tables (kept in the statics above)
  619         /*****************************************************************
  620          * Conversions for inline markup:
  621          */
  622
  623         // escape tilde's
  624         $orig[] = '/~/';
  625         $repl[] = '~~';
  626
  627         // escape escaped brackets
  628         $orig[] = '/\[\[/';
  629         $repl[] = '~[';
  630
  631         // change ! escapes to ~'s.
  632         global $AllowedProtocols, $WikiNameRegexp, $request;
  633         include_once('lib/interwiki.php');
  634         $map = InterWikiMap::GetMap($request);
  635         $bang_esc[] = "(?:$AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]";
  636         $bang_esc[] = $map->getRegexp() . ":[^\\s.,;?()]+"; // FIXME: is this really needed?
  637         $bang_esc[] = $WikiNameRegexp;
  638         $orig[] = '/!((?:' . join(')|(', $bang_esc) . '))/';
  639         $repl[] = '~\\1';
  640
  641         $subs["links"] = array($orig, $repl); // snapshot: "links" gets only the patterns added so far
  642
  643         // Escape '<'s
  644         //$orig[] = '/<(?!\?plugin)|(?<!^)</m';
  645         //$repl[] = '~<';
  646
  647         // Convert footnote references.
  648         $orig[] = '/(?<=.)(?<!~)\[\s*(\d+)\s*\]/m';
  649         $repl[] = '#[|ftnt_ref_\\1]<sup>~[[\\1|#ftnt_\\1]~]</sup>';
  650
  651         // Convert old style emphases to HTML style emphasis.
  652         $orig[] = '/__(.*?)__/';
  653         $repl[] = '<strong>\\1</strong>';
  654         $orig[] = "/''(.*?)''/";
  655         $repl[] = '<em>\\1</em>';
  656
  657         // Escape nestled markup.
  658         $orig[] = '/^(?<=^|\s)[=_](?=\S)|(?<=\S)[=_*](?=\s|$)/m';
  659         $repl[] = '~\\0';
  660
  661         // in old markup headings only allowed at beginning of line
  662         $orig[] = '/!/';
  663         $repl[] = '~!';
  664
  665         $subs["inline"] = array($orig, $repl); // the link patterns plus all the inline ones above
  666
  667         /*****************************************************************
  668          * Patterns which match block markup constructs which take
  669          * special handling...
  670          */
  671
  672         // Indented blocks
  673         $blockpats[] = '[ \t]+\S(?:.*\s*\n[ \t]+\S)*';
  674
  675         // Tables
  676         $blockpats[] = '\|(?:.*\n\|)*';
  677
  678         // List items
  679         $blockpats[] = '[#*;]*(?:[*#]|;.*?:)';
  680
  681         // Footnote definitions
  682         $blockpats[] = '\[\s*(\d+)\s*\]';
  683
  684         // Plugins
  685         $blockpats[] = '<\?plugin(?:-form)?\b.*\?>\s*$';
  686
  687         // Section Title
  688         $blockpats[] = '!{1,3}[^!]';
  689
  690         $block_re = ( '/\A((?:.|\n)*?)(^(?:'
  691                       . join("|", $blockpats)
  692                       . ').*$)\n?/m' ); // group 1: text before the block; group 2: the block itself
  693
  694     }
  695
  696     if ($markup_type != "block") {
  697         list ($orig, $repl) = $subs[$markup_type]; // "inline" or "links": plain regexp substitution
  698         return preg_replace($orig, $repl, $text);
  699     }
  700     else {
  701         list ($orig, $repl) = $subs['inline'];
  702         $out = '';
  703         while (preg_match($block_re, $text, $m)) {
  704             $text = substr($text, strlen($m[0])); // consume everything up to and including this block
  705             list (,$leading_text, $block) = $m;
  706             $suffix = "\n"; // default; replaced below for indented blocks and tables
  707
  708             if (strchr(" \t", $block[0])) {
  709                 // Indented block
  710                 $prefix = "<pre>\n";
  711                 $suffix = "\n</pre>\n";
  712             }
  713             elseif ($block[0] == '|') {
  714                 // Old-style table
  715                 $prefix = "<?plugin OldStyleTable\n";
  716                 $suffix = "\n?>\n";
  717             }
  718             elseif (strchr("#*;", $block[0])) {
  719                 // Old-style list item
  720                 preg_match('/^([#*;]*)([*#]|;.*?:) */', $block, $m);
  721                 list (,$ind,$bullet) = $m;
  722                 $block = substr($block, strlen($m[0]));
  723
  724                 $indent = str_repeat('     ', strlen($ind)); // five spaces per nesting level
  725                 if ($bullet[0] == ';') {
  726                     //$term = ltrim(substr($bullet, 1));
  727                     //return $indent . $term . "\n" . $indent . '     ';
  728                     $prefix = $ind . $bullet;
  729                 }
  730                 else
  731                     $prefix = $indent . $bullet . ' ';
  732             }
  733             elseif ($block[0] == '[') {
  734                 // Footnote definition
  735                 preg_match('/^\[\s*(\d+)\s*\]/', $block, $m);
  736                 $footnum = $m[1];
  737                 $block = substr($block, strlen($m[0]));
  738                 $prefix = "#[|ftnt_${footnum}]~[[${footnum}|#ftnt_ref_${footnum}]~] ";
  739             }
  740             elseif ($block[0] == '<') {
  741                 // Plugin.
  742                 // HACK: no inline markup...
  743                 $prefix = $block;
  744                 $block = '';
  745             }
  746             elseif ($block[0] == '!') {
  747                 // Section heading
  748                 preg_match('/^!{1,3}/', $block, $m);
  749                 $prefix = $m[0];
  750                 $block = substr($block, strlen($m[0]));
  751             }
  752             else {
  753                 // AAck!
  754                 assert(0);
  755             }
  756
  757             $out .= ( preg_replace($orig, $repl, $leading_text)
  758                       . $prefix
  759                       . preg_replace($orig, $repl, $block)
  760                       . $suffix );
  761         }
  762         return $out . preg_replace($orig, $repl, $text); // whatever follows the last block
  763     }
  764 }
 765
 766
 767 /**
 768  * Expand tabs in string.
 769  *
 770  * Converts all tabs to (the appropriate number of) spaces.
 771  *
 772  * @param string $str
 773  * @param integer $tab_width
 774  * @return string
 775  */
 776 function expand_tabs($str, $tab_width = 8) {
 777     $split = split("\t", $str);
 778     $tail = array_pop($split);
 779     $expanded = "\n";
 780     foreach ($split as $hunk) {
 781         $expanded .= $hunk;
 782         $pos = strlen(strrchr($expanded, "\n")) - 1;
 783         $expanded .= str_repeat(" ", ($tab_width - $pos % $tab_width));
 784     }
 785     return substr($expanded, 1) . $tail;
 786 }
 787
 788 /**
 789  * Split WikiWords in page names.
 790  *
 791  * It has been deemed useful to split WikiWords (into "Wiki Words") in
 792  * places like page titles. This is rumored to help search engines
 793  * quite a bit.
 794  *
 795  * @param $page string The page name.
 796  *
 797  * @return string The split name.
 798  */
 799 function split_pagename ($page) {
 800
 801     if (preg_match("/\s/", $page))
 802         return $page;           // Already split --- don't split any more.
 803
 804     // FIXME: this algorithm is Anglo-centric.
 805     static $RE;
 806     if (!isset($RE)) {
 807         // This mess splits between a lower-case letter followed by
 808         // either an upper-case or a numeral; except that it wont
 809         // split the prefixes 'Mc', 'De', or 'Di' off of their tails.
 810         $RE[] = '/([[:lower:]])((?<!Mc|De|Di)[[:upper:]]|\d)/';
 811         // This the single-letter words 'I' and 'A' from any following
 812         // capitalized words.
 813         $sep = preg_quote(SUBPAGE_SEPARATOR, '/');
 814         $RE[] = "/(?<= |${sep}|^)([AI])([[:upper:]][[:lower:]])/";
 815         // Split numerals from following letters.
 816         $RE[] = '/(\d)([[:alpha:]])/';
 817
 818         foreach ($RE as $key => $val)
 819             $RE[$key] = pcre_fix_posix_classes($val);
 820     }
 821
 822     foreach ($RE as $regexp) {
 823         $page = preg_replace($regexp, '\\1 \\2', $page);
 824     }
 825     return $page;
 826 }
 827
 828 function NoSuchRevision (&$request, $page, $version) {
 829     $html = HTML(HTML::h2(_("Revision Not Found")),
 830                  HTML::p(fmt("I'm sorry.  Version %d of %s is not in the database.",
 831                              $version, WikiLink($page, 'auto'))));
 832     include_once('lib/Template.php');
 833     GeneratePage($html, _("Bad Version"), $page->getCurrentRevision());
 834     $request->finish();
 835 }
 836
 837
 838 /**
 839  * Get time offset for local time zone.
 840  *
 841  * @param $time time_t Get offset for this time. Default: now.
 842  * @param $no_colon boolean Don't put colon between hours and minutes.
 843  * @return string Offset as a string in the format +HH:MM.
 844  */
 845 function TimezoneOffset ($time = false, $no_colon = false) {
 846     if ($time === false)
 847         $time = time();
 848     $secs = date('Z', $time);
 849
 850     if ($secs < 0) {
 851         $sign = '-';
 852         $secs = -$secs;
 853     }
 854     else {
 855         $sign = '+';
 856     }
 857     $colon = $no_colon ? '' : ':';
 858     $mins = intval(($secs + 30) / 60);
 859     return sprintf("%s%02d%s%02d",
 860                    $sign, $mins / 60, $colon, $mins % 60);
 861 }
 862
 863
 864 /**
 865  * Format time in ISO-8601 format.
 866  *
 867  * @param $time time_t Time.  Default: now.
 868  * @return string Date and time in ISO-8601 format.
 869  */
 870 function Iso8601DateTime ($time = false) {
 871     if ($time === false)
 872         $time = time();
 873     $tzoff = TimezoneOffset($time);
 874     $date  = date('Y-m-d', $time);
 875     $time  = date('H:i:s', $time);
 876     return $date . 'T' . $time . $tzoff;
 877 }
 878
 879 /**
 880  * Format time in RFC-2822 format.
 881  *
 882  * @param $time time_t Time.  Default: now.
 883  * @return string Date and time in RFC-2822 format.
 884  */
 885 function Rfc2822DateTime ($time = false) {
 886     if ($time === false)
 887         $time = time();
 888     return date('D, j M Y H:i:s ', $time) . TimezoneOffset($time, 'no colon');
 889 }
 890
 891 /**
 892  * Format time in RFC-1123 format.
 893  *
 894  * @param $time time_t Time.  Default: now.
 895  * @return string Date and time in RFC-1123 format.
 896  */
 897 function Rfc1123DateTime ($time = false) {
 898     if ($time === false)
 899         $time = time();
 900     return gmdate('D, d M Y H:i:s \G\M\T', $time);
 901 }
 902
 903 /** Parse date in RFC-1123 format.
 904  *
 905  * According to RFC 1123 we must accept dates in the following
 906  * formats:
 907  *
 908  *   Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 909  *   Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 910  *   Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 911  *
 912  * (Though we're only allowed to generate dates in the first format.)
 913  */
 914 function ParseRfc1123DateTime ($timestr) {
 915     $timestr = trim($timestr);
 916     if (preg_match('/^ \w{3},\s* (\d{1,2}) \s* (\w{3}) \s* (\d{4}) \s*'
 917                    .'(\d\d):(\d\d):(\d\d) \s* GMT $/ix',
 918                    $timestr, $m)) {
 919         list(, $mday, $mon, $year, $hh, $mm, $ss) = $m;
 920     }
 921     elseif (preg_match('/^ \w+,\s* (\d{1,2})-(\w{3})-(\d{2}|\d{4}) \s*'
 922                        .'(\d\d):(\d\d):(\d\d) \s* GMT $/ix',
 923                        $timestr, $m)) {
 924         list(, $mday, $mon, $year, $hh, $mm, $ss) = $m;
 925         if ($year < 70) $year += 2000;
 926         elseif ($year < 100) $year += 1900;
 927     }
 928     elseif (preg_match('/^\w+\s* (\w{3}) \s* (\d{1,2}) \s*'
 929                        .'(\d\d):(\d\d):(\d\d) \s* (\d{4})$/ix',
 930                        $timestr, $m)) {
 931         list(, $mon, $mday, $hh, $mm, $ss, $year) = $m;
 932     }
 933     else {
 934         // Parse failed.
 935         return false;
 936     }
 937
 938     $time = strtotime("$mday $mon $year ${hh}:${mm}:${ss} GMT");
 939     if ($time == -1)
 940         return false;           // failed
 941     return $time;
 942 }
 943
 944 /**
 945  * Format time to standard 'ctime' format.
 946  *
 947  * @param $time time_t Time.  Default: now.
 948  * @return string Date and time.
 949  */
 950 function CTime ($time = false)
 951 {
 952     if ($time === false)
 953         $time = time();
 954     return date("D M j H:i:s Y", $time);
 955 }
 956
 957
 958
 959 /**
 960  * Internationalized printf.
 961  *
 962  * This is essentially the same as PHP's built-in printf
 963  * with the following exceptions:
 964  * <ol>
 965  * <li> It passes the format string through gettext().
 966  * <li> It supports the argument reordering extensions.
 967  * </ol>
 968  *
 969  * Example:
 970  *
 971  * In php code, use:
 972  * <pre>
 973  *    __printf("Differences between versions %s and %s of %s",
 974  *             $new_link, $old_link, $page_link);
 975  * </pre>
 976  *
 977  * Then in locale/po/de.po, one can reorder the printf arguments:
 978  *
 979  * <pre>
 980  *    msgid "Differences between %s and %s of %s."
 981  *    msgstr "Der Unterschiedsergebnis von %3$s, zwischen %1$s und %2$s."
 982  * </pre>
 983  *
 984  * (Note that while PHP tries to expand $vars within double-quotes,
 985  * the values in msgstr undergo no such expansion, so the '$'s
 986  * okay...)
 987  *
 988  * One shouldn't use reordered arguments in the default format string.
 989  * Backslashes in the default string would be necessary to escape the
 990  * '$'s, and they'll cause all kinds of trouble....
 991  */
 992 function __printf ($fmt) {
 993     $args = func_get_args();
 994     array_shift($args);
 995     echo __vsprintf($fmt, $args);
 996 }
 997
 998 /**
 999  * Internationalized sprintf.
1000  *
1001  * This is essentially the same as PHP's built-in printf with the
1002  * following exceptions:
1003  *
1004  * <ol>
1005  * <li> It passes the format string through gettext().
1006  * <li> It supports the argument reordering extensions.
1007  * </ol>
1008  *
1009  * @see __printf
1010  */
1011 function __sprintf ($fmt) {
1012     $args = func_get_args();
1013     array_shift($args);
1014     return __vsprintf($fmt, $args);
1015 }
1016
/**
 * Internationalized vsprintf.
 *
 * Works like PHP's sprintf with the values supplied as an array,
 * with the following additions:
 *
 * <ol>
 * <li> The format string is passed through gettext() first.
 * <li> '%1$s' style argument reordering is resolved here, by
 *      rewriting the format and permuting the values.
 * </ol>
 *
 * @param $fmt string Format string (looked up via gettext()).
 * @param $args array Values to format, in source order.
 * @return string The formatted string.
 * @see __printf
 */
function __vsprintf ($fmt, $args) {
    $fmt = gettext($fmt);
    // PHP's sprintf doesn't support variable width specifiers,
    // like sprintf("%*s", 10, "x"); --- so we won't either.

    $reordering = preg_match_all('/(?<!%)%(\d+)\$/x', $fmt, $m);
    if ($reordering) {
        // The format uses positional ('%2$s') conversions; these are
        // translated by hand into plain conversions below.
        if (preg_match('/(?<!%)%[- ]?\d*[^- \d$]/x', $fmt)) {
            // The specifier names are substituted into the message so
            // the locale strings stay uncluttered.
            trigger_error(sprintf(_("Can't mix '%s' with '%s' type format strings"),
                                  '%1\$s','%s'), E_USER_WARNING); //php+locale error
        }

        // Drop the 'N$' part of every positional conversion.
        $fmt = preg_replace('/(?<!%)%\d+\$/x', '%', $fmt);

        // Line the values up in the order the format refers to them.
        $permuted = array();
        $available = count($args);
        foreach ($m[1] as $position) {
            if ($position < 1 || $position > $available) {
                trigger_error(sprintf(_("%s: argument index out of range"),
                                      $position), E_USER_WARNING);
            }
            $permuted[] = $args[$position - 1];
        }
        $args = $permuted;
    }

    // Not all PHP's have vsprintf, so call sprintf with the format
    // prepended to the value list.
    array_unshift($args, $fmt);
    return call_user_func_array('sprintf', $args);
}
1061
1062
/**
 * The names of the plain files found directly inside one directory,
 * optionally filtered by a glob pattern.
 *
 * (This was a function LoadDir in lib/loadsave.php)
 * See also http://www.php.net/manual/en/function.readdir.php
 */
class fileSet {
    // Matching file names (without the directory part).
    var $_fileList = array();
    // Glob pattern names must match, or false to accept every file.
    var $_pattern = false;
    // Whether pattern matching is case sensitive.
    var $_case = true;
    // Separator placed between the directory and the file name.
    var $_pathsep = '/';

    /**
     * Build an array in $this->_fileList of files from $directory.
     * Subdirectories are not traversed, and names starting with a
     * dot are skipped.
     *
     * An E_USER_NOTICE is raised, and the list left empty, when
     * $directory is empty or cannot be opened.
     *
     * @param $directory string Directory to scan.
     * @param $filepattern mixed Glob pattern, or false for all files.
     */
    function __construct($directory, $filepattern = false) {
        $this->_fileList = array();
        $this->_pattern = $filepattern;
        $this->_case = !isWindows();
        $this->_pathsep = '/';

        if (empty($directory)) {
            trigger_error(sprintf(_("%s is empty."), 'directoryname'),
                          E_USER_NOTICE);
            return; // early return
        }

        $dir = $directory;
        // The failure is reported through trigger_error() below.
        $dir_handle = @opendir($dir);
        if (empty($dir_handle)) {
            trigger_error(sprintf(_("Unable to open directory '%s' for reading"),
                                  $dir), E_USER_NOTICE);
            return; // early return
        }

        // Compare against false explicitly: an entry named "0" is
        // falsy and would otherwise end the scan prematurely.
        while (($filename = readdir($dir_handle)) !== false) {
            if ($filename[0] == '.' || filetype($dir . $this->_pathsep . $filename) != 'file')
                continue;
            if ($this->_filenameSelector($filename)) {
                array_push($this->_fileList, "$filename");
                //trigger_error(sprintf(_("found file %s"), $filename),
                //                      E_USER_NOTICE); //debugging
            }
        }
        closedir($dir_handle);
    }

    /**
     * PHP 4 style constructor, kept so that PHP 4 and any code calling
     * fileSet() explicitly keep working.  It is declared after
     * __construct() on purpose; PHP 8 no longer treats a method named
     * after the class as a constructor.
     */
    function fileSet($directory, $filepattern = false) {
        $this->__construct($directory, $filepattern);
    }

    /**
     * @return array The file names collected by the constructor.
     */
    function getFiles() {
        return $this->_fileList;
    }

    /**
     * Decide whether $filename belongs in the set.
     *
     * @param $filename string File name (no directory part).
     * @return mixed True when no pattern was given, otherwise the
     * result of glob_match() against the pattern.
     */
    function _filenameSelector($filename) {
        if (! $this->_pattern)
            return true;
        else {
            return glob_match ($this->_pattern, $filename, $this->_case);
        }
    }
}
1114
1115 // File globbing
1116
/**
 * Expands one regex entry of a list into the entries it matches.
 *
 * The object holds a reference to the caller's list.  expandRegex()
 * removes the entry at a given index from that list and appends every
 * candidate that matches the regex.  Keys are not renumbered.
 */
class ListRegexExpand {
    var $match, $list, $index, $case_sensitive;

    /**
     * @param $list array List to modify in place (taken by reference).
     * @param $match string PCRE pattern without delimiters; any '/'
     * in it is escaped here.
     * @param $case_sensitive boolean False to match case-insensitively.
     */
    function __construct (&$list, $match, $case_sensitive = true) {
        $this->match = str_replace('/','\/',$match);
        $this->list = &$list;
        $this->case_sensitive = $case_sensitive;
    }

    /**
     * PHP 4 style constructor, kept for PHP 4 and for explicit calls.
     * Declared after __construct() on purpose; PHP 8 no longer treats
     * a method named after the class as a constructor.
     */
    function ListRegexExpand (&$list, $match, $case_sensitive = true) {
        $this->__construct($list, $match, $case_sensitive);
    }

    /**
     * array_walk() callback: when $item matches, drop the entry being
     * expanded (a no-op after the first match) and append $item.
     */
    function listMatchCallback ($item, $key) {
        if (preg_match('/' . $this->match . ($this->case_sensitive ? '/' : '/i'), $item)) {
            unset($this->list[$this->index]);
            $this->list[] = $item;
        }
    }

    /**
     * Expand the list entry at $index against the candidates in $pages.
     *
     * @param $index integer Key of the list entry holding the regex.
     * @param $pages array Candidate names to test.
     * @return array The list after expansion.
     */
    function expandRegex ($index, &$pages) {
        $this->index = $index;
        array_walk($pages, array($this, 'listMatchCallback'));
        return $this->list;
    }
}
1137
/**
 * Convert a file glob to a PCRE pattern body (no delimiters).
 *
 * '.' is escaped, '*' becomes '.*' and '?' becomes '.'.  The result
 * is anchored at each end unless the glob has a '*' there.  Other
 * characters, including '/', are passed through unchanged.
 *
 * @param $glob string Glob pattern, e.g. "*.php".
 * @return string Regular expression, e.g. ".*\.php$".
 */
function glob_to_pcre ($glob) {
    // Escape literal dots first, so that the dots introduced for the
    // wildcards below are not escaped as well.
    $re = str_replace('.', '\\.', $glob);
    $re = str_replace(array('*', '?'), array('.*', '.'), $re);

    // Only '*' may go unanchored; '?' stands for exactly one
    // character, so it must be anchored.
    if (substr($glob, 0, 1) != '*')
        $re = '^' . $re;
    if (substr($glob, -1) != '*')
        $re = $re . '$';
    return $re;
}
1148
/**
 * Test a string against a file glob.
 *
 * @param $glob string Glob pattern (converted with glob_to_pcre()).
 * @param $against string String to test.
 * @param $case_sensitive boolean False to match case-insensitively.
 * @return mixed Result of preg_match().
 */
function glob_match ($glob, $against, $case_sensitive = true) {
    // '/' is the regex delimiter here, so escape any '/' in the
    // pattern (ListRegexExpand does the same).
    $re = str_replace('/', '\/', glob_to_pcre($glob));
    return preg_match('/' . $re . ($case_sensitive ? '/' : '/i'), $against);
}
1152
/**
 * Split a comma separated list, expanding wildcard entries.
 *
 * Every entry containing '*' or '?' is replaced by the names from
 * $allnames that match it; an entry without matches is left as is.
 * Expanded names are appended to the end and the keys of the
 * returned array are not renumbered.
 *
 * @param $input string Comma separated list of names or patterns.
 * @param $allnames array Names that wildcard entries are matched against.
 * @param $glob_style boolean True if patterns are file globs, false
 * if they are already regular expressions.
 * @param $case_sensitive boolean False to match case-insensitively.
 * @return array The expanded list.
 */
function explodeList($input, $allnames, $glob_style = true, $case_sensitive = true) {
    $list = explode(',',$input);
    // expand wildcards from list of $allnames
    if (preg_match('/[\?\*]/',$input)) {
        // sizeof() is re-evaluated on purpose: expansion removes the
        // pattern's slot and appends the matches.
        for ($i = 0; $i < sizeof($list); $i++) {
            // Slot already removed by an earlier expansion.
            if (!isset($list[$i]))
                continue;
            $f = $list[$i];
            if (preg_match('/[\?\*]/',$f)) {
                reset($allnames);
                // ListRegexExpand declares its reference parameters
                // itself; call-time '&' is a parse error on PHP >= 5.4.
                $expand = new ListRegexExpand($list, $glob_style ? glob_to_pcre($f) : $f, $case_sensitive);
                $expand->expandRegex($i, $allnames);
            }
        }
    }
    return $list;
}
1168
1169 // echo implode(":",explodeList("Test*",array("xx","Test1","Test2")));
1170
/**
 * Split a comma separated list of page names, expanding wildcards
 * against the names of all pages in the wiki.
 *
 * The page database is only consulted when $input contains '*' or '?'.
 *
 * @param $input string Comma separated page names or glob patterns.
 * @param $perm mixed Passed through to $dbi->getAllPages().
 * @return array List of page names.
 */
function explodePageList($input, $perm = false) {
    // Nothing to expand: a plain split is enough.
    if (!preg_match('/[\?\*]/',$input))
        return explode(',',$input);

    // expand wildcards from list of all pages
    $allPages = array();        // stays an array even if there are no pages
    $dbi = $GLOBALS['request']->_dbi;
    $allPagehandles = $dbi->getAllPages($perm);
    while ($pagehandle = $allPagehandles->next()) {
        $allPages[] = $pagehandle->getName();
    }
    // explodeList() takes the names by value; call-time '&' is a
    // parse error on PHP >= 5.4.
    return explodeList($input, $allPages);
}
1184
1185 // Class introspections
1186
/** Determine whether object is of a specified type.
 *
 * Class names are compared case-insensitively.
 *
 * @param $object object An object.
 * @param $class string Class name.
 * @return bool True iff $object is a $class
 * or a sub-type of $class.
 */
function isa ($object, $class)
{
    if (!is_object($object))
        return false;

    // get_class() returns a lower-cased name on PHP 4 but the declared
    // spelling on PHP 5+, so normalize both sides before comparing.
    $lclass = strtolower($class);
    return strtolower(get_class($object)) == $lclass
        || is_subclass_of($object, $lclass);
}
1202
/** Determine whether a value is an object that has a given method.
 *
 * @param $object mixed Value to examine; need not be an object.
 * @param $method string Method name (lower-cased before the lookup).
 * @return bool True iff $object is an object with a method $method.
 */
function can ($object, $method)
{
    if (!is_object($object))
        return false;
    return method_exists($object, strtolower($method));
}
1213
/** Hash a value.
 *
 * This is used for generating ETags.
 *
 * Scalars are returned unchanged.  Arrays are sorted by key and
 * reduced to the md5 of their serialization.  Objects are asked for
 * their own hash via their hash() method.  Anything else raises an
 * E_USER_ERROR.
 *
 * @param $x mixed Value to hash.
 * @return mixed The scalar itself, an md5 string, or whatever
 * $x->hash() returns.
 */
function wikihash ($x) {
    if (is_scalar($x)) {
        return $x;
    }
    elseif (is_array($x)) {
        // Sort so that key order does not influence the result.
        ksort($x);
        return md5(serialize($x));
    }
    elseif (is_object($x)) {
        return $x->hash();
    }
    trigger_error("Can't hash $x", E_USER_ERROR);
}

// Historical name of wikihash().  Newer PHP versions ship their own,
// incompatible hash() function, and redeclaring it is a fatal error,
// so the alias is only defined where the name is still free.  Use
// wikihash() in new code.
if (!function_exists('hash')) {
    function hash ($x) {
        return wikihash($x);
    }
}
1231
1232
/**
 * Seed the random number generator, at most once per request.
 *
 * Only the first call has any effect; later calls return without
 * reseeding, whatever $seed they are given.
 *
 * How random do you want it? See:
 * http://www.php.net/manual/en/function.srand.php
 * http://www.php.net/manual/en/function.mt-srand.php
 *
 * @param $seed mixed Seed to use.  Default (''): derive one from the
 * microsecond part of microtime().
 */
function better_srand($seed = '') {
    static $wascalled = FALSE;

    if ($wascalled)
        return;

    if ($seed === '')
        $seed = (double) microtime() * 1000000;
    srand($seed);
    $wascalled = TRUE;
    //trigger_error("new random seed", E_USER_NOTICE); //debugging
}
1251
/**
 * Recursively count all non-empty elements
 * in array of any dimension or mixed - i.e.
 * array('1' => 2, '2' => array('1' => 3, '2' => 4))
 * See http://www.php.net/manual/en/function.count.php
 *
 * Values that PHP treats as false (0, '', '0', null, array(), ...)
 * are not counted.
 *
 * @param $arg mixed Array (possibly nested) or a single value.
 * @return integer Number of non-empty leaf values; 0 for empty input.
 */
function count_all($arg) {
    // Empty input counts as zero (rather than falling off the end
    // and returning NULL).
    if (!$arg)
        return 0;
    // not an array, return 1 (base case)
    if (!is_array($arg))
        return 1;
    // else call recursively for all elements of $arg
    $count = 0;
    foreach ($arg as $val)
        $count += count_all($val);
    return $count;
}
1272
/**
 * Tell whether a page name contains the subpage separator.
 *
 * @param $pagename string Page name.
 * @return mixed The part of $pagename from the first
 * SUBPAGE_SEPARATOR onwards (as returned by strstr()), or false
 * if the separator does not occur.
 */
function isSubPage($pagename) {
    $from_separator = strstr($pagename, SUBPAGE_SEPARATOR);
    return $from_separator;
}
1276
/**
 * Return one component of a subpage name.
 *
 * @param $pagename string Page name; components are separated by
 * SUBPAGE_SEPARATOR.
 * @param $pos integer Offset of the wanted component, as understood
 * by array_slice() (negative values count from the end).
 * @return string The component at $pos.
 */
function subPageSlice($pagename, $pos) {
    $components = explode(SUBPAGE_SEPARATOR, $pagename);
    $picked = array_slice($components, $pos, 1);
    return $picked[0];
}
1282
1283 // $Log: not supported by cvs2svn $
1284 // Revision 1.136  2003/02/18 21:52:07  dairiki
1285 // Fix so that one can still link to wiki pages with # in their names.
1286 // (This was made difficult by the introduction of named tags, since
1287 // '[Page #1]' is now a link to anchor '1' in page 'Page'.
1288 //
1289 // Now the ~ escape for page names should work: [Page ~#1].
1290 //
1291 // Revision 1.135  2003/02/18 19:17:04  dairiki
1292 // split_pagename():
1293 //     Bug fix. 'ThisIsABug' was being split to 'This IsA Bug'.
1294 //     Cleanup up subpage splitting code.
1295 //
1296 // Revision 1.134  2003/02/16 19:44:20  dairiki
1297 // New function hash().  This is a helper, primarily for generating
1298 // HTTP ETags.
1299 //
1300 // Revision 1.133  2003/02/16 04:50:09  dairiki
1301 // New functions:
1302 // Rfc1123DateTime(), ParseRfc1123DateTime()
1303 // for converting unix timestamps to and from strings.
1304 //
1305 // These functions produce and grok the time strings
1306 // in the format specified by RFC 2616 for use in HTTP headers
1307 // (like Last-Modified).
1308 //
1309 // Revision 1.132  2003/01/04 22:19:43  carstenklapp
1310 // Bugfix UnfoldSubpages: "Undefined offset: 1" error when plugin invoked
1311 // on a page with no subpages (explodeList(): array 0-based, sizeof 1-based).
1312 //
1313
1314 // (c-file-style: "gnu")
1315 // Local Variables:
1316 // mode: php
1317 // tab-width: 8
1318 // c-basic-offset: 4
1319 // c-hanging-comment-ender-p: nil
1320 // indent-tabs-mode: nil
1321 // End:
1322 ?>