1 <?php rcs_id('$Id: transform.php,v 1.14 2001-02-10 22:15:08 dairiki Exp $');
// Categories a transformer function can be registered under
// (first argument of WikiTransform::register()).
define('WT_TOKENIZER', 1);      // wtt_* functions: replace text by tokens
define('WT_SIMPLE_MARKUP', 2);  // wtm_* functions that only rewrite the line
define('WT_MODE_MARKUP', 3);    // wtm_* functions that may set the HTML mode
/*
 * function WikiTransform() -- init
 *
 * function register($type, $function)
 *    Registers transformer functions.
 *    This should be done *before* calling do_transform.
 *
 *    $type     ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
 *                  Currently only WT_MODE_MARKUP has a special meaning.
 *                  If one WT_MODE_MARKUP really sets the html mode, then
 *                  all successive WT_MODE_MARKUP functions are skipped.
 *
 *    $function ... function name
 *
 * function SetHTMLMode($tag, $tagtype, $level)
 *    Wiki HTML output can, at any given time, be in only one mode.
 *    It will be something like Unordered List, Preformatted Text,
 *    plain text etc. When we change modes we have to issue close tags
 *    for one mode and start tags for another.
 *    SetHTMLMode takes care of this.
 *
 *    $tag     ... HTML tag to insert
 *    $tagtype ... ZERO_LEVEL - close all open tags before inserting $tag
 *                 NESTED_LEVEL - close tags until depths match
 *    $level   ... nesting level (depth) of $tag
 *                 nesting is arbitrarily limited to 10 levels
 *
 * function do_transform($html, $content)
 *    contains main-loop and calls transformer functions
 *
 *    $html    ... HTML header (if needed, otherwise '')
 *    $content ... wiki markup as array of lines
 */
// Line-by-line converter from wiki markup to HTML.
// Transformer functions are registered with register() and then applied,
// in registration order, to every line passed to do_transform().
class WikiTransform
{
    // public variables (only meaningful during do_transform)
    var $linenumber;    // current linenumber
    var $replacements;  // storage for tokenized strings of current line
    var $tokencounter;  // counter of $replacements array

    // private variables
    var $content;       // wiki markup, array of lines
    var $mode_set;      // stores if a HTML mode for this line has been set
    var $trfrm_func;    // array of registered functions
    var $stack;         // stack for SetHTMLMode (keeping track of open tags)

    // Constructor: starts with no registered functions and an empty tag stack.
    function __construct()
    {
        $this->trfrm_func = array();
        $this->stack = new Stack;
    }

    // Old-style constructor name, kept so the class still initialises on
    // interpreters that only know same-name constructors. PHP 8 no longer
    // treats a method named like the class as a constructor, which is why
    // the real work lives in __construct().
    function WikiTransform()
    {
        $this->__construct();
    }

    // register transformation functions
    //   $type     ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
    //   $function ... callable name, invoked as $function($line, $this)
    function register($type, $function)
    {
        $this->trfrm_func[] = array($type, $function);
    }

    // sets current mode like list, preformatted text, plain text, ...
    // takes care of closing (open) tags
    //   $tag     ... HTML tag to open ('' only closes what is open)
    //   $tagtype ... ZERO_LEVEL or NESTED_LEVEL
    //   $level   ... nesting depth wanted for $tag (NESTED_LEVEL only)
    // Returns the HTML needed for the mode switch; always a string, so
    // callers can concatenate it unconditionally.
    function SetHTMLMode($tag, $tagtype, $level)
    {
        $this->mode_set = 1;    // in order to prevent other mode markup
                                // to be executed
        $retvar = '';

        if ($tagtype == ZERO_LEVEL) {
            // empty the stack until $level == 0;
            if ($tag == $this->stack->top()) {
                return '';      // same tag? -> nothing to do
            }
            while ($this->stack->cnt() > 0) {
                $closetag = $this->stack->pop();
                $retvar .= "</$closetag>\n";
            }

            // an empty $tag only closes the open tags
            if ($tag) {
                $retvar .= "<$tag>\n";
                $this->stack->push($tag);
            }

        } elseif ($tagtype == NESTED_LEVEL) {
            if ($level <= $this->stack->cnt()) {
                // $tag has fewer nestings (old: tabs) than stack,
                // reduce stack to that tab count
                while ($this->stack->cnt() > $level) {
                    $closetag = $this->stack->pop();
                    if ($closetag == false) {
                        //echo "bounds error in tag stack";
                        break;
                    }
                    $retvar .= "</$closetag>\n";
                }

                // if list type isn't the same,
                // back up one more and push new tag
                if ($tag != $this->stack->top()) {
                    $closetag = $this->stack->pop();
                    $retvar .= "</$closetag><$tag>\n";
                    $this->stack->push($tag);
                }

            } else { // $level > $this->stack->cnt()
                // we add the diff to the stack
                // stack might be zero
                while ($this->stack->cnt() < $level) {
                    $retvar .= "<$tag>\n";
                    $this->stack->push($tag);
                    if ($this->stack->cnt() > 10) {
                        // arbitrarily limit tag nesting
                        ExitWiki(gettext ("Stack bounds exceeded in SetHTMLOutputMode"));
                    }
                }
            }

        } else { // unknown $tagtype
            ExitWiki ("Passed bad tag type value in SetHTMLOutputMode");
        }

        return $retvar;
    }

    // work horse and main loop
    // this function does the transform from wiki markup to HTML
    //   $html    ... HTML to prepend (if needed, otherwise '')
    //   $content ... wiki markup as array of lines
    // Returns $html with the transformed lines appended.
    function do_transform($html, $content)
    {
        global $FieldSeparator;

        $this->content = $content;

        // Loop over all lines of the page and apply transformation rules
        $numlines = count($this->content);
        for ($lnum = 0; $lnum < $numlines; $lnum++) {
            $this->tokencounter = 0;
            $this->replacements = array();
            $this->linenumber = $lnum;
            $line = $this->content[$lnum];

            // blank lines clear the current mode
            if (!strlen($line) || $line == "\r") {
                $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
                continue;
            }

            // no mode chosen yet for this line
            $this->mode_set = 0;

            // main loop applying all registered functions
            // tokenizers, markup, html mode, ...
            // functions are executed in order of registering
            for ($func = 0; $func < count($this->trfrm_func); $func++) {
                // if HTMLmode is already set then skip all following
                // WT_MODE_MARKUP functions
                if ($this->mode_set &&
                    ($this->trfrm_func[$func][0] == WT_MODE_MARKUP)) {
                    continue;
                }
                // call registered function
                $callback = $this->trfrm_func[$func][1];
                $line = $callback($line, $this);
            }

            // Replace tokens ($replacements was filled by wtt_* functions)
            for ($i = 0; $i < $this->tokencounter; $i++) {
                $line = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $this->replacements[$i], $line);
            }

            $html .= $line . "\n";
        }

        // close all tags
        $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);

        return $html;
    }
    // end do_transform()
}
// end class WikiTransform
//////////////////////////////////////////////////////////

$transform = new WikiTransform;

// register functions
// functions are applied in order of registering

// tokenizers run first so that later markup cannot touch link text
$transform->register(WT_TOKENIZER, 'wtt_bracketlinks');
$transform->register(WT_TOKENIZER, 'wtt_urls');
if ($InterWikiLinking) {
    $transform->register(WT_TOKENIZER, 'wtt_interwikilinks');
}
$transform->register(WT_TOKENIZER, 'wtt_bumpylinks');

$transform->register(WT_SIMPLE_MARKUP, 'wtm_htmlchars');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_linebreak');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_bold_italics');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_title_search');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_fulltext_search');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_mostpopular');

// only the first mode function that sets a mode takes effect per line,
// so wtm_paragraph (which always sets one) has to come last
$transform->register(WT_MODE_MARKUP, 'wtm_list_ul');
$transform->register(WT_MODE_MARKUP, 'wtm_list_ol');
$transform->register(WT_MODE_MARKUP, 'wtm_list_dl');
$transform->register(WT_MODE_MARKUP, 'wtm_preformatted');
$transform->register(WT_MODE_MARKUP, 'wtm_headings');
$transform->register(WT_MODE_MARKUP, 'wtm_hr');
$transform->register(WT_MODE_MARKUP, 'wtm_paragraph');

$html = $transform->do_transform($html, $pagehash['content']);
/*
 * Requirements for functions registered to WikiTransform:
 *
 * Signature:  function wtm_xxxx($line, &$transform)
 *
 *    $line       ... current line containing wiki markup
 *                    (Note: it may already contain HTML from other transform functions)
 *    &$transform ... WikiTransform object -- for the public variables of this
 *                    object and their use, see above.
 *
 * Functions have to return $line (it doesn't matter whether modified or not).
 * All conversion should take place inside $line.
 *
 * Tokenizer functions should use $transform->replacements to store
 * the replacement strings. Also, they have to keep track of
 * $transform->tokencounter. See functions below. Back substitution
 * of tokenized strings is done by do_transform().
 */
//////////////////////////////////////////////////////////
// Tokenizer functions

// helper function which does actual tokenizing and is
// called by other wtt_* functions
//
// Find any strings in $str that match $pattern and
// store them in $orig, replacing them with tokens
// starting at number $ntokens - returns tokenized string
//
//   $str      ... text to scan
//   $pattern  ... PCRE fragment without delimiters; it is embedded in
//                 "/.../", so it must not contain an unescaped "/"
//   &$orig    ... array the matched strings are appended to
//   &$ntokens ... next token number; incremented once per match
function wt_tokenize($str, $pattern, &$orig, &$ntokens) {
    global $FieldSeparator;

    $new = '';
    while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
        if ($matches[0] === '') {
            // an empty match would never shorten $str -> endless loop
            break;
        }
        $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
        $new .= $matches[1] . $linktoken;
        $orig[] = $matches[2];
        $str = substr($str, strlen($matches[0]));
    }

    // whatever is left contains no further match
    return $new . $str;
}
// New linking scheme: links are in brackets. This will
// emulate typical HTML linking as well as Wiki linking.
//
// "[[" is turned into a token for a literal "[". Every other
// "[...]" is handed to ParseAndLink(); footnote links get a back
// reference the second time the same footnote is seen.
function wtt_bracketlinks($line, &$trfrm)
{
    // footnotes already seen; static, so it persists across lines
    static $footnotes = array();

    // protecting [[
    $n = $ntok = $trfrm->tokencounter;
    $line = wt_tokenize($line, '\[\[', $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        $trfrm->replacements[$n++] = '[';
    }

    // match anything else between brackets
    $line = wt_tokenize($line, '\[.+?\]', $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        $link = ParseAndLink($trfrm->replacements[$n]);
        if (strpos($link['type'], 'footnote') === false) {
            $trfrm->replacements[$n] = $link['link'];
        } else {
            $ftnt = $link['link'];
            if (isset($footnotes[$ftnt])) {
                $trfrm->replacements[$n] = "<A NAME=\"footnote-$ftnt\"></A><A HREF=\"#footnote-rev-$ftnt\">[$ftnt]</A>";
            } else { // first encounter of [x]
                $trfrm->replacements[$n] = "<A NAME=\"footnote-rev-$ftnt\"></A><SUP><A HREF=\"#footnote-$ftnt\">[$ftnt]</A></SUP>";
                $footnotes[$ftnt] = 1;
            }
        }
        $n++;
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
// replace all URLs with tokens, so we don't confuse them
// with Wiki words later. Wiki words in URLs break things.
// URLs preceded by a '!' are not linked
function wtt_urls($line, &$trfrm)
{
    global $AllowedProtocols;

    $n = $ntok = $trfrm->tokencounter;
    $line = wt_tokenize($line, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        if ($trfrm->replacements[$n][0] == '!') {
            // escaped URL: drop the '!' and leave it as plain text
            $trfrm->replacements[$n] = substr($trfrm->replacements[$n], 1);
        } else {
            $trfrm->replacements[$n] = LinkURL($trfrm->replacements[$n]);
        }
        $n++;
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
// Link InterWiki links
// These can be protected by a '!' like Wiki words.
function wtt_interwikilinks($line, &$trfrm)
{
    global $InterWikiLinkRegexp, $WikiNameRegexp;

    $n = $ntok = $trfrm->tokencounter;
    $line = wt_tokenize($line, "!?(?<![A-Za-z0-9])$InterWikiLinkRegexp:$WikiNameRegexp", $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        $old = $trfrm->replacements[$n];
        if ($old[0] == '!') {
            // protected: drop the '!' and leave the text unlinked
            $trfrm->replacements[$n] = substr($old,1);
        } else {
            $trfrm->replacements[$n] = LinkInterWikiLink($old);
        }
        $n++;
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
// Link Wiki words (BumpyText)
// Wikiwords preceded by a '!' are not linked
function wtt_bumpylinks($line, &$trfrm)
{
    global $WikiNameRegexp, $dbi;

    $n = $ntok = $trfrm->tokencounter;
    $line = wt_tokenize($line, "!?$WikiNameRegexp", $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        $old = $trfrm->replacements[$n];
        if ($old[0] == '!') {
            // protected: drop the '!' and leave the word unlinked
            $trfrm->replacements[$n] = substr($old,1);
        } elseif (IsWikiPage($dbi, $old)) {
            $trfrm->replacements[$n] = LinkExistingWikiWord($old);
        } else {
            $trfrm->replacements[$n] = LinkUnknownWikiWord($old);
        }
        $n++;
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}

// end of tokenizer functions
//////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
// basic simple markup functions

// escape HTML metachars
// '&' has to be replaced first, otherwise the ampersands of the
// entities inserted for '<' and '>' would be escaped a second time.
// str_replace works on bytes, so every other byte of the line is
// passed through untouched.
function wtm_htmlchars($line, &$transformer)
{
    $line = str_replace('&', '&amp;', $line);
    $line = str_replace('>', '&gt;', $line);
    $line = str_replace('<', '&lt;', $line);
    return $line;
}
// %%% are linebreaks
// Every occurrence of "%%%" in the line becomes a <br> tag.
function wtm_linebreak($line, &$transformer) {
    return str_replace('%%%', '<br>', $line);
}
// bold, italics
// __text__ becomes <strong>text</strong>, ''text'' becomes <em>text</em>.
// Both patterns are non-greedy, so several pairs on one line are
// handled independently.
function wtm_bold_italics($line, &$transformer) {
    $line = preg_replace('|(__)(.*?)(__)|', '<strong>\2</strong>', $line);
    $line = preg_replace("|('')(.*?)('')|", '<em>\2</em>', $line);
    return $line;
}
//////////////////////////////////////////////////////////
// some tokens to be replaced by (dynamic) content

// wiki token: title search dialog
// Replaces every "%%Search%%" in the line by the HTML that
// LinkPhpwikiURL() returns for the title-search URL.
function wtm_title_search($line, &$transformer) {
    if (strpos($line, '%%Search%%') !== false) {
        // NOTE(review): the second argument of this call is not visible in
        // the reviewed copy of the file; gettext("Search") is an assumption
        // -- confirm against the repository.
        $html = LinkPhpwikiURL(
            "phpwiki:?action=search&searchterm=()&searchtype=title",
            gettext("Search"));

        $line = str_replace('%%Search%%', $html, $line);
    }
    return $line;
}
// wiki token: fulltext search dialog
// Replaces every "%%Fullsearch%%" in the line by the HTML that
// LinkPhpwikiURL() returns for the full-text-search URL.
function wtm_fulltext_search($line, &$transformer) {
    if (strpos($line, '%%Fullsearch%%') !== false) {
        // NOTE(review): the second argument of this call is not visible in
        // the reviewed copy of the file; gettext("Search") is an assumption
        // -- confirm against the repository.
        $html = LinkPhpwikiURL(
            "phpwiki:?action=search&searchterm=()&searchtype=full",
            gettext("Search"));

        $line = str_replace('%%Fullsearch%%', $html, $line);
    }
    return $line;
}
// wiki token: mostpopular list
// Replaces "%%Mostpopular%%" by one <DD> entry (hit count plus page
// link) per row delivered by MostPopularNextMatch().
function wtm_mostpopular($line, &$transformer) {
    global $ScriptUrl, $dbi;
    if (strpos($line, '%%Mostpopular%%') !== false) {
        $query = InitMostPopular($dbi, MOST_POPULAR_LIST_LENGTH);
        // NOTE(review): the lines that start and finish $html are not
        // visible in the reviewed copy of the file; the <DL> wrapper is
        // assumed because the entries are <DD> elements -- confirm.
        $html = "<DL>\n";
        while ($qhash = MostPopularNextMatch($dbi, $query)) {
            $html .= "<DD>$qhash[hits] ... " . LinkExistingWikiWord($qhash['pagename']) . "\n";
        }
        $html .= "</DL>\n";
        $line = str_replace('%%Mostpopular%%', $html, $line);
    }
    return $line;
}
//////////////////////////////////////////////////////////
// mode markup functions

// tabless markup for unordered, ordered, and dictionary lists
// ul/ol list types can be mixed, so we only look at the last
// character. Changes e.g. from "**#*" to "###*" go unnoticed.
// and wouldn't make a difference to the HTML layout anyway.

// unordered lists <UL>: "*"
// has to be registered before list OL
//
// The character after the final "*" must be neither "#" nor "*".
// With only "#" excluded, the regex engine could backtrack and accept
// a shorter prefix, so "**# x" was taken as a level-1 bullet instead of
// being left for wtm_list_ol.
function wtm_list_ul($line, &$trfrm) {
    if (preg_match("/^([#*]*\*)[^#*]/", $line, $matches)) {
        // nesting depth = number of list characters
        $numtabs = strlen($matches[1]);
        $line = preg_replace("/^([#*]*\*)/", '', $line);
        $html = $trfrm->SetHTMLMode('ul', NESTED_LEVEL, $numtabs) . '<li>';
        $line = $html . $line;
    }
    return $line;
}
// ordered lists <OL>: "#"
// A line starting with list characters whose last one is "#" becomes
// an <li> of an ordered list; the number of list characters is the
// nesting depth.
function wtm_list_ol($line, &$trfrm) {
    if (preg_match("/^([#*]*\#)/", $line, $matches)) {
        $numtabs = strlen($matches[1]);
        $line = preg_replace("/^([#*]*\#)/", "", $line);
        $html = $trfrm->SetHTMLMode('ol', NESTED_LEVEL, $numtabs) . '<li>';
        $line = $html . $line;
    }
    return $line;
}
// definition lists <DL>: ";text:text"
// The number of leading ";" is the nesting depth. The text before the
// first ":" is the term (<dt>, left out when blank), the rest is the
// definition (<dd>).
function wtm_list_dl($line, &$trfrm) {
    if (preg_match("/(^;+)(.*?):(.*$)/", $line, $matches)) {
        $numtabs = strlen($matches[1]);
        $line = $trfrm->SetHTMLMode('dl', NESTED_LEVEL, $numtabs);
        // append, so the tags returned by SetHTMLMode are kept; compare
        // against '' so that a term such as "0" is not dropped
        if (trim($matches[2]) !== '') {
            $line .= '<dt>' . $matches[2];
        }
        $line .= '<dd>' . $matches[3];
    }
    return $line;
}
// mode: preformatted text, i.e. <pre>
// Any line starting with whitespace switches to <pre> mode; the line
// itself, including its leading whitespace, is kept as is.
function wtm_preformatted($line, &$trfrm) {
    if (preg_match("/^\s+/", $line)) {
        $line = $trfrm->SetHTMLMode('pre', ZERO_LEVEL, 0) . $line;
    }
    return $line;
}
// mode: headings, i.e. <h1>, <h2>, <h3>
// lines starting with !,!!,!!! are headings
// More bangs mean a bigger heading: "!" is h3, "!!!" is h1. The bangs
// must be followed by at least one other character.
function wtm_headings($line, &$trfrm) {
    if (preg_match("/^(!{1,3})[^!]/", $line, $whichheading)) {
        $tags = array('!' => 'h3', '!!' => 'h2', '!!!' => 'h1');
        $heading = $tags[$whichheading[1]];
        $line = preg_replace("/^!+/", '', $line);
        $line = $trfrm->SetHTMLMode($heading, ZERO_LEVEL, 0) . $line;
    }
    return $line;
}
// four or more dashes to <hr>
// Note this is of type WT_MODE_MARKUP because <hr>'s aren't
// allowed within <p>'s. (e.g. "<p><hr></p>" is not valid HTML.)
function wtm_hr($line, &$trfrm) {
    if (preg_match('/^-{4,}(.*)$/', $line, $m)) {
        // close whatever is open, then emit the rule outside of any mode
        $line = $trfrm->SetHTMLMode('', ZERO_LEVEL, 0) . '<hr>';
        // text after the dashes goes into a paragraph of its own;
        // without such text no empty <p> is opened
        if ($m[1] !== '') {
            $line .= $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $m[1];
        }
    }
    return $line;
}
// default mode: simple text paragraph
// Always sets the <p> mode, so it only takes effect when no mode
// function registered before it has set a mode for the line.
function wtm_paragraph($line, &$trfrm) {
    $line = $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $line;
    return $line;
}