1 <?php rcs_id('$Id: transform.php,v 1.17 2001-02-15 21:37:08 dairiki Exp $');
// Transformer type flags, passed as first argument to WikiTransform::register().
// do_transform() tests them with a bitwise AND: WT_TOKENIZER functions are run
// through tokenize(), and WT_MODE_MARKUP functions are skipped once an HTML
// mode has been set. WT_SIMPLE_MARKUP (0) sets neither bit.
3 define('WT_SIMPLE_MARKUP', 0);
4 define('WT_TOKENIZER', 1);
5 define('WT_MODE_MARKUP', 2);
// Tag types understood by WikiTransform::SetHTMLMode():
// ZERO_LEVEL closes all open tags before the new tag is inserted,
// NESTED_LEVEL opens or closes tags until the requested depth is reached.
7 define("ZERO_LEVEL", 0);
8 define("NESTED_LEVEL", 1);
13 function WikiTransform() -- init
15 function register($type, $function, [$regexp])
16 Registers transformer functions
17 This should be done *before* calling do_transform
21 If one WT_MODE_MARKUP really sets the html mode, then
22 all successive WT_MODE_MARKUP functions are skipped
24 The transformer function is called once for each match
25 of the $regexp in the line. The matched values are tokenized
26 to protect them from further transformation.
28 $function: function name
30 $regexp: Required for WT_TOKENIZER functions. Optional for others.
31 If given, the transformer function will only be called if the
32 line matches the $regexp.
34 function SetHTMLMode($tag, $tagtype, $level)
35 Wiki HTML output can, at any given time, be in only one mode.
36 It will be something like Unordered List, Preformatted Text,
37 plain text etc. When we change modes we have to issue close tags
38 for one mode and start tags for another.
39 SetHTMLMode takes care of this.
41 $tag ... HTML tag to insert
42 If $tag is an array, the first element gives the tag and the second
43 element is a hash containing arguments for the tag.
44 $tagtype ... ZERO_LEVEL - close all open tags before inserting $tag
45 NESTED_LEVEL - close tags until depths match
46 $level ... nesting level (depth) of $tag
47 nesting is arbitrarily limited to 10 levels
49 function do_transform($html, $content)
50 contains main-loop and calls transformer functions
52 $html ... HTML header (if needed, otherwise '')
53 $content ... wiki markup as array of lines
57 // public variables (only meaningful during do_transform)
58 var $linenumber; // current linenumber
59 var $replacements; // storage for tokenized strings of current line
60 var $user_data; // can be used by the transformer functions
61 // to store miscellaneous data.
64 var $content; // wiki markup, array of lines
65 var $mode_set; // stores if a HTML mode for this line has been set
66 var $trfrm_func; // array of registered functions
67 var $stack; // stack for SetHTMLMode (keeping track of open tags)
// Constructor: starts with an empty list of registered transformer functions
// and a new Stack object that SetHTMLMode() uses to track open tags.
70 function WikiTransform()
72 $this->trfrm_func = array();
73 $this->stack = new Stack;
76 // register transformation functions
// $type     ... one of the WT_* constants
// $function ... name of the transformer function
// $regexp   ... optional pattern (false when omitted); tokenize() and
//               do_transform() place it between '/' delimiters
// Each call appends one (type, function, regexp) entry to $this->trfrm_func;
// do_transform() walks that array from the first entry to the last.
77 function register($type, $function, $regexp = false)
79 $this->trfrm_func[] = array ($type, $function, $regexp);
82 // sets current mode like list, preformatted text, plain text, ...
83 // takes care of closing (open) tags
// $tag     ... tag to switch to (callers in this file also pass '')
// $tagtype ... ZERO_LEVEL or NESTED_LEVEL; anything else ends in ExitWiki()
// $level   ... requested nesting depth, only consulted for NESTED_LEVEL
// The generated close/start tags are returned wrapped in a token (see token()).
// NOTE(review): $args is handed to StartTag() but its assignment is not
// visible in this block -- confirm how it is derived from $tag.
84 function SetHTMLMode($tag, $tagtype, $level)
94 $this->mode_set = 1; // in order to prevent other mode markup
98 if ($tagtype == ZERO_LEVEL) {
99 // empty the stack until $level == 0;
100 if ($tag == $this->stack->top()) {
// bare return: callers concatenate the result, so this contributes nothing
101 return; // same tag? -> nothing to do
// close every tag that is still open, innermost first
103 while ($this->stack->cnt() > 0) {
104 $closetag = $this->stack->pop();
105 $retvar .= "</$closetag>\n";
// then open the new tag and remember it as the only open one
109 $retvar .= StartTag($tag, $args) . "\n";
110 $this->stack->push($tag);
114 } elseif ($tagtype == NESTED_LEVEL) {
115 if ($level <= $this->stack->cnt()) {
116 // $tag has fewer nestings (old: tabs) than stack,
117 // reduce stack to that tab count
118 while ($this->stack->cnt() > $level) {
119 $closetag = $this->stack->pop();
120 if ($closetag == false) {
121 //echo "bounds error in tag stack";
124 $retvar .= "</$closetag>\n";
127 // if list type isn't the same,
128 // back up one more and push new tag
129 if ($tag != $this->stack->top()) {
130 $closetag = $this->stack->pop();
131 $retvar .= "</$closetag>" . StartTag($tag, $args) . "\n";
132 $this->stack->push($tag);
135 } else { // $level > $this->stack->cnt()
136 // we add the diff to the stack
137 // stack might be zero
// one start tag is emitted per missing level
138 while ($this->stack->cnt() < $level) {
139 $retvar .= StartTag($tag, $args) . "\n";
140 $this->stack->push($tag);
141 if ($this->stack->cnt() > 10) {
142 // arbitrarily limit tag nesting
// NOTE(review): the message names SetHTMLOutputMode, this method is SetHTMLMode
143 ExitWiki(gettext ("Stack bounds exceeded in SetHTMLOutputMode"));
148 } else { // unknown $tagtype
// NOTE(review): same naming mismatch; unlike the message above this one is not passed through gettext()
149 ExitWiki ("Passed bad tag type value in SetHTMLOutputMode");
// hand the tags back as a token
152 return $this->token($retvar);
157 // work horse and main loop
158 // this function does the transform from wiki markup to HTML
// $html    ... HTML to which the transformed page is appended
// $content ... wiki markup as array of lines
// Returns the resulting HTML after untokenize() has put all tokenized
// strings back.
159 function do_transform($html, $content)
161 global $FieldSeparator;
163 $this->content = $content;
// replacements and user_data are reset once per call, not once per line
164 $this->replacements = array();
165 $this->user_data = array();
167 // Loop over all lines of the page and apply transformation rules
168 $numlines = count($this->content);
169 for ($lnum = 0; $lnum < $numlines; $lnum++)
172 $this->linenumber = $lnum;
173 $line = $this->content[$lnum];
175 // blank lines clear the current mode
176 if (!strlen($line) || $line == "\r") {
177 $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
183 // main loop applying all registered functions
184 // tokenizers, markup, html mode, ...
185 // functions are executed in order of registering
186 for (reset($this->trfrm_func);
187 list($flags, $func, $regexp) = current($this->trfrm_func);
188 next($this->trfrm_func)) {
190 // if HTMLmode is already set then skip all following
191 // WT_MODE_MARKUP functions
192 if ($this->mode_set && ($flags & WT_MODE_MARKUP) != 0)
// a registered $regexp acts as a filter on the line; it is placed between
// '/' delimiters, so it must not contain an unescaped '/'
195 if (!empty($regexp) && !preg_match("/$regexp/", $line))
198 // call registered function
// tokenizers are applied per match by tokenize(); all other functions
// get the whole line plus this object
199 if (($flags & WT_TOKENIZER) != 0)
200 $line = $this->tokenize($line, $regexp, $func);
202 $line = $func($line, $this);
205 $html .= $line . "\n";
// issue the close tags for whatever is still open
208 $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
// back substitution is done once, on the complete output
210 return $this->untokenize($html);
212 // end do_transform()
// Register a new token.
//
// Stores $repl in $this->replacements and returns the placeholder that
// stands for it: the index of the stored string enclosed in
// $FieldSeparator. untokenize() swaps the placeholder back for $repl.
//
// $repl ... replacement string to protect from further transformation
function token($repl) {
    global $FieldSeparator;
    $tok = $FieldSeparator . sizeof($this->replacements) . $FieldSeparator;
    $this->replacements[] = $repl;
    // SetHTMLMode() and tokenize() splice the returned placeholder into
    // their output, so it has to be handed back.
    return $tok;
}
// helper function which does actual tokenizing
//
// Scans $str from left to right for matches of $pattern. Every matched
// substring is passed to $func($match, $this); the string it returns is
// stored with token() and the match is replaced by the placeholder.
// Text between the matches is copied unchanged.
//
// $str     ... line to tokenize
// $pattern ... PCRE fragment; it is put between '/' delimiters, so it
//              must not contain an unescaped '/'
// $func    ... name of the tokenizer function
// Returns the tokenized string.
function tokenize($str, $pattern, $func) {
    $new = '';
    while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
        if ($matches[0] === '') {
            // A zero-length match at the current position would never
            // consume any input; stop instead of looping forever.
            break;
        }
        // drop the consumed part (text before the match plus the match)
        $str = substr($str, strlen($matches[0]));
        $new .= $matches[1] . $this->token($func($matches[2], $this));
    }
    // whatever is left contains no further match
    return $new . $str;
}
// Puts the strings stored by token() back in place of their placeholders.
//
// A placeholder is the index into $this->replacements enclosed in
// $FieldSeparator. Splitting on the separator therefore leaves plain text
// at the even positions and replacement indexes at the odd positions.
//
// $line ... text containing placeholders
// Returns the text with all placeholders substituted. The blank appended
// before splitting is part of the returned text.
function untokenize($line) {
    global $FieldSeparator;

    $chunks = explode ($FieldSeparator, "$line ");
    // start from the text in front of the first placeholder
    $line = $chunks[0];
    for ($i = 1; $i < count($chunks); $i += 2) {
        $tok = $chunks[$i];
        $line .= $this->replacements[$tok] . $chunks[$i + 1];
    }
    return $line;
}
248 // end class WikiTransform
251 //////////////////////////////////////////////////////////
253 $transform = new WikiTransform;
255 // register functions
256 // functions are applied in order of registering
// Tokenizers come first: whatever they match is replaced by a token and is
// thereby protected from the markup functions registered further down.
258 $transform->register(WT_TOKENIZER, 'wtt_doublebrackets', '\[\[');
// footnote definitions (pattern anchored at the start of the line) are
// registered ahead of the unanchored footnote reference pattern
259 $transform->register(WT_TOKENIZER, 'wtt_footnotes', '^\[\d+\]');
260 $transform->register(WT_TOKENIZER, 'wtt_footnoterefs', '\[\d+\]');
261 $transform->register(WT_TOKENIZER, 'wtt_bracketlinks', '\[.+?\]');
// the URL, interwiki and WikiWord patterns all accept an optional leading '!'
262 $transform->register(WT_TOKENIZER, 'wtt_urls',
263 "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]");
// interwiki links are only registered when wtt_interwikilinks() is defined
265 if (function_exists('wtt_interwikilinks')) {
266 $transform->register(WT_TOKENIZER, 'wtt_interwikilinks',
267 "!?(?<![A-Za-z0-9])$InterWikiLinkRegexp:$WikiNameRegexp");
269 $transform->register(WT_TOKENIZER, 'wtt_bumpylinks', "!?$WikiNameRegexp");
// table markup is only registered when wtm_table() is defined; it is
// restricted to lines starting with '|'
271 if (function_exists('wtm_table')) {
272 $transform->register(WT_MODE_MARKUP, 'wtm_table', '^\|');
// simple markup: registered without a regexp, so these run on every line
274 $transform->register(WT_SIMPLE_MARKUP, 'wtm_htmlchars');
275 $transform->register(WT_SIMPLE_MARKUP, 'wtm_linebreak');
276 $transform->register(WT_SIMPLE_MARKUP, 'wtm_bold_italics');
277 $transform->register(WT_SIMPLE_MARKUP, 'wtm_title_search');
278 $transform->register(WT_SIMPLE_MARKUP, 'wtm_fulltext_search');
279 $transform->register(WT_SIMPLE_MARKUP, 'wtm_mostpopular');
// mode markup: once one of these has set the HTML mode for a line the
// remaining ones are skipped, so wtm_paragraph at the end is the fallback
281 $transform->register(WT_MODE_MARKUP, 'wtm_list_ul');
282 $transform->register(WT_MODE_MARKUP, 'wtm_list_ol');
283 $transform->register(WT_MODE_MARKUP, 'wtm_list_dl');
284 $transform->register(WT_MODE_MARKUP, 'wtm_preformatted');
285 $transform->register(WT_MODE_MARKUP, 'wtm_headings');
286 $transform->register(WT_MODE_MARKUP, 'wtm_hr');
287 $transform->register(WT_MODE_MARKUP, 'wtm_paragraph');
// transform the page content and append the result to $html
289 $html = $transform->do_transform($html, $pagehash['content']);
292 Requirements for functions registered to WikiTransform:
294 Signature: function wtm_xxxx($line, &$transform)
296 $line ... current line containing wiki markup
297 (Note: it may already contain HTML from other transform functions)
298 &$transform ... WikiTransform object -- public variables of this
299 object and their use see above.
301 Functions have to return $line (doesn't matter if modified or not)
302 All conversion should take place inside $line.
304 Tokenizer functions (wtt_xxxx) are called with the matched text instead
305 of the whole line and return the replacement string for that match.
306 WikiTransform::tokenize() stores it via token(). Back substitution
307 of tokenized strings is done by do_transform().
312 //////////////////////////////////////////////////////////
313 // Tokenizer functions
316 function wtt_doublebrackets($match, &$trfrm)
// Tokenizer for a footnote definition: "[n]" at the start of a line.
// Reads $trfrm->user_data['footnotes'][n], where wtt_footnoterefs() collects
// the anchor names of the references to footnote n, and emits links back to
// those references.
// NOTE(review): $html and $fntext are used below but their assignments are
// not visible in this block.
321 function wtt_footnotes($match, &$trfrm)
323 // FIXME: should this set HTML mode?
// strip the surrounding brackets and coerce the rest to a number
324 $ftnt = trim(substr($match,1,-1)) + 0;
328 $fnlist = $trfrm->user_data['footnotes'][$ftnt];
// anything but an array means there is no list of references to link back to
329 if (!is_array($fnlist))
330 return $html . $fntext;
// mark the definition as seen; wtt_footnoterefs() checks for a non-array here
332 $trfrm->user_data['footnotes'][$ftnt] = 'footnote_seen';
// NOTE(review): each() was deprecated in PHP 7.2 and removed in PHP 8.0
334 while (list($k, $anchor) = each($fnlist))
// every back link is named "footnote-n" and points at the reference's anchor
336 $html .= Element("a", array("name" => "footnote-$ftnt",
337 "href" => "#$anchor",
338 "class" => "footnote-rev"),
// Tokenizer for a footnote reference: "[n]" anywhere in a line.
// Returns a superscripted link "[n]" pointing at "#footnote-n", built with
// Element() and QElement().
345 function wtt_footnoterefs($match, &$trfrm)
// strip the surrounding brackets and coerce the rest to a number
347 $ftnt = trim(substr($match,1,-1)) + 0;
349 $footnote_definition_seen = false;
// user_data['footnotes'][n] holds the anchor names of the references to
// footnote n; wtt_footnotes() replaces the array with a string once the
// definition has been processed
351 if (empty($trfrm->user_data['footnotes']))
352 $trfrm->user_data['footnotes'] = array();
353 if (empty($trfrm->user_data['footnotes'][$ftnt]))
354 $trfrm->user_data['footnotes'][$ftnt] = array();
355 else if (!is_array($trfrm->user_data['footnotes'][$ftnt]))
356 $footnote_definition_seen = true;
359 $args['href'] = "#footnote-$ftnt";
// only references that precede the definition get an anchor of their own,
// numbered by how many references were recorded before this one
360 if (!$footnote_definition_seen)
362 $args['name'] = "footrev-$ftnt-" .
363 count($trfrm->user_data['footnotes'][$ftnt]);
364 $trfrm->user_data['footnotes'][$ftnt][] = $args['name'];
367 return Element('sup', array('class' => 'footnote'),
368 QElement("a", $args, "[$ftnt]"));
// Tokenizer for bracketed links ("[...]"): passes the matched text to
// ParseAndLink() and returns the "link" entry of its result.
371 function wtt_bracketlinks($match, &$trfrm)
373 $link = ParseAndLink($match);
374 return $link["link"];
379 // replace all URL's with tokens, so we don't confuse them
380 // with Wiki words later. Wiki words in URL's break things.
381 // URLs preceded by a '!' are not linked
382 function wtt_urls($match, &$trfrm)
// escaped URL: drop the '!' and return the rest as HTML-escaped plain text
384 if ($match[0] == "!")
385 return htmlspecialchars(substr($match,1));
386 return LinkURL($match);
// Link Wiki words (BumpyText)
// Wikiwords preceded by a '!' are not linked
//
// $match ... the matched WikiWord, optionally with a leading '!'
// $trfrm ... WikiTransform object (not used here)
// Returns the HTML that replaces the match.
function wtt_bumpylinks($match, &$trfrm)
{
    // IsWikiPage() needs the database handle, which is a global variable.
    global $dbi;
    // escaped word: drop the '!' and return the rest as plain text
    if ($match[0] == "!")
        return htmlspecialchars(substr($match,1));
    // FIXME: make a LinkWikiWord() function?
    if (IsWikiPage($dbi, $match))
        return LinkExistingWikiWord($match);
    return LinkUnknownWikiWord($match);
}
402 // end of tokenizer functions
403 //////////////////////////////////////////////////////////
406 //////////////////////////////////////////////////////////
407 // basic simple markup functions
// escape HTML metachars
//
// '&' has to be replaced first; otherwise the ampersands of the entities
// produced for '>' and '<' would be escaped a second time.
//
// $line        ... current line of wiki markup
// $transformer ... WikiTransform object (not used here)
// Returns the line with &, > and < replaced by their HTML entities.
function wtm_htmlchars($line, &$transformer)
{
    $line = str_replace('&', '&amp;', $line);
    $line = str_replace('>', '&gt;', $line);
    $line = str_replace('<', '&lt;', $line);
    return $line;
}
419 // %%% are linebreaks
// Every occurrence of '%%%' in the line is replaced by '<br>'.
420 function wtm_linebreak($line, &$transformer) {
421 return str_replace('%%%', '<br>', $line);
// Inline emphasis: __text__ becomes <strong>text</strong> and ''text''
// (two single quotes on each side) becomes <em>text</em>. Both patterns are
// non-greedy, so several marked-up spans on one line are handled separately.
425 function wtm_bold_italics($line, &$transformer) {
426 $line = preg_replace('|(__)(.*?)(__)|', '<strong>\2</strong>', $line);
427 $line = preg_replace("|('')(.*?)('')|", '<em>\2</em>', $line);
433 //////////////////////////////////////////////////////////
434 // some tokens to be replaced by (dynamic) content
436 // wiki token: title search dialog
// Replaces every '%%Search%%' in the line with the HTML returned by
// LinkPhpwikiURL() for a title search URL. Lines without the token are not
// touched by the code shown here.
// NOTE(review): the remaining LinkPhpwikiURL() arguments are not visible here.
437 function wtm_title_search($line, &$transformer) {
438 if (strpos($line, '%%Search%%') !== false) {
439 $html = LinkPhpwikiURL(
440 "phpwiki:?action=search&searchterm=()&searchtype=title",
443 $line = str_replace('%%Search%%', $html, $line);
448 // wiki token: fulltext search dialog
// Replaces every '%%Fullsearch%%' in the line with the HTML returned by
// LinkPhpwikiURL() for a full text search URL. Lines without the token are
// not touched by the code shown here.
// NOTE(review): the remaining LinkPhpwikiURL() arguments are not visible here.
449 function wtm_fulltext_search($line, &$transformer) {
450 if (strpos($line, '%%Fullsearch%%') !== false) {
451 $html = LinkPhpwikiURL(
452 "phpwiki:?action=search&searchterm=()&searchtype=full",
455 $line = str_replace('%%Fullsearch%%', $html, $line);
460 // wiki token: mostpopular list
// Replaces '%%Mostpopular%%' with one <DD> entry per page delivered by
// MostPopularNextMatch(): the hit count followed by a link to the page.
// NOTE(review): $html is only appended to in the lines visible here; its
// initial value is not shown.
461 function wtm_mostpopular($line, &$transformer) {
462 global $ScriptUrl, $dbi;
463 if (strpos($line, '%%Mostpopular%%') !== false) {
// the query is limited to MOST_POPULAR_LIST_LENGTH
464 $query = InitMostPopular($dbi, MOST_POPULAR_LIST_LENGTH);
466 while ($qhash = MostPopularNextMatch($dbi, $query)) {
467 $html .= "<DD>$qhash[hits] ... " . LinkExistingWikiWord($qhash['pagename']) . "\n";
470 $line = str_replace('%%Mostpopular%%', $html, $line);
476 //////////////////////////////////////////////////////////
477 // mode markup functions
480 // tabless markup for unordered, ordered, and dictionary lists
481 // ul/ol list types can be mixed, so we only look at the last
482 // character. Changes e.g. from "**#*" to "###*" go unnoticed.
483 // and wouldn't make a difference to the HTML layout anyway.
485 // unordered lists <UL>: "*"
486 // has to be registered before list OL
// A line qualifies when it starts with a run of '#' and '*' that ends in '*'
// and is followed by a character other than '#'.
487 function wtm_list_ul($line, &$trfrm) {
488 if (preg_match("/^([#*]*\*)[^#]/", $line, $matches)) {
// the length of the marker run is the nesting depth
489 $numtabs = strlen($matches[1]);
// remove the markers, then prefix the mode tags and the list item tag
490 $line = preg_replace("/^([#*]*\*)/", '', $line);
491 $html = $trfrm->SetHTMLMode('ul', NESTED_LEVEL, $numtabs) . '<li>';
492 $line = $html . $line;
497 // ordered lists <OL>: "#"
// A line qualifies when it starts with a run of '#' and '*' that ends in '#'.
498 function wtm_list_ol($line, &$trfrm) {
499 if (preg_match("/^([#*]*\#)/", $line, $matches)) {
// the length of the marker run is the nesting depth
500 $numtabs = strlen($matches[1]);
// remove the markers, then prefix the mode tags and the list item tag
501 $line = preg_replace("/^([#*]*\#)/", "", $line);
502 $html = $trfrm->SetHTMLMode('ol', NESTED_LEVEL, $numtabs) . '<li>';
503 $line = $html . $line;
// definition lists <DL>: ";text:text"
//
// The number of leading ';' gives the nesting depth. The text in front of
// the first ':' is the term (<dt>, left out when it is blank), the text
// after it is the definition (<dd>).
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// Returns the transformed line, or $line unchanged if it is not a
// definition list line.
function wtm_list_dl($line, &$trfrm) {
    if (preg_match("/(^;+)(.*?):(.*$)/", $line, $matches)) {
        $numtabs = strlen($matches[1]);
        $line = $trfrm->SetHTMLMode('dl', NESTED_LEVEL, $numtabs);
        // Append the term: assigning here would throw away the tags
        // just returned by SetHTMLMode(). Compare against '' so that a
        // term of "0" is not mistaken for an empty one.
        if (trim($matches[2]) !== '')
            $line .= '<dt>' . $matches[2];
        $line .= '<dd>' . $matches[3];
    }
    return $line;
}
521 // mode: preformatted text, i.e. <pre>
// Applies to lines that start with whitespace. The line itself, including
// its leading whitespace, is kept as is behind the mode tags.
522 function wtm_preformatted($line, &$trfrm) {
523 if (preg_match("/^\s+/", $line)) {
524 $line = $trfrm->SetHTMLMode('pre', ZERO_LEVEL, 0) . $line;
529 // mode: headings, i.e. <h1>, <h2>, <h3>
530 // lines starting with !,!!,!!! are headings
// The pattern requires one to three '!' followed by a character other than
// '!', so a line starting with four or more '!' is not treated as a heading.
531 function wtm_headings($line, &$trfrm) {
532 if (preg_match("/^(!{1,3})[^!]/", $line, $whichheading)) {
// more '!' means a bigger heading: '!' is h3, '!!!' is h1
533 if($whichheading[1] == '!') $heading = 'h3';
534 elseif($whichheading[1] == '!!') $heading = 'h2';
535 elseif($whichheading[1] == '!!!') $heading = 'h1';
// strip the markers and prefix the mode tags
536 $line = preg_replace("/^!+/", '', $line);
537 $line = $trfrm->SetHTMLMode($heading, ZERO_LEVEL, 0) . $line;
// mode: table row. Consumes the line cell by cell and wraps the cells in
// tokenized <td> and <tr> tags; the 'table' mode is set with tag arguments.
// NOTE(review): several lines of this function are not visible here (among
// them the conditions selecting 'center' and 'left' alignment and the
// remaining table arguments), so the comments below cover the visible
// statements only.
543 function wtm_table($line, &$trfrm)
// one cell per iteration: a run of '|', optional 'v's, an optional alignment
// character out of < > ^ and the cell text up to the next '|'
546 while (preg_match('/^(\|+)(v*)([<>^]?)([^|]*)/', $line, $m))
// drop the consumed cell from the front of the line
548 $line = substr($line, strlen($m[0]));
// more than one '|' makes the cell span that many columns
551 if (strlen($m[1]) > 1)
552 $td['colspan'] = strlen($m[1]);
// each 'v' adds one row to the row span
553 if (strlen($m[2]) > 0)
554 $td['rowspan'] = strlen($m[2]) + 1;
557 $td['align'] = 'center';
558 else if ($m[3] == '>')
559 $td['align'] = 'right';
561 $td['align'] = 'left';
// the cell tags are tokenized to protect them from further transformation
563 $row .= $trfrm->token(StartTag('td', $td) . " ");
565 $row .= $trfrm->token(" </td>");
// the loop is expected to have consumed the whole line
567 assert(empty($line));
568 $row = $trfrm->token("<tr>") . $row . $trfrm->token("</tr>");
570 return $trfrm->SetHTMLMode(array('table',
571 array('align' => 'center',
579 // four or more dashes to <hr>
580 // Note this is of type WT_MODE_MARKUP because <hr>'s aren't
581 // allowed within <p>'s. (e.g. "<p><hr></p>" is not valid HTML.)
582 function wtm_hr($line, &$trfrm) {
// $m[1] receives whatever follows the dashes on the same line
583 if (preg_match('/^-{4,}(.*)$/', $line, $m)) {
// close all open tags first, then emit the rule
584 $line = $trfrm->SetHTMLMode('', ZERO_LEVEL, 0) . '<hr>';
// text after the dashes goes into a new paragraph
// NOTE(review): the statement guarding this line is not visible here
586 $line .= $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $m[1];
591 // default mode: simple text paragraph
// Has no condition of its own: whenever it is called it puts the line into
// 'p' mode. do_transform() skips it when an earlier WT_MODE_MARKUP function
// has already set the mode.
592 function wtm_paragraph($line, &$trfrm) {
593 $line = $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $line;
600 // c-file-style: "ellemtel"