1 <?php rcs_id('$Id: transform.php,v 1.20 2001-03-07 16:45:20 dairiki Exp $');
3 define('WT_SIMPLE_MARKUP', 0);
4 define('WT_TOKENIZER', 1);
5 define('WT_MODE_MARKUP', 2);
7 define("ZERO_LEVEL", 0);
8 define("NESTED_LEVEL", 1);
13 function WikiTransform() -- init
15 function register($type, $function, [$regexp])
16 Registers transformer functions
17 This should be done *before* calling do_transform
21 If one WT_MODE_MARKUP really sets the html mode, then
22 all successive WT_MODE_MARKUP functions are skipped
24 The transformer function is called once for each match
25 of the $regexp in the line. The matched values are tokenized
26 to protect them from further transformation.
28 $function: function name
30 $regexp: Required for WT_TOKENIZER functions. Optional for others.
31 If given, the transformer function will only be called if the
32 line matches the $regexp.
34 function SetHTMLMode($tag, $level)
35 This is a helper function used to keep track of what HTML
36 block-level element we are currently processing.
37 Block-level elements are things like paragraphs "<p>",
38 pre-formatted text "<pre>", and the various list elements:
39 "<ul>", "<ol>" and "<dl>". Note that some of these elements
40 can be nested, while others can not. (In particular, according to
41 the HTML 4.01 specification, a paragraph "<p>" element is not
42 allowed to contain any other block-level elements. Also <pre>,
43 <li>, <dt>, <dd>, <h1> ... have this same restriction.)
45 SetHTMLMode generates whatever HTML is necessary to get us into
46 the requested element type at the requested nesting level.
48 $tag ... type of HTML element to open.
49 If $tag is an array, $tag[0] gives the element type,
50 and $tag[1] should be a hash containing attribute-value
51 pairs for the element.
53 If $tag is the empty string, all open elements (down to the
54 level requested by $level) are closed. Use
55 SetHTMLMode('',0) to close all open block-level elements.
57 $level ... requested nesting level for current element.
58 The nesting level for top level block is one (which is
61 Nesting is arbitrarily limited to 10 levels
63 function do_transform($html, $content)
64 contains main-loop and calls transformer functions
66 $html ... HTML header (if needed, otherwise '')
67 $content ... wiki markup as array of lines
71 // public variables (only meaningful during do_transform)
72 var $linenumber; // current linenumber
73 var $replacements; // storage for tokenized strings of current line
74 var $user_data; // can be used by the transformer functions
75 // to store miscellaneous data.
78 var $content; // wiki markup, array of lines
79 var $mode_set; // stores if a HTML mode for this line has been set
80 var $trfrm_func; // array of registered functions
81 var $stack; // stack for SetHTMLMode (keeping track of open tags)
// Constructor: start with an empty stack of open block-level tags
// and no registered transformer functions.
function WikiTransform()
{
   $this->stack = new Stack;
   $this->trfrm_func = array();
}
90 // register transformation functions
// Append one transformer to the list of registered functions.
//
// $type     ... WT_SIMPLE_MARKUP, WT_TOKENIZER or WT_MODE_MARKUP
// $function ... name of the transformer function
// $regexp   ... optional pattern (without delimiters); false if none
function register($type, $function, $regexp = false)
{
   $entry = array ($type, $function, $regexp);
   $this->trfrm_func[] = $entry;
}
96 // sets current mode like list, preformatted text, plain text, ...
97 // takes care of closing (open) tags
98 function SetHTMLMode($tag, $level = 1)
100 if (is_array($tag)) {
108 $this->mode_set = 1; // in order to prevent other mode markup
113 // arbitrarily limit tag nesting
114 ExitWiki(gettext ("Lists nested too deep in SetHTMLOutputMode"));
117 if ($level <= $this->stack->cnt()) {
118 // $tag has fewer nestings (old: tabs) than stack,
119 // reduce stack to that tab count
120 while ($this->stack->cnt() > $level) {
121 $closetag = $this->stack->pop();
122 assert('$closetag != false');
123 $retvar .= "</$closetag>\n";
126 // if list type isn't the same,
127 // back up one more and push new tag
128 if ($tag && $tag != $this->stack->top()) {
129 $closetag = $this->stack->pop();
130 $retvar .= "</$closetag>" . StartTag($tag, $args) . "\n";
131 $this->stack->push($tag);
134 } else {// $level > $this->stack->cnt()
135 // Test for and close top level elements which are not allowed to contain
136 // other block-level elements.
137 if ($this->stack->cnt() == 1 and
138 preg_match('/^(p|pre|h\d)$/i', $this->stack->top()))
140 $closetag = $this->stack->pop();
141 $retvar .= "</$closetag>";
144 // we add the diff to the stack
145 // stack might be zero
146 if ($this->stack->cnt() < $level) {
147 while ($this->stack->cnt() < $level - 1) {
148 // This is a bit of a hack:
150 // We're not nested deep enough, and have to make up some kind of block
151 // element to nest within.
153 // Currently, this can only happen for nested list element
154 // (either <ul> <ol> or <dl>). What we used to do here is
155 // to open extra lists of whatever type was requested.
156 // This would result in invalid HTML, since a list is
157 // not allowed to contain another list without first containing
158 // a list item. ("<ul><ul><li>Item</ul></ul>" is invalid.)
160 // So now, when we need extra list elements, we use a <dl>, and
161 // open it with an empty <dd>.
162 $retvar .= "<dl><dd>";
163 $this->stack->push('dl');
166 $retvar .= StartTag($tag, $args) . "\n";
167 $this->stack->push($tag);
171 return $this->token($retvar);
176 // work horse and main loop
177 // this function does the transform from wiki markup to HTML
178 function do_transform($html, $content)
180 global $FieldSeparator;
182 $this->content = $content;
183 $this->replacements = array();
184 $this->user_data = array();
186 // Loop over all lines of the page and apply transformation rules
187 $numlines = count($this->content);
188 for ($lnum = 0; $lnum < $numlines; $lnum++)
191 $this->linenumber = $lnum;
192 $line = $this->content[$lnum];
194 // blank lines clear the current mode (to force new paragraph)
195 if (!strlen($line) || $line == "\r") {
196 $html .= $this->SetHTMLMode('', 0);
202 // main loop applying all registered functions
203 // tokenizers, markup, html mode, ...
204 // functions are executed in order of registering
205 for (reset($this->trfrm_func);
206 list($flags, $func, $regexp) = current($this->trfrm_func);
207 next($this->trfrm_func)) {
209 // if HTMLmode is already set then skip all following
210 // WT_MODE_MARKUP functions
211 if ($this->mode_set && ($flags & WT_MODE_MARKUP) != 0)
214 if (!empty($regexp) && !preg_match("/$regexp/", $line))
217 // call registered function
218 if (($flags & WT_TOKENIZER) != 0)
219 $line = $this->tokenize($line, $regexp, $func);
221 $line = $func($line, $this);
224 $html .= $line . "\n";
227 $html .= $this->SetHTMLMode('', 0);
229 return $this->untokenize($html);
231 // end do_transform()
233 // Register a new token.
// Store $repl as the next replacement string and return the token that
// stands in for it: the index of the stored string wrapped in
// $FieldSeparator on both sides.
function token($repl) {
   global $FieldSeparator;
   // The index of the new entry is the current number of stored strings.
   $index = count($this->replacements);
   $this->replacements[] = $repl;
   return $FieldSeparator . $index . $FieldSeparator;
}
241 // helper function which does actual tokenizing
242 function tokenize($str, $pattern, $func) {
243 // Find any strings in $str that match $pattern and
244 // store them in $orig, replacing them with tokens
245 // starting at number $ntokens - returns tokenized string
247 while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
248 $str = substr($str, strlen($matches[0]));
249 $new .= $matches[1] . $this->token($func($matches[2], $this));
254 function untokenize($line) {
255 global $FieldSeparator;
257 $chunks = explode ($FieldSeparator, "$line ");
259 for ($i = 1; $i < count($chunks); $i += 2)
262 $line .= $this->replacements[$tok] . $chunks[$i + 1];
267 // end class WikiTransform
270 //////////////////////////////////////////////////////////
$transform = new WikiTransform;

// Register the transformer functions.
// They are applied in exactly the order in which they are registered.

// Tokenizers: the matched text is replaced by a token so that later
// transformers leave it alone.
$transform->register(WT_TOKENIZER, 'wtt_doublebrackets', '\[\[');
$transform->register(WT_TOKENIZER, 'wtt_footnotes', '^\[\d+\]');
$transform->register(WT_TOKENIZER, 'wtt_footnoterefs', '\[\d+\]');
$transform->register(WT_TOKENIZER, 'wtt_bracketlinks', '\[.+?\]');
$transform->register(WT_TOKENIZER, 'wtt_urls',
                     "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]");

// Interwiki links are only registered when their handler is defined.
if (function_exists('wtt_interwikilinks')) {
   $transform->register(WT_TOKENIZER, 'wtt_interwikilinks',
                        "!?(?<![[:alnum:]])$InterWikiLinkRegexp:$WikiNameRegexp");
}
$transform->register(WT_TOKENIZER, 'wtt_bumpylinks', "!?$WikiNameRegexp");

// Table markup is only registered when its handler is defined.
if (function_exists('wtm_table')) {
   $transform->register(WT_MODE_MARKUP, 'wtm_table', '^\|');
}

// Simple markup: applied to every line, no regexp.
foreach (array('wtm_htmlchars',
               'wtm_linebreak',
               'wtm_bold_italics',
               'wtm_title_search',
               'wtm_fulltext_search',
               'wtm_mostpopular') as $wt_simple_func) {
   $transform->register(WT_SIMPLE_MARKUP, $wt_simple_func);
}
unset($wt_simple_func);

// Mode markup: once one of these has set the HTML mode for a line,
// the remaining ones are skipped for that line.
foreach (array('wtm_list_ul',
               'wtm_list_ol',
               'wtm_list_dl',
               'wtm_preformatted',
               'wtm_headings',
               'wtm_hr',
               'wtm_paragraph') as $wt_mode_func) {
   $transform->register(WT_MODE_MARKUP, $wt_mode_func);
}
unset($wt_mode_func);

$html = $transform->do_transform($html, $pagehash['content']);
311 Requirements for functions registered to WikiTransform:
313 Signature: function wtm_xxxx($line, &$transform)
315 $line ... current line containing wiki markup
316 (Note: it may already contain HTML from other transform functions)
317 &$transform ... WikiTransform object -- public variables of this
318 object and their use see above.
320 Functions have to return $line (doesn't matter if modified or not)
321 All conversion should take place inside $line.
323 Tokenizer functions receive the matched text and return its
324 replacement string; tokenize() stores it in $transform->replacements
325 and puts a token in its place. See functions below. Back substitution
326 of tokenized strings is done by do_transform().
331 //////////////////////////////////////////////////////////
332 // Tokenizer functions
335 function wtt_doublebrackets($match, &$trfrm)
340 function wtt_footnotes($match, &$trfrm)
342 // FIXME: should this set HTML mode?
343 $ftnt = trim(substr($match,1,-1)) + 0;
347 $fnlist = $trfrm->user_data['footnotes'][$ftnt];
348 if (!is_array($fnlist))
349 return $html . $fntext;
351 $trfrm->user_data['footnotes'][$ftnt] = 'footnote_seen';
353 while (list($k, $anchor) = each($fnlist))
355 $html .= Element("a", array("name" => "footnote-$ftnt",
356 "href" => "#$anchor",
357 "class" => "footnote-rev"),
364 function wtt_footnoterefs($match, &$trfrm)
366 $ftnt = trim(substr($match,1,-1)) + 0;
368 $footnote_definition_seen = false;
370 if (empty($trfrm->user_data['footnotes']))
371 $trfrm->user_data['footnotes'] = array();
372 if (empty($trfrm->user_data['footnotes'][$ftnt]))
373 $trfrm->user_data['footnotes'][$ftnt] = array();
374 else if (!is_array($trfrm->user_data['footnotes'][$ftnt]))
375 $footnote_definition_seen = true;
378 $args['href'] = "#footnote-$ftnt";
379 if (!$footnote_definition_seen)
381 $args['name'] = "footrev-$ftnt-" .
382 count($trfrm->user_data['footnotes'][$ftnt]);
383 $trfrm->user_data['footnotes'][$ftnt][] = $args['name'];
386 return Element('sup', array('class' => 'footnote'),
387 QElement("a", $args, "[$ftnt]"));
// Tokenizer for bracket links: return the "link" entry of whatever
// ParseAndLink() produces for the matched "[...]" text.
function wtt_bracketlinks($match, &$trfrm)
{
   $parsed = ParseAndLink($match);
   return $parsed["link"];
}
398 // replace all URL's with tokens, so we don't confuse them
399 // with Wiki words later. Wiki words in URL's break things.
400 // URLs preceded by a '!' are not linked
// Tokenizer for URLs.
// A match starting with '!' is returned as HTML-escaped text without
// the '!'; any other match is passed to LinkURL().
function wtt_urls($match, &$trfrm)
{
   $is_escaped = ($match[0] == "!");
   return $is_escaped
      ? htmlspecialchars(substr($match, 1))
      : LinkURL($match);
}
408 // Link Wiki words (BumpyText)
409 // Wikiwords preceded by a '!' are not linked
// Tokenizer for WikiWords (BumpyText).
// A match starting with '!' is returned as HTML-escaped text without
// the '!'. Otherwise the word is linked as an existing or an unknown
// page, depending on IsWikiPage().
function wtt_bumpylinks($match, &$trfrm)
{
   // $dbi is the database handle passed to IsWikiPage(); it has to be
   // imported explicitly or it would be an undefined local here.
   global $dbi;

   if ($match[0] == "!")
      return htmlspecialchars(substr($match,1));
   // FIXME: make a LinkWikiWord() function?
   if (IsWikiPage($dbi, $match))
      return LinkExistingWikiWord($match);
   return LinkUnknownWikiWord($match);
}
421 // end of tokenizer functions
422 //////////////////////////////////////////////////////////
425 //////////////////////////////////////////////////////////
426 // basic simple markup functions
428 // escape HTML metachars
// Escape the HTML metacharacters '&', '>' and '<' in $line and return
// the escaped line. $transformer is unused.
function wtm_htmlchars($line, &$transformer)
{
   // str_replace() applies the pairs in order, so '&' is handled first
   // and the ampersands of the entities added afterwards stay intact.
   return str_replace(array('&', '>', '<'),
                      array('&amp;', '&gt;', '&lt;'),
                      $line);
}
438 // %%% are linebreaks
// Turn every "%%%" in $line into a "<br>" element.
function wtm_linebreak($line, &$transformer) {
   $breaks = array('%%%' => '<br>');
   return strtr($line, $breaks);
}
// Inline emphasis: __text__ becomes <strong>, ''text'' becomes <em>.
function wtm_bold_italics($line, &$transformer) {
   // The patterns are applied one after the other, bold first.
   $patterns = array('|(__)(.*?)(__)|', "|('')(.*?)('')|");
   $replacements = array('<strong>\2</strong>', '<em>\2</em>');
   return preg_replace($patterns, $replacements, $line);
}
452 //////////////////////////////////////////////////////////
453 // some tokens to be replaced by (dynamic) content
455 // wiki token: title search dialog
456 function wtm_title_search($line, &$transformer) {
457 if (strpos($line, '%%Search%%') !== false) {
458 $html = LinkPhpwikiURL(
459 "phpwiki:?action=search&searchterm=()&searchtype=title",
462 $line = str_replace('%%Search%%', $html, $line);
467 // wiki token: fulltext search dialog
468 function wtm_fulltext_search($line, &$transformer) {
469 if (strpos($line, '%%Fullsearch%%') !== false) {
470 $html = LinkPhpwikiURL(
471 "phpwiki:?action=search&searchterm=()&searchtype=full",
474 $line = str_replace('%%Fullsearch%%', $html, $line);
479 // wiki token: mostpopular list
480 function wtm_mostpopular($line, &$transformer) {
481 global $ScriptUrl, $dbi;
482 if (strpos($line, '%%Mostpopular%%') !== false) {
483 $query = InitMostPopular($dbi, MOST_POPULAR_LIST_LENGTH);
485 while ($qhash = MostPopularNextMatch($dbi, $query)) {
486 $html .= "<DD>$qhash[hits] ... " . LinkExistingWikiWord($qhash['pagename']) . "\n";
489 $line = str_replace('%%Mostpopular%%', $html, $line);
495 //////////////////////////////////////////////////////////
496 // mode markup functions
499 // tabless markup for unordered, ordered, and dictionary lists
500 // ul/ol list types can be mixed, so we only look at the last
501 // character. Changes e.g. from "**#*" to "###*" go unnoticed.
502 // and wouldn't make a difference to the HTML layout anyway.
504 // unordered lists <UL>: "*"
505 // has to be registered before list OL
// Unordered list item: the line starts with a run of list markers
// ('#', '*' or ';') whose last character is '*'.
// The length of the matched prefix is the nesting level passed to
// SetHTMLMode(). Lines that do not match are returned unchanged.
function wtm_list_ul($line, &$trfrm) {
   if (preg_match("/^([#*;]*\*)[^#]/", $line, $matches)) {
      $numtabs = strlen($matches[1]);
      // Strip with the same marker set the match above accepts, so a
      // prefix containing ';' is removed as well.
      $line = preg_replace("/^([#*;]*\*)/", '', $line);
      $html = $trfrm->SetHTMLMode('ul', $numtabs) . '<li>';
      $line = $html . $line;
   }
   return $line;
}
516 // ordered lists <OL>: "#"
// Ordered list item: the line starts with a run of list markers
// ('#', '*' or ';') whose last character is '#'.
// The length of the matched prefix is the nesting level passed to
// SetHTMLMode(). Lines that do not match are returned unchanged.
function wtm_list_ol($line, &$trfrm) {
   if (preg_match("/^([#*;]*\#)/", $line, $matches)) {
      $numtabs = strlen($matches[1]);
      // Strip with the same marker set the match above accepts, so a
      // prefix containing ';' is removed as well.
      $line = preg_replace("/^([#*;]*\#)/", "", $line);
      $html = $trfrm->SetHTMLMode('ol', $numtabs) . '<li>';
      $line = $html . $line;
   }
   return $line;
}
528 // definition lists <DL>: ";text:text"
// Definition list item: ";term:definition", optionally preceded by
// further list markers ('#', '*' or ';').
// The length of the marker prefix is the nesting level passed to
// SetHTMLMode(). A <dt> is emitted only when the term is not blank;
// the <dd> is always emitted. Lines that do not match are returned
// unchanged.
function wtm_list_dl($line, &$trfrm) {
   if (preg_match("/^([#*;]*;)(.*?):(.*$)/", $line, $matches)) {
      $numtabs = strlen($matches[1]);
      $line = $trfrm->SetHTMLMode('dl', $numtabs);
      // Compare against '' explicitly: a term of "0" is a real term,
      // but would count as false in a plain truth test.
      if (trim($matches[2]) !== '')
         $line .= '<dt>' . $matches[2];
      $line .= '<dd>' . $matches[3];
   }
   return $line;
}
540 // mode: preformatted text, i.e. <pre>
// Preformatted text: a line that starts with whitespace is put into
// <pre> mode. Any other line is returned unchanged.
function wtm_preformatted($line, &$trfrm) {
   if (!preg_match("/^\s+/", $line))
      return $line;

   return $trfrm->SetHTMLMode('pre') . $line;
}
548 // mode: headings, i.e. <h1>, <h2>, <h3>
549 // lines starting with !,!!,!!! are headings
// Headings: one to three leading '!' followed by a character other
// than '!'. "!" gives <h3>, "!!" gives <h2>, "!!!" gives <h1>.
// Lines that do not match are returned unchanged.
function wtm_headings($line, &$trfrm) {
   if (!preg_match("/^(!{1,3})[^!]/", $line, $whichheading))
      return $line;

   // More '!' means a more important heading: 1 -> h3, 2 -> h2, 3 -> h1.
   $heading = 'h' . (4 - strlen($whichheading[1]));
   $text = preg_replace("/^!+/", '', $line);
   return $trfrm->SetHTMLMode($heading) . $text;
}
562 function wtm_table($line, &$trfrm)
565 while (preg_match('/^(\|+)(v*)([<>^]?)([^|]*)/', $line, $m))
567 $line = substr($line, strlen($m[0]));
570 if (strlen($m[1]) > 1)
571 $td['colspan'] = strlen($m[1]);
572 if (strlen($m[2]) > 0)
573 $td['rowspan'] = strlen($m[2]) + 1;
576 $td['align'] = 'center';
577 else if ($m[3] == '>')
578 $td['align'] = 'right';
580 $td['align'] = 'left';
582 $row .= $trfrm->token(StartTag('td', $td) . " ");
584 $row .= $trfrm->token(" </td>");
586 assert(empty($line));
587 $row = $trfrm->token("<tr>") . $row . $trfrm->token("</tr>");
589 return $trfrm->SetHTMLMode(array('table',
590 array('align' => 'center',
597 // four or more dashes to <hr>
598 // Note this is of type WT_MODE_MARKUP because <hr>'s aren't
599 // allowed within <p>'s. (e.g. "<p><hr></p>" is not valid HTML.)
600 function wtm_hr($line, &$trfrm) {
601 if (preg_match('/^-{4,}(.*)$/', $line, $m)) {
602 $line = $trfrm->SetHTMLMode('', 0) . '<hr>';
604 $line .= $trfrm->SetHTMLMode('p') . $m[1];
609 // default mode: simple text paragraph
// Default mode: put the line into paragraph mode by prefixing it with
// whatever SetHTMLMode('p') returns.
function wtm_paragraph($line, &$trfrm) {
   return $trfrm->SetHTMLMode('p') . $line;
}
618 // c-file-style: "ellemtel"