1 <?php rcs_id('$Id: transform.php,v 1.15 2001-02-12 01:43:10 dairiki Exp $');
// Types of transformer functions that can be registered with
// WikiTransform::register().
define('WT_TOKENIZER', 1);
define('WT_SIMPLE_MARKUP', 2);
define('WT_MODE_MARKUP', 3);

// Tag types understood by WikiTransform::SetHTMLMode().
define("ZERO_LEVEL", 0);     // close all open tags before inserting the new tag
define("NESTED_LEVEL", 1);   // close tags until the nesting depths match
/*
   WikiTransform -- transforms wiki markup, line by line, into HTML.

   function WikiTransform() -- init

   function register($type, $function)
      Registers transformer functions
      This should be done *before* calling do_transform

      $type ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
                Currently only WT_MODE_MARKUP has a special meaning.
                If one WT_MODE_MARKUP really sets the html mode, then
                all successive WT_MODE_MARKUP functions are skipped

      $function ... function name

   function SetHTMLMode($tag, $tagtype, $level)
      Wiki HTML output can, at any given time, be in only one mode.
      It will be something like Unordered List, Preformatted Text,
      plain text etc. When we change modes we have to issue close tags
      for one mode and start tags for another.
      SetHTMLMode takes care of this.

      $tag ... HTML tag to insert
      $tagtype ... ZERO_LEVEL - close all open tags before inserting $tag
                   NESTED_LEVEL - close tags until depths match
      $level ... nesting level (depth) of $tag
                 nesting is arbitrarily limited to 10 levels

   function do_transform($html, $content)
      contains main-loop and calls transformer functions

      $html ... HTML header (if needed, otherwise '')
      $content ... wiki markup as array of lines
*/
class WikiTransform
{
   // public variables (only meaningful during do_transform)
   var $linenumber;     // current linenumber
   var $replacements;   // storage for tokenized strings of current line
   var $tokencounter;   // counter of $replacements array

   // private variables
   var $content;        // wiki markup, array of lines
   var $mode_set;       // stores if a HTML mode for this line has been set
   var $trfrm_func;     // array of registered functions
   var $stack;          // stack for SetHTMLMode (keeping track of open tags)

   // init function
   // Newer PHP versions no longer treat a method named like the class as
   // the constructor, so the real work lives in __construct().
   function __construct()
   {
      $this->trfrm_func = array();
      $this->stack = new Stack;
   }

   // old-style constructor, kept so that engines (and callers) that
   // still rely on it keep working
   function WikiTransform()
   {
      $this->__construct();
   }

   // register transformation functions
   // $type     ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
   // $function ... callable name; functions run in order of registration
   function register($type, $function)
   {
      $this->trfrm_func[] = array ($type, $function);
   }

   // sets current mode like list, preformatted text, plain text, ...
   // takes care of closing (open) tags
   // returns the HTML (close and/or open tags) needed for the mode change;
   // an empty string if nothing has to be emitted
   function SetHTMLMode($tag, $tagtype, $level)
   {
      $this->mode_set = 1;   // in order to prevent other mode markup
                             // to be executed
      $retvar = '';

      if ($tagtype == ZERO_LEVEL) {
         // empty the stack until $level == 0;
         if ($tag == $this->stack->top()) {
            return '';   // same tag? -> nothing to do
         }
         while ($this->stack->cnt() > 0) {
            $closetag = $this->stack->pop();
            $retvar .= "</$closetag>\n";
         }

         // an empty $tag only closes what is open
         if ($tag) {
            $retvar .= "<$tag>\n";
            $this->stack->push($tag);
         }

      } elseif ($tagtype == NESTED_LEVEL) {
         if ($level <= $this->stack->cnt()) {
            // $tag has fewer nestings (old: tabs) than stack,
            // reduce stack to that tab count
            while ($this->stack->cnt() > $level) {
               $closetag = $this->stack->pop();
               if ($closetag == false) {
                  //echo "bounds error in tag stack";
                  break;
               }
               $retvar .= "</$closetag>\n";
            }

            // if list type isn't the same,
            // back up one more and push new tag
            if ($tag != $this->stack->top()) {
               $closetag = $this->stack->pop();
               $retvar .= "</$closetag><$tag>\n";
               $this->stack->push($tag);
            }

         } else { // $level > $this->stack->cnt()
            // we add the diff to the stack
            // stack might be zero
            while ($this->stack->cnt() < $level) {
               $retvar .= "<$tag>\n";
               $this->stack->push($tag);
               if ($this->stack->cnt() > 10) {
                  // arbitrarily limit tag nesting
                  ExitWiki(gettext ("Stack bounds exceeded in SetHTMLOutputMode"));
               }
            }
         }

      } else { // unknown $tagtype
         ExitWiki ("Passed bad tag type value in SetHTMLOutputMode");
      }

      return $retvar;
   }

   // work horse and main loop
   // this function does the transform from wiki markup to HTML
   // $html    ... HTML emitted so far; the transformed page is appended
   // $content ... wiki markup as array of lines
   // returns $html followed by the HTML generated for $content
   function do_transform($html, $content)
   {
      global $FieldSeparator;

      // tolerate a missing page body and non-sequential keys
      $this->content = is_array($content) ? array_values($content) : array();

      // Loop over all lines of the page and apply transformation rules
      $numlines = count($this->content);
      $numfuncs = count($this->trfrm_func);
      for ($lnum = 0; $lnum < $numlines; $lnum++)
      {
         $this->tokencounter = 0;
         $this->replacements = array();
         $this->linenumber = $lnum;
         $line = $this->content[$lnum];

         // blank lines clear the current mode
         if (!strlen($line) || $line == "\r") {
            $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
            continue;
         }

         $this->mode_set = 0;

         // main loop applying all registered functions
         // tokenizers, markup, html mode, ...
         // functions are executed in order of registering
         for ($func = 0; $func < $numfuncs; $func++) {
            // if HTMLmode is already set then skip all following
            // WT_MODE_MARKUP functions
            if ($this->mode_set &&
                ($this->trfrm_func[$func][0] == WT_MODE_MARKUP)) {
               continue;
            }
            // call registered function
            $callback = $this->trfrm_func[$func][1];
            $line = $callback($line, $this);
         }

         // Replace tokens ($replacements was filled by wtt_* functions)
         for ($i = 0; $i < $this->tokencounter; $i++) {
            $line = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $this->replacements[$i], $line);
         }

         $html .= $line . "\n";
      }
      // close all tags that are still open
      $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);

      return $html;
   }
   // end do_transform()
}
// end class WikiTransform
//////////////////////////////////////////////////////////

// Build the transformer for the current page.
// expects $pagehash and $html to be set by the including script
$transform = new WikiTransform;

// register functions
// functions are applied in order of registering

$transform->register(WT_TOKENIZER, 'wtt_bracketlinks');
$transform->register(WT_TOKENIZER, 'wtt_urls');
// the interwiki tokenizer is only registered when it has been defined
if (function_exists('wtt_interwikilinks')) {
   $transform->register(WT_TOKENIZER, 'wtt_interwikilinks');
}
$transform->register(WT_TOKENIZER, 'wtt_bumpylinks');

$transform->register(WT_SIMPLE_MARKUP, 'wtm_htmlchars');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_linebreak');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_bold_italics');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_title_search');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_fulltext_search');
$transform->register(WT_SIMPLE_MARKUP, 'wtm_mostpopular');

// only the first mode markup function that sets a mode takes effect,
// so wtm_paragraph (which always sets one) has to stay last
$transform->register(WT_MODE_MARKUP, 'wtm_list_ul');
$transform->register(WT_MODE_MARKUP, 'wtm_list_ol');
$transform->register(WT_MODE_MARKUP, 'wtm_list_dl');
$transform->register(WT_MODE_MARKUP, 'wtm_preformatted');
$transform->register(WT_MODE_MARKUP, 'wtm_headings');
$transform->register(WT_MODE_MARKUP, 'wtm_hr');
$transform->register(WT_MODE_MARKUP, 'wtm_paragraph');

$html = $transform->do_transform($html, $pagehash['content']);
/*
Requirements for functions registered to WikiTransform:

Signature:  function wtm_xxxx($line, &$transform)

$line ... current line containing wiki markup
          (Note: it may already contain HTML from other transform functions)
&$transform ... WikiTransform object -- for the public variables of this
          object and their use, see above.

Functions have to return $line (whether it was modified or not).
All conversion should take place inside $line.

Tokenizer functions should use $transform->replacements to store
the replacement strings. They also have to keep
$transform->tokencounter up to date. See the functions below. Back
substitution of tokenized strings is done by do_transform().
*/
//////////////////////////////////////////////////////////
// Tokenizer functions

// helper function which does actual tokenizing and is
// called by other wtt_* functions
//
// Finds any strings in $str that match $pattern and stores them in
// $orig, replacing them with tokens of the form
//    $FieldSeparator . $FieldSeparator . <number> . $FieldSeparator
// starting at number $ntokens.
//
// $str     ... line to tokenize
// $pattern ... PCRE pattern without delimiters (it is embedded in /.../)
// $orig    ... (by reference) receives each matched string, stored
//              under its token number
// $ntokens ... (by reference) next free token number; incremented once
//              per match
// returns the tokenized string
function wt_tokenize($str, $pattern, &$orig, &$ntokens) {
   global $FieldSeparator;

   $new = '';
   while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
      if ($matches[0] === '') {
         // a pattern that matches the empty string consumes nothing;
         // stop here instead of looping forever
         break;
      }
      $linktoken = $FieldSeparator . $FieldSeparator . $ntokens . $FieldSeparator;
      $new .= $matches[1] . $linktoken;
      // keyed by token number so the token and its replacement cannot drift apart
      $orig[$ntokens] = $matches[2];
      $ntokens++;
      // the cast keeps $str a string where substr() yields false at end of input
      $str = (string) substr($str, strlen($matches[0]));
   }
   return $new . $str;
}
// New linking scheme: links are in brackets. This will
// emulate typical HTML linking as well as Wiki linking.
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object; its replacements and tokencounter
//            are updated
// returns $line with every bracket construct replaced by a token
function wtt_bracketlinks($line, &$trfrm)
{
   // footnote numbers already seen (kept across calls)
   static $footnotes = array();

   // protecting escaped '[': "[[" stands for a literal "["
   $n = $ntok = $trfrm->tokencounter;
   $line = wt_tokenize($line, '\[\[', $trfrm->replacements, $ntok);
   // NOTE(review): the loop headers were missing from the source listing;
   // reconstructed as "for every token created by the preceding call"
   while ($n < $ntok) {
      $trfrm->replacements[$n++] = '[';
   }

   // match anything else between brackets
   $line = wt_tokenize($line, '\[.+?\]', $trfrm->replacements, $ntok);
   while ($n < $ntok) {
      $link = ParseAndLink($trfrm->replacements[$n]);
      if (strpos($link['type'], 'footnote') === false) {
         $trfrm->replacements[$n] = $link['link'];
      } else {
         $ftnt = $link['link'];
         if (isset($footnotes[$ftnt])) {
            // second encounter: the footnote itself, linking back to its reference
            $trfrm->replacements[$n] = "<A NAME=\"footnote-$ftnt\"></A><A HREF=\"#footnote-rev-$ftnt\">[$ftnt]</A>";
         } else { // first encounter of [x]
            $trfrm->replacements[$n] = "<A NAME=\"footnote-rev-$ftnt\"></A><SUP><A HREF=\"#footnote-$ftnt\">[$ftnt]</A></SUP>";
            $footnotes[$ftnt] = 1;
         }
      }
      $n++;
   }

   $trfrm->tokencounter = $ntok;
   return $line;
}
// replace all URLs with tokens, so we don't confuse them
// with Wiki words later. Wiki words in URLs break things.
// URLs preceded by a '!' are not linked
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object; its replacements and tokencounter
//            are updated
// returns $line with every URL replaced by a token
function wtt_urls($line, &$trfrm)
{
   global $AllowedProtocols;

   $n = $ntok = $trfrm->tokencounter;
   $line = wt_tokenize($line, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $trfrm->replacements, $ntok);
   // NOTE(review): loop header, "else" and counter increment were missing
   // from the source listing and have been reconstructed
   while ($n < $ntok) {
      if ($trfrm->replacements[$n][0] == '!') {
         // escaped URL: drop the '!' and leave the text unlinked
         $trfrm->replacements[$n] = substr($trfrm->replacements[$n], 1);
      } else {
         $trfrm->replacements[$n] = LinkURL($trfrm->replacements[$n]);
      }
      $n++;
   }

   $trfrm->tokencounter = $ntok;
   return $line;
}
// Link Wiki words (BumpyText)
// Wikiwords preceded by a '!' are not linked
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object; its replacements and tokencounter
//            are updated
// returns $line with every wiki word replaced by a token
function wtt_bumpylinks($line, &$trfrm)
{
   global $WikiNameRegexp, $dbi;

   $n = $ntok = $trfrm->tokencounter;
   $line = wt_tokenize($line, "!?$WikiNameRegexp", $trfrm->replacements, $ntok);
   // NOTE(review): loop header, final "else" and counter increment were
   // missing from the source listing and have been reconstructed
   while ($n < $ntok) {
      $old = $trfrm->replacements[$n];
      if ($old[0] == '!') {
         // escaped wiki word: drop the '!' and leave the text unlinked
         $trfrm->replacements[$n] = substr($old,1);
      } elseif (IsWikiPage($dbi, $old)) {
         $trfrm->replacements[$n] = LinkExistingWikiWord($old);
      } else {
         $trfrm->replacements[$n] = LinkUnknownWikiWord($old);
      }
      $n++;
   }

   $trfrm->tokencounter = $ntok;
   return $line;
}
// end of tokenizer functions
//////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////
// basic simple markup functions

// escape HTML metachars
// $line        ... current line of wiki markup
// $transformer ... WikiTransform object (not used here)
// returns $line with '&', '>' and '<' replaced by their HTML entities
function wtm_htmlchars($line, &$transformer)
{
   // str_replace() applies the pairs in order, so '&' is escaped first
   // and the ampersands of the entities added afterwards stay untouched
   return str_replace(array('&', '>', '<'),
                      array('&amp;', '&gt;', '&lt;'),
                      $line);
}
// %%% are linebreaks
// $line        ... current line of wiki markup
// $transformer ... WikiTransform object (not used here)
// returns $line with every '%%%' replaced by '<br>'
function wtm_linebreak($line, &$transformer) {
   return str_replace('%%%', '<br>', $line);
}
// bold and italics: __text__ becomes <strong>, ''text'' becomes <em>
// $line        ... current line of wiki markup
// $transformer ... WikiTransform object (not used here)
// returns the converted line
function wtm_bold_italics($line, &$transformer) {
   $line = preg_replace('|(__)(.*?)(__)|', '<strong>\2</strong>', $line);
   $line = preg_replace("|('')(.*?)('')|", '<em>\2</em>', $line);
   return $line;
}
//////////////////////////////////////////////////////////
// some tokens to be replaced by (dynamic) content

// wiki token: title search dialog
// $line        ... current line of wiki markup
// $transformer ... WikiTransform object (not used here)
// returns $line with '%%Search%%' replaced by the result of LinkPhpwikiURL()
function wtm_title_search($line, &$transformer) {
   if (strpos($line, '%%Search%%') !== false) {
      // NOTE(review): the second argument was missing from the source
      // listing; gettext("Search") is a reconstruction -- confirm
      // against LinkPhpwikiURL()
      $html = LinkPhpwikiURL(
         "phpwiki:?action=search&searchterm=()&searchtype=title",
         gettext("Search"));

      $line = str_replace('%%Search%%', $html, $line);
   }
   return $line;
}
// wiki token: fulltext search dialog
// $line        ... current line of wiki markup
// $transformer ... WikiTransform object (not used here)
// returns $line with '%%Fullsearch%%' replaced by the result of
// LinkPhpwikiURL()
function wtm_fulltext_search($line, &$transformer) {
   if (strpos($line, '%%Fullsearch%%') !== false) {
      // NOTE(review): the second argument was missing from the source
      // listing; gettext("Search") is a reconstruction -- confirm
      // against LinkPhpwikiURL()
      $html = LinkPhpwikiURL(
         "phpwiki:?action=search&searchterm=()&searchtype=full",
         gettext("Search"));

      $line = str_replace('%%Fullsearch%%', $html, $line);
   }
   return $line;
}
// wiki token: mostpopular list
// $line        ... current line of wiki markup
// $transformer ... WikiTransform object (not used here)
// returns $line with '%%Mostpopular%%' replaced by a list of pages and
// their hit counts, as delivered by MostPopularNextMatch()
function wtm_mostpopular($line, &$transformer) {
   global $dbi;
   if (strpos($line, '%%Mostpopular%%') !== false) {
      $query = InitMostPopular($dbi, MOST_POPULAR_LIST_LENGTH);
      // NOTE(review): the lines opening and closing the list were missing
      // from the source listing; the <DL> wrapper is a reconstruction
      // based on the <DD> entries emitted below -- confirm
      $html = "<DL>\n";
      while ($qhash = MostPopularNextMatch($dbi, $query)) {
         $html .= "<DD>$qhash[hits] ... " . LinkExistingWikiWord($qhash['pagename']) . "\n";
      }
      $html .= "</DL>\n";
      $line = str_replace('%%Mostpopular%%', $html, $line);
   }
   return $line;
}
//////////////////////////////////////////////////////////
// mode markup functions


// tabless markup for unordered, ordered, and dictionary lists
// ul/ol list types can be mixed, so we only look at the last
// character. Changes e.g. from "**#*" to "###*" go unnoticed
// and wouldn't make a difference to the HTML layout anyway.

// unordered lists <UL>: "*"
// has to be registered before list OL
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// returns the line as a list item, or $line unchanged if it is not an
// unordered list line
function wtm_list_ul($line, &$trfrm) {
   if (preg_match("/^([#*]*\*)[^#]/", $line, $matches)) {
      // the number of leading list characters is the nesting depth
      $numtabs = strlen($matches[1]);
      $line = preg_replace("/^([#*]*\*)/", '', $line);
      $html = $trfrm->SetHTMLMode('ul', NESTED_LEVEL, $numtabs) . '<li>';
      $line = $html . $line;
   }
   return $line;
}
// ordered lists <OL>: "#"
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// returns the line as a list item, or $line unchanged if it is not an
// ordered list line
function wtm_list_ol($line, &$trfrm) {
   if (preg_match("/^([#*]*\#)/", $line, $matches)) {
      // the number of leading list characters is the nesting depth
      $numtabs = strlen($matches[1]);
      $line = preg_replace("/^([#*]*\#)/", "", $line);
      $html = $trfrm->SetHTMLMode('ol', NESTED_LEVEL, $numtabs) . '<li>';
      $line = $html . $line;
   }
   return $line;
}
// definition lists <DL>: ";text:text"
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// returns the line as <dt>/<dd> markup, or $line unchanged if it is not a
// definition list line
function wtm_list_dl($line, &$trfrm) {
   if (preg_match("/(^;+)(.*?):(.*$)/", $line, $matches)) {
      // the number of leading ';' is the nesting depth
      $numtabs = strlen($matches[1]);
      $line = $trfrm->SetHTMLMode('dl', NESTED_LEVEL, $numtabs);
      // append (not assign), so the tags returned by SetHTMLMode are kept;
      // the term is optional, ";:text" yields a definition only
      if (trim($matches[2]) !== '') {
         $line .= '<dt>' . $matches[2];
      }
      $line .= '<dd>' . $matches[3];
   }
   return $line;
}
// mode: preformatted text, i.e. <pre>
// lines starting with whitespace are preformatted
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// returns the line prefixed with the mode change, or $line unchanged
function wtm_preformatted($line, &$trfrm) {
   if (preg_match("/^\s+/", $line)) {
      $line = $trfrm->SetHTMLMode('pre', ZERO_LEVEL, 0) . $line;
   }
   return $line;
}
// mode: headings, i.e. <h1>, <h2>, <h3>
// lines starting with !,!!,!!! are headings
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// returns the heading text prefixed with the mode change, or $line
// unchanged if it is not a heading line
function wtm_headings($line, &$trfrm) {
   if (preg_match("/^(!{1,3})[^!]/", $line, $whichheading)) {
      // more '!' means a bigger heading: '!' -> h3, '!!' -> h2, '!!!' -> h1
      $heading = 'h' . (4 - strlen($whichheading[1]));
      $line = preg_replace("/^!+/", '', $line);
      $line = $trfrm->SetHTMLMode($heading, ZERO_LEVEL, 0) . $line;
   }
   return $line;
}
// four or more dashes to <hr>
// Note this is of type WT_MODE_MARKUP because <hr>'s aren't
// allowed within <p>'s. (e.g. "<p><hr></p>" is not valid HTML.)
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// returns '<hr>' (plus any text following the dashes as a new paragraph),
// or $line unchanged if it is not a rule line
function wtm_hr($line, &$trfrm) {
   if (preg_match('/^-{4,}(.*)$/', $line, $m)) {
      // close whatever is open, the rule stands on its own
      $line = $trfrm->SetHTMLMode('', ZERO_LEVEL, 0) . '<hr>';
      // NOTE(review): the guard was missing from the source listing;
      // reconstructed as "only open a paragraph when text follows the dashes"
      if (strlen($m[1])) {
         $line .= $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $m[1];
      }
   }
   return $line;
}
// default mode: simple text paragraph
// always sets a mode, so it only takes effect if no mode markup function
// registered before it has set one for the line
//
// $line  ... current line of wiki markup
// $trfrm ... WikiTransform object, used to switch the HTML mode
// returns the line prefixed with the mode change
function wtm_paragraph($line, &$trfrm) {
   $line = $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $line;
   return $line;
}
515 // c-file-style: "ellemtel"