1 <?php rcs_id('$Id: transform.php,v 1.11 2001-02-07 17:21:33 ahollosi Exp $');
3 define('WT_TOKENIZER', 1);
4 define('WT_SIMPLE_MARKUP', 2);
5 define('WT_MODE_MARKUP', 3);
10 function WikiTransform() -- init
12 function register($type, $function)
13 Registers transformer functions
14 This should be done *before* calling do_transform
16 $type ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
17 Currently only WT_MODE_MARKUP has a special meaning.
18 If one WT_MODE_MARKUP really sets the html mode, then
19 all successive WT_MODE_MARKUP functions are skipped
21 $function ... function name
23 function SetHTMLMode($tag, $tagtype, $level)
24 Wiki HTML output can, at any given time, be in only one mode.
25 It will be something like Unordered List, Preformatted Text,
26 plain text etc. When we change modes we have to issue close tags
27 for one mode and start tags for another.
28 SetHTMLMode takes care of this.
30 $tag ... HTML tag to insert
31 $tagtype ... ZERO_LEVEL - close all open tags before inserting $tag
32 NESTED_LEVEL - close tags until depths match
33 $level ... nesting level (depth) of $tag
34 nesting is arbitrarily limited to 10 levels
36 function do_transform($html, $content)
37 contains main-loop and calls transformer functions
39 $html ... HTML header (if needed, otherwise '')
40 $content ... wiki markup as array of lines
44 // public variables (only meaningful during do_transform)
45 var $linenumber; // current linenumber
46 var $replacements; // storage for tokenized strings of current line
47 var $tokencounter; // counter of $replacements array
50 var $content; // wiki markup, array of lines
51 var $mode_set; // stores if a HTML mode for this line has been set
52 var $trfrm_func; // array of registered functions
53 var $stack; // stack for SetHTMLMode (keeping track of open tags)
// Constructor: starts with an empty list of registered transformer
// functions and a fresh tag stack for SetHTMLMode().
//
// PHP 8 no longer treats a method named after its class as the
// constructor, so the initialisation lives in __construct().
function __construct()
{
    $this->trfrm_func = array();
    $this->stack = new Stack;
}

// Old-style (PHP 4) constructor name, kept so that interpreters which
// only know class-named constructors, and any explicit callers, still
// get the same initialisation.
function WikiTransform()
{
    $this->__construct();
}
// Appends a transformer function to the list of registered functions.
//   $type     ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
//   $function ... name of the function to call
// Entries are stored as array($type, $function) in registration order.
function register($type, $function)
{
    $entry = array($type, $function);
    $this->trfrm_func[] = $entry;
}
// Sets the current HTML mode (list, preformatted text, plain text, ...)
// and takes care of closing tags that are still open.
//
//   $tag     ... HTML tag to insert ('' only closes the open tags)
//   $tagtype ... ZERO_LEVEL   - close all open tags before inserting $tag
//                NESTED_LEVEL - close/open tags until depths match
//   $level   ... nesting level (depth) of $tag, used for NESTED_LEVEL
//
// Returns the HTML (close and/or open tags) that has to be emitted to
// reach the requested mode; always a string, '' if nothing changes.
// Calls ExitWiki() for an unknown $tagtype or when nesting exceeds 10.
function SetHTMLMode($tag, $tagtype, $level)
{
    $this->mode_set = 1;    // in order to prevent other mode markup
    $retvar = '';

    if ($tagtype == ZERO_LEVEL) {
        // empty the stack until $level == 0;
        if ($tag == $this->stack->top()) {
            // same tag? -> nothing to do.  Return the (empty) string
            // instead of NULL so that callers always get a string.
            return $retvar;
        }
        while ($this->stack->cnt() > 0) {
            $closetag = $this->stack->pop();
            $retvar .= "</$closetag>\n";
        }

        // an empty $tag (blank line, end of page) only closes tags
        if ($tag) {
            $retvar .= "<$tag>\n";
            $this->stack->push($tag);
        }

    } elseif ($tagtype == NESTED_LEVEL) {
        if ($level <= $this->stack->cnt()) {
            // $tag has fewer nestings (old: tabs) than stack,
            // reduce stack to that tab count
            while ($this->stack->cnt() > $level) {
                $closetag = $this->stack->pop();
                if ($closetag == false) {
                    //echo "bounds error in tag stack";
                    break;
                }
                $retvar .= "</$closetag>\n";
            }

            // if list type isn't the same,
            // back up one more and push new tag
            if ($tag != $this->stack->top()) {
                $closetag = $this->stack->pop();
                $retvar .= "</$closetag><$tag>\n";
                $this->stack->push($tag);
            }

        } else { // $level > $this->stack->cnt()
            // we add the diff to the stack
            // stack might be zero
            while ($this->stack->cnt() < $level) {
                $retvar .= "<$tag>\n";
                $this->stack->push($tag);
                if ($this->stack->cnt() > 10) {
                    // arbitrarily limit tag nesting
                    ExitWiki(gettext ("Stack bounds exceeded in SetHTMLOutputMode"));
                }
            }
        }

    } else { // unknown $tagtype
        ExitWiki ("Passed bad tag type value in SetHTMLOutputMode");
    }

    return $retvar;
}
// Main loop: converts wiki markup into HTML.
//   $html    ... HTML generated so far (header if needed, otherwise '')
//   $content ... wiki markup as array of lines
// Every non-blank line is passed through all registered functions in
// order of registration; afterwards the tokens created by the wtt_*
// functions are substituted back. Returns $html with the generated
// markup appended.
function do_transform($html, $content)
{
    global $FieldSeparator;

    $this->content = $content;
    $total = count($this->content);

    for ($lnum = 0; $lnum < $total; $lnum++) {
        // per-line state, readable by the registered functions
        $this->tokencounter = 0;
        $this->replacements = array();
        $this->linenumber = $lnum;
        $line = $this->content[$lnum];

        // blank lines clear the current mode
        if (strlen($line) == 0 || $line == "\r") {
            $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
            continue;
        }

        // no HTML mode chosen for this line yet
        $this->mode_set = 0;

        // apply tokenizers, markup and mode functions in order of
        // registering
        for ($f = 0; $f < count($this->trfrm_func); $f++) {
            list($type, $callback) = $this->trfrm_func[$f];

            // once the HTML mode is set, all following
            // WT_MODE_MARKUP functions are skipped
            $skip = ($this->mode_set && ($type == WT_MODE_MARKUP));
            if (!$skip) {
                $line = $callback($line, $this);
            }
        }

        // put the tokenized strings back ($replacements was filled
        // by the wtt_* functions)
        for ($i = 0; $i < $this->tokencounter; $i++) {
            $token = $FieldSeparator . $FieldSeparator . $i . $FieldSeparator;
            $line = str_replace($token, $this->replacements[$i], $line);
        }

        $html .= $line . "\n";
    }

    // close all tags that are still open
    $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);

    return $html;
}
// end do_transform()
189 // end class WikiTransform
192 //////////////////////////////////////////////////////////
$transform = new WikiTransform;

// Transformer functions, listed in the order in which they are
// registered and therefore applied: tokenizers first, then simple
// markup, then the mode markup functions.
$wt_registry = array(
    array(WT_TOKENIZER,     'wtt_bracketlinks'),
    array(WT_TOKENIZER,     'wtt_urls'),
    array(WT_TOKENIZER,     'wtt_bumpylinks'),

    array(WT_SIMPLE_MARKUP, 'wtm_htmlchars'),
    array(WT_SIMPLE_MARKUP, 'wtm_hr'),
    array(WT_SIMPLE_MARKUP, 'wtm_linebreak'),
    array(WT_SIMPLE_MARKUP, 'wtm_bold_italics'),
    array(WT_SIMPLE_MARKUP, 'wtm_title_search'),
    array(WT_SIMPLE_MARKUP, 'wtm_fulltext_search'),
    array(WT_SIMPLE_MARKUP, 'wtm_mostpopular'),

    array(WT_MODE_MARKUP,   'wtm_list_ul'),
    array(WT_MODE_MARKUP,   'wtm_list_ol'),
    array(WT_MODE_MARKUP,   'wtm_list_dl'),
    array(WT_MODE_MARKUP,   'wtm_preformatted'),
    array(WT_MODE_MARKUP,   'wtm_headings'),
    array(WT_MODE_MARKUP,   'wtm_paragraph'),
);

foreach ($wt_registry as $wt_entry) {
    $transform->register($wt_entry[0], $wt_entry[1]);
}
unset($wt_registry, $wt_entry);

$html = $transform->do_transform($html, $pagehash['content']);
222 Requirements for functions registered to WikiTransform:
224 Signature: function wtm_xxxx($line, &$transform)
226 $line ... current line containing wiki markup
227 (Note: it may already contain HTML from other transform functions)
228 &$transform ... WikiTransform object -- public variables of this
229 object and their use see above.
231 Functions have to return $line (doesn't matter if modified or not)
232 All conversion should take place inside $line.
234 Tokenizer functions should use $transform->replacements to store
235 the replacement strings. Also, they have to keep track of
236 $transform->tokencounter. See functions below. Back substitution
237 of tokenized strings is done by do_transform().
242 //////////////////////////////////////////////////////////
243 // Tokenizer functions
// Helper that does the actual tokenizing for the wtt_* functions.
//
// Every substring of $str that matches $pattern is appended to $orig
// and replaced by a token of the form
//    $FieldSeparator $FieldSeparator <number> $FieldSeparator
// where <number> starts at $ntokens, which is incremented per match.
// Returns the tokenized string.
function wt_tokenize($str, $pattern, &$orig, &$ntokens) {
    global $FieldSeparator;

    $regex = "/^(.*?)($pattern)/";
    $result = '';
    $found = array();

    for (;;) {
        // shortest possible prefix followed by the next match
        if (!preg_match($regex, $str, $found)) {
            break;
        }
        $token = $FieldSeparator . $FieldSeparator . $ntokens . $FieldSeparator;
        $ntokens++;

        $result .= $found[1] . $token;
        $orig[] = $found[2];

        // continue behind the text consumed by this match
        $str = substr($str, strlen($found[0]));
    }

    // whatever is left contains no further match
    return $result . $str;
}
// New linking scheme: links are in brackets. This will
// emulate typical HTML linking as well as Wiki linking.
//
// "[[" is replaced by a literal "[". Everything else between brackets
// is handed to ParseAndLink(); results whose type contains 'footnote'
// are turned into footnote anchors, where the first occurrence of a
// footnote links down to the later one and vice versa.
function wtt_bracketlinks($line, &$trfrm)
{
    // footnotes seen so far (kept across calls)
    static $footnotes = array();

    $first = $trfrm->tokencounter;
    $ntok = $first;

    // protect "[["
    $line = wt_tokenize($line, '\[\[', $trfrm->replacements, $ntok);
    for ($n = $first; $n < $ntok; $n++) {
        $trfrm->replacements[$n] = '[';
    }

    // match anything else between brackets; $n now points at the
    // first token created by this second pass
    $line = wt_tokenize($line, '\[.+?\]', $trfrm->replacements, $ntok);
    for (; $n < $ntok; $n++) {
        $link = ParseAndLink($trfrm->replacements[$n]);

        if (strpos($link['type'], 'footnote') === false) {
            $trfrm->replacements[$n] = $link['link'];
            continue;
        }

        $ftnt = $link['link'];
        if (!isset($footnotes[$ftnt])) {
            // first encounter of [x]
            $trfrm->replacements[$n] = "<A NAME=\"footnote-rev-$ftnt\"></A><SUP><A HREF=\"#footnote-$ftnt\">[$ftnt]</A></SUP>";
            $footnotes[$ftnt] = 1;
        } else {
            $trfrm->replacements[$n] = "<A NAME=\"footnote-$ftnt\"></A><A HREF=\"#footnote-rev-$ftnt\">[$ftnt]</A>";
        }
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
// Replace all URLs with tokens, so we don't confuse them
// with Wiki words later. Wiki words in URLs break things.
// URLs preceded by a '!' are not linked; the '!' is removed.
function wtt_urls($line, &$trfrm)
{
    global $AllowedProtocols;

    $first = $trfrm->tokencounter;
    $ntok = $first;
    $line = wt_tokenize($line, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $trfrm->replacements, $ntok);

    for ($n = $first; $n < $ntok; $n++) {
        $url = $trfrm->replacements[$n];
        if ($url[0] == '!') {
            $trfrm->replacements[$n] = substr($url, 1);
        } else {
            $trfrm->replacements[$n] = LinkURL($url);
        }
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
// Link Wiki words (BumpyText)
// Wiki words preceded by a '!' are not linked; the '!' is removed.
// Other words are linked via LinkExistingWikiWord() if IsWikiPage()
// reports the page, otherwise via LinkUnknownWikiWord().
function wtt_bumpylinks($line, &$trfrm)
{
    global $WikiNameRegexp, $dbi;

    $first = $trfrm->tokencounter;
    $ntok = $first;
    $line = wt_tokenize($line, "!?$WikiNameRegexp", $trfrm->replacements, $ntok);

    for ($n = $first; $n < $ntok; $n++) {
        $word = $trfrm->replacements[$n];

        if ($word[0] == '!') {
            $linked = substr($word, 1);
        } elseif (IsWikiPage($dbi, $word)) {
            $linked = LinkExistingWikiWord($word);
        } else {
            $linked = LinkUnknownWikiWord($word);
        }
        $trfrm->replacements[$n] = $linked;
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
345 // end of tokenizer functions
346 //////////////////////////////////////////////////////////
349 //////////////////////////////////////////////////////////
350 // basic simple markup functions
// Escape the HTML metachars '&', '<' and '>' with their entities.
//   $line         ... current line of wiki markup
//   &$transformer ... WikiTransform object (unused)
// Returns the escaped line.
//
// '&' has to be replaced first, otherwise the ampersands of the
// entities just inserted would be escaped again. Quotes are left
// alone on purpose: wtm_bold_italics() looks for '' later on.
function wtm_htmlchars($line, &$transformer)
{
    $line = str_replace('&', '&amp;', $line);
    $line = str_replace('>', '&gt;', $line);
    $line = str_replace('<', '&lt;', $line);
    return $line;
}
// Four or more dashes at the start of the line become <hr>.
//   $line         ... current line of wiki markup
//   &$transformer ... WikiTransform object (unused)
// Returns the converted line.
// Uses preg_replace() like the rest of this file; the POSIX
// ereg_replace() is no longer available in PHP 7 and later.
function wtm_hr($line, &$transformer) {
    return preg_replace('/^-{4,}/', '<hr>', $line);
}
// Every '%%%' in the line is turned into a <br>.
//   $line         ... current line of wiki markup
//   &$transformer ... WikiTransform object (unused)
function wtm_linebreak($line, &$transformer) {
    $pieces = explode('%%%', $line);
    return implode('<br>', $pieces);
}
// __text__ becomes <strong>text</strong>, ''text'' becomes <em>text</em>.
//   $line         ... current line of wiki markup
//   &$transformer ... WikiTransform object (unused)
// The patterns are applied one after the other, bold first.
function wtm_bold_italics($line, &$transformer) {
    $patterns = array(
        '|(__)(.*?)(__)|',
        "|('')(.*?)('')|",
    );
    $replacements = array(
        '<strong>\2</strong>',
        '<em>\2</em>',
    );
    return preg_replace($patterns, $replacements, $line);
}
380 //////////////////////////////////////////////////////////
381 // some tokens to be replaced by (dynamic) content
// wiki token: title search dialog
// Replaces '%%Search%%' with a form that submits the field "search"
// to $ScriptUrl. Lines without the token are returned unchanged.
function wtm_title_search($line, &$transformer) {
    global $ScriptUrl;

    if (strpos($line, '%%Search%%') === false) {
        return $line;
    }

    $label = gettext("Search");
    $form = "<form action=\"$ScriptUrl\">\n"
          . "<input type=text size=30 name=search>\n"
          . "<input type=submit value=\"" . $label . "\"></form>\n";

    return str_replace('%%Search%%', $form, $line);
}
// wiki token: fulltext search dialog
// Replaces '%%Fullsearch%%' with a form that submits the field "full"
// to $ScriptUrl. Lines without the token are returned unchanged.
//   $line         ... current line of wiki markup
//   &$transformer ... WikiTransform object (unused)
function wtm_fulltext_search($line, &$transformer) {
    global $ScriptUrl;
    if (strpos($line, '%%Fullsearch%%') !== false) {
        // the text input needs its closing '>', otherwise the submit
        // button is swallowed by the unterminated tag
        $html = "<form action=\"$ScriptUrl\">\n" .
            "<input type=text size=30 name=full>\n" .
            "<input type=submit value=\"". gettext("Search") .
            "\"></form>\n";
        $line = str_replace('%%Fullsearch%%', $html, $line);
    }
    return $line;
}
// wiki token: mostpopular list
// Replaces '%%Mostpopular%%' with a <DL> holding one <DD> per match
// delivered by MostPopularNextMatch(): the hit count followed by a
// link to the page. Lines without the token are returned unchanged.
function wtm_mostpopular($line, &$transformer) {
    global $ScriptUrl, $dbi;

    if (strpos($line, '%%Mostpopular%%') === false) {
        return $line;
    }

    $query = InitMostPopular($dbi, MOST_POPULAR_LIST_LENGTH);

    $list = "<DL>\n";
    while ($qhash = MostPopularNextMatch($dbi, $query)) {
        $pagelink = LinkExistingWikiWord($qhash['pagename']);
        $list .= "<DD>" . $qhash['hits'] . " ... " . $pagelink . "\n";
    }
    $list .= "</DL>\n";

    return str_replace('%%Mostpopular%%', $list, $line);
}
425 //////////////////////////////////////////////////////////
426 // mode markup functions
// tabless markup for unordered, ordered, and dictionary lists
// ul/ol list types can be mixed, so we only look at the last
// character. Changes e.g. from "**#*" to "###*" go unnoticed.
// and wouldn't make a difference to the HTML layout anyway.

// unordered lists <UL>: "*"
// has to be registered before list OL
// The nesting level is the length of the matched "#"/"*" prefix.
function wtm_list_ul($line, &$trfrm) {
    $hit = array();
    if (!preg_match("/^([#*]*\*)[^#]/", $line, $hit)) {
        return $line;
    }

    $depth = strlen($hit[1]);
    // strip the list prefix before the mode tags are prepended
    $text = preg_replace("/^([#*]*\*)/", '', $line);
    $open = $trfrm->SetHTMLMode('ul', NESTED_LEVEL, $depth);

    return $open . '<li>' . $text;
}
// ordered lists <OL>: "#"
// The nesting level is the length of the "#"/"*" prefix ending in "#".
function wtm_list_ol($line, &$trfrm) {
    $hit = array();
    if (!preg_match("/^([#*]*\#)/", $line, $hit)) {
        return $line;
    }

    $depth = strlen($hit[1]);
    // strip the list prefix before the mode tags are prepended
    $text = preg_replace("/^([#*]*\#)/", "", $line);
    $open = $trfrm->SetHTMLMode('ol', NESTED_LEVEL, $depth);

    return $open . '<li>' . $text;
}
// definition lists <DL>: ";text:text"
//   $line   ... current line of wiki markup
//   &$trfrm ... WikiTransform object, used to switch to 'dl' mode
// The number of leading ';' is the nesting level. The part before the
// first ':' becomes the <dt> (left out when it is blank), the rest of
// the line becomes the <dd>. Lines that do not match are returned
// unchanged.
function wtm_list_dl($line, &$trfrm) {
    if (preg_match("/(^;+)(.*?):(.*$)/", $line, $matches)) {
        $numtabs = strlen($matches[1]);
        $line = $trfrm->SetHTMLMode('dl', NESTED_LEVEL, $numtabs);
        // compare against '' so that a term like "0" is not dropped
        if (trim($matches[2]) !== '') {
            // append: plain assignment would throw away the open and
            // close tags just returned by SetHTMLMode()
            $line .= '<dt>' . $matches[2];
        }
        $line .= '<dd>' . $matches[3];
    }
    return $line;
}
// mode: preformatted text, i.e. <pre>
// Lines starting with whitespace switch the output to 'pre' mode;
// the line itself is kept as it is.
function wtm_preformatted($line, &$trfrm) {
    if (!preg_match("/^\s+/", $line)) {
        return $line;
    }
    $open = $trfrm->SetHTMLMode('pre', ZERO_LEVEL, 0);
    return $open . $line;
}
// mode: headings, i.e. <h1>, <h2>, <h3>
// lines starting with !,!!,!!! are headings:
// '!' gives <h3>, '!!' gives <h2>, '!!!' gives <h1>
function wtm_headings($line, &$trfrm) {
    $bangs = array();
    if (!preg_match("/^(!{1,3})[^!]/", $line, $bangs)) {
        return $line;
    }

    $tags = array('!' => 'h3', '!!' => 'h2', '!!!' => 'h1');
    $heading = $tags[$bangs[1]];

    // remove the marker before the mode tags are prepended
    $text = preg_replace("/^!+/", '', $line);
    return $trfrm->SetHTMLMode($heading, ZERO_LEVEL, 0) . $text;
}
491 // default mode: simple text paragraph
492 function wtm_paragraph($line, &$trfrm) {
493 $line = $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $line;