<?php rcs_id('$Id: transform.php,v 1.10 2001-02-07 16:38:33 ahollosi Exp $');

// Categories a transformer function can be registered under.
// Only WT_MODE_MARKUP is treated specially by WikiTransform::do_transform():
// once one mode-markup function has set the HTML mode for a line, the
// remaining WT_MODE_MARKUP functions are skipped for that line.
define('WT_TOKENIZER', 1);
define('WT_SIMPLE_MARKUP', 2);
define('WT_MODE_MARKUP', 3);
/**
 * WikiTransform -- converts wiki markup to HTML, one line at a time.
 *
 * function WikiTransform() -- init
 *
 * function register($type, $function)
 *    Registers transformer functions.
 *    This should be done *before* calling do_transform.
 *
 *    $type ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
 *              Currently only WT_MODE_MARKUP has a special meaning.
 *              If one WT_MODE_MARKUP really sets the html mode, then
 *              all successive WT_MODE_MARKUP functions are skipped.
 *
 *    $function ... function name
 *
 * function SetHTMLMode($tag, $tagtype, $level)
 *    Wiki HTML output can, at any given time, be in only one mode.
 *    It will be something like Unordered List, Preformatted Text,
 *    plain text etc. When we change modes we have to issue close tags
 *    for one mode and start tags for another.
 *    SetHTMLMode takes care of this.
 *
 *    $tag ... HTML tag to insert
 *    $tagtype ... ZERO_LEVEL - close all open tags before inserting $tag
 *                 NESTED_LEVEL - close tags until depths match
 *    $level ... nesting level (depth) of $tag
 *               nesting is arbitrarily limited to 10 levels
 *
 * function do_transform($html, $content)
 *    contains main-loop and calls transformer functions
 *
 *    $html ... HTML header (if needed, otherwise '')
 *    $content ... wiki markup as array of lines
 */
class WikiTransform
{
    // public variables (only meaningful during do_transform)
    var $linenumber;    // current linenumber
    var $replacements;  // storage for tokenized strings of current line
    var $tokencounter;  // counter of $replacements array

    // private variables
    var $content;       // wiki markup, array of lines
    var $mode_set;      // stores if a HTML mode for this line has been set
    var $trfrm_func;    // array of registered functions
    var $stack;         // stack for SetHTMLMode (keeping track of open tags)

    /**
     * Initialises the (empty) list of registered functions and the tag stack.
     */
    function __construct()
    {
        $this->trfrm_func = array();
        $this->stack = new Stack;
    }

    /**
     * Old-style constructor, kept so that PHP 4 (and any code calling it
     * by name) still initialises the object. Newer PHP versions no longer
     * treat a method named after the class as the constructor and use
     * __construct() instead.
     */
    function WikiTransform()
    {
        $this->__construct();
    }

    /**
     * Register a transformation function.
     *
     * @param int    $type     one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
     * @param string $function callable name; invoked as $function($line, $transform)
     */
    function register($type, $function)
    {
        $this->trfrm_func[] = array($type, $function);
    }

    /**
     * Sets the current mode (list, preformatted text, plain text, ...)
     * and takes care of closing any open tags.
     *
     * @param string $tag     HTML tag to open ('' only closes open tags)
     * @param int    $tagtype ZERO_LEVEL or NESTED_LEVEL
     * @param int    $level   nesting depth wanted for $tag (NESTED_LEVEL only)
     * @return string the close/open tags that have to be emitted ('' if none)
     */
    function SetHTMLMode($tag, $tagtype, $level)
    {
        // flag the line so that no further mode markup is executed for it
        $this->mode_set = 1;
        $retvar = '';

        if ($tagtype == ZERO_LEVEL) {
            // same tag already open? -> nothing to do
            if ($tag == $this->stack->top()) {
                return '';
            }

            // empty the stack completely
            while ($this->stack->cnt() > 0) {
                $closetag = $this->stack->pop();
                $retvar .= "</$closetag>\n";
            }

            // an empty $tag means "just close everything"
            if ($tag) {
                $retvar .= "<$tag>\n";
                $this->stack->push($tag);
            }
        } elseif ($tagtype == NESTED_LEVEL) {
            if ($level <= $this->stack->cnt()) {
                // $tag has fewer nestings (old: tabs) than stack,
                // reduce stack to that tab count
                while ($this->stack->cnt() > $level) {
                    $closetag = $this->stack->pop();
                    if ($closetag == false) {
                        // bounds error in tag stack
                        break;
                    }
                    $retvar .= "</$closetag>\n";
                }

                // if list type isn't the same,
                // back up one more and push new tag
                if ($tag != $this->stack->top()) {
                    $closetag = $this->stack->pop();
                    $retvar .= "</$closetag><$tag>\n";
                    $this->stack->push($tag);
                }
            } else {
                // $level > $this->stack->cnt():
                // open tags until the depth matches (stack might be empty)
                while ($this->stack->cnt() < $level) {
                    $retvar .= "<$tag>\n";
                    $this->stack->push($tag);
                    if ($this->stack->cnt() > 10) {
                        // arbitrarily limit tag nesting
                        ExitWiki(gettext ("Stack bounds exceeded in SetHTMLOutputMode"));
                    }
                }
            }
        } else {
            // unknown $tagtype
            ExitWiki ("Passed bad tag type value in SetHTMLOutputMode");
        }

        return $retvar;
    }
    // end SetHTMLMode()

    /**
     * Work horse and main loop: transforms wiki markup into HTML.
     *
     * @param string $html    HTML generated so far (header), or ''
     * @param array  $content wiki markup as array of lines
     * @return string $html with the transformed page appended
     */
    function do_transform($html, $content)
    {
        global $FieldSeparator;

        $this->content = $content;

        // registered functions take the transformer by reference; hand them
        // a plain variable instead of $this itself
        $self = $this;
        $numfuncs = count($this->trfrm_func);

        // Loop over all lines of the page and apply transformation rules
        $numlines = count($this->content);
        for ($lnum = 0; $lnum < $numlines; $lnum++) {
            $this->tokencounter = 0;
            $this->replacements = array();
            $this->linenumber = $lnum;
            $line = $this->content[$lnum];

            // blank lines clear the current mode
            if (!strlen($line) || $line == "\r") {
                $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
                continue;
            }

            $this->mode_set = 0;

            // main loop applying all registered functions
            // tokenizers, markup, html mode, ...
            // functions are executed in order of registering
            for ($func = 0; $func < $numfuncs; $func++) {
                // if HTMLmode is already set then skip all following
                // WT_MODE_MARKUP functions
                if ($this->mode_set &&
                    ($this->trfrm_func[$func][0] == WT_MODE_MARKUP)) {
                    continue;
                }
                // call registered function
                $callback = $this->trfrm_func[$func][1];
                $line = $callback($line, $self);
            }

            // Replace tokens ($replacements was filled by wtt_* functions)
            for ($i = 0; $i < $this->tokencounter; $i++) {
                $line = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $this->replacements[$i], $line);
            }

            $html .= $line . "\n";
        }

        // close all tags
        $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);

        return $html;
    }
    // end do_transform()
}
// end class WikiTransform
//////////////////////////////////////////////////////////

$transform = new WikiTransform;

// Transformer functions, listed in the order in which they are
// registered -- which is also the order in which they are applied.
$wt_registry = array(
    array(WT_TOKENIZER, 'wtt_bracketlinks'),
    array(WT_TOKENIZER, 'wtt_urls'),
    array(WT_TOKENIZER, 'wtt_bumpylinks'),

    array(WT_SIMPLE_MARKUP, 'wtm_htmlchars'),
    array(WT_SIMPLE_MARKUP, 'wtm_hr'),
    array(WT_SIMPLE_MARKUP, 'wtm_linebreak'),
    array(WT_SIMPLE_MARKUP, 'wtm_bold_italics'),
    array(WT_SIMPLE_MARKUP, 'wtm_title_search'),
    array(WT_SIMPLE_MARKUP, 'wtm_fulltext_search'),
    array(WT_SIMPLE_MARKUP, 'wtm_mostpopular'),

    array(WT_MODE_MARKUP, 'wtm_list_ul'),
    array(WT_MODE_MARKUP, 'wtm_list_ol'),
    array(WT_MODE_MARKUP, 'wtm_list_dl'),
    array(WT_MODE_MARKUP, 'wtm_preformatted'),
    array(WT_MODE_MARKUP, 'wtm_headings'),
    array(WT_MODE_MARKUP, 'wtm_paragraph'),
);
foreach ($wt_registry as $wt_entry) {
    $transform->register($wt_entry[0], $wt_entry[1]);
}
// do not leave the helper variables behind in the including scope
unset($wt_registry, $wt_entry);

// run the page content through all registered functions
$html = $transform->do_transform($html, $pagehash['content']);
/*
   Requirements for functions registered to WikiTransform:

   Signature:  function wtm_xxxx($line, &$transform)

   $line ... current line containing wiki markup
             (Note: it may already contain HTML from other transform functions)
   &$transform ... WikiTransform object -- for the public variables of
             this object and their use, see the class definition.

   Functions have to return $line (whether they modified it or not).
   All conversion should take place inside $line.

   Tokenizer functions should use $transform->replacements to store
   the replacement strings. They also have to keep
   $transform->tokencounter up to date. See the wtt_* functions below.
   Back substitution of tokenized strings is done by do_transform().
*/
//////////////////////////////////////////////////////////
// Tokenizer functions

/**
 * Helper which does the actual tokenizing; called by the wtt_* functions.
 *
 * Finds every substring of $str that matches $pattern, appends it to
 * $orig and replaces it in the returned string with a numbered token
 * ($FieldSeparator $FieldSeparator <number> $FieldSeparator).
 *
 * @param string $str      text to tokenize
 * @param string $pattern  PCRE fragment (no delimiters); it is embedded
 *                         in a '/'-delimited expression
 * @param array  &$orig    receives the matched strings, in order
 * @param int    &$ntokens number of the next token; incremented per match
 * @return string the tokenized string
 */
function wt_tokenize($str, $pattern, &$orig, &$ntokens) {
    global $FieldSeparator;

    $new = '';
    while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
        if ($matches[0] === '') {
            // a zero-length match would never consume input -> stop
            break;
        }
        $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
        $new .= $matches[1] . $linktoken;
        $orig[] = $matches[2];
        $str = substr($str, strlen($matches[0]));
    }
    // whatever is left contains no further match
    $new .= $str;
    return $new;
}
/**
 * New linking scheme: links are in brackets. This will
 * emulate typical HTML linking as well as Wiki linking.
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its replacements and
 *                       tokencounter are updated
 * @return string the line with bracket constructs replaced by tokens
 */
function wtt_bracketlinks($line, &$trfrm)
{
    // protect "[[": it is replaced by a literal "["
    $n = $ntok = $trfrm->tokencounter;
    $line = wt_tokenize($line, '\[\[', $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        $trfrm->replacements[$n++] = '[';
    }

    // match anything else between brackets
    $line = wt_tokenize($line, '\[.+?\]', $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        $link = ParseAndLink($trfrm->replacements[$n]);
        $trfrm->replacements[$n++] = $link['link'];
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
/**
 * Replace all URLs with tokens, so we don't confuse them
 * with Wiki words later. Wiki words in URLs break things.
 * URLs preceded by a '!' are not linked.
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its replacements and
 *                       tokencounter are updated
 * @return string the line with URLs replaced by tokens
 */
function wtt_urls($line, &$trfrm)
{
    global $AllowedProtocols;

    $n = $ntok = $trfrm->tokencounter;
    $line = wt_tokenize($line, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        if ($trfrm->replacements[$n][0] == '!') {
            // escaped URL: drop the '!' and leave the text unlinked
            $trfrm->replacements[$n] = substr($trfrm->replacements[$n], 1);
        } else {
            $trfrm->replacements[$n] = LinkURL($trfrm->replacements[$n]);
        }
        $n++;
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}
/**
 * Link Wiki words (BumpyText).
 * Wiki words preceded by a '!' are not linked.
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its replacements and
 *                       tokencounter are updated
 * @return string the line with Wiki words replaced by tokens
 */
function wtt_bumpylinks($line, &$trfrm)
{
    global $WikiNameRegexp, $dbi;

    $n = $ntok = $trfrm->tokencounter;
    $line = wt_tokenize($line, "!?$WikiNameRegexp", $trfrm->replacements, $ntok);
    while ($n < $ntok) {
        $old = $trfrm->replacements[$n];
        if ($old[0] == '!') {
            // escaped Wiki word: drop the '!' and leave the text unlinked
            $trfrm->replacements[$n] = substr($old, 1);
        } elseif (IsWikiPage($dbi, $old)) {
            $trfrm->replacements[$n] = LinkExistingWikiWord($old);
        } else {
            $trfrm->replacements[$n] = LinkUnknownWikiWord($old);
        }
        $n++;
    }

    $trfrm->tokencounter = $ntok;
    return $line;
}

// end of tokenizer functions
//////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
// basic simple markup functions

/**
 * Escape the HTML metacharacters &, > and < as entities.
 *
 * @param string $line         current line
 * @param object &$transformer WikiTransform object (unused)
 * @return string the escaped line
 */
function wtm_htmlchars($line, &$transformer)
{
    // '&' has to go first, otherwise the '&' of the entities
    // produced below would be escaped a second time
    $line = str_replace('&', '&amp;', $line);
    $line = str_replace('>', '&gt;', $line);
    $line = str_replace('<', '&lt;', $line);
    return $line;
}
/**
 * Four or more dashes at the start of the line become <hr>.
 *
 * @param string $line         current line
 * @param object &$transformer WikiTransform object (unused)
 * @return string the line with a leading dash run replaced by <hr>
 */
function wtm_hr($line, &$transformer) {
    // preg_replace instead of ereg_replace: the POSIX regex functions
    // are gone from current PHP, and the rest of this file uses PCRE
    return preg_replace('/^-{4,}/', '<hr>', $line);
}
/**
 * "%%%" marks a line break.
 *
 * @param string $line         current line
 * @param object &$transformer WikiTransform object (unused)
 * @return string the line with every "%%%" replaced by <br>
 */
function wtm_linebreak($line, &$transformer) {
    return str_replace('%%%', '<br>', $line);
}
/**
 * __text__ becomes <strong>, ''text'' becomes <em>.
 *
 * @param string $line         current line
 * @param object &$transformer WikiTransform object (unused)
 * @return string the line with bold/italic markup converted
 */
function wtm_bold_italics($line, &$transformer) {
    $line = preg_replace('|(__)(.*?)(__)|', '<strong>\2</strong>', $line);
    $line = preg_replace("|('')(.*?)('')|", '<em>\2</em>', $line);
    return $line;
}
//////////////////////////////////////////////////////////
// some tokens to be replaced by (dynamic) content

/**
 * Wiki token %%Search%%: title search dialog.
 *
 * @param string $line         current line
 * @param object &$transformer WikiTransform object (unused)
 * @return string the line with %%Search%% replaced by a search form
 */
function wtm_title_search($line, &$transformer) {
    global $ScriptUrl;
    if (strpos($line, '%%Search%%') !== false) {
        $html = "<form action=\"$ScriptUrl\">\n" .
            "<input type=text size=30 name=search>\n" .
            "<input type=submit value=\"". gettext("Search") .
            "\"></form>\n";
        $line = str_replace('%%Search%%', $html, $line);
    }
    return $line;
}
/**
 * Wiki token %%Fullsearch%%: fulltext search dialog.
 *
 * @param string $line         current line
 * @param object &$transformer WikiTransform object (unused)
 * @return string the line with %%Fullsearch%% replaced by a search form
 */
function wtm_fulltext_search($line, &$transformer) {
    global $ScriptUrl;
    if (strpos($line, '%%Fullsearch%%') !== false) {
        // note the closing '>' of the text input: without it the
        // submit button ends up inside the unterminated tag
        $html = "<form action=\"$ScriptUrl\">\n" .
            "<input type=text size=30 name=full>\n" .
            "<input type=submit value=\"". gettext("Search") .
            "\"></form>\n";
        $line = str_replace('%%Fullsearch%%', $html, $line);
    }
    return $line;
}
/**
 * Wiki token %%Mostpopular%%: list of the most popular pages.
 *
 * @param string $line         current line
 * @param object &$transformer WikiTransform object (unused)
 * @return string the line with %%Mostpopular%% replaced by the list
 */
function wtm_mostpopular($line, &$transformer) {
    global $ScriptUrl, $dbi;
    if (strpos($line, '%%Mostpopular%%') !== false) {
        $query = InitMostPopular($dbi, MOST_POPULAR_LIST_LENGTH);
        // the <DD> entries need an enclosing definition list
        $html = "<DL>\n";
        while ($qhash = MostPopularNextMatch($dbi, $query)) {
            $html .= "<DD>$qhash[hits] ... " . LinkExistingWikiWord($qhash['pagename']) . "\n";
        }
        $html .= "</DL>\n";
        $line = str_replace('%%Mostpopular%%', $html, $line);
    }
    return $line;
}
//////////////////////////////////////////////////////////
// mode markup functions

// tabless markup for unordered, ordered, and dictionary lists
// ul/ol list types can be mixed, so we only look at the last
// character. Changes e.g. from "**#*" to "###*" go unnoticed
// and wouldn't make a difference to the HTML layout anyway.

/**
 * Unordered lists <UL>: "*".
 * Has to be registered before the ordered-list function.
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its HTML mode is changed
 * @return string the line, as a list item if it carried "*" markup
 */
function wtm_list_ul($line, &$trfrm) {
    if (preg_match("/^([#*]*\*)[^#]/", $line, $matches)) {
        // the length of the marker run is the nesting depth
        $numtabs = strlen($matches[1]);
        $line = preg_replace("/^([#*]*\*)/", '', $line);
        $html = $trfrm->SetHTMLMode('ul', NESTED_LEVEL, $numtabs) . '<li>';
        $line = $html . $line;
    }
    return $line;
}
/**
 * Ordered lists <OL>: "#".
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its HTML mode is changed
 * @return string the line, as a list item if it carried "#" markup
 */
function wtm_list_ol($line, &$trfrm) {
    if (preg_match("/^([#*]*\#)/", $line, $matches)) {
        // the length of the marker run is the nesting depth
        $numtabs = strlen($matches[1]);
        $line = preg_replace("/^([#*]*\#)/", "", $line);
        $html = $trfrm->SetHTMLMode('ol', NESTED_LEVEL, $numtabs) . '<li>';
        $line = $html . $line;
    }
    return $line;
}
/**
 * Definition lists <DL>: ";text:text".
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its HTML mode is changed
 * @return string the line, as <dt>/<dd> markup if it matched
 */
function wtm_list_dl($line, &$trfrm) {
    if (preg_match("/(^;+)(.*?):(.*$)/", $line, $matches)) {
        // the number of leading ';' is the nesting depth
        $numtabs = strlen($matches[1]);
        $line = $trfrm->SetHTMLMode('dl', NESTED_LEVEL, $numtabs);
        // append (not assign) so that the tags returned by SetHTMLMode
        // are kept; a term is only emitted when it is not blank
        if (trim($matches[2]) !== '') {
            $line .= '<dt>' . $matches[2];
        }
        $line .= '<dd>' . $matches[3];
    }
    return $line;
}
/**
 * Mode: preformatted text, i.e. <pre>.
 * Lines starting with whitespace are preformatted.
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its HTML mode is changed
 * @return string the line, prefixed with the mode-switch tags if it matched
 */
function wtm_preformatted($line, &$trfrm) {
    if (preg_match("/^\s+/", $line)) {
        $line = $trfrm->SetHTMLMode('pre', ZERO_LEVEL, 0) . $line;
    }
    return $line;
}
/**
 * Mode: headings, i.e. <h1>, <h2>, <h3>.
 * Lines starting with !, !!, !!! are headings:
 * "!" gives h3, "!!" gives h2, "!!!" gives h1.
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its HTML mode is changed
 * @return string the line, prefixed with the mode-switch tags if it matched
 */
function wtm_headings($line, &$trfrm) {
    if (preg_match("/^(!{1,3})[^!]/", $line, $whichheading)) {
        // one to three '!' were matched: more marks mean a bigger heading
        $heading = 'h' . (4 - strlen($whichheading[1]));
        $line = preg_replace("/^!+/", '', $line);
        $line = $trfrm->SetHTMLMode($heading, ZERO_LEVEL, 0) . $line;
    }
    return $line;
}
/**
 * Default mode: simple text paragraph.
 * Registered last, so it only runs when no other mode markup matched.
 *
 * @param string $line   current line
 * @param object &$trfrm WikiTransform object; its HTML mode is changed
 * @return string the line, prefixed with the mode-switch tags
 */
function wtm_paragraph($line, &$trfrm) {
    $line = $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $line;
    return $line;
}