1 <?php rcs_id('$Id: transform.php,v 1.9 2001-02-04 18:22:34 ahollosi Exp $');
// Categories of transformer functions; one of these is passed as $type
// to register().
3 define('WT_TOKENIZER', 1);
4 define('WT_SIMPLE_MARKUP', 2);
// do_transform() skips functions of this type for a line once mode_set is set.
5 define('WT_MODE_MARKUP', 3);
10 function WikiTransform() -- init
12 function register($type, $function)
13 Registers transformer functions
14 This should be done *before* calling do_transform
16 $type ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
17 Currently only WT_MODE_MARKUP has a special meaning.
18 If one WT_MODE_MARKUP really sets the html mode, then
19 all successive WT_MODE_MARKUP functions are skipped
21 $function ... function name
23 function SetHTMLMode($tag, $tagtype, $level)
24 Wiki HTML output can, at any given time, be in only one mode.
25 It will be something like Unordered List, Preformatted Text,
26 plain text etc. When we change modes we have to issue close tags
27 for one mode and start tags for another.
28 SetHTMLMode takes care of this.
30 $tag ... HTML tag to insert
31 $tagtype ... ZERO_LEVEL - close all open tags before inserting $tag
32 NESTED_LEVEL - close tags until depths match
33 $level ... nesting level (depth) of $tag
34 nesting is arbitrarily limited to 10 levels
36 function do_transform($html, $content)
37 contains main-loop and calls transformer functions
39 $html ... HTML header (if needed, otherwise '')
40 $content ... wiki markup as array of lines
44 // public variables (only meaningful during do_transform)
45 var $linenumber; // index of the line currently being transformed (counted from 0)
46 var $replacements; // replacement strings for the tokens of the current line, indexed by token number
47 var $tokencounter; // number of tokens stored in $replacements for the current line
50 var $content; // wiki markup, array of lines (set by do_transform)
51 var $mode_set; // set to 1 by SetHTMLMode; while set, do_transform skips WT_MODE_MARKUP functions
52 var $trfrm_func; // registered functions as array($type, $function) entries, in order of registering
53 var $stack; // Stack object used by SetHTMLMode (keeping track of open tags)
// Constructor: starts with an empty list of registered functions and a
// new, empty tag stack.
56 function WikiTransform()
58 $this->trfrm_func = array();
59 $this->stack = new Stack;
62 // register transformation functions
// Appends array($type, $function) to the list of registered functions;
// do_transform() calls them in the order in which they were registered.
// $type ... one of WT_TOKENIZER, WT_SIMPLE_MARKUP, WT_MODE_MARKUP
// $function ... name of the function to call
63 function register($type, $function)
65 $this->trfrm_func[] = array ($type, $function);
68 // sets current mode like list, preformatted text, plain text, ...
69 // takes care of closing (open) tags
// $tag ... HTML tag name to switch to (do_transform passes '' for blank lines
// and after the last line)
// $tagtype ... ZERO_LEVEL or NESTED_LEVEL; any other value ends in ExitWiki()
// $level ... wanted nesting depth; only compared in the NESTED_LEVEL branch
// The close/open tags that have to be emitted are collected in $retvar.
// Callers concatenate this function's result into their HTML output.
70 function SetHTMLMode($tag, $tagtype, $level)
72 $this->mode_set = 1; // in order to prevent other mode markup
76 if ($tagtype == ZERO_LEVEL) {
77 // empty the stack until $level == 0;
78 if ($tag == $this->stack->top()) {
79 return; // same tag? -> nothing to do
// close every tag that is still open
81 while ($this->stack->cnt() > 0) {
82 $closetag = $this->stack->pop();
83 $retvar .= "</$closetag>\n";
// open the new tag and remember it on the stack
87 $retvar .= "<$tag>\n";
88 $this->stack->push($tag);
92 } elseif ($tagtype == NESTED_LEVEL) {
93 if ($level < $this->stack->cnt()) {
94 // $tag has fewer nestings (old: tabs) than stack,
95 // reduce stack to that tab count
96 while ($this->stack->cnt() > $level) {
97 $closetag = $this->stack->pop();
// pop() yielding false is treated as an exhausted stack
98 if ($closetag == false) {
99 //echo "bounds error in tag stack";
102 $retvar .= "</$closetag>\n";
105 // if list type isn't the same,
106 // back up one more and push new tag
107 if ($tag != $this->stack->top()) {
108 $closetag = $this->stack->pop();
109 $retvar .= "</$closetag><$tag>\n";
110 $this->stack->push($tag);
113 } elseif ($level > $this->stack->cnt()) {
114 // we add the diff to the stack
115 // stack might be zero
// one opening $tag is emitted and pushed per missing level
116 while ($this->stack->cnt() < $level) {
117 $retvar .= "<$tag>\n";
118 $this->stack->push($tag);
119 if ($this->stack->cnt() > 10) {
120 // arbitrarily limit tag nesting
121 ExitWiki(gettext ("Stack bounds exceeded in SetHTMLOutputMode"));
125 } else { // $level == $stack->cnt()
126 if ($tag == $this->stack->top()) {
127 return; // same tag? -> nothing to do
129 // different tag - close old one, add new one
130 $closetag = $this->stack->pop();
131 $retvar .= "</$closetag>\n";
132 $retvar .= "<$tag>\n";
133 $this->stack->push($tag);
138 } else { // unknown $tagtype
139 ExitWiki ("Passed bad tag type value in SetHTMLOutputMode");
147 // work horse and main loop
148 // this function does the transform from wiki markup to HTML
// $html ... HTML generated so far; the transformed lines are appended to it
// $content ... wiki markup as array of lines
149 function do_transform($html, $content)
151 global $FieldSeparator;
153 $this->content = $content;
155 // Loop over all lines of the page and apply transformation rules
156 $numlines = count($this->content);
157 for ($lnum = 0; $lnum < $numlines; $lnum++)
// per-line state: no tokens yet, remember which line is being processed
159 $this->tokencounter = 0;
160 $this->replacements = array();
161 $this->linenumber = $lnum;
162 $line = $this->content[$lnum];
164 // blank lines clear the current mode
// (a line consisting of a lone "\r" counts as blank, too)
165 if (!strlen($line) || $line == "\r") {
166 $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
172 // main loop applying all registered functions
173 // tokenizers, markup, html mode, ...
174 // functions are executed in order of registering
175 for ($func = 0; $func < count($this->trfrm_func); $func++) {
176 // if HTMLmode is already set then skip all following
177 // WT_MODE_MARKUP functions
178 if ($this->mode_set &&
179 ($this->trfrm_func[$func][0] == WT_MODE_MARKUP)) {
182 // call registered function
// (called by name with the current line and this object; its result
// becomes the new line)
183 $line = $this->trfrm_func[$func][1]($line, $this);
186 // Replace tokens ($replacements was filled by wtt_* functions)
// token number $i appears in the line as
// $FieldSeparator.$FieldSeparator.$i.$FieldSeparator
187 for ($i = 0; $i < $this->tokencounter; $i++) {
188 $line = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $this->replacements[$i], $line);
191 $html .= $line . "\n";
// empty tag with ZERO_LEVEL: emits the close tags for everything still on the stack
194 $html .= $this->SetHTMLMode('', ZERO_LEVEL, 0);
198 // end do_transform()
201 // end class WikiTransform
204 //////////////////////////////////////////////////////////
// Create the transformer, register all transformation functions and run
// the transformation over the lines in $pagehash['content'].
206 $transform = new WikiTransform;
208 // register functions
209 // functions are applied in order of registering
// tokenizers are registered first
211 $transform->register(WT_TOKENIZER, 'wtt_bracketlinks');
212 $transform->register(WT_TOKENIZER, 'wtt_urls');
213 $transform->register(WT_TOKENIZER, 'wtt_bumpylinks');
// simple markup, applied to every line
215 $transform->register(WT_SIMPLE_MARKUP, 'wtm_htmlchars');
216 $transform->register(WT_SIMPLE_MARKUP, 'wtm_hr');
217 $transform->register(WT_SIMPLE_MARKUP, 'wtm_linebreak');
218 $transform->register(WT_SIMPLE_MARKUP, 'wtm_bold_italics');
219 $transform->register(WT_SIMPLE_MARKUP, 'wtm_title_search');
220 $transform->register(WT_SIMPLE_MARKUP, 'wtm_fulltext_search');
221 $transform->register(WT_SIMPLE_MARKUP, 'wtm_mostpopular');
// mode markup: once one of these has set the HTML mode for a line, the
// remaining ones are skipped, so wtm_paragraph (which always sets a mode)
// has to stay last
223 $transform->register(WT_MODE_MARKUP, 'wtm_list_ul');
224 $transform->register(WT_MODE_MARKUP, 'wtm_list_ol');
225 $transform->register(WT_MODE_MARKUP, 'wtm_list_dl');
226 $transform->register(WT_MODE_MARKUP, 'wtm_preformatted');
227 $transform->register(WT_MODE_MARKUP, 'wtm_headings');
228 $transform->register(WT_MODE_MARKUP, 'wtm_paragraph');
// the generated HTML is appended to whatever $html already holds
230 $html = $transform->do_transform($html, $pagehash['content']);
234 Requirements for functions registered to WikiTransform:
236 Signature: function wtm_xxxx($line, &$transform)
238 $line ... current line containing wiki markup
239 (Note: it may already contain HTML from other transform functions)
240 &$transform ... WikiTransform object -- for the public variables of
241 this object and their use, see above.
243 Functions have to return $line (doesn't matter if modified or not)
244 All conversion should take place inside $line.
246 Tokenizer functions should use $transform->replacements to store
247 the replacement strings. Also, they have to keep track of
248 $transform->tokencounter. See functions below. Back substitution
249 of tokenized strings is done by do_transform().
254 //////////////////////////////////////////////////////////
255 // Tokenizer functions
257 // helper function which does actual tokenizing and is
258 // called by other wtt_* functions
// $str ... string to tokenize
// $pattern ... regular expression fragment; it is inserted into "/.../",
// so it must not contain an unescaped "/"
// &$orig ... array to which every matched string is appended
// &$ntokens ... number of the next token; incremented once per match
259 function wt_tokenize($str, $pattern, &$orig, &$ntokens) {
260 global $FieldSeparator;
261 // Find any strings in $str that match $pattern and
262 // store them in $orig, replacing them with tokens
263 // starting at number $ntokens - returns tokenized string
// $matches[1] is the text before the first match, $matches[2] the match itself
265 while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
266 $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
267 $new .= $matches[1] . $linktoken;
268 $orig[] = $matches[2];
// continue with the part of $str after the match
269 $str = substr($str, strlen($matches[0]));
276 // New linking scheme: links are in brackets. This will
277 // emulate typical HTML linking as well as Wiki linking.
// $line ... current line of wiki markup
// &$trfrm ... WikiTransform object; its replacements and tokencounter are updated
278 function wtt_bracketlinks($line, &$trfrm)
// $n ... next replacement to post-process, $ntok ... running token count
281 $n = $ntok = $trfrm->tokencounter;
// '[[' is tokenized first; its stored replacement becomes a single literal '['
282 $line = wt_tokenize($line, '\[\[', $trfrm->replacements, $ntok);
284 $trfrm->replacements[$n++] = '[';
287 // match anything else between brackets
288 $line = wt_tokenize($line, '\[.+?\]', $trfrm->replacements, $ntok);
// the stored bracket text is replaced by the 'link' entry of ParseAndLink()'s result
290 $link = ParseAndLink($trfrm->replacements[$n]);
291 $trfrm->replacements[$n++] = $link['link'];
294 $trfrm->tokencounter = $ntok;
299 // replace all URL's with tokens, so we don't confuse them
300 // with Wiki words later. Wiki words in URL's break things.
301 // URLs preceded by a '!' are not linked
// $line ... current line of wiki markup
// &$trfrm ... WikiTransform object; its replacements and tokencounter are updated
302 function wtt_urls($line, &$trfrm)
// NOTE(review): $AllowedProtocols is inserted into a regex group, so it is
// presumably a '|'-separated list of protocol names -- confirm
304 global $AllowedProtocols;
306 $n = $ntok = $trfrm->tokencounter;
// the last character class also excludes ',', '.' and '?', so a URL match
// never ends in one of these punctuation characters
307 $line = wt_tokenize($line, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $trfrm->replacements, $ntok);
// leading '!': store the URL text without the '!'
309 if($trfrm->replacements[$n][0] == '!')
310 $trfrm->replacements[$n] = substr($trfrm->replacements[$n], 1);
// replace the stored URL by the result of LinkURL()
312 $trfrm->replacements[$n] = LinkURL($trfrm->replacements[$n]);
316 $trfrm->tokencounter = $ntok;
320 // Link Wiki words (BumpyText)
321 // Wikiwords preceded by a '!' are not linked
// $line ... current line of wiki markup
// &$trfrm ... WikiTransform object; its replacements and tokencounter are updated
322 function wtt_bumpylinks($line, &$trfrm)
324 global $WikiNameRegexp, $dbi;
326 $n = $ntok = $trfrm->tokencounter;
// tokenize every wiki word, including an optional leading '!'
327 $line = wt_tokenize($line, "!?$WikiNameRegexp", $trfrm->replacements, $ntok);
329 $old = $trfrm->replacements[$n];
// leading '!': keep the plain word without the '!'
330 if ($old[0] == '!') {
331 $trfrm->replacements[$n] = substr($old,1);
// otherwise the link markup depends on whether IsWikiPage() reports the page
332 } elseif (IsWikiPage($dbi, $old)) {
333 $trfrm->replacements[$n] = LinkExistingWikiWord($old);
335 $trfrm->replacements[$n] = LinkUnknownWikiWord($old);
340 $trfrm->tokencounter = $ntok;
344 // end of tokenizer functions
345 //////////////////////////////////////////////////////////
348 //////////////////////////////////////////////////////////
349 // basic simple markup functions
// escape HTML metachars
// Replaces '&', '>' and '<' in $line with their character entities so that
// wiki text cannot inject markup. '&' has to be handled first; otherwise the
// ampersands of the entities produced for '>' and '<' would be escaped again.
// $line ... current line of wiki markup
// &$transformer ... WikiTransform object (not used here)
// Returns the escaped line.
function wtm_htmlchars($line, &$transformer)
{
   $line = str_replace('&', '&amp;', $line);
   $line = str_replace('>', '&gt;', $line);
   $line = str_replace('<', '&lt;', $line);
   return $line;
}
// four or more dashes to <hr>
// Only a run of dashes at the very start of the line is replaced; text that
// follows the dashes is kept. Uses preg_replace because the ereg_* functions
// are no longer available in current PHP versions.
// $line ... current line of wiki markup
// &$transformer ... WikiTransform object (not used here)
// Returns the (possibly modified) line.
function wtm_hr($line, &$transformer) {
   return preg_replace('/^-{4,}/', '<hr>', $line);
}
365 // %%% are linebreaks
// Replaces every occurrence of '%%%' in $line with '<br>' and returns the
// result. $transformer belongs to the common signature and is not used here.
366 function wtm_linebreak($line, &$transformer) {
367 return str_replace('%%%', '<br>', $line);
// __text__ becomes <strong>text</strong>, ''text'' becomes <em>text</em>.
// Both patterns are non-greedy, so several pairs on one line are converted
// separately. $transformer is not used here.
371 function wtm_bold_italics($line, &$transformer) {
372 $line = preg_replace('|(__)(.*?)(__)|', '<strong>\2</strong>', $line);
373 $line = preg_replace("|('')(.*?)('')|", '<em>\2</em>', $line);
379 //////////////////////////////////////////////////////////
380 // some tokens to be replaced by (dynamic) content
382 // wiki token: title search dialog
// Replaces every '%%Search%%' in $line with an HTML form whose text field is
// named "search" and whose action is $ScriptUrl.
// NOTE(review): $ScriptUrl has to be in scope here (wtm_mostpopular declares
// it global) -- confirm that this function does so, too.
383 function wtm_title_search($line, &$transformer) {
// the form is only built when the token is actually present
385 if (strpos($line, '%%Search%%') !== false) {
386 $html = "<form action=\"$ScriptUrl\">\n" .
387 "<input type=text size=30 name=search>\n" .
// label of the submit button is run through gettext()
388 "<input type=submit value=\"". gettext("Search") .
390 $line = str_replace('%%Search%%', $html, $line);
// wiki token: fulltext search dialog
// Replaces every '%%Fullsearch%%' in $line with an HTML form whose text field
// is named "full" and whose action is $ScriptUrl.
// $line ... current line (may already contain HTML from other functions)
// &$transformer ... WikiTransform object (not used here)
// Returns the (possibly modified) line.
function wtm_fulltext_search($line, &$transformer) {
   global $ScriptUrl;
   // the form is only built when the token is actually present
   if (strpos($line, '%%Fullsearch%%') !== false) {
      // the text input tag has to be closed with '>' before the newline
      $html = "<form action=\"$ScriptUrl\">\n" .
         "<input type=text size=30 name=full>\n" .
         "<input type=submit value=\"". gettext("Search") .
         "\"></form>";
      $line = str_replace('%%Fullsearch%%', $html, $line);
   }
   return $line;
}
408 // wiki token: mostpopular list
// Replaces every '%%Mostpopular%%' in $line with one <DD> entry per page
// delivered by MostPopularNextMatch().
409 function wtm_mostpopular($line, &$transformer) {
410 global $ScriptUrl, $dbi;
// the query is only run when the token is actually present
411 if (strpos($line, '%%Mostpopular%%') !== false) {
// MOST_POPULAR_LIST_LENGTH is passed on to InitMostPopular()
// (presumably the maximum number of entries -- confirm)
412 $query = InitMostPopular($dbi, MOST_POPULAR_LIST_LENGTH);
// each entry shows the hit count followed by a link to the page
414 while ($qhash = MostPopularNextMatch($dbi, $query)) {
415 $html .= "<DD>$qhash[hits] ... " . LinkExistingWikiWord($qhash['pagename']) . "\n";
418 $line = str_replace('%%Mostpopular%%', $html, $line);
424 //////////////////////////////////////////////////////////
425 // mode markup functions
428 // tabless markup for unordered, ordered, and dictionary lists
429 // ul/ol list types can be mixed, so we only look at the last
430 // character. Changes e.g. from "**#*" to "###*" go unnoticed.
431 // and wouldn't make a difference to the HTML layout anyway.
433 // unordered lists <UL>: "*"
434 // has to be registered before list OL
435 function wtm_list_ul($line, &$trfrm) {
// a leading run of '#'/'*' ending in '*' that is followed by a character
// other than '#'; a line consisting of the marker only does not match
436 if (preg_match("/^([#*]*\*)[^#]/", $line, $matches)) {
// the length of the marker run is the nesting level
437 $numtabs = strlen($matches[1]);
// strip the marker and put the mode tags plus '<li>' in front of the text
438 $line = preg_replace("/^([#*]*\*)/", '', $line);
439 $html = $trfrm->SetHTMLMode('ul', NESTED_LEVEL, $numtabs) . '<li>';
440 $line = $html . $line;
445 // ordered lists <OL>: "#"
446 function wtm_list_ol($line, &$trfrm) {
// a leading run of '#'/'*' that ends in '#'
447 if (preg_match("/^([#*]*\#)/", $line, $matches)) {
// the length of the marker run is the nesting level
448 $numtabs = strlen($matches[1]);
// strip the marker and put the mode tags plus '<li>' in front of the text
449 $line = preg_replace("/^([#*]*\#)/", "", $line);
450 $html = $trfrm->SetHTMLMode('ol', NESTED_LEVEL, $numtabs) . '<li>';
451 $line = $html . $line;
// definition lists <DL>: ";text:text"
// The number of leading ';' is the nesting level, the text up to the first
// ':' is the term and the rest of the line is the definition.
// $line ... current line of wiki markup
// &$trfrm ... WikiTransform object; its SetHTMLMode() supplies the tags
// needed to switch to 'dl' mode at this level
// Returns the converted line; lines that do not match are returned unchanged.
function wtm_list_dl($line, &$trfrm) {
   if (preg_match("/(^;+)(.*?):(.*$)/", $line, $matches)) {
      $numtabs = strlen($matches[1]);
      $line = $trfrm->SetHTMLMode('dl', NESTED_LEVEL, $numtabs);
      // append the term: assigning here would throw away the tags just
      // returned by SetHTMLMode. Compare against '' so that a term
      // of "0" is not dropped.
      if (trim($matches[2]) !== '')
         $line .= '<dt>' . $matches[2];
      $line .= '<dd>' . $matches[3];
   }
   return $line;
}
469 // mode: preformatted text, i.e. <pre>
470 function wtm_preformatted($line, &$trfrm) {
// a line that starts with whitespace switches to 'pre' mode; the line
// itself, including its leading whitespace, is kept after the mode tags
471 if (preg_match("/^\s+/", $line)) {
472 $line = $trfrm->SetHTMLMode('pre', ZERO_LEVEL, 0) . $line;
477 // mode: headings, i.e. <h1>, <h2>, <h3>
478 // lines starting with !,!!,!!! are headings
479 function wtm_headings($line, &$trfrm) {
// one to three '!' followed by a character other than '!'; a line that
// starts with four or more '!' therefore does not match
480 if (preg_match("/^(!{1,3})[^!]/", $line, $whichheading)) {
// more '!' means a bigger heading: ! -> h3, !! -> h2, !!! -> h1
481 if($whichheading[1] == '!') $heading = 'h3';
482 elseif($whichheading[1] == '!!') $heading = 'h2';
483 elseif($whichheading[1] == '!!!') $heading = 'h1';
// strip the marker and put the mode tags in front of the heading text
484 $line = preg_replace("/^!+/", '', $line);
485 $line = $trfrm->SetHTMLMode($heading, ZERO_LEVEL, 0) . $line;
490 // default mode: simple text paragraph
// Unconditionally requests 'p' mode and puts the tags returned by
// SetHTMLMode() in front of the line.
491 function wtm_paragraph($line, &$trfrm) {
492 $line = $trfrm->SetHTMLMode('p', ZERO_LEVEL, 0) . $line;