1 <?php rcs_id('$Id: BlockParser.php,v 1.8 2002-01-28 03:59:30 dairiki Exp $');
2 /* Copyright (C) 2002, Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * This file is part of PhpWiki.
6 * PhpWiki is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * PhpWiki is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with PhpWiki; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 require_once('lib/HtmlElement.php');
22 require_once('lib/transform.php');
25 extends WikiTransform {
26 function InlineTransform() {
27 global $WikiNameRegexp, $AllowedProtocols, $InterWikiLinkRegexp;
29 $this->WikiTransform();
32 // functions are applied in order of registering
34 $this->register(WT_SIMPLE_MARKUP, 'wtm_plugin_link');
36 $this->register(WT_TOKENIZER, 'wtt_doublebrackets', '\[\[');
37 //$this->register(WT_TOKENIZER, 'wtt_footnotes', '^\[\d+\]');
38 //$this->register(WT_TOKENIZER, 'wtt_footnoterefs', '\[\d+\]');
39 $this->register(WT_TOKENIZER, 'wtt_bracketlinks', '\[.+?\]');
40 $this->register(WT_TOKENIZER, 'wtt_urls',
41 "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]");
43 if (function_exists('wtt_interwikilinks')) {
44 $this->register(WT_TOKENIZER, 'wtt_interwikilinks',
45 pcre_fix_posix_classes("!?(?<![[:alnum:]])") .
46 "$InterWikiLinkRegexp:[^\\s.,;?()]+");
48 $this->register(WT_TOKENIZER, 'wtt_bumpylinks', "!?$WikiNameRegexp");
50 $this->register(WT_SIMPLE_MARKUP, 'wtm_htmlchars');
51 $this->register(WT_SIMPLE_MARKUP, 'wtm_linebreak');
52 $this->register(WT_SIMPLE_MARKUP, 'wtm_bold_italics');
56 function TransformInline ($text) {
57 // The old transform code does funny things with trailing
60 $trfm = new InlineTransform;
61 preg_match('/\s*$/', $text, $m);
63 // This "\n" -> "\r" hackage is to fool the old transform code
64 // into continuing italics across lines.
65 $in = str_replace("\n", "\r", $text);
66 $out = preg_replace('/\s*$/', '', AsXML($trfm->do_transform('', array($in))));
67 $out = str_replace("\r", "\n", $out);
71 if (false && $out != $text) {
72 echo(" IN <pre>'" . htmlspecialchars($text) . "'</pre><br>\n");
73 echo("OUT <pre>'" . htmlspecialchars($out) . "'</pre><br>\n");
75 return new RawXml($out);
79 ////////////////////////////////////////////////////////////////
82 define("BLOCK_NEVER_TIGHTEN", 0);
83 define("BLOCK_NOTIGHTEN_AFTER", 1);
84 define("BLOCK_NOTIGHTEN_BEFORE", 2);
85 define("BLOCK_NOTIGHTEN_EITHER", 3);
94 function parse (&$input, $tighten_mode = BLOCK_NEVER_TIGHTEN) {
97 for ($block = BlockParser::_nextBlock($input); $block; $block = $nextBlock) {
98 while ($nextBlock = BlockParser::_nextBlock($input)) {
99 // Attempt to merge current with following block.
100 if (! $block->merge($nextBlock))
101 break; // can't merge
104 $output = $block->finish($tighten_mode);
106 if (is_array($output))
107 foreach ($output as $x)
110 $content[] = $output;
115 function _nextBlock (&$input) {
116 global $Block_BlockTypes;
121 foreach ($Block_BlockTypes as $type) {
122 if ($m = $input->match($type->_re)) {
123 BlockParser::_debug('>', get_class($type), $input);
126 $block->_followsBreak = $input->atBreak();
127 if (!$block->_parse($input, $m)) {
128 BlockParser::_debug('[', "_parse failed", $input);
131 $block->_preceedsBreak = $input->eatSpace();
132 BlockParser::_debug('<', get_class($type), $input);
137 if ($input->getDepth() == 0) {
138 // We should never get here.
139 //preg_match('/.*/A', substr($this->_text, $this->_pos), $m);// get first line
140 trigger_error("Couldn't match block: '".rawurlencode($m[0])."'", E_USER_NOTICE);
142 //FIXME:$this->_debug("no match");
146 function _debug ($tab, $msg, $input) {
149 $tab = str_repeat($tab, $input->getDepth() + 1);
150 printXML(HTML::div("$tab $msg: at: '",
151 HTML::tt($input->where()),
/**
 * Wraps the capture array produced when a block pattern matches.
 *
 * The array is expected to have the layout created by the regexp that
 * BlockParser_Input::match() builds, "/(prefix)(pattern)/":
 *   index 0   - everything that matched (prefix included),
 *   index 1   - the line prefix,
 *   index 2   - the block pattern's own match,
 *   index 3+  - the block pattern's sub-groups.
 */
class BlockParser_Match {
    var $_m = array();

    /**
     * @param array $match_data Capture array as filled in by preg_match().
     */
    function __construct ($match_data) {
        $this->_m = $match_data;
    }

    /**
     * PHP 4 style constructor.
     *
     * PHP 8 no longer treats a method named after the class as the
     * constructor, so the real work lives in __construct(); this
     * method is kept so that PHP 4 still constructs the object.
     */
    function BlockParser_Match ($match_data) {
        $this->__construct($match_data);
    }

    /**
     * @return string The text matched by the line prefix.
     */
    function getPrefix () {
        return $this->_m[1];
    }

    /**
     * Fetch a capture group of the block pattern with the line prefix
     * stripped from the start of every continuation line.
     *
     * @param int $n Group number within the block pattern; 0 is the
     *               whole pattern match.
     * @return string
     */
    function getMatch ($n = 0) {
        $text = $this->_m[$n + 2];
        $prefix = $this->getPrefix();
        return str_replace("\n$prefix", "\n", $text);
    }
}
177 class BlockParser_Input {
179 function BlockParser_Input ($text) {
180 $this->_text = $text;
184 // Expand leading tabs.
185 // FIXME: do this better.
187 // We want to ensure the only characters matching \s are ' ' and "\n".
189 $this->_text = preg_replace('/(?![ \n])\s/', ' ', $this->_text);
190 assert(!preg_match('/(?![ \n])\s/', $this->_text));
191 if (!preg_match('/\n$/', $this->_text))
192 $this->_text .= "\n";
194 $this->_set_prefix ('');
195 $this->_atBreak = false;
/**
 * Install the line prefixes and rebuild everything derived from them.
 *
 * $prefix is the regexp fragment that must precede the block pattern
 * at the current position; $next_prefix is the fragment substituted
 * for every '^' inside a block pattern, i.e. the one used for the
 * lines that follow.
 *
 * @param string       $prefix      Regexp fragment for the current line.
 * @param string|false $next_prefix Regexp fragment for following lines;
 *                                  false means "same as $prefix".
 */
function _set_prefix ($prefix, $next_prefix = false) {
    if ($next_prefix === false)
        $next_prefix = $prefix;

    $this->_prefix = $prefix;
    $this->_next_prefix = $next_prefix;

    // Cached patterns have the old prefixes baked in.
    $this->_regexp_cache = array();

    // A blank line is an optional prefix followed by only white space.
    // (This used to read "(:?$prefix)?", which is a capturing group
    // that additionally accepts a stray leading ':'.)
    $blank = "(?:$prefix)?\s*\n";
    $this->_blank_pat = "/$blank/A";
    // End of this (sub-)input: real end of text, or a line which is
    // neither blank nor introduced by the prefix.
    $this->_eof_pat = "/\\Z|(?!$blank|{$prefix}.)/A";
}
214 return preg_match($this->_eof_pat, substr($this->_text, $this->_pos));
/**
 * Try to match a block pattern at the current input position.
 *
 * The pattern is rewritten once and then cached: every '^' gets the
 * next-line prefix appended, and the whole thing is wrapped as
 * "/(prefix)(pattern)/Am".  Since that wrapping puts two capture
 * groups in front of the pattern's own groups, numeric backreferences
 * inside the pattern are shifted up by two.
 *
 * @param string $regexp Block pattern, without delimiters.
 * @return BlockParser_Match|false Match object, or false if the
 *                                 pattern does not match here.
 */
function match ($regexp) {
    $cache = &$this->_regexp_cache;
    if (!isset($cache[$regexp])) {
        // Fix up any '^'s in pattern (add our prefix)
        $re = preg_replace('/(?<! [ [ \\\\ ]) \^ /x',
                           '^' . $this->_next_prefix, $regexp);

        // Fix any match backreferences (like '\1').
        // A callback is used instead of the /e modifier, which no
        // longer exists as of PHP 7.
        $re = preg_replace_callback('/(?<= [^ \\\\ ] [ \\\\ ] )( \\d+ )/x',
                                    array($this, '_shift_backref'), $re);

        $re = "/(" . $this->_prefix . ")($re)/Am";
        $cache[$regexp] = $re;
    }
    else
        $re = $cache[$regexp];

    if (preg_match($re, substr($this->_text, $this->_pos), $m)) {
        return new BlockParser_Match($m);
    }
    return false;
}

/**
 * preg_replace_callback() helper for match(): renumber one
 * backreference to account for the two groups match() adds in front.
 *
 * @param array $m Captures; $m[1] holds the backreference digits.
 * @return string The shifted group number.
 */
function _shift_backref ($m) {
    return (string) ((int) $m[1] + 2);
}
239 function accept ($match) {
240 $text = $match->_m[0];
242 assert(substr($this->_text, $this->_pos, strlen($text)) == $text);
243 $this->_pos += strlen($text);
246 assert(preg_match("/\n$/", $text));
248 if ($this->_next_prefix != $this->_prefix)
249 $this->_set_prefix($this->_next_prefix);
251 $this->_atBreak = false;
/**
 * Consume blank lines.
 *
 * Skips over every consecutive blank line at the current position.
 * When at least one is skipped, the pending next-line prefix is made
 * current and the input is flagged as being at a break.
 *
 * @return bool The at-break flag: true if any blank lines were
 *              consumed (or if the flag was already set).
 */
function eatSpace () {
    $first = true;
    while (preg_match($this->_blank_pat, substr($this->_text, $this->_pos), $hit)) {
        $this->_pos += strlen($hit[0]);
        if ($first) {
            // Only the first blank line switches the prefix; the
            // remaining ones are matched against the rebuilt pattern.
            $first = false;
            if ($this->_next_prefix != $this->_prefix)
                $this->_set_prefix($this->_next_prefix);
            $this->_atBreak = true;
        }
    }
    return $this->_atBreak;
}
275 function atBreak () {
276 return $this->_atBreak;
279 function getDepth () {
280 return $this->_depth;
285 if (($m = $this->match('.*\n')))
286 return sprintf('[%s]%s', $m->getPrefix(), $m->getMatch());
/**
 * Create a nested input which reads from this one behind an
 * additional line prefix.
 *
 * @param string       $initial_prefix    Extra prefix regexp for the
 *                                         first line.
 * @param string|false $subsequent_prefix Extra prefix regexp for the
 *                                         following lines; false means
 *                                         "same as $initial_prefix".
 * @return BlockParser_InputSubBlock
 */
function subBlock ($initial_prefix, $subsequent_prefix = false) {
    $following = ($subsequent_prefix === false)
        ? $initial_prefix
        : $subsequent_prefix;

    return new BlockParser_InputSubBlock ($this, $initial_prefix, $following);
}
298 class BlockParser_InputSubBlock extends BlockParser_Input
300 function BlockParser_InputSubBlock (&$block, $initial_prefix, $subsequent_prefix) {
301 $this->_text = &$block->_text;
302 $this->_pos = &$block->_pos;
303 $this->_atBreak = &$block->_atBreak;
305 $this->_depth = $block->_depth + 1;
307 $this->_set_prefix($block->_prefix . $initial_prefix,
308 $block->_next_prefix . $subsequent_prefix);
318 var $_followsBreak = false;
319 var $_preceedsBreak = false;
320 var $_content = array();
323 function _parse (&$input, $match) {
324 trigger_error('pure virtual', E_USER_ERROR);
327 function _pushContent ($c) {
329 $c = func_get_args();
331 $this->_content[] = $x;
334 function isTerminal () {
338 function merge ($followingBlock) {
342 function finish (/*$tighten*/) {
343 return new HtmlElement($this->_tag, $this->_attr, $this->_content);
348 class CompoundBlock extends Block
350 function isTerminal () {
356 class Block_blockquote extends CompoundBlock
358 var $_tag ='blockquote';
360 var $_re = '\ +(?=\S)';
362 function _parse (&$input, $m) {
363 $indent = $m->getMatch();
364 $this->_depth = strlen($indent);
365 $this->_content = BlockParser::parse($input->subBlock($indent),
366 BLOCK_NOTIGHTEN_EITHER);
/**
 * Merge with a directly following blockquote.
 *
 * This block is finished and placed in front of the following
 * block's content, and the combined list becomes this block's
 * content.
 *
 * @param object $nextBlock The block parsed right after this one.
 * @return bool True if merged, false if $nextBlock is not a
 *              Block_blockquote.
 */
function merge ($nextBlock) {
    // get_class() yields the lower-cased name on PHP 4 but the name as
    // declared on PHP 5 and later, so compare case-insensitively.
    if (strcasecmp(get_class($nextBlock), 'block_blockquote') != 0)
        return false;
    assert ($nextBlock->_depth < $this->_depth);

    $content = $nextBlock->_content;
    array_unshift($content, $this->finish());
    $this->_content = $content;
    return true;
}
382 class Block_list extends CompoundBlock
384 //var $_tag = 'ol' or 'ul';
385 var $_re = '\ {0,4}([*+#]|-(?!-)|o(?=\ ))\ *(?=\S)';
387 function _parse (&$input, $m) {
388 // A list as the first content in a list is not allowed.
391 // Should markup as <ul><li>* Item</li></ul>,
        // not <ul><li><ul><li>Item</li></ul></li></ul>.
394 if (preg_match('/[-*o+#;]\s*$/', $m->getPrefix()))
397 $prefix = $m->getMatch();
398 $leader = preg_quote($prefix, '/');
399 $indent = sprintf("\\ {%d}", strlen($prefix));
401 $bullet = $m->getMatch(1);
402 $this->_tag = $bullet == '#' ? 'ol' : 'ul';
404 $text = $input->subBlock($leader, $indent);
405 $content = BlockParser::parse($text, BLOCK_NOTIGHTEN_AFTER);
406 $this->_pushContent(HTML::li(false, $content));
/**
 * Append the items of a directly following list to this one.
 *
 * @param object $nextBlock The block parsed right after this one.
 * @return bool True if merged; false unless $nextBlock is a
 *              Block_list using the same tag as this list.
 */
function merge ($nextBlock) {
    $same_kind = isa($nextBlock, 'Block_list')
        && $this->_tag == $nextBlock->_tag;

    if ($same_kind) {
        $this->_pushContent($nextBlock->_content);
        return true;
    }
    return false;
}
420 class Block_dl extends Block_list
423 var $_re = '(\ {0,4})([^\s!].*):\s*?\n(?=(?:\s*^)+(\1\ +)\S)';
424 // 1-------12--------2 3-----3
426 function _parse (&$input, $m) {
427 $term = TransformInline(rtrim($m->getMatch(2)));
428 $indent = $m->getMatch(3);
432 $this->_pushContent(HTML::dt(false, $term),
434 BlockParser::parse($input->subBlock($indent),
435 BLOCK_NOTIGHTEN_AFTER)));
440 class Block_table_dl_row extends HtmlElement
442 function Block_table_dl_row ($defn) {
443 $this->HtmlElement('tr', /*array('valign' => 'top'), */
444 HTML::td(false, $defn));
448 function setWidth ($ncols) {
449 assert ($ncols >= $this->_ncols);
450 if ($ncols <= $this->_ncols)
452 $last_td = &$this->_content[count($this->_content) - 1];
453 $span = max(1, (int)$last_td->getAttr('colspan'));
454 $last_td->setAttr('colspan', $span + $ncols - $this->_ncols);
455 $this->_ncols = $ncols;
458 function setTerm ($term, $rowspan = 1) {
459 if ($term->isEmpty())
462 $th = HTML::th(/*array('align' => 'right'),*/ $term);
464 $th->setAttr('rowspan', $rowspan);
465 $this->unshiftContent($th);
469 class Block_table_dl extends Block_list
472 var $_attr = array('class' => 'wiki-dl-table',
473 'border' => 2, // FIXME: CSS?
478 var $_re = '(\ {0,4})((?![\s!]).*)?[|]\s*?\n(?=(?:\s*^)+(\1\ +)\S)';
479 // 1-------12-----------2 3-----3
481 function _parse (&$input, $m) {
482 $term = TransformInline(rtrim($m->getMatch(2)));
483 $indent = $m->getMatch(3);
486 $defn = BlockParser::parse($input->subBlock($indent),
487 BLOCK_NOTIGHTEN_AFTER);
489 $this->_pushDefinition($term, $defn);
493 function _pushDefinition ($term, $defn) {
494 if (!is_array($defn))
495 $defn = array($defn);
500 foreach ($defn as $item) {
501 if (! isa($item, 'HtmlElement')
502 || $item->getTag() != 'table'
503 || $item->getAttr('class') != 'wiki-dl-table') {
510 $rows[] = new Block_table_dl_row($grp);
515 foreach ($subtable->getContent() as $tr) {
522 $rows[] = new Block_table_dl_row($grp);
524 $nrows = count($rows);
525 $rows[0]->setTerm($term, $nrows);
526 $this->_pushContent($rows);
530 $rows = &$this->_content;
533 foreach ($rows as $tr)
534 $ncols = max($ncols, $tr->_ncols);
535 foreach ($rows as $key => $tr)
536 $rows[$key]->setWidth($ncols);
538 return parent::finish();
542 class Block_oldlists extends Block_list
544 //var $_tag = 'ol', 'ul', or 'dl';
545 var $_re = '(?:([*#])|;(.*):).*?(?=\S)';
548 function _parse (&$input, $m) {
549 if (!preg_match('/[*#;]*$/A', $m->getPrefix()))
552 $prefix = $m->getMatch();
554 $leader = preg_quote($prefix, '/');
556 $oldindent = '[*#;](?=[#*]|;.*:.*?\S)';
557 $newindent = sprintf('\\ {%d}', strlen($prefix));
558 $indent = "(?:$oldindent|$newindent)";
560 $bullet = $m->getMatch(1);
562 $this->_tag = $bullet == '*' ? 'ul' : 'ol';
567 $term = trim($m->getMatch(2));
569 $this->_pushContent(HTML::dt(false, TransformInline($term)));
573 $item->pushContent(BlockParser::parse($input->subBlock($leader, $indent),
574 BLOCK_NOTIGHTEN_AFTER));
575 $this->_pushContent($item);
580 class Block_pre extends Block
583 var $_re = '<(pre|verbatim)>(.*?(?:\s*\n^.*?)*?)(?<!~)<\/\1>\s*?\n';
584 // 1------------1 2------------------2
586 function _parse (&$input, $m) {
589 $text = $m->getMatch(2);
590 $tag = $m->getMatch(1);
593 $text = TransformInline($text);
595 $this->_pushContent($text);
600 class Block_plugin extends Block
603 var $_attr = array('class' => 'plugin');
604 var $_re = '<\?plugin(?:-form)?.*?(?:\n^.*?)*?(?<!~)\?>\s*?\n';
606 function _parse (&$input, $m) {
608 $loader = new WikiPluginLoader;
610 $this->_pushContent($loader->expandPI($m->getMatch(), $request));
615 class Block_hr extends Block
618 var $_re = '-{4,}\s*?\n';
620 function _parse (&$input, $m) {
626 class Block_heading extends Block
628 var $_re = '(!{1,3})(.*)\n';
630 function _parse (&$input, $m) {
632 $this->_tag = "h" . (5 - strlen($m->getMatch(1)));
633 $this->_pushContent(TransformInline(trim($m->getMatch(2))));
638 class Block_p extends Block
643 function _parse (&$input, $m) {
644 $this->_text = $m->getMatch();
/**
 * Join a directly following paragraph block onto this one.
 *
 * Nothing is merged when this block is flagged as preceding a break,
 * or when the following block is not a Block_p.
 *
 * @param object $nextBlock The block parsed right after this one.
 * @return bool True if the text of $nextBlock was appended.
 */
function merge ($nextBlock) {
    // get_class() yields the lower-cased name on PHP 4 but the name as
    // declared on PHP 5 and later, so compare case-insensitively.
    if ($this->_preceedsBreak
        || strcasecmp(get_class($nextBlock), 'block_p') != 0)
        return false;

    $this->_text .= $nextBlock->_text;
    // The merged paragraph now ends where the appended block ended.
    $this->_preceedsBreak = $nextBlock->_preceedsBreak;
    return true;
}
658 function finish ($tighten) {
659 $this->_pushContent(TransformInline(trim($this->_text)));
661 if ($this->_followsBreak && ($tighten & BLOCK_NOTIGHTEN_AFTER) != 0)
663 elseif ($this->_preceedsBreak && ($tighten & BLOCK_NOTIGHTEN_BEFORE) != 0)
666 return $tighten ? $this->_content : parent::finish();
670 class Block_email_blockquote extends CompoundBlock
672 // FIXME: move CSS to CSS.
673 var $_tag ='blockquote';
674 var $_attr = array('style' => 'border-left-width: medium; border-left-color: #0f0; border-left-style: ridge; padding-left: 1em; margin-left: 0em; margin-right: 0em;');
678 function _parse (&$input, $m) {
679 $prefix = $m->getMatch();
680 $indent = "(?:$prefix|>(?=\s*?\n))";
681 $this->_content = BlockParser::parse($input->subBlock($indent),
682 BLOCK_NOTIGHTEN_EITHER);
687 ////////////////////////////////////////////////////////////////
692 $GLOBALS['Block_BlockTypes'] = array(new Block_oldlists,
696 new Block_blockquote,
700 new Block_email_blockquote,
704 // FIXME: This is temporary, too...
705 function NewTransform ($text) {
709 // Expand leading tabs.
710 // FIXME: do this better. also move it...
711 $text = preg_replace('/^\ *[^\ \S\n][^\S\n]*/me', "str_repeat(' ', strlen('\\0'))", $text);
712 assert(!preg_match('/^\ *\t/', $text));
714 $input = new BlockParser_Input($text);
715 return BlockParser::parse($input);
/**
 * Render a page revision with the transform matching its markup.
 *
 * Revisions whose 'markup' value is 'new' have their packed content
 * passed to NewTransform(); all others have their content passed to
 * do_transform().
 *
 * @param object $revision Revision providing get(), getPackedContent()
 *                         and getContent().
 * @return mixed Whatever the selected transform returns.
 */
function TransformRevision ($revision) {
    $uses_new_markup = ($revision->get('markup') == 'new');

    return $uses_new_markup
        ? NewTransform($revision->getPackedContent())
        : do_transform($revision->getContent());
}
730 // (c-file-style: "gnu")
735 // c-hanging-comment-ender-p: nil
736 // indent-tabs-mode: nil