1 <?php rcs_id('$Id: BlockParser.php,v 1.17 2002-01-31 05:05:11 dairiki Exp $');
2 /* Copyright (C) 2002, Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * This file is part of PhpWiki.
6 * PhpWiki is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * PhpWiki is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with PhpWiki; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 require_once('lib/HtmlElement.php');
21 require_once('lib/InlineParser.php');
23 require_once('lib/transform.php');
25 ////////////////////////////////////////////////////////////////
// Paragraph-tightening modes. BlockParser::parse() takes one of these and
// hands it to each block's finish(). Block_p::finish() tests the value with
// a bitwise AND, so the modes behave as flags: EITHER (3) is the
// combination of AFTER (1) and BEFORE (2), and NEVER_TIGHTEN (0) sets none.
28 define("BLOCK_NEVER_TIGHTEN", 0);
29 define("BLOCK_NOTIGHTEN_AFTER", 1);
30 define("BLOCK_NOTIGHTEN_BEFORE", 2);
31 define("BLOCK_NOTIGHTEN_EITHER", 3);
// Parse the blocks available from $input and collect the finished results.
//   $input        - block input object, taken by reference
//   $tighten_mode - one of the BLOCK_* tighten constants, forwarded
//                   unchanged to every block's finish()
// Each block obtained from _nextBlock() is offered the blocks that follow
// it through merge(). When the loops end for a block it is closed with
// finish() and pushed onto $content; the outer loop then continues with the
// last block fetched ($nextBlock).
// NOTE(review): the lines that create and return $content, and the
// statement guarded by the failed-merge test, are not part of this excerpt
// (presumably a container is built, the loop breaks, and $content is
// returned) - confirm against the full file.
40 function parse (&$input, $tighten_mode = BLOCK_NEVER_TIGHTEN) {
43 for ($block = BlockParser::_nextBlock($input); $block; $block = $nextBlock) {
44 while ($nextBlock = BlockParser::_nextBlock($input)) {
45 // Attempt to merge current with following block.
46 if (! $block->merge($nextBlock))
50 $content->pushContent($block->finish($tighten_mode));
// Try each registered block type, in registry order, against the current
// input position.
//   $input - block input object, taken by reference
// $Block_BlockTypes is the global list of block objects; each one exposes
// its pattern as ->_re. For a type whose pattern matches, the block records
// whether it follows a break, is asked to _parse() the match, and then
// records whether blank lines follow it (eatSpace() consumes them).
// NOTE(review): the lines that create $block and the return statements are
// elided from this excerpt - confirm what is returned on success/failure.
55 function _nextBlock (&$input) {
56 global $Block_BlockTypes;
61 foreach ($Block_BlockTypes as $type) {
62 if ($m = $input->match($type->_re)) {
63 BlockParser::_debug('>', get_class($type), $input);
66 $block->_followsBreak = $input->atBreak();
67 if (!$block->_parse($input, $m)) {
68 BlockParser::_debug('[', "_parse failed", $input);
71 $block->_preceedsBreak = $input->eatSpace();
72 BlockParser::_debug('<', get_class($type), $input);
// Only the outermost input (depth 0) reports an unmatched block.
77 if ($input->getDepth() == 0) {
78 // We should never get here.
79 //preg_match('/.*/A', substr($this->_text, $this->_pos), $m);// get first line
// NOTE(review): with the preg_match() above commented out, $m here is only
// whatever the last match() attempt in the loop assigned, so $m[0] does not
// look like it can hold the offending line - verify.
80 trigger_error("Couldn't match block: '".rawurlencode($m[0])."'", E_USER_NOTICE);
82 //FIXME:$this->_debug("no match");
// Emit a trace line for the parser.
//   $tab   - marker string, repeated (input depth + 1) times
//   $msg   - text to show after the marker
//   $input - block input object; where() supplies the current position
// NOTE(review): the lines between the header and the str_repeat() call are
// elided, so whether tracing is normally enabled cannot be seen here.
86 function _debug ($tab, $msg, $input) {
89 $tab = str_repeat($tab, $input->getDepth() + 1);
90 printXML(HTML::div("$tab $msg: at: '",
91 HTML::tt($input->where()),
// Wrapper around the preg_match() result array produced by
// BlockParser_Input::match().
97 class BlockParser_Match {
// $match_data is stored as-is in $this->_m.
98 function BlockParser_Match ($match_data) {
99 $this->_m = $match_data;
// NOTE(review): body elided in this excerpt; match() wraps the prefix in
// the first capturing group, so this presumably returns $this->_m[1].
102 function getPrefix () {
// Return sub-match $n of the block pattern with the prefix removed after
// every newline. The index is shifted by 2 because match() wraps the
// pattern as "(prefix)(pattern)": group 1 is the prefix and group 2 the
// whole block pattern, so $n = 0 yields the full block match.
106 function getMatch ($n = 0) {
107 $text = $this->_m[$n + 2];
108 //if (preg_match('/\n./s', $text)) {
109 $prefix = $this->getPrefix();
110 $text = str_replace("\n$prefix", "\n", $text);
117 class BlockParser_Input {
// Constructor: store $text and normalise it for the block patterns.
// After construction the text contains no whitespace other than ' ' and
// "\n", ends with a newline, has an empty prefix, and is not at a break.
119 function BlockParser_Input ($text) {
120 $this->_text = $text;
124 // Expand leading tabs.
125 // FIXME: do this better.
127 // We want to ensure the only characters matching \s are ' ' and "\n".
// Every whitespace character that is neither a space nor a newline is
// replaced by a single space; the assert re-checks that none remain.
129 $this->_text = preg_replace('/(?![ \n])\s/', ' ', $this->_text);
130 assert(!preg_match('/(?![ \n])\s/', $this->_text));
// Guarantee a trailing newline.
131 if (!preg_match('/\n$/', $this->_text))
132 $this->_text .= "\n";
134 $this->_set_prefix ('');
135 $this->_atBreak = false;
// Install the line prefixes used when matching.
//   $prefix      - regexp fragment expected in front of the next match
//   $next_prefix - fragment substituted for '^' inside block patterns and
//                  promoted to the current prefix by accept()/eatSpace();
//                  defaults to $prefix when omitted (false)
// Also empties the compiled-regexp cache (match() bakes both prefixes into
// the cached patterns) and rebuilds the blank-line and end-of-input
// patterns.
139 function _set_prefix ($prefix, $next_prefix = false) {
140 if ($next_prefix === false)
141 $next_prefix = $prefix;
143 $this->_prefix = $prefix;
144 $this->_next_prefix = $next_prefix;
146 $this->_regexp_cache = array();
// A blank line is an optional prefix followed by whitespace up to a
// newline. The group has to be the non-capturing "(?:"; the earlier "(:?"
// meant "optional literal colon, then the prefix", which let a ':' in
// front of the prefix be swallowed as part of a blank line.
148 $blank = "(?:$prefix)?\s*\n";
149 $this->_blank_pat = "/$blank/A";
// End of input: real end of text, or a line that is neither blank nor
// introduced by the prefix.
150 $this->_eof_pat = "/\\Z|(?!$blank|${prefix}.)/A";
154 return preg_match($this->_eof_pat, substr($this->_text, $this->_pos));
// Match a block pattern at the current position.
//   $regexp - block pattern without delimiters (a block type's _re)
// On a match a BlockParser_Match wrapping the preg_match() groups is
// returned. Compiled patterns are cached per $regexp until the prefix
// changes (_set_prefix() clears the cache).
// NOTE(review): the no-match return is on an elided line.
157 function match ($regexp) {
158 $cache = &$this->_regexp_cache;
159 if (!isset($cache[$regexp])) {
160 // Fix up any '^'s in pattern (add our prefix)
// The look-behind skips a '^' that directly follows a space, '[' or
// backslash.
161 $re = preg_replace('/(?<! [ [ \\\\ ]) \^ /x',
162 '^' . $this->_next_prefix, $regexp);
164 // Fix any match backreferences (like '\1').
// Backreference numbers are raised by 2 because two capturing groups (the
// prefix and the whole pattern) are wrapped around the pattern below.
// NOTE(review): this relies on the /e (eval) modifier, which the PHP
// manual lists as deprecated in 5.5 and removed in 7.0 - needs a
// callback-based replacement before running on a current interpreter.
165 $re = preg_replace('/(?<= [^ \\\\ ] [ \\\\ ] )( \\d+ )/ex', "'\\1' + 2", $re);
// Anchored (A) and multi-line (m): group 1 = prefix, group 2 = pattern.
167 $re = "/(" . $this->_prefix . ")($re)/Am";
168 $cache[$regexp] = $re;
171 $re = $cache[$regexp];
173 if (preg_match($re, substr($this->_text, $this->_pos), $m)) {
174 return new BlockParser_Match($m);
// Consume the text of a previous match.
//   $match - BlockParser_Match whose full match ($match->_m[0]) is consumed
// Advances the position past the matched text, promotes the pending prefix
// to the current one if they differ, and clears the at-break flag.
179 function accept ($match) {
180 $text = $match->_m[0];
// The match must describe the text at the current position ...
182 assert(substr($this->_text, $this->_pos, strlen($text)) == $text);
183 $this->_pos += strlen($text);
// ... and must end at a line boundary.
186 assert(preg_match("/\n$/", $text));
188 if ($this->_next_prefix != $this->_prefix)
189 $this->_set_prefix($this->_next_prefix);
191 $this->_atBreak = false;
196 * Consume blank lines.
198 * @return bool True if any blank lines were consumed.
// Skip blank lines at the current position. When the first blank line is
// consumed the pending prefix is promoted (as accept() does) and the
// at-break flag is set; further blank lines are then skipped.
// The value returned is the at-break flag itself, so a flag left set by an
// earlier call is reported even if nothing is consumed this time.
200 function eatSpace () {
201 if (preg_match($this->_blank_pat, substr($this->_text, $this->_pos), $m)) {
202 $this->_pos += strlen($m[0]);
203 if ($this->_next_prefix != $this->_prefix)
204 $this->_set_prefix($this->_next_prefix);
205 $this->_atBreak = true;
// The blank pattern is re-read on every pass because _set_prefix() above
// may have rebuilt it.
207 while (preg_match($this->_blank_pat, substr($this->_text, $this->_pos), $m)) {
208 $this->_pos += strlen($m[0]);
212 return $this->_atBreak;
// Report the at-break flag: set by eatSpace() when it consumes a blank
// line, cleared by accept() and by the constructor.
215 function atBreak () {
216 return $this->_atBreak;
// Return the nesting depth of this input. Sub-block inputs set it to their
// parent's depth + 1; BlockParser::_nextBlock() treats 0 as the outermost
// input.
219 function getDepth () {
220 return $this->_depth;
225 if (($m = $this->match('.*\n')))
226 return sprintf('[%s]%s', $m->getPrefix(), $m->getMatch());
// Create a nested input over the same text.
//   $initial_prefix    - regexp fragment appended to the current prefix
//   $subsequent_prefix - fragment appended to the pending prefix; defaults
//                        to $initial_prefix when omitted (false)
// Returns a BlockParser_InputSubBlock bound to this input.
230 function subBlock ($initial_prefix, $subsequent_prefix = false) {
231 if ($subsequent_prefix === false)
232 $subsequent_prefix = $initial_prefix;
234 return new BlockParser_InputSubBlock ($this, $initial_prefix, $subsequent_prefix);
// Input view used for nested blocks. It shares text, position and
// at-break state with the parent input by reference, so consuming text
// through the sub-block advances the parent as well.
238 class BlockParser_InputSubBlock extends BlockParser_Input
//   $block              - parent input, taken by reference
//   $initial_prefix     - appended to the parent's current prefix
//   $subsequent_prefix  - appended to the parent's pending prefix
240 function BlockParser_InputSubBlock (&$block, $initial_prefix, $subsequent_prefix) {
241 $this->_text = &$block->_text;
242 $this->_pos = &$block->_pos;
243 $this->_atBreak = &$block->_atBreak;
// Depth is copied by value: one level below the parent.
245 $this->_depth = $block->_depth + 1;
247 $this->_set_prefix($block->_prefix . $initial_prefix,
248 $block->_next_prefix . $subsequent_prefix);
258 var $_followsBreak = false;
259 var $_preceedsBreak = false;
260 var $_content = array();
// Placeholder for the per-type parser: raises a fatal user error if a
// block type does not provide its own _parse().
//   $input - block input object, taken by reference
//   $match - match object for this block type's pattern
263 function _parse (&$input, $match) {
264 trigger_error('pure virtual', E_USER_ERROR);
// Append content to this block. $c is replaced by the full argument list,
// so any number of arguments may be passed; items are appended to
// $this->_content.
// NOTE(review): the loop that yields $x is on elided lines - confirm
// whether array arguments are flattened.
267 function _pushContent ($c) {
269 $c = func_get_args();
271 $this->_content[] = $x;
274 function isTerminal () {
278 function merge ($followingBlock) {
// Close the block: wrap the accumulated content in an HtmlElement built
// from this block's tag and attributes. The tighten argument callers pass
// is not declared here (it is commented out) and is ignored.
282 function finish (/*$tighten*/) {
283 return new HtmlElement($this->_tag, $this->_attr, $this->_content);
// Base class for block types that hold nested blocks (blockquotes and
// lists derive from it). It redefines isTerminal().
// NOTE(review): the body of isTerminal() is elided in this excerpt.
288 class CompoundBlock extends Block
290 function isTerminal () {
// Indented text rendered as a <blockquote>.
296 class Block_blockquote extends CompoundBlock
298 var $_tag ='blockquote';
// One or more leading spaces followed by a non-space character.
300 var $_re = '\ +(?=\S)';
// Parse the indented region as a nested block sequence. The matched
// indentation becomes the sub-block prefix and its length is remembered
// as this quote's depth.
302 function _parse (&$input, $m) {
303 $indent = $m->getMatch();
304 $this->_depth = strlen($indent);
305 $this->_content[] = BlockParser::parse($input->subBlock($indent),
306 BLOCK_NOTIGHTEN_EITHER);
// Merge with a following blockquote: this block's finished element is
// placed in front of the following block's content, and the combined list
// becomes this block's content. The assert expects the following quote to
// be less deeply indented.
310 function merge ($nextBlock) {
// get_class() returns lower-case names on PHP 4 but the declared case on
// PHP 5 and later; normalise so the comparison holds on both.
311 if (strtolower(get_class($nextBlock)) != 'block_blockquote')
313 assert ($nextBlock->_depth < $this->_depth);
315 $content = $nextBlock->_content;
316 array_unshift($content, $this->finish());
317 $this->_content = $content;
// Bulleted or numbered list item; adjacent items of the same kind are
// merged into one list.
322 class Block_list extends CompoundBlock
324 //var $_tag = 'ol' or 'ul';
// Up to four spaces, a bullet (group 1: '+', '#', a single '-', 'o'
// followed by a space, or '*' that does not open a *...* span), optional
// spaces, then a non-space character.
325 var $_re = '\ {0,4}([+#]|-(?!-)|[o](?=\ )|[*](?!\S[^*]*(?<=\S)[*](?!\S)))\ *(?=\S)';
327 function _parse (&$input, $m) {
328 // A list as the first content in a list is not allowed.
331 // Should markup as <ul><li>* Item</li></ul>,
332 // not <ul><li><ul><li>Item</li></ul></li></ul>.
// The test looks for a bullet character at the end of the prefix already
// consumed. NOTE(review): the statement it guards is elided.
334 if (preg_match('/[-*o+#;]\s*$/', $m->getPrefix()))
// First line of the item is introduced by the literal bullet text;
// continuation lines by the same number of spaces.
337 $prefix = $m->getMatch();
338 $leader = preg_quote($prefix, '/');
339 $indent = sprintf("\\ {%d}", strlen($prefix));
// '#' makes an ordered list; every other bullet an unordered one.
341 $bullet = $m->getMatch(1);
342 $this->_tag = $bullet == '#' ? 'ol' : 'ul';
344 $text = $input->subBlock($leader, $indent);
345 $content = BlockParser::parse($text, BLOCK_NOTIGHTEN_AFTER);
346 $this->_pushContent(HTML::li(false, $content));
// Absorb a following list block that produces the same tag by taking over
// its content. NOTE(review): the return statements are elided.
350 function merge ($nextBlock) {
351 if (!isa($nextBlock, 'Block_list') || $this->_tag != $nextBlock->_tag)
354 $this->_pushContent($nextBlock->_content);
// Definition list entry: a "Term:" line followed by a more deeply
// indented definition.
359 class Block_dl extends Block_list
// Group 1 = indentation of the term line, group 2 = the term (must not
// start with whitespace or '!'), group 3 = indentation of the definition,
// which must extend the term's indentation by at least one space.
362 var $_re = '(\ {0,4})([^\s!].*):\s*?\n(?=(?:\s*^)+(\1\ +)\S)';
363 // 1-------12--------2 3-----3
// Push a <dt> holding the inline-transformed term, followed by the
// definition parsed as nested blocks under the definition's indentation.
// NOTE(review): the wrapper call on the elided line between the two
// arguments is not visible (presumably HTML::dd) - confirm.
365 function _parse (&$input, $m) {
366 $term = TransformInline(rtrim($m->getMatch(2)));
367 $indent = $m->getMatch(3);
371 $this->_pushContent(HTML::dt(false, $term),
373 BlockParser::parse($input->subBlock($indent),
374 BLOCK_NOTIGHTEN_AFTER)));
// One term/definition entry of a "wiki-dl-table": a group of table rows
// whose first row starts with the term's <th>. Definitions may contain
// nested wiki-dl-tables, whose entries are adopted as additional rows.
381 class Block_table_dl_defn extends XmlContent
//   $term - content for the header cell
//   $defn - array of items, or an object whose getContent() returns them
386 function Block_table_dl_defn ($term, $defn) {
388 if (!is_array($defn))
389 $defn = $defn->getContent();
391 $this->_ncols = $this->_ComputeNcols($defn);
// Nested tables are merged in; anything else is handed to _addToRow().
// NOTE(review): the else keyword and any row flushing are on elided lines.
394 foreach ($defn as $item) {
395 if ($this->_IsASubtable($item))
396 $this->_addSubtable($item);
398 $this->_addToRow($item);
// The term cell spans all rows of this definition.
402 $th = HTML::th($term);
403 if ($this->_nrows > 1)
404 $th->setAttr('rowspan', $this->_nrows);
405 $this->_setTerm($th);
// Add $item to the pending cell, creating the <td> on first use. The cell
// gets a colspan when this definition is wider than two columns.
408 function _addToRow ($item) {
409 if (empty($this->_accum)) {
410 $this->_accum = HTML::td();
411 if ($this->_ncols > 2)
412 $this->_accum->setAttr('colspan', $this->_ncols - 1);
414 $this->_accum->pushContent($item);
// Emit the pending cell, if any, as a table row and reset it.
417 function _flushRow () {
418 if (!empty($this->_accum)) {
419 $this->pushContent(HTML::tr($this->_accum));
420 $this->_accum = false;
// Adopt every entry of a nested table and add its row count to ours.
425 function _addSubtable ($table) {
427 foreach ($table->getContent() as $subdef) {
428 $this->pushContent($subdef);
429 $this->_nrows += $subdef->nrows();
// Put the header cell at the front of the first row, descending into the
// first entry when that is itself a nested definition.
433 function _setTerm ($th) {
434 $first_row = &$this->_content[0];
435 if (isa($first_row, 'Block_table_dl_defn'))
436 $first_row->_setTerm($th);
438 $first_row->unshiftContent($th);
// Column count: for each nested table, the width of its first definition
// plus one; the maximum is kept.
// NOTE(review): the initial value and the return are on elided lines.
441 function _ComputeNcols ($defn) {
443 foreach ($defn as $item) {
444 if ($this->_IsASubtable($item)) {
445 $row = $this->_FirstDefn($item);
446 $ncols = max($ncols, $row->ncols() + 1);
// True for an HtmlElement <table> whose class is 'wiki-dl-table'.
452 function _IsASubtable ($item) {
453 return isa($item, 'HtmlElement')
454 && $item->getTag() == 'table'
455 && $item->getAttr('class') == 'wiki-dl-table';
// NOTE(review): only the first statement is visible; presumably returns
// the first entry of $defs.
458 function _FirstDefn ($subtable) {
459 $defs = $subtable->getContent();
// NOTE(review): the two returns below belong to functions whose headers
// are elided - presumably the ncols() and nrows() accessors called above.
464 return $this->_ncols;
468 return $this->_nrows;
// Widen this definition to $ncols columns. Narrowing is not allowed (see
// the assert). Nested definitions are widened to $ncols - 1; for plain
// rows the last cell's colspan is set to $ncols - 1.
// NOTE(review): the assignment of $row and the early return are elided.
471 function setWidth ($ncols) {
472 assert($ncols >= $this->_ncols);
473 if ($ncols <= $this->_ncols)
475 $rows = &$this->_content;
476 for ($i = 0; $i < count($rows); $i++) {
478 if (isa($row, 'Block_table_dl_defn'))
479 $row->setWidth($ncols - 1);
481 $n = count($row->_content);
482 $lastcol = &$row->_content[$n - 1];
483 $lastcol->setAttr('colspan', $ncols - 1);
// Definition list rendered as a table: a "Term |" line followed by a more
// deeply indented definition.
489 class Block_table_dl extends Block_list
// The class attribute is what Block_table_dl_defn::_IsASubtable() uses to
// recognise nested tables.
492 var $_attr = array('class' => 'wiki-dl-table',
493 'border' => 2, // FIXME: CSS?
// Group 1 = indentation of the term line, group 2 = optional term,
// group 3 = indentation of the definition (deeper than the term's).
498 var $_re = '(\ {0,4})((?![\s!]).*)?[|]\s*?\n(?=(?:\s*^)+(\1\ +)\S)';
499 // 1-------12-----------2 3-----3
// Parse the definition as nested blocks and store it with its term as a
// Block_table_dl_defn.
501 function _parse (&$input, $m) {
502 $term = TransformInline(rtrim($m->getMatch(2)));
503 $indent = $m->getMatch(3);
506 $defn = BlockParser::parse($input->subBlock($indent),
507 BLOCK_NOTIGHTEN_AFTER);
509 $this->_pushContent(new Block_table_dl_defn($term, $defn));
// Before handing over to the parent finish(), bring every definition to
// the width of the widest one.
// NOTE(review): the function header and the initial $ncols are elided.
514 $defs = &$this->_content;
517 foreach ($defs as $defn)
518 $ncols = max($ncols, $defn->ncols());
// Indexed through $defs so that setWidth() acts on the stored entries
// rather than on the loop variable.
519 foreach ($defs as $key => $defn)
520 $defs[$key]->setWidth($ncols);
522 return parent::finish();
// Old-style list markup: a '*' or '#' bullet, or a ";term:" definition.
526 class Block_oldlists extends Block_list
528 //var $_tag = 'ol', 'ul', or 'dl';
// Group 1 = '*' (when it does not open a *...* span) or '#';
// group 2 = the term of a ";term:" entry.
529 var $_re = '(?:([*](?!\S[^*]*(?<=\S)[*](?!\S))|[#])|;(.*):).*?(?=\S)';
530 // 1------------------------------1 2--2
532 function _parse (&$input, $m) {
// The prefix consumed so far may only consist of old-style bullet
// characters. NOTE(review): the guarded statement is elided.
533 if (!preg_match('/[*#;]*$/A', $m->getPrefix()))
536 $prefix = $m->getMatch();
538 $leader = preg_quote($prefix, '/');
// Continuation lines are introduced either by another old-style bullet
// (when more list markup follows it) or by as many spaces as the bullet
// text was long.
540 $oldindent = '[*#;](?=[#*]|;.*:.*?\S)';
541 $newindent = sprintf('\\ {%d}', strlen($prefix));
542 $indent = "(?:$oldindent|$newindent)";
544 $bullet = $m->getMatch(1);
// NOTE(review): the branch structure around the next statements (bullet
// entry versus ";term:" entry, and the creation of $item) is elided.
546 $this->_tag = $bullet == '*' ? 'ul' : 'ol';
551 $term = trim($m->getMatch(2));
553 $this->_pushContent(HTML::dt(false, TransformInline($term)));
557 $item->pushContent(BlockParser::parse($input->subBlock($leader, $indent),
558 BLOCK_NOTIGHTEN_AFTER));
559 $this->_pushContent($item);
// Preformatted block delimited by matching pre or verbatim tags.
564 class Block_pre extends Block
// Group 1 = tag name, group 2 = enclosed text. The closing tag must match
// the opening one, must not be preceded by '~', and must end its line.
567 var $_re = '<(pre|verbatim)>(.*?(?:\s*\n^.*?)*?)(?<!~)<\/\1>\s*?\n';
568 // 1------------1 2------------------2
570 function _parse (&$input, $m) {
573 $text = $m->getMatch(2);
574 $tag = $m->getMatch(1);
576 // FIXME: no <img>, <big>, <small>, <sup>, or <sub>'s allowed
// NOTE(review): the condition deciding whether the text is
// inline-transformed is elided (presumably depends on $tag) - confirm.
579 $text = TransformInline($text);
581 $this->_pushContent($text);
// Plugin processing instruction occupying its own block.
586 class Block_plugin extends Block
589 var $_attr = array('class' => 'plugin');
// Starts with the plugin (or plugin-form) opener, may span several lines,
// and ends with a closer that is not preceded by '~' and that ends its line.
590 var $_re = '<\?plugin(?:-form)?.*?(?:\n^.*?)*?(?<!~)\?>\s*?\n';
// The complete matched text is handed to WikiPluginLoader::expandPI() and
// the result becomes this block's content.
// NOTE(review): $request is not defined on any visible line - presumably
// declared global on an elided line.
592 function _parse (&$input, $m) {
594 $loader = new WikiPluginLoader;
596 $this->_pushContent($loader->expandPI($m->getMatch(), $request));
// Horizontal rule: a line of four or more dashes, optionally followed by
// whitespace.
// NOTE(review): the tag declaration and the body of _parse() are elided.
601 class Block_hr extends Block
604 var $_re = '-{4,}\s*?\n';
606 function _parse (&$input, $m) {
// Heading line introduced by one to three '!' characters.
612 class Block_heading extends Block
// Group 1 = the '!' markers, group 2 = the heading text.
614 var $_re = '(!{1,3})(.*)\n';
// More markers give a more prominent heading: '!' produces h4, '!!' h3 and
// '!!!' h2. The trimmed text is inline-transformed.
616 function _parse (&$input, $m) {
618 $this->_tag = "h" . (5 - strlen($m->getMatch(1)));
619 $this->_pushContent(TransformInline(trim($m->getMatch(2))));
// Paragraph text. Consecutive paragraph blocks that are not separated by
// a blank line are merged into one.
// NOTE(review): the tag and pattern declarations are on elided lines.
624 class Block_p extends Block
// Keep the raw matched text; inline transformation is deferred to
// finish() so that merged lines are transformed as a whole.
629 function _parse (&$input, $m) {
630 $this->_text = $m->getMatch();
// Append a directly following paragraph block. Refused when a blank line
// follows this block or when the next block is not a paragraph.
635 function merge ($nextBlock) {
// get_class() returns lower-case names on PHP 4 but the declared case on
// PHP 5 and later; without normalising, the comparison never holds there
// and adjacent paragraph lines are never joined.
636 if ($this->_preceedsBreak || strtolower(get_class($nextBlock)) != 'block_p')
639 $this->_text .= $nextBlock->_text;
640 $this->_preceedsBreak = $nextBlock->_preceedsBreak;
// Close the paragraph.
//   $tighten - BLOCK_* tighten mode, tested as bit flags
// If $tighten is still non-zero at the end, the bare content is returned
// instead of the wrapped element from the parent finish().
// NOTE(review): the statements guarded by the two flag tests are elided
// (presumably they clear $tighten) - confirm.
644 function finish ($tighten) {
645 $this->_pushContent(TransformInline(trim($this->_text)));
647 if ($this->_followsBreak && ($tighten & BLOCK_NOTIGHTEN_AFTER) != 0)
649 elseif ($this->_preceedsBreak && ($tighten & BLOCK_NOTIGHTEN_BEFORE) != 0)
652 return $tighten ? $this->_content : parent::finish();
// Blockquote in e-mail quoting style, rendered with a ridge on the left.
// NOTE(review): the pattern declaration is on an elided line.
656 class Block_email_blockquote extends CompoundBlock
658 // FIXME: move CSS to CSS.
659 var $_tag ='blockquote';
660 var $_attr = array('style' => 'border-left-width: medium; border-left-color: #0f0; border-left-style: ridge; padding-left: 1em; margin-left: 0em; margin-right: 0em;');
// Parse the quoted region as nested blocks. Each line is introduced by the
// matched quote prefix, or by a bare '>' when the rest of the line is blank.
// NOTE(review): $prefix is inserted into the pattern without preg_quote();
// safe only if the (elided) pattern cannot match regexp metacharacters.
664 function _parse (&$input, $m) {
665 $prefix = $m->getMatch();
666 $indent = "(?:$prefix|>(?=\s*?\n))";
667 $this->_content[] = BlockParser::parse($input->subBlock($indent),
668 BLOCK_NOTIGHTEN_EITHER);
673 ////////////////////////////////////////////////////////////////
// Registry of block types. BlockParser::_nextBlock() walks this list in
// order and uses the first type whose pattern matches, so the order of
// the entries sets their precedence.
// NOTE(review): only some entries are visible in this excerpt.
678 $GLOBALS['Block_BlockTypes'] = array(new Block_oldlists,
682 new Block_blockquote,
686 new Block_email_blockquote,
690 // FIXME: This is temporary, too...
// Convert wiki text with the block parser.
//   $text - raw wiki markup
// Returns whatever BlockParser::parse() produces for the text.
691 function NewTransform ($text) {
695 // Expand leading tabs.
696 // FIXME: do this better. also move it...
// Each run of leading whitespace that contains a non-space character is
// replaced by the same number of spaces.
// NOTE(review): this relies on the /e (eval) modifier, which the PHP
// manual lists as deprecated in 5.5 and removed in 7.0.
697 $text = preg_replace('/^\ *[^\ \S\n][^\S\n]*/me', "str_repeat(' ', strlen('\\0'))", $text);
// NOTE(review): without the /m modifier this assert only inspects the
// start of the text, not every line.
698 assert(!preg_match('/^\ *\t/', $text));
700 $input = new BlockParser_Input($text);
701 return BlockParser::parse($input);
// Render a page revision, choosing the parser by the revision's 'markup'
// value: 'new' goes through NewTransform() with the packed content,
// anything else through do_transform() with the revision's getContent().
706 function TransformRevision ($revision) {
707 if ($revision->get('markup') == 'new') {
708 return NewTransform($revision->getPackedContent());
711 return do_transform($revision->getContent());
716 // (c-file-style: "gnu")
721 // c-hanging-comment-ender-p: nil
722 // indent-tabs-mode: nil