1 <?php rcs_id('$Id: BlockParser.php,v 1.22 2002-02-08 03:01:11 dairiki Exp $');
2 /* Copyright (C) 2002, Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * This file is part of PhpWiki.
6 * PhpWiki is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * PhpWiki is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with PhpWiki; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 require_once('lib/HtmlElement.php');
21 require_once('lib/InlineParser.php');
23 ////////////////////////////////////////////////////////////////
33 // FIXME: unify this with the RegexpSet in InlineParser.
36 * Return type from AnchoredRegexpSet::match and AnchoredRegexpSet::nextMatch.
40 class AnchoredRegexpSet_match {
47 * The text following the matched text.
52 * Index of the regular expression which matched.
58 * A set of regular expressions.
60 * This class is probably only useful for the block parser (ParsedBlock).
62 class AnchoredRegexpSet
66 * @param $regexps array A list of regular expressions. The
67 * regular expressions should not include any sub-pattern groups
68 * "(...)". (Anonymous groups, like "(?:...)", as well as
69 * look-ahead and look-behind assertions are fine.)
71 function AnchoredRegexpSet ($regexps) {
72 $this->_regexps = $regexps;
73 $this->_re = "/((" . join(")|(", $regexps) . "))/Ax";
77 * Search text for the next matching regexp from the Regexp Set.
79 * @param $text string The text to search.
81 * @return object An AnchoredRegexpSet_match object, or false if no match.
83 function match ($text) {
84 if (! preg_match($this->_re, $text, $m)) {
88 $match = new AnchoredRegexpSet_match;
89 $match->postmatch = substr($text, strlen($m[0]));
90 $match->match = $m[1];
91 $match->regexp_ind = count($m) - 3;
96 * Search for next matching regexp.
98 * Here, 'next' has two meanings:
100 * Match the next regexp(s) in the set, at the same position as the last match.
102 * If that fails, match the whole RegexpSet, starting after the position of the
105 * @param $text string Text to search.
107 * @param $prevMatch An AnchoredRegexpSet_match object
109 * $prevMatch should be a match object obtained by a previous
110 * match upon the same value of $text.
112 * @return object An AnchoredRegexpSet_match object, or false if no match.
114 function nextMatch ($text, $prevMatch) {
115 // Try to find match at same position.
116 $regexps = array_slice($this->_regexps, $prevMatch->regexp_ind + 1);
121 $pat= "/ ( (" . join(')|(', $regexps) . ") ) /Axs";
123 if (! preg_match($pat, $text, $m)) {
127 $match = new AnchoredRegexpSet_match;
128 $match->postmatch = substr($text, strlen($m[0]));
129 $match->match = $m[1];
130 $match->regexp_ind = count($m) - 3 + $prevMatch->regexp_ind + 1;;
137 class BlockParser_Input {
139 function BlockParser_Input ($text) {
141 // Expand leading tabs.
142 // FIXME: do this better.
144 // We want to ensure the only characters matching \s are ' ' and "\n".
146 $text = preg_replace('/(?![ \n])\s/', ' ', $text);
147 assert(!preg_match('/(?![ \n])\s/', $text));
149 $this->_lines = preg_split('/[^\S\n]*\n/', $text);
153 function currentLine () {
154 if ($this->_pos >= count($this->_lines))
156 return $this->_lines[$this->_pos];
159 function nextLine () {
161 if ($this->_pos >= count($this->_lines))
163 return $this->_lines[$this->_pos];
166 function advance () {
174 function setPos ($pos) {
178 function getPrefix () {
182 function getDepth () {
187 if ($this->_pos < count($this->_lines))
188 return $this->_lines[$this->_pos];
195 function _debug ($tab, $msg) {
197 $where = $this->where();
198 $tab = str_repeat('____', $this->getDepth() ) . $tab;
199 printXML(HTML::div("$tab $msg: at: '",
205 class BlockParser_InputSubBlock extends BlockParser_Input
207 function BlockParser_InputSubBlock (&$input, $prefix_re, $initial_prefix = false) {
208 $this->_input = &$input;
209 $this->_prefix_pat = "/$prefix_re|\\s*\$/Ax";
210 $this->_prefix = $initial_prefix;
213 $this->_block_types = &$input->_block_types;
214 $this->_regexpset = &$input->_regexpset;
217 function currentLine () {
218 if (($line = $this->_input->currentLine()) === false)
220 if ($this->_prefix === false) {
221 if (!preg_match($this->_prefix_pat, $line, $m))
223 $this->_prefix = $m[0];
225 return (string) substr($line, strlen($this->_prefix));
228 function nextLine () {
229 if (($line = $this->_input->nextLine()) === false) {
230 $this->_prefix = false;
233 if (!preg_match($this->_prefix_pat, $line, $m)) {
234 $this->_prefix = false;
237 $this->_prefix = $m[0];
238 return (string) substr($line, strlen($this->_prefix));
241 function advance () {
242 $this->_prefix = false;
243 $this->_input->advance();
247 return array($this->_prefix, $this->_input->getPos());
250 function setPos ($pos) {
251 $this->_prefix = $pos[0];
252 $this->_input->setPos($pos[1]);
255 function getPrefix () {
256 assert ($this->_prefix !== false);
257 return $this->_input->getPrefix() . $this->_prefix;
260 function getDepth () {
261 return $this->_input->getDepth() + 1;
265 return $this->_input->where();
271 class Tightenable extends HtmlElement {
272 var $_isTight = false;
274 function Tightenable ($tag /*, ...*/) {
275 $this->_init(func_get_args());
278 function tighten () {
279 if (! $this->_isTight) {
280 $content = &$this->_content;
281 for ($i = 0; $i < count($content); $i++) {
282 if (!isa($content[$i], 'Tightenable'))
284 $content[$i]->tighten();
286 $this->_isTight = true;
290 function canTighten () {
291 $content = &$this->_content;
292 for ($i = 0; $i < count($content); $i++) {
293 if (!isa($content[$i], 'Tightenable'))
295 if (!$content[$i]->canTighten())
302 class TightenableParagraph extends Tightenable {
303 function TightenableParagraph (/*...*/) {
305 $this->pushContent(func_get_args());
308 function tighten () {
309 $this->_isTight = true;
312 function canTighten () {
316 function printXML () {
318 return XmlContent::printXML();
320 return parent::printXML();
325 return XmlContent::asXML();
327 return parent::asXML();
331 class ParsedBlock extends Tightenable {
332 var $_isLoose = false;
334 function ParsedBlock (&$input, $tag = 'div', $attr = false) {
335 $this->Tightenable($tag, $attr);
336 $this->_initBlockTypes();
337 $this->_parse($input);
340 function canTighten () {
343 return parent::canTighten();
346 function _parse (&$input) {
347 for ($block = $this->_getBlock($input); $block; $block = $nextBlock) {
348 while ($nextBlock = $this->_getBlock($input)) {
349 // Attempt to merge current with following block.
350 if (! ($merged = $block->merge($nextBlock, $this->_atSpace)) ) {
351 break; // can't merge
355 $this->pushContent($block->finish());
360 function _initBlockTypes () {
361 foreach (array('oldlists', 'list', 'dl', 'table_dl',
362 'blockquote', 'heading', 'hr', 'pre', 'email_blockquote',
365 $class = "Block_$type";
367 $this->_block_types[] = $proto;
368 $this->_regexps[] = $proto->_re;
370 $this->_regexpset = new AnchoredRegexpSet($this->_regexps);
373 function _getLine (&$input) {
374 $line = $input->currentLine();
375 if ( $line || $line === false) {
376 $this->_atSpace = false;
380 $pos = $input->getPos();
381 while ( ($line = $input->nextLine()) !== false ) {
383 $this->_atSpace = true;
384 $this->_isLoose = true;
389 $input->setPos($pos);
393 function _getBlock (&$input) {
394 if (! ($line = $this->_getLine($input)) )
397 $re_set = &$this->_regexpset;
398 for ($m = $re_set->match($line); $m; $m = $re_set->nextMatch($line, $m)) {
399 $block = $this->_block_types[$m->regexp_ind];
400 //$this->_debug('>', get_class($block));
402 if ($block->_match($input, $m)) {
403 //$this->_debug('<', get_class($block));
404 //$block->_follows_space = $this->_atSpace;
407 //$this->_debug('[', "_match failed");
411 //if ($input->getDepth() == 0) {
412 // We should never get here.
413 //preg_match('/.*/A', substr($this->_text, $this->_pos), $m);// get first line
414 trigger_error("Couldn't match block: '$line'", E_USER_NOTICE);
421 class WikiText extends ParsedBlock {
422 function WikiText ($text) {
423 $input = new BlockParser_Input($text);
424 $this->ParsedBlock($input);
428 class SubBlock extends ParsedBlock {
429 function SubBlock (&$input, $indent_re, $initial_indent = false,
430 $tag = 'div', $attr = false) {
431 $subinput = new BlockParser_InputSubBlock($input, $indent_re, $initial_indent);
432 $this->ParsedBlock($subinput, $tag, $attr);
440 function _match (&$input, $match) {
441 trigger_error('pure virtual', E_USER_ERROR);
444 function merge ($followingBlock, $followsSpace) {
449 trigger_error('pure virtual', E_USER_ERROR);
453 class Block_blockquote extends BlockMarkup
457 var $_re = '\ +(?=\S)';
459 function _match (&$input, $m) {
460 $this->_depth = strlen($m->match);
461 $indent = sprintf("\\ {%d}", $this->_depth);
462 $this->_block = new SubBlock($input, $indent, $m->match,
467 function merge ($nextBlock, $followsSpace) {
468 if (get_class($nextBlock) == get_class($this)) {
469 assert ($nextBlock->_depth < $this->_depth);
470 $nextBlock->_block->unshiftContent($this->_block);
477 return $this->_block;
481 class Block_list extends BlockMarkup
483 //var $_tag = 'ol' or 'ul';
485 (?: [+#] | -(?!-) | [o](?=\ )
486 | [*] (?! \S[^*]*(?<=\S)[*](?!\S) )
489 var $_isLoose = false;
490 var $_content = array();
492 function _match (&$input, $m) {
493 // A list as the first content in a list is not allowed.
496 // Should markup as <ul><li>* Item</li></ul>,
497 // not <ul><li><ul><li>Item</li></ul></li></ul>.
499 if (preg_match('/[*#+-o]/', $input->getPrefix())) {
504 $indent = sprintf("\\ {%d}", strlen($prefix));
506 $bullet = trim($m->match);
507 $this->_tag = $bullet == '#' ? 'ol' : 'ul';
509 $this->_content[] = new SubBlock($input, $indent, $m->match, 'li');
513 function merge ($nextBlock, $followsSpace) {
514 if (isa($nextBlock, 'Block_list') && $this->_tag == $nextBlock->_tag) {
515 array_splice($this->_content, count($this->_content), 0,
516 $nextBlock->_content);
518 $this->_isLoose = true;
525 $list = new Tightenable($this->_tag, false, $this->_content);
526 if (!$this->_isLoose && $list->canTighten())
532 class Block_dl extends Block_list
535 var $_re = '\ {0,4}\S.*:\s*$';
537 function _match (&$input, $m) {
538 if (!($p = $this->_do_match($input, $m)))
540 list ($term, $defn) = $p;
542 $this->_content[] = HTML::dt($term);
543 $this->_content[] = $defn;
547 function _do_match (&$input, $m) {
548 $pos = $input->getPos();
550 $firstIndent = strspn($m->match, ' ');
551 $pat = sprintf('/\ {%d,%d}(?=\s*\S)/A', $firstIndent + 1, $firstIndent + 5);
553 while ( ($line = $input->nextLine()) !== false ) {
554 if (preg_match($pat, $line, $mm)) {
555 $indent = strlen($mm[0]);
559 $input->setPos($pos);
562 return false; // No body found.
564 $input->advance(); // Skip the first line.
566 $term = TransformInline(rtrim(substr(trim($m->match),0,-1)));
567 $defn = new SubBlock($input, sprintf("\\ {%d}", $indent), false, 'dd');
568 return array($term, $defn);
574 class Block_table_dl_defn extends XmlContent
579 function Block_table_dl_defn ($term, $defn) {
581 if (!is_array($defn))
582 $defn = $defn->getContent();
584 $this->_ncols = $this->_ComputeNcols($defn);
587 foreach ($defn as $item) {
588 if ($this->_IsASubtable($item))
589 $this->_addSubtable($item);
591 $this->_addToRow($item);
595 $th = HTML::th($term);
596 if ($this->_nrows > 1)
597 $th->setAttr('rowspan', $this->_nrows);
598 $this->_setTerm($th);
601 function _addToRow ($item) {
602 if (empty($this->_accum)) {
603 $this->_accum = HTML::td();
604 if ($this->_ncols > 2)
605 $this->_accum->setAttr('colspan', $this->_ncols - 1);
607 $this->_accum->pushContent($item);
610 function _flushRow () {
611 if (!empty($this->_accum)) {
612 $this->pushContent(HTML::tr($this->_accum));
613 $this->_accum = false;
618 function _addSubtable ($table) {
620 foreach ($table->getContent() as $subdef) {
621 $this->pushContent($subdef);
622 $this->_nrows += $subdef->nrows();
626 function _setTerm ($th) {
627 $first_row = &$this->_content[0];
628 if (isa($first_row, 'Block_table_dl_defn'))
629 $first_row->_setTerm($th);
631 $first_row->unshiftContent($th);
634 function _ComputeNcols ($defn) {
636 foreach ($defn as $item) {
637 if ($this->_IsASubtable($item)) {
638 $row = $this->_FirstDefn($item);
639 $ncols = max($ncols, $row->ncols() + 1);
645 function _IsASubtable ($item) {
646 return isa($item, 'HtmlElement')
647 && $item->getTag() == 'table'
648 && $item->getAttr('class') == 'wiki-dl-table';
651 function _FirstDefn ($subtable) {
652 $defs = $subtable->getContent();
657 return $this->_ncols;
661 return $this->_nrows;
664 function setWidth ($ncols) {
665 assert($ncols >= $this->_ncols);
666 if ($ncols <= $this->_ncols)
668 $rows = &$this->_content;
669 for ($i = 0; $i < count($rows); $i++) {
671 if (isa($row, 'Block_table_dl_defn'))
672 $row->setWidth($ncols - 1);
674 $n = count($row->_content);
675 $lastcol = &$row->_content[$n - 1];
676 $lastcol->setAttr('colspan', $ncols - 1);
682 class Block_table_dl extends Block_dl
685 var $_attr = array('class' => 'wiki-dl-table',
686 'border' => 2, // FIXME: CSS?
691 var $_re = '\ {0,4} (?:\S.*)? \| \s* $';
693 function _match (&$input, $m) {
694 if (!($p = $this->_do_match($input, $m)))
696 list ($term, $defn) = $p;
698 $this->_content[] = new Block_table_dl_defn($term, $defn);
704 $defs = &$this->_content;
707 foreach ($defs as $defn)
708 $ncols = max($ncols, $defn->ncols());
709 foreach ($defs as $key => $defn)
710 $defs[$key]->setWidth($ncols);
712 return HTML::table(array('class' => 'wiki-dl-table',
713 'border' => 2, // FIXME: CSS?
720 class Block_oldlists extends Block_list
722 //var $_tag = 'ol', 'ul', or 'dl';
723 var $_re = '(?: [*] (?! \S[^*]* (?<=\S) [*](?!\S) )
728 function _match (&$input, $m) {
730 if (!preg_match('/[*#;]*$/A', $input->getPrefix())) {
736 $oldindent = '[*#;](?=[#*]|;.*:.*\S)';
737 $newindent = sprintf('\\ {%d}', strlen($prefix));
738 $indent = "(?:$oldindent|$newindent)";
740 $bullet = $prefix[0];
741 if ($bullet == '*') {
745 elseif ($bullet == '#') {
751 list ($term,) = explode(':', substr($prefix, 1), 2);
754 $this->_content[] = HTML::dt(false, TransformInline($term));
758 $this->_content[] = new SubBlock($input, $indent, $m->match, $itemtag);
763 class Block_pre extends BlockMarkup
765 var $_re = '<(?:pre|verbatim)>';
767 function _match (&$input, $m) {
768 $endtag = '</' . substr($m->match, 1);
770 if (($text = $this->_getBlock($input, $endtag)) === false)
773 if (ltrim($m->postmatch))
774 array_unshift($text, $m->postmatch);
775 $text = join("\n", $text);
777 // FIXME: no <img>, <big>, <small>, <sup>, or <sub>'s allowed
779 if ($m->match == '<pre>')
780 $text = TransformInline($text);
782 $this->_html = HTML::pre(false, $text);
786 function _getBlock (&$input, $end_tag) {
787 $pos = $input->getPos();
790 while ( ($line = $input->nextLine()) !== false ) {
791 if (rtrim($line) == $end_tag) {
797 $input->setPos($pos);
807 class Block_plugin extends Block_pre
809 var $_re = '<\?plugin(?:-form)?(?!\S)';
812 /* <?plugin Backlinks
816 function _match (&$input, $m) {
817 $pos = $input->getPos();
818 $pi = $m->match . $m->postmatch;
819 while (!preg_match('/(?<!~)\?>\s*$/', $pi)) {
820 if (($line = $input->nextLine()) === false) {
821 $input->setPos($pos);
828 $this->_plugin_pi = $pi;
834 $loader = new WikiPluginLoader;
836 return HTML::div(array('class' => 'plugin'),
837 $loader->expandPI($this->_plugin_pi, $request));
841 class Block_email_blockquote extends Block_blockquote
843 // FIXME: move CSS to CSS.
844 var $_attr = array('style' => 'border-left-width: medium; border-left-color: #0f0; border-left-style: ridge; padding-left: 1em; margin-left: 0em; margin-right: 0em;');
848 function _match (&$input, $m) {
849 $indent = str_replace(' ', '\\ ', $m->match);
850 $this->_block = new SubBlock($input, $indent, $m->match,
851 'blockquote', $this->_attr);
856 class Block_hr extends BlockMarkup
858 var $_re = '-{4,}\s*$';
860 function _match (&$input, $m) {
870 class Block_heading extends BlockMarkup
874 function _match (&$input, $m) {
875 $this->_tag = "h" . (5 - strlen($m->match));
876 $this->_text = TransformInline(trim($m->postmatch));
882 return new HtmlElement($this->_tag, false, $this->_text);
886 class Block_p extends BlockMarkup
891 function _match (&$input, $m) {
892 $this->_text = $m->match;
897 function merge ($nextBlock, $followsSpace) {
898 $class = get_class($nextBlock);
899 if ($class == 'block_p' && !$followsSpace) {
900 $this->_text .= "\n" . $nextBlock->_text;
907 $content = TransformInline(trim($this->_text));
908 return new TightenableParagraph($content);
912 ////////////////////////////////////////////////////////////////
917 function TransformText ($text, $markup = 2.0) {
918 if (isa($text, 'WikiDB_PageRevision')) {
920 $text = $rev->getPackedContent();
921 $markup = $rev->get('markup');
924 if (empty($markup) || $markup < 2.0) {
925 include_once("lib/transform.php");
926 return do_transform($text);
927 //$text = ConvertOldMarkup($text);
930 // Expand leading tabs.
931 // FIXME: do this better. also move it...
932 $text = preg_replace('/^\ *[^\ \S\n][^\S\n]*/me', "str_repeat(' ', strlen('\\0'))", $text);
933 assert(!preg_match('/^\ *\t/', $text));
935 $output = new WikiText($text);
940 // (c-file-style: "gnu")
945 // c-hanging-comment-ender-p: nil
946 // indent-tabs-mode: nil