1 <?php rcs_id('$Id: BlockParser.php,v 1.28 2002-11-01 16:49:29 dairiki Exp $');
2 /* Copyright (C) 2002, Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * This file is part of PhpWiki.
6 * PhpWiki is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * PhpWiki is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with PhpWiki; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 require_once('lib/HtmlElement.php');
21 require_once('lib/InlineParser.php');
23 ////////////////////////////////////////////////////////////////
33 // FIXME: unify this with the RegexpSet in InlineParser.
36 * Return type from AnchoredRegexpSet::match and AnchoredRegexpSet::nextMatch.
40 class AnchoredRegexpSet_match {
47 * The text following the matched text.
52 * Index of the regular expression which matched.
58 * A set of regular expressions.
60 * This class is probably only useful for the block parser (see ParsedBlock).
62 class AnchoredRegexpSet
66 * @param $regexps array A list of regular expressions. The
67 * regular expressions should not include any sub-pattern groups
68 * "(...)". (Anonymous groups, like "(?:...)", as well as
69 * look-ahead and look-behind assertions are fine.)
71 function AnchoredRegexpSet ($regexps) {
72 $this->_regexps = $regexps;
73 $this->_re = "/((" . join(")|(", $regexps) . "))/Ax";
77 * Match the start of the text against the regexps in the Regexp Set.
79 * @param $text string The text to search.
81 * @return object An AnchoredRegexpSet_match object, or false if no match.
83 function match ($text) { // Anchored match of $text against the combined pattern built in the constructor.
84 if (! preg_match($this->_re, $text, $m)) {
88 $match = new AnchoredRegexpSet_match;
89 $match->postmatch = substr($text, strlen($m[0])); // whatever follows the matched text
90 $match->match = $m[1]; // outermost capture group, i.e. the text matched by the alternation
91 $match->regexp_ind = count($m) - 3; // $m holds [0], [1] and one group per alternative up to the one that matched (trailing unmatched groups are not reported), so the last index minus 2 is the regexp's position; relies on the regexps containing no capture groups of their own
96 * Search for next matching regexp.
98 * Here, 'next' has two meanings:
100 * Match the next regexp(s) in the set, at the same position as the last match.
102 * If that fails, match the whole RegexpSet, starting after the position of the
105 * @param $text string Text to search.
107 * @param $prevMatch An AnchoredRegexpSet_match object
109 * $prevMatch should be a match object obtained by a previous
110 * match upon the same value of $text.
112 * @return object An AnchoredRegexpSet_match object, or false if no match.
114 function nextMatch ($text, $prevMatch) {
115 // Try to find match at same position.
116 $regexps = array_slice($this->_regexps, $prevMatch->regexp_ind + 1); // only the regexps listed after the one that matched last time
121 $pat= "/ ( (" . join(')|(', $regexps) . ") ) /Axs"; // built like the constructor's pattern but restricted to the remaining regexps (note the extra 's' modifier here)
123 if (! preg_match($pat, $text, $m)) {
127 $match = new AnchoredRegexpSet_match;
128 $match->postmatch = substr($text, strlen($m[0]));
129 $match->match = $m[1];
130 $match->regexp_ind = count($m) - 3 + $prevMatch->regexp_ind + 1;; // index within the slice, shifted back to an index into the full $this->_regexps
137 class BlockParser_Input {
139 function BlockParser_Input ($text) {
141 // Expand leading tabs.
142 // FIXME: do this better.
144 // We want to ensure the only characters matching \s are ' ' and "\n".
146 $text = preg_replace('/(?![ \n])\s/', ' ', $text);
147 assert(!preg_match('/(?![ \n])\s/', $text));
149 $this->_lines = preg_split('/[^\S\n]*\n/', $text);
151 $this->_atSpace = false;
155 function skipSpace () {
156 while ($this->_pos < count($this->_lines)) {
157 if ($this->_lines[$this->_pos] != '')
160 $this->_atSpace = true;
162 return $this->_atSpace;
165 function currentLine () {
166 if ($this->_pos >= count($this->_lines)) {
169 return $this->_lines[$this->_pos];
172 function nextLine () {
173 $this->_atSpace = $this->_lines[$this->_pos++] === '';
174 if ($this->_pos >= count($this->_lines)) {
177 return $this->_lines[$this->_pos];
180 function advance () {
181 $this->_atSpace = $this->_lines[$this->_pos++] === '';
185 return array($this->_pos, $this->_atSpace);
188 function setPos ($pos) {
189 list($this->_pos, $this->_atSpace) = $pos;
192 function getPrefix () {
196 function getDepth () {
201 if ($this->_pos < count($this->_lines))
202 return $this->_lines[$this->_pos];
207 function _debug ($tab, $msg) {
209 $where = $this->where();
210 $tab = str_repeat('____', $this->getDepth() ) . $tab;
211 printXML(HTML::div("$tab $msg: at: '",
217 class BlockParser_InputSubBlock extends BlockParser_Input
219 function BlockParser_InputSubBlock (&$input, $prefix_re, $initial_prefix = false) {
220 $this->_input = &$input;
221 $this->_prefix_pat = "/$prefix_re|\\s*\$/Ax";
222 $this->_atSpace = false;
224 if (($line = $input->currentLine()) === false)
225 $this->_line = false;
226 elseif ($initial_prefix) {
227 assert(substr($line, 0, strlen($initial_prefix)) == $initial_prefix);
228 $this->_line = (string) substr($line, strlen($initial_prefix));
230 elseif (preg_match($this->_prefix_pat, $line, $m))
231 $this->_line = (string) substr($line, strlen($m[0]));
233 $this->_line = false;
236 function skipSpace () {
237 while ($this->_line === '') {
240 return $this->_atSpace;
243 function currentLine () {
247 function nextLine () {
248 $this->_atSpace = $this->_line === '';
249 $line = $this->_input->nextLine();
250 if ($line !== false && preg_match($this->_prefix_pat, $line, $m))
251 $this->_line = (string) substr($line, strlen($m[0]));
253 $this->_line = false;
257 function advance () {
258 $this->_atSpace = $this->_line === '';
259 $line = $this->_input->nextLine();
260 if ($line !== false && preg_match($this->_prefix_pat, $line, $m))
261 $this->_line = (string) substr($line, strlen($m[0]));
263 $this->_line = false;
267 return array($this->_line, $this->_atSpace, $this->_input->getPos());
270 function setPos ($pos) {
271 $this->_line = $pos[0];
272 $this->_atSpace = $pos[1];
273 $this->_input->setPos($pos[2]);
276 function getPrefix () { // Returns the leading part of the wrapped input's current line that was stripped off to produce this sub-block's line.
277 assert ($this->_line !== false);
278 $line = $this->_input->currentLine();
279 assert ($line !== false && strlen($line) >= strlen($this->_line));
280 return substr($line, 0, strlen($line) - strlen($this->_line)); // $this->_line is the tail of $line, so the difference in length is the prefix width
283 function getDepth () {
284 return $this->_input->getDepth() + 1;
288 return $this->_input->where();
294 class Tightenable extends HtmlElement {
295 var $_isTight = false;
297 function Tightenable ($tag /*, ...*/) {
298 $this->_init(func_get_args());
301 function tighten () {
302 if (! $this->_isTight) {
303 $content = &$this->_content;
304 for ($i = 0; $i < count($content); $i++) {
305 if (!isa($content[$i], 'Tightenable'))
307 $content[$i]->tighten();
309 $this->_isTight = true;
313 function canTighten () {
314 $content = &$this->_content;
315 for ($i = 0; $i < count($content); $i++) {
316 if (!isa($content[$i], 'Tightenable'))
318 if (!$content[$i]->canTighten())
325 class TightenableParagraph extends Tightenable {
326 function TightenableParagraph (/*...*/) {
328 $this->pushContent(func_get_args());
331 function tighten () {
332 $this->_isTight = true;
335 function canTighten () {
340 function printXML () {
342 return XmlContent::printXML();
344 return parent::printXML();
349 return XmlContent::asXML();
351 return parent::asXML();
355 class ParsedBlock extends Tightenable {
356 var $_isLoose = false;
358 function ParsedBlock (&$input, $tag = 'div', $attr = false) {
359 $this->Tightenable($tag, $attr);
360 $this->_initBlockTypes();
361 $this->_parse($input);
364 function canTighten () {
367 return parent::canTighten();
370 function _parse (&$input) {
371 for ($block = $this->_getBlock($input); $block; $block = $nextBlock) {
372 while ($nextBlock = $this->_getBlock($input)) {
373 // Attempt to merge current with following block.
374 if (! ($merged = $block->merge($nextBlock, $this->_atSpace)) ) {
375 break; // can't merge
379 $this->pushContent($block->finish());
384 function _initBlockTypes () {
385 foreach (array('oldlists', 'list', 'dl', 'table_dl',
386 'blockquote', 'heading', 'hr', 'pre', 'email_blockquote',
389 $class = "Block_$type";
391 $this->_block_types[] = $proto;
392 $this->_regexps[] = $proto->_re;
394 $this->_regexpset = new AnchoredRegexpSet($this->_regexps);
397 function _getBlock (&$input) {
398 $this->_atSpace = $input->skipSpace();
400 if (! ($line = $input->currentLine()) )
404 $this->_isLoose = true;
406 $re_set = &$this->_regexpset;
407 for ($m = $re_set->match($line); $m; $m = $re_set->nextMatch($line, $m)) {
408 $block = $this->_block_types[$m->regexp_ind];
409 //$input->_debug('>', get_class($block));
411 if ($block->_match($input, $m)) {
412 //$input->_debug('<', get_class($block));
415 //$input->_debug('[', "_match failed");
418 trigger_error("Couldn't match block: '$line'", E_USER_NOTICE);
423 class WikiText extends ParsedBlock {
424 function WikiText ($text) {
425 $input = new BlockParser_Input($text);
426 $this->ParsedBlock($input);
430 class SubBlock extends ParsedBlock {
431 function SubBlock (&$input, $indent_re, $initial_indent = false,
432 $tag = 'div', $attr = false) {
433 $subinput = new BlockParser_InputSubBlock($input, $indent_re, $initial_indent);
434 $this->ParsedBlock($subinput, $tag, $attr);
442 function _match (&$input, $match) {
443 trigger_error('pure virtual', E_USER_ERROR);
446 function merge ($followingBlock, $followsSpace) {
451 trigger_error('pure virtual', E_USER_ERROR);
455 class Block_blockquote extends BlockMarkup
459 var $_re = '\ +(?=\S)';
461 function _match (&$input, $m) {
462 $this->_depth = strlen($m->match);
463 $indent = sprintf("\\ {%d}", $this->_depth);
464 $this->_block = new SubBlock($input, $indent, $m->match,
469 function merge ($nextBlock, $followsSpace) {
470 if (get_class($nextBlock) == get_class($this)) {
471 assert ($nextBlock->_depth < $this->_depth);
472 $nextBlock->_block->unshiftContent($this->_block);
479 return $this->_block;
483 class Block_list extends BlockMarkup
485 //var $_tag = 'ol' or 'ul';
491 | [*] (?! \S[^*]*(?<=\S)[*](?!\S) )
494 var $_isLoose = false;
495 var $_content = array();
497 function _match (&$input, $m) { // Try to start a list item at the current line; $m is the match object produced for this block type's $_re.
498 // A list as the first content in a list is not allowed.
501 // Should markup as <ul><li>* Item</li></ul>,
502 // not <ul><li><ul><li>Item</li></ul></li></ul>.
504 if (preg_match('/[*#+o-]/', $input->getPrefix())) { // '-' goes last so it is a literal bullet character; written as '+-o' it formed the range '+'..'o' and also matched digits and most letters
509 $indent = sprintf("\\ {%d}", strlen($prefix)); // continuation lines must be indented by strlen($prefix) spaces ($prefix is set on a line not shown here)
511 $bullet = trim($m->match);
512 $this->_tag = $bullet == '#' ? 'ol' : 'ul'; // '#' bullets make an ordered list, every other bullet an unordered one
514 $this->_content[] = new SubBlock($input, $indent, $m->match, 'li'); // parse the item body as a nested block wrapped in <li>
518 function merge ($nextBlock, $followsSpace) {
519 if (isa($nextBlock, 'Block_list') && $this->_tag == $nextBlock->_tag) {
520 array_splice($this->_content, count($this->_content), 0,
521 $nextBlock->_content);
523 $this->_isLoose = true;
530 $list = new Tightenable($this->_tag, false, $this->_content);
531 if (!$this->_isLoose && $list->canTighten())
537 class Block_dl extends Block_list
540 var $_re = '\ {0,4}\S.*:\s*$';
542 function _match (&$input, $m) {
543 if (!($p = $this->_do_match($input, $m)))
545 list ($term, $defn) = $p;
547 $this->_content[] = HTML::dt($term);
548 $this->_content[] = $defn;
552 function _do_match (&$input, $m) {
553 $pos = $input->getPos();
555 $firstIndent = strspn($m->match, ' ');
556 $pat = sprintf('/\ {%d,%d}(?=\s*\S)/A', $firstIndent + 1, $firstIndent + 5);
560 $line = $input->currentLine();
562 if (!$line || !preg_match($pat, $line, $mm)) {
563 $input->setPos($pos);
564 return false; // No body found.
567 $indent = strlen($mm[0]);
568 $term = TransformInline(rtrim(substr(trim($m->match),0,-1)));
569 $defn = new SubBlock($input, sprintf("\\ {%d}", $indent), false, 'dd');
570 return array($term, $defn);
576 class Block_table_dl_defn extends XmlContent
581 function Block_table_dl_defn ($term, $defn) {
583 if (!is_array($defn))
584 $defn = $defn->getContent();
586 $this->_ncols = $this->_ComputeNcols($defn);
589 foreach ($defn as $item) {
590 if ($this->_IsASubtable($item))
591 $this->_addSubtable($item);
593 $this->_addToRow($item);
597 $th = HTML::th($term);
598 if ($this->_nrows > 1)
599 $th->setAttr('rowspan', $this->_nrows);
600 $this->_setTerm($th);
603 function _addToRow ($item) {
604 if (empty($this->_accum)) {
605 $this->_accum = HTML::td();
606 if ($this->_ncols > 2)
607 $this->_accum->setAttr('colspan', $this->_ncols - 1);
609 $this->_accum->pushContent($item);
612 function _flushRow () {
613 if (!empty($this->_accum)) {
614 $this->pushContent(HTML::tr($this->_accum));
615 $this->_accum = false;
620 function _addSubtable ($table) {
622 foreach ($table->getContent() as $subdef) {
623 $this->pushContent($subdef);
624 $this->_nrows += $subdef->nrows();
628 function _setTerm ($th) {
629 $first_row = &$this->_content[0];
630 if (isa($first_row, 'Block_table_dl_defn'))
631 $first_row->_setTerm($th);
633 $first_row->unshiftContent($th);
636 function _ComputeNcols ($defn) {
638 foreach ($defn as $item) {
639 if ($this->_IsASubtable($item)) {
640 $row = $this->_FirstDefn($item);
641 $ncols = max($ncols, $row->ncols() + 1);
647 function _IsASubtable ($item) {
648 return isa($item, 'HtmlElement')
649 && $item->getTag() == 'table'
650 && $item->getAttr('class') == 'wiki-dl-table';
653 function _FirstDefn ($subtable) {
654 $defs = $subtable->getContent();
659 return $this->_ncols;
663 return $this->_nrows;
666 function setWidth ($ncols) {
667 assert($ncols >= $this->_ncols);
668 if ($ncols <= $this->_ncols)
670 $rows = &$this->_content;
671 for ($i = 0; $i < count($rows); $i++) {
673 if (isa($row, 'Block_table_dl_defn'))
674 $row->setWidth($ncols - 1);
676 $n = count($row->_content);
677 $lastcol = &$row->_content[$n - 1];
678 $lastcol->setAttr('colspan', $ncols - 1);
684 class Block_table_dl extends Block_dl
686 var $_re = '\ {0,4} (?:\S.*)? \| \s* $';
688 function _match (&$input, $m) {
689 if (!($p = $this->_do_match($input, $m)))
691 list ($term, $defn) = $p;
693 $this->_content[] = new Block_table_dl_defn($term, $defn);
699 $defs = &$this->_content;
702 foreach ($defs as $defn)
703 $ncols = max($ncols, $defn->ncols());
704 foreach ($defs as $key => $defn)
705 $defs[$key]->setWidth($ncols);
707 return HTML::table(array('class' => 'wiki-dl-table',
708 'border' => 2, // FIXME: CSS?
715 class Block_oldlists extends Block_list
717 //var $_tag = 'ol', 'ul', or 'dl';
718 var $_re = '(?: [*] (?! \S[^*]* (?<=\S) [*](?!\S) )
719 | [#] (?! \[ .*? \] )
723 function _match (&$input, $m) {
725 if (!preg_match('/[*#;]*$/A', $input->getPrefix())) {
731 $oldindent = '[*#;](?=[#*]|;.*:.*\S)';
732 $newindent = sprintf('\\ {%d}', strlen($prefix));
733 $indent = "(?:$oldindent|$newindent)";
735 $bullet = $prefix[0];
736 if ($bullet == '*') {
740 elseif ($bullet == '#') {
746 list ($term,) = explode(':', substr($prefix, 1), 2);
749 $this->_content[] = HTML::dt(false, TransformInline($term));
753 $this->_content[] = new SubBlock($input, $indent, $m->match, $itemtag);
758 class Block_pre extends BlockMarkup
760 var $_re = '<(?:pre|verbatim)>';
762 function _match (&$input, $m) {
763 $endtag = '</' . substr($m->match, 1);
765 $pos = $input->getPos();
767 $line = $m->postmatch;
768 while (ltrim($line) != $endtag) {
770 if (($line = $input->nextLine()) === false) {
771 $input->setPos($pos);
777 $text = join("\n", $text);
779 // FIXME: no <img>, <big>, <small>, <sup>, or <sub>'s allowed
781 if ($m->match == '<pre>')
782 $text = TransformInline($text);
784 $this->_html = HTML::pre(false, $text);
794 class Block_plugin extends Block_pre
796 var $_re = '<\?plugin(?:-form)?(?!\S)';
799 /* <?plugin Backlinks
803 function _match (&$input, $m) {
804 $pos = $input->getPos();
805 $pi = $m->match . $m->postmatch;
806 while (!preg_match('/(?<!~)\?>\s*$/', $pi)) {
807 if (($line = $input->nextLine()) === false) {
808 $input->setPos($pos);
815 $this->_plugin_pi = $pi;
821 $loader = new WikiPluginLoader;
823 return HTML::div(array('class' => 'plugin'),
824 $loader->expandPI($this->_plugin_pi, $request));
828 class Block_email_blockquote extends BlockMarkup
830 // FIXME: move CSS to CSS.
831 var $_attr = array('style' => 'border-left-width: medium; border-left-color: #0f0; border-left-style: ridge; padding-left: 1em; margin-left: 0em; margin-right: 0em;');
835 function _match (&$input, $m) {
836 $indent = str_replace(' ', '\\ ', $m->match) . '|>$';
837 $this->_block = new SubBlock($input, $indent, $m->match,
838 'blockquote', $this->_attr);
843 return $this->_block;
847 class Block_hr extends BlockMarkup
849 var $_re = '-{4,}\s*$';
851 function _match (&$input, $m) {
861 class Block_heading extends BlockMarkup
865 function _match (&$input, $m) {
866 $this->_tag = "h" . (5 - strlen($m->match));
867 $this->_text = TransformInline(trim($m->postmatch));
873 return new HtmlElement($this->_tag, false, $this->_text);
877 class Block_p extends BlockMarkup
882 function _match (&$input, $m) {
883 $this->_text = $m->match;
888 function merge ($nextBlock, $followsSpace) { // Absorb a directly following paragraph block (no blank line in between) into this one.
889 $class = strtolower(get_class($nextBlock)); // get_class() lower-cases on PHP 4 but keeps the declared case ('Block_p') on PHP 5+, so normalise before comparing
890 if ($class == 'block_p' && !$followsSpace) {
891 $this->_text .= "\n" . $nextBlock->_text; // keep the line break; inline transformation happens later on the joined text
898 $content = TransformInline(trim($this->_text));
899 return new TightenableParagraph($content);
903 ////////////////////////////////////////////////////////////////
906 function TransformText ($text, $markup = 2.0) { // Parse wiki text (or a page revision) and return the result as an XmlContent.
907 if (isa($text, 'WikiDB_PageRevision')) {
909 $text = $rev->getPackedContent(); // NOTE(review): $rev is assigned on a line not shown here — presumably the revision passed in as $text; confirm
910 $markup = $rev->get('markup');
913 if (empty($markup) || $markup < 2.0) { // missing or pre-2.0 markup is passed through ConvertOldMarkup() first
914 //include_once("lib/transform.php");
915 //return do_transform($text);
916 $text = ConvertOldMarkup($text);
919 // Expand leading tabs.
920 $text = expand_tabs($text);
924 $output = new WikiText($text);
925 return new XmlContent($output->getContent()); // hand back only the parsed children, not the WikiText element itself
928 // (c-file-style: "gnu")
933 // c-hanging-comment-ender-p: nil
934 // indent-tabs-mode: nil