1 <?php rcs_id('$Id: BlockParser.php,v 1.18 2002-02-01 06:00:28 dairiki Exp $');
2 /* Copyright (C) 2002, Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * This file is part of PhpWiki.
6 * PhpWiki is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * PhpWiki is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with PhpWiki; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 require_once('lib/HtmlElement.php');
21 require_once('lib/InlineParser.php');
23 require_once('lib/transform.php');
25 ////////////////////////////////////////////////////////////////
// FIXME: unify this with the RegexpSet in InlineParser.
38 * Return type from RegexpSet::match and RegexpSet::nextMatch.
42 class AnchoredRegexpSet_match {
49 * The text following the matched text.
54 * Index of the regular expression which matched.
/**
 * A set of regular expressions which are matched only at the very
 * start of the subject text.
 *
 * The expressions are joined into a single alternation and compiled
 * with the PCRE "A" (anchored) and "x" (extended) modifiers, so
 * unescaped whitespace inside the individual expressions is ignored.
 */
class AnchoredRegexpSet
{
    /** The individual regular expressions, in order of precedence. */
    var $_regexps;

    /** The combined pattern used by match(). */
    var $_re;

    /**
     * Constructor.
     *
     * Declared before the class-named constructor below so that PHP 5
     * does not complain about a redefined constructor.
     *
     * @param $regexps array A list of regular expressions.  The
     * regular expressions should not include any sub-pattern groups
     * "(...)".  (Anonymous groups, like "(?:...)", as well as
     * look-ahead and look-behind assertions are fine.)
     */
    function __construct ($regexps) {
        $this->_regexps = $regexps;
        $this->_re = "/((" . join(")|(", $regexps) . "))/Ax";
    }

    /**
     * PHP 4 style constructor.
     *
     * PHP 4 only recognizes class-named constructors, while PHP 8 no
     * longer treats them as constructors at all.  Forwarding to
     * __construct() keeps the class initialized on both.
     *
     * @param $regexps array See __construct().
     */
    function AnchoredRegexpSet ($regexps) {
        $this->__construct($regexps);
    }

    /**
     * Match the start of the text against the whole set.
     *
     * @param $text string The text to search.
     *
     * @return object An AnchoredRegexpSet_match object, or false if
     * no regexp in the set matches at the start of $text.
     */
    function match ($text) {
        if (! preg_match($this->_re, $text, $m)) {
            return false;
        }
        return $this->_makeMatch($text, $m, 0);
    }

    /**
     * Try the regexps which come after the previously matched one.
     *
     * Only the regexps following $prevMatch->regexp_ind are tried, and
     * (like match()) only at the start of $text.
     *
     * @param $text string Text to search.
     *
     * @param $prevMatch object An AnchoredRegexpSet_match object.
     * $prevMatch should be a match object obtained by a previous
     * match upon the same value of $text.
     *
     * @return object An AnchoredRegexpSet_match object, or false if
     * none of the remaining regexps match.
     */
    function nextMatch ($text, $prevMatch) {
        $first = $prevMatch->regexp_ind + 1;
        $regexps = array_slice($this->_regexps, $first);
        if (!$regexps) {
            return false;       // The last regexp in the set was already tried.
        }

        $pat = "/ ( (" . join(')|(', $regexps) . ") ) /Axs";
        if (! preg_match($pat, $text, $m)) {
            return false;
        }
        return $this->_makeMatch($text, $m, $first);
    }

    /**
     * Build the match object from a successful preg_match() result.
     *
     * @param $text string The text which was matched.
     * @param $m array The sub-pattern array filled in by preg_match().
     * @param $offset int Index (within the full set) of the first
     * regexp which took part in the pattern.
     *
     * @return object An AnchoredRegexpSet_match object.
     */
    function _makeMatch ($text, $m, $offset) {
        $match = new AnchoredRegexpSet_match;
        $match->postmatch = substr($text, strlen($m[0]));
        $match->match = $m[1];
        // $m[0] is the whole match, $m[1] the outer group and $m[2 + i]
        // the group of the i-th regexp.  preg_match() drops trailing
        // groups which did not take part, so the last entry identifies
        // the alternative which matched.
        $match->regexp_ind = count($m) - 3 + $offset;
        return $match;
    }
}
139 class BlockParser_Input {
141 function BlockParser_Input ($text) {
143 // Expand leading tabs.
144 // FIXME: do this better.
146 // We want to ensure the only characters matching \s are ' ' and "\n".
148 $text = preg_replace('/(?![ \n])\s/', ' ', $text);
149 assert(!preg_match('/(?![ \n])\s/', $text));
151 $this->_lines = preg_split('/[^\S\n]*\n/', $text);
154 $this->_initBlockTypes();
157 function currentLine () {
158 if ($this->_pos >= count($this->_lines))
160 return $this->_lines[$this->_pos];
163 function nextLine () {
165 if ($this->_pos >= count($this->_lines))
167 return $this->_lines[$this->_pos];
170 function advance () {
178 function setPos ($pos) {
182 function getPrefix () {
186 function getDepth () {
191 if ($this->_pos < count($this->_lines))
192 return $this->_lines[$this->_pos];
198 function _initBlockTypes () {
199 foreach (array('oldlists', 'list', 'dl', 'table_dl',
200 'blockquote', 'heading', 'hr', 'pre', 'email_blockquote',
203 $class = "Block_$type";
205 $this->_block_types[] = $proto;
206 $this->_regexps[] = $proto->_re;
208 $this->_regexpset = new AnchoredRegexpSet($this->_regexps);
211 function getBlock() {
214 $line = $this->currentLine();
216 $atSpace = $this->getPos();
217 while ( ($line = $this->nextLine()) === '' )
220 if ($line === false) {
222 $this->setPos($atSpace);
226 $re_set = &$this->_regexpset;
227 for ($m = $re_set->match($line); $m; $m = $re_set->nextMatch($line, $m)) {
228 $block = $this->_block_types[$m->regexp_ind];
229 //$this->_debug('>', get_class($block));
231 if ($block->_match($this, $m)) {
232 //$this->_debug('<', get_class($block));
233 $block->_follows_space = (bool) $atSpace;
236 //$this->_debug('[', "_match failed");
240 //if ($input->getDepth() == 0) {
241 // We should never get here.
242 //preg_match('/.*/A', substr($this->_text, $this->_pos), $m);// get first line
243 trigger_error("Couldn't match block: '$line'", E_USER_NOTICE);
250 function _debug ($tab, $msg) {
252 $where = $this->where();
253 $tab = str_repeat('____', $this->getDepth() ) . $tab;
254 printXML(HTML::div("$tab $msg: at: '",
260 class BlockParser_InputSubBlock extends BlockParser_Input
262 function BlockParser_InputSubBlock (&$input, $prefix_re, $initial_prefix = false) {
263 $this->_input = &$input;
264 $this->_prefix_pat = "/$prefix_re|\\s*\$/Ax";
265 $this->_prefix = $initial_prefix;
268 $this->_block_types = &$input->_block_types;
269 $this->_regexpset = &$input->_regexpset;
/**
 * Get the current line with this sub-block's prefix stripped.
 *
 * If no prefix is currently recorded, the prefix pattern is matched
 * against the line and the matched text is remembered as the prefix.
 *
 * @return string The rest of the line after the prefix, or false if
 * the parent input has no current line or the prefix does not match.
 */
function currentLine () {
    $raw = $this->_input->currentLine();
    if ($raw === false) {
        return false;
    }

    if ($this->_prefix === false) {
        $found = preg_match($this->_prefix_pat, $raw, $hit);
        if (!$found) {
            return false;
        }
        $this->_prefix = $hit[0];
    }

    $skip = strlen($this->_prefix);
    return (string) substr($raw, $skip);
}
/**
 * Step the parent input to its next line and return that line with
 * this sub-block's prefix stripped.
 *
 * The recorded prefix is replaced by the text the prefix pattern
 * matches on the new line, or reset to false on failure.
 *
 * @return string The rest of the new line after the prefix, or false
 * if the parent input is exhausted or the prefix does not match.
 */
function nextLine () {
    $raw = $this->_input->nextLine();

    // The pattern is only consulted when there actually is a line.
    if ($raw === false || !preg_match($this->_prefix_pat, $raw, $hit)) {
        $this->_prefix = false;
        return false;
    }

    $this->_prefix = $hit[0];
    return (string) substr($raw, strlen($this->_prefix));
}
296 function advance () {
297 $this->_prefix = false;
298 $this->_input->advance();
302 return array($this->_prefix, $this->_input->getPos());
305 function setPos ($pos) {
306 $this->_prefix = $pos[0];
307 $this->_input->setPos($pos[1]);
310 function getPrefix () {
311 assert ($this->_prefix !== false);
312 return $this->_input->getPrefix() . $this->_prefix;
315 function getDepth () {
316 return $this->_input->getDepth() + 1;
320 return $this->_input->where();
325 class BlockMarkup extends XmlContent {
331 function _match (&$input, $match) {
332 trigger_error('pure virtual', E_USER_ERROR);
335 function merge ($followingBlock) {
339 function finish (/*$tighten*/) {
340 return new HtmlElement($this->_tag, $this->_attr, $this->getContent());
344 class Block extends BlockMarkup {
345 function Block ($text = false) {
348 $this->_parse(new BlockParser_Input($text));
/**
 * Read all blocks from the input and add them to this block's content.
 *
 * Each block is offered every block which follows it via merge(); as
 * long as merge() succeeds the merged result replaces the current
 * block.  The first block which cannot be merged ends the run and
 * becomes the next current block.
 *
 * @param $input object The input to read blocks from.
 */
function _parse (&$input) {
    $block = $input->getBlock();
    while ($block) {
        while ($nextBlock = $input->getBlock()) {
            $merged = $block->merge($nextBlock);
            if (!$merged) {
                break;          // can't merge; $nextBlock starts the next run
            }
            $block = $merged;
        }
        $this->pushContent($block->finish());
        $block = $nextBlock;
    }
}
365 class SubBlock extends Block {
366 function SubBlock (&$input, $indent_re, $initial_indent = false) {
368 $this->_parse(new BlockParser_InputSubBlock($input,
/**
 * A block quote: text indented by one or more spaces.
 */
class Block_blockquote extends Block
{
    var $_tag ='blockquote';

    /** Number of leading spaces which introduced this quote. */
    var $_depth;

    // One or more spaces, followed by something which is not whitespace.
    var $_re = '\ +(?=\S)';

    /**
     * Parse an indented block as the content of this quote.
     *
     * @param $input object The input being parsed.
     * @param $m object The match for the leading indentation.
     *
     * @return bool Always true.
     */
    function _match (&$input, $m) {
        $this->_depth = strlen($m->match);
        // Subsequent lines must carry exactly the same indentation.
        $indent = sprintf("\\ {%d}", $this->_depth);
        $this->pushContent(new SubBlock($input, $indent, $m->match));
        return true;
    }

    /**
     * Merge this quote into a following block quote.
     *
     * The finished markup of this quote is placed at the front of the
     * following quote's content, and the following quote is returned
     * as the result of the merge.
     *
     * @param $nextBlock object The block which follows this one.
     *
     * @return object $nextBlock if it is a Block_blockquote, else false.
     */
    function merge ($nextBlock) {
        // get_class() returns the lower-cased name on PHP 4 but the
        // declared case on PHP 5 and later; compare case-insensitively
        // so the merge works on both.
        if (strtolower(get_class($nextBlock)) == 'block_blockquote') {
            assert ($nextBlock->_depth < $this->_depth);
            $nextBlock->unshiftContent($this->finish());
            return $nextBlock;
        }
        return false;
    }
}
399 class Block_list extends Block
401 //var $_tag = 'ol' or 'ul';
403 (?: [+#] | -(?!-) | [o](?=\ )
404 | [*] (?! \S[^*]*(?<=\S)[*](?!\S) )
/**
 * Start a list from a matched bullet.
 *
 * @param $input object The input being parsed.
 * @param $m object The match for the indentation and bullet.
 *
 * @return bool False if the enclosing prefix already contains a
 * bullet character, true otherwise.
 */
function _match (&$input, $m) {
    // A list as the first content in a list is not allowed.
    // "*  * Item"
    // Should markup as <ul><li>* Item</li></ul>,
    // not <ul><li><ul><li>Item</li></ul></li></ul>.
    //
    // The hyphen goes last in the character class so that it is a
    // literal '-' and does not form the range '+' through 'o', which
    // would also match digits, '>', ';' and most letters.
    if (preg_match('/[*#+o-]/', $input->getPrefix())) {
        return false;
    }

    $prefix = $m->match;
    // Continuation lines are indented by the width of the bullet prefix.
    $indent = sprintf("\\ {%d}", strlen($prefix));

    $bullet = trim($m->match);
    $this->_tag = $bullet == '#' ? 'ol' : 'ul';

    $this->pushContent(HTML::li(new SubBlock($input, $indent, $m->match)));
    return true;
}
/**
 * Absorb a following list of the same kind.
 *
 * @param $nextBlock object The block which follows this one.
 *
 * @return object This list, with the content of $nextBlock appended,
 * if $nextBlock is a Block_list with the same tag; false otherwise.
 */
function merge ($nextBlock) {
    if (!isa($nextBlock, 'Block_list')) {
        return false;
    }
    if ($this->_tag != $nextBlock->_tag) {
        return false;
    }

    $items = $nextBlock->getContent();
    $this->pushContent($items);
    return $this;
}
437 class Block_dl extends Block_list
440 var $_re = '\ {0,4}\S.*:\s*$';
442 function _match (&$input, $m) {
443 if (!($p = $this->_do_match($input, $m)))
445 list ($term, $defn) = $p;
447 $this->pushContent(HTML::dt($term), HTML::dd($defn));
451 function _do_match (&$input, $m) {
452 $pos = $input->getPos();
454 while ( ($line = $input->nextLine()) !== false ) {
455 if (preg_match('/\ *(?=\S)/A', $line, $mm)) {
456 $indent = strlen($mm[0]);
460 $input->setPos($pos);
463 return false; // No body found.
465 $input->advance(); // Skip the first line.
467 $term = TransformInline(rtrim(substr(trim($m->match),0,-1)));
468 $defn = new SubBlock($input, sprintf("\\ {%d}", $indent));
469 return array($term, $defn);
475 class Block_table_dl_defn extends XmlContent
480 function Block_table_dl_defn ($term, $defn) {
482 if (!is_array($defn))
483 $defn = $defn->getContent();
485 $this->_ncols = $this->_ComputeNcols($defn);
488 foreach ($defn as $item) {
489 if ($this->_IsASubtable($item))
490 $this->_addSubtable($item);
492 $this->_addToRow($item);
496 $th = HTML::th($term);
497 if ($this->_nrows > 1)
498 $th->setAttr('rowspan', $this->_nrows);
499 $this->_setTerm($th);
502 function _addToRow ($item) {
503 if (empty($this->_accum)) {
504 $this->_accum = HTML::td();
505 if ($this->_ncols > 2)
506 $this->_accum->setAttr('colspan', $this->_ncols - 1);
508 $this->_accum->pushContent($item);
511 function _flushRow () {
512 if (!empty($this->_accum)) {
513 $this->pushContent(HTML::tr($this->_accum));
514 $this->_accum = false;
519 function _addSubtable ($table) {
521 foreach ($table->getContent() as $subdef) {
522 $this->pushContent($subdef);
523 $this->_nrows += $subdef->nrows();
527 function _setTerm ($th) {
528 $first_row = &$this->_content[0];
529 if (isa($first_row, 'Block_table_dl_defn'))
530 $first_row->_setTerm($th);
532 $first_row->unshiftContent($th);
535 function _ComputeNcols ($defn) {
537 foreach ($defn as $item) {
538 if ($this->_IsASubtable($item)) {
539 $row = $this->_FirstDefn($item);
540 $ncols = max($ncols, $row->ncols() + 1);
546 function _IsASubtable ($item) {
547 return isa($item, 'HtmlElement')
548 && $item->getTag() == 'table'
549 && $item->getAttr('class') == 'wiki-dl-table';
552 function _FirstDefn ($subtable) {
553 $defs = $subtable->getContent();
558 return $this->_ncols;
562 return $this->_nrows;
565 function setWidth ($ncols) {
566 assert($ncols >= $this->_ncols);
567 if ($ncols <= $this->_ncols)
569 $rows = &$this->_content;
570 for ($i = 0; $i < count($rows); $i++) {
572 if (isa($row, 'Block_table_dl_defn'))
573 $row->setWidth($ncols - 1);
575 $n = count($row->_content);
576 $lastcol = &$row->_content[$n - 1];
577 $lastcol->setAttr('colspan', $ncols - 1);
583 class Block_table_dl extends Block_dl
586 var $_attr = array('class' => 'wiki-dl-table',
587 'border' => 2, // FIXME: CSS?
592 var $_re = '\ {0,4} (?:\S.*)? \| \s* $';
594 function _match (&$input, $m) {
595 if (!($p = $this->_do_match($input, $m)))
597 list ($term, $defn) = $p;
599 $this->pushContent(new Block_table_dl_defn($term, $defn));
604 $defs = &$this->_content;
607 foreach ($defs as $defn)
608 $ncols = max($ncols, $defn->ncols());
609 foreach ($defs as $key => $defn)
610 $defs[$key]->setWidth($ncols);
612 return parent::finish();
616 class Block_oldlists extends Block_list
618 //var $_tag = 'ol', 'ul', or 'dl';
619 var $_re = '(?: [*] (?! \S[^*]* (?<=\S) [*](?!\S) )
625 function _match (&$input, $m) {
627 if (!preg_match('/[*#;]*$/A', $input->getPrefix())) {
633 $oldindent = '[*#;](?=[#*]|;.*:.*\S)';
634 $newindent = sprintf('\\ {%d}', strlen($prefix));
635 $indent = "(?:$oldindent|$newindent)";
637 $bullet = $prefix[0];
638 if ($bullet == '*') {
642 elseif ($bullet == '#') {
648 list ($term,) = explode(':', substr($prefix, 1), 2);
651 $this->pushContent(HTML::dt(false, TransformInline($term)));
655 $item->pushContent(new SubBlock($input, $indent, $m->match));
656 $this->pushContent($item);
661 class Block_pre extends BlockMarkup
664 var $_re = '<(?:pre|verbatim)>';
/**
 * Collect the lines up to the closing tag as preformatted text.
 *
 * @param $input object The input being parsed.
 * @param $m object The match for the opening tag.
 *
 * @return bool False if no closing tag is found, true otherwise.
 */
function _match (&$input, $m) {
    // '<pre>' => '</pre>', '<verbatim>' => '</verbatim>'
    $endtag = '</' . substr($m->match, 1);

    if (($text = $this->_getBlock($input, $endtag)) === false)
        return false;

    // Keep any text which follows the opening tag on the same line.
    // Compare against '' explicitly: a plain truth test would drop
    // the text "0".
    if (ltrim((string) $m->postmatch) !== '')
        array_unshift($text, $m->postmatch);
    $text = join("\n", $text);

    // FIXME: no <img>, <big>, <small>, <sup>, or <sub>'s allowed
    // Inline markup is applied within <pre>, but not within <verbatim>.
    if ($m->match == '<pre>')
        $text = TransformInline($text);

    $this->pushContent($text);
    return true;
}
685 function _getBlock (&$input, $end_tag) {
686 $pos = $input->getPos();
689 while ( ($line = $input->nextLine()) !== false ) {
690 if (rtrim($line) == $end_tag) {
696 $input->setPos($pos);
702 class Block_plugin extends Block_pre
705 var $_attr = array('class' => 'plugin');
706 var $_re = '<\?plugin(?:-form)?\s';
708 function _match (&$input, $m) {
709 if (preg_match('/( .*? (?<!~) \?> ) (.*)/x', $m->postmatch, $mm)) {
712 $pi = $m->match . $m->postmatch;
714 printXML(HTML::p("PI:", $pi));
718 if (($text = $this->_getBlock($input, '?>')) === false)
720 $pi = $m->match . $m->postmatch . "\n" . join("\n", $text) . "\n?>";
724 $loader = new WikiPluginLoader;
725 $this->pushContent($loader->expandPI($pi, $request));
730 class Block_email_blockquote extends Block
732 // FIXME: move CSS to CSS.
733 var $_tag ='blockquote';
734 var $_attr = array('style' => 'border-left-width: medium; border-left-color: #0f0; border-left-style: ridge; padding-left: 1em; margin-left: 0em; margin-right: 0em;');
738 function _match (&$input, $m) {
739 $indent = str_replace(' ', '\\ ', $m->match);
740 $this->pushContent(new SubBlock($input, $indent, $m->match));
745 class Block_hr extends BlockMarkup
748 var $_re = '-{4,}\s*$';
750 function _match (&$input, $m) {
756 class Block_heading extends BlockMarkup
760 function _match (&$input, $m) {
761 $this->_tag = "h" . (5 - strlen($m->match));
762 $this->pushContent(TransformInline(trim($m->postmatch)));
768 class Block_p extends BlockMarkup
773 function _match (&$input, $m) {
774 $this->_text = $m->match;
/**
 * Absorb a directly following paragraph line.
 *
 * @param $nextBlock object The block which follows this one.
 *
 * @return object This paragraph, with the text of $nextBlock appended,
 * if $nextBlock is a Block_p which is not flagged as following blank
 * space; false otherwise.
 */
function merge ($nextBlock) {
    // get_class() returns the lower-cased name on PHP 4 but the
    // declared case on PHP 5 and later; compare case-insensitively
    // so the merge works on both.
    $class = strtolower(get_class($nextBlock));
    if ($class == 'block_p' && !$nextBlock->_follows_space) {
        // Separate the lines, so that the last word of one line does
        // not run into the first word of the next.
        $this->_text .= "\n" . $nextBlock->_text;
        return $this;
    }
    return false;
}
789 $this->pushContent(TransformInline(trim($this->_text)));
790 return parent::finish();
794 ////////////////////////////////////////////////////////////////
800 // FIXME: This is temporary, too...
801 function NewTransform ($text) {
805 // Expand leading tabs.
806 // FIXME: do this better. also move it...
807 $text = preg_replace('/^\ *[^\ \S\n][^\S\n]*/me', "str_repeat(' ', strlen('\\0'))", $text);
808 assert(!preg_match('/^\ *\t/', $text));
810 $output = new Block($text);
816 function TransformRevision ($revision) {
817 if ($revision->get('markup') == 'new') {
818 return NewTransform($revision->getPackedContent());
821 return do_transform($revision->getContent());
826 // (c-file-style: "gnu")
831 // c-hanging-comment-ender-p: nil
832 // indent-tabs-mode: nil