1 <?php rcs_id('$Id: BlockParser.php,v 1.16 2002-01-31 02:48:16 dairiki Exp $');
2 /* Copyright (C) 2002, Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * This file is part of PhpWiki.
6 * PhpWiki is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * PhpWiki is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with PhpWiki; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 require_once('lib/HtmlElement.php');
21 require_once('lib/InlineParser.php');
23 require_once('lib/transform.php');
// InlineTransform: a WikiTransform whose (old-style, class-named) constructor
// registers the inline markup handlers used by TransformInline() below.
26 extends WikiTransform {
// Constructor. Chains to the parent WikiTransform constructor and then
// registers handlers; registration order is significant (see note below).
27 function InlineTransform() {
28 global $WikiNameRegexp, $AllowedProtocols, $InterWikiLinkRegexp;
30 $this->WikiTransform();
33 // functions are applied in order of registering
35 $this->register(WT_SIMPLE_MARKUP, 'wtm_plugin_link');
// Tokenizers: each is registered together with the regexp that triggers it.
37 $this->register(WT_TOKENIZER, 'wtt_doublebrackets', '\[\[');
38 //$this->register(WT_TOKENIZER, 'wtt_footnotes', '^\[\d+\]');
39 //$this->register(WT_TOKENIZER, 'wtt_footnoterefs', '\[\d+\]');
40 $this->register(WT_TOKENIZER, 'wtt_bracketlinks', '\[.+?\]');
// URL pattern: optional leading '!', one of $AllowedProtocols, then the URL
// body. The final character class additionally excludes ',', '.' and '?' so
// that trailing punctuation is not swallowed into the link.
41 $this->register(WT_TOKENIZER, 'wtt_urls',
42 "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]");
// Interwiki links are only tokenized when the handler function is defined.
44 if (function_exists('wtt_interwikilinks')) {
45 $this->register(WT_TOKENIZER, 'wtt_interwikilinks',
46 pcre_fix_posix_classes("!?(?<![[:alnum:]])") .
47 "$InterWikiLinkRegexp:[^\\s.,;?()]+");
49 $this->register(WT_TOKENIZER, 'wtt_bumpylinks', "!?$WikiNameRegexp");
// Simple markup passes registered after all tokenizers.
51 $this->register(WT_SIMPLE_MARKUP, 'wtm_htmlchars');
52 $this->register(WT_SIMPLE_MARKUP, 'wtm_linebreak');
53 $this->register(WT_SIMPLE_MARKUP, 'wtm_bold_italics');
// Apply inline markup to a run of text using the old transform machinery.
//
// @param string $text  Wiki text for one block (may span several lines).
// @return RawXml       The transformed markup, wrapped as raw XML.
57 function TransformInline ($text) {
58 // The old transform code does funny things with trailing
61 $trfm = new InlineTransform;
// Captures the trailing whitespace of $text into $m (its later use is not
// visible in this excerpt).
62 preg_match('/\s*$/', $text, $m);
64 // This "\n" -> "\r" hackage is to fool the old transform code
65 // into continuing italics across lines.
66 $in = str_replace("\n", "\r", $text);
// The whole text is passed as a single "line"; trailing whitespace is
// stripped from the serialized result before the "\r" -> "\n" swap is undone.
67 $out = preg_replace('/\s*$/', '', AsXML($trfm->do_transform('', array($in))));
68 $out = str_replace("\r", "\n", $out);
// Debug dump, permanently disabled by the leading `false &&`.
72 if (false && $out != $text) {
73 echo(" IN <pre>'" . htmlspecialchars($text) . "'</pre><br>\n");
74 echo("OUT <pre>'" . htmlspecialchars($out) . "'</pre><br>\n");
76 return new RawXml($out);
80 ////////////////////////////////////////////////////////////////
// Tighten-mode values passed as the second argument of BlockParser::parse()
// and on to each block's finish(). AFTER (1) and BEFORE (2) are tested as bit
// flags with `&` in Block_p::finish(); EITHER (3) is the union of the two.
83 define("BLOCK_NEVER_TIGHTEN", 0);
84 define("BLOCK_NOTIGHTEN_AFTER", 1);
85 define("BLOCK_NOTIGHTEN_BEFORE", 2);
86 define("BLOCK_NOTIGHTEN_EITHER", 3);
// Parse all blocks available from $input.
//
// Blocks are read one at a time with _nextBlock(); each block is offered the
// blocks that follow it via merge() until a merge is refused, then its
// finish($tighten_mode) result is appended to $content. (The creation and
// return of $content are not visible in this excerpt.)
//
// @param object $input         Input object (taken by reference).
// @param int    $tighten_mode  One of the BLOCK_* tighten constants.
95 function parse (&$input, $tighten_mode = BLOCK_NEVER_TIGHTEN) {
// $nextBlock left over from the inner loop becomes the next current block.
98 for ($block = BlockParser::_nextBlock($input); $block; $block = $nextBlock) {
99 while ($nextBlock = BlockParser::_nextBlock($input)) {
100 // Attempt to merge current with following block.
101 if (! $block->merge($nextBlock))
102 break; // can't merge
105 $content->pushContent($block->finish($tighten_mode));
// Read the next block from $input.
//
// Each prototype in the global $Block_BlockTypes list is tried in list order;
// the first one whose `_re` matches at the current position is parsed.
// (How $block is created from the matching type is not visible here.)
110 function _nextBlock (&$input) {
111 global $Block_BlockTypes;
116 foreach ($Block_BlockTypes as $type) {
117 if ($m = $input->match($type->_re)) {
118 BlockParser::_debug('>', get_class($type), $input);
// Record whether blank lines preceded this block *before* parsing consumes input.
121 $block->_followsBreak = $input->atBreak();
122 if (!$block->_parse($input, $m)) {
123 BlockParser::_debug('[', "_parse failed", $input);
// Swallow blank lines after the block and remember whether there were any.
126 $block->_preceedsBreak = $input->eatSpace();
127 BlockParser::_debug('<', get_class($type), $input);
// No block type matched. At nesting depth 0 this is reported as unexpected.
132 if ($input->getDepth() == 0) {
133 // We should never get here.
134 //preg_match('/.*/A', substr($this->_text, $this->_pos), $m);// get first line
// NOTE(review): with the line above commented out, $m here is whatever the
// last failed match() call returned, so $m[0] is probably not the offending
// line of text -- confirm and fix where the full function is visible.
135 trigger_error("Couldn't match block: '".rawurlencode($m[0])."'", E_USER_NOTICE);
137 //FIXME:$this->_debug("no match");
// Emit a debugging <div> describing the parser position.
//
// @param string $tab    Marker character; repeated (depth + 1) times.
// @param string $msg    Message text.
// @param object $input  Input object; its where() text is shown in <tt>.
// (Any guard that disables this output is not visible in this excerpt.)
141 function _debug ($tab, $msg, $input) {
144 $tab = str_repeat($tab, $input->getDepth() + 1);
145 printXML(HTML::div("$tab $msg: at: '",
146 HTML::tt($input->where()),
// Thin wrapper around the match array produced by BlockParser_Input::match().
152 class BlockParser_Match {
// @param array $match_data  The preg_match() result array.
153 function BlockParser_Match ($match_data) {
154 $this->_m = $match_data;
// Returns the matched line prefix (body not visible in this excerpt).
157 function getPrefix () {
// Return sub-match $n of the block's own regexp (0 = the whole match).
// The index is shifted by two because BlockParser_Input::match() wraps the
// regexp as "(prefix)(regexp)", which adds two leading groups. The prefix is
// then removed from the start of every continuation line of the text.
161 function getMatch ($n = 0) {
162 $text = $this->_m[$n + 2];
163 //if (preg_match('/\n./s', $text)) {
164 $prefix = $this->getPrefix();
165 $text = str_replace("\n$prefix", "\n", $text);
// Cursor over the wiki text being parsed: holds the text, the current
// position, the active line prefix and the "at a paragraph break" flag.
172 class BlockParser_Input {
// Constructor: normalizes $text and resets the parsing state.
// @param string $text  The raw wiki text.
174 function BlockParser_Input ($text) {
175 $this->_text = $text;
179 // Expand leading tabs.
180 // FIXME: do this better.
182 // We want to ensure the only characters matching \s are ' ' and "\n".
// Every whitespace character other than ' ' and "\n" becomes a single space.
184 $this->_text = preg_replace('/(?![ \n])\s/', ' ', $this->_text);
185 assert(!preg_match('/(?![ \n])\s/', $this->_text));
// Guarantee the text ends with a newline.
186 if (!preg_match('/\n$/', $this->_text))
187 $this->_text .= "\n";
// Top-level input starts with an empty prefix and not at a break.
189 $this->_set_prefix ('');
190 $this->_atBreak = false;
// Install the line prefix(es) for this input and rebuild everything derived
// from them.
//
// @param string       $prefix       Regexp fragment that must start the
//                                   current line.
// @param string|false $next_prefix  Regexp fragment for subsequent lines;
//                                   defaults to $prefix when false.
194 function _set_prefix ($prefix, $next_prefix = false) {
195 if ($next_prefix === false)
196 $next_prefix = $prefix;
198 $this->_prefix = $prefix;
199 $this->_next_prefix = $next_prefix;
// Compiled regexps embed the prefix, so the cache must be dropped.
201 $this->_regexp_cache = array();
// A blank line is an optional prefix followed only by whitespace.
// "(?:...)" is a non-capturing group. The previous spelling "(:?...)" was a
// capturing group beginning with an optional literal ':', which made a line
// such as ":\n" count as blank.
203 $blank = "(?:$prefix)?\s*\n";
204 $this->_blank_pat = "/$blank/A";
// End of (sub-)input: end of text, or a line that is neither blank nor a
// prefixed line with content.
205 $this->_eof_pat = "/\\Z|(?!$blank|${prefix}.)/A";
// (Body of eof(); its header is not visible in this excerpt.) Tests the text
// remaining from the current position against the end-of-input pattern built
// in _set_prefix().
209 return preg_match($this->_eof_pat, substr($this->_text, $this->_pos));
// Try to match a block regexp at the current position.
//
// The regexp is rewritten once and cached (the cache is cleared whenever the
// prefix changes): '^' anchors get the next-line prefix appended, numeric
// backreferences are shifted by two, and the whole pattern is wrapped as
// "(prefix)(regexp)" and anchored with /A (multi-line mode /m).
//
// @param string $regexp  Block regexp, without delimiters.
// @return BlockParser_Match on success (the failure return is not visible
//         in this excerpt).
212 function match ($regexp) {
213 $cache = &$this->_regexp_cache;
214 if (!isset($cache[$regexp])) {
215 // Fix up any '^'s in pattern (add our prefix)
216 $re = preg_replace('/(?<! [ [ \\\\ ]) \^ /x',
217 '^' . $this->_next_prefix, $regexp);
219 // Fix any match backreferences (like '\1').
// NOTE(review): the /e (eval) modifier is deprecated since PHP 5.5 and
// removed in PHP 7; this needs preg_replace_callback() on modern PHP.
220 $re = preg_replace('/(?<= [^ \\\\ ] [ \\\\ ] )( \\d+ )/ex', "'\\1' + 2", $re);
// Group 1 = prefix, group 2 = the whole block match; the block's own groups
// therefore start at index 3 (see BlockParser_Match::getMatch()).
222 $re = "/(" . $this->_prefix . ")($re)/Am";
223 $cache[$regexp] = $re;
226 $re = $cache[$regexp];
228 if (preg_match($re, substr($this->_text, $this->_pos), $m)) {
229 return new BlockParser_Match($m);
// Consume the text covered by a successful match.
//
// @param BlockParser_Match $match  A match previously returned by match().
234 function accept ($match) {
// Element 0 is the full matched text, including the prefix.
235 $text = $match->_m[0];
// Sanity check: the match must start exactly at the current position.
237 assert(substr($this->_text, $this->_pos, strlen($text)) == $text);
238 $this->_pos += strlen($text);
// Blocks are expected to consume whole lines.
241 assert(preg_match("/\n$/", $text));
// After the first line has been consumed, the "next line" prefix takes over.
243 if ($this->_next_prefix != $this->_prefix)
244 $this->_set_prefix($this->_next_prefix);
// Real content was consumed, so we are no longer at a blank-line break.
246 $this->_atBreak = false;
251 * Consume blank lines.
253 * @return bool True if any blank lines were consumed.
255 function eatSpace () {
// First blank line: consume it, switch to the next-line prefix if it differs
// (which rebuilds the blank pattern), and flag the break.
256 if (preg_match($this->_blank_pat, substr($this->_text, $this->_pos), $m)) {
257 $this->_pos += strlen($m[0]);
258 if ($this->_next_prefix != $this->_prefix)
259 $this->_set_prefix($this->_next_prefix);
260 $this->_atBreak = true;
// Then consume any further consecutive blank lines.
262 while (preg_match($this->_blank_pat, substr($this->_text, $this->_pos), $m)) {
263 $this->_pos += strlen($m[0]);
// Note: this reports the stored flag, so it is also true if the input was
// already at a break before this call.
267 return $this->_atBreak;
// @return bool  The break flag: set by eatSpace() when blank lines are
//               consumed, cleared by accept().
270 function atBreak () {
271 return $this->_atBreak;
// @return int  Nesting depth of this input. Sub-blocks are one deeper than
//              their parent (see BlockParser_InputSubBlock); the top-level
//              initialization is not visible in this excerpt.
274 function getDepth () {
275 return $this->_depth;
// (Body of where(); its header is not visible in this excerpt.) Describes the
// current position as "[prefix]rest-of-line" for debugging output.
280 if (($m = $this->match('.*\n')))
281 return sprintf('[%s]%s', $m->getPrefix(), $m->getMatch());
// Create a nested input that reads the same text with an extended prefix.
//
// @param string       $initial_prefix     Prefix fragment for the first line.
// @param string|false $subsequent_prefix  Prefix fragment for later lines;
//                                         defaults to $initial_prefix.
// @return BlockParser_InputSubBlock
285 function subBlock ($initial_prefix, $subsequent_prefix = false) {
286 if ($subsequent_prefix === false)
287 $subsequent_prefix = $initial_prefix;
289 return new BlockParser_InputSubBlock ($this, $initial_prefix, $subsequent_prefix);
// Input view for a nested block (list item, blockquote, ...).
293 class BlockParser_InputSubBlock extends BlockParser_Input
// @param BlockParser_Input $block              The enclosing input.
// @param string            $initial_prefix     Extra prefix for the first line.
// @param string            $subsequent_prefix  Extra prefix for later lines.
295 function BlockParser_InputSubBlock (&$block, $initial_prefix, $subsequent_prefix) {
// Text, position and break flag are bound by reference, so anything consumed
// through the sub-block is also consumed from the enclosing input.
296 $this->_text = &$block->_text;
297 $this->_pos = &$block->_pos;
298 $this->_atBreak = &$block->_atBreak;
300 $this->_depth = $block->_depth + 1;
// Prefixes accumulate: the parent's prefix followed by this level's prefix.
302 $this->_set_prefix($block->_prefix . $initial_prefix,
303 $block->_next_prefix . $subsequent_prefix);
// (Members of the base Block class; the class header is not visible in this
// excerpt.)
// Break flags are filled in by BlockParser::_nextBlock(); _content collects
// the block's output.
313 var $_followsBreak = false;
314 var $_preceedsBreak = false;
315 var $_content = array();
// Must be overridden: parse the block from $input given the initial $match.
// BlockParser::_nextBlock() treats a false return as "parse failed".
318 function _parse (&$input, $match) {
319 trigger_error('pure virtual', E_USER_ERROR);
// Append content to $this->_content. All call arguments are gathered with
// func_get_args(); the loop that produces $x is not visible in this excerpt.
322 function _pushContent ($c) {
324 $c = func_get_args();
326 $this->_content[] = $x;
329 function isTerminal () {
// Offer the following block for merging; BlockParser::parse() stops merging
// on a false result. (Default body not visible in this excerpt.)
333 function merge ($followingBlock) {
// Build the output element from the block's tag, attributes and content.
// The tighten argument is ignored at this level.
337 function finish (/*$tighten*/) {
338 return new HtmlElement($this->_tag, $this->_attr, $this->_content);
// Base class for the block types below that parse nested blocks from a
// sub-input (blockquotes, lists). Overrides isTerminal(); the body is not
// visible in this excerpt.
343 class CompoundBlock extends Block
345 function isTerminal () {
// Indented text: rendered as a <blockquote> whose content is parsed
// recursively from the indented lines.
351 class Block_blockquote extends CompoundBlock
353 var $_tag ='blockquote';
// One or more spaces followed (lookahead) by a non-space character.
355 var $_re = '\ +(?=\S)';
// Parse the indented region as a sub-block that uses the matched indent as
// its line prefix; the indent width is remembered as the depth.
357 function _parse (&$input, $m) {
358 $indent = $m->getMatch();
359 $this->_depth = strlen($indent);
360 $this->_content[] = BlockParser::parse($input->subBlock($indent),
361 BLOCK_NOTIGHTEN_EITHER);
// Merge with a directly following blockquote (expected to be less deeply
// indented): this block's finished element becomes the first child of the
// following block's content, which this block then adopts.
365 function merge ($nextBlock) {
// get_class() returns the declared case on PHP 5+ (lowercase on PHP 4), so
// normalize before comparing; otherwise the merge is never attempted.
366 if (strtolower(get_class($nextBlock)) != 'block_blockquote')
368 assert ($nextBlock->_depth < $this->_depth);
370 $content = $nextBlock->_content;
371 array_unshift($content, $this->finish());
372 $this->_content = $content;
// New-style bulleted / numbered lists. Each matched item becomes one <li>;
// consecutive items of the same list type are combined by merge().
377 class Block_list extends CompoundBlock
379 //var $_tag = 'ol' or 'ul';
// Up to four spaces, then a bullet: '+', '#', a single '-' (not "--"),
// 'o' followed by a space, or '*' (the lookahead appears to exclude a '*'
// that opens a *...* emphasis span), then optional spaces before the text.
// Group 1 captures the bullet.
380 var $_re = '\ {0,4}([+#]|-(?!-)|[o](?=\ )|[*](?!\S[^*]*(?<=\S)[*](?!\S)))\ *(?=\S)';
382 function _parse (&$input, $m) {
383 // A list as the first content in a list is not allowed.
386 // Should markup as <ul><li>* Item</li></ul>,
387 // not <ul><li><ul><li>Item</li></ul></li></ul>.
// Guard: the current prefix already ends in a list bullet.
389 if (preg_match('/[-*o+#;]\s*$/', $m->getPrefix()))
// First line of the item is prefixed by the literal bullet text; following
// lines by the same number of spaces.
392 $prefix = $m->getMatch();
393 $leader = preg_quote($prefix, '/');
394 $indent = sprintf("\\ {%d}", strlen($prefix));
// '#' makes an ordered list; every other bullet an unordered one.
396 $bullet = $m->getMatch(1);
397 $this->_tag = $bullet == '#' ? 'ol' : 'ul';
399 $text = $input->subBlock($leader, $indent);
400 $content = BlockParser::parse($text, BLOCK_NOTIGHTEN_AFTER);
401 $this->_pushContent(HTML::li(false, $content));
// Absorb a following list block of the same tag by appending its items.
405 function merge ($nextBlock) {
406 if (!isa($nextBlock, 'Block_list') || $this->_tag != $nextBlock->_tag)
409 $this->_pushContent($nextBlock->_content);
// Definition list item: a "Term:" line followed by a more deeply indented
// definition.
414 class Block_dl extends Block_list
// Group 1: the term's indent (up to four spaces). Group 2: the term (must not
// start with whitespace or '!'). Group 3 (inside the lookahead): the deeper
// indent of the first non-blank following line.
417 var $_re = '(\ {0,4})([^\s!].*):\s*?\n(?=(?:\s*^)+(\1\ +)\S)';
418 // 1-------12--------2 3-----3
420 function _parse (&$input, $m) {
421 $term = TransformInline(rtrim($m->getMatch(2)));
422 $indent = $m->getMatch(3);
// Emit the term as <dt>, followed by the definition parsed from the
// sub-block indented by $indent (its wrapper element is not visible in this
// excerpt).
426 $this->_pushContent(HTML::dt(false, $term),
428 BlockParser::parse($input->subBlock($indent),
429 BLOCK_NOTIGHTEN_AFTER)));
// One term/definition entry of a "definition table": a group of table rows
// whose first row starts with the <th> for the term. Nested definition tables
// inside the definition are flattened into additional rows/columns.
436 class Block_table_dl_defn extends XmlContent
// @param mixed $term  Content for the term's <th> cell.
// @param mixed $defn  Definition content: an array, or an object providing
//                     getContent().
441 function Block_table_dl_defn ($term, $defn) {
443 if (!is_array($defn))
444 $defn = $defn->getContent();
446 $this->_ncols = $this->_ComputeNcols($defn);
// Nested definition tables are merged in; everything else is accumulated
// into the current row.
449 foreach ($defn as $item) {
450 if ($this->_IsASubtable($item))
451 $this->_addSubtable($item);
453 $this->_addToRow($item);
// The term cell spans all rows produced for this definition.
457 $th = HTML::th($term);
458 if ($this->_nrows > 1)
459 $th->setAttr('rowspan', $this->_nrows);
460 $this->_setTerm($th);
// Append $item to the pending <td>, creating the cell on first use. The cell
// spans the non-term columns when there are more than two columns overall.
463 function _addToRow ($item) {
464 if (empty($this->_accum)) {
465 $this->_accum = HTML::td();
466 if ($this->_ncols > 2)
467 $this->_accum->setAttr('colspan', $this->_ncols - 1);
469 $this->_accum->pushContent($item);
// Emit the pending <td>, if any, as a <tr> and reset the accumulator.
472 function _flushRow () {
473 if (!empty($this->_accum)) {
474 $this->pushContent(HTML::tr($this->_accum));
475 $this->_accum = false;
// Adopt every definition of a nested table as content of this one, adding
// its row count to ours.
480 function _addSubtable ($table) {
482 foreach ($table->getContent() as $subdef) {
483 $this->pushContent($subdef);
484 $this->_nrows += $subdef->nrows();
// Place $th at the start of the first row, descending into a nested
// definition if that is what comes first.
488 function _setTerm ($th) {
489 $first_row = &$this->_content[0];
490 if (isa($first_row, 'Block_table_dl_defn'))
491 $first_row->_setTerm($th);
493 $first_row->unshiftContent($th);
// Column count needed for $defn: a nested table needs one column more than
// its first definition. (Initial value and return not visible in this excerpt.)
496 function _ComputeNcols ($defn) {
498 foreach ($defn as $item) {
499 if ($this->_IsASubtable($item)) {
500 $row = $this->_FirstDefn($item);
501 $ncols = max($ncols, $row->ncols() + 1);
// True for a <table class="wiki-dl-table"> HtmlElement, i.e. a nested
// definition table.
507 function _IsASubtable ($item) {
508 return isa($item, 'HtmlElement')
509 && $item->getTag() == 'table'
510 && $item->getAttr('class') == 'wiki-dl-table';
513 function _FirstDefn ($subtable) {
514 $defs = $subtable->getContent();
// (Bodies of the ncols() / nrows() accessors; their headers are not visible
// in this excerpt.)
519 return $this->_ncols;
523 return $this->_nrows;
// Widen this definition to $ncols columns. Nested definitions are widened
// recursively to ($ncols - 1); for plain rows the last cell's colspan is set
// to ($ncols - 1).
526 function setWidth ($ncols) {
527 assert($ncols >= $this->_ncols);
528 if ($ncols <= $this->_ncols)
530 $rows = &$this->_content;
531 for ($i = 0; $i < count($rows); $i++) {
533 if (isa($row, 'Block_table_dl_defn'))
534 $row->setWidth($ncols - 1);
536 $n = count($row->_content);
537 $lastcol = &$row->_content[$n - 1];
538 $lastcol->setAttr('colspan', $ncols - 1);
// Definition table: like Block_dl, but the term line ends in '|' and the
// result is rendered as table rows (class "wiki-dl-table").
544 class Block_table_dl extends Block_list
547 var $_attr = array('class' => 'wiki-dl-table',
548 'border' => 2, // FIXME: CSS?
// Group 1: indent. Group 2: optional term text. Group 3 (in the lookahead):
// the deeper indent of the definition that follows.
553 var $_re = '(\ {0,4})((?![\s!]).*)?[|]\s*?\n(?=(?:\s*^)+(\1\ +)\S)';
554 // 1-------12-----------2 3-----3
556 function _parse (&$input, $m) {
557 $term = TransformInline(rtrim($m->getMatch(2)));
558 $indent = $m->getMatch(3);
561 $defn = BlockParser::parse($input->subBlock($indent),
562 BLOCK_NOTIGHTEN_AFTER);
564 $this->_pushContent(new Block_table_dl_defn($term, $defn));
// (Body of finish(); its header is not visible in this excerpt.) Finds the
// widest definition and widens all definitions to that column count before
// building the element.
569 $defs = &$this->_content;
572 foreach ($defs as $defn)
573 $ncols = max($ncols, $defn->ncols());
// Indexed access so setWidth() acts on the stored objects, not loop copies.
574 foreach ($defs as $key => $defn)
575 $defs[$key]->setWidth($ncols);
577 return parent::finish();
// Old-style list markup: '*' and '#' items, and ";term:definition" entries.
581 class Block_oldlists extends Block_list
583 //var $_tag = 'ol', 'ul', or 'dl';
// Group 1: the '*' or '#' bullet. Group 2: the term of a ";term:" entry.
584 var $_re = '(?:([*](?!\S[^*]*(?<=\S)[*](?!\S))|[#])|;(.*):).*?(?=\S)';
585 // 1------------------------------1 2--2
587 function _parse (&$input, $m) {
// Guard: the current prefix must consist only of old-style list markers
// (it may be empty).
588 if (!preg_match('/[*#;]*$/A', $m->getPrefix()))
591 $prefix = $m->getMatch();
593 $leader = preg_quote($prefix, '/');
// Continuation lines may either carry a further old-style marker or be
// indented by the width of the matched prefix.
595 $oldindent = '[*#;](?=[#*]|;.*:.*?\S)';
596 $newindent = sprintf('\\ {%d}', strlen($prefix));
597 $indent = "(?:$oldindent|$newindent)";
599 $bullet = $m->getMatch(1);
// '*' gives an unordered list, otherwise ordered. (The condition selecting
// this branch versus the definition branch below is not visible in this
// excerpt.)
601 $this->_tag = $bullet == '*' ? 'ul' : 'ol';
// Definition entry: emit the term as <dt>.
606 $term = trim($m->getMatch(2));
608 $this->_pushContent(HTML::dt(false, TransformInline($term)));
// The item's body is parsed recursively from the sub-block. (Creation of
// $item is not visible in this excerpt.)
612 $item->pushContent(BlockParser::parse($input->subBlock($leader, $indent),
613 BLOCK_NOTIGHTEN_AFTER));
614 $this->_pushContent($item);
// Preformatted text delimited by <pre>...</pre> or <verbatim>...</verbatim>.
619 class Block_pre extends Block
// Group 1: the tag name. Group 2: the enclosed text. The closing tag must
// use the same name and must not be preceded by '~'.
622 var $_re = '<(pre|verbatim)>(.*?(?:\s*\n^.*?)*?)(?<!~)<\/\1>\s*?\n';
623 // 1------------1 2------------------2
625 function _parse (&$input, $m) {
628 $text = $m->getMatch(2);
629 $tag = $m->getMatch(1);
// NOTE(review): any condition guarding this inline transform (e.g. applying
// it only for one of the two tags) is not visible in this excerpt.
632 $text = TransformInline($text);
634 $this->_pushContent($text);
// Plugin invocation: a "<?plugin ... ?>" or "<?plugin-form ... ?>" processing
// instruction, which may span several lines.
639 class Block_plugin extends Block
642 var $_attr = array('class' => 'plugin');
// The closing "?>" must not be preceded by '~'.
643 var $_re = '<\?plugin(?:-form)?.*?(?:\n^.*?)*?(?<!~)\?>\s*?\n';
// Expands the whole matched text through WikiPluginLoader::expandPI().
// ($request is not defined on any visible line -- presumably a global.)
645 function _parse (&$input, $m) {
647 $loader = new WikiPluginLoader;
649 $this->_pushContent($loader->expandPI($m->getMatch(), $request));
// Horizontal rule: a line of four or more dashes followed only by whitespace.
654 class Block_hr extends Block
657 var $_re = '-{4,}\s*?\n';
// (Body not visible in this excerpt.)
659 function _parse (&$input, $m) {
// Heading: a line starting with one to three '!' characters.
665 class Block_heading extends Block
// Group 1: the '!' markers. Group 2: the heading text.
667 var $_re = '(!{1,3})(.*)\n';
669 function _parse (&$input, $m) {
// More markers mean a more prominent heading: '!' -> h4, '!!' -> h3,
// '!!!' -> h2.
671 $this->_tag = "h" . (5 - strlen($m->getMatch(1)));
672 $this->_pushContent(TransformInline(trim($m->getMatch(2))));
// Paragraph text. Each match contributes raw text; adjacent paragraph blocks
// are merged until a blank line intervenes, and the combined text is run
// through the inline transform in finish().
677 class Block_p extends Block
// Keep the matched text untransformed so that later lines can be appended.
682 function _parse (&$input, $m) {
683 $this->_text = $m->getMatch();
// Absorb a directly following paragraph block unless blank lines separate
// the two.
688 function merge ($nextBlock) {
// get_class() returns the declared case on PHP 5+ (lowercase on PHP 4), so
// normalize before comparing; otherwise adjacent paragraph lines never merge.
689 if ($this->_preceedsBreak || strtolower(get_class($nextBlock)) != 'block_p')
692 $this->_text .= $nextBlock->_text;
// The merged block now ends where the absorbed block ended.
693 $this->_preceedsBreak = $nextBlock->_preceedsBreak;
// Produce the paragraph output.
//
// @param int $tighten  Bit mask of BLOCK_NOTIGHTEN_* flags.
// @return mixed  The bare content array while $tighten is still truthy at
//                the end, otherwise the element built by Block::finish().
//                (The statements run by the two conditions below are not
//                visible in this excerpt.)
697 function finish ($tighten) {
698 $this->_pushContent(TransformInline(trim($this->_text)));
700 if ($this->_followsBreak && ($tighten & BLOCK_NOTIGHTEN_AFTER) != 0)
702 elseif ($this->_preceedsBreak && ($tighten & BLOCK_NOTIGHTEN_BEFORE) != 0)
705 return $tighten ? $this->_content : parent::finish();
// E-mail style quotation, rendered as a styled <blockquote>. (The block's
// `_re` is not visible in this excerpt.)
709 class Block_email_blockquote extends CompoundBlock
711 // FIXME: move CSS to CSS.
712 var $_tag ='blockquote';
713 var $_attr = array('style' => 'border-left-width: medium; border-left-color: #0f0; border-left-style: ridge; padding-left: 1em; margin-left: 0em; margin-right: 0em;');
717 function _parse (&$input, $m) {
718 $prefix = $m->getMatch();
// Quoted lines start with the matched prefix; a bare '>' on an otherwise
// empty line is accepted as well.
// NOTE(review): $prefix is interpolated into a regexp without preg_quote();
// presumably it can only contain '>' and spaces -- confirm against `_re`.
719 $indent = "(?:$prefix|>(?=\s*?\n))";
720 $this->_content[] = BlockParser::parse($input->subBlock($indent),
721 BLOCK_NOTIGHTEN_EITHER);
726 ////////////////////////////////////////////////////////////////
// Prototype instance of every block type. Order matters:
// BlockParser::_nextBlock() walks this list and parses the first type whose
// regexp matches. (Only some of the entries are visible in this excerpt.)
731 $GLOBALS['Block_BlockTypes'] = array(new Block_oldlists,
735 new Block_blockquote,
739 new Block_email_blockquote,
743 // FIXME: This is temporary, too...
// Transform wiki text written in the new markup into output content.
//
// @param string $text  The page text.
// @return mixed        Whatever BlockParser::parse() returns for the text.
744 function NewTransform ($text) {
748 // Expand leading tabs.
749 // FIXME: do this better. also move it...
// Leading whitespace that contains a non-space whitespace character is
// replaced by the same number of spaces (one space per character).
// NOTE(review): the /e (eval) modifier is deprecated since PHP 5.5 and
// removed in PHP 7.
750 $text = preg_replace('/^\ *[^\ \S\n][^\S\n]*/me', "str_repeat(' ', strlen('\\0'))", $text);
// NOTE(review): this pattern has no /m flag, so only the first line is
// checked, whereas the replacement above works on every line.
751 assert(!preg_match('/^\ *\t/', $text));
753 $input = new BlockParser_Input($text);
754 return BlockParser::parse($input);
// Transform a page revision, choosing the parser from its 'markup' setting:
// revisions marked 'new' go through NewTransform() on the packed content;
// all others go through the old do_transform() on getContent().
//
// @param object $revision  Revision object providing get(),
//                          getPackedContent() and getContent().
759 function TransformRevision ($revision) {
760 if ($revision->get('markup') == 'new') {
761 return NewTransform($revision->getPackedContent());
764 return do_transform($revision->getContent());
769 // (c-file-style: "gnu")
774 // c-hanging-comment-ender-p: nil
775 // indent-tabs-mode: nil