1 <?php rcs_id('$Id: InlineParser.php,v 1.7 2002-02-01 05:59:26 dairiki Exp $');
/* Copyright (C) 2002, Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * This file is part of PhpWiki.
 *
 * PhpWiki is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * PhpWiki is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PhpWiki; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
20 require_once('lib/HtmlElement.php');
21 require_once('lib/interwiki.php');
//FIXME: intubate ESCAPE_CHAR into BlockParser.php.
/**
 * Escape character for inline markup.
 *
 * Markup_escape matches this character together with the single
 * character which follows it.
 */
define('ESCAPE_CHAR', '~');
/**
 * Return type from RegexpSet::match and RegexpSet::nextMatch.
 *
 * A plain record; all four members are filled in by RegexpSet::_match.
 */
class RegexpSet_match {
    /**
     * The text leading up to the next match.
     */
    var $prematch;

    /**
     * The matched text.
     */
    var $match;

    /**
     * The text following the matched text.
     */
    var $postmatch;

    /**
     * Index of the regular expression which matched.
     */
    var $regexp_ind;
}
/**
 * A set of regular expressions.
 *
 * This class is probably only useful for InlineTransformer.
 */
class RegexpSet
{
    /** The list of regular expressions, in priority order. */
    var $_regexps;

    /** Constructor
     *
     * @param $regexps array A list of regular expressions.  The
     * regular expressions should not include any sub-pattern groups
     * "(...)".  (Anonymous groups, like "(?:...)", as well as
     * look-ahead and look-behind assertions are fine.)
     */
    function RegexpSet ($regexps) {
        $this->_regexps = $regexps;
    }

    /**
     * Search text for the next matching regexp from the Regexp Set.
     *
     * @param $text string The text to search.
     *
     * @return object  A RegexpSet_match object, or false if no match.
     */
    function match ($text) {
        // '*?' makes the leading "." lazy: the earliest match wins.
        return $this->_match($text, $this->_regexps, '*?');
    }

    /**
     * Search for next matching regexp.
     *
     * Here, 'next' has two meanings:
     *
     * Match the next regexp(s) in the set, at the same position as the
     * last match.
     *
     * If that fails, match the whole RegexpSet, starting after the
     * position of the previous match.
     *
     * @param $text string Text to search.
     *
     * @param $prevMatch A RegexpSet_match object
     *
     * $prevMatch should be a match object obtained by a previous
     * match upon the same value of $text.
     *
     * @return object  A RegexpSet_match object, or false if no match.
     */
    function nextMatch ($text, $prevMatch) {
        // Try to find match at same position.
        $pos = strlen($prevMatch->prematch);
        $regexps = array_slice($this->_regexps, $prevMatch->regexp_ind + 1);

        // Skip this step when no regexps remain: an empty alternation
        // would match the empty string and produce an index which is
        // past the end of the set.
        if ($regexps) {
            $repeat = sprintf('{%d}', $pos);
            if ( ($match = $this->_match($text, $regexps, $repeat)) ) {
                // _match() numbered the regexps relative to the slice;
                // convert back to an index into the full set.
                $match->regexp_ind += $prevMatch->regexp_ind + 1;
                return $match;
            }
        }

        // Failed.  Look for match after current position.
        $repeat = sprintf('{%d,}?', $pos + 1);
        return $this->_match($text, $this->_regexps, $repeat);
    }

    /**
     * Match $text against an alternation of the given regexps.
     *
     * The pattern is compiled with the modifiers A (anchored at the
     * start of $text), x (whitespace in the pattern is ignored) and
     * s ("." also matches newlines).
     *
     * @param $text string The text to search.
     *
     * @param $regexps array The regexps to try, in priority order.
     *
     * @param $repeat string Quantifier applied to the leading ".",
     * which controls how much text may precede the match.
     *
     * @return object  A RegexpSet_match object, or false if no match.
     */
    function _match ($text, $regexps, $repeat) {
        $pat= "/ ( . $repeat ) ( (" . join(')|(', $regexps) . ") ) /Axs";

        if (! preg_match($pat, $text, $m)) {
            return false;
        }

        $match = new RegexpSet_match;
        $match->postmatch = substr($text, strlen($m[0]));
        $match->prematch = $m[1];
        $match->match = $m[2];
        // Groups 1 and 2 are the prematch and the whole match; regexp
        // number i is group i + 3.  preg_match() leaves trailing
        // unmatched groups out of $m, so the last entry identifies the
        // regexp which matched.  (This is why the regexps themselves
        // must not contain capturing groups.)
        $match->regexp_ind = count($m) - 4;

        /* Debugging output (disabled):
        PrintXML(HTML::dl(HTML::dt("input"),
                          HTML::dd(HTML::pre($text)),
                          HTML::dt("match"),
                          HTML::dd(HTML::pre($match->match)),
                          HTML::dt("regexp"),
                          HTML::dd(HTML::pre($regexps[$match->regexp_ind])),
                          HTML::dt("prematch"),
                          HTML::dd(HTML::pre($match->prematch))));
        */
        return $match;
    }
}
/**
 * A simple markup rule (i.e. terminal token).
 *
 * These are defined by a regexp.
 *
 * When a match is found for the regexp, the matching text is replaced.
 * The replacement content is obtained by calling the SimpleMarkup::markup method.
 */
class SimpleMarkup
{
    /** Regexp which matches this token; set by subclasses. */
    var $_match_regexp;

    /** Get regexp.
     *
     * @return string Regexp which matches this token.
     */
    function getMatchRegexp () {
        return $this->_match_regexp;
    }

    /** Markup matching text.
     *
     * Subclasses must override this method; the base implementation
     * only raises an E_USER_ERROR.
     *
     * @param $match string The text which matched the regexp
     * (obtained from getMatchRegexp).
     *
     * @return mixed The expansion of the matched text.
     */
    function markup ($match /*, $body */) {
        trigger_error("pure virtual", E_USER_ERROR);
    }
}
/**
 * A balanced markup rule.
 *
 * These are defined by a start regexp, and an end regexp.
 */
class BalancedMarkup
{
    /** Regexp which matches the opening delimiter; set by subclasses. */
    var $_start_regexp;

    /** Regexp which matches the closing delimiter, for subclasses
     * which do not override getEndRegexp(). */
    var $_end_regexp;

    /** Get the starting regexp for this rule.
     *
     * @return string The starting regexp.
     */
    function getStartRegexp () {
        return $this->_start_regexp;
    }

    /** Get the ending regexp for this rule.
     *
     * @param $match string The text which matched the starting regexp.
     * The base implementation ignores it.
     *
     * @return string The ending regexp.
     */
    function getEndRegexp ($match) {
        return $this->_end_regexp;
    }

    /** Get expansion for matching input.
     *
     * Subclasses must override this method; the base implementation
     * only raises an E_USER_ERROR.
     *
     * @param $match string The text which matched the starting regexp.
     *
     * @param $body mixed Transformed text found between the starting
     * and ending regexps.
     *
     * @return mixed The expansion of the matched text.
     */
    function markup ($match, $body) {
        trigger_error("pure virtual", E_USER_ERROR);
    }
}
/**
 * Escaped character: ESCAPE_CHAR followed by any single character.
 */
class Markup_escape  extends SimpleMarkup
{
    /**
     * @return string Regexp matching the escape character plus the
     * character which follows it.
     */
    function getMatchRegexp () {
        return ESCAPE_CHAR . ".";
    }

    /**
     * @param $match string The two-character escape sequence.
     *
     * @return string The escaped character, without the escape
     * character in front of it.
     */
    function markup ($match) {
        return $match[1];
    }
}
/**
 * Bracketed link: "[" ... "]" containing at least one
 * non-whitespace character.
 */
class Markup_bracketlink  extends SimpleMarkup
{
    var $_match_regexp = "\\[ .*?\S.*? \\]";

    /**
     * @param $match string The bracketed text, brackets included.
     *
     * @return mixed The value returned by LinkBracketLink().
     */
    function markup ($match) {
        $link = LinkBracketLink($match);
        // Inline markup must expand to inline content.
        assert($link->isInlineElement());
        return $link;
    }
}
/**
 * Bare URL using one of the protocols listed in $AllowedProtocols.
 */
class Markup_url extends SimpleMarkup
{
    /**
     * Builds the regexp from the global $AllowedProtocols, which is
     * interpolated as a regexp alternation.
     *
     * The leading look-behind refuses a URL which directly follows an
     * alphanumeric character; the trailing one keeps the listed
     * punctuation and closing brackets out of the end of the URL.
     *
     * @return string Regexp which matches a URL.
     */
    function getMatchRegexp () {
        global $AllowedProtocols;
        return "(?<![[:alnum:]]) (?:$AllowedProtocols) : [^\s<>\"']+ (?<![ ,.?; \] \) ])";
    }

    /**
     * @param $match string The matched URL.
     *
     * @return mixed The value returned by LinkURL().
     */
    function markup ($match) {
        return LinkURL($match);
    }
}
/**
 * Interwiki link: an interwiki map prefix, a colon, and a run of
 * non-whitespace characters.
 */
class Markup_interwiki extends SimpleMarkup
{
    /**
     * Builds the regexp from the prefix regexp supplied by the
     * interwiki map for the current (global) $request.
     *
     * @return string Regexp which matches an interwiki link.
     */
    function getMatchRegexp () {
        global $request;
        $map = InterWikiMap::GetMap($request);
        return "(?<! [[:alnum:]])" . $map->getRegexp(). ": \S+ (?<![ ,.?; \] \) \" \' ])";
    }

    /**
     * @param $match string The matched interwiki link text.
     *
     * @return mixed The value returned by the interwiki map's link().
     */
    function markup ($match) {
        global $request;
        $map = InterWikiMap::GetMap($request);
        return $map->link($match);
    }
}
/**
 * Bare WikiWord, as defined by the global $WikiNameRegexp.
 */
class Markup_wikiword extends SimpleMarkup
{
    /**
     * @return string The global $WikiNameRegexp, preceded by a space.
     */
    function getMatchRegexp () {
        global $WikiNameRegexp;
        return " $WikiNameRegexp";
    }

    /**
     * @param $match string The matched wiki word.
     *
     * @return mixed The value returned by WikiLink() in 'auto' mode.
     */
    function markup ($match) {
        return WikiLink($match, 'auto');
    }
}
/**
 * Forced line break: exactly three percent signs, or a literal <br>.
 */
class Markup_linebreak extends SimpleMarkup
{
    // The look-arounds reject runs of four or more percent signs.
    var $_match_regexp = "(?: (?<! %) %%% (?! %) | <br> )";

    /**
     * @param $match string The matched text (not used).
     *
     * @return mixed The value returned by HTML::br().
     */
    function markup ($match) {
        return HTML::br();
    }
}
/**
 * Old-style emphasis: ''emphasized'' and __strong__.
 */
class Markup_old_emphasis  extends BalancedMarkup
{
    var $_start_regexp = "''|__";

    /**
     * The closing delimiter is the same text as the opening one.
     * Neither delimiter contains regexp metacharacters, so the
     * matched text can serve as the end regexp as it stands.
     *
     * @param $match string The opening delimiter.
     *
     * @return string The ending regexp.
     */
    function getEndRegexp ($match) {
        return $match;
    }

    /**
     * @param $match string The opening delimiter.
     *
     * @param $body mixed Transformed text between the delimiters.
     *
     * @return object An 'em' element for '', a 'strong' element otherwise.
     */
    function markup ($match, $body) {
        $tag = $match == "''" ? 'em' : 'strong';
        return new HtmlElement($tag, $body);
    }
}
/**
 * Emphasis delimited by a single "*", "_" or "=" which hugs the
 * emphasized text: *bold*, _italic_, =monospace=.
 */
class Markup_nestled_emphasis extends BalancedMarkup
{
    //var $_start_regexp = "(?<! [[:alnum:]] ) [*_=] (?=[[:alnum:]])";
    // Opening delimiter: after whitespace or at the start of the text,
    // and directly before a non-whitespace character.
    var $_start_regexp = "(?<= \s | ^ ) [*_=] (?= \S)";

    /**
     * The closing delimiter is the same character as the opening one,
     * directly after a non-whitespace character and followed by
     * whitespace or the end of the text.
     *
     * @param $match string The opening delimiter character.
     *
     * @return string The ending regexp.
     */
    function getEndRegexp ($match) {
        //return "(?<= [[:alnum:]]) \\$match (?![[:alnum:]])";
        return "(?<= \S) \\$match (?= \s | $)";
    }

    /**
     * @param $match string The opening delimiter character.
     *
     * @param $body mixed Transformed text between the delimiters.
     *
     * @return object A 'b' element for "*", 'tt' for "=", 'i' otherwise.
     */
    function markup ($match, $body) {
        switch ($match) {
        case '*': return new HtmlElement('b', $body);
        case '=': return new HtmlElement('tt', $body);
        default:  return new HtmlElement('i', $body);
        }
    }
}
/**
 * Emphasis written with literal HTML tags, e.g. <b>...</b>.
 */
class Markup_html_emphasis extends BalancedMarkup
{
    // The pattern is used in extended mode, so the line breaks and
    // indentation inside the string are not significant.
    var $_start_regexp = "<(?: b|big|i|small|tt|
                               abbr|acronym|cite|code|dfn|kbd|samp|strong|var|
                               sup|sub )>";

    /**
     * @param $match string The opening tag, e.g. "<b>".
     *
     * @return string Regexp matching the corresponding closing tag.
     */
    function getEndRegexp ($match) {
        // "<b>" becomes "<\/b>"; the slash is escaped because "/" is
        // the delimiter of the compiled pattern.
        return "<\\/" . substr($match, 1);
    }

    /**
     * @param $match string The opening tag.
     *
     * @param $body mixed Transformed text between the tags.
     *
     * @return object An element named after the matched tag.
     */
    function markup ($match, $body) {
        // Strip the surrounding "<" and ">".
        $tag = substr($match, 1, -1);
        return new HtmlElement($tag, $body);
    }
}
342 // FIXME: Do away with magic phpwiki forms. (Maybe phpwiki: links too?)
343 // FIXME: Do away with plugin-links. They seem not to be used.
/**
 * Transforms inline wiki markup, using the Markup_* rules.
 *
 * The two lists below are parallel: $_regexps[$i] is the (start)
 * regexp of the rule $_markup[$i].
 */
class InlineTransformer
{
    var $_regexps = array();
    var $_markup = array();

    /**
     * Constructor.  Registers the standard markup rules.
     *
     * The order matters: it is the order of the alternatives in the
     * combined regexp, so earlier rules win at any given position.
     */
    function InlineTransformer () {
        foreach (array('escape', 'bracketlink', 'url',
                       'interwiki', 'wikiword', 'linebreak',
                       'old_emphasis', 'nestled_emphasis',
                       'html_emphasis') as $mtype) {
            $class = "Markup_$mtype";
            $this->_addMarkup(new $class);
        }
    }

    /**
     * Register a markup rule.
     *
     * @param $markup object A SimpleMarkup or BalancedMarkup object.
     */
    function _addMarkup ($markup) {
        if (isa($markup, 'SimpleMarkup'))
            $regexp = $markup->getMatchRegexp();
        else
            $regexp = $markup->getStartRegexp();

        // Each regexp may be registered only once.  The check has to
        // look at $_regexps: $_markup is indexed by number, so testing
        // it with the regexp as key could never fail.
        assert(!in_array($regexp, $this->_regexps, true));
        $this->_regexps[] = $regexp;
        $this->_markup[] = $markup;
    }

    /**
     * Transform text up to the first match of $end_re.
     *
     * @param $text string The text to transform (passed by reference).
     * On success it is replaced by the text which follows the match of
     * $end_re; on failure it is left unchanged.
     *
     * @param $end_re string Regexp which terminates the parse.
     *
     * @return mixed An XmlContent object holding the transformed text,
     * or false if $end_re could not be matched.
     */
    function parse (&$text, $end_re = '$') {
        $regexps = $this->_regexps;

        // $end_re takes precedence: "favor reduce over shift"
        array_unshift($regexps, $end_re);
        $regexps = new RegexpSet($regexps);

        $input = $text;
        $output = new XmlContent;

        $match = $regexps->match($input);

        while ($match) {
            if ($match->regexp_ind == 0) {
                // No start pattern found before end pattern.
                // We're all done!
                $output->pushContent($match->prematch);
                $text = $match->postmatch;
                return $output;
            }

            // Index 0 is $end_re, so rule i is at regexp index i + 1.
            $markup = $this->_markup[$match->regexp_ind - 1];
            // On success this also advances $match->postmatch past the
            // end of the markup body (it is passed by reference).
            $body = $this->_parse_markup_body($markup, $match->match, $match->postmatch);
            if (!$body) {
                // Couldn't match balanced expression.
                // Ignore and look for next matching start regexp.
                $match = $regexps->nextMatch($input, $match);
                continue;
            }

            // Matched markup.  Eat input, push output.
            // FIXME: combine adjacent strings.
            $input = $match->postmatch;
            $output->pushContent($match->prematch,
                                 $markup->markup($match->match, $body));

            $match = $regexps->match($input);
        }

        // No pattern matched, not even the end pattern.
        // Parse fails.
        return false;
    }

    /**
     * Parse the body of a markup rule which has just matched.
     *
     * @param $markup object The rule which matched.
     *
     * @param $match string The text which matched the rule's regexp.
     *
     * @param $text string The text following the match (passed by
     * reference).  For a balanced rule it is advanced past the end
     * regexp when the body parses successfully.
     *
     * @return mixed True for a SimpleMarkup; otherwise the result of
     * parse(), i.e. the transformed body or false on failure.
     */
    function _parse_markup_body ($markup, $match, &$text) {
        if (isa($markup, 'SimpleMarkup'))
            return true;        // Done. SimpleMarkup is simple.

        $end_regexp = $markup->getEndRegexp($match);
        return $this->parse($text, $end_regexp);
    }
}
/**
 * Transform a string of inline wiki markup.
 *
 * @param $text string The text to transform.
 *
 * @return mixed The result of InlineTransformer::parse(): the
 * transformed content, or false if the parse fails.
 */
function TransformInline($text) {
    // parse() does not modify the transformer, so it is built once
    // and reused.  Note that the markup regexps are therefore computed
    // only on the first call.
    static $trfm;

    if (empty($trfm))
        $trfm = new InlineTransformer;
    return $trfm->parse($text);
}
434 // (c-file-style: "gnu")
439 // c-hanging-comment-ender-p: nil
440 // indent-tabs-mode: nil