1 <?php rcs_id('$Id: transform.php,v 1.8.2.5 2005-01-07 14:23:05 rurban Exp $');
2 // expects $pagehash and $html to be set
/**
 * Replace every match of $pattern in $str with a numbered placeholder token.
 *
 * Each matched substring is appended to $orig, and its place in the text is
 * taken by the token
 *     $FieldSeparator . $FieldSeparator . N . $FieldSeparator
 * where N is the value of $ntokens at that moment; $ntokens is incremented
 * once per match. After a match, searching restarts at the beginning of the
 * not yet consumed remainder of $str.
 *
 * @param string $str      Text to tokenize.
 * @param string $pattern  PCRE fragment without delimiters. It is embedded
 *                         in a "/.../" pattern, so a literal "/" inside it
 *                         must be escaped by the caller.
 * @param array  &$orig    Receives the matched substrings in match order.
 * @param int    &$ntokens Running token counter: number of the first token
 *                         on entry, next free token number on return.
 * @return string The tokenized string.
 */
function tokenize($str, $pattern, &$orig, &$ntokens) {
    global $FieldSeparator;

    // Start from an empty result so that ".=" never touches an undefined
    // variable and a string without any match is returned unchanged.
    $new = '';

    while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
        // A zero-length overall match would leave $str unchanged and make
        // this loop spin forever; such a pattern simply ends tokenizing.
        if ($matches[0] === '') {
            break;
        }

        $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
        $new .= $matches[1] . $linktoken;   // text before the match + token
        $orig[] = $matches[2];              // the matched text itself
        $str = substr($str, strlen($matches[0]));
    }

    // Whatever is left contains no further match; keep it verbatim.
    return $new . $str;
}
21 // Prepare replacements for references [\d+]
// Walks the reference slots 1..NUM_LINKS of $pagehash['refs'] and stores
// ready-made markup for every non-empty slot in $embedded[$i]. $embedded is
// consulted further down when numeric "[n]" links are tokenized.
22 for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
23 if (! empty($pagehash['refs'][$i])) {
// A reference whose end matches $InlineImages (case-insensitive) is
// handed to LinkImage().
24 if (preg_match("/($InlineImages)$/i", $pagehash['refs'][$i])) {
26 $embedded[$i] = LinkImage($pagehash['refs'][$i]);
// NOTE(review): the line separating the two assignments is not visible in
// this listing; presumably this is the non-image branch. Here the reference
// is passed to LinkURL() with "[$i]" as second argument - confirm.
29 $embedded[$i] = LinkURL($pagehash['refs'][$i], "[$i]");
35 // Loop over all lines of the page and apply transformation rules
// $pagehash["content"] is read by integer index 0 .. $numlines-1; the
// result of every step is appended to $html.
36 $numlines = count($pagehash["content"]);
38 for ($index = 0; $index < $numlines; $index++) {
// Per-line state: $replacements collects the text that is substituted for
// the placeholder tokens of this line once all markup rules have run.
42 $replacements = array();
44 $tmpline = $pagehash['content'][$index];
// An empty string or a lone carriage return counts as a blank line.
46 if (!strlen($tmpline) || $tmpline == "\r") {
47 // this is a blank line, send <p>
// NOTE(review): the call passes an empty tag name, not 'p'; what is
// emitted depends on SetHTMLOutputMode(), which is defined elsewhere.
48 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
52 /* If your web server is not accessible to the general public, you may
53 enable the code below, which allows embedded HTML. If just anyone can reach
54 your web server, it is highly advised that you do not enable this.
56 elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
58 $html .= SetHTMLOutputMode("", ZERO_LEVEL, 0);
65 //////////////////////////////////////////////////////////
66 // New linking scheme: links are in brackets. This will
67 // emulate typical HTML linking as well as Wiki linking.
69 // First need to protect [[.
// Every "[[" is swapped for a placeholder whose replacement is a single
// literal "[", so an escaped bracket is not mistaken for a link below.
71 $tmpline = tokenize($tmpline, '\[\[', $replacements, $ntokens);
// $oldn walks over the replacements added by the tokenize() call above.
// NOTE(review): the statement that sets $oldn is not visible in this listing.
72 while ($oldn < $ntokens)
73 $replacements[$oldn++] = '[';
75 // Now process the [\d+] links which are numeric references
77 $tmpline = tokenize($tmpline, '\[\s*\d+\s*\]', $replacements, $ntokens);
78 while ($oldn < $ntokens) {
// Skip the leading "[" and let the int cast read the reference number.
79 $num = (int) substr($replacements[$oldn], 1);
// Only references prepared in $embedded are substituted; otherwise the
// replacement stays the bracket text exactly as it was matched.
80 if (! empty($embedded[$num]))
81 $replacements[$oldn] = $embedded[$num];
85 // match anything else between brackets
87 $tmpline = tokenize($tmpline, '\[.+?\]', $replacements, $ntokens);
88 while ($oldn < $ntokens) {
// ParseAndLink() receives the complete bracket expression; only the
// 'link' entry of its result is used here.
89 $link = ParseAndLink($replacements[$oldn]);
90 $replacements[$oldn] = $link['link'];
94 //////////////////////////////////////////////////////////
95 // replace all URLs with tokens, so we don't confuse them
96 // with Wiki words later. Wiki words in URLs break things.
97 // URLs preceded by a '!' are not linked
// The pattern is an optional "!", one of $AllowedProtocols, a colon and
// the URL body. The final character class additionally excludes , . ? so
// that punctuation directly after a URL is not made part of it.
99 $tmpline = tokenize($tmpline, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $replacements, $ntokens);
100 while ($oldn < $ntokens) {
// A leading '!' is stripped from the replacement text.
101 if($replacements[$oldn][0] == '!')
102 $replacements[$oldn] = substr($replacements[$oldn], 1);
// NOTE(review): the line between the two assignments is not visible in
// this listing; presumably LinkURL() only applies to URLs without '!'.
104 $replacements[$oldn] = LinkURL($replacements[$oldn]);
108 //////////////////////////////////////////////////////////
110 // Wikiwords preceded by a '!' are not linked
// Tokenizes everything matching $WikiNameRegexp, optionally prefixed by '!'.
113 $tmpline = tokenize($tmpline, "!?$WikiNameRegexp", $replacements, $ntokens);
114 while ($oldn < $ntokens) {
115 $old = $replacements[$oldn];
// '!' prefix: keep the bare word as plain text (the '!' itself is removed).
116 if ($old[0] == '!') {
117 $replacements[$oldn] = substr($old,1);
// Otherwise IsWikiPage() decides which of the two link helpers is used.
118 } elseif (IsWikiPage($dbi, $old)) {
119 $replacements[$oldn] = LinkExistingWikiWord($old);
// NOTE(review): the branch line preceding this assignment is not visible
// in this listing; presumably it is the fallback for unknown pages.
121 $replacements[$oldn] = LinkUnknownWikiWord($old);
//////////////////////////////////////////////////////////
// escape HTML metachars
// Links, URLs and WikiWords have already been swapped for placeholder
// tokens, so only the remaining literal page text is escaped here.
// '&' has to be handled first; otherwise the ampersands of the freshly
// inserted &gt; / &lt; entities would be escaped a second time.
// Quotes are deliberately left alone: the emphasis rules that follow
// match on runs of ' characters.
$tmpline = str_replace('&', '&amp;', $tmpline);
$tmpline = str_replace('>', '&gt;', $tmpline);
$tmpline = str_replace('<', '&lt;', $tmpline);
134 // %%% are linebreaks
135 $tmpline = str_replace('%%%', '<br />', $tmpline);
// The quote-based rules run from the longest marker to the shortest
// (5, 3, then 2 quotes) so that a shorter rule cannot consume part of a
// longer marker. In every rule \\2 is the text between the markers.
137 // bold italics (old way)
138 $tmpline = preg_replace("|(''''')(.*?)(''''')|",
139 "<strong><em>\\2</em></strong>", $tmpline);
// bold: '''text'''
142 $tmpline = preg_replace("|(''')(.*?)(''')|",
143 "<strong>\\2</strong>", $tmpline);
// bold, alternative marker: __text__
146 $tmpline = preg_replace("|(__)(.*?)(__)|",
147 "<strong>\\2</strong>", $tmpline);
// italics: ''text''
150 $tmpline = preg_replace("|('')(.*?)('')|",
151 "<em>\\2</em>", $tmpline);
154 //////////////////////////////////////////////////////////
155 // unordered, ordered, and dictionary list (using TAB)
// First branch of one long if/elseif chain that picks the block-level
// output mode for the line. Pattern: leading TABs, term, ":" + TAB, rest.
157 if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
158 // this is a dictionary list (<dl>) item
// The number of leading TABs is passed on as the nesting depth.
159 $numtabs = strlen($matches[1]);
160 $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
// NOTE(review): trim() is used as a boolean, so a term consisting only
// of "0" is treated like an empty term and gets no <dt>.
162 if(trim($matches[2]))
163 $tmpline = '<dt>' . $matches[2];
164 $tmpline .= '<dd>' . $matches[4];
// TABs followed by "*", a number or "#".
166 } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
167 // this is part of a list (<ul>, <ol>)
168 $numtabs = strlen($matches[1]);
// NOTE(review): the statement belonging to the '*' case and the line that
// separates it from the 'ol' assignment are not visible in this listing.
169 if ($matches[2] == '*') {
172 $listtag = 'ol'; // a rather tacit assumption. oh well.
// Strip the TABs and the list marker from the line text.
174 $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
175 $html .= SetHTMLOutputMode($listtag, NESTED_LEVEL, $numtabs);
179 //////////////////////////////////////////////////////////
180 // tabless markup for unordered, ordered, and dictionary lists
181 // ul/ol list types can be mixed, so we only look at the last
182 // character. Changes e.g. from "**#*" to "###*" go unnoticed.
183 // and wouldn't make a difference to the HTML layout anyway.
// In the tabless forms the nesting depth is the length of the marker run.
185 // unordered lists <UL>: "*"
// The marker run must end in "*" and must not be followed by "#".
186 } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
187 // this is part of an unordered list
188 $numtabs = strlen($matches[1]);
189 $tmpline = preg_replace("/^([#*]*\*)/", '', $tmpline);
190 $html .= SetHTMLOutputMode('ul', NESTED_LEVEL, $numtabs);
193 // ordered lists <OL>: "#"
194 } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
195 // this is part of an ordered list
196 $numtabs = strlen($matches[1]);
197 $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
198 $html .= SetHTMLOutputMode('ol', NESTED_LEVEL, $numtabs);
201 // definition lists <DL>: ";text:text"
// The number of leading ";" gives the depth; the term ends at the first ":".
202 } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
203 // this is a dictionary list item
204 $numtabs = strlen($matches[1]);
205 $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
// Same truthiness caveat as in the TAB variant: a term of "0" is skipped.
207 if(trim($matches[2]))
208 $tmpline = '<dt>' . $matches[2];
209 $tmpline .= '<dd>' . $matches[3];
212 //////////////////////////////////////////////////////////
213 // remaining modes: preformatted text, headings, normal text
// Any line that starts with whitespace is treated as preformatted.
215 } elseif (preg_match("/^\s+/", $tmpline)) {
216 // this is preformatted text, i.e. <pre>
217 $html .= SetHTMLOutputMode('pre', ZERO_LEVEL, 0);
// One to three "!" followed by a character other than "!".
219 } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
220 // lines starting with !,!!,!!! are headings
// More "!" means a bigger heading: "!" -> h3, "!!" -> h2, "!!!" -> h1.
221 if($whichheading[1] == '!') $heading = 'h3';
222 elseif($whichheading[1] == '!!') $heading = 'h2';
223 elseif($whichheading[1] == '!!!') $heading = 'h1';
// Remove the heading marker from the line text.
224 $tmpline = preg_replace("/^!+/", '', $tmpline);
225 $html .= SetHTMLOutputMode($heading, ZERO_LEVEL, 0);
// Four or more dashes; text after them (trimmed) is captured as $matches[1].
227 } elseif (preg_match('/^-{4,}\s*(.*?)\s*$/', $tmpline, $matches)) {
228 // four or more dashes to <hr>
229 // <hr> can not be contained in a paragraph, so the output mode is
// switched to the empty tag before the <hr> is written (presumably this
// closes any open block - confirm in SetHTMLOutputMode()).
230 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0) . "<hr>\n";
// Text following the dashes becomes the line content; if there is any,
// it is output in paragraph mode.
231 if ( ($tmpline = $matches[1]) != '' ) {
232 $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
235 // it's ordinary output if nothing else
236 $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
// Expansion of the %%...%% magic markers. Each Render*() helper is only
// called when its marker actually occurs in the line.
239 // These are still problems as far as generating correct HTML is
240 // concerned. Paragraph (<p>) elements are not allowed to contain
241 // other block-level elements (like <form>s).
242 if (strstr($tmpline, '%%Search%%'))
243 $tmpline = str_replace('%%Search%%', RenderQuickSearch(), $tmpline);
244 if (strstr($tmpline, '%%Fullsearch%%'))
245 $tmpline = str_replace('%%Fullsearch%%', RenderFullSearch(), $tmpline);
246 if (strstr($tmpline, '%%Mostpopular%%'))
247 $tmpline = str_replace('%%Mostpopular%%', RenderMostPopular(), $tmpline);
// %%ADMIN-...%% markers are expanded only when the WIKI_ADMIN constant
// is defined; otherwise they stay in the text untouched.
248 if(defined('WIKI_ADMIN') && strstr($tmpline, '%%ADMIN-'))
249 $tmpline = ParseAdminTokens($tmpline);
252 ///////////////////////////////////////////////////////
// Put the protected text back: placeholder number $i, built from
// $FieldSeparator exactly as in tokenize(), is replaced by
// $replacements[$i]. This runs after the inline markup rules, so the
// substituted text is not processed by them.
255 for ($i = 0; $i < $ntokens; $i++)
256 $tmpline = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $replacements[$i], $tmpline);
// Append the finished line to the page output.
259 $html .= $tmpline . "\n";
// NOTE(review): presumably executed once after the line loop (the loop's
// closing brace is not visible in this listing) to reset the output mode.
262 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);