1 <?php rcs_id('$Id: transform.php,v 1.8 2001-01-04 18:34:15 ahollosi Exp $');
2 // expects $pagehash and $html to be set
// tokenize(): swap every match of $pattern in $str for a placeholder token.
//   $str      - text to scan
//   $pattern  - PCRE fragment; it is spliced into a "/"-delimited regex below,
//               so it must not contain an unescaped "/"
//   $orig     - (by reference) each matched text is appended to this array
//   $ntokens  - (by reference) running token counter, incremented per match
// A token has the form: separator, separator, token number, separator, where
// the separator is the global $FieldSeparator.
// NOTE(review): the tail of this function (loop close, return) is not visible
// in this excerpt - confirm against the full file.
4 function tokenize($str, $pattern, &$orig, &$ntokens) {
5 global $FieldSeparator;
6 // Find any strings in $str that match $pattern and
7 // store them in $orig, replacing them with tokens
8 // starting at number $ntokens - returns tokenized string
// The lazy group anchored at the start captures the text in front of the
// first match, so matches are consumed left to right, one per iteration.
10 while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
11 $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
// NOTE(review): no initialisation of $new is visible before this append - verify.
12 $new .= $matches[1] . $linktoken;
13 $orig[] = $matches[2];
// Drop the prefix and the match itself; the next iteration scans the rest.
14 $str = substr($str, strlen($matches[0]));
21 // Prepare replacements for references [\d+]
// For every reference slot 1..NUM_LINKS that is non-empty in $pagehash['refs'],
// build the markup a numeric [n] link will expand to and keep it in $embedded[n].
22 for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
23 if (! empty($pagehash['refs'][$i])) {
// References whose end matches $InlineImages (case-insensitive) go through
// LinkImage(). Presumably $InlineImages lists image file suffixes - confirm.
24 if (preg_match("/($InlineImages)$/i", $pagehash['refs'][$i])) {
26 $embedded[$i] = LinkImage($pagehash['refs'][$i]);
// All other references go through LinkURL() with "[n]" as second argument.
// NOTE(review): the else separating the two assignments is presumably on a
// line not visible in this excerpt - confirm.
29 $embedded[$i] = LinkURL($pagehash['refs'][$i], "[$i]");
35 // only call these once, for efficiency
// The three results are substituted for the %%Search%%, %%Fullsearch%% and
// %%Mostpopular%% placeholders on each page line further down, so they are
// rendered here, before the per-line loop starts.
36 $quick_search_box = RenderQuickSearch();
37 $full_search_box = RenderFullSearch();
38 $most_popular_list = RenderMostPopular();
41 // Loop over all lines of the page and apply transformation rules
// $pagehash["content"] is read by numeric index 0..$numlines-1, one entry per line.
42 $numlines = count($pagehash["content"]);
44 for ($index = 0; $index < $numlines; $index++) {
// Token replacements are collected per line, so each line starts with an empty list.
48 $replacements = array();
50 $tmpline = $pagehash['content'][$index];
// An empty string or a lone carriage return is treated as a blank line.
52 if (!strlen($tmpline) || $tmpline == "\r") {
53 // this is a blank line, send <p>
54 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
58 /* If your web server is not accessible to the general public, you may
59 allow this code below, which allows embedded HTML. If just anyone can reach
60 your web server it is highly advised that you do not allow this.
62 elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
64 $html .= SetHTMLOutputMode("", ZERO_LEVEL, 0);
71 //////////////////////////////////////////////////////////
72 // New linking scheme: links are in brackets. This will
73 // emulate typical HTML linking as well as Wiki linking.
// Each bracket construct is swapped for a placeholder token by tokenize(); the
// matched text lands in $replacements and is then overwritten in place with
// the final output for that token.
// NOTE(review): $oldn is presumably set to the current $ntokens before each
// tokenize() call, and advanced inside the braced loops, on lines that are not
// visible in this excerpt - confirm.
75 // First need to protect [[.
77 $tmpline = tokenize($tmpline, '\[\[', $replacements, $ntokens);
// Every "[[" token stands for one literal "[".
78 while ($oldn < $ntokens)
79 $replacements[$oldn++] = '[';
81 // Now process the [\d+] links which are numeric references
83 $tmpline = tokenize($tmpline, '\[\s*\d+\s*\]', $replacements, $ntokens);
84 while ($oldn < $ntokens) {
// Skip the leading "[" and let the int cast read the reference number.
85 $num = (int) substr($replacements[$oldn], 1);
// Only references that were prepared in $embedded are rewritten; otherwise
// the entry keeps the bracket text that tokenize() stored.
86 if (! empty($embedded[$num]))
87 $replacements[$oldn] = $embedded[$num];
91 // match anything else between brackets
93 $tmpline = tokenize($tmpline, '\[.+?\]', $replacements, $ntokens);
94 while ($oldn < $ntokens) {
// ParseAndLink() returns an array; its 'link' entry becomes the replacement.
95 $link = ParseAndLink($replacements[$oldn]);
96 $replacements[$oldn] = $link['link'];
100 //////////////////////////////////////////////////////////
101 // replace all URLs with tokens, so we don't confuse them
102 // with Wiki words later. Wiki words in URLs break things.
103 // URLs preceded by a '!' are not linked
// The final character class of the pattern additionally excludes comma, full
// stop and question mark, so trailing sentence punctuation stays outside the URL.
105 $tmpline = tokenize($tmpline, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $replacements, $ntokens);
106 while ($oldn < $ntokens) {
// A leading '!' is stripped and the URL text is kept without linking.
107 if($replacements[$oldn][0] == '!')
108 $replacements[$oldn] = substr($replacements[$oldn], 1);
// NOTE(review): the else in front of the LinkURL() call is presumably on a
// line not visible in this excerpt - confirm.
110 $replacements[$oldn] = LinkURL($replacements[$oldn]);
114 //////////////////////////////////////////////////////////
116 // Wikiwords preceded by a '!' are not linked
// Each word matching $WikiNameRegexp becomes a token with one of three
// replacements: the bare word (when it was '!'-prefixed), a link to the
// existing page, or a link for a not-yet-existing page.
119 $tmpline = tokenize($tmpline, "!?$WikiNameRegexp", $replacements, $ntokens);
120 while ($oldn < $ntokens) {
121 $old = $replacements[$oldn];
122 if ($old[0] == '!') {
// Escaped word: drop the '!' and leave the word as plain text.
123 $replacements[$oldn] = substr($old,1);
124 } elseif (IsWikiPage($dbi, $old)) {
125 $replacements[$oldn] = LinkExistingWikiWord($old);
// NOTE(review): the else introducing this fallback is presumably on a line
// not visible in this excerpt - confirm.
127 $replacements[$oldn] = LinkUnknownWikiWord($old);
133 //////////////////////////////////////////////////////////
134 // escape HTML metachars
// Links and WikiWords were already replaced by tokens above, so only the
// remaining raw page text is escaped here. '&' has to go first; otherwise the
// ampersands introduced by the other two entities would be escaped again.
135 $tmpline = str_replace('&', '&amp;', $tmpline);
136 $tmpline = str_replace('>', '&gt;', $tmpline);
137 $tmpline = str_replace('<', '&lt;', $tmpline);
139 // four or more dashes to <hr>
// preg_replace() is used because the POSIX ereg_* functions were removed in
// PHP 7; the pattern itself is unchanged.
140 $tmpline = preg_replace("/^-{4,}/", '<hr>', $tmpline);
142 // %%% are linebreaks
143 $tmpline = str_replace('%%%', '<br>', $tmpline);
145 // bold italics (old way)
// The quote-based rules run from the longest delimiter (five quotes) to the
// shortest (two quotes) so a longer run is not consumed by a shorter rule.
146 $tmpline = preg_replace("|(''''')(.*?)(''''')|",
147 "<strong><em>\\2</em></strong>", $tmpline);
// bold: three single quotes
150 $tmpline = preg_replace("|(''')(.*?)(''')|",
151 "<strong>\\2</strong>", $tmpline);
// bold: double underscore
154 $tmpline = preg_replace("|(__)(.*?)(__)|",
155 "<strong>\\2</strong>", $tmpline);
// italics: two single quotes
158 $tmpline = preg_replace("|('')(.*?)('')|",
159 "<em>\\2</em>", $tmpline);
162 //////////////////////////////////////////////////////////
163 // unordered, ordered, and dictionary list (using TAB)
// From here on a single if/elseif chain picks one block-level mode for the
// line (list item, preformatted, heading or paragraph) and appends the result
// of SetHTMLOutputMode() for that mode to $html.
// NOTE(review): several else lines and closing braces of this chain are not
// visible in this excerpt; notes below mark the places that depend on them.
165 if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
166 // this is a dictionary list (<dl>) item
// Nesting depth is the number of leading TABs.
167 $numtabs = strlen($matches[1]);
168 $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
// The term is optional: <dt> is only written when the term part is non-blank.
// NOTE(review): as visible here the <dd> part is appended to whatever $tmpline
// holds; a reset of $tmpline may sit on a line not shown - verify.
170 if(trim($matches[2]))
171 $tmpline = '<dt>' . $matches[2];
172 $tmpline .= '<dd>' . $matches[4];
174 } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
175 // this is part of a list (<ul>, <ol>)
176 $numtabs = strlen($matches[1]);
// NOTE(review): the branch body for '*' and the else in front of the 'ol'
// assignment are presumably on lines not visible here - confirm.
177 if ($matches[2] == '*') {
180 $listtag = 'ol'; // a rather tacit assumption. oh well.
// Remove the leading TABs and the list marker from the line text.
182 $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
183 $html .= SetHTMLOutputMode($listtag, NESTED_LEVEL, $numtabs);
187 //////////////////////////////////////////////////////////
188 // tabless markup for unordered, ordered, and dictionary lists
189 // ul/ol list types can be mixed, so we only look at the last
190 // character. Changes e.g. from "**#*" to "###*" go unnoticed.
191 // and wouldn't make a difference to the HTML layout anyway.
193 // unordered lists <UL>: "*"
194 } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
195 // this is part of an unordered list
// Nesting depth is the length of the leading marker run.
196 $numtabs = strlen($matches[1]);
197 $tmpline = preg_replace("/^([#*]*\*)/", '', $tmpline);
198 $html .= SetHTMLOutputMode('ul', NESTED_LEVEL, $numtabs);
201 // ordered lists <OL>: "#"
202 } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
203 // this is part of an ordered list
204 $numtabs = strlen($matches[1]);
205 $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
206 $html .= SetHTMLOutputMode('ol', NESTED_LEVEL, $numtabs);
209 // definition lists <DL>: ";text:text"
210 } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
211 // this is a dictionary list item
// Nesting depth is the number of leading semicolons.
212 $numtabs = strlen($matches[1]);
213 $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
// Same optional-term handling as the TAB-based dictionary list above.
// NOTE(review): a reset of $tmpline before these lines is not visible - verify.
215 if(trim($matches[2]))
216 $tmpline = '<dt>' . $matches[2];
217 $tmpline .= '<dd>' . $matches[3];
220 //////////////////////////////////////////////////////////
221 // remaining modes: preformatted text, headings, normal text
223 } elseif (preg_match("/^\s+/", $tmpline)) {
224 // this is preformatted text, i.e. <pre>
225 $html .= SetHTMLOutputMode('pre', ZERO_LEVEL, 0);
// One to three '!' followed by a character other than '!' starts a heading.
227 } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
228 // lines starting with !,!!,!!! are headings
// More '!' means a bigger heading: '!' is h3, '!!' is h2, '!!!' is h1.
229 if($whichheading[1] == '!') $heading = 'h3';
230 elseif($whichheading[1] == '!!') $heading = 'h2';
231 elseif($whichheading[1] == '!!!') $heading = 'h1';
// Strip the heading marker from the line text.
232 $tmpline = preg_replace("/^!+/", '', $tmpline);
233 $html .= SetHTMLOutputMode($heading, ZERO_LEVEL, 0);
// NOTE(review): the else introducing the paragraph fallback is presumably on
// a line not visible in this excerpt - confirm.
236 // it's ordinary output if nothing else
237 $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
// Expand the %%...%% placeholders with the markup rendered before the loop.
240 $tmpline = str_replace('%%Search%%', $quick_search_box, $tmpline);
241 $tmpline = str_replace('%%Fullsearch%%', $full_search_box, $tmpline);
242 $tmpline = str_replace('%%Mostpopular%%', $most_popular_list, $tmpline);
// Admin tokens are only parsed when the WIKI_ADMIN constant is defined and the
// line actually contains the '%%ADMIN-' marker.
243 if(defined('WIKI_ADMIN') && strstr($tmpline, '%%ADMIN-'))
244 $tmpline = ParseAdminTokens($tmpline);
247 ///////////////////////////////////////////////////////
// Put the stored replacements back: token number $i, in the same
// separator/number/separator form that tokenize() builds, is replaced by
// $replacements[$i].
250 for ($i = 0; $i < $ntokens; $i++)
251 $tmpline = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $replacements[$i], $tmpline);
// Append the finished line to the page output.
254 $html .= $tmpline . "\n";
// Same call as for a blank line.
// NOTE(review): presumably this runs once after the per-line loop (whose
// closing brace is not visible here) to reset the output mode - confirm.
257 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);