1 <?php rcs_id('$Id: transform.php,v 1.8.2.1 2001-03-02 03:48:47 dairiki Exp $');
2 // expects $pagehash and $html to be set
/**
 * Replace every match of $pattern in $str with a numbered placeholder token.
 *
 * Each token has the form
 *    $FieldSeparator . $FieldSeparator . <number> . $FieldSeparator
 * and the text it stands for is appended to $orig, so the caller can later
 * overwrite that entry with the final markup and swap it back in.
 *
 * @param string $str      Text to scan.
 * @param string $pattern  PCRE fragment without delimiters. It is embedded in
 *                         a "/.../" pattern, so a literal "/" must be escaped.
 * @param array  $orig     (by reference) Receives each matched string,
 *                         appended in match order.
 * @param int    $ntokens  (by reference) Number given to the next token;
 *                         incremented once per match.
 * @return string $str with every match replaced by its token.
 */
function tokenize($str, $pattern, &$orig, &$ntokens) {
    global $FieldSeparator;

    // Accumulates the already-processed part of the string.
    $new = '';

    while (preg_match("/^(.*?)($pattern)/", $str, $matches)) {
        // A zero-length match consumes nothing, so the same match would be
        // found again forever; stop instead of looping.
        if ($matches[0] === '') {
            break;
        }

        $linktoken = $FieldSeparator . $FieldSeparator . ($ntokens++) . $FieldSeparator;
        $new .= $matches[1] . $linktoken;   // text before the match + token
        $orig[] = $matches[2];              // the matched text itself
        $str = substr($str, strlen($matches[0]));
    }

    // Whatever is left contains no further match; keep it unchanged.
    return $new . $str;
}
// Build $embedded: the ready-made markup for each numbered reference, so that
// "[1]", "[2]", ... in the page text can be swapped in during the per-line
// pass further down. Slots 1..NUM_LINKS of $pagehash['refs'] are examined;
// empty slots get no entry.
21 // Prepare replacements for references [\d+]
22 for ($i = 1; $i < (NUM_LINKS + 1); $i++) {
23 if (! empty($pagehash['refs'][$i])) {
// A reference whose text ends with a match for $InlineImages
// (case-insensitive) is rendered through LinkImage().
24 if (preg_match("/($InlineImages)$/i", $pagehash['refs'][$i])) {
26 $embedded[$i] = LinkImage($pagehash['refs'][$i]);
// NOTE(review): no `else` is visible between the two assignments, and the
// closing braces of the if/if/for are not visible either. Presumably the
// LinkURL() form (link text "[$i]") is the non-image fallback -- confirm.
29 $embedded[$i] = LinkURL($pagehash['refs'][$i], "[$i]");
35 // only call these once, for efficiency
// The three results are kept in variables and substituted for the
// %%Search%%, %%Fullsearch%% and %%Mostpopular%% placeholders on each line.
36 $quick_search_box = RenderQuickSearch();
37 $full_search_box = RenderFullSearch();
38 $most_popular_list = RenderMostPopular();
41 // Loop over all lines of the page and apply transformation rules
// $pagehash["content"] is indexed 0..$numlines-1, one entry per page line.
42 $numlines = count($pagehash["content"]);
44 for ($index = 0; $index < $numlines; $index++) {
// Each line starts with an empty token table; tokenize() appends to it.
// NOTE(review): $ntokens is used below as the running token number, but no
// per-line reset of it is visible here -- confirm it is zeroed for each line.
48 $replacements = array();
50 $tmpline = $pagehash['content'][$index];
// An empty string or a lone carriage return counts as a blank line.
52 if (!strlen($tmpline) || $tmpline == "\r") {
53 // this is a blank line, send <p>
54 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);
// NOTE(review): nothing visible here skips the rest of the loop body for a
// blank line, and the closing brace of this `if` is not visible -- confirm.
/* If your web server is not accessible to the general public, you may
allow this code below, which allows embedded HTML. If just anyone can reach
your web server it is highly advised that you do not allow this.

      elseif (preg_match("/(^\|)(.*)/", $tmpline, $matches)) {
         $html .= SetHTMLOutputMode("", ZERO_LEVEL, 0);
*/
71 //////////////////////////////////////////////////////////
72 // New linking scheme: links are in brackets. This will
73 // emulate typical HTML linking as well as Wiki linking.
// Every step below works the same way: tokenize() replaces each match in
// $tmpline with a placeholder and appends the matched text to $replacements;
// the following loop then overwrites the new entries (indexes $oldn up to
// $ntokens - 1) with the markup that should finally appear in the page.
// NOTE(review): $oldn is compared and advanced in these loops, but no
// assignment to it is visible in this section. Presumably it is set to
// $ntokens before each tokenize() call -- confirm.
75 // First need to protect [[.
77 $tmpline = tokenize($tmpline, '\[\[', $replacements, $ntokens);
// An escaped "[[" is output as a single literal "[".
78 while ($oldn < $ntokens)
79 $replacements[$oldn++] = '[';
81 // Now process the [\d+] links which are numeric references
83 $tmpline = tokenize($tmpline, '\[\s*\d+\s*\]', $replacements, $ntokens);
84 while ($oldn < $ntokens) {
// Drop the leading "["; the (int) cast keeps the number and ignores the rest.
85 $num = (int) substr($replacements[$oldn], 1);
// Only references with a prepared entry in $embedded are replaced;
// otherwise the matched text is kept as it was.
86 if (! empty($embedded[$num]))
87 $replacements[$oldn] = $embedded[$num];
// NOTE(review): no increment of $oldn and no closing brace are visible in
// this loop -- confirm.
91 // match anything else between brackets
93 $tmpline = tokenize($tmpline, '\[.+?\]', $replacements, $ntokens);
94 while ($oldn < $ntokens) {
// ParseAndLink() returns an array; its 'link' element is the replacement.
95 $link = ParseAndLink($replacements[$oldn]);
96 $replacements[$oldn] = $link['link'];
// NOTE(review): no increment of $oldn and no closing brace are visible in
// this loop -- confirm.
100 //////////////////////////////////////////////////////////
101 // replace all URLs with tokens, so we don't confuse them
102 // with Wiki words later. Wiki words in URLs break things.
103 // URLs preceded by a '!' are not linked
// Pattern: optional "!", a word boundary, one of $AllowedProtocols, ":" and
// the URL body. The final character class additionally excludes , . and the
// question mark, so trailing punctuation is not taken as part of the URL.
105 $tmpline = tokenize($tmpline, "!?\b($AllowedProtocols):[^\s<>\[\]\"'()]*[^\s<>\[\]\"'(),.?]", $replacements, $ntokens);
106 while ($oldn < $ntokens) {
// A leading "!" is stripped and the URL is left as plain text.
107 if($replacements[$oldn][0] == '!')
108 $replacements[$oldn] = substr($replacements[$oldn], 1);
// NOTE(review): no `else` is visible before the LinkURL() line, so as shown
// the "!"-stripped text would be linked as well; the $oldn increment and the
// closing brace are not visible either -- confirm.
110 $replacements[$oldn] = LinkURL($replacements[$oldn]);
114 //////////////////////////////////////////////////////////
116 // Wikiwords preceded by a '!' are not linked
119 $tmpline = tokenize($tmpline, "!?$WikiNameRegexp", $replacements, $ntokens);
120 while ($oldn < $ntokens) {
121 $old = $replacements[$oldn];
// Three outcomes: "!"-escaped word -> plain text without the "!";
// name accepted by IsWikiPage() -> LinkExistingWikiWord();
// anything else -> LinkUnknownWikiWord().
122 if ($old[0] == '!') {
123 $replacements[$oldn] = substr($old,1);
124 } elseif (IsWikiPage($dbi, $old)) {
125 $replacements[$oldn] = LinkExistingWikiWord($old);
// NOTE(review): the `else` introducing the unknown-word case, the $oldn
// increment and the closing braces are not visible here -- confirm.
127 $replacements[$oldn] = LinkUnknownWikiWord($old);
//////////////////////////////////////////////////////////
// escape HTML metachars
// Link markup was already swapped out for placeholder tokens above (and is
// put back at the end of the loop), so only the remaining literal page text
// is escaped here.
// '&' must be handled first: otherwise the ampersands introduced by
// '&gt;' and '&lt;' would themselves be escaped again.
$tmpline = str_replace('&', '&amp;', $tmpline);
$tmpline = str_replace('>', '&gt;', $tmpline);
$tmpline = str_replace('<', '&lt;', $tmpline);
// "%%%" forces a line break.
$tmpline = str_replace('%%%', '<br>', $tmpline);

// Inline emphasis. The rules are applied one after another in the order
// listed, each on the result of the previous one, so the longest run of
// quotes has to come first:
//    '''''text'''''  bold italics (old way)
//    '''text'''      bold
//    __text__        bold
//    ''text''        italics
$tmpline = preg_replace(
    array(
        "|(''''')(.*?)(''''')|",
        "|(''')(.*?)(''')|",
        "|(__)(.*?)(__)|",
        "|('')(.*?)('')|"
    ),
    array(
        "<strong><em>\\2</em></strong>",
        "<strong>\\2</strong>",
        "<strong>\\2</strong>",
        "<em>\\2</em>"
    ),
    $tmpline
);
// Decide which block-level element this line belongs to. Every branch
// appends the result of SetHTMLOutputMode() to $html; most branches also
// strip the markup prefix from $tmpline. List branches pass the nesting
// depth, taken from the length of the marker prefix, as the third argument.
160 //////////////////////////////////////////////////////////
161 // unordered, ordered, and dictionary list (using TAB)
// Form: <tabs>term:<tab>definition
163 if (preg_match("/(^\t+)(.*?)(:\t)(.*$)/", $tmpline, $matches)) {
164 // this is a dictionary list (<dl>) item
165 $numtabs = strlen($matches[1]);
166 $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
// The <dt> part is produced only when the trimmed term is truthy.
// NOTE(review): no reset of $tmpline is visible before this `if`, so with a
// blank term the '<dd>' text would be appended to the unmodified line --
// confirm against the full file.
168 if(trim($matches[2]))
169 $tmpline = '<dt>' . $matches[2];
170 $tmpline .= '<dd>' . $matches[4];
// Form: <tabs> followed by "*", a number, or "#"
172 } elseif (preg_match("/(^\t+)(\*|\d+|#)/", $tmpline, $matches)) {
173 // this is part of a list (<ul>, <ol>)
174 $numtabs = strlen($matches[1]);
// NOTE(review): the body of the "*" case and the `else` are not visible;
// as shown, only the 'ol' assignment remains -- confirm.
175 if ($matches[2] == '*') {
178 $listtag = 'ol'; // a rather tacit assumption. oh well.
// Remove the tabs and the list marker from the line text.
180 $tmpline = preg_replace("/^(\t+)(\*|\d+|#)/", "", $tmpline);
181 $html .= SetHTMLOutputMode($listtag, NESTED_LEVEL, $numtabs);
185 //////////////////////////////////////////////////////////
186 // tabless markup for unordered, ordered, and dictionary lists
187 // ul/ol list types can be mixed, so we only look at the last
188 // character. Changes e.g. from "**#*" to "###*" go unnoticed.
189 // and wouldn't make a difference to the HTML layout anyway.
191 // unordered lists <UL>: "*"
// The character after the final "*" must exist and must not be "#".
192 } elseif (preg_match("/^([#*]*\*)[^#]/", $tmpline, $matches)) {
193 // this is part of an unordered list
194 $numtabs = strlen($matches[1]);
195 $tmpline = preg_replace("/^([#*]*\*)/", '', $tmpline);
196 $html .= SetHTMLOutputMode('ul', NESTED_LEVEL, $numtabs);
199 // ordered lists <OL>: "#"
200 } elseif (preg_match("/^([#*]*\#)/", $tmpline, $matches)) {
201 // this is part of an ordered list
202 $numtabs = strlen($matches[1]);
203 $tmpline = preg_replace("/^([#*]*\#)/", "", $tmpline);
204 $html .= SetHTMLOutputMode('ol', NESTED_LEVEL, $numtabs);
207 // definition lists <DL>: ";text:text"
// Depth is the number of leading ";"; the term ends at the first ":".
208 } elseif (preg_match("/(^;+)(.*?):(.*$)/", $tmpline, $matches)) {
209 // this is a dictionary list item
210 $numtabs = strlen($matches[1]);
211 $html .= SetHTMLOutputMode('dl', NESTED_LEVEL, $numtabs);
// NOTE(review): same as the TAB form above -- no reset of $tmpline is
// visible before the optional '<dt>' assignment; confirm.
213 if(trim($matches[2]))
214 $tmpline = '<dt>' . $matches[2];
215 $tmpline .= '<dd>' . $matches[3];
218 //////////////////////////////////////////////////////////
219 // remaining modes: preformatted text, headings, normal text
// Any line that starts with whitespace is treated as preformatted.
221 } elseif (preg_match("/^\s+/", $tmpline)) {
222 // this is preformatted text, i.e. <pre>
223 $html .= SetHTMLOutputMode('pre', ZERO_LEVEL, 0);
// One to three "!" followed by a character other than "!".
225 } elseif (preg_match("/^(!{1,3})[^!]/", $tmpline, $whichheading)) {
226 // lines starting with !,!!,!!! are headings
// More "!" means a more prominent heading: ! -> h3, !! -> h2, !!! -> h1.
227 if($whichheading[1] == '!') $heading = 'h3';
228 elseif($whichheading[1] == '!!') $heading = 'h2';
229 elseif($whichheading[1] == '!!!') $heading = 'h1';
230 $tmpline = preg_replace("/^!+/", '', $tmpline);
231 $html .= SetHTMLOutputMode($heading, ZERO_LEVEL, 0);
// Text after the dashes is captured with surrounding whitespace trimmed.
233 } elseif (preg_match('/^-{4,}\s*(.*?)\s*$/', $tmpline, $matches)) {
234 // four or more dashes to <hr>
235 // <hr> can not be contained in a
236 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0) . "<hr>\n";
// Text following the rule becomes the line content; paragraph mode is
// requested only when that text is non-empty.
237 if ( ($tmpline = $matches[1]) != '' ) {
238 $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
// NOTE(review): the closing brace of the `if` above and the `else` that
// introduces the default case are not visible here -- confirm.
241 // it's ordinary output if nothing else
242 $html .= SetHTMLOutputMode('p', ZERO_LEVEL, 0);
// Swap the pre-rendered widgets in for their %%...%% placeholders. The
// pairs are applied in the order listed, each on the result of the previous.
// Known problem: the output is not always valid HTML, because paragraph
// (<p>) elements are not allowed to contain other block-level elements
// (like <form>s).
$tmpline = str_replace(
    array('%%Search%%', '%%Fullsearch%%', '%%Mostpopular%%'),
    array($quick_search_box, $full_search_box, $most_popular_list),
    $tmpline
);

// Admin placeholders are expanded only when WIKI_ADMIN is defined and the
// line actually contains one.
if (defined('WIKI_ADMIN') && strstr($tmpline, '%%ADMIN-')) {
    $tmpline = ParseAdminTokens($tmpline);
}
255 ///////////////////////////////////////////////////////
// Put the generated markup back: every placeholder that tokenize() left in
// the line (two field separators, the token number, one field separator)
// is replaced by the matching entry of $replacements.
258 for ($i = 0; $i < $ntokens; $i++)
259 $tmpline = str_replace($FieldSeparator.$FieldSeparator.$i.$FieldSeparator, $replacements[$i], $tmpline);
// The finished line is appended to the page output.
262 $html .= $tmpline . "\n";
// NOTE(review): the closing brace of the per-line loop is not visible;
// presumably this last call runs once after the loop -- confirm.
265 $html .= SetHTMLOutputMode('', ZERO_LEVEL, 0);