]> CyberLeo.Net >> Repos - Github/sugarcrm.git/blob - include/phpmailer/extras/htmlfilter.php
Release 6.5.3
[Github/sugarcrm.git] / include / phpmailer / extras / htmlfilter.php
1 <?php
2 /**
3  * htmlfilter.inc
4  * ---------------
5  * This set of functions allows you to filter html in order to remove
6  * any malicious tags from it. Useful in cases when you need to filter
7  * user input for any cross-site-scripting attempts.
8  *
9  * Copyright (C) 2002-2004 by Duke University
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  
24  * 02110-1301  USA
25  *
26  * @Author      Konstantin Riabitsev <icon@linux.duke.edu>
27  * @Version 1.1 ($Date: 2011-07-04 14:02:23 -0400 (Mon, 04 Jul 2011) $)
28  */
29
30 /**
31  * @Author  Jim Jagielski <jim@jaguNET.com / jimjag@gmail.com>
32  */
33
/**
 * Builds the textual representation of a tag from its name, its
 * attributes and its type. Called internally by tln_sanitize.
 *
 * @param  $tagname  the name of the tag.
 * @param  $attary   array of attribute name => attribute value. Values are
 *                   emitted verbatim, so they must already carry their quotes.
 *                   Anything that is not a non-empty array is ignored.
 * @param  $tagtype  the type of the tag: 2 prints a closing tag (</name>),
 *                   3 prints an XHTML-style self-closing tag (<name ... />),
 *                   any other value prints an opening tag.
 * @return           a string with the final tag representation.
 */
function tln_tagprint($tagname, $attary, $tagtype) {
    if ($tagtype == 2) {
        // Closing tags never carry attributes.
        return '</' . $tagname . '>';
    }
    $fulltag = '<' . $tagname;
    if (is_array($attary) && count($attary) > 0) {
        $atts = array();
        // foreach instead of list()/each(): each() was removed in PHP 8.0 and
        // silently skipped entries when the array pointer was not at the start.
        foreach ($attary as $attname => $attvalue) {
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . implode(' ', $atts);
    }
    if ($tagtype == 3) {
        $fulltag .= ' /';
    }
    return $fulltag . '>';
}
64
/**
 * Callback intended for array_walk: lowercases the element it is handed,
 * writing the result back through the reference.
 *
 * @param  $val  the value to lowercase, passed by reference.
 * @return       void; the argument itself is modified.
 */
function tln_casenormalize(&$val) {
    $lowered = strtolower($val);
    $val = $lowered;
}
75
/**
 * Skips any whitespace starting at the given position within a string
 * and reports where the next non-whitespace character is.
 *
 * @param  $body    the string.
 * @param  $offset  the offset within the string where we should start
 *                  looking for the next non-whitespace character.
 * @return          the location within $body of the next non-whitespace
 *                  character; $offset itself when there is no whitespace
 *                  at that position.
 */
function tln_skipspace($body, $offset) {
    $matches = array();
    // The (string) cast covers substr() returning false on PHP < 8 when
    // $offset lies beyond the end of $body.
    preg_match('/^(\s*)/s', (string) substr($body, $offset), $matches);
    // $matches[1] is a string: measure it with strlen(). The previous
    // sizeof() call on a string raises a TypeError on PHP 8.
    if (isset($matches[1]) && $matches[1] !== '') {
        $offset += strlen($matches[1]);
    }
    return $offset;
}
95
/**
 * Looks for the next occurrence of a string within another string. It is
 * really just a glorified "strpos", except that it reports failure as the
 * length of the haystack instead of false.
 *
 * @param  $body    The string to look for needle in.
 * @param  $offset  Start looking from this position.
 * @param  $needle  The character/string to look for.
 * @return          location of the next occurrence of the needle, or
 *                  strlen($body) if needle wasn't found (including when
 *                  $offset lies beyond the end of $body).
 */
function tln_findnxstr($body, $offset, $needle) {
    $length = strlen($body);
    // strpos() warns (PHP < 8) or throws ValueError (PHP 8) when the offset
    // is past the end of the haystack; nothing can be found there anyway.
    if ($offset > $length) {
        return $length;
    }
    $pos = strpos($body, $needle, $offset);
    return ($pos === false) ? $length : $pos;
}
115
/**
 * Searches for the first match of a PCRE fragment at or after a given
 * position in a string.
 *
 * @param  $body    The string to search.
 * @param  $offset  Start looking from here.
 * @param  $reg     A PCRE-style regex fragment (no delimiters); it is
 *                  embedded into a pattern that uses "%" as delimiter.
 * @return          false if nothing matched, otherwise an array with:
 *                  - integer location of the match within $body
 *                  - string with the content between $offset and the match
 *                  - string with whatever it is we matched
 */
function tln_findnxreg($body, $offset, $reg) {
    $found = array();
    $tail = substr($body, $offset);
    // Lazily capture everything before the first hit, then the hit itself.
    $pattern = '%^(.*?)(' . $reg . ')%s';
    preg_match($pattern, $tail, $found);
    if (!isset($found[0])) {
        return false;
    }
    return array(
        $offset + strlen($found[1]),
        $found[1],
        $found[2]
    );
}
144
/**
 * Looks for the next tag at or after a given position and parses it.
 *
 * @param  $body    String where to look for the next tag.
 * @param  $offset  Start looking from here.
 * @return          false if no more tags exist in the body, or
 *                  an array with the following members:
 *                  - string with the name of the tag (lowercased)
 *                  - array with attributes and their values (attribute names
 *                    lowercased, values including their quotes), or false
 *                    when the tag ended right after its name
 *                  - integer with tag type (1 opening, 2 closing,
 *                    3 XHTML-style self-closing)
 *                  - integer where the tag starts (starting "<")
 *                  - integer where the tag ends (ending ">")
 *                  The first three members are false if the tag is invalid,
 *                  is a comment or an SGML declaration, or is never closed.
 */
function tln_getnxtag($body, $offset) {
    if ($offset > strlen($body)) {
        return false;
    }
    $lt = tln_findnxstr($body, $offset, '<');
    if ($lt == strlen($body)) {
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = tln_skipspace($body, $lt + 1);
    if ($pos >= strlen($body)) {
        return array(false, false, false, $lt, strlen($body));
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah"/>
     */
    $tagtype = false;
    switch (substr($body, $pos, 1)) {
    case '/':
        $tagtype = 2;
        $pos++;
        break;
    case '!':
        /**
         * A comment or an SGML declaration.
         */
        if (substr($body, $pos + 1, 2) == '--') {
            $gt = strpos($body, '-->', $pos);
            if ($gt === false) {
                $gt = strlen($body);
            } else {
                // Point at the ">" that terminates "-->".
                $gt += 2;
            }
            return array(false, false, false, $lt, $gt);
        } else {
            $gt = tln_findnxstr($body, $pos, '>');
            return array(false, false, false, $lt, $gt);
        }
        break;
    default:
        /**
         * Assume tagtype 1 for now. If it's type 3, we'll switch values
         * later.
         */
        $tagtype = 1;
        break;
    }

    $tagname = '';
    /**
     * Look for the next character outside [\w\-_], which indicates the
     * end of the tag name.
     */
    $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
    if ($regary == false) {
        return array(false, false, false, $lt, strlen($body));
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match) {
    case '/':
        /**
         * This is an xhtml-style tag with a closing / at the
         * end, like so: <img src="blah"/>. Check if it's followed
         * by the closing bracket. If not, then this tag is invalid
         */
        if (substr($body, $pos, 2) == '/>') {
            $pos++;
            $tagtype = 3;
        } else {
            $gt = tln_findnxstr($body, $pos, '>');
            $retary = array(false, false, false, $lt, $gt);
            return $retary;
        }
        // Deliberate fall-through: "/>" ends the tag just like ">".
    case '>':
        return array($tagname, false, $tagtype, $lt, $pos);
        break;
    default:
        /**
         * Check if it's whitespace
         */
        if (preg_match('/\s/', $match)) {
        } else {
            /**
             * This is an invalid tag! Look for the next closing ">".
             */
            $gt = tln_findnxstr($body, $lt, '>');
            return array(false, false, false, $lt, $gt);
        }
    }

    /**
     * At this point we're here:
     * <tagname      attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attname = '';
    $attary = array();

    while ($pos <= strlen($body)) {
        $pos = tln_skipspace($body, $pos);
        if ($pos == strlen($body)) {
            /**
             * Non-closed tag.
             */
            return array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = array();
        preg_match('%^(\s*)(>|/>)%s', substr($body, $pos), $matches);
        if (isset($matches[0]) && $matches[0]) {
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == '/>') {
                $tagtype = 3;
                $pos++;
            }
            return array($tagname, $attary, $tagtype, $lt, $pos);
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
        if ($regary == false) {
            /**
             * Looks like body ended before the end of tag.
             */
            return array(false, false, false, $lt, strlen($body));
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch ($match) {
        case '/':
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah"/>. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == '/>') {
                $pos++;
                $tagtype = 3;
            } else {
                $gt = tln_findnxstr($body, $pos, '>');
                $retary = array(false, false, false, $lt, $gt);
                return $retary;
            }
            // Deliberate fall-through: record the value-less attribute.
        case '>':
            // Square brackets: the {}-offset syntax is a parse error on PHP 8.
            $attary[$attname] = '"yes"';
            return array($tagname, $attary, $tagtype, $lt, $pos);
            break;
        default:
            /**
             * Skip whitespace and see what we arrive at.
             */
            $pos = tln_skipspace($body, $pos);
            $char = substr($body, $pos, 1);
            /**
             * Two things are valid here:
             * '=' means this is attribute type 1 2 or 3.
             * \w means this was attribute type 4.
             * anything else we ignore and re-loop. End of tag and
             * invalid stuff will be caught by our checks at the beginning
             * of the loop.
             */
            if ($char == '=') {
                $pos++;
                $pos = tln_skipspace($body, $pos);
                /**
                 * Here are 3 possibilities:
                 * "'"  attribute type 1
                 * '"'  attribute type 2
                 * everything else is the content of tag type 3
                 */
                $quot = substr($body, $pos, 1);
                if ($quot == '\'') {
                    $regary = tln_findnxreg($body, $pos + 1, '\'');
                    if ($regary == false) {
                        return array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    // Step over the closing quote.
                    $pos++;
                    $attary[$attname] = '\'' . $attval . '\'';
                } else if ($quot == '"') {
                    $regary = tln_findnxreg($body, $pos + 1, '\"');
                    if ($regary == false) {
                        return array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    // Step over the closing quote.
                    $pos++;
                    $attary[$attname] = '"' . $attval . '"';
                } else {
                    /**
                     * These are hateful. Look for \s, or >.
                     */
                    $regary = tln_findnxreg($body, $pos, '[\s>]');
                    if ($regary == false) {
                        return array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    /**
                     * If it's ">" it will be caught at the top.
                     */
                    $attval = preg_replace('/\"/s', '&quot;', $attval);
                    $attary[$attname] = '"' . $attval . '"';
                }
            } else if (preg_match('|[\w/>]|', $char)) {
                /**
                 * That was attribute type 4.
                 */
                $attary[$attname] = '"yes"';
            } else {
                /**
                 * An illegal character. Find next '>' and return.
                 */
                $gt = tln_findnxstr($body, $pos, '>');
                return array(false, false, false, $lt, $gt);
            }
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return array(false, false, false, $lt, strlen($body));
}
435
/**
 * Translates numeric entities into literal byte values so they can be
 * checked.
 *
 * @param $attvalue  the by-ref value to check; every match of $regex in it
 *                   is replaced by the single byte it encodes.
 * @param $regex     the regular expression to check against. Its first
 *                   capture group must hold the numeric part of the entity.
 * @param $hex       whether the entities are hexadecimal.
 * @return           True or False depending on whether there were matches.
 */
function tln_deent(&$attvalue, $regex, $hex = false) {
    $matches = array();
    preg_match_all($regex, $attvalue, $matches);
    if (!is_array($matches) || !isset($matches[0]) || count($matches[0]) == 0) {
        return false;
    }
    $repl = array();
    foreach ($matches[0] as $i => $entity) {
        $numval = $matches[1][$i];
        if ($hex) {
            $numval = hexdec($numval);
        }
        // chr() only looks at the low byte. Reducing the value ourselves
        // keeps absurdly long digit runs (which no longer fit an integer)
        // from raising a TypeError inside chr() on PHP 8.
        $repl[$entity] = chr((int) fmod((float) $numval, 256));
    }
    $attvalue = strtr($attvalue, $repl);
    return true;
}
463
/**
 * Decodes entity-encoded and backslash-escaped characters inside an
 * attribute value so that later checks see the literal characters.
 *
 * @param  $attvalue  A string to run the entity check against.
 * @return            Nothing, modifies a reference value.
 */
function tln_defang(&$attvalue) {
    /**
     * Nothing to decode unless there is an ampersand or a backslash.
     */
    $has_entity = strpos($attvalue, '&') !== false;
    $has_backslash = strpos($attvalue, '\\') !== false;
    if (!$has_entity && !$has_backslash) {
        return;
    }
    /**
     * Keep decoding until a full pass finds nothing. Short-circuiting means
     * a later form is only tried once the earlier forms no longer match.
     */
    do {
        $decoded = tln_deent($attvalue, '/\&#0*(\d+);*/s')
            || tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
            || tln_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($decoded == true);
    $attvalue = stripslashes($attvalue);
}
490
/**
 * Strips tabs, carriage returns, newlines, NUL bytes and spaces from an
 * attribute value, because some browsers treat "java[tab]script" the
 * same as "javascript".
 *
 * @param  $attvalue  The attribute value before extraneous spaces removed.
 * @return            Nothing, modifies a reference value.
 */
function tln_unspace(&$attvalue) {
    $junk = "\t\r\n\0 ";
    if (strcspn($attvalue, $junk) == strlen($attvalue)) {
        // None of the unwanted characters occur; leave the value untouched.
        return;
    }
    $attvalue = str_replace(array("\t", "\r", "\n", "\0", " "), '', $attvalue);
}
506
/**
 * Runs the attribute checks for one tag: drops unwanted attributes,
 * rewrites dangerous attribute values and appends forced attributes.
 *
 * @param  $tagname          String with the name of the tag.
 * @param  $attary           Array with all tag attributes (name => value).
 * @param  $rm_attnames      Array of tag-name regex => list of attribute-name
 *                           regexes; a matching attribute is removed.
 *                           See description for tln_sanitize.
 * @param  $bad_attvals      Array of tag-name regex => array of attribute-name
 *                           regex => array(patterns, replacements) applied
 *                           with preg_replace to the decoded value.
 *                           See description for tln_sanitize.
 * @param  $add_attr_to_tag  Array of tag-name regex => attributes merged into
 *                           the result. See description for tln_sanitize.
 * @return                   Array with modified attributes.
 */
function tln_fixatts($tagname,
                     $attary,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag
                     ) {
    // foreach iterates over a snapshot, so $attary may be edited inside the
    // loop. It replaces list()/each(), which was removed in PHP 8.0.
    foreach ($attary as $attname => $attvalue) {
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr) {
                    if (preg_match($matchattr, $attname)) {
                        unset($attary[$attname]);
                        // Move on to the next attribute. A bare "continue"
                        // only advanced the innermost loop, letting the
                        // value checks below re-add the removed attribute.
                        continue 3;
                    }
                }
            }
        }
        /**
         * Remove any backslashes, entities, or extraneous whitespace.
         */
        tln_defang($attvalue);
        tln_unspace($attvalue);

        /**
         * Now run the value checks. For every tag pattern that matches the
         * tag, and every attribute pattern that matches the attribute, the
         * decoded value is passed through preg_replace.
         */
        foreach ($bad_attvals as $matchtag => $matchattrs) {
            if (preg_match($matchtag, $tagname)) {
                foreach ($matchattrs as $matchattr => $valary) {
                    if (preg_match($matchattr, $attname)) {
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                        // The stored value is only overwritten when a
                        // replacement actually changed the decoded value.
                        if ($newvalue != $attvalue) {
                            $attary[$attname] = $newvalue;
                        }
                    }
                }
            }
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag => $addattary) {
        if (preg_match($matchtag, $tagname)) {
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
579
580 /**
581  *
582  * @param $body                                 the string with HTML you wish to filter
583  * @param $tag_list                             see description above
584  * @param $rm_tags_with_content see description above
585  * @param $self_closing_tags    see description above
586  * @param $force_tag_closing    see description above
587  * @param $rm_attnames                  see description above
588  * @param $bad_attvals                  see description above
589  * @param $add_attr_to_tag              see description above
590  * @return                                              tln_sanitized html safe to show on your pages.
591  */
592 function tln_sanitize($body, 
593                                   $tag_list, 
594                                   $rm_tags_with_content,
595                                   $self_closing_tags,
596                                   $force_tag_closing,
597                                   $rm_attnames,
598                                   $bad_attvals,
599                                   $add_attr_to_tag
600                                   )
601 {
602         $me = 'tln_sanitize';
603         /**
604          * Normalize rm_tags and rm_tags_with_content.
605          */
606         $rm_tags = array_shift($tag_list);
607         @array_walk($tag_list, 'tln_casenormalize');
608         @array_walk($rm_tags_with_content, 'tln_casenormalize');
609         @array_walk($self_closing_tags, 'tln_casenormalize');
610         /**
611          * See if tag_list is of tags to remove or tags to allow.
612          * false  means remove these tags
613          * true   means allow these tags
614          */
615         $curpos = 0;
616         $open_tags = Array();
617         $trusted = "<!-- begin tln_sanitized html -->\n";
618         $skip_content = false;
619         /**
620          * Take care of netscape's stupid javascript entities like
621          * &{alert('boo')};
622          */
623         $body = preg_replace('/&(\{.*?\};)/si', '&amp;\\1', $body);
624         while (($curtag = tln_getnxtag($body, $curpos)) != FALSE){
625                 list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
626                 $free_content = substr($body, $curpos, $lt - $curpos);
627                 if ($skip_content == false){
628                         $trusted .= $free_content;
629                 } else {
630                 }
631                 if ($tagname != FALSE){
632                         if ($tagtype == 2){
633                                 if ($skip_content == $tagname){
634                                         /**
635                                          * Got to the end of tag we needed to remove.
636                                          */
637                                         $tagname = false;
638                                         $skip_content = false;
639                                 } else {
640                                         if ($skip_content == false){
641                                                 if (isset($open_tags{$tagname}) && 
642                                                         $open_tags{$tagname} > 0){
643                                                         $open_tags{$tagname}--;
644                                                 } else {
645                                                         $tagname = false;
646                                                 }
647                                         } else {
648                                         }
649                                 }
650                         } else {
651                                 /**
652                                  * $rm_tags_with_content
653                                  */
654                                 if ($skip_content == false){
655                                         /**
656                                          * See if this is a self-closing type and change
657                                          * tagtype appropriately.
658                                          */
659                                         if ($tagtype == 1
660                                                 && in_array($tagname, $self_closing_tags)){
661                                                 $tagtype = 3;
662                                         }
663                                         /**
664                                          * See if we should skip this tag and any content
665                                          * inside it.
666                                          */
667                                         if ($tagtype == 1 
668                                                 && in_array($tagname, $rm_tags_with_content)){
669                                                 $skip_content = $tagname;
670                                         } else {
671                                                 if (($rm_tags == false 
672                                                          && in_array($tagname, $tag_list)) ||
673                                                         ($rm_tags == true 
674                                                          && !in_array($tagname, $tag_list))){
675                                                         $tagname = false;
676                                                 } else {
677                                                         if ($tagtype == 1){
678                                                                 if (isset($open_tags{$tagname})){
679                                                                         $open_tags{$tagname}++;
680                                                                 } else {
681                                                                         $open_tags{$tagname} = 1;
682                                                                 }
683                                                         }
684                                                         /**
685                                                          * This is where we run other checks.
686                                                          */
687                                                         if (is_array($attary) && sizeof($attary) > 0){
688                                                                 $attary = tln_fixatts($tagname,
689                                                                                                   $attary,
690                                                                                                   $rm_attnames,
691                                                                                                   $bad_attvals,
692                                                                                                   $add_attr_to_tag);
693                                                         }
694                                                 }
695                                         }
696                                 } else {
697                                 }
698                         }
699                         if ($tagname != false && $skip_content == false){
700                                 $trusted .= tln_tagprint($tagname, $attary, $tagtype);
701                         }
702                 } else {
703                 }
704                 $curpos = $gt + 1;
705         }
706         $trusted .= substr($body, $curpos, strlen($body) - $curpos);
707         if ($force_tag_closing == true){
708                 foreach ($open_tags as $tagname=>$opentimes){
709                         while ($opentimes > 0){
710                                 $trusted .= '</' . $tagname . '>';
711                                 $opentimes--;
712                         }
713                 }
714                 $trusted .= "\n";
715         }
716         $trusted .= "<!-- end tln_sanitized html -->\n";
717         return $trusted;
718 }
719
720 // 
721 // Use the nifty htmlfilter library
722 //
723
724
/**
 * Filters an HTML body through tln_sanitize() using a fixed rule set:
 * a list of tags to drop, tags to drop together with their content,
 * attribute names to strip, and attribute values to rewrite.
 *
 * Each entry of $bad_attvals holds two parallel lists: regex patterns
 * and their replacements. preg_replace() pairs them by position, so the
 * two lists MUST stay the same length and in the same order.
 *
 * @param  $body                   the HTML to filter.
 * @param  $trans_image_path       path/URL substituted for rejected
 *                                 src/background values.
 * @param  $block_external_images  when true, http/https URLs in
 *                                 src/background attributes and in
 *                                 style url() values are replaced with
 *                                 $trans_image_path as well.
 * @return                         whatever tln_sanitize() returns for
 *                                 the given body and rule set.
 */
function HTMLFilter($body, $trans_image_path, $block_external_images = false) {

    // NOTE(review): the leading `false` looks like the mode flag that
    // tln_sanitize() reads from the tag list (false = remove the listed
    // tags, true = keep only the listed tags) -- confirm against
    // tln_sanitize().
    $tag_list = array(
        false,
        "object",
        "meta",
        "html",
        "head",
        "base",
        "link",
        "frame",
        "iframe",
        "plaintext",
        "marquee"
    );

    $rm_tags_with_content = array(
        "script",
        "applet",
        "embed",
        "title",
        "frameset",
        "xmp",
        "xml"
    );

    $self_closing_tags = array(
        "img",
        "br",
        "hr",
        "input",
        "outbind"
    );

    $force_tag_closing = true;

    $rm_attnames = array(
        "/.*/" =>
            array(
                // "/target/i",
                "/^on.*/i",
                "/^dynsrc/i",
                "/^data.*/i",
                "/^lowsrc.*/i"
            )
    );

    // Use the ${n} backreference form next to the interpolated path:
    // with "\\1" a path starting with a digit (e.g. "1x1.gif") would be
    // read as a two-digit backreference such as \11.
    $image_repl = '${1}' . $trans_image_path . '${2}';

    $bad_attvals = array(
        "/.*/" =>
        array(
            "/^src|background/i" =>
            array(
                array(
                    "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                    "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                    "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                ),
                // One replacement per pattern above.
                array(
                    $image_repl,
                    $image_repl,
                    $image_repl
                )
            ),
            "/^href|action/i" =>
            array(
                array(
                    "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                    "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                    "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                ),
                // One replacement per pattern above.
                array(
                    "\\1#\\1",
                    "\\1#\\1",
                    "\\1#\\1"
                )
            ),
            "/^style/i" =>
            array(
                array(
                    "/expression/i",
                    "/binding/i",
                    "/behaviou*r/i",
                    "/include-source/i",
                    "/position\s*:\s*absolute/i",
                    "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si",
                    "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si",
                    "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si",
                    "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si"
                ),
                // One replacement per pattern above. Surplus entries here
                // previously shifted the pairing, so the last pattern (and
                // the external-image pattern appended below) received the
                // wrong replacement.
                array(
                    "idiocy",
                    "idiocy",
                    "idiocy",
                    "idiocy",
                    "",
                    "url(\\1#\\1)",
                    "url(\\1#\\1)",
                    "url(\\1#\\1)",
                    "\\1:url(\\2#\\3)"
                )
            )
        )
    );

    if ($block_external_images) {
        // Append pattern and replacement together so they share an index.
        // Square-bracket offsets are required: the curly-brace form is a
        // parse error on PHP 8.
        $bad_attvals['/.*/']['/^src|background/i'][0][] =
            '/^([\'\"])\s*https*:.*([\'\"])/si';
        $bad_attvals['/.*/']['/^src|background/i'][1][] =
            '${1}' . $trans_image_path . '${1}';
        $bad_attvals['/.*/']['/^style/i'][0][] =
            '/url\(([\'\"])\s*https*:.*([\'\"])\)/si';
        $bad_attvals['/.*/']['/^style/i'][1][] =
            'url(${1}' . $trans_image_path . '${1})';
    }

    // Attributes forced onto matching tags: every <a> gets target="_blank".
    $add_attr_to_tag = array(
        "/^a$/i" =>
            array('target' => '"_blank"')
    );

    return tln_sanitize(
        $body,
        $tag_list,
        $rm_tags_with_content,
        $self_closing_tags,
        $force_tag_closing,
        $rm_attnames,
        $bad_attvals,
        $add_attr_to_tag
    );
}
860
861 ?>