From 08e67ac7a3d85c5def6258316aec212b1be7911b Mon Sep 17 00:00:00 2001 From: rurban Date: Mon, 5 Jul 2004 13:04:47 +0000 Subject: [PATCH] new RawHtml policies: ENABLE_RAW_HTML_LOCKEDONLY, ENABLE_RAW_HTML_SAFE git-svn-id: svn://svn.code.sf.net/p/phpwiki/code/trunk@3860 96ab9672-09ca-45d6-a79d-3d69d39ca109 --- config/config-default.ini | 6 +- config/config-dist.ini | 8 +- lib/plugin/RawHtml.php | 171 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 174 insertions(+), 11 deletions(-) diff --git a/config/config-default.ini b/config/config-default.ini index 47f13e6a5..9666acd80 100644 --- a/config/config-default.ini +++ b/config/config-default.ini @@ -1,4 +1,4 @@ -; $Id: config-default.ini,v 1.15 2004-06-29 12:36:34 rurban Exp $ +; $Id: config-default.ini,v 1.16 2004-07-05 13:04:46 rurban Exp $ ; This is the default PhpWiki configuration for undefined config.ini entries. ; recent development features: @@ -12,7 +12,9 @@ WIKI_NAME = PhpWiki ENABLE_REVERSE_DNS = true ENCRYPTED_PASSWD = true ZIPDUMP_AUTH = false -ENABLE_RAW_HTML = false; +ENABLE_RAW_HTML = true +ENABLE_RAW_HTML_LOCKEDONLY = true +ENABLE_RAW_HTML_SAFE = true STRICT_MAILABLE_PAGEDUMPS = false DEFAULT_DUMP_DIR = /tmp/wikidump HTML_DUMP_DIR = /tmp/wikidumphtml diff --git a/config/config-dist.ini b/config/config-dist.ini index 6bcc3fcd9..3d25a84b2 100644 --- a/config/config-dist.ini +++ b/config/config-dist.ini @@ -99,7 +99,13 @@ ZIPDUMP_AUTH = false ; pages. This is a possible security threat, as much HTML (or, rather, ; JavaScript) can be very risky. If you are in a controlled environment, ; however, it could be of use. -ENABLE_RAW_HTML = false; +ENABLE_RAW_HTML = true +; If this is set, only pages locked by the Administrator may contain the RawHtml plugin +ENABLE_RAW_HTML_LOCKEDONLY = true +; If this is set, all unsafe html code is stripped automatically (experimental!) +; See http://chxo.com/scripts/safe_html-test.php +ENABLE_RAW_HTML_SAFE = true + ; If you define this to true, (MIME-type) page-dumps (either zip dumps, ; or "dumps to directory" will be encoded using the quoted-printable diff --git a/lib/plugin/RawHtml.php b/lib/plugin/RawHtml.php index 64ae0cc0e..782ba0975 100644 --- a/lib/plugin/RawHtml.php +++ b/lib/plugin/RawHtml.php @@ -1,7 +1,7 @@ getPage($basepage); + if (ENABLE_RAW_HTML_LOCKEDONLY) { + if (! $page->get('locked')) { + return $this->disabled(fmt("%s is only allowed in locked pages.", + _("Raw HTML"))); + } + } + if (ENABLE_RAW_HTML_SAFE) { + // check for javascript handlers (on*) and style tags with external urls. no javascript urls. + // See also http://simon.incutio.com/archive/2003/02/23/safeHtmlChecker + // But we should allow not only code semantic meaning, presentational markup also. - if (! $page->get('locked')) { - return $this->disabled(fmt("%s is only allowed in locked pages.", - _("Raw HTML"))); + // http://chxo.com/scripts/safe_html-test.php looks better + $argstr = $this->safe_html($argstr); + /*return $this->disabled(HTML(fmt("This %s plugin on %s is disabled because of unsafe HTML code. ",$this->getName(), $basepage), + fmt("See PhpWiki:allowing%20safe%20HTML") + )); + */ } return HTML::raw($argstr); } + + + // From http://chxo.com/scripts/safe_html-test.php + // safe_html by Chris Snyder (csnyder@chxo.com) for http://pcoms.net + // - Huge thanks to James Wetterau for testing and feedback! + +/* +Copyright 2003 Chris Snyder. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + set of functions for sanitizing user input: + keeps "friendly" tags but strips javascript events and style attributes + closes any open comment tags + closes any open HTML tags - results may not be valid HTML, but + at least they will keep the rest of the page from breaking + + treats the following as malicious conditions and returns text stripped + of all html tags: + any instances of ='javascript: + event or style attributes remaining after initial replacement +*/ + + function strip_attributes ($html, $attrs) { + if (!is_array($attrs)) { + $array= array( "$attrs" ); + unset($attrs); + $attrs= $array; + } + + foreach ($attrs AS $attribute) { + // once for ", once for ', s makes the dot match linebreaks, too. + $search[]= "/".$attribute.'\s*=\s*".+"/Uis'; + $search[]= "/".$attribute."\s*=\s*'.+'/Uis"; + // and once more for unquoted attributes + $search[]= "/".$attribute."\s*=\s*\S+/i"; + } + $html= preg_replace($search, "", $html); + + // check for additional matches and strip all tags if found + foreach ($search AS $pattern) { + if (preg_match($pattern, $html)) { + $html= strip_tags($html); + break; + } + } + + return $html; + } + + function safe_html ($html, $allowedtags="") { + $version= "safe_html.php/0.4"; + + // anything with ="javascript: is right out -- strip all tags and return if found + $pattern= "/=\s*\S+script:\S+/Ui"; + if (preg_match($pattern, $html)) { + $html= strip_tags($html); + return $html; + } + + // setup -- $allowedtags is an array of $tag=>$closeit pairs, where $tag is an HTML tag to allow and $closeit is 1 if the tag requires a matching, closing tag + if ($allowedtags=="") { + $allowedtags= array ( "p"=>1, "br"=>0, "a"=>1, "img"=>0, "li"=>1, "ol"=>1, "ul"=>1, "b"=>1, "i"=>1, "em"=>1, "strong"=>1, "del"=>1, "ins"=>1, "u"=>1, "blockquote"=>1, "pre"=>1, "hr"=>0); + } + elseif (!is_array($allowedtags)) { + $array= array( "$allowedtags" ); + unset($allowedtags); + $allowedtags= $array; + } + + // there's some debate about this.. is strip_tags() better than rolling your own regex? + // note: a bug in PHP 4.3.1 caused improper handling of ! in tag attributes when using strip_tags() + $stripallowed= ""; + foreach ($allowedtags AS $tag=>$closeit) { + $stripallowed.= "<$tag>"; + } + + //print "Stripallowed: $stripallowed -- ".print_r($allowedtags,1); + $html= strip_tags($html, $stripallowed); + + // also, lets get rid of some pesky attributes that may be set on the remaining tags... + $badattrs= array("on\w+", "style"); + $html= $this->strip_attributes($html, $badattrs); + + // close html tags if necessary -- note that this WON'T be graceful formatting-wise, it just has to fix any maliciousness + foreach ($allowedtags AS $tag=>$closeit) { + if (!$closeit) continue; + $patternopen= "/<$tag\b[^>]*>/Ui"; + $patternclose= "/<\/$tag\b[^>]*>/Ui"; + $totalopen= preg_match_all ( $patternopen, $html, $matches ); + $totalclose= preg_match_all ( $patternclose, $html, $matches2 ); + if ($totalopen>$totalclose) { + $html.= str_repeat("", ($totalopen - $totalclose)); + } + } + + // close any open "; + return $html; + } } // $Log: not supported by cvs2svn $ +// Revision 1.8 2003/11/22 17:50:32 carstenklapp +// Minor internal change: Removed redundant call to gettext within +// fmt(). (locale make: RawHtml.php:65: warning: keyword nested in +// keyword arg) +// // Revision 1.7 2003/03/17 22:32:26 dairiki // Minor HTTP caching fix. // -- 2.45.2