wikimarkup
* Requires XmlParser, XmlElement and the expat (or now the libxml) library. This is all in core.
*/
/*
* Copyright (C) 2004 Reini Urban
*
* This file is part of PhpWiki.
*
* PhpWiki is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* PhpWiki is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with PhpWiki; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/**
* Base class to implement html => wikitext converters,
* extendable for various wiki syntax versions.
* This is needed to be able to use htmlarea-like editors,
* and to import XML or HTML documents.
*
* See also php-html.sf.net for a php-only version, if
* you don't have the expat/libxml extension included.
* See also http://search.cpan.org/~diberri/HTML-WikiConverter/
*
*/
// XmlParser contains the XML (expat) and url-grabber methods
require_once 'lib/XmlParser.php';
class HtmlParser
    extends XmlParser
{
    var $dialect, $_handlers, $root;

    /**
     * Create a converter for the given wiki dialect.
     *
     * The dialect name is appended to "HtmlParser_" to find the class that
     * supplies the tag => handler table and the handler methods.
     *
     * dialect: "PhpWiki2", "PhpWiki"
     * possible more dialects: MediaWiki, kwiki, c2
     *
     * @param string $dialect  Dialect name (suffix of the HtmlParser_* class).
     * @param string $encoding Passed through unchanged to the XmlParser constructor.
     */
    function HtmlParser($dialect = "PhpWiki2", $encoding = '')
    {
        $classname = "HtmlParser_" . $dialect;
        if (class_exists($classname))
            $this->dialect = new $classname;
        else {
            trigger_error(sprintf("unknown HtmlParser dialect %s", $dialect), E_USER_ERROR);
        }
        // Only bind to the dialect's table if a dialect was really created;
        // an installed error handler may let execution continue after the
        // E_USER_ERROR above.
        if (is_object($this->dialect))
            $this->_handlers =& $this->dialect->_handlers;
        else
            $this->_handlers = array();
        $this->XmlParser($encoding);
        xml_parser_set_option($this->_parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($this->_parser, XML_OPTION_SKIP_WHITE, 1);
    }

    // The three callbacks called while walking through the HTML tree
    // (tag_open, tag_close, cdata) and parse_url are not overridden here:
    // no extensions are needed from XmlParser.
    //
    //   tag_open($parser, $name, $attrs='')
    //   tag_close($parser, $name, $attrs='')
    //   cdata($parser, $data)
    //   parse_url($file, $debug=false)

    /**
     * Convert the parsed tree to wiki markup.
     *
     * Uses $this->root, falling back to $GLOBALS['xml_parser_root'] when
     * no root has been set on this object.
     *
     * @return string The wikified document.
     */
    function output()
    {
        if (is_null($this->root))
            $this->root = $GLOBALS['xml_parser_root'];
        return $this->wikify($this->root);
    }

    /**
     * Convert one node (element or text) to wiki markup.
     *
     * The handler registered for an element's tag decides the conversion:
     *  - a string naming an existing dialect method: that method is called
     *    with the node and its return value is used;
     *  - an array: first entry is put before the converted children, last
     *    entry after them;
     *  - any other non-empty value: used as a prefix before the children;
     *  - nothing registered: only the converted children are returned.
     *
     * @param mixed  $node   An XmlElement, or a text chunk.
     * @param object $parent The element containing $node, if any.
     * @return string
     */
    function wikify($node, $parent = null)
    {
        if (!isa($node, 'XmlElement')) {
            // Text node.
            $output = $node;
            // Runs of blanks are not significant outside of PRE.
            if ($parent and $parent->_tag != 'pre')
                $output = preg_replace("/ {2,}/", " ", $output);
            if (trim($output) == '')
                $output = '';
            return $output;
        }

        // Handler methods of a dialect call elem_contents() on themselves,
        // which ends up here with no $this->dialect set. The dialect object
        // carries its own handler table, so it acts as its own dialect.
        if (is_object($this->dialect))
            $dialect =& $this->dialect;
        else
            $dialect = $this;

        $tag = $node->_tag;
        $conv = isset($dialect->_handlers[$tag]) ? $dialect->_handlers[$tag] : null;

        if (is_string($conv) and method_exists($dialect, $conv))
            return $dialect->$conv($node);

        $inner = '';
        foreach ($node->getContent() as $n) {
            $inner .= $this->wikify($n, $node);
        }

        if (is_array($conv) and count($conv) > 0)
            return $conv[0] . $inner . $conv[count($conv) - 1];
        if (!is_array($conv) and !empty($conv))
            return $conv . $inner;
        return $inner;
    }

    /** elem_contents()
     *   $output = $parser->elem_contents( $elem );
     * Returns a wikified version of the contents of the specified
     * HTML element. This is done by passing each element of this
     * element's content list through the wikify() method, and
     * returning the concatenated result.
     *
     * A non-element argument is wikified directly.
     *
     * @param mixed $node An XmlElement, or a text chunk.
     * @return string
     */
    function elem_contents($node)
    {
        if (!isa($node, 'XmlElement'))
            return $this->wikify($node);

        $output = '';
        foreach ($node->getContent() as $child) {
            // $node itself is the parent of its children, exactly as in
            // wikify()'s own recursion.
            $output .= $this->wikify($child, $node);
        }
        return $output;
    }

    //
    // Private function: _elem_attr_str( $elem, @attrs )
    //
    // Returns a string containing a list of attribute names and
    // values associated with the specified HTML element. Only
    // attribute names included in @attrs will be added to the
    // string of attributes that is returned. Attribute names are
    // lower-cased before the comparison, and pairs are emitted in
    // attr="value" format, each preceded by a blank. Values are
    // inserted as they are (no escaping is applied here).
    //
    function _elem_attr_str($node, $attrs)
    {
        $s = '';
        foreach ($node->_attr as $attr => $val) {
            $attr = strtolower($attr);
            if (in_array($attr, $attrs))
                $s .= " $attr=\"$val\"";
        }
        return $s;
    }

    //
    // Private function: _elem_has_ancestor( $elem, $tagname )
    //
    // Returns true if the specified element has an ancestor element
    // whose element tag equals $tag. This is useful for determining if
    // an element belongs to the specified tag.
    //
    function _elem_has_ancestor($node, $tag)
    {
        // Walk up the parent chain until it ends.
        while (isset($node->parent)) {
            $node = $node->parent;
            if ($node->_tag == $tag)
                return true;
        }
        return false;
    }

    //
    // Private function: _elem_is_image_div( $elem )
    //
    // Returns true if $elem is a container element (P or DIV) meant only to
    // lay out an IMG.
    //
    // More specifically, returns true if the given element is a DIV or P
    // element and the only child it contains is an IMG tag or an IMG tag
    // contained within a sole A tag (not counting child elements with
    // whitespace text only).
    //
    function _elem_is_image_div($node)
    {
        // Return false if node is undefined or isn't a DIV or P at all
        if (!is_object($node) or !in_array($node->_tag, array("div", "p")))
            return false;

        $child = $this->_sole_child_element($node);
        if (!$child)
            return false;
        // Sole child is an IMG tag
        if ($child->_tag == 'img')
            return true;
        // Sole child is an A tag whose sole child is an IMG tag
        if ($child->_tag == 'a') {
            $grandchild = $this->_sole_child_element($child);
            return ($grandchild && $grandchild->_tag == 'img');
        }
        return false;
    }

    //
    // Private function: _sole_child_element( $elem )
    //
    // Returns the only child of $elem if that child is an element,
    // ignoring text children that consist of whitespace only.
    // Returns null when there is no child, more than one child, or
    // the remaining child is text.
    //
    function _sole_child_element($node)
    {
        $found = null;
        foreach ($node->getContent() as $child) {
            if (is_string($child) and trim($child) == '')
                continue;
            if (!is_null($found) or !is_object($child) or !isset($child->_tag))
                return null;
            $found = $child;
        }
        return $found;
    }

    /** preserves tags and content
     *
     * @param object $node Element to emit unchanged.
     * @return string Whatever wikify_preserve() returns for $node.
     */
    function wikify_default($node)
    {
        return $this->wikify_preserve($node);
    }

    /** preserves tags and content
     *
     * @param object $node Element to emit unchanged.
     * @return string The result of $node->asXML().
     */
    function wikify_preserve($node)
    {
        return $node->asXML();
    }

    /**
     * Logging hook; intentionally does nothing in the base class.
     *
     * @param string $dummy Message (ignored).
     */
    function log($dummy)
    {
    }
}
/**
 * Dialect producing PhpWiki2 markup.
 *
 * Holds the tag => handler table consulted by wikify() and the handler
 * methods named in that table.
 */
class HtmlParser_PhpWiki2
    extends HtmlParser
{
    // Indentation prefix for the next table cell; reset by wikify_table()
    // and extended by one blank for every cell emitted.
    var $ident = '';

    /**
     * Fill the handler table. A handler is either a method name, an
     * array(prefix [, suffix]) wrapped around the converted children,
     * or a plain string used as a prefix.
     */
    function HtmlParser_PhpWiki2()
    {
        $this->_handlers =
            array('html' => '',
                'head' => '',
                'title' => '',
                'meta' => '',
                'link' => '',
                'script' => '',
                'body' => '',
                'br' => "\n",
                'b' => array("*"),
                'strong' => array("*"),
                'i' => array("_"),
                'em' => array("_"),
                'hr' => "----\n\n",
                // PRE blocks are handled specially (see the whitespace
                // handling in wikify)
                'pre' => array("", "\n"),
                'dl' => array('', "\n\n"),
                'dt' => array(';', ''),
                'dd' => array(':', ''),
                'p' => array("\n\n", "\n\n"),
                'ul' => array('', "\n"),
                'ol' => array('', "\n"),
                'li' => "wikify_list_item",
                'table' => "wikify_table",
                'tr' => "wikify_tr",
                'td' => "wikify_td",
                'th' => "wikify_td",
                'div' => array('', "\n\n"),
                'img' => "wikify_img",
                'a' => "wikify_link",
                'span' => array('', ''),
                'h1' => "wikify_h",
                'h2' => "wikify_h",
                'h3' => "wikify_h",
                'h4' => "wikify_h",
                'h5' => "wikify_h",
                'h6' => "wikify_h",
                'font' => array('', ''),
                'sup' => "wikify_default",
                'sub' => "wikify_default",
                'nowiki' => "wikify_verbatim",
                'verbatim' => "wikify_default",
                'noinclude' => "wikify_noinclude",
            );
    }

    /**
     * TABLE: resets the cell indentation and wraps the converted rows.
     *
     * @param object $node The table element.
     * @return string
     */
    function wikify_table($node)
    {
        $this->ident = '';
        return "| \n" . $this->elem_contents($node) . "|\n\n";
    }

    /**
     * TR: starts a new line with a cell delimiter, followed by the cells.
     *
     * @param object $node The row element.
     * @return string
     */
    function wikify_tr($node)
    {
        return "\n| " . $this->elem_contents($node);
    }

    /**
     * TD and TH: emits one cell, prefixed by the current indentation.
     *
     * This is the method the handler table names for both 'td' and 'th'.
     * Without it, wikify() would treat the string "wikify_td" as a literal
     * prefix and write it into the output.
     *
     * @param object $node The cell element.
     * @return string
     */
    function wikify_td($node)
    {
        $ident = empty($this->ident) ? '' : $this->ident;
        // Drop leading whitespace of the cell content.
        $content = preg_replace('/^\s+/', '', $this->elem_contents($node));
        // Every following cell is indented one blank further.
        $this->ident .= ' ';
        return "$ident| " . $content . " |\n";
    }

    /**
     * Kept for callers that use the former method name; same as wikify_td().
     *
     * @param object $node The cell element.
     * @return string
     */
    function wikify_th($node)
    {
        return $this->wikify_td($node);
    }

    /**
     * LI: '#' for items of an ordered list, '*' otherwise.
     *
     * The closest enclosing OL or UL decides, so a bullet list nested in
     * a numbered list still gets bullets.
     *
     * @param object $node The list item element.
     * @return string
     */
    function wikify_list_item($node)
    {
        $marker = '*';
        $p = isset($node->parent) ? $node->parent : null;
        while (is_object($p)) {
            if ($p->_tag == 'ol') {
                $marker = '#';
                break;
            }
            if ($p->_tag == 'ul')
                break;
            $p = isset($p->parent) ? $p->parent : null;
        }
        return $marker . " " . trim($this->elem_contents($node)) . "\n";
    }

    /**
     * A: converts a link to "[ url | title ]", or to something shorter
     * when the context makes the full form unnecessary.
     *
     * NOTE(review): absolute_url() is not defined in this file; presumably
     * it is supplied elsewhere (e.g. by XmlParser) -- confirm.
     *
     * @param object $node The anchor element.
     * @return string
     */
    function wikify_link($node)
    {
        $url = $this->absolute_url($node->getAttr('href'));
        $title = $this->elem_contents($node);
        if (empty($url))
            $title = trim($title);

        if (isset($node->parent)) {
            // Just return the link title if this tag is contained
            // within a header tag
            if (preg_match('/^h\d$/', $node->parent->_tag))
                return $title;
            // Same if this is a link around an image inside a
            // layout-only DIV or P
            if ($this->_elem_is_image_div($node->parent))
                return $title;
        }

        // If HREF is the same as the link title, then
        // just return the URL (it'll be converted into
        // a clickable link by the wiki engine)
        if ($url == $title)
            return $url;
        return "[ $url | $title ]";
    }

    /**
     * H1..H6: h1 => '!!!', h2 => '!!', h3 and deeper => '!'.
     *
     * @param object $node The heading element.
     * @return string
     */
    function wikify_h($node)
    {
        $level = (int)substr($node->_tag, 1);
        $markup = ($level < 4) ? str_repeat('!', 4 - $level) : '!';
        return $markup . ' ' . trim($this->elem_contents($node)) . "\n\n";
    }

    /**
     * NOWIKI: the converted contents, set off by newlines.
     *
     * @param object $node The element.
     * @return string
     */
    function wikify_verbatim($node)
    {
        $contents = $this->elem_contents($node);
        return "\n\n$contents\n";
    }

    /**
     * NOINCLUDE: only the converted contents, no wrapper.
     *
     * @param object $node The element.
     * @return string
     */
    function wikify_noinclude($node)
    {
        return $this->elem_contents($node);
    }

    /**
     * IMG: converts an image to "[ file attr=value ... ]".
     *
     * Alignment is taken from the align attribute, or else from a float
     * style / floatleft|floatright class on a DIV or P that only wraps the
     * image. width/height (or a combined size) are added when they differ
     * from the real dimensions reported by getimagesize(), which may
     * fetch the image from the network.
     *
     * NOTE(review): absolute_url() is not defined in this file; presumably
     * it is supplied elsewhere (e.g. by XmlParser) -- confirm.
     *
     * @param object $node The image element.
     * @return string
     */
    function wikify_img($node)
    {
        $image_url = $this->absolute_url($node->getAttr('src'));
        $file = basename($image_url);
        $alignment = $node->getAttr('align');
        $attrs = array();
        $this->log("Processing IMG tag for SRC: " . $image_url . "...");

        //
        // Grab attributes to be added to the [ Image ] markup (since 1.3.10)
        //
        $image_div = null;
        if (!$alignment) {
            $parent = isset($node->parent) ? $node->parent : null;
            $grandparent = ($parent and isset($parent->parent)) ? $parent->parent : null;
            if ($this->_elem_is_image_div($parent))
                $image_div = $parent;
            elseif ($this->_elem_is_image_div($grandparent))
                $image_div = $grandparent;
        }

        if ($image_div) {
            $css_style = (string)$image_div->getAttr('style');
            $css_class = (string)$image_div->getAttr('class');
            // float => align: Check for float attribute; if it's there,
            // then we'll add it to the [Image] syntax. The inline style
            // wins over the class name.
            if (preg_match("/float\:\s*(right|left)/i", $css_style, $m))
                $alignment = $m[1];
            elseif (preg_match("/float(right|left)/i", $css_class, $m))
                $alignment = $m[1];

            if ($alignment) {
                $this->log(" Image is contained within a DIV that specifies $alignment alignment");
                $this->log(" Adding '$alignment' to [Image] markup attributes");
            } else {
                $this->log(" Image is not contained within a DIV for alignment");
            }
        } else {
            $this->log(" Image is not contained within a DIV");
        }
        // Added exactly once, wherever the alignment came from.
        if ($alignment)
            $attrs[] = "align=$alignment";

        //
        // Check if we need to request a thumbnail of this
        // image; it's needed if the specified width attribute
        // differs from the default size of the image
        //
        if ($width = $node->getAttr('width')) {
            $this->log(" Image has WIDTH attribute of $width");
            $this->log(" Checking whether resulting [Image] markup should specify a thumbnail...");

            // Look up the real dimensions of the image
            $abs_url = $this->absolute_url($node->getAttr('src'));
            $this->log(" Fetching image '$abs_url' from the network");
            $size = getimagesize($abs_url);
            if (is_array($size)) {
                $actual_w = $size[0];
                $actual_h = $size[1];
            } else {
                // Real size unknown: the explicit dimensions below are then
                // treated as differing and are carried over.
                $actual_w = null;
                $actual_h = null;
                $this->log(" Could not determine the actual size of '$abs_url'");
            }

            // If the WIDTH attribute of the IMG tag is not equal
            // to the actual width of the image, then we need to
            // create a thumbnail
            $width_added = false;
            if (preg_match("/^\d+$/", $width) and $width != $actual_w) {
                $this->log(" IMG tag's WIDTH attribute ($width) differs from actual width of image ($actual_w)");
                $this->log(" -- that means we're going to need a thumbnail");
                $this->log(" Adding 'width' to list of attributes for [Image] markup");
                $attrs[] = "width=$width";
                $width_added = true;
            }

            $height = (string)$node->getAttr('height');
            if (preg_match("/^\d+$/", $height) and $height != $actual_h) {
                $this->log(" IMG tag's HEIGHT attribute ($height) differs from actual height of image ($actual_h)");
                $this->log(" -- that means we're going to need a thumbnail");
                $this->log(" Adding 'height' to list of attributes for [Image] markup");
                if ($width_added)
                    // width= was pushed last; fold both into one size=WxH
                    $attrs[count($attrs) - 1] = "size=" . $width . "x" . $height;
                else
                    $attrs[] = "height=$height";
            }
        }

        if ($alt = $node->getAttr('alt')) {
            $this->log(" Adding alternate text '$alt' to [Image] markup");
            $attrs[] = "alt=$alt";
        }

        $attr_str = implode(' ', $attrs);
        $this->log("...done processing IMG tag\n");
        return "[ $file $attr_str ]";
    }
}
// Local Variables:
// mode: php
// tab-width: 8
// c-basic-offset: 4
// c-hanging-comment-ender-p: nil
// indent-tabs-mode: nil
// End: