5 * Copyright (c) 2002-2011, Sebastian Bergmann <sebastian@phpunit.de>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
20 * * Neither the name of Sebastian Bergmann nor the names of his
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
39 * @author Sebastian Bergmann <sebastian@phpunit.de>
40 * @copyright 2002-2011 Sebastian Bergmann <sebastian@phpunit.de>
41 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
42 * @link http://www.phpunit.de/
43 * @since File available since Release 3.2.0
51 * @author Sebastian Bergmann <sebastian@phpunit.de>
52 * @copyright 2002-2011 Sebastian Bergmann <sebastian@phpunit.de>
53 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
54 * @version Release: 3.5.13
55 * @link http://www.phpunit.de/
56 * @since Class available since Release 3.2.0
58 class PHPUnit_Util_XML
/**
 * Escapes a string so that it can be embedded in an XML document.
 *
 * The string is first brought to UTF-8 via convertToUtf8() and its markup
 * characters are replaced by entities. Afterwards every byte in the set
 * 0x00-0x04, 0x0b, 0x0c, 0x0e-0x1f and 0x7f is rewritten as a two-digit
 * hexadecimal character reference (for example "\x01" becomes "&#x01;").
 *
 * NOTE(review): the name of the entity-escaping call is missing from the
 * listing this method was restored from; htmlspecialchars() is inferred
 * from the remaining argument list (string, ENT_COMPAT, 'UTF-8') --
 * confirm against the upstream file.
 *
 * @param  string $string
 * @return string
 * @author Kore Nordmann <mail@kore-nordmann.de>
 * @since  Method available since Release 3.4.6
 */
public static function prepareString($string)
{
    $escaped = htmlspecialchars(
      self::convertToUtf8($string), ENT_COMPAT, 'UTF-8'
    );

    // Same byte set as the former pattern [\x00-\x04\x0b\x0c\x0e-\x1f\x7f].
    // A literal translation table is used instead of preg_replace() with
    // the "e" (eval) modifier, which is deprecated as of PHP 5.5 and no
    // longer available in PHP 7. All keys are single bytes below 0x80, so
    // they can never match inside a multi-byte UTF-8 sequence.
    $controlCodes = array_merge(
      range(0x00, 0x04), array(0x0b, 0x0c), range(0x0e, 0x1f), array(0x7f)
    );

    $replacements = array();

    foreach ($controlCodes as $code) {
        $replacements[chr($code)] = sprintf('&#x%02x;', $code);
    }

    return strtr($escaped, $replacements);
}
/**
 * Returns the given string in UTF-8 encoding.
 *
 * A string that already passes isUtf8() is handed back untouched. Any
 * other string is converted with mb_convert_encoding() when that function
 * exists and with utf8_encode() otherwise.
 *
 * @param  string $string
 * @return string
 * @since  Method available since Release 3.2.19
 */
protected static function convertToUtf8($string)
{
    if (self::isUtf8($string)) {
        return $string;
    }

    return function_exists('mb_convert_encoding')
         ? mb_convert_encoding($string, 'UTF-8')
         : utf8_encode($string);
}
/**
 * Checks whether a string is structurally valid UTF-8.
 *
 * Every lead byte must announce a sequence of one to four bytes and must
 * be followed by the matching number of continuation bytes (10xxxxxx).
 * Only the byte structure is checked; overlong encodings and surrogate
 * code points are not rejected.
 *
 * @param  string $string
 * @return boolean TRUE when the byte structure is valid, FALSE otherwise
 * @since  Method available since Release 3.3.0
 */
protected static function isUtf8($string)
{
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $byte = ord($string[$i]);

        // $n is the number of continuation bytes the lead byte requires.
        if ($byte < 0x80) {
            $n = 0;
        }

        else if (($byte & 0xE0) == 0xC0) {
            $n = 1;
        }

        else if (($byte & 0xF0) == 0xE0) {
            $n = 2;
        }

        // The mask has to cover five bits (11110xxx). Masking with 0xF0
        // would also accept 0xF8-0xFF, which are never valid lead bytes.
        else if (($byte & 0xF8) == 0xF0) {
            $n = 3;
        }

        else {
            return FALSE;
        }

        for ($j = 0; $j < $n; $j++) {
            // Running off the end of the string or meeting a byte that is
            // not a continuation byte means the sequence is truncated.
            if ((++$i == $length) || ((ord($string[$i]) & 0xC0) != 0x80)) {
                return FALSE;
            }
        }
    }

    return TRUE;
}
/**
 * Reads a file and loads its contents into a DOMDocument object.
 *
 * The contents are passed on to load() together with the file name, which
 * load() uses in its error message.
 *
 * @param  string  $filename
 * @param  boolean $isHtml
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception when the file cannot be read
 * @since  Method available since Release 3.3.0
 */
public static function loadFile($filename, $isHtml = FALSE)
{
    // Error reporting is switched off only for the duration of the read;
    // a failed read is reported through the exception below instead.
    $previousLevel = error_reporting(0);
    $contents      = file_get_contents($filename);
    error_reporting($previousLevel);

    if ($contents !== FALSE) {
        return self::load($contents, $isHtml, $filename);
    }

    throw new PHPUnit_Framework_Exception(
      sprintf('Could not read "%s".', $filename)
    );
}
/**
 * Load an $actual document into a DOMDocument. This is called
 * from the selector assertions.
 *
 * If $actual is already a DOMDocument, it is returned with
 * no changes. Otherwise, $actual is loaded into a new DOMDocument
 * as either HTML or XML, depending on the value of $isHtml.
 *
 * Note: prior to PHPUnit 3.3.0, this method loaded a file and
 * not a string as it currently does. To load a file into a
 * DOMDocument, use loadFile() instead.
 *
 * @param  string|DOMDocument $actual
 * @param  boolean            $isHtml
 * @param  string             $filename Only used in the error message
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception when $actual is empty or cannot be
 *                                     parsed
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function load($actual, $isHtml = FALSE, $filename = '')
{
    if ($actual instanceof DOMDocument) {
        return $actual;
    }

    $document = new DOMDocument;
    $message  = '';
    $loaded   = FALSE;

    if ($actual === '' || $actual === NULL) {
        // An empty source can never be parsed. It is rejected before the
        // error settings are touched: PHP 8 raises a ValueError for an
        // empty source, which would otherwise escape with error reporting
        // still switched off and libxml still in internal-error mode.
        $message = 'Empty string supplied as input';
    } else {
        // Collect libxml's diagnostics instead of letting them surface as
        // PHP warnings; both settings are restored right after parsing.
        $internal  = libxml_use_internal_errors(TRUE);
        $reporting = error_reporting(0);

        if ($isHtml) {
            $loaded = $document->loadHTML($actual);
        } else {
            $loaded = $document->loadXML($actual);
        }

        foreach (libxml_get_errors() as $error) {
            $message .= $error->message;
        }

        libxml_use_internal_errors($internal);
        error_reporting($reporting);
    }

    if ($loaded === FALSE) {
        if ($filename != '') {
            throw new PHPUnit_Framework_Exception(
              sprintf(
                'Could not load "%s".%s',
                $filename,
                $message != '' ? "\n" . $message : ''
              )
            );
        } else {
            throw new PHPUnit_Framework_Exception($message);
        }
    }

    return $document;
}
/**
 * Returns the content of a node as a string.
 *
 * A node with exactly one child yields its nodeValue. In every other case
 * the children are serialized one by one through the owner document's
 * saveXML() and the pieces are concatenated in document order.
 *
 * @param  DOMNode $node
 * @return string
 * @since  Method available since Release 3.4.0
 */
public static function nodeToText(DOMNode $node)
{
    $children = $node->childNodes;

    if ($children->length == 1) {
        return $node->nodeValue;
    }

    $fragments = array();

    foreach ($children as $child) {
        $fragments[] = $node->ownerDocument->saveXML($child);
    }

    return implode('', $fragments);
}
/**
 * Removes every direct child of a node that is a DOMCharacterData
 * instance. Children of other types are left in place and are not
 * descended into.
 *
 * @param DOMNode $node
 * @since  Method available since Release 3.3.0
 * @author Mattis Stordalen Flister <mattis@xait.no>
 */
public static function removeCharacterDataNodes(DOMNode $node)
{
    if (!$node->hasChildNodes()) {
        return;
    }

    // Walk the live child list from the end so that removing an entry does
    // not shift the positions that are still to be visited.
    $index = $node->childNodes->length;

    while ($index-- > 0) {
        $child = $node->childNodes->item($index);

        if ($child instanceof DOMCharacterData) {
            $node->removeChild($child);
        }
    }
}
// NOTE(review): this listing skips lines between the numbered entries; the
// `case` labels of the switch, the braces and the return statement are not
// visible. The notes below describe only the statements that are shown.
268 * "Convert" a DOMElement object into a PHP variable.
270 * @param DOMElement $element
272 * @since Method available since Release 3.4.0
274 public static function xmlToVariable(DOMElement $element)
// The conversion applied depends on the element's tag name.
278 switch ($element->tagName) {
// Array-style branch: every <element> found by getElementsByTagName()
// contributes one entry. The loop variable reuses (and overwrites) the
// $element parameter.
// NOTE(review): getElementsByTagName() also returns nested descendants, not
// only direct children -- confirm this is intended for nested structures.
282 foreach ($element->getElementsByTagName('element') as $element) {
// The value is converted from the child node at index 1; presumably index 0
// is a whitespace text node -- TODO confirm against the expected XML layout.
283 $value = self::xmlToVariable($element->childNodes->item(1));
// Entries with a "key" attribute are stored under that key, all others are
// appended.
285 if ($element->hasAttribute('key')) {
286 $variable[(string)$element->getAttribute('key')] = $value;
288 $variable[] = $value;
// Object-style branch: the class to instantiate is named by the "class"
// attribute.
295 $className = $element->getAttribute('class');
// With child nodes present, the DOMElement children of the child node at
// index 1 are converted recursively and passed as constructor arguments.
297 if ($element->hasChildNodes()) {
298 $arguments = $element->childNodes->item(1)->childNodes;
299 $constructorArgs = array();
301 foreach ($arguments as $argument) {
302 if ($argument instanceof DOMElement) {
303 $constructorArgs[] = self::xmlToVariable($argument);
307 $class = new ReflectionClass($className);
308 $variable = $class->newInstanceArgs($constructorArgs);
// Without child nodes the class is instantiated without arguments.
310 $variable = new $className;
// Only a node value equal to 'true' (loose comparison) yields TRUE; every
// other value yields FALSE.
316 $variable = $element->nodeValue == 'true' ? TRUE : FALSE;
// The node value is cast to the type named by the element's tag name.
323 $variable = $element->nodeValue;
325 settype($variable, $element->tagName);
/**
 * Validate list of keys in the associative array.
 *
 * $validKeys may mix two forms: an indexed entry names a valid key whose
 * default is NULL, an associative entry maps a valid key to its default
 * value. Every key of $hash has to be one of the valid keys. Valid keys
 * that are missing from $hash, or whose value is NULL, are filled in with
 * their default.
 *
 * @param  array $hash
 * @param  array $validKeys
 * @return array $hash completed with the default values
 * @throws InvalidArgumentException when $hash contains an unknown key
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function assertValidKeys(array $hash, array $validKeys)
{
    $valids = array();

    // Normalize validation keys so that we can use both indexed and
    // associative arrays.
    foreach ($validKeys as $key => $val) {
        if (is_int($key)) {
            $valids[$val] = NULL;
        } else {
            $valids[$key] = $val;
        }
    }

    // Check for invalid keys. The lookup goes against the keys of the
    // normalized map: a loose in_array() comparison treats an integer key
    // such as 0 as equal to any non-numeric string on PHP versions before
    // 8, which let stray positional entries pass as valid.
    $unknown = array();

    foreach ($hash as $key => $value) {
        if (!array_key_exists($key, $valids)) {
            $unknown[] = $key;
        }
    }

    if (!empty($unknown)) {
        throw new InvalidArgumentException(
          'Unknown key(s): ' . implode(', ', $unknown)
        );
    }

    // Add default values for any valid keys that are empty.
    foreach ($valids as $key => $value) {
        if (!isset($hash[$key])) {
            $hash[$key] = $value;
        }
    }

    return $hash;
}
/**
 * Parse a CSS selector into an associative array suitable for
 * use with findNodes().
 *
 * The selector is split on spaces and processed from right to left, so
 * that every element can be nested into the element on its left: under
 * the key 'child' when the two are separated by '>', under the key
 * 'descendant' otherwise. The array for the leftmost element is returned.
 *
 * Supported per element: a tag name, '#id', '.class' (repeatable) and
 * '[name=value]', '[name~=word]' and '[name*=substring]' attributes.
 *
 * @param  string $selector
 * @param  mixed  $content  When a string, it is stored as 'content' on
 *                          every element of the selector
 * @return array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function convertSelectToTag($selector, $content = TRUE)
{
    $selector = trim(preg_replace("/\s+/", " ", $selector));

    // substitute spaces within attribute value
    while (preg_match('/\[[^\]]+"[^"]+\s[^"]+"\]/', $selector)) {
        $selector = preg_replace(
          '/(\[[^\]]+"[^"]+)\s([^"]+"\])/', "$1__SPACE__$2", $selector
        );
    }

    // explode() yields a one-element array when there is no space at all.
    $elements    = explode(' ', $selector);
    $previousTag = array();

    // Defined up front so that a selector consisting only of '>' returns
    // an empty array instead of an undefined variable.
    $tag = array();

    foreach (array_reverse($elements) as $element) {
        $element = str_replace('__SPACE__', ' ', $element);

        // child selector
        if ($element == '>') {
            // A trailing '>' has nothing to its right; fall back to an
            // empty array rather than reading an undefined index.
            $previousTag = array(
              'child' => isset($previousTag['descendant'])
                       ? $previousTag['descendant']
                       : array()
            );

            continue;
        }

        $tag = array();

        // match element tag
        preg_match("/^([^\.#\[]*)/", $element, $eltMatches);

        if (!empty($eltMatches[1])) {
            $tag['tag'] = $eltMatches[1];
        }

        // match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*),
        // and classes (\.[^\.#\[]*))
        preg_match_all(
          "/(\[[^\]]*\]*|#[^\.#\[]*|\.[^\.#\[]*)/", $element, $matches
        );

        if (!empty($matches[1])) {
            $classes = array();
            $attrs   = array();

            foreach ($matches[1] as $match) {
                $prefix = substr($match, 0, 1);

                // id matched
                if ($prefix == '#') {
                    $tag['id'] = substr($match, 1);
                }

                // class matched
                else if ($prefix == '.') {
                    $classes[] = substr($match, 1);
                }

                // attribute matched
                else if ($prefix == '[' && substr($match, -1, 1) == ']') {
                    $attribute = substr($match, 1, strlen($match) - 2);
                    $attribute = str_replace('"', '', $attribute);

                    // The value ends up between "/" delimiters of a
                    // regular expression, so it is quoted: otherwise a
                    // value such as "/index.html" produces an invalid
                    // pattern and can never match.

                    // match single word
                    if (strstr($attribute, '~=')) {
                        list($key, $value) = explode('~=', $attribute, 2);
                        $value             = 'regexp:/.*\b'
                                           . preg_quote($value, '/')
                                           . '\b.*/';
                    }

                    // match substring
                    else if (strstr($attribute, '*=')) {
                        list($key, $value) = explode('*=', $attribute, 2);
                        $value             = 'regexp:/.*'
                                           . preg_quote($value, '/')
                                           . '.*/';
                    }

                    // exact match; the limit keeps "=" characters inside
                    // the value, the padding covers "[name]" without value
                    else {
                        list($key, $value) = array_pad(
                          explode('=', $attribute, 2), 2, NULL
                        );
                    }

                    $attrs[$key] = $value;
                }
            }

            if ($classes) {
                $tag['class'] = join(' ', $classes);
            }

            if ($attrs) {
                $tag['attributes'] = $attrs;
            }
        }

        // tag content
        if (is_string($content)) {
            $tag['content'] = $content;
        }

        // determine previous child/descendants
        if (!empty($previousTag['descendant'])) {
            $tag['descendant'] = $previousTag['descendant'];
        }

        else if (!empty($previousTag['child'])) {
            $tag['child'] = $previousTag['child'];
        }

        $previousTag = array('descendant' => $tag);
    }

    return $tag;
}
/**
 * Parse an $actual document and return the DOMNodes matching the CSS
 * $selector. The result is whatever findNodes() returns for the parsed
 * selector.
 *
 * To only return nodes containing a certain content, give
 * the $content to match as a string. Otherwise, setting
 * $content to TRUE will return all nodes matching $selector.
 *
 * The $actual document may be a DOMDocument or a string
 * containing XML or HTML, identified by $isHtml.
 *
 * @param  string         $selector
 * @param  string|boolean $content
 * @param  mixed          $actual
 * @param  boolean        $isHtml
 * @return false|array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 * @author Tobias Schlitt <toby@php.net>
 */
public static function cssSelect($selector, $content, $actual, $isHtml = TRUE)
{
    // The selector is parsed before the document is loaded, so a selector
    // problem surfaces before a document problem.
    $criteria = self::convertSelectToTag($selector, $content);
    $document = self::load($actual, $isHtml);

    return self::findNodes($document, $criteria, $isHtml);
}
// findNodes(): selects elements of $dom and narrows them down, one filter
// stage after another, by the criteria given in $options.
// NOTE(review): this listing skips lines between the numbered entries
// (braces, initialisations, early returns and the statements that mark a
// node as rejected). The notes below describe only what is visible.
536 * Parse out the options from the tag using DOM object tree.
538 * @param DOMDocument $dom
539 * @param array $options
540 * @param boolean $isHtml
542 * @since Method available since Release 3.3.0
543 * @author Mike Naberezny <mike@maintainable.com>
544 * @author Derek DeVries <derek@maintainable.com>
545 * @author Tobias Schlitt <toby@php.net>
547 public static function findNodes(DOMDocument $dom, array $options, $isHtml = TRUE)
// Option keys this method understands (presumably the $valid list passed to
// assertValidKeys() below -- the assignment itself is not visible).
550 'id', 'class', 'tag', 'content', 'attributes', 'parent',
551 'child', 'ancestor', 'descendant', 'children'
// assertValidKeys() throws for unknown keys and returns $options with every
// missing valid key filled in, so the lookups below never hit an undefined
// index.
555 $options = self::assertValidKeys($options, $valid);
557 // find the element by id
// 'id' and 'class' are folded into the attribute criteria and are matched
// by the attribute filter further down.
558 if ($options['id']) {
559 $options['attributes']['id'] = $options['id'];
562 if ($options['class']) {
563 $options['attributes']['class'] = $options['class'];
566 // find the element by a tag type
567 if ($options['tag']) {
// Two alternative lookups for the requested tag follow; the condition that
// chooses between them is not visible in this listing.
569 $elements = self::getElementsByCaseInsensitiveTagName(
570 $dom, $options['tag']
573 $elements = $dom->getElementsByTagName($options['tag']);
576 foreach ($elements as $element) {
585 // no tag selected, get them all
// Fixed list of HTML element names that are looked up one by one.
588 'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo',
589 'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite',
590 'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl',
591 'dt', 'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2',
592 'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe',
593 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link',
594 'map', 'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup',
595 'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select',
596 'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
597 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title',
598 'tr', 'tt', 'ul', 'var'
601 foreach ($tags as $tag) {
603 $elements = self::getElementsByCaseInsensitiveTagName(
607 $elements = $dom->getElementsByTagName($tag);
610 foreach ($elements as $element) {
620 // filter by attributes
621 if ($options['attributes']) {
622 foreach ($nodes as $node) {
625 foreach ($options['attributes'] as $name => $value) {
626 // match by regexp if like "regexp:/foo/i"
// Everything after the "regexp:" prefix is used verbatim as the pattern for
// preg_match() against the attribute value.
627 if (preg_match('/^regexp\s*:\s*(.*)/i', $value, $matches)) {
628 if (!preg_match($matches[1], $node->getAttribute($name))) {
633 // class can match only a part
634 else if ($name == 'class') {
635 // split to individual classes
// Both the requested and the actual class lists have their whitespace runs
// reduced to single spaces before they are split.
636 $findClasses = explode(
637 ' ', preg_replace("/\s+/", " ", $value)
640 $allClasses = explode(
642 preg_replace("/\s+/", " ", $node->getAttribute($name))
645 // make sure each class given is in the actual node
646 foreach ($findClasses as $findClass) {
647 if (!in_array($findClass, $allClasses)) {
653 // match by exact string
// The comparison is loose (!=), not strict.
655 if ($node->getAttribute($name) != $value) {
661 // if every attribute given matched
// The content filter runs for any value other than NULL.
676 if ($options['content'] !== NULL) {
677 foreach ($nodes as $node) {
680 // match by regexp if like "regexp:/foo/i"
681 if (preg_match('/^regexp\s*:\s*(.*)/i', $options['content'], $matches)) {
682 if (!preg_match($matches[1], self::getNodeText($node))) {
687 // match by exact string
// strstr() makes this a substring test on the node text rather than an
// equality test.
688 else if (strstr(self::getNodeText($node), $options['content']) === FALSE) {
705 // filter by parent node
706 if ($options['parent']) {
// The parent criteria are resolved recursively; only the first match (or
// NULL when there is none) is compared against each node's parent.
707 $parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
708 $parentNode = isset($parentNodes[0]) ? $parentNodes[0] : NULL;
710 foreach ($nodes as $node) {
711 if ($parentNode !== $node->parentNode) {
726 // filter by child node
727 if ($options['child']) {
// A recursive result that is empty (including FALSE) is normalised to an
// empty array so that the loops below can iterate over it.
728 $childNodes = self::findNodes($dom, $options['child'], $isHtml);
729 $childNodes = !empty($childNodes) ? $childNodes : array();
731 foreach ($nodes as $node) {
732 foreach ($node->childNodes as $child) {
733 foreach ($childNodes as $childNode) {
734 if ($childNode === $child) {
749 // filter by ancestor
750 if ($options['ancestor']) {
// As with 'parent', only the first node matching the ancestor criteria is
// considered.
751 $ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
752 $ancestorNode = isset($ancestorNodes[0]) ? $ancestorNodes[0] : NULL;
754 foreach ($nodes as $node) {
755 $parent = $node->parentNode;
// Walks up the parent chain until a node of type XML_HTML_DOCUMENT_NODE is
// reached.
// NOTE(review): this stop condition looks specific to documents loaded as
// HTML -- confirm how the walk ends for a document loaded as XML.
757 while ($parent->nodeType != XML_HTML_DOCUMENT_NODE) {
758 if ($parent === $ancestorNode) {
762 $parent = $parent->parentNode;
774 // filter by descendant
775 if ($options['descendant']) {
776 $descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
777 $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();
779 foreach ($nodes as $node) {
// getDescendants() returns the node's non-text, non-CDATA descendants.
780 foreach (self::getDescendants($node) as $descendant) {
781 foreach ($descendantNodes as $descendantNode) {
782 if ($descendantNode === $descendant) {
797 // filter by children
798 if ($options['children']) {
// The nested 'children' criteria are validated against their own key list.
799 $validChild = array('count', 'greater_than', 'less_than', 'only');
800 $childOptions = self::assertValidKeys(
801 $options['children'], $validChild
804 foreach ($nodes as $node) {
805 $childNodes = $node->childNodes;
// Text and CDATA children are left out of the children that are counted.
807 foreach ($childNodes as $childNode) {
808 if ($childNode->nodeType !== XML_CDATA_SECTION_NODE &&
809 $childNode->nodeType !== XML_TEXT_NODE) {
810 $children[] = $childNode;
814 // we must have children to pass this filter
815 if (!empty($children)) {
816 // exact count of children
817 if ($childOptions['count'] !== NULL) {
// Strict comparison: a 'count' that is not an integer (for example the
// string "2") never equals count($children).
818 if (count($children) !== $childOptions['count']) {
823 // range count of children
// With both bounds given, the number of children has to lie strictly
// between 'greater_than' and 'less_than'.
824 else if ($childOptions['less_than'] !== NULL &&
825 $childOptions['greater_than'] !== NULL) {
826 if (count($children) >= $childOptions['less_than'] ||
827 count($children) <= $childOptions['greater_than']) {
832 // less than a given count
833 else if ($childOptions['less_than'] !== NULL) {
834 if (count($children) >= $childOptions['less_than']) {
839 // more than a given count
840 else if ($childOptions['greater_than'] !== NULL) {
841 if (count($children) <= $childOptions['greater_than']) {
846 // match each child against a specific tag
847 if ($childOptions['only']) {
848 $onlyNodes = self::findNodes(
849 $dom, $childOptions['only'], $isHtml
852 // try to match each child to one of the 'only' nodes
853 foreach ($children as $child) {
856 foreach ($onlyNodes as $onlyNode) {
857 if ($onlyNode === $child) {
// Despite the wording of the next comment, the visible return statement
// hands back the whole $nodes array (or an empty array), not a single node.
880 // return the first node that matches all criteria
881 return !empty($nodes) ? $nodes : array();
/**
 * Recursively collects all descendants of a node into one flat array.
 *
 * Text and CDATA nodes are left out. For every remaining child, the
 * child's own descendants are listed first, followed by the child itself.
 *
 * @param  DOMNode $node
 * @return array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getDescendants(DOMNode $node)
{
    $descendants = array();

    if (!$node->childNodes) {
        return $descendants;
    }

    foreach ($node->childNodes as $child) {
        $isCharacterData = $child->nodeType === XML_CDATA_SECTION_NODE
                        || $child->nodeType === XML_TEXT_NODE;

        if ($isCharacterData) {
            continue;
        }

        foreach (self::getDescendants($child) as $nested) {
            $descendants[] = $nested;
        }

        $descendants[] = $child;
    }

    return $descendants;
}
/**
 * Looks up elements by tag name, trying the lower-case spelling first and
 * the upper-case spelling when the first lookup finds nothing.
 *
 * Other spellings (mixed case) are not tried.
 *
 * @param  DOMDocument $dom
 * @param  string      $tag
 * @return DOMNodeList
 * @since  Method available since Release 3.4.0
 */
protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag)
{
    $lowerCaseMatches = $dom->getElementsByTagName(strtolower($tag));

    return $lowerCaseMatches->length == 0
         ? $dom->getElementsByTagName(strtoupper($tag))
         : $lowerCaseMatches;
}
/**
 * Get the text value of this node's child text nodes.
 *
 * The text nodes below $node are visited in document order, at any depth.
 * Each one contributes its trimmed content followed by a single space, so
 * the result normally ends with a space. Doubled spaces, as left behind by
 * whitespace-only text nodes, are reduced to a single space.
 *
 * @param  DOMNode $node
 * @return string
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getNodeText(DOMNode $node)
{
    if (!$node->childNodes instanceof DOMNodeList) {
        return '';
    }

    $result = '';

    foreach ($node->childNodes as $childNode) {
        if ($childNode->nodeType === XML_TEXT_NODE) {
            $result .= trim($childNode->data) . ' ';
        } else {
            $result .= self::getNodeText($childNode);
        }
    }

    // The needle has to be two spaces: replacing a single space by a
    // single space leaves the string unchanged.
    return str_replace('  ', ' ', $result);
}