5 * Copyright (c) 2002-2009, Sebastian Bergmann <sb@sebastian-bergmann.de>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
20 * * Neither the name of Sebastian Bergmann nor the names of his
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
39 * @author Sebastian Bergmann <sb@sebastian-bergmann.de>
40 * @copyright 2002-2009 Sebastian Bergmann <sb@sebastian-bergmann.de>
41 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43 * @link http://www.phpunit.de/
44 * @since File available since Release 3.2.0
47 require_once 'PHPUnit/Util/Filter.php';
49 PHPUnit_Util_Filter::addFileToFilter(__FILE__, 'PHPUNIT');
56 * @author Sebastian Bergmann <sb@sebastian-bergmann.de>
57 * @copyright 2002-2009 Sebastian Bergmann <sb@sebastian-bergmann.de>
58 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
59 * @version Release: 3.3.17
60 * @link http://www.phpunit.de/
61 * @since Class available since Release 3.2.0
63 class PHPUnit_Util_XML
/**
 * Converts a string to UTF-8 encoding.
 *
 * A string that already passes self::isUtf8() is handed back untouched.
 * Anything else is re-encoded with mb_convert_encoding() when that function
 * is available and with utf8_encode() otherwise.
 *
 * @param  string $string
 * @return string
 * @since  Method available since Release 3.2.19
 */
public static function convertToUtf8($string)
{
    // Nothing to convert.
    if (self::isUtf8($string)) {
        return $string;
    }

    return function_exists('mb_convert_encoding')
        ? mb_convert_encoding($string, 'UTF-8')
        : utf8_encode($string);
}
/**
 * Checks a string for UTF-8 encoding.
 *
 * Validation is delegated to PCRE: with the "u" modifier the subject must be
 * well-formed UTF-8, otherwise preg_match() fails instead of matching. This
 * rejects truncated sequences, stray continuation bytes, the invalid lead
 * bytes 0xF8-0xFF and overlong encodings such as "\xC0\x80".
 *
 * @param  string $string
 * @return boolean TRUE when $string (including the empty string) is valid
 *                 UTF-8, FALSE otherwise
 * @since  Method available since Release 3.3.0
 */
public static function isUtf8($string)
{
    // The empty pattern matches any subject, so the only way this call can
    // fail is the UTF-8 validity check performed for the "u" modifier.
    return preg_match('//u', $string) === 1;
}
/**
 * Loads an XML (or HTML) file into a DOMDocument object.
 *
 * The file is read with file_get_contents() while error reporting is
 * switched off; the contents are then passed on to self::load().
 *
 * @param  string  $filename
 * @param  boolean $isHtml
 * @return DOMDocument
 * @throws RuntimeException when the file cannot be read
 * @since  Method available since Release 3.3.0
 */
public static function loadFile($filename, $isHtml = FALSE)
{
    // A failed read is reported through the exception below, so the
    // warning file_get_contents() would raise is silenced.
    $previousLevel = error_reporting(0);
    $xml           = file_get_contents($filename);
    error_reporting($previousLevel);

    if ($xml !== FALSE) {
        return self::load($xml, $isHtml, $filename);
    }

    throw new RuntimeException(
        sprintf('Could not read "%s".', $filename)
    );
}
/**
 * Load an $actual document into a DOMDocument. This is called
 * from the selector assertions.
 *
 * If $actual is already a DOMDocument, it is returned with
 * no changes. Otherwise, $actual is loaded into a new DOMDocument
 * as either HTML or XML, depending on the value of $isHtml.
 *
 * Note: prior to PHPUnit 3.3.0, this method loaded a file and
 * not a string as it currently does. To load a file into a
 * DOMDocument, use loadFile() instead.
 *
 * When loading fails, the libxml error messages produced by this call are
 * placed in the exception message. If $filename is given, the message is
 * prefixed with 'Could not load "<filename>".'.
 *
 * @param  string|DOMDocument $actual
 * @param  boolean            $isHtml
 * @param  string             $filename
 * @return DOMDocument
 * @throws RuntimeException when $actual is empty or cannot be parsed
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function load($actual, $isHtml = FALSE, $filename = '')
{
    if ($actual instanceof DOMDocument) {
        return $actual;
    }

    $dom     = new DOMDocument;
    $loaded  = FALSE;
    $message = '';

    if ($actual === NULL || $actual === '') {
        // Rejected up front: the parser must not be invoked with empty
        // input while error reporting and the libxml mode are overridden.
        $message = 'Empty string supplied as input';
    } else {
        $internal  = libxml_use_internal_errors(TRUE);
        $reporting = error_reporting(0);

        // Errors the caller has already buffered are not ours to report.
        $stale = count(libxml_get_errors());

        $loaded = $isHtml ? $dom->loadHTML($actual) : $dom->loadXML($actual);

        // Read the errors BEFORE restoring the previous mode: switching
        // internal errors off discards the buffered errors.
        foreach (array_slice(libxml_get_errors(), $stale) as $error) {
            $message .= $error->message;
        }

        libxml_use_internal_errors($internal);
        error_reporting($reporting);
    }

    if ($loaded === FALSE) {
        if ($filename != '') {
            throw new RuntimeException(
                sprintf(
                    'Could not load "%s".%s',
                    $filename,
                    $message != '' ? "\n" . $message : ''
                )
            );
        }

        throw new RuntimeException($message);
    }

    return $dom;
}
/**
 * Removes every direct child of $node that is a DOMCharacterData instance.
 *
 * Only the immediate children are examined; character data nested deeper
 * in the tree is left in place.
 *
 * @param  DOMNode $node
 * @since  Method available since Release 3.3.0
 * @author Mattis Stordalen Flister <mattis@xait.no>
 */
public static function removeCharacterDataNodes(DOMNode $node)
{
    if (!$node->hasChildNodes()) {
        return;
    }

    // childNodes changes as nodes are removed, so the nodes to drop are
    // collected first and removed in a second pass.
    $doomed = array();

    foreach ($node->childNodes as $candidate) {
        if ($candidate instanceof DOMCharacterData) {
            $doomed[] = $candidate;
        }
    }

    foreach ($doomed as $candidate) {
        $node->removeChild($candidate);
    }
}
/**
 * Validate list of keys in the associative array.
 *
 * $validKeys may mix two notations: an indexed entry ('name') declares a
 * valid key whose default is NULL, an associative entry ('name' => default)
 * declares a valid key together with its default value.
 *
 * @param  array $hash
 * @param  array $validKeys
 * @return array $hash with the default added for every valid key that is
 *               missing or NULL
 * @throws InvalidArgumentException when $hash contains a key that is not
 *                                  listed in $validKeys
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function assertValidKeys(array $hash, array $validKeys)
{
    $valids = array();

    // Normalize validation keys so that we can use both indexed and
    // associative arrays.
    foreach ($validKeys as $key => $val) {
        if (is_int($key)) {
            $valids[$val] = NULL;
        } else {
            $valids[$key] = $val;
        }
    }

    // Check for invalid keys. Comparing by key (not by loosely compared
    // value) keeps an integer key such as 0 from passing as a string key.
    $unknown = array_keys(array_diff_key($hash, $valids));

    if (!empty($unknown)) {
        throw new InvalidArgumentException(
            'Unknown key(s): ' . implode(', ', $unknown)
        );
    }

    // Add default values for any valid keys that are empty.
    foreach ($valids as $key => $value) {
        if (!isset($hash[$key])) {
            $hash[$key] = $value;
        }
    }

    return $hash;
}
/**
 * Parse a CSS selector into an associative array suitable for
 * use with findNodes().
 *
 * The selector is split on spaces and processed from right to left, so the
 * returned array describes the LEFTMOST element; elements further right are
 * nested under its 'descendant' key, or under 'child' when they were joined
 * with ">". Supported per element: a tag name, "#id", ".class" (repeatable)
 * and the attribute forms [name="value"], [name~="word"] and
 * [name*="substring"]. The last two are turned into "regexp:/.../" values.
 *
 * @param  string $selector
 * @param  mixed  $content  when a string, it is stored as 'content' on
 *                          every element of the selector
 * @return array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function convertSelectToTag($selector, $content = TRUE)
{
    $selector = trim(preg_replace("/\s+/", " ", $selector));

    // substitute spaces within attribute value so that splitting the
    // selector on spaces below does not tear the attribute apart
    while (preg_match('/\[[^\]]+"[^"]+\s[^"]+"\]/', $selector)) {
        $selector = preg_replace('/(\[[^\]]+"[^"]+)\s([^"]+"\])/', '$1__SPACE__$2', $selector);
    }

    $tag         = array();
    $previousTag = array();

    foreach (array_reverse(explode(' ', $selector)) as $element) {
        $element = str_replace('__SPACE__', ' ', $element);

        // child selector: the element processed before this one becomes a
        // direct child instead of a descendant
        if ($element == '>') {
            $previousTag = isset($previousTag['descendant'])
                         ? array('child' => $previousTag['descendant'])
                         : array();
            continue;
        }

        $tag = array();

        // match element tag
        preg_match("/^([^\.#\[]*)/", $element, $eltMatches);

        if (!empty($eltMatches[1])) {
            $tag['tag'] = $eltMatches[1];
        }

        // match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*), and classes (\.[^\.#\[]*))
        preg_match_all("/(\[[^\]]*\]*|#[^\.#\[]*|\.[^\.#\[]*)/", $element, $matches);

        if (!empty($matches[1])) {
            $classes = array();
            $attrs   = array();

            foreach ($matches[1] as $match) {
                // id matched
                if (substr($match, 0, 1) == '#') {
                    $tag['id'] = substr($match, 1);
                }

                // class matched
                else if (substr($match, 0, 1) == '.') {
                    $classes[] = substr($match, 1);
                }

                // attribute matched
                else if (substr($match, 0, 1) == '[' && substr($match, -1, 1) == ']') {
                    $attribute = substr($match, 1, strlen($match) - 2);
                    $attribute = str_replace('"', '', $attribute);

                    // Split on the FIRST operator only, so that values
                    // containing "=" survive. Values that end up inside a
                    // regular expression are quoted, otherwise a "/" or a
                    // metacharacter would corrupt the pattern.

                    // match single word
                    if (strstr($attribute, '~=')) {
                        list($key, $value) = explode('~=', $attribute, 2);
                        $value = 'regexp:/.*\b' . preg_quote($value, '/') . '\b.*/';
                    }

                    // match substring
                    else if (strstr($attribute, '*=')) {
                        list($key, $value) = explode('*=', $attribute, 2);
                        $value = 'regexp:/.*' . preg_quote($value, '/') . '.*/';
                    }

                    // exact match; a bare [name] yields a NULL value
                    else {
                        list($key, $value) = array_pad(explode('=', $attribute, 2), 2, NULL);
                    }

                    $attrs[$key] = $value;
                }
            }

            if ($classes) {
                $tag['class'] = join(' ', $classes);
            }

            if ($attrs) {
                $tag['attributes'] = $attrs;
            }
        }

        // tag content
        if (is_string($content)) {
            $tag['content'] = $content;
        }

        // determine previous child/descendants
        if (!empty($previousTag['descendant'])) {
            $tag['descendant'] = $previousTag['descendant'];
        }

        else if (!empty($previousTag['child'])) {
            $tag['child'] = $previousTag['child'];
        }

        $previousTag = array('descendant' => $tag);
    }

    return $tag;
}
/**
 * Parse an $actual document and return an array of DOMNodes
 * matching the CSS $selector. The result is whatever findNodes()
 * returns for the converted selector.
 *
 * To only return nodes containing a certain content, give
 * the $content to match as a string. Otherwise, setting
 * $content to TRUE will return all nodes matching $selector.
 *
 * The $actual document may be a DOMDocument or a string
 * containing XML or HTML, identified by $isHtml.
 *
 * @param  string  $selector
 * @param  mixed   $content
 * @param  mixed   $actual
 * @param  boolean $isHtml
 * @return false|array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function cssSelect($selector, $content, $actual, $isHtml = TRUE)
{
    // The selector is converted before the document is loaded.
    $criteria = self::convertSelectToTag($selector, $content);

    return self::findNodes(self::load($actual, $isHtml), $criteria);
}
/**
 * Parse out the options from the tag using DOM object tree.
 *
 * Starting from all elements with the requested tag name (or from every
 * element when no tag is given), the candidates are narrowed down by each
 * option that is set, in this order: attributes (including the 'id' and
 * 'class' shorthands), content, parent, child, ancestor, descendant and
 * children. The 'parent', 'child', 'ancestor', 'descendant' and
 * children['only'] options are themselves option arrays and are resolved
 * by calling this method recursively.
 *
 * For 'child' and 'descendant' a candidate is added to the result once per
 * matching child/descendant pair, so it can appear more than once.
 *
 * @param  DOMDocument $dom
 * @param  array       $options
 * @return false|array the matching nodes, or FALSE as soon as no candidate
 *                     is left
 * @throws InvalidArgumentException when $options (or $options['children'])
 *                                  contains an unknown key
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function findNodes(DOMDocument $dom, array $options)
{
    $valid = array(
        'id', 'class', 'tag', 'content', 'attributes', 'parent',
        'child', 'ancestor', 'descendant', 'children'
    );

    $options = self::assertValidKeys($options, $valid);

    // find the element by id
    if ($options['id']) {
        $options['attributes']['id'] = $options['id'];
    }

    if ($options['class']) {
        $options['attributes']['class'] = $options['class'];
    }

    // find the elements by tag type; without a tag, "*" selects every
    // element of the document, whatever its name
    $nodes   = array();
    $tagName = $options['tag'] ? $options['tag'] : '*';

    foreach ($dom->getElementsByTagName($tagName) as $element) {
        $nodes[] = $element;
    }

    if (empty($nodes)) {
        return FALSE;
    }

    // filter by attributes
    if ($options['attributes']) {
        $filtered = array();

        foreach ($nodes as $node) {
            $invalid = FALSE;

            foreach ($options['attributes'] as $name => $value) {
                $actual = $node->getAttribute($name);

                // match by regexp if like "regexp:/foo/i"
                if (preg_match('/^regexp\s*:\s*(.*)/i', (string) $value, $matches)) {
                    if (!preg_match($matches[1], $actual)) {
                        $invalid = TRUE;
                    }
                }

                // class can match only a part
                else if ($name == 'class') {
                    // split to individual classes
                    $findClasses = explode(' ', preg_replace("/\s+/", " ", $value));
                    $allClasses  = explode(' ', preg_replace("/\s+/", " ", $actual));

                    // make sure each class given is in the actual node
                    foreach ($findClasses as $findClass) {
                        if (!in_array($findClass, $allClasses)) {
                            $invalid = TRUE;
                        }
                    }
                }

                // match by exact string
                else {
                    if ($actual != $value) {
                        $invalid = TRUE;
                    }
                }
            }

            // if every attribute given matched
            if (!$invalid) {
                $filtered[] = $node;
            }
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by content
    if ($options['content'] !== NULL) {
        $filtered = array();

        foreach ($nodes as $node) {
            $text = self::getNodeText($node);

            // match by regexp if like "regexp:/foo/i"
            if (preg_match('/^regexp\s*:\s*(.*)/i', $options['content'], $matches)) {
                $matched = (bool) preg_match($matches[1], $text);
            }

            // an empty string asks for a node without any text; searching
            // for an empty needle would not express that
            else if ($options['content'] === '') {
                $matched = trim($text) === '';
            }

            // match by exact string
            else {
                $matched = strstr($text, $options['content']) !== FALSE;
            }

            if ($matched) {
                $filtered[] = $node;
            }
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by parent node: the direct parent has to be one of the nodes
    // matching $options['parent']
    if ($options['parent']) {
        $parentNodes = self::findNodes($dom, $options['parent']);
        $parentNodes = !empty($parentNodes) ? $parentNodes : array();
        $filtered    = array();

        foreach ($nodes as $node) {
            if (in_array($node->parentNode, $parentNodes, TRUE)) {
                $filtered[] = $node;
            }
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by child node
    if ($options['child']) {
        $childNodes = self::findNodes($dom, $options['child']);
        $childNodes = !empty($childNodes) ? $childNodes : array();
        $filtered   = array();

        foreach ($nodes as $node) {
            foreach ($node->childNodes as $child) {
                foreach ($childNodes as $childNode) {
                    if ($childNode === $child) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by ancestor
    if ($options['ancestor']) {
        $ancestorNodes = self::findNodes($dom, $options['ancestor']);
        $ancestorNodes = !empty($ancestorNodes) ? $ancestorNodes : array();
        $filtered      = array();

        foreach ($nodes as $node) {
            $parent = $node->parentNode;

            // Climb until the document node (HTML or XML) is reached or
            // the parent chain ends; never step past either of them.
            while ($parent instanceof DOMNode && !($parent instanceof DOMDocument)) {
                if (in_array($parent, $ancestorNodes, TRUE)) {
                    $filtered[] = $node;
                    break;
                }

                $parent = $parent->parentNode;
            }
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by descendant
    if ($options['descendant']) {
        $descendantNodes = self::findNodes($dom, $options['descendant']);
        $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();
        $filtered        = array();

        foreach ($nodes as $node) {
            foreach (self::getDescendants($node) as $descendant) {
                foreach ($descendantNodes as $descendantNode) {
                    if ($descendantNode === $descendant) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by children
    if ($options['children']) {
        $validChild   = array('count', 'greater_than', 'less_than', 'only');
        $childOptions = self::assertValidKeys($options['children'], $validChild);
        $filtered     = array();
        $onlyNodes    = array();

        // The 'only' candidates do not depend on the node being checked,
        // so they are looked up once.
        if ($childOptions['only']) {
            $onlyNodes = self::findNodes($dom, $childOptions['only']);
            $onlyNodes = !empty($onlyNodes) ? $onlyNodes : array();
        }

        foreach ($nodes as $node) {
            // children of THIS node only; text and CDATA do not count
            $children = array();

            foreach ($node->childNodes as $childNode) {
                if ($childNode->nodeType !== XML_CDATA_SECTION_NODE &&
                    $childNode->nodeType !== XML_TEXT_NODE) {
                    $children[] = $childNode;
                }
            }

            // we must have children to pass this filter
            if (empty($children)) {
                continue;
            }

            $total = count($children);

            // exact count of children
            if ($childOptions['count'] !== NULL) {
                if ($total !== $childOptions['count']) {
                    continue;
                }
            }

            // range count of children
            else if ($childOptions['less_than']    !== NULL &&
                     $childOptions['greater_than'] !== NULL) {
                if ($total >= $childOptions['less_than'] ||
                    $total <= $childOptions['greater_than']) {
                    continue;
                }
            }

            // less than a given count
            else if ($childOptions['less_than'] !== NULL) {
                if ($total >= $childOptions['less_than']) {
                    continue;
                }
            }

            // more than a given count
            else if ($childOptions['greater_than'] !== NULL) {
                if ($total <= $childOptions['greater_than']) {
                    continue;
                }
            }

            // match each child against a specific tag
            if ($childOptions['only']) {
                // every child has to be one of the 'only' nodes; a single
                // mismatch disqualifies this node, not the remaining ones
                foreach ($children as $child) {
                    if (!in_array($child, $onlyNodes, TRUE)) {
                        continue 2;
                    }
                }
            }

            $filtered[] = $node;
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // return every node that matches all criteria
    return $nodes;
}
/**
 * Recursively get flat array of all descendants of this node.
 *
 * Text and CDATA section nodes are left out. For every remaining child its
 * own descendants are listed first, followed by the child itself.
 *
 * @param  DOMNode $node
 * @return array
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getDescendants(DOMNode $node)
{
    $found = array();

    if (!$node->childNodes) {
        return $found;
    }

    foreach ($node->childNodes as $candidate) {
        $type = $candidate->nodeType;

        if ($type !== XML_CDATA_SECTION_NODE && $type !== XML_TEXT_NODE) {
            // deeper levels first, then the child that contains them
            foreach (self::getDescendants($candidate) as $nested) {
                $found[] = $nested;
            }

            $found[] = $candidate;
        }
    }

    return $found;
}
/**
 * Get the text value of this node's child text node.
 *
 * The text of every text node below $node is collected in document order.
 * Each piece is trimmed and followed by a single space, and runs of spaces
 * in the result are collapsed into one. Nodes that are neither text nodes
 * nor containers of text nodes contribute nothing.
 *
 * @param  DOMNode $node
 * @return string
 * @since  Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getNodeText(DOMNode $node)
{
    $childNodes = $node->childNodes instanceof DOMNodeList ? $node->childNodes : array();
    $result     = '';

    foreach ($childNodes as $childNode) {
        if ($childNode->nodeType === XML_TEXT_NODE) {
            $result .= trim($childNode->data) . ' ';
        } else {
            $result .= self::getNodeText($childNode);
        }
    }

    // Whitespace-only text nodes each contribute a lone space, so several
    // spaces can pile up; collapse runs of any length in one pass.
    return preg_replace('/ {2,}/', ' ', $result);
}