4 * Copyright 2010, Moxiecode Systems AB
5 * Released under LGPL License.
7 * License: http://tinymce.moxiecode.com/license
8 * Contributing: http://tinymce.moxiecode.com/contributing
12 var Node = tinymce.html.Node;
15 * This class parses HTML code into a DOM-like structure of nodes. It will remove redundant whitespace and make
16 * sure that the node tree is valid according to the specified schema. So for example: <p>a<p>b</p>c</p> will become <p>a</p><p>b</p><p>c</p>
19 * var parser = new tinymce.html.DomParser({validate: true}, schema);
20 * var rootNode = parser.parse('<h1>content</h1>');
22 * @class tinymce.html.DomParser
27 * Constructs a new DomParser instance.
31 * @param {Object} settings Name/value collection of settings. comment, cdata, text, start and end are callbacks.
32 * @param {tinymce.html.Schema} schema HTML Schema class to use when parsing.
34 tinymce.html.DomParser = function(settings, schema) {
35 var self = this, nodeFilters = {}, attributeFilters = [], matchedNodes = {}, matchedAttributes = {};
37 settings = settings || {};
38 settings.validate = "validate" in settings ? settings.validate : true;
39 settings.root_name = settings.root_name || 'body';
40 self.schema = schema = schema || new tinymce.html.Schema();
42 function fixInvalidChildren(nodes) {
43 var ni, node, parent, parents, newParent, currentNode, tempNode, childNode, i,
44 childClone, nonEmptyElements, nonSplitableElements, sibling, nextNode;
46 nonSplitableElements = tinymce.makeMap('tr,td,th,tbody,thead,tfoot,table');
47 nonEmptyElements = schema.getNonEmptyElements();
49 for (ni = 0; ni < nodes.length; ni++) {
56 // Get list of all parent nodes until we find a valid parent to stick the child into
58 for (parent = node.parent; parent && !schema.isValidChild(parent.name, node.name) && !nonSplitableElements[parent.name]; parent = parent.parent)
61 // Found a suitable parent
62 if (parent && parents.length > 1) {
63 // Reverse the array since it makes looping easier
66 // Clone the related parent and insert that after the moved node
67 newParent = currentNode = self.filterNode(parents[0].clone());
69 // Start cloning and moving children on the left side of the target node
70 for (i = 0; i < parents.length - 1; i++) {
71 if (schema.isValidChild(currentNode.name, parents[i].name)) {
72 tempNode = self.filterNode(parents[i].clone());
73 currentNode.append(tempNode);
75 tempNode = currentNode;
77 for (childNode = parents[i].firstChild; childNode && childNode != parents[i + 1]; ) {
78 nextNode = childNode.next;
79 tempNode.append(childNode);
83 currentNode = tempNode;
86 if (!newParent.isEmpty(nonEmptyElements)) {
87 parent.insert(newParent, parents[0], true);
88 parent.insert(node, newParent);
90 parent.insert(node, parents[0], true);
93 // Check if the element is empty by looking through its contents, with special treatment for <p><br /></p>
95 if (parent.isEmpty(nonEmptyElements) || parent.firstChild === parent.lastChild && parent.firstChild.name === 'br') {
96 parent.empty().remove();
98 } else if (node.parent) {
99 // If it's an LI try to find a UL/OL for it or wrap it
100 if (node.name === 'li') {
102 if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
103 sibling.append(node);
108 if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
109 sibling.insert(node, sibling.firstChild, true);
113 node.wrap(self.filterNode(new Node('ul', 1)));
117 // Try wrapping the element in a DIV
118 if (schema.isValidChild(node.parent.name, 'div') && schema.isValidChild('div', node.name)) {
119 node.wrap(self.filterNode(new Node('div', 1)));
121 // We failed wrapping it, then remove or unwrap it
122 if (node.name === 'style' || node.name === 'script')
123 node.empty().remove();
132 * Runs the specified node through the element and attribute filters.
134 * @param {tinymce.html.Node} node The node to run filters on.
135 * @return {tinymce.html.Node} The passed in node.
137 self.filterNode = function(node) {
140 // Run element filters
141 if (name in nodeFilters) {
142 list = matchedNodes[name];
147 matchedNodes[name] = [node];
150 // Run attribute filters
151 i = attributeFilters.length;
153 name = attributeFilters[i].name;
155 if (name in node.attributes.map) {
156 list = matchedAttributes[name];
161 matchedAttributes[name] = [node];
169 * Adds a node filter function to the parser. The parser will collect the specified nodes by name
170 * and then execute the callback once it has finished parsing the document.
173 * parser.addNodeFilter('p,h1', function(nodes, name) {
174 * for (var i = 0; i < nodes.length; i++) {
175 * console.log(nodes[i].name);
178 * @method addNodeFilter
179 * @param {String} name Comma separated list of nodes to collect.
180 * @param {function} callback Callback function to execute once it has collected nodes.
182 self.addNodeFilter = function(name, callback) {
183 tinymce.each(tinymce.explode(name), function(name) {
184 var list = nodeFilters[name];
187 nodeFilters[name] = list = [];
194 * Adds an attribute filter function to the parser. The parser will collect nodes that have the specified attributes
195 * and then execute the callback once it has finished parsing the document.
198 * parser.addAttributeFilter('src,href', function(nodes, name) {
199 * for (var i = 0; i < nodes.length; i++) {
200 * console.log(nodes[i].name);
203 * @method addAttributeFilter
204 * @param {String} name Comma separated list of attributes to collect.
205 * @param {function} callback Callback function to execute once it has collected nodes.
207 self.addAttributeFilter = function(name, callback) {
208 tinymce.each(tinymce.explode(name), function(name) {
211 for (i = 0; i < attributeFilters.length; i++) {
212 if (attributeFilters[i].name === name) {
213 attributeFilters[i].callbacks.push(callback);
218 attributeFilters.push({name: name, callbacks: [callback]});
223 * Parses the specified HTML string into a DOM like node tree and returns the result.
226 * var rootNode = new DomParser({...}).parse('<b>text</b>');
228 * @param {String} html Html string to sax parse.
229 * @param {Object} args Optional args object that gets passed to all filter functions.
230 * @return {tinymce.html.Node} Root node containing the tree.
232 self.parse = function(html, args) {
233 var parser, rootNode, node, nodes, i, l, fi, fl, list, name, validate,
234 blockElements, startWhiteSpaceRegExp, invalidChildren = [],
235 endWhiteSpaceRegExp, allWhiteSpaceRegExp, whiteSpaceElements, children, nonEmptyElements, rootBlockName;
239 matchedAttributes = {};
240 blockElements = tinymce.extend(tinymce.makeMap('script,style,head,html,body,title,meta,param'), schema.getBlockElements());
241 nonEmptyElements = schema.getNonEmptyElements();
242 children = schema.children;
243 validate = settings.validate;
244 rootBlockName = "forced_root_block" in args ? args.forced_root_block : settings.forced_root_block;
246 whiteSpaceElements = schema.getWhiteSpaceElements();
247 startWhiteSpaceRegExp = /^[ \t\r\n]+/;
248 endWhiteSpaceRegExp = /[ \t\r\n]+$/;
249 allWhiteSpaceRegExp = /[ \t\r\n]+/g;
251 function addRootBlocks() {
252 var node = rootNode.firstChild, next, rootBlockNode;
257 if (node.type == 3 || (node.type == 1 && node.name !== 'p' && !blockElements[node.name] && !node.attr('data-mce-type'))) {
258 if (!rootBlockNode) {
259 // Create a new root block element
260 rootBlockNode = createNode(rootBlockName, 1);
261 rootNode.insert(rootBlockNode, node);
262 rootBlockNode.append(node);
264 rootBlockNode.append(node);
266 rootBlockNode = null;
273 function createNode(name, type) {
274 var node = new Node(name, type), list;
276 if (name in nodeFilters) {
277 list = matchedNodes[name];
282 matchedNodes[name] = [node];
288 function removeWhitespaceBefore(node) {
289 var textNode, textVal, sibling;
291 for (textNode = node.prev; textNode && textNode.type === 3; ) {
292 textVal = textNode.value.replace(endWhiteSpaceRegExp, '');
294 if (textVal.length > 0) {
295 textNode.value = textVal;
296 textNode = textNode.prev;
298 sibling = textNode.prev;
305 parser = new tinymce.html.SaxParser({
307 fix_self_closing : !validate, // Let the DOM parser handle <li> in <li> or <p> in <p> for better results
309 cdata: function(text) {
310 node.append(createNode('#cdata', 4)).value = text;
313 text: function(text, raw) {
316 // Trim all redundant whitespace on non white space elements
317 if (!whiteSpaceElements[node.name]) {
318 text = text.replace(allWhiteSpaceRegExp, ' ');
320 if (node.lastChild && blockElements[node.lastChild.name])
321 text = text.replace(startWhiteSpaceRegExp, '');
324 // Do we need to create the node
325 if (text.length !== 0) {
326 textNode = createNode('#text', 3);
327 textNode.raw = !!raw;
328 node.append(textNode).value = text;
332 comment: function(text) {
333 node.append(createNode('#comment', 8)).value = text;
336 pi: function(name, text) {
337 node.append(createNode(name, 7)).value = text;
338 removeWhitespaceBefore(node);
341 doctype: function(text) {
344 newNode = node.append(createNode('#doctype', 10));
345 newNode.value = text;
346 removeWhitespaceBefore(node);
349 start: function(name, attrs, empty) {
350 var newNode, attrFiltersLen, elementRule, textNode, attrName, text, sibling, parent;
352 elementRule = validate ? schema.getElementRule(name) : {};
354 newNode = createNode(elementRule.outputName || name, 1);
355 newNode.attributes = attrs;
356 newNode.shortEnded = empty;
358 node.append(newNode);
360 // Check if the node is a valid child of the parent node. If the child is
361 // unknown we don't collect it since it's probably a custom element
362 parent = children[node.name];
363 if (parent && children[newNode.name] && !parent[newNode.name])
364 invalidChildren.push(newNode);
366 attrFiltersLen = attributeFilters.length;
367 while (attrFiltersLen--) {
368 attrName = attributeFilters[attrFiltersLen].name;
370 if (attrName in attrs.map) {
371 list = matchedAttributes[attrName];
376 matchedAttributes[attrName] = [newNode];
380 // Trim whitespace before block
381 if (blockElements[name])
382 removeWhitespaceBefore(newNode);
384 // Change current node if the element wasn't empty i.e not <br /> or <img />
390 end: function(name) {
391 var textNode, elementRule, text, sibling, tempNode;
393 elementRule = validate ? schema.getElementRule(name) : {};
395 if (blockElements[name]) {
396 if (!whiteSpaceElements[node.name]) {
397 // Trim whitespace at beginning of block
398 for (textNode = node.firstChild; textNode && textNode.type === 3; ) {
399 text = textNode.value.replace(startWhiteSpaceRegExp, '');
401 if (text.length > 0) {
402 textNode.value = text;
403 textNode = textNode.next;
405 sibling = textNode.next;
411 // Trim whitespace at end of block
412 for (textNode = node.lastChild; textNode && textNode.type === 3; ) {
413 text = textNode.value.replace(endWhiteSpaceRegExp, '');
415 if (text.length > 0) {
416 textNode.value = text;
417 textNode = textNode.prev;
419 sibling = textNode.prev;
426 // Trim start white space
427 textNode = node.prev;
428 if (textNode && textNode.type === 3) {
429 text = textNode.value.replace(startWhiteSpaceRegExp, '');
432 textNode.value = text;
438 // Handle empty nodes
439 if (elementRule.removeEmpty || elementRule.paddEmpty) {
440 if (node.isEmpty(nonEmptyElements)) {
441 if (elementRule.paddEmpty)
442 node.empty().append(new Node('#text', '3')).value = '\u00a0';
444 // Leave nodes that have a name like <a name="name">
445 if (!node.attributes.map.name) {
446 tempNode = node.parent;
447 node.empty().remove();
460 rootNode = node = new Node(args.context || settings.root_name, 11);
464 // Fix invalid children or report invalid children in a contextual parsing
465 if (validate && invalidChildren.length) {
467 fixInvalidChildren(invalidChildren);
472 // Wrap nodes in the root into block elements if the root is body
473 if (rootBlockName && rootNode.name == 'body')
476 // Run filters only when the contents is valid
479 for (name in matchedNodes) {
480 list = nodeFilters[name];
481 nodes = matchedNodes[name];
483 // Remove already removed children
486 if (!nodes[fi].parent)
490 for (i = 0, l = list.length; i < l; i++)
491 list[i](nodes, name, args);
494 // Run attribute filters
495 for (i = 0, l = attributeFilters.length; i < l; i++) {
496 list = attributeFilters[i];
498 if (list.name in matchedAttributes) {
499 nodes = matchedAttributes[list.name];
501 // Remove already removed children
504 if (!nodes[fi].parent)
508 for (fi = 0, fl = list.callbacks.length; fi < fl; fi++)
509 list.callbacks[fi](nodes, list.name, args);
517 // Remove <br> at end of block elements. Gecko and WebKit inject BR elements to
518 // make it possible to place the caret inside empty blocks. This logic tries to remove
519 // these elements and keep br elements that were intended to be there intact
520 if (settings.remove_trailing_brs) {
521 self.addNodeFilter('br', function(nodes, name) {
522 var i, l = nodes.length, node, blockElements = schema.getBlockElements(),
523 nonEmptyElements = schema.getNonEmptyElements(), parent, prev, prevName;
525 // Remove brs from body element as well
526 blockElements.body = 1;
528 // Must loop forwards since it will otherwise remove all brs in <p>a<br><br><br></p>
529 for (i = 0; i < l; i++) {
531 parent = node.parent;
533 if (blockElements[node.parent.name] && node === parent.lastChild) {
534 // Loop all nodes to the right of the current node and check for other BR elements
535 // excluding bookmarks since they are invisible
538 prevName = prev.name;
541 if (prevName !== "span" || prev.attr('data-mce-type') !== 'bookmark') {
542 // Found a non BR element
543 if (prevName !== "br")
546 // Found another br it's a <br><br> structure then don't remove anything
547 if (prevName === 'br') {
559 // Is the parent to be considered empty after we removed the BR
560 if (parent.isEmpty(nonEmptyElements)) {
561 elementRule = schema.getElementRule(parent.name);
563 // Remove or pad the element depending on schema rule
564 if (elementRule.removeEmpty)
566 else if (elementRule.paddEmpty)
567 parent.empty().append(new tinymce.html.Node('#text', 3)).value = '\u00a0';