]> CyberLeo.Net >> Repos - Github/sugarcrm.git/blob - include/javascript/tiny_mce/classes/html/DomParser.js
Release 6.5.0
[Github/sugarcrm.git] / include / javascript / tiny_mce / classes / html / DomParser.js
1 /**
2  * DomParser.js
3  *
4  * Copyright 2010, Moxiecode Systems AB
5  * Released under LGPL License.
6  *
7  * License: http://tinymce.moxiecode.com/license
8  * Contributing: http://tinymce.moxiecode.com/contributing
9  */
10
11 (function(tinymce) {
12         var Node = tinymce.html.Node;
13
14         /**
15          * This class parses HTML code into a DOM-like structure of nodes. It removes redundant whitespace and makes
16          * sure that the node tree is valid according to the specified schema. For example: <p>a<p>b</p>c</p> will become <p>a</p><p>b</p><p>c</p>
17          *
18          * @example
19          * var parser = new tinymce.html.DomParser({validate: true}, schema);
20          * var rootNode = parser.parse('<h1>content</h1>');
21          *
22          * @class tinymce.html.DomParser
23          * @version 3.4
24          */
25
26         /**
27          * Constructs a new DomParser instance.
28          *
29          * @constructor
30          * @method DomParser
31          * @param {Object} settings Name/value collection of settings. comment, cdata, text, start and end are callbacks.
32          * @param {tinymce.html.Schema} schema HTML Schema class to use when parsing.
33          */
34         tinymce.html.DomParser = function(settings, schema) {
35                 var self = this, nodeFilters = {}, attributeFilters = [], matchedNodes = {}, matchedAttributes = {};
36
37                 settings = settings || {};
38                 settings.validate = "validate" in settings ? settings.validate : true;
39                 settings.root_name = settings.root_name || 'body';
40                 self.schema = schema = schema || new tinymce.html.Schema();
41
42                 function fixInvalidChildren(nodes) {
43                         var ni, node, parent, parents, newParent, currentNode, tempNode, childNode, i,
44                                 childClone, nonEmptyElements, nonSplitableElements, sibling, nextNode;
45
46                         nonSplitableElements = tinymce.makeMap('tr,td,th,tbody,thead,tfoot,table');
47                         nonEmptyElements = schema.getNonEmptyElements();
48
49                         for (ni = 0; ni < nodes.length; ni++) {
50                                 node = nodes[ni];
51
52                                 // Already removed
53                                 if (!node.parent)
54                                         continue;
55
56                                 // Get list of all parent nodes until we find a valid parent to stick the child into
57                                 parents = [node];
58                                 for (parent = node.parent; parent && !schema.isValidChild(parent.name, node.name) && !nonSplitableElements[parent.name]; parent = parent.parent)
59                                         parents.push(parent);
60
61                                 // Found a suitable parent
62                                 if (parent && parents.length > 1) {
63                                         // Reverse the array since it makes looping easier
64                                         parents.reverse();
65
66                                         // Clone the related parent and insert that after the moved node
67                                         newParent = currentNode = self.filterNode(parents[0].clone());
68
69                                         // Start cloning and moving children on the left side of the target node
70                                         for (i = 0; i < parents.length - 1; i++) {
71                                                 if (schema.isValidChild(currentNode.name, parents[i].name)) {
72                                                         tempNode = self.filterNode(parents[i].clone());
73                                                         currentNode.append(tempNode);
74                                                 } else
75                                                         tempNode = currentNode;
76
77                                                 for (childNode = parents[i].firstChild; childNode && childNode != parents[i + 1]; ) {
78                                                         nextNode = childNode.next;
79                                                         tempNode.append(childNode);
80                                                         childNode = nextNode;
81                                                 }
82
83                                                 currentNode = tempNode;
84                                         }
85
86                                         if (!newParent.isEmpty(nonEmptyElements)) {
87                                                 parent.insert(newParent, parents[0], true);
88                                                 parent.insert(node, newParent);
89                                         } else {
90                                                 parent.insert(node, parents[0], true);
91                                         }
92
93                                         // Check if the element is empty by looking through it's contents and special treatment for <p><br /></p>
94                                         parent = parents[0];
95                                         if (parent.isEmpty(nonEmptyElements) || parent.firstChild === parent.lastChild && parent.firstChild.name === 'br') {
96                                                 parent.empty().remove();
97                                         }
98                                 } else if (node.parent) {
99                                         // If it's an LI try to find a UL/OL for it or wrap it
100                                         if (node.name === 'li') {
101                                                 sibling = node.prev;
102                                                 if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
103                                                         sibling.append(node);
104                                                         continue;
105                                                 }
106
107                                                 sibling = node.next;
108                                                 if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
109                                                         sibling.insert(node, sibling.firstChild, true);
110                                                         continue;
111                                                 }
112
113                                                 node.wrap(self.filterNode(new Node('ul', 1)));
114                                                 continue;
115                                         }
116
117                                         // Try wrapping the element in a DIV
118                                         if (schema.isValidChild(node.parent.name, 'div') && schema.isValidChild('div', node.name)) {
119                                                 node.wrap(self.filterNode(new Node('div', 1)));
120                                         } else {
121                                                 // We failed wrapping it, then remove or unwrap it
122                                                 if (node.name === 'style' || node.name === 'script')
123                                                         node.empty().remove();
124                                                 else
125                                                         node.unwrap();
126                                         }
127                                 }
128                         }
129                 };
130
131                 /**
132                  * Runs the specified node though the element and attributes filters.
133                  *
134                  * @param {tinymce.html.Node} Node the node to run filters on.
135                  * @return {tinymce.html.Node} The passed in node.
136                  */
137                 self.filterNode = function(node) {
138                         var i, name, list;
139
140                         // Run element filters
141                         if (name in nodeFilters) {
142                                 list = matchedNodes[name];
143
144                                 if (list)
145                                         list.push(node);
146                                 else
147                                         matchedNodes[name] = [node];
148                         }
149
150                         // Run attribute filters
151                         i = attributeFilters.length;
152                         while (i--) {
153                                 name = attributeFilters[i].name;
154
155                                 if (name in node.attributes.map) {
156                                         list = matchedAttributes[name];
157
158                                         if (list)
159                                                 list.push(node);
160                                         else
161                                                 matchedAttributes[name] = [node];
162                                 }
163                         }
164
165                         return node;
166                 };
167
168                 /**
169                  * Adds a node filter function to the parser, the parser will collect the specified nodes by name
170                  * and then execute the callback ones it has finished parsing the document.
171                  *
172                  * @example
173                  * parser.addNodeFilter('p,h1', function(nodes, name) {
174                  *              for (var i = 0; i < nodes.length; i++) {
175                  *                      console.log(nodes[i].name);
176                  *              }
177                  * });
178                  * @method addNodeFilter
179                  * @method {String} name Comma separated list of nodes to collect.
180                  * @param {function} callback Callback function to execute once it has collected nodes.
181                  */
182                 self.addNodeFilter = function(name, callback) {
183                         tinymce.each(tinymce.explode(name), function(name) {
184                                 var list = nodeFilters[name];
185
186                                 if (!list)
187                                         nodeFilters[name] = list = [];
188
189                                 list.push(callback);
190                         });
191                 };
192
193                 /**
194                  * Adds a attribute filter function to the parser, the parser will collect nodes that has the specified attributes
195                  * and then execute the callback ones it has finished parsing the document.
196                  *
197                  * @example
198                  * parser.addAttributeFilter('src,href', function(nodes, name) {
199                  *              for (var i = 0; i < nodes.length; i++) {
200                  *                      console.log(nodes[i].name);
201                  *              }
202                  * });
203                  * @method addAttributeFilter
204                  * @method {String} name Comma separated list of nodes to collect.
205                  * @param {function} callback Callback function to execute once it has collected nodes.
206                  */
207                 self.addAttributeFilter = function(name, callback) {
208                         tinymce.each(tinymce.explode(name), function(name) {
209                                 var i;
210
211                                 for (i = 0; i < attributeFilters.length; i++) {
212                                         if (attributeFilters[i].name === name) {
213                                                 attributeFilters[i].callbacks.push(callback);
214                                                 return;
215                                         }
216                                 }
217
218                                 attributeFilters.push({name: name, callbacks: [callback]});
219                         });
220                 };
221
222                 /**
223                  * Parses the specified HTML string into a DOM like node tree and returns the result.
224                  *
225                  * @example
226                  * var rootNode = new DomParser({...}).parse('<b>text</b>');
227                  * @method parse
228                  * @param {String} html Html string to sax parse.
229                  * @param {Object} args Optional args object that gets passed to all filter functions.
230                  * @return {tinymce.html.Node} Root node containing the tree.
231                  */
232                 self.parse = function(html, args) {
233                         var parser, rootNode, node, nodes, i, l, fi, fl, list, name, validate,
234                                 blockElements, startWhiteSpaceRegExp, invalidChildren = [],
235                                 endWhiteSpaceRegExp, allWhiteSpaceRegExp, whiteSpaceElements, children, nonEmptyElements, rootBlockName;
236
237                         args = args || {};
238                         matchedNodes = {};
239                         matchedAttributes = {};
240                         blockElements = tinymce.extend(tinymce.makeMap('script,style,head,html,body,title,meta,param'), schema.getBlockElements());
241                         nonEmptyElements = schema.getNonEmptyElements();
242                         children = schema.children;
243                         validate = settings.validate;
244                         rootBlockName = "forced_root_block" in args ? args.forced_root_block : settings.forced_root_block;
245
246                         whiteSpaceElements = schema.getWhiteSpaceElements();
247                         startWhiteSpaceRegExp = /^[ \t\r\n]+/;
248                         endWhiteSpaceRegExp = /[ \t\r\n]+$/;
249                         allWhiteSpaceRegExp = /[ \t\r\n]+/g;
250
251                         function addRootBlocks() {
252                                 var node = rootNode.firstChild, next, rootBlockNode;
253
254                                 while (node) {
255                                         next = node.next;
256
257                                         if (node.type == 3 || (node.type == 1 && node.name !== 'p' && !blockElements[node.name] && !node.attr('data-mce-type'))) {
258                                                 if (!rootBlockNode) {
259                                                         // Create a new root block element
260                                                         rootBlockNode = createNode(rootBlockName, 1);
261                                                         rootNode.insert(rootBlockNode, node);
262                                                         rootBlockNode.append(node);
263                                                 } else
264                                                         rootBlockNode.append(node);
265                                         } else {
266                                                 rootBlockNode = null;
267                                         }
268
269                                         node = next;
270                                 };
271                         };
272
273                         function createNode(name, type) {
274                                 var node = new Node(name, type), list;
275
276                                 if (name in nodeFilters) {
277                                         list = matchedNodes[name];
278
279                                         if (list)
280                                                 list.push(node);
281                                         else
282                                                 matchedNodes[name] = [node];
283                                 }
284
285                                 return node;
286                         };
287
288                         function removeWhitespaceBefore(node) {
289                                 var textNode, textVal, sibling;
290
291                                 for (textNode = node.prev; textNode && textNode.type === 3; ) {
292                                         textVal = textNode.value.replace(endWhiteSpaceRegExp, '');
293
294                                         if (textVal.length > 0) {
295                                                 textNode.value = textVal;
296                                                 textNode = textNode.prev;
297                                         } else {
298                                                 sibling = textNode.prev;
299                                                 textNode.remove();
300                                                 textNode = sibling;
301                                         }
302                                 }
303                         };
304
305                         parser = new tinymce.html.SaxParser({
306                                 validate : validate,
307                                 fix_self_closing : !validate, // Let the DOM parser handle <li> in <li> or <p> in <p> for better results
308
309                                 cdata: function(text) {
310                                         node.append(createNode('#cdata', 4)).value = text;
311                                 },
312
313                                 text: function(text, raw) {
314                                         var textNode;
315
316                                         // Trim all redundant whitespace on non white space elements
317                                         if (!whiteSpaceElements[node.name]) {
318                                                 text = text.replace(allWhiteSpaceRegExp, ' ');
319
320                                                 if (node.lastChild && blockElements[node.lastChild.name])
321                                                         text = text.replace(startWhiteSpaceRegExp, '');
322                                         }
323
324                                         // Do we need to create the node
325                                         if (text.length !== 0) {
326                                                 textNode = createNode('#text', 3);
327                                                 textNode.raw = !!raw;
328                                                 node.append(textNode).value = text;
329                                         }
330                                 },
331
332                                 comment: function(text) {
333                                         node.append(createNode('#comment', 8)).value = text;
334                                 },
335
336                                 pi: function(name, text) {
337                                         node.append(createNode(name, 7)).value = text;
338                                         removeWhitespaceBefore(node);
339                                 },
340
341                                 doctype: function(text) {
342                                         var newNode;
343                 
344                                         newNode = node.append(createNode('#doctype', 10));
345                                         newNode.value = text;
346                                         removeWhitespaceBefore(node);
347                                 },
348
349                                 start: function(name, attrs, empty) {
350                                         var newNode, attrFiltersLen, elementRule, textNode, attrName, text, sibling, parent;
351
352                                         elementRule = validate ? schema.getElementRule(name) : {};
353                                         if (elementRule) {
354                                                 newNode = createNode(elementRule.outputName || name, 1);
355                                                 newNode.attributes = attrs;
356                                                 newNode.shortEnded = empty;
357
358                                                 node.append(newNode);
359
360                                                 // Check if node is valid child of the parent node is the child is
361                                                 // unknown we don't collect it since it's probably a custom element
362                                                 parent = children[node.name];
363                                                 if (parent && children[newNode.name] && !parent[newNode.name])
364                                                         invalidChildren.push(newNode);
365
366                                                 attrFiltersLen = attributeFilters.length;
367                                                 while (attrFiltersLen--) {
368                                                         attrName = attributeFilters[attrFiltersLen].name;
369
370                                                         if (attrName in attrs.map) {
371                                                                 list = matchedAttributes[attrName];
372
373                                                                 if (list)
374                                                                         list.push(newNode);
375                                                                 else
376                                                                         matchedAttributes[attrName] = [newNode];
377                                                         }
378                                                 }
379
380                                                 // Trim whitespace before block
381                                                 if (blockElements[name])
382                                                         removeWhitespaceBefore(newNode);
383
384                                                 // Change current node if the element wasn't empty i.e not <br /> or <img />
385                                                 if (!empty)
386                                                         node = newNode;
387                                         }
388                                 },
389
390                                 end: function(name) {
391                                         var textNode, elementRule, text, sibling, tempNode;
392
393                                         elementRule = validate ? schema.getElementRule(name) : {};
394                                         if (elementRule) {
395                                                 if (blockElements[name]) {
396                                                         if (!whiteSpaceElements[node.name]) {
397                                                                 // Trim whitespace at beginning of block
398                                                                 for (textNode = node.firstChild; textNode && textNode.type === 3; ) {
399                                                                         text = textNode.value.replace(startWhiteSpaceRegExp, '');
400
401                                                                         if (text.length > 0) {
402                                                                                 textNode.value = text;
403                                                                                 textNode = textNode.next;
404                                                                         } else {
405                                                                                 sibling = textNode.next;
406                                                                                 textNode.remove();
407                                                                                 textNode = sibling;
408                                                                         }
409                                                                 }
410
411                                                                 // Trim whitespace at end of block
412                                                                 for (textNode = node.lastChild; textNode && textNode.type === 3; ) {
413                                                                         text = textNode.value.replace(endWhiteSpaceRegExp, '');
414
415                                                                         if (text.length > 0) {
416                                                                                 textNode.value = text;
417                                                                                 textNode = textNode.prev;
418                                                                         } else {
419                                                                                 sibling = textNode.prev;
420                                                                                 textNode.remove();
421                                                                                 textNode = sibling;
422                                                                         }
423                                                                 }
424                                                         }
425
426                                                         // Trim start white space
427                                                         textNode = node.prev;
428                                                         if (textNode && textNode.type === 3) {
429                                                                 text = textNode.value.replace(startWhiteSpaceRegExp, '');
430
431                                                                 if (text.length > 0)
432                                                                         textNode.value = text;
433                                                                 else
434                                                                         textNode.remove();
435                                                         }
436                                                 }
437
438                                                 // Handle empty nodes
439                                                 if (elementRule.removeEmpty || elementRule.paddEmpty) {
440                                                         if (node.isEmpty(nonEmptyElements)) {
441                                                                 if (elementRule.paddEmpty)
442                                                                         node.empty().append(new Node('#text', '3')).value = '\u00a0';
443                                                                 else {
444                                                                         // Leave nodes that have a name like <a name="name">
445                                                                         if (!node.attributes.map.name) {
446                                                                                 tempNode = node.parent;
447                                                                                 node.empty().remove();
448                                                                                 node = tempNode;
449                                                                                 return;
450                                                                         }
451                                                                 }
452                                                         }
453                                                 }
454
455                                                 node = node.parent;
456                                         }
457                                 }
458                         }, schema);
459
460                         rootNode = node = new Node(args.context || settings.root_name, 11);
461
462                         parser.parse(html);
463
464                         // Fix invalid children or report invalid children in a contextual parsing
465                         if (validate && invalidChildren.length) {
466                                 if (!args.context)
467                                         fixInvalidChildren(invalidChildren);
468                                 else
469                                         args.invalid = true;
470                         }
471
472                         // Wrap nodes in the root into block elements if the root is body
473                         if (rootBlockName && rootNode.name == 'body')
474                                 addRootBlocks();
475
476                         // Run filters only when the contents is valid
477                         if (!args.invalid) {
478                                 // Run node filters
479                                 for (name in matchedNodes) {
480                                         list = nodeFilters[name];
481                                         nodes = matchedNodes[name];
482
483                                         // Remove already removed children
484                                         fi = nodes.length;
485                                         while (fi--) {
486                                                 if (!nodes[fi].parent)
487                                                         nodes.splice(fi, 1);
488                                         }
489
490                                         for (i = 0, l = list.length; i < l; i++)
491                                                 list[i](nodes, name, args);
492                                 }
493
494                                 // Run attribute filters
495                                 for (i = 0, l = attributeFilters.length; i < l; i++) {
496                                         list = attributeFilters[i];
497
498                                         if (list.name in matchedAttributes) {
499                                                 nodes = matchedAttributes[list.name];
500
501                                                 // Remove already removed children
502                                                 fi = nodes.length;
503                                                 while (fi--) {
504                                                         if (!nodes[fi].parent)
505                                                                 nodes.splice(fi, 1);
506                                                 }
507
508                                                 for (fi = 0, fl = list.callbacks.length; fi < fl; fi++)
509                                                         list.callbacks[fi](nodes, list.name, args);
510                                         }
511                                 }
512                         }
513
514                         return rootNode;
515                 };
516
517                 // Remove <br> at end of block elements Gecko and WebKit injects BR elements to
518                 // make it possible to place the caret inside empty blocks. This logic tries to remove
519                 // these elements and keep br elements that where intended to be there intact
520                 if (settings.remove_trailing_brs) {
521                         self.addNodeFilter('br', function(nodes, name) {
522                                 var i, l = nodes.length, node, blockElements = schema.getBlockElements(),
523                                         nonEmptyElements = schema.getNonEmptyElements(), parent, prev, prevName;
524
525                                 // Remove brs from body element as well
526                                 blockElements.body = 1;
527
528                                 // Must loop forwards since it will otherwise remove all brs in <p>a<br><br><br></p>
529                                 for (i = 0; i < l; i++) {
530                                         node = nodes[i];
531                                         parent = node.parent;
532
533                                         if (blockElements[node.parent.name] && node === parent.lastChild) {
534                                                 // Loop all nodes to the right of the current node and check for other BR elements
535                                                 // excluding bookmarks since they are invisible
536                                                 prev = node.prev;
537                                                 while (prev) {
538                                                         prevName = prev.name;
539
540                                                         // Ignore bookmarks
541                                                         if (prevName !== "span" || prev.attr('data-mce-type') !== 'bookmark') {
542                                                                 // Found a non BR element
543                                                                 if (prevName !== "br")
544                                                                         break;
545         
546                                                                 // Found another br it's a <br><br> structure then don't remove anything
547                                                                 if (prevName === 'br') {
548                                                                         node = null;
549                                                                         break;
550                                                                 }
551                                                         }
552
553                                                         prev = prev.prev;
554                                                 }
555
556                                                 if (node) {
557                                                         node.remove();
558
559                                                         // Is the parent to be considered empty after we removed the BR
560                                                         if (parent.isEmpty(nonEmptyElements)) {
561                                                                 elementRule = schema.getElementRule(parent.name);
562
563                                                                 // Remove or padd the element depending on schema rule
564                                                                 if (elementRule.removeEmpty)
565                                                                         parent.remove();
566                                                                 else if (elementRule.paddEmpty) 
567                                                                         parent.empty().append(new tinymce.html.Node('#text', 3)).value = '\u00a0';
568                                                         }
569                                                 }
570                                         }
571                                 }
572                         });
573                 }
574         }
575 })(tinymce);