]> CyberLeo.Net >> Repos - Github/sugarcrm.git/blob - include/javascript/tiny_mce/classes/html/SaxParser.js
Release 6.2.3
[Github/sugarcrm.git] / include / javascript / tiny_mce / classes / html / SaxParser.js
1 /**
2  * SaxParser.js
3  *
4  * Copyright 2010, Moxiecode Systems AB
5  * Released under LGPL License.
6  *
7  * License: http://tinymce.moxiecode.com/license
8  * Contributing: http://tinymce.moxiecode.com/contributing
9  */
10
(function(tinymce) {
    /**
     * This class parses HTML code using pure JavaScript and executes various events for each item it finds. It will
     * always execute the events in the right order for tag soup code like <b><p></b></p>. It will also remove elements
     * and attributes that don't fit the schema if the validate setting is enabled.
     *
     * @example
     * var parser = new tinymce.html.SaxParser({
     *     validate: true,
     *
     *     comment: function(text) {
     *         console.log('Comment:', text);
     *     },
     *
     *     cdata: function(text) {
     *         console.log('CDATA:', text);
     *     },
     *
     *     text: function(text, raw) {
     *         console.log('Text:', text, 'Raw:', raw);
     *     },
     *
     *     start: function(name, attrs, empty) {
     *         console.log('Start:', name, attrs, empty);
     *     },
     *
     *     end: function(name) {
     *         console.log('End:', name);
     *     },
     *
     *     pi: function(name, text) {
     *         console.log('PI:', name, text);
     *     },
     *
     *     doctype: function(text) {
     *         console.log('DocType:', text);
     *     }
     * }, schema);
     * @class tinymce.html.SaxParser
     * @version 3.4
     */

    /**
     * Constructs a new SaxParser instance.
     *
     * Settings read by the parser besides the callbacks: validate (filter elements/attributes through the schema),
     * remove_internals (drop elements carrying a data-mce-type attribute) and fix_self_closing (defaults to true;
     * note that the default is written back onto the passed in settings object).
     *
     * @constructor
     * @method SaxParser
     * @param {Object} settings Name/value collection of settings. comment, cdata, text, start, end, pi and doctype are callbacks.
     * @param {tinymce.html.Schema} schema HTML Schema class to use when parsing. A new Schema is created when omitted.
     */
    tinymce.html.SaxParser = function(settings, schema) {
        var self = this, noop = function() {};

        settings = settings || {};
        self.schema = schema = schema || new tinymce.html.Schema();

        // Anything but an explicit false enables the self closing fix
        if (settings.fix_self_closing !== false) {
            settings.fix_self_closing = true;
        }

        // Add handler functions from settings and setup default handlers
        tinymce.each('comment cdata text start end pi doctype'.split(' '), function(name) {
            if (name) {
                self[name] = settings[name] || noop;
            }
        });

        /**
         * Parses the specified HTML string and executes the callbacks for each item it finds.
         *
         * Callbacks are invoked as: text(text) for entity decoded text, text(text, true) for the raw contents of
         * script/style/noscript, start(name, attrs, shortEnded) where attrs is an array of {name, value} items with
         * an extra map property (name -> value), end(name), comment(text), cdata(text), doctype(text) and
         * pi(name, text). Elements still open at the end of the input are closed in reverse order.
         *
         * @example
         * new SaxParser({...}).parse('<b>text</b>');
         * @method parse
         * @param {String} html Html string to sax parse.
         */
        self.parse = function(html) {
            var self = this, matches, index = 0, value, endRegExp, stack = [], attrList, i, text, name, isInternalElement,
                removeInternalElements, shortEndedElements, fillAttrsMap, isShortEnded, validate, elementRule, isValidElement,
                attr, attribsValue, validAttributesMap, validAttributePatterns, attributesRequired, attributesDefault,
                attributesForced, selfClosing, tokenRegExp, attrRegExp, specialElements, attrValue, idCount = 0,
                decode = tinymce.html.Entities.decode, fixSelfClosing;

            /**
             * Closes the nearest open element with the specified name together with every element opened after it.
             * The end callback is only fired for elements that were reported through start (valid ones). An end tag
             * without a matching open element is ignored.
             *
             * @param {String} name Lower case element name to close.
             */
            function processEndTag(name) {
                var pos = stack.length, i, openElement;

                // Find position of parent of the same type
                while (pos--) {
                    if (stack[pos].name === name) {
                        break;
                    }
                }

                // Found parent
                if (pos >= 0) {
                    // Close all the open elements, innermost first
                    for (i = stack.length - 1; i >= pos; i--) {
                        openElement = stack[i];

                        if (openElement.valid) {
                            self.end(openElement.name);
                        }
                    }

                    // Remove the open elements from the stack
                    stack.length = pos;
                }
            }

            // Precompile RegExps and map objects
            tokenRegExp = new RegExp('<(?:' +
                '(?:!--([\\w\\W]*?)-->)|' + // Comment
                '(?:!\\[CDATA\\[([\\w\\W]*?)\\]\\]>)|' + // CDATA
                '(?:!DOCTYPE([\\w\\W]*?)>)|' + // DOCTYPE
                '(?:\\?([^\\s\\/<>]+) ?([\\w\\W]*?)[?/]>)|' + // PI
                '(?:\\/([^>]+)>)|' + // End element
                '(?:([^\\s\\/<>]+)\\s*((?:[^"\'>]+(?:(?:"[^"]*")|(?:\'[^\']*\')|[^>]*))*)>)' + // Start element
            ')', 'g');

            attrRegExp = /([\w:\-]+)(?:\s*=\s*(?:(?:\"((?:\\.|[^\"])*)\")|(?:\'((?:\\.|[^\'])*)\')|([^>\s]+)))?/g;
            specialElements = {
                'script' : /<\/script[^>]*>/gi,
                'style' : /<\/style[^>]*>/gi,
                'noscript' : /<\/noscript[^>]*>/gi
            };

            // Setup lookup tables for empty elements and boolean attributes
            shortEndedElements = schema.getShortEndedElements();
            selfClosing = schema.getSelfClosingElements();
            fillAttrsMap = schema.getBoolAttrs();
            validate = settings.validate;
            removeInternalElements = settings.remove_internals;
            fixSelfClosing = settings.fix_self_closing;

            while ((matches = tokenRegExp.exec(html))) {
                // Text between the previous token and this one
                if (index < matches.index) {
                    self.text(decode(html.substr(index, matches.index - index)));
                }

                if ((value = matches[6])) { // End element
                    // Strip trailing white space so that "</p >" closes an open "p"
                    processEndTag(value.toLowerCase().replace(/\s+$/, ''));
                } else if ((value = matches[7])) { // Start element
                    value = value.toLowerCase();
                    isShortEnded = value in shortEndedElements;

                    // Reset per tag state so nothing leaks in from the previously parsed tag
                    attribsValue = matches[8];
                    isInternalElement = false;

                    // Is self closing tag for example an <li> after an open <li>
                    if (fixSelfClosing && selfClosing[value] && stack.length > 0 && stack[stack.length - 1].name === value) {
                        processEndTag(value);
                    }

                    // Validate element
                    if (!validate || (elementRule = schema.getElementRule(value))) {
                        isValidElement = true;

                        // Grab attributes map and patterns when validation is enabled
                        if (validate) {
                            validAttributesMap = elementRule.attributes;
                            validAttributePatterns = elementRule.attributePatterns;
                        }

                        attrList = [];
                        attrList.map = {};

                        // Parse attributes
                        if (attribsValue) {
                            isInternalElement = attribsValue.indexOf('data-mce-type') !== -1; // Check if the element is an internal element

                            // If the element has internal attributes then remove it if we are told to do so
                            if (isInternalElement && removeInternalElements) {
                                isValidElement = false;
                            }

                            // replace is only used to iterate the matches, the returned string is discarded
                            attribsValue.replace(attrRegExp, function(match, name, value, val2, val3) {
                                var attrRule, i;

                                name = name.toLowerCase();
                                value = name in fillAttrsMap ? name : decode(value || val2 || val3 || ''); // Handle boolean attribute than value attribute

                                // Validate name and value, internal elements and data- attributes are passed through
                                if (validate && !isInternalElement && name.indexOf('data-') !== 0) {
                                    attrRule = validAttributesMap[name];

                                    // Find rule by pattern matching
                                    if (!attrRule && validAttributePatterns) {
                                        i = validAttributePatterns.length;
                                        while (i--) {
                                            attrRule = validAttributePatterns[i];
                                            if (attrRule.pattern.test(name)) {
                                                break;
                                            }
                                        }

                                        // No rule matched
                                        if (i === -1) {
                                            attrRule = null;
                                        }
                                    }

                                    // No attribute rule found
                                    if (!attrRule) {
                                        return;
                                    }

                                    // Validate value
                                    if (attrRule.validValues && !(value in attrRule.validValues)) {
                                        return;
                                    }
                                }

                                // Add attribute to list and map
                                attrList.map[name] = value;
                                attrList.push({
                                    name: name,
                                    value: value
                                });
                            });
                        }

                        // Process attributes if validation is enabled
                        if (validate && !isInternalElement) {
                            attributesRequired = elementRule.attributesRequired;
                            attributesDefault = elementRule.attributesDefault;
                            attributesForced = elementRule.attributesForced;

                            // Handle forced attributes, these always override whatever was parsed
                            if (attributesForced) {
                                i = attributesForced.length;
                                while (i--) {
                                    attr = attributesForced[i];
                                    name = attr.name;
                                    attrValue = attr.value;

                                    // {$uid} is replaced by an id that is unique within this parse call
                                    if (attrValue === '{$uid}') {
                                        attrValue = 'mce_' + idCount++;
                                    }

                                    attrList.map[name] = attrValue;
                                    attrList.push({name: name, value: attrValue});
                                }
                            }

                            // Handle default attributes, these are only added when missing
                            if (attributesDefault) {
                                i = attributesDefault.length;
                                while (i--) {
                                    attr = attributesDefault[i];
                                    name = attr.name;

                                    if (!(name in attrList.map)) {
                                        attrValue = attr.value;

                                        if (attrValue === '{$uid}') {
                                            attrValue = 'mce_' + idCount++;
                                        }

                                        attrList.map[name] = attrValue;
                                        attrList.push({name: name, value: attrValue});
                                    }
                                }
                            }

                            // Handle required attributes, one of them being present is enough
                            if (attributesRequired) {
                                i = attributesRequired.length;
                                while (i--) {
                                    if (attributesRequired[i] in attrList.map) {
                                        break;
                                    }
                                }

                                // None of the required attributes were found
                                if (i === -1) {
                                    isValidElement = false;
                                }
                            }

                            // Invalidate element if it's marked as bogus
                            if (attrList.map['data-mce-bogus']) {
                                isValidElement = false;
                            }
                        }

                        if (isValidElement) {
                            self.start(value, attrList, isShortEnded);
                        }
                    } else {
                        isValidElement = false;
                    }

                    // Treat script, noscript and style a bit different since they may include code that looks like elements.
                    // Only own keys are looked up so that tag names like "constructor" don't resolve to Object.prototype members.
                    endRegExp = specialElements.hasOwnProperty(value) ? specialElements[value] : null;
                    if (endRegExp) {
                        endRegExp.lastIndex = index = matches.index + matches[0].length;

                        if ((matches = endRegExp.exec(html))) {
                            if (isValidElement) {
                                text = html.substr(index, matches.index - index);
                            }

                            index = matches.index + matches[0].length;
                        } else {
                            // No end tag found, the rest of the input is the element contents
                            text = html.substr(index);
                            index = html.length;
                        }

                        if (isValidElement && text.length > 0) {
                            self.text(text, true);
                        }

                        if (isValidElement) {
                            self.end(value);
                        }

                        // Resume tokenizing after the end tag of the special element
                        tokenRegExp.lastIndex = index;
                        continue;
                    }

                    // Push value on to stack
                    if (!isShortEnded) {
                        // The tag counts as self closed (<b />) only when the first "/" of its attribute string is the last character
                        if (!attribsValue || attribsValue.indexOf('/') != attribsValue.length - 1) {
                            stack.push({name: value, valid: isValidElement});
                        } else if (isValidElement) {
                            self.end(value);
                        }
                    }
                } else if ((value = matches[1])) { // Comment
                    self.comment(value);
                } else if ((value = matches[2])) { // CDATA
                    self.cdata(value);
                } else if ((value = matches[3])) { // DOCTYPE
                    self.doctype(value);
                } else if ((value = matches[4])) { // PI
                    self.pi(value, matches[5]);
                }

                index = matches.index + matches[0].length;
            }

            // Text after the last token
            if (index < html.length) {
                self.text(decode(html.substr(index)));
            }

            // Close any open elements
            for (i = stack.length - 1; i >= 0; i--) {
                value = stack[i];

                if (value.valid) {
                    self.end(value.name);
                }
            }
        };
    };
})(tinymce);