2 Copyright (c) 2010, Yahoo! Inc. All rights reserved.
3 Code licensed under the BSD License:
4 http://developer.yahoo.com/yui/license.html
8 YUI.add('text-accentfold', function(Y) {
18 * Provides a basic accent folding implementation that converts common accented
19 * letters (like "á") to their non-accented forms (like "a").
22 * @submodule text-accentfold
27 * Provides a basic accent folding implementation that converts common accented
28 * letters (like "á") to their non-accented forms (like "a").
32 * This implementation is not comprehensive, and should only be used as a last
33 * resort when accent folding can't be done on the server. A comprehensive
34 * accent folding implementation would require much more character data to be
35 * sent to the browser, resulting in a significant performance penalty. This
36 * implementation strives for a compromise between usefulness and performance.
40 * Accent folding is a destructive operation that can't be reversed, and may
41 * change or destroy the actual meaning of the text depending on the language.
42 * It should not be used on strings that will later be displayed to a user,
43 * unless this is done with the understanding that linguistic meaning may be
44 * lost and that you may in fact confuse or insult the user by doing so.
48 * When used for matching, accent folding is likely to produce erroneous matches
49 * for languages in which characters with diacritics are considered different
50 * from their base characters, or where correct folding would map to other
51 * character sequences than just stripped characters. For example, in German
52 * "ü" is a character that's clearly different from "u" and should match "ue"
53 * instead. The word "betrügen" means "to defraud", while "betrugen" is the past
54 * tense of "to behave". The name "Müller" is expected to match "Mueller", but
55 * not "Muller". On the other hand, accent folding falls short for languages
56 * where different base characters are expected to match. In Japanese, for
57 * example, hiragana and katakana characters with the same pronunciation ("あ"
58 * and "ア") are commonly treated as equivalent for lookups, but accent folding
59 * treats them as different.
62 * @class Text.AccentFold
68 FoldData = Text.Data.AccentFold,
71 // -- Public Static Methods ------------------------------------------------
74 * Returns <code>true</code> if the specified string contains one or more
75 * characters that can be folded, <code>false</code> otherwise.
78 * @param {String} string String to test.
82 canFold: function (string) {
// The input is searched as given (no lowercasing here); the FoldData
// patterns are declared with the `i` flag, so case does not matter.
// NOTE(review): as listed in this file, each FoldData character class also
// ends with its plain base letter (e.g. the `a` class ends in "a") - possibly
// a normalized full-width form. If that is literal, unaccented text matches
// as well. Confirm against the data module.
85 for (letter in FoldData) {
// Own-property guard: ignore anything inherited via the prototype chain.
86 if (FoldData.hasOwnProperty(letter) &&
// String#search returns -1 when the pattern matches nowhere in the string.
87 string.search(FoldData[letter]) !== -1) {
96 * Compares the accent-folded versions of two strings and returns
97 * <code>true</code> if they're the same, <code>false</code> otherwise. If
98 * a custom comparison function is supplied, the accent-folded strings will
99 * be passed to that function for comparison.
102 * @param {String} a First string to compare.
103 * @param {String} b Second string to compare.
104 * @param {Function} func (optional) Custom comparison function. Should
105 * return a truthy or falsy value.
106 * @return {Boolean} Results of the comparison.
109 compare: function (a, b, func) {
// Fold both operands up front so either comparison path below sees folded
// text only. fold() is reached through AccentFold rather than `this`, so the
// method does not depend on how it is invoked.
110 var aFolded = AccentFold.fold(a),
111 bFolded = AccentFold.fold(b);
// With a comparator, coerce its result to a strict boolean via `!!`;
// without one, fall back to strict equality of the two folded values.
113 return func ? !!func(aFolded, bFolded) : aFolded === bFolded;
118 * Returns a copy of <em>haystack</em> containing only the strings for which
119 * the supplied function returns <code>true</code>.
123 * While comparisons will be made using accent-folded strings, the returned
124 * array of matches will contain the original strings that were passed in.
128 * @param {Array} haystack Array of strings to filter.
129 * @param {Function} func Comparison function. Will receive an accent-folded
130 * haystack string as an argument, and should return a truthy or falsy value.
132 * @return {Array} Filtered copy of <em>haystack</em>.
135 filter: function (haystack, func) {
// The folded text exists only for the duration of the callback: func sees
// the folded form of each item, and its return value is handed straight back
// to YArray.filter as the keep/drop verdict for the original item.
136 return YArray.filter(haystack, function (item) {
137 return func(AccentFold.fold(item));
142 * Accent-folds the specified string or array of strings and returns a copy
143 * in which common accented letters have been converted to their closest
144 * non-accented, lowercase forms.
147 * @param {String|Array} input String or array of strings to be folded.
148 * @return {String|Array} Folded string or array of strings.
151 fold: function (input) {
// Arrays are handled by mapping this same function over the elements. The
// reference goes through AccentFold (not `this`), so it still works when
// passed as a detached callback.
152 if (Y.Lang.isArray(input)) {
153 return YArray.map(input, AccentFold.fold);
// From here on input is used as a string: it is lowercased first, then
// rewritten once per FoldData entry.
156 input = input.toLowerCase();
// The callback receives (value, key): the pattern, then the letter it is keyed
// under in FoldData. Each pattern is declared with the `g` flag, so every
// match in the string is replaced by that letter, not just the first.
158 Y.Object.each(FoldData, function (regex, letter) {
159 input = input.replace(regex, letter);
166 Text.AccentFold = AccentFold;
169 }, '3.3.0' ,{requires:['array-extras', 'text-data-accentfold']});
170 YUI.add('text-data-accentfold', function(Y) {
172 // The following tool was very helpful in creating these mappings:
173 // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:toNFKD%3D/^a/:]&abb=on
175 Y.namespace('Text.Data').AccentFold = {
186 a: /[ªà-åāăąǎǟǡǻȁȃȧᵃḁẚạảấầẩẫậắằẳẵặⓐa]/gi,
190 e: /[è-ëēĕėęěȅȇȩᵉḕḗḙḛḝẹẻẽếềểễệₑℯⓔe]/gi,
192 g: /[ĝğġģǧǵᵍḡℊⓖg]/gi,
193 h: /[ĥȟʰḣḥḧḩḫẖℎⓗh]/gi,
194 i: /[ì-ïĩīĭįijǐȉȋᵢḭḯỉịⁱℹⅰⓘi]/gi,
197 l: /[ĺļľŀljˡḷḹḻḽℓⅼⓛl]/gi,
199 n: /[ñńņňǹṅṇṉṋⁿⓝn]/gi,
200 o: /[ºò-öōŏőơǒǫǭȍȏȫȭȯȱᵒṍṏṑṓọỏốồổỗộớờởỡợₒℴⓞo]/gi,
203 r: /[ŕŗřȑȓʳᵣṙṛṝṟⓡr]/gi,
204 s: /[śŝşšſșˢṡṣṥṧṩẛⓢs]/gi,
205 t: /[ţťțᵗṫṭṯṱẗⓣt]/gi,
206 u: /[ù-üũūŭůűųưǔǖǘǚǜȕȗᵘᵤṳṵṷṹṻụủứừửữựⓤu]/gi,
210 y: /[ýÿŷȳʸẏẙỳỵỷỹⓨy]/gi,
216 YUI.add('text-data-wordbreak', function(Y) {
218 Y.namespace('Text.Data').WordBreak = {
219 // The UnicodeSet utility is helpful for enumerating the specific code
220 // points covered by each of these regular expressions:
221 // http://unicode.org/cldr/utility/list-unicodeset.jsp
223 // The code sets from which these regexes were derived can be generated
224 // by the UnicodeSet utility using the links here:
225 // http://unicode.org/cldr/utility/properties.jsp?a=Word_Break#Word_Break
227 aletter : '[A-Za-zªµºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬˮͰ-ʹͶͷͺ-ͽΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԧԱ-Ֆՙա-ևא-תװ-׳ؠ-يٮٯٱ-ۓەۥۦۮۯۺ-ۼۿܐܒ-ܯݍ-ޥޱߊ-ߪߴߵߺࠀ-ࠕࠚࠤࠨࡀ-ࡘऄ-हऽॐक़-ॡॱ-ॷॹ-ॿঅ-ঌএঐও-নপ-রলশ-হঽৎড়ঢ়য়-ৡৰৱਅ-ਊਏਐਓ-ਨਪ-ਰਲਲ਼ਵਸ਼ਸਹਖ਼-ੜਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલળવ-હઽૐૠૡଅ-ଌଏଐଓ-ନପ-ରଲଳଵ-ହଽଡ଼ଢ଼ୟ-ୡୱஃஅ-ஊஎ-ஐஒ-கஙசஜஞடணதந-பம-ஹௐఅ-ఌఎ-ఐఒ-నప-ళవ-హఽౘౙౠౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽೞೠೡೱೲഅ-ഌഎ-ഐഒ-ഺഽൎൠൡൺ-ൿඅ-ඖක-නඳ-රලව-ෆༀཀ-ཇཉ-ཬྈ-ྌႠ-Ⴥა-ჺჼᄀ-ቈቊ-ቍቐ-ቖቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛮ-ᛰᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰᠠ-ᡷᢀ-ᢨᢪᢰ-ᣵᤀ-ᤜᨀ-ᨖᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮᮯᯀ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱⁿₐ-ₜℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎⅠ-ↈⒶ-ⓩⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⴀ-ⴥⴰ-ⵥⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ々〻〼ㄅ-ㄭㄱ-ㆎㆠ-ㆺꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪꘫꙀ-ꙮꙿ-ꚗꚠ-ꛯꜗ-ꜟꜢ-ꞈꞋ-ꞎꞐꞑꞠ-ꞩꟺ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏꨀ-ꨨꩀ-ꩂꩄ-ꩋꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꯀ-ꯢ가-힣ힰ-ퟆퟋ-ퟻff-stﬓ-ﬗיִײַ-ﬨשׁ-זּטּ-לּמּנּסּףּפּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zᅠ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ]',
228 midnumlet : "['\\.‘’․﹒'.]",
229 midletter : '[:··״‧︓﹕:]',
230 midnum : '[,;;։،؍٬߸⁄︐︔﹐﹔,;]',
231 numeric : '[0-9٠-٩٫۰-۹߀-߉०-९০-৯੦-੯૦-૯୦-୯௦-௯౦-౯೦-೯൦-൯๐-๙໐-໙༠-༩၀-၉႐-႙០-៩᠐-᠙᥆-᥏᧐-᧙᪀-᪉᪐-᪙᭐-᭙᮰-᮹᱀-᱉᱐-᱙꘠-꘩꣐-꣙꤀-꤉꧐-꧙꩐-꩙꯰-꯹]',
234 newline : '[\u000B\u000C\u0085\u2028\u2029]',
235 extend : '[\u0300-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u0900-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C82\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0D02\u0D03\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D82\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB\u0EBC\u0EC8-\u0ECD\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A-\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u17B6-\u17D3\u17DD\u180B-\u180D\u18A9\u1920-\u192B\u1930-\u193B\u19B0-\u19C0\u19C8\u19C9\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAA\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF2\u1DC0-\u1DE6\u1DFC-\u1DFF\u200C\u200D\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA66F-\uA672\uA67C\uA67D\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA880\uA881\uA8B4-\uA8C4\uA8E0-\uA8F1\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAA7B\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F
\uFE20-\uFE26\uFF9E\uFF9F]',
236 format : '[\u00AD\u0600-\u0603\u06DD\u070F\u17B4\u17B5\u200E\u200F\u202A-\u202E\u2060-\u2064\u206A-\u206F\uFEFF\uFFF9-\uFFFB]',
237 katakana : '[〱-〵゛゜゠-ヺー-ヿㇰ-ㇿ㋐-㋾㌀-㍗ヲ-ン]',
238 extendnumlet: '[_‿⁀⁔︳︴﹍-﹏_]',
239 punctuation : '[!-#%-*,-\\/:;?@\\[-\\]_{}¡«·»¿;·՚-՟։֊־׀׃׆׳״؉؊،؍؛؞؟٪-٭۔܀-܍߷-߹࠰-࠾࡞।॥॰෴๏๚๛༄-༒༺-༽྅࿐-࿔࿙࿚၊-၏჻፡-፨᐀᙭᙮᚛᚜᛫-᛭᜵᜶។-៖៘-៚᠀-᠊᥄᥅᨞᨟᪠-᪦᪨-᪭᭚-᭠᯼-᯿᰻-᰿᱾᱿᳓‐-‧‰-⁃⁅-⁑⁓-⁞⁽⁾₍₎〈〉❨-❵⟅⟆⟦-⟯⦃-⦘⧘-⧛⧼⧽⳹-⳼⳾⳿⵰⸀-⸮⸰⸱、-〃〈-】〔-〟〰〽゠・꓾꓿꘍-꘏꙳꙾꛲-꛷꡴-꡷꣎꣏꣸-꣺꤮꤯꥟꧁-꧍꧞꧟꩜-꩟꫞꫟꯫﴾﴿︐-︙︰-﹒﹔-﹡﹣﹨﹪﹫!-#%-*,-/:;?@[-]_{}⦅-・]'
244 YUI.add('text-wordbreak', function(Y) {
247 * Provides utility methods for splitting strings on word breaks and determining
248 * whether a character index represents a word boundary.
251 * @submodule text-wordbreak
256 * Provides utility methods for splitting strings on word breaks and determining
257 * whether a character index represents a word boundary, using the generic word
258 * breaking algorithm defined in the Unicode Text Segmentation guidelines
259 * (<a href="http://unicode.org/reports/tr29/#Word_Boundaries">Unicode Standard Annex #29</a>).
264 * This algorithm provides a reasonable default for many languages. However, it
265 * does not cover language or context specific requirements, and it does not
266 * provide meaningful results at all for languages that don't use spaces between
267 * words, such as Chinese, Japanese, Thai, Lao, Khmer, and others. Server-based
268 * word breaking services usually provide significantly better results with
269 * better performance.
272 * @class Text.WordBreak
277 WBData = Text.Data.WordBreak,
279 // Constants representing code point classifications.
294 // RegExp objects generated from code point data. Each regex matches a single
295 // character against a set of Unicode code points. The index of each item in
296 // this array must match its corresponding code point constant value defined above.
// Every pattern is compiled from a WBData string with no flags and no anchors.
// NOTE(review): WBData.cr and WBData.lf are not among the WordBreak data keys
// visible in this file - confirm they are defined, because a RegExp built from
// undefined matches every string.
299 new RegExp(WBData.aletter),
300 new RegExp(WBData.midnumlet),
301 new RegExp(WBData.midletter),
302 new RegExp(WBData.midnum),
303 new RegExp(WBData.numeric),
304 new RegExp(WBData.cr),
305 new RegExp(WBData.lf),
306 new RegExp(WBData.newline),
307 new RegExp(WBData.extend),
308 new RegExp(WBData.format),
309 new RegExp(WBData.katakana),
310 new RegExp(WBData.extendnumlet)
// Unlike the entries above, PUNCTUATION is anchored with ^ and $, so it only
// matches a string that consists of exactly one punctuation character.
314 PUNCTUATION = new RegExp('^' + WBData.punctuation + '$'),
318 // -- Public Static Methods ------------------------------------------------
321 * Splits the specified string into an array of individual words.
324 * @param {String} string String to split.
325 * @param {Object} options (optional) Options object containing zero or more
326 * of the following properties:
329 * <dt>ignoreCase (Boolean)</dt>
331 * If <code>true</code>, the string will be converted to lowercase
332 * before being split. Default is <code>false</code>.
335 * <dt>includePunctuation (Boolean)</dt>
337 * If <code>true</code>, the returned array will include punctuation
338 * characters. Default is <code>false</code>.
341 * <dt>includeWhitespace (Boolean)</dt>
343 * If <code>true</code>, the returned array will include whitespace
344 * characters. Default is <code>false</code>.
347 * @return {Array} Array of words.
350 getWords: function (string, options) {
// NOTE(review): the classification map is built from the string as passed in,
// before the optional lowercasing below, while the loop reads characters from
// the (possibly lowercased) string using the same index. toLowerCase() can
// change a string's length for a few characters (e.g. U+0130), so confirm the
// map and the string stay aligned for such input.
352 map = WordBreak._classify(string),
364 if (options.ignoreCase) {
365 string = string.toLowerCase();
// Read the two filter flags once, ahead of the loop.
368 includePunctuation = options.includePunctuation;
369 includeWhitespace = options.includeWhitespace;
371 // Loop through each character in the classification map and determine
372 // whether it precedes a word boundary, building an array of distinct words.
374 for (; i < len; ++i) {
375 chr = string.charAt(i);
377 // Append this character to the current word.
380 // If there's a word boundary between the current character and the
381 // next character, append the current word to the words array and
382 // start building a new word.
383 if (WordBreak._isWordBoundary(map, i)) {
// Collapse the accumulated pieces into one string; `word` is rebound to it.
384 word = word.join(EMPTY_STRING);
// A segment that tests as whitespace or punctuation satisfies this condition
// only when the matching include* option is truthy. PUNCTUATION is anchored
// to a single character, so only one-character segments can test as
// punctuation.
387 (includeWhitespace || !WHITESPACE.test(word)) &&
388 (includePunctuation || !PUNCTUATION.test(word))) {
400 * Returns an array containing only unique words from the specified string.
401 * For example, the string <code>'foo bar baz foo'</code> would result in
402 * the array <code>['foo', 'bar', 'baz']</code>.
404 * @method getUniqueWords
405 * @param {String} string String to split.
406 * @param {Object} options (optional) Options (see <code>getWords()</code> for details).
408 * @return {Array} Array of unique words.
411 getUniqueWords: function (string, options) {
// Splitting is delegated entirely to getWords(), with options forwarded
// untouched, so case handling and punctuation/whitespace filtering are
// whatever getWords() applies. The resulting array is then passed through
// Y.Array.unique.
412 return Y.Array.unique(WordBreak.getWords(string, options));
417 * Returns <code>true</code> if there is a word boundary between the
418 * specified character index and the next character index (or the end of the string).
423 * Note that there are always word breaks at the beginning and end of a
424 * string, so <code>isWordBoundary('', 0)</code> and
425 * <code>isWordBoundary('a', 0)</code> will both return <code>true</code>.
428 * @method isWordBoundary
429 * @param {String} string String to test.
430 * @param {Number} index Character index to test within the string.
431 * @return {Boolean} <code>true</code> for a word boundary,
432 * <code>false</code> otherwise.
435 isWordBoundary: function (string, index) {
// Public wrapper: builds a fresh classification map for the whole string on
// every call, then delegates to the map-based _isWordBoundary(). getWords()
// instead classifies once and reuses the map for every index it tests.
436 return WordBreak._isWordBoundary(WordBreak._classify(string), index);
439 // -- Protected Static Methods ---------------------------------------------
442 * Returns a character classification map for the specified string.
445 * @param {String} string String to classify.
446 * @return {Array} Classification map.
450 _classify: function (string) {
// Both lengths are read once, before the loops start.
456 stringLength = string.length,
457 setsLength = SETS.length,
// Outer loop: one UTF-16 code unit at a time (charAt), so a character outside
// the Basic Multilingual Plane is seen as two separate surrogate halves.
460 for (; i < stringLength; ++i) {
461 chr = string.charAt(i);
// Inner loop: try the SETS patterns in index order.
464 for (j = 0; j < setsLength; ++j) {
// The `set &&` guard skips any falsy slot in SETS before calling test().
// Presumably the index of the first matching pattern becomes the character's
// type (SETS is documented as index-aligned with the type constants) -
// TODO confirm against the full source.
467 if (set && set.test(chr)) {
481 * Returns <code>true</code> if there is a word boundary between the
482 * specified character index and the next character index (or the end of the string).
487 * Note that there are always word breaks at the beginning and end of a
488 * string, so <code>_isWordBoundary('', 0)</code> and
489 * <code>_isWordBoundary('a', 0)</code> will both return <code>true</code>.
492 * @method _isWordBoundary
493 * @param {Array} map Character classification map generated by
494 * <code>_classify</code>.
495 * @param {Number} index Character index to test.
500 _isWordBoundary: function (map, index) {
// The boundary being tested lies between index and index + 1. The rules work
// on a small window of classifications: prevType (index - 1), type
// (presumably map[index]), nextType (index + 1) and nextNextType (index + 2).
// Reads past either end of map yield undefined. The rules are tested in the
// order written below, which is not the numeric order of the WB rules.
503 nextType = map[index + 1],
// Range check. The `index !== 0` term keeps index 0 of an empty map from
// counting as out of range.
506 if (index < 0 || (index > map.length - 1 && index !== 0)) {
510 // WB5. Don't break between most letters.
511 if (type === ALETTER && nextType === ALETTER) {
515 nextNextType = map[index + 2];
517 // WB6. Don't break letters across certain punctuation.
// Here the current character is the letter in front of the punctuation.
518 if (type === ALETTER &&
519 (nextType === MIDLETTER || nextType === MIDNUMLET) &&
520 nextNextType === ALETTER) {
524 prevType = map[index - 1];
526 // WB7. Don't break letters across certain punctuation.
// Here the current character is the punctuation itself, between two letters.
527 if ((type === MIDLETTER || type === MIDNUMLET) &&
528 nextType === ALETTER &&
529 prevType === ALETTER) {
533 // WB8/WB9/WB10. Don't break inside sequences of digits or digits
534 // adjacent to letters.
535 if ((type === NUMERIC || type === ALETTER) &&
536 (nextType === NUMERIC || nextType === ALETTER)) {
540 // WB11. Don't break inside numeric sequences like "3.2" or "3,456.789".
// Here the current character is the separator, with a digit on each side.
542 if ((type === MIDNUM || type === MIDNUMLET) &&
543 nextType === NUMERIC &&
544 prevType === NUMERIC) {
548 // WB12. Don't break inside numeric sequences like "3.2" or "3,456.789".
// Here the current character is the digit in front of the separator.
550 if (type === NUMERIC &&
551 (nextType === MIDNUM || nextType === MIDNUMLET) &&
552 nextNextType === NUMERIC) {
556 // WB4. Ignore format and extend characters.
// NOTE(review): this test also fires when type is EXTEND or FORMAT at the last
// index, where nextType is undefined - confirm the intended result for
// strings that end in such a character.
557 if (type === EXTEND || type === FORMAT ||
558 prevType === EXTEND || prevType === FORMAT ||
559 nextType === EXTEND || nextType === FORMAT) {
563 // WB3. Don't break inside CRLF.
564 if (type === CR && nextType === LF) {
568 // WB3a. Break after newlines (including CR and LF).
// The current character is the newline, so the boundary follows it.
569 if (type === NEWLINE || type === CR || type === LF) {
573 // WB3b. Break before newlines (including CR and LF).
// The next character is the newline, so the boundary precedes it.
574 if (nextType === NEWLINE || nextType === CR || nextType === LF) {
578 // WB13. Don't break between Katakana characters.
579 if (type === KATAKANA && nextType === KATAKANA) {
583 // WB13a. Don't break from extenders.
// Here the extender is the next character.
584 if (nextType === EXTENDNUMLET &&
585 (type === ALETTER || type === NUMERIC || type === KATAKANA ||
586 type === EXTENDNUMLET)) {
590 // WB13b. Don't break from extenders.
// Here the extender is the current character.
591 if (type === EXTENDNUMLET &&
592 (nextType === ALETTER || nextType === NUMERIC ||
593 nextType === KATAKANA)) {
597 // Break after any character not covered by the rules above.
602 Text.WordBreak = WordBreak;
605 }, '3.3.0' ,{requires:['array-extras', 'text-data-wordbreak']});
608 YUI.add('text', function(Y){}, '3.3.0' ,{use:['text-accentfold', 'text-wordbreak']});