// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Tests the new String.prototype.normalize method.
//
// NOTE(review): assertEquals and assertThrows are not defined in this file;
// presumably they are supplied by the surrounding test harness - confirm.

assertEquals(String.prototype.normalize.length, 0);
assertEquals(String.prototype.propertyIsEnumerable("normalize"), false);

// Common use case when searching for 'not very exact' match.
// These are examples of data one might encounter in real use.
// The function is invoked immediately, so the checks run at load time.
var testRealUseCases = function() {
  // Vietnamese legacy text, old Windows 9x / non-Unicode applications use
  // windows-1258 code page, which is neither precomposed, nor decomposed.
  assertEquals('ti\u00ea\u0301ng Vi\u00ea\u0323t'.normalize('NFKD'),
               'ti\u1ebfng Vi\u1ec7t'.normalize('NFKD'));  // all precomposed

  // Various kinds of spaces
  assertEquals('Google\u0020Maps'.normalize('NFKD'),   // normal space
               'Google\u00a0Maps'.normalize('NFKD'));  // non-breaking space
  assertEquals('Google\u0020Maps'.normalize('NFKD'),   // normal space
               'Google\u2002Maps'.normalize('NFKD'));  // en-space
  assertEquals('Google\u0020Maps'.normalize('NFKD'),   // normal space
               'Google\u2003Maps'.normalize('NFKD'));  // em-space
  // This comparison uses NFKC on the right-hand side, unlike the three above.
  assertEquals('Google\u0020Maps'.normalize('NFKD'),   // normal space
               'Google\u3000Maps'.normalize('NFKC'));  // ideographic space

  // Latin small ligature "fi" (U+FB01)
  assertEquals('fi'.normalize('NFKD'), '\ufb01'.normalize('NFKD'));

  // U+0140, Latin small L with middle dot, used in Catalan and often
  // represented as decomposed for non-Unicode environments (l + U+00B7)
  assertEquals('l\u00b7'.normalize('NFKD'), '\u0140'.normalize('NFKD'));

  // Legacy text, Japanese narrow Kana (MS-DOS & Win 3.x time)
  assertEquals('\u30d1\u30bd\u30b3\u30f3'.normalize('NFKD'),  // wide
               '\uff8a\uff9f\uff7f\uff7a\uff9d'.normalize('NFKD'));  // narrow
  // Also for Japanese, Latin fullwidth forms vs. ASCII
  assertEquals('ABCD'.normalize('NFKD'),
               '\uff21\uff22\uff23\uff24'.normalize('NFKD'));  // fullwidth
}();


// Checks that invalid form arguments and invalid receivers raise errors.
var testEdgeCases = function() {
  // Make sure we throw RangeError, as the standard requires.
  assertThrows('"".normalize(1234)', RangeError);
  assertThrows('"".normalize("BAD")', RangeError);

  // The standard does not say what kind of exceptions we should throw, so we
  // will not be specific. But we still test that we throw errors.
  assertThrows('s.normalize()');  // s is not defined
  assertThrows('var s = null; s.normalize()');
  assertThrows('var s = undefined; s.normalize()');
  assertThrows('var s = 1234; s.normalize()');  // no normalize for non-strings
}();


// Several kinds of mappings. No need to be comprehensive, we don't test
// the ICU functionality, we only test C - JavaScript 'glue'
var testData = [
  // Columns: org, NFC (also the default form), NFD, NFKC, NFKD
  ['\u00c7',  // Combining sequence, Latin 1
      '\u00c7', '\u0043\u0327',
      '\u00c7', '\u0043\u0327'],
  ['\u0218',  // Combining sequence, non-Latin 1
      '\u0218', '\u0053\u0326',
      '\u0218', '\u0053\u0326'],
  ['\uac00',  // Hangul
      '\uac00', '\u1100\u1161',
      '\uac00', '\u1100\u1161'],
  ['\uff76',  // Narrow Kana
      '\uff76', '\uff76',
      '\u30ab', '\u30ab'],
  ['\u00bc',  // Fractions
      '\u00bc', '\u00bc',
      '\u0031\u2044\u0034', '\u0031\u2044\u0034'],
  ['\u01c6',  // Latin ligature
      '\u01c6', '\u01c6',
      '\u0064\u017e', '\u0064\u007a\u030c'],
  ['s\u0307\u0323',  // s + dot above + dot below, ordering of combining marks
      '\u1e69', 's\u0323\u0307',
      '\u1e69', 's\u0323\u0307'],
  ['\u3300',  // Squared characters
      '\u3300', '\u3300',
      '\u30a2\u30d1\u30fc\u30c8',  // NFKC
      '\u30a2\u30cf\u309a\u30fc\u30c8'],  // NFKD
  ['\ufe37',  // Vertical forms
      '\ufe37', '\ufe37',
      '{' , '{'],
  ['\u2079',  // superscript 9
      '\u2079', '\u2079',
      '9', '9'],
  ['\ufee5\ufee6\ufee7\ufee8',  // Arabic forms
      '\ufee5\ufee6\ufee7\ufee8', '\ufee5\ufee6\ufee7\ufee8',
      '\u0646\u0646\u0646\u0646', '\u0646\u0646\u0646\u0646'],
  ['\u2460',  // Circled
      '\u2460', '\u2460',
      '1', '1'],
  ['\u210c',  // Font variants
      '\u210c', '\u210c',
      'H', 'H'],
  ['\u2126',  // Singleton, OHM sign vs. Greek capital letter OMEGA
      '\u03a9', '\u03a9',
      '\u03a9', '\u03a9'],
  ['\ufdfb',  // Long ligature, ARABIC LIGATURE JALLAJALALOUHOU
      '\ufdfb', '\ufdfb',
      '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647',
      '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647']
];

// Runs every row of testData through normalize() in each form and compares
// the result with the expected column of that row.
var testArray = function() {
  // Column indices into each testData row (index 0 is the original string).
  var kNFC = 1, kNFD = 2, kNFKC = 3, kNFKD = 4;
  for (var i = 0; i < testData.length; ++i) {
    // the original, NFC and NFD should normalize to the same thing
    for (var column = 0; column < 3; ++column) {
      var str = testData[i][column];
      assertEquals(str.normalize(), testData[i][kNFC]);  // defaults to NFC
      assertEquals(str.normalize('NFC'), testData[i][kNFC]);
      assertEquals(str.normalize('NFD'), testData[i][kNFD]);
      assertEquals(str.normalize('NFKC'), testData[i][kNFKC]);
      assertEquals(str.normalize('NFKD'), testData[i][kNFKD]);
    }
  }
}();