Home | History | Annotate | Download | only in string
      1 // Copyright 2013 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // Tests the new String.prototype.normalize method.
     29 
     30 assertEquals(String.prototype.normalize.length, 0);
     31 assertEquals(String.prototype.propertyIsEnumerable("normalize"), false);
     32 
     33 // Common use case when searching for 'not very exact' match.
     34 // These are examples of data one might encounter in real use.
     35 var testRealUseCases = function() {
     36   // Vietnamese legacy text, old Windows 9x / non-Unicode applications use
     37   // windows-1258 code page, which is neither precomposed, nor decomposed.
     38   assertEquals('ti\u00ea\u0301ng Vi\u00ea\u0323t'.normalize('NFKD'),
     39    'ti\u1ebfng Vi\u1ec7t'.normalize('NFKD')); // all precomposed
     40 
     41   // Various kinds of spaces
     42   assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
     43     'Google\u00a0Maps'.normalize('NFKD')); // non-breaking space
     44   assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
     45     'Google\u2002Maps'.normalize('NFKD')); // en-space
     46   assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
     47     'Google\u2003Maps'.normalize('NFKD')); // em-space
     48   assertEquals('Google\u0020Maps'.normalize('NFKD'), // normal space
     49     'Google\u3000Maps'.normalize('NFKC')); // ideographic space
     50 
     51   // Latin small ligature "fi"
     52   assertEquals('fi'.normalize('NFKD'), '\ufb01'.normalize('NFKD'));
     53 
     54   // , Latin small L with middle dot, used in Catalan and often represented
     55   // as decomposed for non-Unicode environments ( l + )
     56   assertEquals('l\u00b7'.normalize('NFKD'), '\u0140'.normalize('NFKD'));
     57 
     58   // Legacy text, Japanese narrow Kana (MS-DOS & Win 3.x time)
     59   assertEquals('\u30d1\u30bd\u30b3\u30f3'.normalize('NFKD'), //   :  wide
     60     '\uff8a\uff9f\uff7f\uff7a\uff9d'.normalize('NFKD')); //   :  narrow
     61   // Also for Japanese, Latin fullwidth forms vs. ASCII
     62   assertEquals('ABCD'.normalize('NFKD'),
     63     '\uff21\uff22\uff23\uff24'.normalize('NFKD')); // , fullwidth
     64 }();
     65 
     66 
     67 var testEdgeCases = function() {
     68   // Make sure we throw RangeError, as the standard requires.
     69   assertThrows('"".normalize(1234)', RangeError);
     70   assertThrows('"".normalize("BAD")', RangeError);
     71 
     72   // The standard does not say what kind of exceptions we should throw, so we
     73   // will not be specific. But we still test that we throw errors.
     74   assertThrows('s.normalize()'); // s is not defined
     75   assertThrows('var s = null; s.normalize()');
     76   assertThrows('var s = undefined; s.normalize()');
     77   assertThrows('var s = 1234; s.normalize()'); // no normalize for non-strings
     78 }();
     79 
     80 
     81 // Several kinds of mappings. No need to be comprehensive, we don't test
     82 // the ICU functionality, we only test C - JavaScript 'glue'
     83 var testData = [
     84   // org, default, NFC, NFD, NKFC, NKFD
     85   ['\u00c7', //  : Combining sequence, Latin 1
     86     '\u00c7', '\u0043\u0327',
     87     '\u00c7', '\u0043\u0327'],
     88   ['\u0218', //  : Combining sequence, non-Latin 1
     89     '\u0218', '\u0053\u0326',
     90     '\u0218', '\u0053\u0326'],
     91   ['\uac00', //  : Hangul
     92     '\uac00', '\u1100\u1161',
     93     '\uac00', '\u1100\u1161'],
     94   ['\uff76', //  : Narrow Kana
     95     '\uff76', '\uff76',
     96     '\u30ab', '\u30ab'],
     97   ['\u00bc', //  : Fractions
     98     '\u00bc', '\u00bc',
     99     '\u0031\u2044\u0034', '\u0031\u2044\u0034'],
    100   ['\u01c6', //   : Latin ligature
    101     '\u01c6', '\u01c6',
    102     '\u0064\u017e', '\u0064\u007a\u030c'],
    103   ['s\u0307\u0323', // s + dot above + dot below, ordering of combining marks
    104     '\u1e69', 's\u0323\u0307',
    105     '\u1e69', 's\u0323\u0307'],
    106   ['\u3300', //  : Squared characters
    107     '\u3300', '\u3300',
    108     '\u30a2\u30d1\u30fc\u30c8', // 
    109     '\u30a2\u30cf\u309a\u30fc\u30c8'], // 
    110   ['\ufe37', //  : Vertical forms
    111     '\ufe37', '\ufe37',
    112     '{' , '{'],
    113   ['\u2079', //  : superscript 9
    114     '\u2079', '\u2079',
    115     '9', '9'],
    116   ['\ufee5\ufee6\ufee7\ufee8', // Arabic forms
    117     '\ufee5\ufee6\ufee7\ufee8', '\ufee5\ufee6\ufee7\ufee8',
    118     '\u0646\u0646\u0646\u0646', '\u0646\u0646\u0646\u0646'],
    119   ['\u2460', //  : Circled
    120     '\u2460', '\u2460',
    121     '1', '1'],
    122   ['\u210c', //  : Font variants
    123     '\u210c', '\u210c',
    124     'H', 'H'],
    125   ['\u2126', //  : Singleton, OHM sign vs. Greek capital letter OMEGA
    126     '\u03a9', '\u03a9',
    127     '\u03a9', '\u03a9'],
    128   ['\ufdfb', // Long ligature, ARABIC LIGATURE JALLAJALALOUHOU
    129     '\ufdfb', '\ufdfb',
    130     '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647',
    131     '\u062C\u0644\u0020\u062C\u0644\u0627\u0644\u0647']
    132 ];
    133 
    134 var testArray = function() {
    135   var kNFC = 1, kNFD = 2, kNFKC = 3, kNFKD = 4;
    136   for (var i = 0; i < testData.length; ++i) {
    137     // the original, NFC and NFD should normalize to the same thing
    138     for (var column = 0; column < 3; ++column) {
    139       var str = testData[i][column];
    140       assertEquals(str.normalize(), testData[i][kNFC]); // defaults to NFC
    141       assertEquals(str.normalize('NFC'), testData[i][kNFC]);
    142       assertEquals(str.normalize('NFD'), testData[i][kNFD]);
    143       assertEquals(str.normalize('NFKC'), testData[i][kNFKC]);
    144       assertEquals(str.normalize('NFKD'), testData[i][kNFKD]);
    145     }
    146   }
    147 }();
    148