Home | History | Annotate | Download | only in mjsunit
      1 // Copyright 2009 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // Test Unicode character ranges in regexps.
     29 
     30 
     31 // Cyrillic.
     32 var cyrillic = {
     33   FIRST: "\u0410",   // A
     34   first: "\u0430",   // a
     35   LAST: "\u042f",    // YA
     36   last: "\u044f",    // ya
     37   MIDDLE: "\u0427",  // CHE
     38   middle: "\u0447",   // che
     39   // Actually no characters are between the cases in Cyrillic.
     40   BetweenCases: false};
     41 
     42 var SIGMA = "\u03a3";
     43 var sigma = "\u03c3";
     44 var alternative_sigma = "\u03c2";
     45 
     46 // Greek.
     47 var greek = {
     48   FIRST: "\u0391",     // ALPHA
     49   first: "\u03b1",     // alpha
     50   LAST: "\u03a9",      // OMEGA
     51   last: "\u03c9",      // omega
     52   MIDDLE: SIGMA,       // SIGMA
     53   middle: sigma,       // sigma
     54   // Epsilon acute is between ALPHA-OMEGA and alpha-omega, ie it
     55   // is between OMEGA and alpha.
     56   BetweenCases: "\u03ad"};
     57 
     58 
     59 function Range(from, to, flags) {
     60   return new RegExp("[" + from + "-" + to + "]", flags);
     61 }
     62 
     63 // Test Cyrillic and Greek separately.
     64 for (var lang = 0; lang < 2; lang++) {
     65   var chars = (lang == 0) ? cyrillic : greek;
     66 
     67   for (var i = 0; i < 2; i++) {
     68     var lc = (i == 0);  // Lower case.
     69     var first = lc ? chars.first : chars.FIRST;
     70     var middle = lc ? chars.middle : chars.MIDDLE;
     71     var last = lc ? chars.last : chars.LAST;
     72     var first_other_case = lc ? chars.FIRST : chars.first;
     73     var middle_other_case = lc ? chars.MIDDLE : chars.middle;
     74     var last_other_case = lc ? chars.LAST : chars.last;
     75 
     76     assertTrue(Range(first, last).test(first), 1);
     77     assertTrue(Range(first, last).test(middle), 2);
     78     assertTrue(Range(first, last).test(last), 3);
     79 
     80     assertFalse(Range(first, last).test(first_other_case), 4);
     81     assertFalse(Range(first, last).test(middle_other_case), 5);
     82     assertFalse(Range(first, last).test(last_other_case), 6);
     83 
     84     assertTrue(Range(first, last, "i").test(first), 7);
     85     assertTrue(Range(first, last, "i").test(middle), 8);
     86     assertTrue(Range(first, last, "i").test(last), 9);
     87 
     88     assertTrue(Range(first, last, "i").test(first_other_case), 10);
     89     assertTrue(Range(first, last, "i").test(middle_other_case), 11);
     90     assertTrue(Range(first, last, "i").test(last_other_case), 12);
     91 
     92     if (chars.BetweenCases) {
     93       assertFalse(Range(first, last).test(chars.BetweenCases), 13);
     94       assertFalse(Range(first, last, "i").test(chars.BetweenCases), 14);
     95     }
     96   }
     97   if (chars.BetweenCases) {
     98     assertTrue(Range(chars.FIRST, chars.last).test(chars.BetweenCases), 15);
     99     assertTrue(Range(chars.FIRST, chars.last, "i").test(chars.BetweenCases), 16);
    100   }
    101 }
    102 
    103 // Test range that covers both greek and cyrillic characters.
    104 for (key in greek) {
    105   assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
    106   if (cyrillic[key]) {
    107     assertTrue(Range(greek.FIRST, cyrillic.last).test(cyrillic[key]), 18 + key);
    108   }
    109 }
    110 
    111 for (var i = 0; i < 2; i++) {
    112   var ignore_case = (i == 0);
    113   var flag = ignore_case ? "i" : "";
    114   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.first), 19);
    115   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.middle), 20);
    116   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.last), 21);
    117 
    118   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.FIRST), 22);
    119   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.MIDDLE), 23);
    120   assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.LAST), 24);
    121 
    122   // A range that covers the lower case greek letters and the upper case cyrillic
    123   // letters.
    124   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.FIRST), 25);
    125   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.MIDDLE), 26);
    126   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.LAST), 27);
    127 
    128   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.first), 28);
    129   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.middle), 29);
    130   assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.last), 30);
    131 }
    132 
    133 
    134 // Sigma is special because there are two lower case versions of the same upper
    135 // case character.  JS requires that case independece means that you should
    136 // convert everything to upper case, so the two sigma variants are equal to each
    137 // other in a case independt comparison.
    138 for (var i = 0; i < 2; i++) {
    139   var simple = (i != 0);
    140   var name = simple ? "" : "[]";
    141   var regex = simple ? SIGMA : "[" + SIGMA + "]";
    142 
    143   assertFalse(new RegExp(regex).test(sigma), 31 + name);
    144   assertFalse(new RegExp(regex).test(alternative_sigma), 32 + name);
    145   assertTrue(new RegExp(regex).test(SIGMA), 33 + name);
    146 
    147   assertTrue(new RegExp(regex, "i").test(sigma), 34 + name);
    148   // JSC and Tracemonkey fail this one.
    149   assertTrue(new RegExp(regex, "i").test(alternative_sigma), 35 + name);
    150   assertTrue(new RegExp(regex, "i").test(SIGMA), 36 + name);
    151 
    152   regex = simple ? sigma : "[" + sigma + "]";
    153 
    154   assertTrue(new RegExp(regex).test(sigma), 41 + name);
    155   assertFalse(new RegExp(regex).test(alternative_sigma), 42 + name);
    156   assertFalse(new RegExp(regex).test(SIGMA), 43 + name);
    157 
    158   assertTrue(new RegExp(regex, "i").test(sigma), 44 + name);
    159   // JSC and Tracemonkey fail this one.
    160   assertTrue(new RegExp(regex, "i").test(alternative_sigma), 45 + name);
    161   assertTrue(new RegExp(regex, "i").test(SIGMA), 46 + name);
    162 
    163   regex = simple ? alternative_sigma : "[" + alternative_sigma + "]";
    164 
    165   assertFalse(new RegExp(regex).test(sigma), 51 + name);
    166   assertTrue(new RegExp(regex).test(alternative_sigma), 52 + name);
    167   assertFalse(new RegExp(regex).test(SIGMA), 53 + name);
    168 
    169   // JSC and Tracemonkey fail this one.
    170   assertTrue(new RegExp(regex, "i").test(sigma), 54 + name);
    171   assertTrue(new RegExp(regex, "i").test(alternative_sigma), 55 + name);
    172   // JSC and Tracemonkey fail this one.
    173   assertTrue(new RegExp(regex, "i").test(SIGMA), 56 + name);
    174 }
    175 
    176 
    177 for (var add_non_ascii_character_to_subject = 0;
    178      add_non_ascii_character_to_subject < 2;
    179      add_non_ascii_character_to_subject++) {
    180   var suffix = add_non_ascii_character_to_subject ? "\ufffe" : "";
    181   // A range that covers both ASCII and non-ASCII.
    182   for (var i = 0; i < 2; i++) {
    183     var full = (i != 0);
    184     var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
    185     var f = full ? "f" : "c";
    186     for (var j = 0; j < 2; j++) {
    187       var ignore_case = (j == 0);
    188       var flag = ignore_case ? "i" : "";
    189       var re = new RegExp(mixed, flag);
    190       var expected =
    191           ignore_case || (full && !!add_non_ascii_character_to_subject);
    192       assertEquals(expected, re.test("A" + suffix), 58 + flag + f);
    193       assertTrue(re.test("a" + suffix), 59 + flag + f);
    194       assertTrue(re.test("~" + suffix), 60 + flag + f);
    195       assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
    196       assertEquals(ignore_case || full, re.test(cyrillic.middle), 62 + flag + f);
    197     }
    198   }
    199 }
    200