// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Test Unicode character ranges in regexps.
//
// NOTE(review): assertTrue, assertFalse and assertEquals are not defined in
// this file; presumably they are supplied by the test harness that loads it.
// The trailing number (or number + string) passed to each assertion is only a
// tag identifying which check failed.


// Cyrillic.
var cyrillic = {
  FIRST: "\u0410",      // A
  first: "\u0430",      // a
  LAST: "\u042f",       // YA
  last: "\u044f",       // ya
  MIDDLE: "\u0427",     // CHE
  middle: "\u0447",     // che
  // Actually no characters are between the cases in Cyrillic.
  BetweenCases: false};

var SIGMA = "\u03a3";
var sigma = "\u03c3";
var alternative_sigma = "\u03c2";

// Greek.
var greek = {
  FIRST: "\u0391",      // ALPHA
  first: "\u03b1",      // alpha
  LAST: "\u03a9",       // OMEGA
  last: "\u03c9",       // omega
  MIDDLE: SIGMA,        // SIGMA
  middle: sigma,        // sigma
  // Epsilon acute is between ALPHA-OMEGA and alpha-omega, i.e. it
  // is between OMEGA and alpha.
  BetweenCases: "\u03ad"};


// Builds the character-class regexp "[from-to]" with the given flags.
// `from` and `to` are inserted into the pattern verbatim (no escaping), so
// callers pass single literal characters. `flags` may be omitted, in which
// case the regexp is created without any flags.
function Range(from, to, flags) {
  return new RegExp("[" + from + "-" + to + "]", flags);
}

// Test Cyrillic and Greek separately.
for (var lang = 0; lang < 2; lang++) {
  var chars = (lang === 0) ? cyrillic : greek;

  // First pass uses the lower-case range, second pass the upper-case range.
  for (var i = 0; i < 2; i++) {
    var lc = (i === 0);  // Lower case.
    var first = lc ? chars.first : chars.FIRST;
    var middle = lc ? chars.middle : chars.MIDDLE;
    var last = lc ? chars.last : chars.LAST;
    var first_other_case = lc ? chars.FIRST : chars.first;
    var middle_other_case = lc ? chars.MIDDLE : chars.middle;
    var last_other_case = lc ? chars.LAST : chars.last;

    // Case-sensitive range matches its own case...
    assertTrue(Range(first, last).test(first), 1);
    assertTrue(Range(first, last).test(middle), 2);
    assertTrue(Range(first, last).test(last), 3);

    // ...but not the other case.
    assertFalse(Range(first, last).test(first_other_case), 4);
    assertFalse(Range(first, last).test(middle_other_case), 5);
    assertFalse(Range(first, last).test(last_other_case), 6);

    // Case-insensitive range matches both cases.
    assertTrue(Range(first, last, "i").test(first), 7);
    assertTrue(Range(first, last, "i").test(middle), 8);
    assertTrue(Range(first, last, "i").test(last), 9);

    assertTrue(Range(first, last, "i").test(first_other_case), 10);
    assertTrue(Range(first, last, "i").test(middle_other_case), 11);
    assertTrue(Range(first, last, "i").test(last_other_case), 12);

    // A character lying between the two case blocks belongs to neither
    // single-case range, with or without the "i" flag.
    if (chars.BetweenCases) {
      assertFalse(Range(first, last).test(chars.BetweenCases), 13);
      assertFalse(Range(first, last, "i").test(chars.BetweenCases), 14);
    }
  }
  // A range spanning both case blocks does contain the in-between character.
  if (chars.BetweenCases) {
    assertTrue(Range(chars.FIRST, chars.last).test(chars.BetweenCases), 15);
    assertTrue(Range(chars.FIRST, chars.last, "i").test(chars.BetweenCases), 16);
  }
}

// Test range that covers both greek and cyrillic characters.
// `key` is declared here so the loop does not create an implicit global
// (which would also be a ReferenceError in strict mode).
for (var key in greek) {
  assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
  // cyrillic.BetweenCases is false, so that entry is skipped.
  if (cyrillic[key]) {
    assertTrue(Range(greek.FIRST, cyrillic.last).test(cyrillic[key]), 18 + key);
  }
}

// A range that covers the lower case greek letters and the upper case
// cyrillic letters, checked with and without the "i" flag.
for (var i = 0; i < 2; i++) {
  var ignore_case = (i === 0);
  var flag = ignore_case ? "i" : "";
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.first), 19);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.middle), 20);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.last), 21);

  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.FIRST), 22);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.MIDDLE), 23);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.LAST), 24);

  // Upper case greek and lower case cyrillic letters lie outside the range,
  // so they are expected to match only when case is ignored.
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.FIRST), 25);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.MIDDLE), 26);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.LAST), 27);

  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.first), 28);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.middle), 29);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.last), 30);
}


// Sigma is special because there are two lower case versions of the same upper
// case character.  JS requires that case independence means that you should
// convert everything to upper case, so the two sigma variants are equal to each
// other in a case independent comparison.
// Each pattern is tried both as a bare atom and wrapped in a character class.
for (var i = 0; i < 2; i++) {
  var simple = (i !== 0);
  var name = simple ? "" : "[]";
  var regex = simple ? SIGMA : "[" + SIGMA + "]";

  assertFalse(new RegExp(regex).test(sigma), 31 + name);
  assertFalse(new RegExp(regex).test(alternative_sigma), 32 + name);
  assertTrue(new RegExp(regex).test(SIGMA), 33 + name);

  assertTrue(new RegExp(regex, "i").test(sigma), 34 + name);
  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(alternative_sigma), 35 + name);
  assertTrue(new RegExp(regex, "i").test(SIGMA), 36 + name);

  regex = simple ? sigma : "[" + sigma + "]";

  assertTrue(new RegExp(regex).test(sigma), 41 + name);
  assertFalse(new RegExp(regex).test(alternative_sigma), 42 + name);
  assertFalse(new RegExp(regex).test(SIGMA), 43 + name);

  assertTrue(new RegExp(regex, "i").test(sigma), 44 + name);
  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(alternative_sigma), 45 + name);
  assertTrue(new RegExp(regex, "i").test(SIGMA), 46 + name);

  regex = simple ? alternative_sigma : "[" + alternative_sigma + "]";

  assertFalse(new RegExp(regex).test(sigma), 51 + name);
  assertTrue(new RegExp(regex).test(alternative_sigma), 52 + name);
  assertFalse(new RegExp(regex).test(SIGMA), 53 + name);

  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(sigma), 54 + name);
  assertTrue(new RegExp(regex, "i").test(alternative_sigma), 55 + name);
  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(SIGMA), 56 + name);
}


// Run the mixed-range checks twice: once with pure ASCII subjects and once
// with a non-ASCII character ("\ufffe") appended to the ASCII subjects.
for (var add_non_ascii_character_to_subject = 0;
     add_non_ascii_character_to_subject < 2;
     add_non_ascii_character_to_subject++) {
  var suffix = add_non_ascii_character_to_subject ? "\ufffe" : "";
  // A range that covers both ASCII and non-ASCII.
  for (var i = 0; i < 2; i++) {
    var full = (i !== 0);
    var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
    var f = full ? "f" : "c";
    for (var j = 0; j < 2; j++) {
      var ignore_case = (j === 0);
      var flag = ignore_case ? "i" : "";
      var re = new RegExp(mixed, flag);
      // "A" itself is below the range, so the subject matches only when case
      // is ignored or when the appended "\ufffe" falls inside the full range.
      var expected =
          ignore_case || (full && !!add_non_ascii_character_to_subject);
      assertEquals(expected, re.test("A" + suffix), 58 + flag + f);
      assertTrue(re.test("a" + suffix), 59 + flag + f);
      assertTrue(re.test("~" + suffix), 60 + flag + f);
      assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
      assertEquals(ignore_case || full, re.test(cyrillic.middle), 62 + flag + f);
    }
  }
}