1 /* 2 * Copyright (c) 2013 Yandex LLC. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Yandex LLC nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "platform/text/UnicodeUtilities.h" 33 34 #include "wtf/Vector.h" 35 #include "wtf/text/WTFString.h" 36 #include "wtf/unicode/CharacterNames.h" 37 #include <gtest/gtest.h> 38 #include <unicode/uchar.h> 39 40 using namespace WebCore; 41 42 namespace { 43 44 static const UChar32 kMaxLatinCharCount = 256; 45 46 static bool isTestFirstAndLastCharsInCategoryFailed = false; 47 UBool U_CALLCONV testFirstAndLastCharsInCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) 48 { 49 if (start >= kMaxLatinCharCount 50 && U_MASK(type) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK) 51 && (!isSeparator(start) || !isSeparator(limit - 1))) { 52 isTestFirstAndLastCharsInCategoryFailed = true; 53 54 // Break enumeration process 55 return 0; 56 } 57 58 return 1; 59 } 60 61 TEST(WebCoreUnicodeUnit, Separators) 62 { 63 static const bool latinSeparatorTable[kMaxLatinCharCount] = { 64 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . / 67 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ? 68 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @ 69 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _ 70 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ` 71 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~ 72 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 75 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 76 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 78 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 80 }; 81 82 for (UChar32 character = 0; character < kMaxLatinCharCount; ++character) { 83 EXPECT_EQ(isSeparator(character), latinSeparatorTable[character]); 84 } 85 86 isTestFirstAndLastCharsInCategoryFailed = false; 87 u_enumCharTypes(&testFirstAndLastCharsInCategory, 0); 88 EXPECT_FALSE(isTestFirstAndLastCharsInCategoryFailed); 89 } 90 91 TEST(WebCoreUnicodeUnit, KanaLetters) 92 { 93 // Non Kana symbols 94 for (UChar character = 0; character < 0x3041; ++character) 95 EXPECT_FALSE(isKanaLetter(character)); 96 97 // Hiragana letters. 98 for (UChar character = 0x3041; character <= 0x3096; ++character) 99 EXPECT_TRUE(isKanaLetter(character)); 100 101 // Katakana letters. 102 for (UChar character = 0x30A1; character <= 0x30FA; ++character) 103 EXPECT_TRUE(isKanaLetter(character)); 104 } 105 106 TEST(WebCoreUnicodeUnit, ContainsKanaLetters) 107 { 108 // Non Kana symbols 109 String nonKanaString; 110 for (UChar character = 0; character < 0x3041; ++character) 111 nonKanaString.append(character); 112 EXPECT_FALSE(containsKanaLetters(nonKanaString)); 113 114 // Hiragana letters. 115 for (UChar character = 0x3041; character <= 0x3096; ++character) { 116 String str(nonKanaString); 117 str.append(character); 118 EXPECT_TRUE(containsKanaLetters(str)); 119 } 120 121 // Katakana letters. 122 for (UChar character = 0x30A1; character <= 0x30FA; ++character) { 123 String str(nonKanaString); 124 str.append(character); 125 EXPECT_TRUE(containsKanaLetters(str)); 126 } 127 } 128 129 TEST(WebCoreUnicodeUnit, FoldQuoteMarkOrSoftHyphenTest) 130 { 131 const UChar charactersToFold[] = { 132 hebrewPunctuationGershayim, leftDoubleQuotationMark, rightDoubleQuotationMark, 133 hebrewPunctuationGeresh, leftSingleQuotationMark, rightSingleQuotationMark, 134 softHyphen 135 }; 136 137 String stringToFold(charactersToFold, WTF_ARRAY_LENGTH(charactersToFold)); 138 Vector<UChar> buffer; 139 stringToFold.appendTo(buffer); 140 141 foldQuoteMarksAndSoftHyphens(stringToFold); 142 143 const String foldedString("\"\"\"\'\'\'\0", WTF_ARRAY_LENGTH(charactersToFold)); 144 EXPECT_EQ(stringToFold, foldedString); 145 146 foldQuoteMarksAndSoftHyphens(buffer.data(), buffer.size()); 147 EXPECT_EQ(String(buffer), foldedString); 148 } 149 150 TEST(WebCoreUnicodeUnit, OnlyKanaLettersEqualityTest) 151 { 152 const UChar nonKanaString1[] = { 'a', 'b', 'c', 'd' }; 153 const UChar nonKanaString2[] = { 'e', 'f', 'g' }; 154 155 // Check that non-Kana letters will be skipped. 156 EXPECT_TRUE(checkOnlyKanaLettersInStrings( 157 nonKanaString1, WTF_ARRAY_LENGTH(nonKanaString1), 158 nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2))); 159 160 const UChar kanaString[] = { 'e', 'f', 'g', 0x3041 }; 161 EXPECT_FALSE(checkOnlyKanaLettersInStrings( 162 kanaString, WTF_ARRAY_LENGTH(kanaString), 163 nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2))); 164 165 // Compare with self. 166 EXPECT_TRUE(checkOnlyKanaLettersInStrings( 167 kanaString, WTF_ARRAY_LENGTH(kanaString), 168 kanaString, WTF_ARRAY_LENGTH(kanaString))); 169 170 UChar voicedKanaString1[] = { 0x3042, 0x3099 }; 171 UChar voicedKanaString2[] = { 0x3042, 0x309A }; 172 173 // Comparing strings with different sound marks should fail. 174 EXPECT_FALSE(checkOnlyKanaLettersInStrings( 175 voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1), 176 voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2))); 177 178 // Now strings will be the same. 179 voicedKanaString2[1] = 0x3099; 180 EXPECT_TRUE(checkOnlyKanaLettersInStrings( 181 voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1), 182 voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2))); 183 184 voicedKanaString2[0] = 0x3043; 185 EXPECT_FALSE(checkOnlyKanaLettersInStrings( 186 voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1), 187 voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2))); 188 } 189 190 TEST(WebCoreUnicodeUnit, StringsWithKanaLettersTest) 191 { 192 const UChar nonKanaString1[] = { 'a', 'b', 'c' }; 193 const UChar nonKanaString2[] = { 'a', 'b', 'c' }; 194 195 // Check that non-Kana letters will be compared. 196 EXPECT_TRUE(checkKanaStringsEqual( 197 nonKanaString1, WTF_ARRAY_LENGTH(nonKanaString1), 198 nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2))); 199 200 const UChar kanaString[] = { 'a', 'b', 'c', 0x3041 }; 201 EXPECT_FALSE(checkKanaStringsEqual( 202 kanaString, WTF_ARRAY_LENGTH(kanaString), 203 nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2))); 204 205 // Compare with self. 206 EXPECT_TRUE(checkKanaStringsEqual( 207 kanaString, WTF_ARRAY_LENGTH(kanaString), 208 kanaString, WTF_ARRAY_LENGTH(kanaString))); 209 210 const UChar kanaString2[] = { 'x', 'y', 'z', 0x3041 }; 211 // Comparing strings with different non-Kana letters should fail. 212 EXPECT_FALSE(checkKanaStringsEqual( 213 kanaString, WTF_ARRAY_LENGTH(kanaString), 214 kanaString2, WTF_ARRAY_LENGTH(kanaString2))); 215 216 const UChar kanaString3[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o' }; 217 // Check that non-Kana letters after Kana letters will be compared. 218 EXPECT_TRUE(checkKanaStringsEqual( 219 kanaString3, WTF_ARRAY_LENGTH(kanaString3), 220 kanaString3, WTF_ARRAY_LENGTH(kanaString3))); 221 222 const UChar kanaString4[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o', 'p' }; 223 // And now comparing should fail. 224 EXPECT_FALSE(checkKanaStringsEqual( 225 kanaString3, WTF_ARRAY_LENGTH(kanaString3), 226 kanaString4, WTF_ARRAY_LENGTH(kanaString4))); 227 228 UChar voicedKanaString1[] = { 0x3042, 0x3099 }; 229 UChar voicedKanaString2[] = { 0x3042, 0x309A }; 230 231 // Comparing strings with different sound marks should fail. 232 EXPECT_FALSE(checkKanaStringsEqual( 233 voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1), 234 voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2))); 235 236 // Now strings will be the same. 237 voicedKanaString2[1] = 0x3099; 238 EXPECT_TRUE(checkKanaStringsEqual( 239 voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1), 240 voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2))); 241 242 voicedKanaString2[0] = 0x3043; 243 EXPECT_FALSE(checkKanaStringsEqual( 244 voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1), 245 voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2))); 246 } 247 248 } // namespace 249