Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (c) 2013 Yandex LLC. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Redistributions in binary form must reproduce the above
     11  * copyright notice, this list of conditions and the following disclaimer
     12  * in the documentation and/or other materials provided with the
     13  * distribution.
     14  *     * Neither the name of Yandex LLC nor the names of its
     15  * contributors may be used to endorse or promote products derived from
     16  * this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include "config.h"
     32 #include "platform/text/UnicodeUtilities.h"
     33 
     34 #include "wtf/Vector.h"
     35 #include "wtf/text/WTFString.h"
     36 #include "wtf/unicode/CharacterNames.h"
     37 #include <gtest/gtest.h>
     38 #include <unicode/uchar.h>
     39 
     40 using namespace WebCore;
     41 
     42 namespace {
     43 
     44 static const UChar32 kMaxLatinCharCount = 256;
     45 
     46 static bool isTestFirstAndLastCharsInCategoryFailed = false;
     47 UBool U_CALLCONV testFirstAndLastCharsInCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
     48 {
     49     if (start >= kMaxLatinCharCount
     50         && U_MASK(type) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK)
     51         && (!isSeparator(start) || !isSeparator(limit - 1))) {
     52         isTestFirstAndLastCharsInCategoryFailed = true;
     53 
     54         // Break enumeration process
     55         return 0;
     56     }
     57 
     58     return 1;
     59 }
     60 
     61 TEST(WebCoreUnicodeUnit, Separators)
     62 {
     63     static const bool latinSeparatorTable[kMaxLatinCharCount] = {
     64         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     65         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     66         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
     67         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, //                         : ; < = > ?
     68         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   @
     69         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, //                         [ \ ] ^ _
     70         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   `
     71         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, //                           { | } ~
     72         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     73         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     74         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
     75         1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
     76         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     77         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
     78         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     79         0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
     80     };
     81 
     82     for (UChar32 character = 0; character < kMaxLatinCharCount; ++character) {
     83         EXPECT_EQ(isSeparator(character), latinSeparatorTable[character]);
     84     }
     85 
     86     isTestFirstAndLastCharsInCategoryFailed = false;
     87     u_enumCharTypes(&testFirstAndLastCharsInCategory, 0);
     88     EXPECT_FALSE(isTestFirstAndLastCharsInCategoryFailed);
     89 }
     90 
     91 TEST(WebCoreUnicodeUnit, KanaLetters)
     92 {
     93     // Non Kana symbols
     94     for (UChar character = 0; character < 0x3041; ++character)
     95         EXPECT_FALSE(isKanaLetter(character));
     96 
     97     // Hiragana letters.
     98     for (UChar character = 0x3041; character <= 0x3096; ++character)
     99         EXPECT_TRUE(isKanaLetter(character));
    100 
    101     // Katakana letters.
    102     for (UChar character = 0x30A1; character <= 0x30FA; ++character)
    103         EXPECT_TRUE(isKanaLetter(character));
    104 }
    105 
    106 TEST(WebCoreUnicodeUnit, ContainsKanaLetters)
    107 {
    108     // Non Kana symbols
    109     String nonKanaString;
    110     for (UChar character = 0; character < 0x3041; ++character)
    111         nonKanaString.append(character);
    112     EXPECT_FALSE(containsKanaLetters(nonKanaString));
    113 
    114     // Hiragana letters.
    115     for (UChar character = 0x3041; character <= 0x3096; ++character) {
    116         String str(nonKanaString);
    117         str.append(character);
    118         EXPECT_TRUE(containsKanaLetters(str));
    119     }
    120 
    121     // Katakana letters.
    122     for (UChar character = 0x30A1; character <= 0x30FA; ++character) {
    123         String str(nonKanaString);
    124         str.append(character);
    125         EXPECT_TRUE(containsKanaLetters(str));
    126     }
    127 }
    128 
    129 TEST(WebCoreUnicodeUnit, FoldQuoteMarkOrSoftHyphenTest)
    130 {
    131     const UChar charactersToFold[] = {
    132         hebrewPunctuationGershayim, leftDoubleQuotationMark, rightDoubleQuotationMark,
    133         hebrewPunctuationGeresh, leftSingleQuotationMark, rightSingleQuotationMark,
    134         softHyphen
    135     };
    136 
    137     String stringToFold(charactersToFold, WTF_ARRAY_LENGTH(charactersToFold));
    138     Vector<UChar> buffer;
    139     stringToFold.appendTo(buffer);
    140 
    141     foldQuoteMarksAndSoftHyphens(stringToFold);
    142 
    143     const String foldedString("\"\"\"\'\'\'\0", WTF_ARRAY_LENGTH(charactersToFold));
    144     EXPECT_EQ(stringToFold, foldedString);
    145 
    146     foldQuoteMarksAndSoftHyphens(buffer.data(), buffer.size());
    147     EXPECT_EQ(String(buffer), foldedString);
    148 }
    149 
    150 TEST(WebCoreUnicodeUnit, OnlyKanaLettersEqualityTest)
    151 {
    152     const UChar nonKanaString1[] = { 'a', 'b', 'c', 'd' };
    153     const UChar nonKanaString2[] = { 'e', 'f', 'g' };
    154 
    155     // Check that non-Kana letters will be skipped.
    156     EXPECT_TRUE(checkOnlyKanaLettersInStrings(
    157         nonKanaString1, WTF_ARRAY_LENGTH(nonKanaString1),
    158         nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
    159 
    160     const UChar kanaString[] = { 'e', 'f', 'g', 0x3041 };
    161     EXPECT_FALSE(checkOnlyKanaLettersInStrings(
    162         kanaString, WTF_ARRAY_LENGTH(kanaString),
    163         nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
    164 
    165     // Compare with self.
    166     EXPECT_TRUE(checkOnlyKanaLettersInStrings(
    167         kanaString, WTF_ARRAY_LENGTH(kanaString),
    168         kanaString, WTF_ARRAY_LENGTH(kanaString)));
    169 
    170     UChar voicedKanaString1[] = { 0x3042, 0x3099 };
    171     UChar voicedKanaString2[] = { 0x3042, 0x309A };
    172 
    173     // Comparing strings with different sound marks should fail.
    174     EXPECT_FALSE(checkOnlyKanaLettersInStrings(
    175         voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
    176         voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
    177 
    178     // Now strings will be the same.
    179     voicedKanaString2[1] = 0x3099;
    180     EXPECT_TRUE(checkOnlyKanaLettersInStrings(
    181         voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
    182         voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
    183 
    184     voicedKanaString2[0] = 0x3043;
    185     EXPECT_FALSE(checkOnlyKanaLettersInStrings(
    186         voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
    187         voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
    188 }
    189 
    190 TEST(WebCoreUnicodeUnit, StringsWithKanaLettersTest)
    191 {
    192     const UChar nonKanaString1[] = { 'a', 'b', 'c' };
    193     const UChar nonKanaString2[] = { 'a', 'b', 'c' };
    194 
    195     // Check that non-Kana letters will be compared.
    196     EXPECT_TRUE(checkKanaStringsEqual(
    197         nonKanaString1, WTF_ARRAY_LENGTH(nonKanaString1),
    198         nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
    199 
    200     const UChar kanaString[] = { 'a', 'b', 'c', 0x3041 };
    201     EXPECT_FALSE(checkKanaStringsEqual(
    202         kanaString, WTF_ARRAY_LENGTH(kanaString),
    203         nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
    204 
    205     // Compare with self.
    206     EXPECT_TRUE(checkKanaStringsEqual(
    207         kanaString, WTF_ARRAY_LENGTH(kanaString),
    208         kanaString, WTF_ARRAY_LENGTH(kanaString)));
    209 
    210     const UChar kanaString2[] = { 'x', 'y', 'z', 0x3041 };
    211     // Comparing strings with different non-Kana letters should fail.
    212     EXPECT_FALSE(checkKanaStringsEqual(
    213         kanaString, WTF_ARRAY_LENGTH(kanaString),
    214         kanaString2, WTF_ARRAY_LENGTH(kanaString2)));
    215 
    216     const UChar kanaString3[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o' };
    217     // Check that non-Kana letters after Kana letters will be compared.
    218     EXPECT_TRUE(checkKanaStringsEqual(
    219         kanaString3, WTF_ARRAY_LENGTH(kanaString3),
    220         kanaString3, WTF_ARRAY_LENGTH(kanaString3)));
    221 
    222     const UChar kanaString4[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o', 'p' };
    223     // And now comparing should fail.
    224     EXPECT_FALSE(checkKanaStringsEqual(
    225         kanaString3, WTF_ARRAY_LENGTH(kanaString3),
    226         kanaString4, WTF_ARRAY_LENGTH(kanaString4)));
    227 
    228     UChar voicedKanaString1[] = { 0x3042, 0x3099 };
    229     UChar voicedKanaString2[] = { 0x3042, 0x309A };
    230 
    231     // Comparing strings with different sound marks should fail.
    232     EXPECT_FALSE(checkKanaStringsEqual(
    233         voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
    234         voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
    235 
    236     // Now strings will be the same.
    237     voicedKanaString2[1] = 0x3099;
    238     EXPECT_TRUE(checkKanaStringsEqual(
    239         voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
    240         voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
    241 
    242     voicedKanaString2[0] = 0x3043;
    243     EXPECT_FALSE(checkKanaStringsEqual(
    244         voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
    245         voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
    246 }
    247 
    248 } // namespace
    249