Home | History | Annotate | Download | only in i18n
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <string>
      6 
      7 #include "base/i18n/rtl.h"
      8 #include "base/i18n/string_search.h"
      9 #include "base/strings/string16.h"
     10 #include "base/strings/utf_string_conversions.h"
     11 #include "testing/gtest/include/gtest/gtest.h"
     12 #include "third_party/icu/source/i18n/unicode/usearch.h"
     13 
     14 namespace base {
     15 namespace i18n {
     16 
// Note on setting the default locale for testing: the current default locale
// on the Mac trybot is en_US_POSIX. With that locale, string search at
// primary-level collation strength is case-sensitive, whereas it should
// normally be case-insensitive. In other locales (including en_US, which
// English speakers in the U.S. use) the search is case-insensitive as
// expected. Tests that rely on case-insensitive matching therefore switch the
// default locale to en_US while they run and restore it afterwards.
     22 
     23 TEST(StringSearchTest, ASCII) {
     24   std::string default_locale(uloc_getDefault());
     25   bool locale_is_posix = (default_locale == "en_US_POSIX");
     26   if (locale_is_posix)
     27     SetICUDefaultLocale("en_US");
     28 
     29   size_t index = 0;
     30   size_t length = 0;
     31 
     32   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
     33       ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
     34   EXPECT_EQ(0U, index);
     35   EXPECT_EQ(5U, length);
     36 
     37   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
     38       ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
     39       &index, &length));
     40 
     41   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
     42       ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
     43   EXPECT_EQ(4U, index);
     44   EXPECT_EQ(6U, length);
     45 
     46   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
     47       ASCIIToUTF16("searching within empty string"), string16(),
     48       &index, &length));
     49 
     50   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
     51       string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
     52   EXPECT_EQ(0U, index);
     53   EXPECT_EQ(0U, length);
     54 
     55   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
     56       ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
     57       &index, &length));
     58   EXPECT_EQ(0U, index);
     59   EXPECT_EQ(18U, length);
     60 
     61   if (locale_is_posix)
     62     SetICUDefaultLocale(default_locale.data());
     63 }
     64 
     65 TEST(StringSearchTest, UnicodeLocaleIndependent) {
     66   // Base characters
     67   const string16 e_base = WideToUTF16(L"e");
     68   const string16 E_base = WideToUTF16(L"E");
     69   const string16 a_base = WideToUTF16(L"a");
     70 
     71   // Composed characters
     72   const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
     73   const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
     74   const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
     75   const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
     76   const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
     77 
     78   // Decomposed characters
     79   const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
     80   const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
     81   const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
     82   const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
     83   const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
     84 
     85   std::string default_locale(uloc_getDefault());
     86   bool locale_is_posix = (default_locale == "en_US_POSIX");
     87   if (locale_is_posix)
     88     SetICUDefaultLocale("en_US");
     89 
     90   size_t index = 0;
     91   size_t length = 0;
     92 
     93   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
     94       e_base, e_with_acute_accent, &index, &length));
     95   EXPECT_EQ(0U, index);
     96   EXPECT_EQ(e_with_acute_accent.size(), length);
     97 
     98   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
     99       e_with_acute_accent, e_base, &index, &length));
    100   EXPECT_EQ(0U, index);
    101   EXPECT_EQ(e_base.size(), length);
    102 
    103   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    104       e_base, e_with_acute_combining_mark, &index, &length));
    105   EXPECT_EQ(0U, index);
    106   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
    107 
    108   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    109       e_with_acute_combining_mark, e_base, &index, &length));
    110   EXPECT_EQ(0U, index);
    111   EXPECT_EQ(e_base.size(), length);
    112 
    113   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    114       e_with_acute_combining_mark, e_with_acute_accent,
    115       &index, &length));
    116   EXPECT_EQ(0U, index);
    117   EXPECT_EQ(e_with_acute_accent.size(), length);
    118 
    119   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    120       e_with_acute_accent, e_with_acute_combining_mark,
    121       &index, &length));
    122   EXPECT_EQ(0U, index);
    123   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
    124 
    125   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    126       e_with_acute_combining_mark, e_with_grave_combining_mark,
    127       &index, &length));
    128   EXPECT_EQ(0U, index);
    129   EXPECT_EQ(e_with_grave_combining_mark.size(), length);
    130 
    131   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    132       e_with_grave_combining_mark, e_with_acute_combining_mark,
    133       &index, &length));
    134   EXPECT_EQ(0U, index);
    135   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
    136 
    137   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    138       e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
    139   EXPECT_EQ(0U, index);
    140   EXPECT_EQ(e_with_grave_accent.size(), length);
    141 
    142   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    143       e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
    144   EXPECT_EQ(0U, index);
    145   EXPECT_EQ(e_with_acute_combining_mark.size(), length);
    146 
    147   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    148       E_with_acute_accent, e_with_acute_accent, &index, &length));
    149   EXPECT_EQ(0U, index);
    150   EXPECT_EQ(e_with_acute_accent.size(), length);
    151 
    152   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    153       E_with_grave_accent, e_with_acute_accent, &index, &length));
    154   EXPECT_EQ(0U, index);
    155   EXPECT_EQ(e_with_acute_accent.size(), length);
    156 
    157   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    158       E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
    159   EXPECT_EQ(0U, index);
    160   EXPECT_EQ(e_with_grave_accent.size(), length);
    161 
    162   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    163       E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
    164   EXPECT_EQ(0U, index);
    165   EXPECT_EQ(e_with_acute_accent.size(), length);
    166 
    167   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    168       E_base, e_with_grave_accent, &index, &length));
    169   EXPECT_EQ(0U, index);
    170   EXPECT_EQ(e_with_grave_accent.size(), length);
    171 
    172   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
    173       a_with_acute_accent, e_with_acute_accent, &index, &length));
    174 
    175   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
    176       a_with_acute_combining_mark, e_with_acute_combining_mark,
    177       &index, &length));
    178 
    179   if (locale_is_posix)
    180     SetICUDefaultLocale(default_locale.data());
    181 }
    182 
    183 TEST(StringSearchTest, UnicodeLocaleDependent) {
    184   // Base characters
    185   const string16 a_base = WideToUTF16(L"a");
    186 
    187   // Composed characters
    188   const string16 a_with_ring = WideToUTF16(L"\u00e5");
    189 
    190   EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
    191       a_base, a_with_ring, NULL, NULL));
    192 
    193   const char* default_locale = uloc_getDefault();
    194   SetICUDefaultLocale("da");
    195 
    196   EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
    197       a_base, a_with_ring, NULL, NULL));
    198 
    199   SetICUDefaultLocale(default_locale);
    200 }
    201 
    202 TEST(StringSearchTest, FixedPatternMultipleSearch) {
    203   std::string default_locale(uloc_getDefault());
    204   bool locale_is_posix = (default_locale == "en_US_POSIX");
    205   if (locale_is_posix)
    206     SetICUDefaultLocale("en_US");
    207 
    208   size_t index = 0;
    209   size_t length = 0;
    210 
    211   // Search "hello" over multiple texts.
    212   FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello"));
    213   EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length));
    214   EXPECT_EQ(2U, index);
    215   EXPECT_EQ(5U, length);
    216   EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length));
    217   EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length));
    218   EXPECT_EQ(0U, index);
    219   EXPECT_EQ(5U, length);
    220 
    221   if (locale_is_posix)
    222     SetICUDefaultLocale(default_locale.data());
    223 }
    224 
    225 }  // namespace i18n
    226 }  // namespace base
    227